aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap1
-rw-r--r--CREDITS4
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt19
-rw-r--r--Documentation/arch/riscv/cmodx.rst4
-rw-r--r--Documentation/bpf/standardization/instruction-set.rst80
-rw-r--r--Documentation/devicetree/bindings/net/fsl,enetc-ierb.yaml38
-rw-r--r--Documentation/devicetree/bindings/net/fsl,enetc-mdio.yaml57
-rw-r--r--Documentation/devicetree/bindings/net/fsl,enetc.yaml66
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fman.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/fsl-enetc.txt119
-rw-r--r--Documentation/devicetree/bindings/net/mediatek,net.yaml28
-rw-r--r--Documentation/devicetree/bindings/net/pcs/snps,dw-xpcs.yaml136
-rw-r--r--Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml40
-rw-r--r--Documentation/devicetree/bindings/net/snps,dwmac.yaml147
-rw-r--r--Documentation/driver-api/cxl/memory-devices.rst15
-rw-r--r--Documentation/kbuild/modules.rst8
-rw-r--r--Documentation/netlink/specs/ethtool.yaml113
-rw-r--r--Documentation/netlink/specs/ovs_flow.yaml17
-rw-r--r--Documentation/netlink/specs/tcp_metrics.yaml169
-rw-r--r--Documentation/networking/devlink/devlink-region.rst2
-rw-r--r--Documentation/networking/ethtool-netlink.rst157
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst1
-rw-r--r--MAINTAINERS17
-rw-r--r--Makefile2
-rw-r--r--arch/arm/boot/dts/rockchip/rk3066a.dtsi1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts18
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3308.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3368.dtsi3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi5
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts2
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h1
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/arm64/kernel/pi/map_kernel.c2
-rw-r--r--arch/arm64/kernel/syscall.c16
-rw-r--r--arch/arm64/mm/mmu.c3
-rw-r--r--arch/arm64/net/bpf_jit_comp.c12
-rw-r--r--arch/csky/include/uapi/asm/unistd.h1
-rw-r--r--arch/csky/kernel/syscall.c2
-rw-r--r--arch/hexagon/include/asm/syscalls.h6
-rw-r--r--arch/hexagon/include/uapi/asm/unistd.h1
-rw-r--r--arch/hexagon/kernel/syscalltab.c7
-rw-r--r--arch/loongarch/kernel/syscall.c2
-rw-r--r--arch/microblaze/kernel/sys_microblaze.c2
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl2
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl2
-rw-r--r--arch/parisc/Kconfig1
-rw-r--r--arch/parisc/kernel/sys_parisc32.c9
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl6
-rw-r--r--arch/powerpc/kernel/eeh_pe.c7
-rw-r--r--arch/powerpc/kernel/head_64.S5
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl6
-rw-r--r--arch/powerpc/kexec/core_64.c11
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c4
-rw-r--r--arch/powerpc/platforms/pseries/kexec.c8
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h1
-rw-r--r--arch/powerpc/platforms/pseries/setup.c5
-rw-r--r--arch/riscv/boot/dts/canaan/canaan_kd233.dts7
-rw-r--r--arch/riscv/boot/dts/canaan/k210.dtsi23
-rw-r--r--arch/riscv/boot/dts/canaan/k210_generic.dts5
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts9
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts7
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_go.dts9
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maixduino.dts10
-rw-r--r--arch/riscv/boot/dts/starfive/jh7110-common.dtsi2
-rw-r--r--arch/riscv/include/asm/insn.h2
-rw-r--r--arch/riscv/kernel/ftrace.c7
-rw-r--r--arch/riscv/kernel/machine_kexec.c10
-rw-r--r--arch/riscv/kernel/patch.c26
-rw-r--r--arch/riscv/kernel/stacktrace.c5
-rw-r--r--arch/riscv/kernel/sys_riscv.c4
-rw-r--r--arch/riscv/kvm/vcpu_pmu.c2
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c123
-rw-r--r--arch/riscv/net/bpf_jit_core.c5
-rw-r--r--arch/s390/boot/startup.c11
-rw-r--r--arch/s390/configs/debug_defconfig5
-rw-r--r--arch/s390/configs/defconfig5
-rw-r--r--arch/s390/include/asm/entry-common.h2
-rw-r--r--arch/s390/include/asm/kvm_host.h1
-rw-r--r--arch/s390/kernel/syscall.c27
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/s390/kvm/kvm-s390.c1
-rw-r--r--arch/s390/kvm/kvm-s390.h15
-rw-r--r--arch/s390/kvm/priv.c32
-rw-r--r--arch/s390/mm/pgalloc.c4
-rw-r--r--arch/s390/net/bpf_jit_comp.c489
-rw-r--r--arch/s390/pci/pci_irq.c2
-rw-r--r--arch/sh/kernel/sys_sh32.c11
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl3
-rw-r--r--arch/sparc/kernel/sys32.S221
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl8
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl2
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h12
-rw-r--r--arch/x86/include/asm/entry-common.h15
-rw-r--r--arch/x86/kernel/time.c20
-rw-r--r--arch/x86/net/bpf_jit_comp.c15
-rw-r--r--arch/xtensa/include/asm/current.h2
-rw-r--r--arch/xtensa/include/asm/thread_info.h2
-rw-r--r--drivers/acpi/processor_idle.c37
-rw-r--r--drivers/ata/ahci.c17
-rw-r--r--drivers/ata/libata-core.c32
-rw-r--r--drivers/bluetooth/btintel_pcie.c2
-rw-r--r--drivers/bluetooth/btnxpuart.c2
-rw-r--r--drivers/bluetooth/hci_bcm4377.c10
-rw-r--r--drivers/bluetooth/hci_qca.c18
-rw-r--r--drivers/char/tpm/Makefile2
-rw-r--r--drivers/char/tpm/tpm2-sessions.c419
-rw-r--r--drivers/clk/mediatek/clk-mt8183-mfgcfg.c1
-rw-r--r--drivers/clk/mediatek/clk-mtk.c24
-rw-r--r--drivers/clk/mediatek/clk-mtk.h2
-rw-r--r--drivers/clk/qcom/apss-ipq-pll.c2
-rw-r--r--drivers/clk/qcom/clk-alpha-pll.c3
-rw-r--r--drivers/clk/qcom/gcc-ipq9574.c10
-rw-r--r--drivers/clk/qcom/gcc-sm6350.c10
-rw-r--r--drivers/clk/sunxi-ng/ccu_common.c18
-rw-r--r--drivers/counter/ti-eqep.c6
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c4
-rw-r--r--drivers/cpufreq/cpufreq.c3
-rw-r--r--drivers/cpufreq/intel_pstate.c13
-rw-r--r--drivers/crypto/caam/Kconfig2
-rw-r--r--drivers/crypto/caam/caamalg_qi2.c28
-rw-r--r--drivers/crypto/caam/caamalg_qi2.h2
-rw-r--r--drivers/crypto/caam/ctrl.c2
-rw-r--r--drivers/crypto/caam/qi.c43
-rw-r--r--drivers/crypto/intel/qat/qat_common/Makefile5
-rw-r--r--drivers/cxl/core/hdm.c13
-rw-r--r--drivers/cxl/core/pmem.c16
-rw-r--r--drivers/cxl/core/region.c103
-rw-r--r--drivers/cxl/cxl.h6
-rw-r--r--drivers/cxl/cxlmem.h21
-rw-r--r--drivers/cxl/mem.c17
-rw-r--r--drivers/firmware/sysfb.c12
-rw-r--r--drivers/gpio/gpio-davinci.c5
-rw-r--r--drivers/gpio/gpio-graniterapids.c2
-rw-r--r--drivers/gpio/gpio-mmio.c2
-rw-r--r--drivers/gpio/gpio-pca953x.c2
-rw-r--r--drivers/gpio/gpiolib-cdev.c28
-rw-r--r--drivers/gpio/gpiolib-of.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h5
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c53
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c10
-rw-r--r--drivers/gpu/drm/amd/display/include/dpcd_defs.h5
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmware.h2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c13
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h5
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c73
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c6
-rw-r--r--drivers/gpu/drm/drm_fbdev_dma.c5
-rw-r--r--drivers/gpu/drm/drm_fbdev_generic.c3
-rw-r--r--drivers/gpu/drm/drm_file.c8
-rw-r--r--drivers/gpu/drm/drm_panel_orientation_quirks.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c1
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/tvnv17.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.c3
-rw-r--r--drivers/gpu/drm/panel/panel-simple.c1
-rw-r--r--drivers/gpu/drm/panthor/panthor_drv.c6
-rw-r--r--drivers/gpu/drm/panthor/panthor_sched.c44
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c2
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.c6
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c8
-rw-r--r--drivers/i2c/busses/Makefile6
-rw-r--r--drivers/i2c/busses/i2c-pnx.c48
-rw-r--r--drivers/i2c/busses/i2c-viai2c-common.c71
-rw-r--r--drivers/i2c/busses/i2c-viai2c-common.h2
-rw-r--r--drivers/i2c/busses/i2c-viai2c-wmt.c36
-rw-r--r--drivers/i2c/busses/i2c-viai2c-zhaoxin.c113
-rw-r--r--drivers/i2c/i2c-slave-testunit.c5
-rw-r--r--drivers/iio/accel/Kconfig2
-rw-r--r--drivers/iio/adc/ad7266.c2
-rw-r--r--drivers/iio/adc/xilinx-ams.c8
-rw-r--r--drivers/iio/chemical/bme680.h2
-rw-r--r--drivers/iio/chemical/bme680_core.c62
-rw-r--r--drivers/iio/dac/Kconfig2
-rw-r--r--drivers/iio/humidity/hdc3020.c325
-rw-r--r--drivers/iommu/amd/init.c1
-rw-r--r--drivers/iommu/amd/iommu.c12
-rw-r--r--drivers/iommu/intel/iommu.c20
-rw-r--r--drivers/irqchip/irq-loongson-eiointc.c5
-rw-r--r--drivers/irqchip/irq-loongson-liointc.c4
-rw-r--r--drivers/media/pci/intel/ipu6/ipu6-isys-video.c2
-rw-r--r--drivers/media/pci/intel/ipu6/ipu6-isys.c2
-rw-r--r--drivers/media/pci/intel/ivsc/Kconfig1
-rw-r--r--drivers/mmc/host/moxart-mmc.c78
-rw-r--r--drivers/mmc/host/sdhci-brcmstb.c4
-rw-r--r--drivers/mmc/host/sdhci-pci-core.c11
-rw-r--r--drivers/mmc/host/sdhci-pci-o2micro.c41
-rw-r--r--drivers/mmc/host/sdhci.c25
-rw-r--r--drivers/mtd/nand/raw/nand_base.c66
-rw-r--r--drivers/mtd/nand/raw/rockchip-nand-controller.c6
-rw-r--r--drivers/net/bonding/bond_options.c6
-rw-r--r--drivers/net/can/m_can/m_can.h2
-rw-r--r--drivers/net/can/m_can/m_can_pci.c2
-rw-r--r--drivers/net/can/m_can/m_can_platform.c2
-rw-r--r--drivers/net/can/m_can/tcan4x5x-core.c2
-rw-r--r--drivers/net/can/rcar/rcar_canfd.c41
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c82
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c2
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c2
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c5
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c165
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c129
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c29
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd.h56
-rw-r--r--drivers/net/can/usb/gs_usb.c5
-rw-r--r--drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c1
-rw-r--r--drivers/net/dsa/lan9303-core.c23
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c3
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h7
-rw-r--r--drivers/net/dsa/microchip/lan937x_main.c32
-rw-r--r--drivers/net/dsa/microchip/lan937x_reg.h5
-rw-r--r--drivers/net/dsa/qca/ar9331.c2
-rw-r--r--drivers/net/dsa/qca/qca8k-8xxx.c2
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp.c1
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x.h2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c133
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h43
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c6
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c151
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h36
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_ethtool.c23
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c73
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c132
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c9
-rw-r--r--drivers/net/ethernet/intel/ice/devlink/devlink_port.c59
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adapter.c60
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h51
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c99
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.h30
-rw-r--r--drivers/net/ethernet/intel/ice/ice_controlq.c30
-rw-r--r--drivers/net/ethernet/intel/ice/ice_controlq.h15
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c442
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.h29
-rw-r--r--drivers/net/ethernet/intel/ice/ice_hwmon.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c19
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c131
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.h9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp_hw.c20
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sriov.c34
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sriov.h8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h8
-rw-r--r--drivers/net/ethernet/lantiq_etop.c4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h8
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.c68
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c24
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c20
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe.h2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_star_emac.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_env.c57
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_env.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_linecards.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_thermal.c51
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/item.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/minimal.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c15
-rw-r--r--drivers/net/ethernet/micrel/ks8851_common.c10
-rw-r--r--drivers/net/ethernet/micrel/ks8851_spi.c4
-rw-r--r--drivers/net/ethernet/microchip/encx24j600-regmap.c6
-rw-r--r--drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c7
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c4
-rw-r--r--drivers/net/ethernet/renesas/rswitch.c4
-rw-r--r--drivers/net/ethernet/sfc/ef10.c2
-rw-r--r--drivers/net/ethernet/sfc/ef100_ethtool.c4
-rw-r--r--drivers/net/ethernet/sfc/efx.c2
-rw-r--r--drivers/net/ethernet/sfc/efx.h2
-rw-r--r--drivers/net/ethernet/sfc/efx_common.c10
-rw-r--r--drivers/net/ethernet/sfc/ethtool.c4
-rw-r--r--drivers/net/ethernet/sfc/ethtool_common.c168
-rw-r--r--drivers/net/ethernet/sfc/ethtool_common.h12
-rw-r--r--drivers/net/ethernet/sfc/mcdi_filters.c135
-rw-r--r--drivers/net/ethernet/sfc/mcdi_filters.h8
-rw-r--r--drivers/net/ethernet/sfc/net_driver.h28
-rw-r--r--drivers/net/ethernet/sfc/rx_common.c64
-rw-r--r--drivers/net/ethernet/sfc/rx_common.h8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c36
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c9
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c7
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c32
-rw-r--r--drivers/net/ethernet/tehuti/tn40.c64
-rw-r--r--drivers/net/ethernet/tehuti/tn40.h1
-rw-r--r--drivers/net/ethernet/ti/Kconfig1
-rw-r--r--drivers/net/ethernet/ti/icssg/icss_iep.c4
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.c1
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_lib.c10
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_type.h1
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c4
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_main.c4
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c124
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h2
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_main.c9
-rw-r--r--drivers/net/ntb_netdev.c2
-rw-r--r--drivers/net/pcs/Kconfig6
-rw-r--r--drivers/net/pcs/Makefile3
-rw-r--r--drivers/net/pcs/pcs-xpcs-plat.c460
-rw-r--r--drivers/net/pcs/pcs-xpcs.c361
-rw-r--r--drivers/net/pcs/pcs-xpcs.h7
-rw-r--r--drivers/net/phy/aquantia/aquantia.h6
-rw-r--r--drivers/net/phy/aquantia/aquantia_firmware.c4
-rw-r--r--drivers/net/phy/aquantia/aquantia_main.c40
-rw-r--r--drivers/net/phy/microchip.c126
-rw-r--r--drivers/net/phy/microchip_t1.c2
-rw-r--r--drivers/net/phy/phy.c2
-rw-r--r--drivers/net/phy/phy_device.c9
-rw-r--r--drivers/net/phy/phylink.c2
-rw-r--r--drivers/net/phy/realtek.c106
-rw-r--r--drivers/net/ppp/ppp_generic.c15
-rw-r--r--drivers/net/pse-pd/pd692x0.c317
-rw-r--r--drivers/net/pse-pd/pse_core.c172
-rw-r--r--drivers/net/tun.c7
-rw-r--r--drivers/net/wireguard/allowedips.c4
-rw-r--r--drivers/net/wireguard/queueing.h4
-rw-r--r--drivers/net/wireguard/send.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c14
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ops.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rx.c15
-rw-r--r--drivers/net/wireless/microchip/wilc1000/hif.c3
-rw-r--r--drivers/nvme/host/apple.c1
-rw-r--r--drivers/nvme/host/nvme.h2
-rw-r--r--drivers/nvme/target/configfs.c41
-rw-r--r--drivers/nvme/target/fc.c2
-rw-r--r--drivers/of/irq.c18
-rw-r--r--drivers/pci/msi/msi.c10
-rw-r--r--drivers/perf/riscv_pmu.c2
-rw-r--r--drivers/perf/riscv_pmu_sbi.c44
-rw-r--r--drivers/platform/mellanox/nvsw-sn2201.c5
-rw-r--r--drivers/platform/x86/amilo-rfkill.c1
-rw-r--r--drivers/platform/x86/firmware_attributes_class.c1
-rw-r--r--drivers/platform/x86/ibm_rtl.c1
-rw-r--r--drivers/platform/x86/intel/hid.c1
-rw-r--r--drivers/platform/x86/intel/pmc/pltdrv.c1
-rw-r--r--drivers/platform/x86/intel/rst.c1
-rw-r--r--drivers/platform/x86/intel/smartconnect.c1
-rw-r--r--drivers/platform/x86/intel/vbtn.c1
-rw-r--r--drivers/platform/x86/lg-laptop.c89
-rw-r--r--drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c1
-rw-r--r--drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c1
-rw-r--r--drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c1
-rw-r--r--drivers/platform/x86/siemens/simatic-ipc-batt.c1
-rw-r--r--drivers/platform/x86/siemens/simatic-ipc.c1
-rw-r--r--drivers/platform/x86/toshiba_acpi.c32
-rw-r--r--drivers/platform/x86/uv_sysfs.c1
-rw-r--r--drivers/platform/x86/wireless-hotkey.c3
-rw-r--r--drivers/platform/x86/xo1-rfkill.c1
-rw-r--r--drivers/reset/Kconfig1
-rw-r--r--drivers/reset/hisilicon/hi6220_reset.c1
-rw-r--r--drivers/s390/block/dasd_eckd.c4
-rw-r--r--drivers/s390/block/dasd_fba.c2
-rw-r--r--drivers/s390/char/sclp.c1
-rw-r--r--drivers/s390/cio/vfio_ccw_cp.c9
-rw-r--r--drivers/s390/virtio/virtio_ccw.c4
-rw-r--r--drivers/scsi/libsas/sas_ata.c6
-rw-r--r--drivers/scsi/libsas/sas_discover.c2
-rw-r--r--drivers/scsi/libsas/sas_internal.h14
-rw-r--r--drivers/scsi/scsi_debug.c6
-rw-r--r--drivers/scsi/sd.c5
-rw-r--r--drivers/soc/litex/Kconfig2
-rw-r--r--drivers/soc/litex/litex_soc_ctrl.c4
-rw-r--r--drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c4
-rw-r--r--drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h5
-rw-r--r--drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c2
-rw-r--r--drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c7
-rw-r--r--drivers/tee/optee/ffa_abi.c12
-rw-r--r--drivers/thermal/gov_power_allocator.c3
-rw-r--r--drivers/thermal/gov_step_wise.c23
-rw-r--r--drivers/thermal/thermal_core.c15
-rw-r--r--drivers/thermal/thermal_core.h6
-rw-r--r--drivers/tty/mxser.c2
-rw-r--r--drivers/tty/serial/8250/8250_core.c5
-rw-r--r--drivers/tty/serial/8250/8250_omap.c25
-rw-r--r--drivers/tty/serial/8250/8250_pci.c13
-rw-r--r--drivers/tty/serial/bcm63xx_uart.c7
-rw-r--r--drivers/tty/serial/imx.c4
-rw-r--r--drivers/tty/serial/mcf.c2
-rw-r--r--drivers/tty/serial/serial_base.h30
-rw-r--r--drivers/tty/serial/serial_base_bus.c129
-rw-r--r--drivers/tty/serial/serial_core.c4
-rw-r--r--drivers/ufs/core/ufs-mcq.c11
-rw-r--r--drivers/ufs/core/ufshcd.c2
-rw-r--r--drivers/usb/atm/cxacru.c14
-rw-r--r--drivers/usb/dwc3/core.c26
-rw-r--r--drivers/usb/gadget/function/f_printer.c40
-rw-r--r--drivers/usb/gadget/function/u_ether.c4
-rw-r--r--drivers/usb/gadget/udc/aspeed_udc.c4
-rw-r--r--drivers/usb/musb/da8xx.c8
-rw-r--r--drivers/usb/typec/ucsi/ucsi_acpi.c61
-rw-r--r--drivers/usb/typec/ucsi/ucsi_glink.c5
-rw-r--r--drivers/usb/typec/ucsi/ucsi_stm32g0.c19
-rw-r--r--drivers/vfio/pci/vfio_pci_core.c2
-rw-r--r--drivers/watchdog/Kconfig1
-rw-r--r--drivers/watchdog/menz69_wdt.c1
-rw-r--r--drivers/watchdog/omap_wdt.c1
-rw-r--r--drivers/watchdog/simatic-ipc-wdt.c1
-rw-r--r--drivers/watchdog/ts4800_wdt.c1
-rw-r--r--drivers/watchdog/twl4030_wdt.c1
-rw-r--r--fs/afs/inode.c4
-rw-r--r--fs/bcachefs/alloc_background.c311
-rw-r--r--fs/bcachefs/alloc_background.h6
-rw-r--r--fs/bcachefs/alloc_foreground.c6
-rw-r--r--fs/bcachefs/backpointers.c70
-rw-r--r--fs/bcachefs/bcachefs.h16
-rw-r--r--fs/bcachefs/bkey.c5
-rw-r--r--fs/bcachefs/bkey.h7
-rw-r--r--fs/bcachefs/btree_gc.c24
-rw-r--r--fs/bcachefs/btree_iter.c19
-rw-r--r--fs/bcachefs/btree_write_buffer.c37
-rw-r--r--fs/bcachefs/btree_write_buffer.h3
-rw-r--r--fs/bcachefs/chardev.c9
-rw-r--r--fs/bcachefs/clock.c7
-rw-r--r--fs/bcachefs/data_update.c44
-rw-r--r--fs/bcachefs/data_update.h5
-rw-r--r--fs/bcachefs/debug.c113
-rw-r--r--fs/bcachefs/eytzinger.h6
-rw-r--r--fs/bcachefs/fs.c11
-rw-r--r--fs/bcachefs/io_read.c4
-rw-r--r--fs/bcachefs/journal.c23
-rw-r--r--fs/bcachefs/journal.h2
-rw-r--r--fs/bcachefs/journal_io.c19
-rw-r--r--fs/bcachefs/journal_seq_blacklist.c2
-rw-r--r--fs/bcachefs/lru.c39
-rw-r--r--fs/bcachefs/lru.h3
-rw-r--r--fs/bcachefs/move.c25
-rw-r--r--fs/bcachefs/sb-errors.c14
-rw-r--r--fs/bcachefs/sb-errors_format.h3
-rw-r--r--fs/bcachefs/seqmutex.h11
-rw-r--r--fs/bcachefs/snapshot.c3
-rw-r--r--fs/bcachefs/super.c17
-rw-r--r--fs/btrfs/block-group.c13
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/free-space-cache.c2
-rw-r--r--fs/btrfs/inode.c2
-rw-r--r--fs/btrfs/qgroup.c14
-rw-r--r--fs/btrfs/ref-verify.c9
-rw-r--r--fs/btrfs/scrub.c24
-rw-r--r--fs/btrfs/space-info.c24
-rw-r--r--fs/btrfs/tree-log.c43
-rw-r--r--fs/cachefiles/cache.c45
-rw-r--r--fs/cachefiles/daemon.c4
-rw-r--r--fs/cachefiles/internal.h3
-rw-r--r--fs/cachefiles/ondemand.c52
-rw-r--r--fs/cachefiles/volume.c1
-rw-r--r--fs/cachefiles/xattr.c5
-rw-r--r--fs/dcache.c43
-rw-r--r--fs/erofs/super.c2
-rw-r--r--fs/erofs/zmap.c2
-rw-r--r--fs/erofs/zutil.c8
-rw-r--r--fs/hfsplus/xattr.c2
-rw-r--r--fs/locks.c11
-rw-r--r--fs/minix/namei.c3
-rw-r--r--fs/namei.c10
-rw-r--r--fs/netfs/buffered_read.c14
-rw-r--r--fs/netfs/buffered_write.c24
-rw-r--r--fs/netfs/direct_read.c2
-rw-r--r--fs/netfs/direct_write.c11
-rw-r--r--fs/netfs/fscache_cache.c4
-rw-r--r--fs/netfs/fscache_cookie.c28
-rw-r--r--fs/netfs/fscache_io.c12
-rw-r--r--fs/netfs/fscache_main.c2
-rw-r--r--fs/netfs/fscache_volume.c18
-rw-r--r--fs/netfs/internal.h44
-rw-r--r--fs/netfs/io.c12
-rw-r--r--fs/netfs/main.c4
-rw-r--r--fs/netfs/misc.c85
-rw-r--r--fs/netfs/write_collect.c16
-rw-r--r--fs/netfs/write_issue.c38
-rw-r--r--fs/nfsd/nfsctl.c2
-rw-r--r--fs/nfsd/nfssvc.c1
-rw-r--r--fs/nilfs2/alloc.c19
-rw-r--r--fs/nilfs2/alloc.h4
-rw-r--r--fs/nilfs2/dat.c2
-rw-r--r--fs/nilfs2/dir.c38
-rw-r--r--fs/nilfs2/ifile.c7
-rw-r--r--fs/nilfs2/nilfs.h10
-rw-r--r--fs/nilfs2/the_nilfs.c6
-rw-r--r--fs/nilfs2/the_nilfs.h2
-rw-r--r--fs/open.c26
-rw-r--r--fs/smb/client/file.c30
-rw-r--r--fs/smb/common/smb2pdu.h34
-rw-r--r--fs/smb/server/smb2pdu.c22
-rw-r--r--fs/super.c11
-rw-r--r--fs/userfaultfd.c7
-rw-r--r--fs/verity/measure.c5
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c31
-rw-r--r--fs/xfs/libxfs/xfs_fs.h2
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c23
-rw-r--r--fs/xfs/xfs_bmap_util.c30
-rw-r--r--fs/xfs/xfs_bmap_util.h2
-rw-r--r--fs/xfs/xfs_icache.c2
-rw-r--r--fs/xfs/xfs_inode.c24
-rw-r--r--fs/xfs/xfs_iomap.c34
-rw-r--r--include/asm-generic/syscalls.h2
-rw-r--r--include/linux/bpf.h21
-rw-r--r--include/linux/bpf_verifier.h12
-rw-r--r--include/linux/btf.h65
-rw-r--r--include/linux/closure.h30
-rw-r--r--include/linux/compat.h2
-rw-r--r--include/linux/ethtool.h152
-rw-r--r--include/linux/filter.h44
-rw-r--r--include/linux/fscache-cache.h6
-rw-r--r--include/linux/fsnotify.h8
-rw-r--r--include/linux/ftrace.h6
-rw-r--r--include/linux/io_uring_types.h1
-rw-r--r--include/linux/libata.h1
-rw-r--r--include/linux/mlx5/mlx5_ifc.h6
-rw-r--r--include/linux/mmzone.h3
-rw-r--r--include/linux/module.h16
-rw-r--r--include/linux/netdevice.h7
-rw-r--r--include/linux/nvme.h6
-rw-r--r--include/linux/page_ref.h57
-rw-r--r--include/linux/pagemap.h11
-rw-r--r--include/linux/pcs/pcs-xpcs.h49
-rw-r--r--include/linux/phy.h2
-rw-r--r--include/linux/printk.h3
-rw-r--r--include/linux/pse-pd/pse.h51
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/serial_core.h21
-rw-r--r--include/linux/sfp.h6
-rw-r--r--include/linux/skbuff.h3
-rw-r--r--include/linux/skbuff_ref.h4
-rw-r--r--include/linux/socket.h2
-rw-r--r--include/linux/stmmac.h2
-rw-r--r--include/linux/swap.h3
-rw-r--r--include/linux/syscalls.h26
-rw-r--r--include/linux/tpm.h81
-rw-r--r--include/net/bluetooth/hci.h11
-rw-r--r--include/net/bluetooth/hci_sync.h2
-rw-r--r--include/net/mac80211.h2
-rw-r--r--include/net/netfilter/nf_flow_table.h15
-rw-r--r--include/net/netfilter/nf_tables.h222
-rw-r--r--include/net/netmem.h15
-rw-r--r--include/net/page_pool/helpers.h91
-rw-r--r--include/net/page_pool/types.h15
-rw-r--r--include/net/psample.h5
-rw-r--r--include/net/sctp/stream_sched.h8
-rw-r--r--include/net/tcx.h13
-rw-r--r--include/net/xdp_sock.h14
-rw-r--r--include/trace/events/fscache.h4
-rw-r--r--include/trace/events/page_pool.h30
-rw-r--r--include/uapi/asm-generic/unistd.h2
-rw-r--r--include/uapi/drm/panthor_drm.h5
-rw-r--r--include/uapi/linux/bpf.h2
-rw-r--r--include/uapi/linux/ethtool.h209
-rw-r--r--include/uapi/linux/ethtool_netlink.h31
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h2
-rw-r--r--include/uapi/linux/openvswitch.h31
-rw-r--r--include/uapi/linux/psample.h11
-rw-r--r--include/uapi/linux/tcp_metrics.h22
-rw-r--r--include/uapi/linux/trace_mmap.h2
-rw-r--r--io_uring/io_uring.c4
-rw-r--r--io_uring/net.c26
-rw-r--r--kernel/bpf/Makefile8
-rw-r--r--kernel/bpf/bpf_lsm.c1
-rw-r--r--kernel/bpf/bpf_struct_ops.c2
-rw-r--r--kernel/bpf/btf.c189
-rw-r--r--kernel/bpf/core.c15
-rw-r--r--kernel/bpf/cpumap.c19
-rw-r--r--kernel/bpf/crypto.c42
-rw-r--r--kernel/bpf/devmap.c43
-rw-r--r--kernel/bpf/helpers.c144
-rw-r--r--kernel/bpf/log.c6
-rw-r--r--kernel/bpf/task_iter.c9
-rw-r--r--kernel/bpf/verifier.c140
-rw-r--r--kernel/cpu.c11
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/kallsyms.c23
-rw-r--r--kernel/module/kallsyms.c25
-rw-r--r--kernel/module/main.c5
-rw-r--r--kernel/printk/Makefile2
-rw-r--r--kernel/printk/conopt.c146
-rw-r--r--kernel/printk/console_cmdline.h6
-rw-r--r--kernel/printk/printk.c23
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--kernel/time/hrtimer.c2
-rw-r--r--kernel/trace/bpf_trace.c15
-rw-r--r--kernel/trace/ftrace.c13
-rwxr-xr-xlib/build_OID_registry4
-rw-r--r--lib/closure.c55
-rw-r--r--lib/string_helpers_kunit.c1
-rw-r--r--lib/string_kunit.c1
-rw-r--r--lib/test_bpf.c10
-rw-r--r--mm/damon/core.c23
-rw-r--r--mm/filemap.c20
-rw-r--r--mm/gup.c291
-rw-r--r--mm/huge_memory.c2
-rw-r--r--mm/hugetlb.c70
-rw-r--r--mm/hugetlb_vmemmap.c16
-rw-r--r--mm/internal.h4
-rw-r--r--mm/memcontrol.c11
-rw-r--r--mm/migrate.c13
-rw-r--r--mm/page-writeback.c32
-rw-r--r--mm/readahead.c8
-rw-r--r--mm/shmem.c15
-rw-r--r--mm/vmalloc.c10
-rw-r--r--mm/workingset.c14
-rw-r--r--net/bluetooth/hci_conn.c15
-rw-r--r--net/bluetooth/hci_core.c76
-rw-r--r--net/bluetooth/hci_event.c33
-rw-r--r--net/bluetooth/hci_sync.c13
-rw-r--r--net/bluetooth/iso.c3
-rw-r--r--net/bluetooth/l2cap_core.c3
-rw-r--r--net/bluetooth/l2cap_sock.c14
-rw-r--r--net/bpf/test_run.c33
-rw-r--r--net/core/datagram.c67
-rw-r--r--net/core/dev.c45
-rw-r--r--net/core/filter.c89
-rw-r--r--net/core/page_pool.c309
-rw-r--r--net/core/rtnetlink.c18
-rw-r--r--net/core/skbuff.c14
-rw-r--r--net/core/skmsg.c3
-rw-r--r--net/ethtool/Makefile2
-rw-r--r--net/ethtool/channels.c6
-rw-r--r--net/ethtool/cmis.h124
-rw-r--r--net/ethtool/cmis_cdb.c602
-rw-r--r--net/ethtool/cmis_fw_update.c399
-rw-r--r--net/ethtool/coalesce.c5
-rw-r--r--net/ethtool/common.c51
-rw-r--r--net/ethtool/common.h2
-rw-r--r--net/ethtool/eeprom.c6
-rw-r--r--net/ethtool/ioctl.c152
-rw-r--r--net/ethtool/linkstate.c41
-rw-r--r--net/ethtool/module.c394
-rw-r--r--net/ethtool/module_fw.h75
-rw-r--r--net/ethtool/netlink.c56
-rw-r--r--net/ethtool/netlink.h16
-rw-r--r--net/ethtool/pse-pd.c119
-rw-r--r--net/ethtool/wol.c2
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/tcp_input.c20
-rw-r--r--net/ipv4/tcp_metrics.c1
-rw-r--r--net/ipv4/tcp_timer.c17
-rw-r--r--net/ipv4/udp.c7
-rw-r--r--net/ipv4/udp_offload.c8
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/l2tp/l2tp_core.c12
-rw-r--r--net/mac802154/main.c14
-rw-r--r--net/mac802154/tx.c8
-rw-r--r--net/netfilter/Makefile7
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c4
-rw-r--r--net/netfilter/nf_conncount.c8
-rw-r--r--net/netfilter/nf_flow_table_bpf.c121
-rw-r--r--net/netfilter/nf_flow_table_inet.c2
-rw-r--r--net/netfilter/nf_flow_table_offload.c2
-rw-r--r--net/netfilter/nf_flow_table_xdp.c147
-rw-r--r--net/netfilter/nf_tables_api.c572
-rw-r--r--net/netfilter/nf_tables_offload.c40
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c3
-rw-r--r--net/netfilter/nfnetlink_queue.c2
-rw-r--r--net/netfilter/nft_immediate.c2
-rw-r--r--net/netfilter/xt_recent.c8
-rw-r--r--net/openvswitch/Kconfig1
-rw-r--r--net/openvswitch/actions.c66
-rw-r--r--net/openvswitch/conntrack.c47
-rw-r--r--net/openvswitch/datapath.h3
-rw-r--r--net/openvswitch/flow_netlink.c32
-rw-r--r--net/openvswitch/vport.c1
-rw-r--r--net/psample/psample.c16
-rw-r--r--net/sched/act_ct.c39
-rw-r--r--net/sched/act_sample.c12
-rw-r--r--net/sched/act_skbmod.c2
-rw-r--r--net/sched/sch_ingress.c12
-rw-r--r--net/sctp/socket.c14
-rw-r--r--net/sunrpc/svc.c5
-rw-r--r--net/sunrpc/svc_xprt.c8
-rw-r--r--net/sunrpc/xprtsock.c7
-rw-r--r--net/tls/tls_main.c9
-rw-r--r--net/unix/garbage.c9
-rw-r--r--net/xdp/xsk.c19
-rw-r--r--scripts/Makefile.btf7
-rw-r--r--scripts/Makefile.dtbinst2
-rw-r--r--scripts/Makefile.host2
-rw-r--r--scripts/Makefile.modfinal2
-rw-r--r--scripts/Makefile.package2
-rw-r--r--scripts/gdb/linux/Makefile2
-rw-r--r--scripts/package/kernel.spec8
-rw-r--r--security/integrity/ima/ima_fs.c3
-rw-r--r--tools/bpf/bpftool/btf.c55
-rw-r--r--tools/bpf/bpftool/cgroup.c40
-rw-r--r--tools/bpf/bpftool/gen.c2
-rw-r--r--tools/bpf/bpftool/prog.c4
-rw-r--r--tools/bpf/resolve_btfids/main.c8
-rw-r--r--tools/include/uapi/linux/bpf.h2
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/btf.c662
-rw-r--r--tools/lib/bpf/btf.h36
-rw-r--r--tools/lib/bpf/btf_iter.c177
-rw-r--r--tools/lib/bpf/btf_relocate.c519
-rw-r--r--tools/lib/bpf/libbpf.c4
-rw-r--r--tools/lib/bpf/libbpf.map2
-rw-r--r--tools/lib/bpf/libbpf_internal.h3
-rw-r--r--tools/lib/bpf/linker.c11
-rw-r--r--tools/net/ynl/Makefile6
-rw-r--r--tools/net/ynl/Makefile.deps4
-rw-r--r--tools/net/ynl/lib/Makefile4
-rwxr-xr-xtools/net/ynl/ynl-gen-c.py52
-rw-r--r--tools/perf/util/comm.c29
-rw-r--r--tools/perf/util/dsos.c26
-rw-r--r--tools/power/x86/turbostat/Makefile6
-rw-r--r--tools/power/x86/turbostat/turbostat.c20
-rw-r--r--tools/testing/cxl/test/cxl.c4
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch642
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x4
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_common.h2
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h32
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h2
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c71
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h10
-rw-r--r--tools/testing/selftests/bpf/config16
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c100
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_atomics.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_distill.c552
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_stress.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mptcp.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_links.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_lockup.c91
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_struct.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c168
-rw-r--r--tools/testing/selftests/bpf/progs/arena_atomics.c143
-rw-r--r--tools/testing/selftests/bpf/progs/arena_htab.c17
-rw-r--r--tools/testing/selftests/bpf/progs/arena_list.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c36
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h15
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_bench.c10
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_sanity.c16
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c30
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_ip_test.c7
-rw-r--r--tools/testing/selftests/bpf/progs/ip_check_defrag.c10
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c2
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test.c37
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_session.c3
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c2
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list.c5
-rw-r--r--tools/testing/selftests/bpf/progs/map_percpu_stats.c2
-rw-r--r--tools/testing/selftests/bpf/progs/nested_trust_common.h2
-rw-r--r--tools/testing/selftests/bpf/progs/netif_receive_skb.c5
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h5
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c4
-rw-r--r--tools/testing/selftests/bpf/progs/setget_sockopt.c5
-rw-r--r--tools/testing/selftests/bpf/progs/skb_pkt_end.c11
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_ma.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c43
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop1.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop2.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_prog.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h2
-rw-r--r--tools/testing/selftests/bpf/progs/timer_lockup.c87
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_struct.c54
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_struct_many_args.c95
-rw-r--r--tools/testing/selftests/bpf/progs/user_ringbuf_fail.c22
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena.c1
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_large.c1
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c236
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c2
-rw-r--r--tools/testing/selftests/bpf/progs/wq.c19
-rw-r--r--tools/testing/selftests/bpf/progs/wq_failures.c4
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_flowtable.c144
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c1
-rw-r--r--tools/testing/selftests/bpf/progs/xfrm_info.c1
-rw-r--r--tools/testing/selftests/bpf/test_loader.c115
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c1
-rw-r--r--tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c29
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c22
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c40
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h2
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_ctx.py437
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh71
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh18
-rw-r--r--tools/testing/selftests/kselftest_harness.h43
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c25
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh83
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre.sh45
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bound.sh23
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh21
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh21
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh21
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh29
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_changes.sh73
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_flower.sh43
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh65
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_lib.sh90
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_neigh.sh39
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_nh.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan.sh21
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh69
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_lib.sh79
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_vlan.sh43
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh8
-rw-r--r--tools/testing/selftests/net/lib.sh4
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py60
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py34
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c14
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh37
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh138
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py272
-rw-r--r--tools/testing/selftests/net/openvswitch/settings1
-rw-r--r--tools/testing/selftests/net/udpgso.c15
-rwxr-xr-xtools/testing/selftests/net/udpgso.sh43
-rw-r--r--tools/testing/selftests/net/ynl.mk21
-rw-r--r--tools/testing/selftests/powerpc/flags.mk5
-rw-r--r--tools/testing/selftests/resctrl/cat_test.c32
-rw-r--r--tools/testing/selftests/riscv/sigreturn/sigreturn.c2
-rw-r--r--tools/testing/selftests/timens/exec.c6
-rw-r--r--tools/testing/selftests/timens/timer.c2
-rw-r--r--tools/testing/selftests/timens/timerfd.c2
-rw-r--r--tools/testing/selftests/timens/vfork_exec.c4
-rw-r--r--tools/testing/selftests/vDSO/Makefile29
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.c16
-rw-r--r--tools/testing/selftests/vDSO/vdso_standalone_test_x86.c18
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile8
853 files changed, 19210 insertions, 7086 deletions
diff --git a/.mailmap b/.mailmap
index a6c619e22efc..81ac1e17ac3c 100644
--- a/.mailmap
+++ b/.mailmap
@@ -384,6 +384,7 @@ Li Yang <[email protected]> <[email protected]>
Lorenzo Pieralisi <[email protected]> <[email protected]>
diff --git a/CREDITS b/CREDITS
index 1a1a54555e11..f87c0fa62cfc 100644
--- a/CREDITS
+++ b/CREDITS
@@ -3150,9 +3150,11 @@ S: Triftstra=DFe 55
S: 13353 Berlin
S: Germany
-N: Gustavo Pimental
+N: Gustavo Pimentel
D: PCI driver for Synopsys DesignWare
+D: Synopsys DesignWare eDMA driver
+D: Synopsys DesignWare xData traffic generator
N: Emanuel Pirker
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 11e57ba2985c..27ec49af1bf2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -788,25 +788,6 @@
Documentation/networking/netconsole.rst for an
alternative.
- <DEVNAME>:<n>.<n>[,options]
- Use the specified serial port on the serial core bus.
- The addressing uses DEVNAME of the physical serial port
- device, followed by the serial core controller instance,
- and the serial port instance. The options are the same
- as documented for the ttyS addressing above.
-
- The mapping of the serial ports to the tty instances
- can be viewed with:
-
- $ ls -d /sys/bus/serial-base/devices/*:*.*/tty/*
- /sys/bus/serial-base/devices/00:04:0.0/tty/ttyS0
-
- In the above example, the console can be addressed with
- console=00:04:0.0. Note that a console addressed this
- way will only get added when the related device driver
- is ready. The use of an earlycon parameter in addition to
- the console may be desired for console output early on.
-
uart[8250],io,<addr>[,options]
uart[8250],mmio,<addr>[,options]
uart[8250],mmio16,<addr>[,options]
diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst
index 1c0ca06b6c97..8c48bcff3df9 100644
--- a/Documentation/arch/riscv/cmodx.rst
+++ b/Documentation/arch/riscv/cmodx.rst
@@ -62,10 +62,10 @@ cmodx.c::
printf("Value before cmodx: %d\n", value);
// Call prctl before first fence.i is called inside modify_instruction
- prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_ON, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS);
+ prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS);
modify_instruction();
// Call prctl after final fence.i is called in process
- prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_OFF, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS);
+ prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_OFF, PR_RISCV_SCOPE_PER_PROCESS);
value = get_value();
printf("Value after cmodx: %d\n", value);
diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst
index 8d19810504b8..ab820d565052 100644
--- a/Documentation/bpf/standardization/instruction-set.rst
+++ b/Documentation/bpf/standardization/instruction-set.rst
@@ -5,12 +5,19 @@
BPF Instruction Set Architecture (ISA)
======================================
-eBPF (which is no longer an acronym for anything), also commonly
+eBPF, also commonly
referred to as BPF, is a technology with origins in the Linux kernel
that can run untrusted programs in a privileged context such as an
operating system kernel. This document specifies the BPF instruction
set architecture (ISA).
+As a historical note, BPF originally stood for Berkeley Packet Filter,
+but now that it can do so much more than packet filtering, the acronym
+no longer makes sense. BPF is now considered a standalone term that
+does not stand for anything. The original BPF is sometimes referred to
+as cBPF (classic BPF) to distinguish it from the now widely deployed
+eBPF (extended BPF).
+
Documentation conventions
=========================
@@ -18,7 +25,7 @@ The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
"SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and
"OPTIONAL" in this document are to be interpreted as described in
BCP 14 `<https://www.rfc-editor.org/info/rfc2119>`_
-`RFC8174 <https://www.rfc-editor.org/info/rfc8174>`_
+`<https://www.rfc-editor.org/info/rfc8174>`_
when, and only when, they appear in all capitals, as shown here.
For brevity and consistency, this document refers to families
@@ -59,24 +66,18 @@ numbers.
Functions
---------
-* htobe16: Takes an unsigned 16-bit number in host-endian format and
- returns the equivalent number as an unsigned 16-bit number in big-endian
- format.
-* htobe32: Takes an unsigned 32-bit number in host-endian format and
- returns the equivalent number as an unsigned 32-bit number in big-endian
- format.
-* htobe64: Takes an unsigned 64-bit number in host-endian format and
- returns the equivalent number as an unsigned 64-bit number in big-endian
- format.
-* htole16: Takes an unsigned 16-bit number in host-endian format and
- returns the equivalent number as an unsigned 16-bit number in little-endian
- format.
-* htole32: Takes an unsigned 32-bit number in host-endian format and
- returns the equivalent number as an unsigned 32-bit number in little-endian
- format.
-* htole64: Takes an unsigned 64-bit number in host-endian format and
- returns the equivalent number as an unsigned 64-bit number in little-endian
- format.
+
+The following byteswap functions are direction-agnostic. That is,
+the same function is used for conversion in either direction discussed
+below.
+
+* be16: Takes an unsigned 16-bit number and converts it between
+ host byte order and big-endian
+ (`IEN137 <https://www.rfc-editor.org/ien/ien137.txt>`_) byte order.
+* be32: Takes an unsigned 32-bit number and converts it between
+ host byte order and big-endian byte order.
+* be64: Takes an unsigned 64-bit number and converts it between
+ host byte order and big-endian byte order.
* bswap16: Takes an unsigned 16-bit number in either big- or little-endian
format and returns the equivalent number with the same bit width but
opposite endianness.
@@ -86,7 +87,12 @@ Functions
* bswap64: Takes an unsigned 64-bit number in either big- or little-endian
format and returns the equivalent number with the same bit width but
opposite endianness.
-
+* le16: Takes an unsigned 16-bit number and converts it between
+ host byte order and little-endian byte order.
+* le32: Takes an unsigned 32-bit number and converts it between
+ host byte order and little-endian byte order.
+* le64: Takes an unsigned 64-bit number and converts it between
+ host byte order and little-endian byte order.
Definitions
-----------
@@ -437,8 +443,8 @@ and MUST be set to 0.
===== ======== ===== =================================================
class source value description
===== ======== ===== =================================================
- ALU TO_LE 0 convert between host byte order and little endian
- ALU TO_BE 1 convert between host byte order and big endian
+ ALU LE 0 convert between host byte order and little endian
+ ALU BE 1 convert between host byte order and big endian
ALU64 Reserved 0 do byte swap unconditionally
===== ======== ===== =================================================
@@ -449,19 +455,19 @@ conformance group.
Examples:
-``{END, TO_LE, ALU}`` with 'imm' = 16/32/64 means::
+``{END, LE, ALU}`` with 'imm' = 16/32/64 means::
- dst = htole16(dst)
- dst = htole32(dst)
- dst = htole64(dst)
+ dst = le16(dst)
+ dst = le32(dst)
+ dst = le64(dst)
-``{END, TO_BE, ALU}`` with 'imm' = 16/32/64 means::
+``{END, BE, ALU}`` with 'imm' = 16/32/64 means::
- dst = htobe16(dst)
- dst = htobe32(dst)
- dst = htobe64(dst)
+ dst = be16(dst)
+ dst = be32(dst)
+ dst = be64(dst)
-``{END, TO_LE, ALU64}`` with 'imm' = 16/32/64 means::
+``{END, TO, ALU64}`` with 'imm' = 16/32/64 means::
dst = bswap16(dst)
dst = bswap32(dst)
@@ -541,13 +547,17 @@ Helper functions are a concept whereby BPF programs can call into a
set of function calls exposed by the underlying platform.
Historically, each helper function was identified by a static ID
-encoded in the 'imm' field. The available helper functions may differ
-for each program type, but static IDs are unique across all program types.
+encoded in the 'imm' field. Further documentation of helper functions
+is outside the scope of this document and standardization is left for
+future work, but use is widely deployed and more information can be
+found in platform-specific documentation (e.g., Linux kernel documentation).
Platforms that support the BPF Type Format (BTF) support identifying
a helper function by a BTF ID encoded in the 'imm' field, where the BTF ID
identifies the helper name and type. Further documentation of BTF
-is outside the scope of this document and is left for future work.
+is outside the scope of this document and standardization is left for
+future work, but use is widely deployed and more information can be
+found in platform-specific documentation (e.g., Linux kernel documentation).
Program-local functions
~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/devicetree/bindings/net/fsl,enetc-ierb.yaml b/Documentation/devicetree/bindings/net/fsl,enetc-ierb.yaml
new file mode 100644
index 000000000000..c8a654310b90
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/fsl,enetc-ierb.yaml
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/fsl,enetc-ierb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Integrated Endpoint Register Block
+
+description:
+ The fsl_enetc driver can probe on the Integrated Endpoint Register Block,
+ which preconfigures the FIFO limits for the ENETC ports.
+
+maintainers:
+ - Frank Li <[email protected]>
+ - Vladimir Oltean <[email protected]>
+ - Wei Fang <[email protected]>
+ - Claudiu Manoil <[email protected]>
+
+properties:
+ compatible:
+ enum:
+ - fsl,ls1028a-enetc-ierb
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ endpoint-config@f0800000 {
+ compatible = "fsl,ls1028a-enetc-ierb";
+ reg = <0xf0800000 0x10000>;
+ };
diff --git a/Documentation/devicetree/bindings/net/fsl,enetc-mdio.yaml b/Documentation/devicetree/bindings/net/fsl,enetc-mdio.yaml
new file mode 100644
index 000000000000..c1dd6aa04321
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/fsl,enetc-mdio.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/fsl,enetc-mdio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ENETC external MDIO PCIe endpoint device
+
+description:
+ NETC provides an external master MDIO interface (EMDIO) for managing external
+ devices (PHYs). EMDIO supports both Clause 22 and 45 protocols. And the EMDIO
+ provides a means for different software modules to share a single set of MDIO
+ signals to access their PHYs.
+
+maintainers:
+ - Frank Li <[email protected]>
+ - Vladimir Oltean <[email protected]>
+ - Wei Fang <[email protected]>
+ - Claudiu Manoil <[email protected]>
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - pci1957,ee01
+ - const: fsl,enetc-mdio
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+allOf:
+ - $ref: mdio.yaml
+ - $ref: /schemas/pci/pci-device.yaml
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ pcie{
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ mdio@0,3 {
+ compatible = "pci1957,ee01", "fsl,enetc-mdio";
+ reg = <0x000300 0 0 0 0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethernet-phy@2 {
+ reg = <0x2>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/fsl,enetc.yaml b/Documentation/devicetree/bindings/net/fsl,enetc.yaml
new file mode 100644
index 000000000000..e152c93998fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/fsl,enetc.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/fsl,enetc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: The NIC functionality of NXP NETC
+
+description:
+ The NIC functionality in NETC is known as EtherNET Controller (ENETC). ENETC
+ supports virtualization/isolation based on PCIe Single Root IO Virtualization
+ (SR-IOV), advanced QoS with 8 traffic classes and 4 drop resilience levels,
+ and a full range of TSN standards and NIC offload capabilities
+
+maintainers:
+ - Frank Li <[email protected]>
+ - Vladimir Oltean <[email protected]>
+ - Wei Fang <[email protected]>
+ - Claudiu Manoil <[email protected]>
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - pci1957,e100
+ - const: fsl,enetc
+
+ reg:
+ maxItems: 1
+
+ mdio:
+ $ref: mdio.yaml
+ unevaluatedProperties: false
+ description: Optional child node for ENETC instance, otherwise use NETC EMDIO.
+
+required:
+ - compatible
+ - reg
+
+allOf:
+ - $ref: /schemas/pci/pci-device.yaml
+ - $ref: ethernet-controller.yaml
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ pcie {
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ ethernet@0,0 {
+ compatible = "pci1957,e100", "fsl,enetc";
+ reg = <0x000000 0 0 0 0>;
+ phy-handle = <&sgmii_phy0>;
+ phy-connection-type = "sgmii";
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy@2 {
+ reg = <0x2>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/fsl,fman.yaml b/Documentation/devicetree/bindings/net/fsl,fman.yaml
index 7908f67413de..9bbf39ef31a2 100644
--- a/Documentation/devicetree/bindings/net/fsl,fman.yaml
+++ b/Documentation/devicetree/bindings/net/fsl,fman.yaml
@@ -78,6 +78,12 @@ properties:
- description: The first element is associated with the event interrupts.
- description: the second element is associated with the error interrupts.
+ dma-coherent: true
+
+ ptimer-handle:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: see ptp/fsl,ptp.yaml
+
fsl,qman-channel-range:
$ref: /schemas/types.yaml#/definitions/uint32-array
description:
diff --git a/Documentation/devicetree/bindings/net/fsl-enetc.txt b/Documentation/devicetree/bindings/net/fsl-enetc.txt
deleted file mode 100644
index 9b9a3f197e2d..000000000000
--- a/Documentation/devicetree/bindings/net/fsl-enetc.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-* ENETC ethernet device tree bindings
-
-Depending on board design and ENETC port type (internal or
-external) there are two supported link modes specified by
-below device tree bindings.
-
-Required properties:
-
-- reg : Specifies PCIe Device Number and Function
- Number of the ENETC endpoint device, according
- to parent node bindings.
-- compatible : Should be "fsl,enetc".
-
-1. The ENETC external port is connected to a MDIO configurable phy
-
-1.1. Using the local ENETC Port MDIO interface
-
-In this case, the ENETC node should include a "mdio" sub-node
-that in turn should contain the "ethernet-phy" node describing the
-external phy. Below properties are required, their bindings
-already defined in Documentation/devicetree/bindings/net/ethernet.txt or
-Documentation/devicetree/bindings/net/phy.txt.
-
-Required:
-
-- phy-handle : Phandle to a PHY on the MDIO bus.
- Defined in ethernet.txt.
-
-- phy-connection-type : Defined in ethernet.txt.
-
-- mdio : "mdio" node, defined in mdio.txt.
-
-- ethernet-phy : "ethernet-phy" node, defined in phy.txt.
-
-Example:
-
- ethernet@0,0 {
- compatible = "fsl,enetc";
- reg = <0x000000 0 0 0 0>;
- phy-handle = <&sgmii_phy0>;
- phy-connection-type = "sgmii";
-
- mdio {
- #address-cells = <1>;
- #size-cells = <0>;
- sgmii_phy0: ethernet-phy@2 {
- reg = <0x2>;
- };
- };
- };
-
-1.2. Using the central MDIO PCIe endpoint device
-
-In this case, the mdio node should be defined as another PCIe
-endpoint node, at the same level with the ENETC port nodes.
-
-Required properties:
-
-- reg : Specifies PCIe Device Number and Function
- Number of the ENETC endpoint device, according
- to parent node bindings.
-- compatible : Should be "fsl,enetc-mdio".
-
-The remaining required mdio bus properties are standard, their bindings
-already defined in Documentation/devicetree/bindings/net/mdio.txt.
-
-Example:
-
- ethernet@0,0 {
- compatible = "fsl,enetc";
- reg = <0x000000 0 0 0 0>;
- phy-handle = <&sgmii_phy0>;
- phy-connection-type = "sgmii";
- };
-
- mdio@0,3 {
- compatible = "fsl,enetc-mdio";
- reg = <0x000300 0 0 0 0>;
- #address-cells = <1>;
- #size-cells = <0>;
- sgmii_phy0: ethernet-phy@2 {
- reg = <0x2>;
- };
- };
-
-2. The ENETC port is an internal port or has a fixed-link external
-connection
-
-In this case, the ENETC port node defines a fixed link connection,
-as specified by Documentation/devicetree/bindings/net/fixed-link.txt.
-
-Required:
-
-- fixed-link : "fixed-link" node, defined in "fixed-link.txt".
-
-Example:
- ethernet@0,2 {
- compatible = "fsl,enetc";
- reg = <0x000200 0 0 0 0>;
- fixed-link {
- speed = <1000>;
- full-duplex;
- };
- };
-
-* Integrated Endpoint Register Block bindings
-
-Optionally, the fsl_enetc driver can probe on the Integrated Endpoint Register
-Block, which preconfigures the FIFO limits for the ENETC ports. This is a node
-with the following properties:
-
-- reg : Specifies the address in the SoC memory space.
-- compatible : Must be "fsl,ls1028a-enetc-ierb".
-
-Example:
- ierb@1f0800000 {
- compatible = "fsl,ls1028a-enetc-ierb";
- reg = <0x01 0xf0800000 0x0 0x10000>;
- };
diff --git a/Documentation/devicetree/bindings/net/mediatek,net.yaml b/Documentation/devicetree/bindings/net/mediatek,net.yaml
index 3202dc7967c5..686b5c2fae40 100644
--- a/Documentation/devicetree/bindings/net/mediatek,net.yaml
+++ b/Documentation/devicetree/bindings/net/mediatek,net.yaml
@@ -68,6 +68,17 @@ properties:
Phandle to the syscon node that handles the path from GMAC to
PHY variants.
+ mediatek,pcie-mirror:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the mediatek pcie-mirror controller.
+
+ mediatek,pctl:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon node that handles the ports slew rate and
+ driver current.
+
mediatek,sgmiisys:
$ref: /schemas/types.yaml#/definitions/phandle-array
minItems: 1
@@ -131,15 +142,12 @@ allOf:
mediatek,infracfg: false
- mediatek,pctl:
- $ref: /schemas/types.yaml#/definitions/phandle
- description:
- Phandle to the syscon node that handles the ports slew rate and
- driver current.
-
mediatek,wed: false
mediatek,wed-pcie: false
+ else:
+ properties:
+ mediatek,pctl: false
- if:
properties:
@@ -201,12 +209,10 @@ allOf:
minItems: 1
maxItems: 1
- mediatek,pcie-mirror:
- $ref: /schemas/types.yaml#/definitions/phandle
- description:
- Phandle to the mediatek pcie-mirror controller.
-
mediatek,wed-pcie: false
+ else:
+ properties:
+ mediatek,pcie-mirror: false
- if:
properties:
diff --git a/Documentation/devicetree/bindings/net/pcs/snps,dw-xpcs.yaml b/Documentation/devicetree/bindings/net/pcs/snps,dw-xpcs.yaml
new file mode 100644
index 000000000000..e77eec9ac9ee
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/pcs/snps,dw-xpcs.yaml
@@ -0,0 +1,136 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/pcs/snps,dw-xpcs.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys DesignWare Ethernet PCS
+
+maintainers:
+ - Serge Semin <[email protected]>
+
+description:
+ Synopsys DesignWare Ethernet Physical Coding Sublayer provides an interface
+ between Media Access Control and Physical Medium Attachment Sublayer through
+ the Media Independent Interface (XGMII, USXGMII, XLGMII, GMII, etc)
+ controlled by means of the IEEE std. Clause 45 registers set. The PCS can be
+ optionally synthesized with a vendor-specific interface connected to
+ Synopsys PMA (also called DesignWare Consumer/Enterprise PHY) although in
+ general it can be used to communicate with any compatible PHY.
+
+ The PCS CSRs can be accessible either over the Ethernet MDIO bus or directly
+ by means of the APB3/MCI interfaces. In the later case the XPCS can be mapped
+ right to the system IO memory space.
+
+properties:
+ compatible:
+ oneOf:
+ - description: Synopsys DesignWare XPCS with none or unknown PMA
+ const: snps,dw-xpcs
+ - description: Synopsys DesignWare XPCS with Consumer Gen1 3G PMA
+ const: snps,dw-xpcs-gen1-3g
+ - description: Synopsys DesignWare XPCS with Consumer Gen2 3G PMA
+ const: snps,dw-xpcs-gen2-3g
+ - description: Synopsys DesignWare XPCS with Consumer Gen2 6G PMA
+ const: snps,dw-xpcs-gen2-6g
+ - description: Synopsys DesignWare XPCS with Consumer Gen4 3G PMA
+ const: snps,dw-xpcs-gen4-3g
+ - description: Synopsys DesignWare XPCS with Consumer Gen4 6G PMA
+ const: snps,dw-xpcs-gen4-6g
+ - description: Synopsys DesignWare XPCS with Consumer Gen5 10G PMA
+ const: snps,dw-xpcs-gen5-10g
+ - description: Synopsys DesignWare XPCS with Consumer Gen5 12G PMA
+ const: snps,dw-xpcs-gen5-12g
+
+ reg:
+ items:
+ - description:
+ In case of the MDIO management interface this just a 5-bits ID
+ of the MDIO bus device. If DW XPCS CSRs space is accessed over the
+ MCI or APB3 management interfaces, then the space mapping can be
+ either 'direct' or 'indirect'. In the former case all Clause 45
+ registers are contiguously mapped within the address space
+ MMD '[20:16]', Reg '[15:0]'. In the later case the space is divided
+ to the multiple 256 register sets. There is a special viewport CSR
+ which is responsible for the set selection. The upper part of
+ the CSR address MMD+REG[20:8] is supposed to be written in there
+ so the corresponding subset would be mapped to the lowest 255 CSRs.
+
+ reg-names:
+ items:
+ - enum: [ direct, indirect ]
+
+ reg-io-width:
+ description:
+ The way the CSRs are mapped to the memory is platform depended. Since
+ each Clause 45 CSR is of 16-bits wide the access instructions must be
+ two bytes aligned at least.
+ default: 2
+ enum: [ 2, 4 ]
+
+ interrupts:
+ description:
+ System interface interrupt output (sbd_intr_o) indicating Clause 73/37
+ auto-negotiation events':' Page received, AN is completed or incompatible
+ link partner.
+ maxItems: 1
+
+ clocks:
+ description:
+ The MCI and APB3 interfaces are supposed to be equipped with a clock
+ source connected to the clk_csr_i line.
+
+ PCS/PMA layer can be clocked by an internal reference clock source
+ (phyN_core_refclk) or by an externally connected (phyN_pad_refclk) clock
+ generator. Both clocks can be supplied at a time.
+ minItems: 1
+ maxItems: 3
+
+ clock-names:
+ oneOf:
+ - minItems: 1
+ items: # MDIO
+ - enum: [core, pad]
+ - const: pad
+ - minItems: 1
+ items: # MCI or APB
+ - const: csr
+ - enum: [core, pad]
+ - const: pad
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ ethernet-pcs@1f05d000 {
+ compatible = "snps,dw-xpcs";
+ reg = <0x1f05d000 0x1000>;
+ reg-names = "indirect";
+
+ reg-io-width = <4>;
+
+ interrupts = <79 IRQ_TYPE_LEVEL_HIGH>;
+
+ clocks = <&ccu_pclk>, <&ccu_core>, <&ccu_pad>;
+ clock-names = "csr", "core", "pad";
+ };
+ - |
+ mdio-bus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ethernet-pcs@0 {
+ compatible = "snps,dw-xpcs";
+ reg = <0>;
+
+ clocks = <&ccu_core>, <&ccu_pad>;
+ clock-names = "core", "pad";
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml
index bb94a2388520..d248a08a2136 100644
--- a/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml
+++ b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml
@@ -14,10 +14,32 @@ maintainers:
description:
Bindings for Realtek RTL82xx PHYs
-allOf:
- - $ref: ethernet-phy.yaml#
-
properties:
+ compatible:
+ enum:
+ - ethernet-phy-id001c.c800
+ - ethernet-phy-id001c.c816
+ - ethernet-phy-id001c.c838
+ - ethernet-phy-id001c.c840
+ - ethernet-phy-id001c.c848
+ - ethernet-phy-id001c.c849
+ - ethernet-phy-id001c.c84a
+ - ethernet-phy-id001c.c862
+ - ethernet-phy-id001c.c878
+ - ethernet-phy-id001c.c880
+ - ethernet-phy-id001c.c910
+ - ethernet-phy-id001c.c912
+ - ethernet-phy-id001c.c913
+ - ethernet-phy-id001c.c914
+ - ethernet-phy-id001c.c915
+ - ethernet-phy-id001c.c916
+ - ethernet-phy-id001c.c942
+ - ethernet-phy-id001c.c961
+ - ethernet-phy-id001c.cad0
+ - ethernet-phy-id001c.cb00
+
+ leds: true
+
realtek,clkout-disable:
type: boolean
description:
@@ -31,6 +53,18 @@ properties:
unevaluatedProperties: false
+allOf:
+ - $ref: ethernet-phy.yaml#
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ const: ethernet-phy-id001c.c916
+ then:
+ properties:
+ leds: false
+
examples:
- |
mdio {
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 3bab4e1f3fbf..3eb65e63fdae 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -436,6 +436,32 @@ properties:
description:
Use Address-Aligned Beats
+ snps,pbl:
+ description:
+ Programmable Burst Length (tx and rx)
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 2, 4, 8, 16, 32]
+
+ snps,txpbl:
+ description:
+ Tx Programmable Burst Length. If set, DMA tx will use this
+ value rather than snps,pbl.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 2, 4, 8, 16, 32]
+
+ snps,rxpbl:
+ description:
+ Rx Programmable Burst Length. If set, DMA rx will use this
+ value rather than snps,pbl.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 2, 4, 8, 16, 32]
+
+ snps,no-pbl-x8:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Don\'t multiply the pbl/txpbl/rxpbl values by 8. For core
+ rev < 3.50, don\'t multiply the values by 4.
+
snps,fixed-burst:
$ref: /schemas/types.yaml#/definitions/flag
description:
@@ -486,6 +512,12 @@ properties:
description:
Frequency division factor for MDC clock.
+ snps,tso:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Enables the TSO feature otherwise it will be managed by MAC HW capability
+ register.
+
mdio:
$ref: mdio.yaml#
unevaluatedProperties: false
@@ -569,95 +601,38 @@ allOf:
- if:
properties:
compatible:
- contains:
- enum:
- - allwinner,sun7i-a20-gmac
- - allwinner,sun8i-a83t-emac
- - allwinner,sun8i-h3-emac
- - allwinner,sun8i-r40-gmac
- - allwinner,sun8i-v3s-emac
- - allwinner,sun50i-a64-emac
- - ingenic,jz4775-mac
- - ingenic,x1000-mac
- - ingenic,x1600-mac
- - ingenic,x1830-mac
- - ingenic,x2000-mac
- - qcom,sa8775p-ethqos
- - qcom,sc8280xp-ethqos
- - snps,dwmac-3.50a
- - snps,dwmac-4.10a
- - snps,dwmac-4.20a
- - snps,dwmac-5.20
- - snps,dwxgmac
- - snps,dwxgmac-2.10
- - st,spear600-gmac
-
- then:
- properties:
- snps,pbl:
- description:
- Programmable Burst Length (tx and rx)
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [1, 2, 4, 8, 16, 32]
-
- snps,txpbl:
- description:
- Tx Programmable Burst Length. If set, DMA tx will use this
- value rather than snps,pbl.
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [1, 2, 4, 8, 16, 32]
-
- snps,rxpbl:
- description:
- Rx Programmable Burst Length. If set, DMA rx will use this
- value rather than snps,pbl.
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [1, 2, 4, 8, 16, 32]
-
- snps,no-pbl-x8:
- $ref: /schemas/types.yaml#/definitions/flag
- description:
- Don\'t multiply the pbl/txpbl/rxpbl values by 8. For core
- rev < 3.50, don\'t multiply the values by 4.
-
- - if:
- properties:
- compatible:
- contains:
- enum:
- - allwinner,sun7i-a20-gmac
- - allwinner,sun8i-a83t-emac
- - allwinner,sun8i-h3-emac
- - allwinner,sun8i-r40-gmac
- - allwinner,sun8i-v3s-emac
- - allwinner,sun50i-a64-emac
- - loongson,ls2k-dwmac
- - loongson,ls7a-dwmac
- - ingenic,jz4775-mac
- - ingenic,x1000-mac
- - ingenic,x1600-mac
- - ingenic,x1830-mac
- - ingenic,x2000-mac
- - qcom,qcs404-ethqos
- - qcom,sa8775p-ethqos
- - qcom,sc8280xp-ethqos
- - qcom,sm8150-ethqos
- - snps,dwmac-4.00
- - snps,dwmac-4.10a
- - snps,dwmac-4.20a
- - snps,dwmac-5.10a
- - snps,dwmac-5.20
- - snps,dwxgmac
- - snps,dwxgmac-2.10
- - st,spear600-gmac
+ not:
+ contains:
+ enum:
+ - allwinner,sun7i-a20-gmac
+ - allwinner,sun8i-a83t-emac
+ - allwinner,sun8i-h3-emac
+ - allwinner,sun8i-r40-gmac
+ - allwinner,sun8i-v3s-emac
+ - allwinner,sun50i-a64-emac
+ - loongson,ls2k-dwmac
+ - loongson,ls7a-dwmac
+ - ingenic,jz4775-mac
+ - ingenic,x1000-mac
+ - ingenic,x1600-mac
+ - ingenic,x1830-mac
+ - ingenic,x2000-mac
+ - qcom,qcs404-ethqos
+ - qcom,sa8775p-ethqos
+ - qcom,sc8280xp-ethqos
+ - qcom,sm8150-ethqos
+ - snps,dwmac-4.00
+ - snps,dwmac-4.10a
+ - snps,dwmac-4.20a
+ - snps,dwmac-5.10a
+ - snps,dwmac-5.20
+ - snps,dwxgmac
+ - snps,dwxgmac-2.10
+ - st,spear600-gmac
then:
properties:
- snps,tso:
- $ref: /schemas/types.yaml#/definitions/flag
- description:
- Enables the TSO feature otherwise it will be managed by
- MAC HW capability register.
+ snps,tso: false
additionalProperties: true
diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst
index 5149ecdc53c7..d732c42526df 100644
--- a/Documentation/driver-api/cxl/memory-devices.rst
+++ b/Documentation/driver-api/cxl/memory-devices.rst
@@ -328,6 +328,12 @@ CXL Memory Device
.. kernel-doc:: drivers/cxl/mem.c
:doc: cxl mem
+.. kernel-doc:: drivers/cxl/cxlmem.h
+ :internal:
+
+.. kernel-doc:: drivers/cxl/core/memdev.c
+ :identifiers:
+
CXL Port
--------
.. kernel-doc:: drivers/cxl/port.c
@@ -341,6 +347,15 @@ CXL Core
.. kernel-doc:: drivers/cxl/cxl.h
:internal:
+.. kernel-doc:: drivers/cxl/core/hdm.c
+ :doc: cxl core hdm
+
+.. kernel-doc:: drivers/cxl/core/hdm.c
+ :identifiers:
+
+.. kernel-doc:: drivers/cxl/core/cdat.c
+ :identifiers:
+
.. kernel-doc:: drivers/cxl/core/port.c
:doc: cxl core
diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst
index a1f3eb7a43e2..131863142cbb 100644
--- a/Documentation/kbuild/modules.rst
+++ b/Documentation/kbuild/modules.rst
@@ -128,7 +128,7 @@ executed to make module versioning work.
modules_install
Install the external module(s). The default location is
- /lib/modules/<kernel_release>/extra/, but a prefix may
+ /lib/modules/<kernel_release>/updates/, but a prefix may
be added with INSTALL_MOD_PATH (discussed in section 5).
clean
@@ -417,7 +417,7 @@ directory:
And external modules are installed in:
- /lib/modules/$(KERNELRELEASE)/extra/
+ /lib/modules/$(KERNELRELEASE)/updates/
5.1 INSTALL_MOD_PATH
--------------------
@@ -438,10 +438,10 @@ And external modules are installed in:
-------------------
External modules are by default installed to a directory under
- /lib/modules/$(KERNELRELEASE)/extra/, but you may wish to
+ /lib/modules/$(KERNELRELEASE)/updates/, but you may wish to
locate modules for a specific functionality in a separate
directory. For this purpose, use INSTALL_MOD_DIR to specify an
- alternative name to "extra."::
+ alternative name to "updates."::
$ make INSTALL_MOD_DIR=gandalf -C $KDIR \
M=$PWD modules_install
diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 683f5c3f30ad..495e35fcfb21 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -20,6 +20,25 @@ definitions:
name: header-flags
type: flags
entries: [ compact-bitsets, omit-reply, stats ]
+ -
+ name: module-fw-flash-status
+ type: enum
+ entries: [ started, in_progress, completed, error ]
+ -
+ name: c33-pse-ext-state
+ enum-name:
+ type: enum
+ name-prefix: ethtool-c33-pse-ext-state-
+ entries:
+ - none
+ - error-condition
+ - mr-mps-valid
+ - mr-pse-enable
+ - option-detect-ted
+ - option-vport-lim
+ - ovld-detected
+ - power-not-available
+ - short-detected
attribute-sets:
-
@@ -921,6 +940,15 @@ attribute-sets:
name: power-mode
type: u8
-
+ name: c33-pse-pw-limit
+ attributes:
+ -
+ name: min
+ type: u32
+ -
+ name: max
+ type: u32
+ -
name: pse
attributes:
-
@@ -951,6 +979,33 @@ attribute-sets:
name: c33-pse-pw-d-status
type: u32
name-prefix: ethtool-a-
+ -
+ name: c33-pse-pw-class
+ type: u32
+ name-prefix: ethtool-a-
+ -
+ name: c33-pse-actual-pw
+ type: u32
+ name-prefix: ethtool-a-
+ -
+ name: c33-pse-ext-state
+ type: u32
+ name-prefix: ethtool-a-
+ enum: c33-pse-ext-state
+ -
+ name: c33-pse-ext-substate
+ type: u32
+ name-prefix: ethtool-a-
+ -
+ name: c33-pse-avail-pw-limit
+ type: u32
+ name-prefix: ethtool-a-
+ -
+ name: c33-pse-pw-limit-ranges
+ name-prefix: ethtool-a-
+ type: nest
+ multi-attr: true
+ nested-attributes: c33-pse-pw-limit
-
name: rss
attributes:
@@ -1004,6 +1059,32 @@ attribute-sets:
-
name: burst-tmr
type: u32
+ -
+ name: module-fw-flash
+ attributes:
+ -
+ name: header
+ type: nest
+ nested-attributes: header
+ -
+ name: file-name
+ type: string
+ -
+ name: password
+ type: u32
+ -
+ name: status
+ type: u32
+ enum: module-fw-flash-status
+ -
+ name: status-msg
+ type: string
+ -
+ name: done
+ type: uint
+ -
+ name: total
+ type: uint
operations:
enum-model: directional
@@ -1642,6 +1723,12 @@ operations:
- c33-pse-admin-state
- c33-pse-admin-control
- c33-pse-pw-d-status
+ - c33-pse-pw-class
+ - c33-pse-actual-pw
+ - c33-pse-ext-state
+ - c33-pse-ext-substate
+ - c33-pse-avail-pw-limit
+ - c33-pse-pw-limit-ranges
dump: *pse-get-op
-
name: pse-set
@@ -1655,6 +1742,7 @@ operations:
- header
- podl-pse-admin-control
- c33-pse-admin-control
+ - c33-pse-avail-pw-limit
-
name: rss-get
doc: Get RSS params.
@@ -1764,3 +1852,28 @@ operations:
name: mm-ntf
doc: Notification for change in MAC Merge configuration.
notify: mm-get
+ -
+ name: module-fw-flash-act
+ doc: Flash transceiver module firmware.
+
+ attribute-set: module-fw-flash
+
+ do:
+ request:
+ attributes:
+ - header
+ - file-name
+ - password
+ -
+ name: module-fw-flash-ntf
+ doc: Notification for firmware flashing progress and status.
+
+ attribute-set: module-fw-flash
+
+ event:
+ attributes:
+ - header
+ - status
+ - status-msg
+ - done
+ - total
diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml
index 4fdfc6b5cae9..46f5d1cd8a5f 100644
--- a/Documentation/netlink/specs/ovs_flow.yaml
+++ b/Documentation/netlink/specs/ovs_flow.yaml
@@ -727,6 +727,12 @@ attribute-sets:
name: dec-ttl
type: nest
nested-attributes: dec-ttl-attrs
+ -
+ name: psample
+ type: nest
+ nested-attributes: psample-attrs
+ doc: |
+ Sends a packet sample to psample for external observation.
-
name: tunnel-key-attrs
enum-name: ovs-tunnel-key-attr
@@ -938,6 +944,17 @@ attribute-sets:
-
name: gbp
type: u32
+ -
+ name: psample-attrs
+ enum-name: ovs-psample-attr
+ name-prefix: ovs-psample-attr-
+ attributes:
+ -
+ name: group
+ type: u32
+ -
+ name: cookie
+ type: binary
operations:
name-prefix: ovs-flow-cmd-
diff --git a/Documentation/netlink/specs/tcp_metrics.yaml b/Documentation/netlink/specs/tcp_metrics.yaml
new file mode 100644
index 000000000000..1bd94f43e526
--- /dev/null
+++ b/Documentation/netlink/specs/tcp_metrics.yaml
@@ -0,0 +1,169 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: tcp_metrics
+
+protocol: genetlink-legacy
+
+doc: |
+ Management interface for TCP metrics.
+
+c-family-name: tcp-metrics-genl-name
+c-version-name: tcp-metrics-genl-version
+max-by-define: true
+kernel-policy: global
+
+definitions:
+ -
+ name: tcp-fastopen-cookie-max
+ type: const
+ value: 16
+
+attribute-sets:
+ -
+ name: tcp-metrics
+ name-prefix: tcp-metrics-attr-
+ attributes:
+ -
+ name: addr-ipv4
+ type: u32
+ byte-order: big-endian
+ display-hint: ipv4
+ -
+ name: addr-ipv6
+ type: binary
+ checks:
+ min-len: 16
+ byte-order: big-endian
+ display-hint: ipv6
+ -
+ name: age
+ type: u64
+ -
+ name: tw-tsval
+ type: u32
+ doc: unused
+ -
+ name: tw-ts-stamp
+ type: s32
+ doc: unused
+ -
+ name: vals
+ type: nest
+ nested-attributes: metrics
+ -
+ name: fopen-mss
+ type: u16
+ -
+ name: fopen-syn-drops
+ type: u16
+ -
+ name: fopen-syn-drop-ts
+ type: u64
+ -
+ name: fopen-cookie
+ type: binary
+ checks:
+ min-len: tcp-fastopen-cookie-max
+ -
+ name: saddr-ipv4
+ type: u32
+ byte-order: big-endian
+ display-hint: ipv4
+ -
+ name: saddr-ipv6
+ type: binary
+ checks:
+ min-len: 16
+ byte-order: big-endian
+ display-hint: ipv6
+ -
+ name: pad
+ type: pad
+
+ -
+ name: metrics
+ # Intentionally don't define the name-prefix, see below.
+ doc: |
+ Attributes with metrics. Note that the values here do not match
+ the TCP_METRIC_* defines in the kernel, because kernel defines
+ are off-by one (e.g. rtt is defined as enum 0, while netlink carries
+ attribute type 1).
+ attributes:
+ -
+ name: rtt
+ type: u32
+ doc: |
+ Round Trip Time (RTT), in msecs with 3 bits fractional
+ (left-shift by 3 to get the msec value).
+ -
+ name: rttvar
+ type: u32
+ doc: |
+ Round Trip Time VARiance (RTT), in msecs with 2 bits fractional
+ (left-shift by 2 to get the msec value).
+ -
+ name: ssthresh
+ type: u32
+ doc: Slow Start THRESHold.
+ -
+ name: cwnd
+ type: u32
+ doc: Congestion Window.
+ -
+ name: reodering
+ type: u32
+ doc: Reodering metric.
+ -
+ name: rtt-us
+ type: u32
+ doc: |
+ Round Trip Time (RTT), in usecs, with 3 bits fractional
+ (left-shift by 3 to get the msec value).
+ -
+ name: rttvar-us
+ type: u32
+ doc: |
+ Round Trip Time (RTT), in usecs, with 2 bits fractional
+ (left-shift by 3 to get the msec value).
+
+operations:
+ list:
+ -
+ name: get
+ doc: Retrieve metrics.
+ attribute-set: tcp-metrics
+
+ dont-validate: [ strict, dump ]
+
+ do:
+ request: &sel_attrs
+ attributes:
+ - addr-ipv4
+ - addr-ipv6
+ - saddr-ipv4
+ - saddr-ipv6
+ reply: &all_attrs
+ attributes:
+ - addr-ipv4
+ - addr-ipv6
+ - saddr-ipv4
+ - saddr-ipv6
+ - age
+ - vals
+ - fopen-mss
+ - fopen-syn-drops
+ - fopen-syn-drop-ts
+ - fopen-cookie
+ dump:
+ reply: *all_attrs
+
+ -
+ name: del
+ doc: Delete metrics.
+ attribute-set: tcp-metrics
+
+ dont-validate: [ strict, dump ]
+ flags: [ admin-perm ]
+
+ do:
+ request: *sel_attrs
diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst
index 9232cd7da301..5d0b68f752c0 100644
--- a/Documentation/networking/devlink/devlink-region.rst
+++ b/Documentation/networking/devlink/devlink-region.rst
@@ -49,7 +49,7 @@ example usage
$ devlink region show [ DEV/REGION ]
$ devlink region del DEV/REGION snapshot SNAPSHOT_ID
$ devlink region dump DEV/REGION [ snapshot SNAPSHOT_ID ]
- $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ] address ADDRESS length length
+ $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ] address ADDRESS length LENGTH
# Show all of the exposed regions with region sizes:
$ devlink region show
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 7ec08e903bab..3ab423b80e91 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -228,6 +228,7 @@ Userspace to kernel:
``ETHTOOL_MSG_PLCA_GET_STATUS`` get PLCA RS status
``ETHTOOL_MSG_MM_GET`` get MAC merge layer state
``ETHTOOL_MSG_MM_SET`` set MAC merge layer parameters
+ ``ETHTOOL_MSG_MODULE_FW_FLASH_ACT`` flash transceiver module firmware
===================================== =================================
Kernel to userspace:
@@ -274,6 +275,7 @@ Kernel to userspace:
``ETHTOOL_MSG_PLCA_GET_STATUS_REPLY`` PLCA RS status
``ETHTOOL_MSG_PLCA_NTF`` PLCA RS parameters
``ETHTOOL_MSG_MM_GET_REPLY`` MAC merge layer status
+ ``ETHTOOL_MSG_MODULE_FW_FLASH_NTF`` transceiver module flash updates
======================================== =================================
``GET`` requests are sent by userspace applications to retrieve device
@@ -1728,17 +1730,28 @@ Request contents:
Kernel response contents:
- ====================================== ====== =============================
- ``ETHTOOL_A_PSE_HEADER`` nested reply header
- ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` u32 Operational state of the PoDL
- PSE functions
- ``ETHTOOL_A_PODL_PSE_PW_D_STATUS`` u32 power detection status of the
- PoDL PSE.
- ``ETHTOOL_A_C33_PSE_ADMIN_STATE`` u32 Operational state of the PoE
- PSE functions.
- ``ETHTOOL_A_C33_PSE_PW_D_STATUS`` u32 power detection status of the
- PoE PSE.
- ====================================== ====== =============================
+ ========================================== ====== =============================
+ ``ETHTOOL_A_PSE_HEADER`` nested reply header
+ ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` u32 Operational state of the PoDL
+ PSE functions
+ ``ETHTOOL_A_PODL_PSE_PW_D_STATUS`` u32 power detection status of the
+ PoDL PSE.
+ ``ETHTOOL_A_C33_PSE_ADMIN_STATE`` u32 Operational state of the PoE
+ PSE functions.
+ ``ETHTOOL_A_C33_PSE_PW_D_STATUS`` u32 power detection status of the
+ PoE PSE.
+ ``ETHTOOL_A_C33_PSE_PW_CLASS`` u32 power class of the PoE PSE.
+ ``ETHTOOL_A_C33_PSE_ACTUAL_PW`` u32 actual power drawn on the
+ PoE PSE.
+ ``ETHTOOL_A_C33_PSE_EXT_STATE`` u32 power extended state of the
+ PoE PSE.
+ ``ETHTOOL_A_C33_PSE_EXT_SUBSTATE`` u32 power extended substatus of
+ the PoE PSE.
+ ``ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT`` u32 currently configured power
+ limit of the PoE PSE.
+ ``ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES`` nested Supported power limit
+ configuration ranges.
+ ========================================== ====== =============================
When set, the optional ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` attribute identifies
the operational state of the PoDL PSE functions. The operational state of the
@@ -1770,6 +1783,46 @@ The same goes for ``ETHTOOL_A_C33_PSE_ADMIN_PW_D_STATUS`` implementing
.. kernel-doc:: include/uapi/linux/ethtool.h
:identifiers: ethtool_c33_pse_pw_d_status
+When set, the optional ``ETHTOOL_A_C33_PSE_PW_CLASS`` attribute identifies
+the power class of the C33 PSE. It depends on the class negotiated between
+the PSE and the PD. This option is corresponding to ``IEEE 802.3-2022``
+30.9.1.1.8 aPSEPowerClassification.
+
+When set, the optional ``ETHTOOL_A_C33_PSE_ACTUAL_PW`` attribute identifies
+This option is corresponding to ``IEEE 802.3-2022`` 30.9.1.1.23 aPSEActualPower.
+Actual power is reported in mW.
+
+When set, the optional ``ETHTOOL_A_C33_PSE_EXT_STATE`` attribute identifies
+the extended error state of the C33 PSE. Possible values are:
+
+.. kernel-doc:: include/uapi/linux/ethtool.h
+ :identifiers: ethtool_c33_pse_ext_state
+
+When set, the optional ``ETHTOOL_A_C33_PSE_EXT_SUBSTATE`` attribute identifies
+the extended error state of the C33 PSE. Possible values are:
+Possible values are:
+
+.. kernel-doc:: include/uapi/linux/ethtool.h
+ :identifiers: ethtool_c33_pse_ext_substate_class_num_events
+ ethtool_c33_pse_ext_substate_error_condition
+ ethtool_c33_pse_ext_substate_mr_pse_enable
+ ethtool_c33_pse_ext_substate_option_detect_ted
+ ethtool_c33_pse_ext_substate_option_vport_lim
+ ethtool_c33_pse_ext_substate_ovld_detected
+ ethtool_c33_pse_ext_substate_pd_dll_power_type
+ ethtool_c33_pse_ext_substate_power_not_available
+ ethtool_c33_pse_ext_substate_short_detected
+
+When set, the optional ``ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT`` attribute
+identifies the C33 PSE power limit in mW.
+
+When set the optional ``ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES`` nested attribute
+identifies the C33 PSE power limit ranges through
+``ETHTOOL_A_C33_PSE_PWR_VAL_LIMIT_RANGE_MIN`` and
+``ETHTOOL_A_C33_PSE_PWR_VAL_LIMIT_RANGE_MAX``.
+If the controller works with fixed classes, the min and max values will be
+equal.
+
PSE_SET
=======
@@ -1781,6 +1834,8 @@ Request contents:
``ETHTOOL_A_PSE_HEADER`` nested request header
``ETHTOOL_A_PODL_PSE_ADMIN_CONTROL`` u32 Control PoDL PSE Admin state
``ETHTOOL_A_C33_PSE_ADMIN_CONTROL`` u32 Control PSE Admin state
+ ``ETHTOOL_A_C33_PSE_AVAIL_PWR_LIMIT`` u32 Control PoE PSE available
+ power limit
====================================== ====== =============================
When set, the optional ``ETHTOOL_A_PODL_PSE_ADMIN_CONTROL`` attribute is used
@@ -1791,6 +1846,18 @@ to control PoDL PSE Admin functions. This option is implementing
The same goes for ``ETHTOOL_A_C33_PSE_ADMIN_CONTROL`` implementing
``IEEE 802.3-2022`` 30.9.1.2.1 acPSEAdminControl.
+When set, the optional ``ETHTOOL_A_C33_PSE_AVAIL_PWR_LIMIT`` attribute is
+used to control the available power value limit for C33 PSE in milliwatts.
+This attribute corresponds to the `pse_available_power` variable described in
+``IEEE 802.3-2022`` 33.2.4.4 Variables and `pse_avail_pwr` in 145.2.5.4
+Variables, which are described in power classes.
+
+It was decided to use milliwatts for this interface to unify it with other
+power monitoring interfaces, which also use milliwatts, and to align with
+various existing products that document power consumption in watts rather than
+classes. If power limit configuration based on classes is needed, the
+conversion can be done in user space, for example by ethtool.
+
RSS_GET
=======
@@ -2041,6 +2108,73 @@ The attributes are propagated to the driver through the following structure:
.. kernel-doc:: include/linux/ethtool.h
:identifiers: ethtool_mm_cfg
+MODULE_FW_FLASH_ACT
+===================
+
+Flashes transceiver module firmware.
+
+Request contents:
+
+ ======================================= ====== ===========================
+ ``ETHTOOL_A_MODULE_FW_FLASH_HEADER`` nested request header
+ ``ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME`` string firmware image file name
+ ``ETHTOOL_A_MODULE_FW_FLASH_PASSWORD`` u32 transceiver module password
+ ======================================= ====== ===========================
+
+The firmware update process consists of three logical steps:
+
+1. Downloading a firmware image to the transceiver module and validating it.
+2. Running the firmware image.
+3. Committing the firmware image so that it is run upon reset.
+
+When flash command is given, those three steps are taken in that order.
+
+This message merely schedules the update process and returns immediately
+without blocking. The process then runs asynchronously.
+Since it can take several minutes to complete, during the update process
+notifications are emitted from the kernel to user space updating it about
+the status and progress.
+
+The ``ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME`` attribute encodes the firmware
+image file name. The firmware image is downloaded to the transceiver module,
+validated, run and committed.
+
+The optional ``ETHTOOL_A_MODULE_FW_FLASH_PASSWORD`` attribute encodes a password
+that might be required as part of the transceiver module firmware update
+process.
+
+The firmware update process can take several minutes to complete. Therefore,
+during the update process notifications are emitted from the kernel to user
+space updating it about the status and progress.
+
+
+
+Notification contents:
+
+ +---------------------------------------------------+--------+----------------+
+ | ``ETHTOOL_A_MODULE_FW_FLASH_HEADER`` | nested | reply header |
+ +---------------------------------------------------+--------+----------------+
+ | ``ETHTOOL_A_MODULE_FW_FLASH_STATUS`` | u32 | status |
+ +---------------------------------------------------+--------+----------------+
+ | ``ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG`` | string | status message |
+ +---------------------------------------------------+--------+----------------+
+ | ``ETHTOOL_A_MODULE_FW_FLASH_DONE`` | uint | progress |
+ +---------------------------------------------------+--------+----------------+
+ | ``ETHTOOL_A_MODULE_FW_FLASH_TOTAL`` | uint | total |
+ +---------------------------------------------------+--------+----------------+
+
+The ``ETHTOOL_A_MODULE_FW_FLASH_STATUS`` attribute encodes the current status
+of the firmware update process. Possible values are:
+
+.. kernel-doc:: include/uapi/linux/ethtool.h
+ :identifiers: ethtool_module_fw_flash_status
+
+The ``ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG`` attribute encodes a status message
+string.
+
+The ``ETHTOOL_A_MODULE_FW_FLASH_DONE`` and ``ETHTOOL_A_MODULE_FW_FLASH_TOTAL``
+attributes encode the completed and total amount of work, respectively.
+
Request translation
===================
@@ -2147,4 +2281,5 @@ are netlink only.
n/a ``ETHTOOL_MSG_PLCA_GET_STATUS``
n/a ``ETHTOOL_MSG_MM_GET``
n/a ``ETHTOOL_MSG_MM_SET``
+ n/a ``ETHTOOL_MSG_MODULE_FW_FLASH_ACT``
=================================== =====================================
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index a141e8e65c5d..9a97030c6c8d 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -186,6 +186,7 @@ Code Seq# Include File Comments
'Q' all linux/soundcard.h
'R' 00-1F linux/random.h conflict!
'R' 01 linux/rfkill.h conflict!
+'R' 20-2F linux/trace_mmap.h
'R' C0-DF net/bluetooth/rfcomm.h
'R' E0 uapi/linux/fsl_mc.h
'S' all linux/cdrom.h conflict!
diff --git a/MAINTAINERS b/MAINTAINERS
index 22328600cfd0..9540f9290f56 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1044,7 +1044,7 @@ M: Joerg Roedel <[email protected]>
R: Suravee Suthikulpanit <[email protected]>
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
F: drivers/iommu/amd/
F: include/linux/amd-iommu.h
@@ -6239,9 +6239,8 @@ S: Maintained
F: drivers/usb/dwc3/
DESIGNWARE XDATA IP DRIVER
-M: Gustavo Pimentel <[email protected]>
-S: Maintained
+S: Orphan
F: Documentation/misc-devices/dw-xdata-pcie.rst
F: drivers/misc/dw-xdata-pcie.c
@@ -11051,8 +11050,8 @@ F: include/drm/xe*
F: include/uapi/drm/xe_drm.h
INTEL ETHERNET DRIVERS
-M: Jesse Brandeburg <[email protected]>
M: Tony Nguyen <[email protected]>
+M: Przemek Kitszel <[email protected]>
L: [email protected] (moderated for non-subscribers)
S: Supported
W: https://www.intel.com/content/www/us/en/support.html
@@ -11157,7 +11156,7 @@ M: David Woodhouse <[email protected]>
M: Lu Baolu <[email protected]>
S: Supported
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
F: drivers/iommu/intel/
INTEL IPU3 CSI-2 CIO2 DRIVER
@@ -11530,7 +11529,7 @@ IOMMU DMA-API LAYER
M: Robin Murphy <[email protected]>
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
F: drivers/iommu/dma-iommu.c
F: drivers/iommu/dma-iommu.h
F: drivers/iommu/iova.c
@@ -11542,7 +11541,7 @@ M: Will Deacon <[email protected]>
R: Robin Murphy <[email protected]>
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
F: Documentation/devicetree/bindings/iommu/
F: Documentation/userspace-api/iommu.rst
F: drivers/iommu/
@@ -14475,7 +14474,7 @@ MEMORY MAPPING
M: Andrew Morton <[email protected]>
R: Liam R. Howlett <[email protected]>
R: Vlastimil Babka <[email protected]>
-R: Lorenzo Stoakes <[email protected]>
+R: Lorenzo Stoakes <[email protected]>
S: Maintained
W: http://www.linux-mm.org
@@ -19324,7 +19323,7 @@ F: drivers/perf/riscv_pmu_legacy.c
F: drivers/perf/riscv_pmu_sbi.c
RISC-V THEAD SoC SUPPORT
-M: Jisheng Zhang <[email protected]>
+M: Drew Fustini <[email protected]>
M: Guo Ren <[email protected]>
M: Fu Wei <[email protected]>
diff --git a/Makefile b/Makefile
index 4d36f943b3b1..b25b5b44af10 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 10
SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc7
NAME = Baby Opossum Posse
# *DOCUMENTATION*
diff --git a/arch/arm/boot/dts/rockchip/rk3066a.dtsi b/arch/arm/boot/dts/rockchip/rk3066a.dtsi
index 51ae0418a7db..a187404aab13 100644
--- a/arch/arm/boot/dts/rockchip/rk3066a.dtsi
+++ b/arch/arm/boot/dts/rockchip/rk3066a.dtsi
@@ -128,6 +128,7 @@
pinctrl-0 = <&hdmii2c_xfer>, <&hdmi_hpd>;
power-domains = <&power RK3066_PD_VIO>;
rockchip,grf = <&grf>;
+ #sound-dai-cells = <0>;
status = "disabled";
ports {
diff --git a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts
index b47fe02c33fb..079101cddd65 100644
--- a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts
@@ -5,6 +5,8 @@
*/
/dts-v1/;
+
+#include <dt-bindings/leds/common.h>
#include "rk3308.dtsi"
/ {
@@ -24,17 +26,21 @@
leds {
compatible = "gpio-leds";
pinctrl-names = "default";
- pinctrl-0 = <&green_led_gio>, <&heartbeat_led_gpio>;
+ pinctrl-0 = <&green_led>, <&heartbeat_led>;
green-led {
+ color = <LED_COLOR_ID_GREEN>;
default-state = "on";
+ function = LED_FUNCTION_POWER;
gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_HIGH>;
label = "rockpis:green:power";
linux,default-trigger = "default-on";
};
blue-led {
+ color = <LED_COLOR_ID_BLUE>;
default-state = "on";
+ function = LED_FUNCTION_HEARTBEAT;
gpios = <&gpio0 RK_PA5 GPIO_ACTIVE_HIGH>;
label = "rockpis:blue:user";
linux,default-trigger = "heartbeat";
@@ -126,10 +132,12 @@
};
&emmc {
- bus-width = <4>;
cap-mmc-highspeed;
- mmc-hs200-1_8v;
+ cap-sd-highspeed;
+ no-sdio;
non-removable;
+ pinctrl-names = "default";
+ pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd>;
vmmc-supply = <&vcc_io>;
status = "okay";
};
@@ -214,11 +222,11 @@
pinctrl-0 = <&rtc_32k>;
leds {
- green_led_gio: green-led-gpio {
+ green_led: green-led {
rockchip,pins = <0 RK_PA6 RK_FUNC_GPIO &pcfg_pull_none>;
};
- heartbeat_led_gpio: heartbeat-led-gpio {
+ heartbeat_led: heartbeat-led {
rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
index 962ea893999b..c00da150a22f 100644
--- a/arch/arm64/boot/dts/rockchip/rk3308.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
@@ -811,7 +811,7 @@
clocks = <&cru SCLK_I2S2_8CH_TX_OUT>,
<&cru SCLK_I2S2_8CH_RX_OUT>,
<&cru PCLK_ACODEC>;
- reset-names = "codec-reset";
+ reset-names = "codec";
resets = <&cru SRST_ACODEC_P>;
#sound-dai-cells = <0>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
index f09d60bbe6c4..a608a219543e 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
@@ -241,8 +241,8 @@
rk805: pmic@18 {
compatible = "rockchip,rk805";
reg = <0x18>;
- interrupt-parent = <&gpio2>;
- interrupts = <6 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
#clock-cells = <1>;
clock-output-names = "xin32k", "rk805-clkout2";
gpio-controller;
diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
index 734f87db4d11..73618df7a889 100644
--- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
@@ -793,6 +793,7 @@
dma-names = "tx";
pinctrl-names = "default";
pinctrl-0 = <&spdif_tx>;
+ #sound-dai-cells = <0>;
status = "disabled";
};
@@ -804,6 +805,7 @@
clocks = <&cru SCLK_I2S_2CH>, <&cru HCLK_I2S_2CH>;
dmas = <&dmac_bus 6>, <&dmac_bus 7>;
dma-names = "tx", "rx";
+ #sound-dai-cells = <0>;
status = "disabled";
};
@@ -817,6 +819,7 @@
dma-names = "tx", "rx";
pinctrl-names = "default";
pinctrl-0 = <&i2s_8ch_bus>;
+ #sound-dai-cells = <0>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
index 789fd0dcc88b..3cd63d1e8f15 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi
@@ -450,7 +450,7 @@ ap_i2c_audio: &i2c8 {
dlg,btn-cfg = <50>;
dlg,mic-det-thr = <500>;
dlg,jack-ins-deb = <20>;
- dlg,jack-det-rate = "32ms_64ms";
+ dlg,jack-det-rate = "32_64";
dlg,jack-rem-deb = <1>;
dlg,a-d-btn-thr = <0xa>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
index 26322a358d91..b908ce006c26 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
@@ -289,7 +289,7 @@
regulator-name = "vdd_gpu";
regulator-always-on;
regulator-boot-on;
- regulator-min-microvolt = <900000>;
+ regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
regulator-ramp-delay = <6001>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
index 1a604429fb26..e74871491ef5 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
@@ -444,6 +444,7 @@
&sdmmc {
bus-width = <4>;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
index b4f22d95ac0e..e80caa36f8e4 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
@@ -435,6 +435,7 @@
&sdmmc {
bus-width = <4>;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index b8e15b76a8a6..2e7512676b7e 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -383,6 +383,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
sd-uhs-sdr104;
vmmc-supply = <&vcc_3v3_s3>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi
index aebe1fedd2d8..615094bb8ba3 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi
@@ -344,6 +344,11 @@
};
};
+&pwm0 {
+ pinctrl-0 = <&pwm0m1_pins>;
+ pinctrl-names = "default";
+};
+
&saradc {
vref-supply = <&vcc_1v8_s0>;
status = "okay";
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
index 3b2ec1d0c542..074c316a9a69 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
@@ -288,9 +288,9 @@
pinctrl-0 = <&i2c7m0_xfer>;
status = "okay";
- es8316: audio-codec@11 {
+ es8316: audio-codec@10 {
compatible = "everest,es8316";
- reg = <0x11>;
+ reg = <0x10>;
assigned-clocks = <&cru I2S0_8CH_MCLKOUT>;
assigned-clock-rates = <12288000>;
clocks = <&cru I2S0_8CH_MCLKOUT>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
index 8e2a07612d17..3b9a349362db 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
@@ -366,6 +366,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
@@ -393,6 +394,7 @@
pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>,
<&rk806_dvs2_null>, <&rk806_dvs3_null>;
spi-max-frequency = <1000000>;
+ system-power-controller;
vcc1-supply = <&vcc5v0_sys>;
vcc2-supply = <&vcc5v0_sys>;
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 9943ff0af4c9..1f60aa1bc750 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -170,6 +170,7 @@
#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
+#define PTE_SWBITS_MASK _AT(pteval_t, (BIT(63) | GENMASK(58, 55)))
#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
#ifdef CONFIG_ARM64_PA_BITS_52
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 266b96acc014..1386e8e751f2 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -840,7 +840,7 @@ __SYSCALL(__NR_pselect6_time64, compat_sys_pselect6_time64)
#define __NR_ppoll_time64 414
__SYSCALL(__NR_ppoll_time64, compat_sys_ppoll_time64)
#define __NR_io_pgetevents_time64 416
-__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents)
+__SYSCALL(__NR_io_pgetevents_time64, compat_sys_io_pgetevents_time64)
#define __NR_recvmmsg_time64 417
__SYSCALL(__NR_recvmmsg_time64, compat_sys_recvmmsg_time64)
#define __NR_mq_timedsend_time64 418
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
index 5fa08e13e17e..f374a3e5a5fe 100644
--- a/arch/arm64/kernel/pi/map_kernel.c
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -173,7 +173,7 @@ static void __init remap_idmap_for_lpa2(void)
* Don't bother with the FDT, we no longer need it after this.
*/
memset(init_idmap_pg_dir, 0,
- (u64)init_idmap_pg_dir - (u64)init_idmap_pg_end);
+ (u64)init_idmap_pg_end - (u64)init_idmap_pg_dir);
create_init_idmap(init_idmap_pg_dir, mask);
dsb(ishst);
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index ad198262b981..7230f6e20ab8 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -53,17 +53,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
syscall_set_return_value(current, regs, 0, ret);
/*
- * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
- * but not enough for arm64 stack utilization comfort. To keep
- * reasonable stack head room, reduce the maximum offset to 9 bits.
+ * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+ * bits. The actual entropy will be further reduced by the compiler
+ * when applying stack alignment constraints: the AAPCS mandates a
+ * 16-byte aligned SP at function boundaries, which will remove the
+ * 4 low bits from any entropy chosen here.
*
- * The actual entropy will be further reduced by the compiler when
- * applying stack alignment constraints: the AAPCS mandates a
- * 16-byte (i.e. 4-bit) aligned SP at function boundaries.
- *
- * The resulting 5 bits of entropy is seen in SP[8:4].
+ * The resulting 6 bits of entropy is seen in SP[9:4].
*/
- choose_random_kstack_offset(get_random_u16() & 0x1FF);
+ choose_random_kstack_offset(get_random_u16());
}
static inline bool has_syscall_work(unsigned long flags)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index c927e9312f10..353ea5dc32b8 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -124,7 +124,8 @@ bool pgattr_change_is_safe(u64 old, u64 new)
* The following mapping attributes may be updated in live
* kernel mappings without the need for break-before-make.
*/
- pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;
+ pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG |
+ PTE_SWBITS_MASK;
/* creating or taking down mappings is always safe */
if (!pte_valid(__pte(old)) || !pte_valid(__pte(new)))
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 720336d28856..751331f5ba90 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1244,6 +1244,13 @@ emit_cond_jmp:
break;
}
+ /* Implement helper call to bpf_get_current_task/_btf() inline */
+ if (insn->src_reg == 0 && (insn->imm == BPF_FUNC_get_current_task ||
+ insn->imm == BPF_FUNC_get_current_task_btf)) {
+ emit(A64_MRS_SP_EL0(r0), ctx);
+ break;
+ }
+
ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
&func_addr, &func_addr_fixed);
if (ret < 0)
@@ -1829,8 +1836,7 @@ skip_init_ctx:
prog->jited_len = 0;
goto out_free_hdr;
}
- if (WARN_ON(bpf_jit_binary_pack_finalize(prog, ro_header,
- header))) {
+ if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) {
/* ro_header has been freed */
ro_header = NULL;
prog = orig_prog;
@@ -2581,6 +2587,8 @@ bool bpf_jit_inlines_helper_call(s32 imm)
{
switch (imm) {
case BPF_FUNC_get_smp_processor_id:
+ case BPF_FUNC_get_current_task:
+ case BPF_FUNC_get_current_task_btf:
return true;
default:
return false;
diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h
index 7ff6a2466af1..e0594b6370a6 100644
--- a/arch/csky/include/uapi/asm/unistd.h
+++ b/arch/csky/include/uapi/asm/unistd.h
@@ -6,6 +6,7 @@
#define __ARCH_WANT_SYS_CLONE3
#define __ARCH_WANT_SET_GET_RLIMIT
#define __ARCH_WANT_TIME32_SYSCALLS
+#define __ARCH_WANT_SYNC_FILE_RANGE2
#include <asm-generic/unistd.h>
#define __NR_set_thread_area (__NR_arch_specific_syscall + 0)
diff --git a/arch/csky/kernel/syscall.c b/arch/csky/kernel/syscall.c
index 3d30e58a45d2..4540a271ee39 100644
--- a/arch/csky/kernel/syscall.c
+++ b/arch/csky/kernel/syscall.c
@@ -20,7 +20,7 @@ SYSCALL_DEFINE6(mmap2,
unsigned long, prot,
unsigned long, flags,
unsigned long, fd,
- off_t, offset)
+ unsigned long, offset)
{
if (unlikely(offset & (~PAGE_MASK >> 12)))
return -EINVAL;
diff --git a/arch/hexagon/include/asm/syscalls.h b/arch/hexagon/include/asm/syscalls.h
new file mode 100644
index 000000000000..40f2d08bec92
--- /dev/null
+++ b/arch/hexagon/include/asm/syscalls.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm-generic/syscalls.h>
+
+asmlinkage long sys_hexagon_fadvise64_64(int fd, int advice,
+ u32 a2, u32 a3, u32 a4, u32 a5);
diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h
index 432c4db1b623..21ae22306b5d 100644
--- a/arch/hexagon/include/uapi/asm/unistd.h
+++ b/arch/hexagon/include/uapi/asm/unistd.h
@@ -36,5 +36,6 @@
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_TIME32_SYSCALLS
+#define __ARCH_WANT_SYNC_FILE_RANGE2
#include <asm-generic/unistd.h>
diff --git a/arch/hexagon/kernel/syscalltab.c b/arch/hexagon/kernel/syscalltab.c
index 0fadd582cfc7..5d98bdc494ec 100644
--- a/arch/hexagon/kernel/syscalltab.c
+++ b/arch/hexagon/kernel/syscalltab.c
@@ -14,6 +14,13 @@
#undef __SYSCALL
#define __SYSCALL(nr, call) [nr] = (call),
+SYSCALL_DEFINE6(hexagon_fadvise64_64, int, fd, int, advice,
+ SC_ARG64(offset), SC_ARG64(len))
+{
+ return ksys_fadvise64_64(fd, SC_VAL64(loff_t, offset), SC_VAL64(loff_t, len), advice);
+}
+#define sys_fadvise64_64 sys_hexagon_fadvise64_64
+
void *sys_call_table[__NR_syscalls] = {
#include <asm/unistd.h>
};
diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c
index b4c5acd7aa3b..8801611143ab 100644
--- a/arch/loongarch/kernel/syscall.c
+++ b/arch/loongarch/kernel/syscall.c
@@ -22,7 +22,7 @@
#define __SYSCALL(nr, call) [nr] = (call),
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long,
- prot, unsigned long, flags, unsigned long, fd, off_t, offset)
+ prot, unsigned long, flags, unsigned long, fd, unsigned long, offset)
{
if (offset & ~PAGE_MASK)
return -EINVAL;
diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c
index ed9f34da1a2a..0850b099f300 100644
--- a/arch/microblaze/kernel/sys_microblaze.c
+++ b/arch/microblaze/kernel/sys_microblaze.c
@@ -35,7 +35,7 @@
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags, unsigned long, fd,
- off_t, pgoff)
+ unsigned long, pgoff)
{
if (pgoff & ~PAGE_MASK)
return -EINVAL;
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index cc869f5d5693..953f5b7dc723 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -354,7 +354,7 @@
412 n32 utimensat_time64 sys_utimensat
413 n32 pselect6_time64 compat_sys_pselect6_time64
414 n32 ppoll_time64 compat_sys_ppoll_time64
-416 n32 io_pgetevents_time64 sys_io_pgetevents
+416 n32 io_pgetevents_time64 compat_sys_io_pgetevents_time64
417 n32 recvmmsg_time64 compat_sys_recvmmsg_time64
418 n32 mq_timedsend_time64 sys_mq_timedsend
419 n32 mq_timedreceive_time64 sys_mq_timedreceive
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 81428a2eb660..2439a2491cff 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -403,7 +403,7 @@
412 o32 utimensat_time64 sys_utimensat sys_utimensat
413 o32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
414 o32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
-416 o32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
+416 o32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
417 o32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
418 o32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
419 o32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index daafeb20f993..dc9b902de8ea 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -16,6 +16,7 @@ config PARISC
select ARCH_HAS_UBSAN
select ARCH_HAS_PTE_SPECIAL
select ARCH_NO_SG_CHAIN
+ select ARCH_SPLIT_ARG64 if !64BIT
select ARCH_SUPPORTS_HUGETLBFS if PA20
select ARCH_SUPPORTS_MEMORY_FAILURE
select ARCH_STACKWALK
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index 2a12a547b447..826c8e51b585 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -23,12 +23,3 @@ asmlinkage long sys32_unimplemented(int r26, int r25, int r24, int r23,
current->comm, current->pid, r20);
return -ENOSYS;
}
-
-asmlinkage long sys32_fanotify_mark(compat_int_t fanotify_fd, compat_uint_t flags,
- compat_uint_t mask0, compat_uint_t mask1, compat_int_t dfd,
- const char __user * pathname)
-{
- return sys_fanotify_mark(fanotify_fd, flags,
- ((__u64)mask1 << 32) | mask0,
- dfd, pathname);
-}
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index b13c21373974..66dc406b12e4 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -108,7 +108,7 @@
95 common fchown sys_fchown
96 common getpriority sys_getpriority
97 common setpriority sys_setpriority
-98 common recv sys_recv
+98 common recv sys_recv compat_sys_recv
99 common statfs sys_statfs compat_sys_statfs
100 common fstatfs sys_fstatfs compat_sys_fstatfs
101 common stat64 sys_stat64
@@ -135,7 +135,7 @@
120 common clone sys_clone_wrapper
121 common setdomainname sys_setdomainname
122 common sendfile sys_sendfile compat_sys_sendfile
-123 common recvfrom sys_recvfrom
+123 common recvfrom sys_recvfrom compat_sys_recvfrom
124 32 adjtimex sys_adjtimex_time32
124 64 adjtimex sys_adjtimex
125 common mprotect sys_mprotect
@@ -364,7 +364,7 @@
320 common accept4 sys_accept4
321 common prlimit64 sys_prlimit64
322 common fanotify_init sys_fanotify_init
-323 common fanotify_mark sys_fanotify_mark sys32_fanotify_mark
+323 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
324 32 clock_adjtime sys_clock_adjtime32
324 64 clock_adjtime sys_clock_adjtime
325 common name_to_handle_at sys_name_to_handle_at
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index d1030bc52564..d283d281d28e 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -849,6 +849,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
{
struct eeh_dev *edev;
struct pci_dev *pdev;
+ struct pci_bus *bus = NULL;
if (pe->type & EEH_PE_PHB)
return pe->phb->bus;
@@ -859,9 +860,11 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
/* Retrieve the parent PCI bus of first (top) PCI device */
edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
+ pci_lock_rescan_remove();
pdev = eeh_dev_to_pci_dev(edev);
if (pdev)
- return pdev->bus;
+ bus = pdev->bus;
+ pci_unlock_rescan_remove();
- return NULL;
+ return bus;
}
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 4690c219bfa4..63432a33ec49 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -647,8 +647,9 @@ __after_prom_start:
* Note: This process overwrites the OF exception vectors.
*/
LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET)
- mr. r4,r26 /* In some cases the loader may */
- beq 9f /* have already put us at zero */
+ mr r4,r26 /* Load the virtual source address into r4 */
+ cmpld r3,r4 /* Check if source == dest */
+ beq 9f /* If so skip the copy */
li r6,0x100 /* Start offset, the first 0x100 */
/* bytes were copied earlier. */
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 3656f1ca7a21..ebae8415dfbb 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -230,8 +230,10 @@
178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64
179 64 pread64 sys_pread64
+179 spu pread64 sys_pread64
180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64
180 64 pwrite64 sys_pwrite64
+180 spu pwrite64 sys_pwrite64
181 common chown sys_chown
182 common getcwd sys_getcwd
183 common capget sys_capget
@@ -246,6 +248,7 @@
190 common ugetrlimit sys_getrlimit compat_sys_getrlimit
191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead
191 64 readahead sys_readahead
+191 spu readahead sys_readahead
192 32 mmap2 sys_mmap2 compat_sys_mmap2
193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64
194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64
@@ -293,6 +296,7 @@
232 nospu set_tid_address sys_set_tid_address
233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64
233 64 fadvise64 sys_fadvise64
+233 spu fadvise64 sys_fadvise64
234 nospu exit_group sys_exit_group
235 nospu lookup_dcookie sys_ni_syscall
236 common epoll_create sys_epoll_create
@@ -502,7 +506,7 @@
412 32 utimensat_time64 sys_utimensat sys_utimensat
413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
+416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 85050be08a23..72b12bc10f90 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -27,6 +27,7 @@
#include <asm/paca.h>
#include <asm/mmu.h>
#include <asm/sections.h> /* _end */
+#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/hw_breakpoint.h>
#include <asm/svm.h>
@@ -317,6 +318,16 @@ void default_machine_kexec(struct kimage *image)
if (!kdump_in_progress())
kexec_prepare_cpus();
+#ifdef CONFIG_PPC_PSERIES
+ /*
+ * This must be done after other CPUs have shut down, otherwise they
+ * could execute the 'scv' instruction, which is not supported with
+ * reloc disabled (see configure_exceptions()).
+ */
+ if (firmware_has_feature(FW_FEATURE_SET_MODE))
+ pseries_disable_reloc_on_exc();
+#endif
+
printk("kexec: Starting switchover sequence.\n");
/* switch to a staticly allocated stack. Based on irq stack code.
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 984655419da5..2a36cc2e7e9e 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -225,7 +225,7 @@ skip_init_ctx:
fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
if (!fp->is_func || extra_pass) {
- if (bpf_jit_binary_pack_finalize(fp, fhdr, hdr)) {
+ if (bpf_jit_binary_pack_finalize(fhdr, hdr)) {
fp = org_fp;
goto out_addrs;
}
@@ -348,7 +348,7 @@ void bpf_jit_free(struct bpf_prog *fp)
* before freeing it.
*/
if (jit_data) {
- bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr);
+ bpf_jit_binary_pack_finalize(jit_data->fhdr, jit_data->hdr);
kvfree(jit_data->addrs);
kfree(jit_data);
}
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 096d09ed89f6..431be156ca9b 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -61,11 +61,3 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
} else
xics_kexec_teardown_cpu(secondary);
}
-
-void pseries_machine_kexec(struct kimage *image)
-{
- if (firmware_has_feature(FW_FEATURE_SET_MODE))
- pseries_disable_reloc_on_exc();
-
- default_machine_kexec(image);
-}
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index bba4ad192b0f..3968a6970fa8 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -38,7 +38,6 @@ static inline void smp_init_pseries(void) { }
#endif
extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary);
-void pseries_machine_kexec(struct kimage *image);
extern void pSeries_final_fixup(void);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 284a6fa04b0c..b10a25325238 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -343,8 +343,8 @@ static int alloc_dispatch_log_kmem_cache(void)
{
void (*ctor)(void *) = get_dtl_cache_ctor();
- dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
- DISPATCH_LOG_BYTES, 0, ctor);
+ dtl_cache = kmem_cache_create_usercopy("dtl", DISPATCH_LOG_BYTES,
+ DISPATCH_LOG_BYTES, 0, 0, DISPATCH_LOG_BYTES, ctor);
if (!dtl_cache) {
pr_warn("Failed to create dispatch trace log buffer cache\n");
pr_warn("Stolen time statistics will be unreliable\n");
@@ -1159,7 +1159,6 @@ define_machine(pseries) {
.machine_check_exception = pSeries_machine_check_exception,
.machine_check_log_err = pSeries_machine_check_log_err,
#ifdef CONFIG_KEXEC_CORE
- .machine_kexec = pseries_machine_kexec,
.kexec_cpu_down = pseries_kexec_cpu_down,
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/arch/riscv/boot/dts/canaan/canaan_kd233.dts b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
index 8df4cf3656f2..a7d753b6fdfd 100644
--- a/arch/riscv/boot/dts/canaan/canaan_kd233.dts
+++ b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
@@ -15,6 +15,10 @@
model = "Kendryte KD233";
compatible = "canaan,kendryte-kd233", "canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -46,7 +50,6 @@
&fpioa {
pinctrl-0 = <&jtag_pinctrl>;
pinctrl-names = "default";
- status = "okay";
jtag_pinctrl: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
@@ -118,6 +121,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&spi0 {
@@ -125,6 +129,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "canaan,kd233-tft", "ilitek,ili9341";
diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi
index f87c5164d9cf..4f5d40fa1e77 100644
--- a/arch/riscv/boot/dts/canaan/k210.dtsi
+++ b/arch/riscv/boot/dts/canaan/k210.dtsi
@@ -16,13 +16,6 @@
#size-cells = <1>;
compatible = "canaan,kendryte-k210";
- aliases {
- serial0 = &uarths0;
- serial1 = &uart1;
- serial2 = &uart2;
- serial3 = &uart3;
- };
-
/*
* The K210 has an sv39 MMU following the privileged specification v1.9.
* Since this is a non-ratified draft specification, the kernel does not
@@ -137,6 +130,7 @@
reg = <0x38000000 0x1000>;
interrupts = <33>;
clocks = <&sysclk K210_CLK_CPU>;
+ status = "disabled";
};
gpio0: gpio-controller@38001000 {
@@ -152,6 +146,7 @@
<62>, <63>, <64>, <65>;
gpio-controller;
ngpios = <32>;
+ status = "disabled";
};
dmac0: dma-controller@50000000 {
@@ -187,6 +182,7 @@
<&sysclk K210_CLK_GPIO>;
clock-names = "bus", "db";
resets = <&sysrst K210_RST_GPIO>;
+ status = "disabled";
gpio1_0: gpio-port@0 {
#gpio-cells = <2>;
@@ -214,6 +210,7 @@
dsr-override;
cts-override;
ri-override;
+ status = "disabled";
};
uart2: serial@50220000 {
@@ -230,6 +227,7 @@
dsr-override;
cts-override;
ri-override;
+ status = "disabled";
};
uart3: serial@50230000 {
@@ -246,6 +244,7 @@
dsr-override;
cts-override;
ri-override;
+ status = "disabled";
};
spi2: spi@50240000 {
@@ -259,6 +258,7 @@
<&sysclk K210_CLK_APB0>;
clock-names = "ssi_clk", "pclk";
resets = <&sysrst K210_RST_SPI2>;
+ status = "disabled";
};
i2s0: i2s@50250000 {
@@ -268,6 +268,7 @@
clocks = <&sysclk K210_CLK_I2S0>;
clock-names = "i2sclk";
resets = <&sysrst K210_RST_I2S0>;
+ status = "disabled";
};
i2s1: i2s@50260000 {
@@ -277,6 +278,7 @@
clocks = <&sysclk K210_CLK_I2S1>;
clock-names = "i2sclk";
resets = <&sysrst K210_RST_I2S1>;
+ status = "disabled";
};
i2s2: i2s@50270000 {
@@ -286,6 +288,7 @@
clocks = <&sysclk K210_CLK_I2S2>;
clock-names = "i2sclk";
resets = <&sysrst K210_RST_I2S2>;
+ status = "disabled";
};
i2c0: i2c@50280000 {
@@ -296,6 +299,7 @@
<&sysclk K210_CLK_APB0>;
clock-names = "ref", "pclk";
resets = <&sysrst K210_RST_I2C0>;
+ status = "disabled";
};
i2c1: i2c@50290000 {
@@ -306,6 +310,7 @@
<&sysclk K210_CLK_APB0>;
clock-names = "ref", "pclk";
resets = <&sysrst K210_RST_I2C1>;
+ status = "disabled";
};
i2c2: i2c@502a0000 {
@@ -316,6 +321,7 @@
<&sysclk K210_CLK_APB0>;
clock-names = "ref", "pclk";
resets = <&sysrst K210_RST_I2C2>;
+ status = "disabled";
};
fpioa: pinmux@502b0000 {
@@ -464,6 +470,7 @@
reset-names = "spi";
num-cs = <4>;
reg-io-width = <4>;
+ status = "disabled";
};
spi1: spi@53000000 {
@@ -479,6 +486,7 @@
reset-names = "spi";
num-cs = <4>;
reg-io-width = <4>;
+ status = "disabled";
};
spi3: spi@54000000 {
@@ -495,6 +503,7 @@
num-cs = <4>;
reg-io-width = <4>;
+ status = "disabled";
};
};
};
diff --git a/arch/riscv/boot/dts/canaan/k210_generic.dts b/arch/riscv/boot/dts/canaan/k210_generic.dts
index 396c8ca4d24d..5734cc03753b 100644
--- a/arch/riscv/boot/dts/canaan/k210_generic.dts
+++ b/arch/riscv/boot/dts/canaan/k210_generic.dts
@@ -15,6 +15,10 @@
model = "Kendryte K210 generic";
compatible = "canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -24,7 +28,6 @@
&fpioa {
pinctrl-0 = <&jtag_pins>;
pinctrl-names = "default";
- status = "okay";
jtag_pins: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
index 6d25bf07481a..2ab376d609d2 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
@@ -17,6 +17,10 @@
compatible = "sipeed,maix-bit", "sipeed,maix-bitm",
"canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -58,7 +62,6 @@
&fpioa {
pinctrl-names = "default";
pinctrl-0 = <&jtag_pinctrl>;
- status = "okay";
jtag_pinctrl: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
@@ -156,6 +159,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&i2c1 {
@@ -170,6 +174,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "sitronix,st7789v";
@@ -199,6 +204,8 @@
};
&spi3 {
+ status = "okay";
+
flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
index f4f4d8d5e8b8..d98e20775c07 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
@@ -17,6 +17,10 @@
compatible = "sipeed,maix-dock-m1", "sipeed,maix-dock-m1w",
"canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -63,7 +67,6 @@
&fpioa {
pinctrl-0 = <&jtag_pinctrl>;
pinctrl-names = "default";
- status = "okay";
jtag_pinctrl: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
@@ -159,6 +162,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&i2c1 {
@@ -173,6 +177,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "sitronix,st7789v";
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
index 0d86df47e1ed..79ecd549700a 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
@@ -16,6 +16,10 @@
model = "SiPeed MAIX GO";
compatible = "sipeed,maix-go", "canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -69,7 +73,6 @@
&fpioa {
pinctrl-0 = <&jtag_pinctrl>;
pinctrl-names = "default";
- status = "okay";
jtag_pinctrl: jtag-pinmux {
pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
@@ -167,6 +170,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&i2c1 {
@@ -181,6 +185,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "sitronix,st7789v";
@@ -209,6 +214,8 @@
};
&spi3 {
+ status = "okay";
+
flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
index 5c05c498e2b8..019c03ae51f6 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
@@ -15,6 +15,10 @@
model = "SiPeed MAIXDUINO";
compatible = "sipeed,maixduino", "canaan,kendryte-k210";
+ aliases {
+ serial0 = &uarths0;
+ };
+
chosen {
bootargs = "earlycon console=ttySIF0";
stdout-path = "serial0:115200n8";
@@ -39,8 +43,6 @@
};
&fpioa {
- status = "okay";
-
uarths_pinctrl: uarths-pinmux {
pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>, /* Header "0" */
<K210_FPIOA(5, K210_PCF_UARTHS_TX)>; /* Header "1" */
@@ -132,6 +134,7 @@
#sound-dai-cells = <1>;
pinctrl-0 = <&i2s0_pinctrl>;
pinctrl-names = "default";
+ status = "okay";
};
&i2c1 {
@@ -146,6 +149,7 @@
pinctrl-names = "default";
num-cs = <1>;
cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+ status = "okay";
panel@0 {
compatible = "sitronix,st7789v";
@@ -174,6 +178,8 @@
};
&spi3 {
+ status = "okay";
+
flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
diff --git a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
index 8ff6ea64f048..68d16717db8c 100644
--- a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi
@@ -244,7 +244,7 @@
regulator-boot-on;
regulator-always-on;
regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
regulator-name = "emmc_vdd";
};
};
diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h
index 06e439eeef9a..09fde95a5e8f 100644
--- a/arch/riscv/include/asm/insn.h
+++ b/arch/riscv/include/asm/insn.h
@@ -145,7 +145,7 @@
/* parts of opcode for RVF, RVD and RVQ */
#define RVFDQ_FL_FS_WIDTH_OFF 12
-#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(3, 0)
+#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(2, 0)
#define RVFDQ_FL_FS_WIDTH_W 2
#define RVFDQ_FL_FS_WIDTH_D 3
#define RVFDQ_LS_FS_WIDTH_Q 4
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 87cbd86576b2..4b95c574fd04 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -120,9 +120,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
out = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
mutex_unlock(&text_mutex);
- if (!mod)
- local_flush_icache_range(rec->ip, rec->ip + MCOUNT_INSN_SIZE);
-
return out;
}
@@ -156,9 +153,9 @@ static int __ftrace_modify_code(void *data)
} else {
while (atomic_read(&param->cpu_count) <= num_online_cpus())
cpu_relax();
- }
- local_flush_icache_all();
+ local_flush_icache_all();
+ }
return 0;
}
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index ed9cad20c039..3c830a6f7ef4 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -121,20 +121,12 @@ static void machine_kexec_mask_interrupts(void)
for_each_irq_desc(i, desc) {
struct irq_chip *chip;
- int ret;
chip = irq_desc_get_chip(desc);
if (!chip)
continue;
- /*
- * First try to remove the active state. If this
- * fails, try to EOI the interrupt.
- */
- ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
-
- if (ret && irqd_irq_inprogress(&desc->irq_data) &&
- chip->irq_eoi)
+ if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
chip->irq_eoi(&desc->irq_data);
if (chip->irq_mask)
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 4007563fb607..ab03732d06c4 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -89,6 +89,14 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
memset(waddr, c, len);
+ /*
+ * We could have just patched a function that is about to be
+ * called so make sure we don't execute partially patched
+ * instructions by flushing the icache as soon as possible.
+ */
+ local_flush_icache_range((unsigned long)waddr,
+ (unsigned long)waddr + len);
+
patch_unmap(FIX_TEXT_POKE0);
if (across_pages)
@@ -135,6 +143,14 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
ret = copy_to_kernel_nofault(waddr, insn, len);
+ /*
+ * We could have just patched a function that is about to be
+ * called so make sure we don't execute partially patched
+ * instructions by flushing the icache as soon as possible.
+ */
+ local_flush_icache_range((unsigned long)waddr,
+ (unsigned long)waddr + len);
+
patch_unmap(FIX_TEXT_POKE0);
if (across_pages)
@@ -189,9 +205,6 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len)
ret = patch_insn_set(tp, c, len);
- if (!ret)
- flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len);
-
return ret;
}
NOKPROBE_SYMBOL(patch_text_set_nosync);
@@ -224,9 +237,6 @@ int patch_text_nosync(void *addr, const void *insns, size_t len)
ret = patch_insn_write(tp, insns, len);
- if (!ret)
- flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
-
return ret;
}
NOKPROBE_SYMBOL(patch_text_nosync);
@@ -253,9 +263,9 @@ static int patch_text_cb(void *data)
} else {
while (atomic_read(&patch->cpu_count) <= num_online_cpus())
cpu_relax();
- }
- local_flush_icache_all();
+ local_flush_icache_all();
+ }
return ret;
}
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 528ec7cc9a62..10e311b2759d 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -32,6 +32,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
bool (*fn)(void *, unsigned long), void *arg)
{
unsigned long fp, sp, pc;
+ int graph_idx = 0;
int level = 0;
if (regs) {
@@ -68,7 +69,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
pc = regs->ra;
} else {
fp = frame->fp;
- pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+ pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra,
&frame->ra);
if (pc == (unsigned long)ret_from_exception) {
if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
@@ -156,7 +157,7 @@ unsigned long __get_wchan(struct task_struct *task)
return pc;
}
-noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct task_struct *task, struct pt_regs *regs)
{
walk_stackframe(task, regs, consume_entry, cookie);
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index 64155323cc92..d77afe05578f 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -23,7 +23,7 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len,
#ifdef CONFIG_64BIT
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
- unsigned long, fd, off_t, offset)
+ unsigned long, fd, unsigned long, offset)
{
return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0);
}
@@ -32,7 +32,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
- unsigned long, fd, off_t, offset)
+ unsigned long, fd, unsigned long, offset)
{
/*
* Note that the shift for mmap2 is constant (12),
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index 04db1f993c47..bcf41d6e0df0 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -327,7 +327,7 @@ static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_att
event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
if (IS_ERR(event)) {
- pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
+ pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
return PTR_ERR(event);
}
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index d5cebb0b0afe..0795efdd3519 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -15,7 +15,10 @@
#include <asm/percpu.h>
#include "bpf_jit.h"
+#define RV_MAX_REG_ARGS 8
#define RV_FENTRY_NINSNS 2
+/* imm that allows emit_imm to emit max count insns */
+#define RV_MAX_COUNT_IMM 0x7FFF7FF7FF7FF7FF
#define RV_REG_TCC RV_REG_A6
#define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */
@@ -690,26 +693,45 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
return ret;
}
-static void store_args(int nregs, int args_off, struct rv_jit_context *ctx)
+static void store_args(int nr_arg_slots, int args_off, struct rv_jit_context *ctx)
{
int i;
- for (i = 0; i < nregs; i++) {
- emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx);
+ for (i = 0; i < nr_arg_slots; i++) {
+ if (i < RV_MAX_REG_ARGS) {
+ emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx);
+ } else {
+ /* skip slots for T0 and FP of traced function */
+ emit_ld(RV_REG_T1, 16 + (i - RV_MAX_REG_ARGS) * 8, RV_REG_FP, ctx);
+ emit_sd(RV_REG_FP, -args_off, RV_REG_T1, ctx);
+ }
args_off -= 8;
}
}
-static void restore_args(int nregs, int args_off, struct rv_jit_context *ctx)
+static void restore_args(int nr_reg_args, int args_off, struct rv_jit_context *ctx)
{
int i;
- for (i = 0; i < nregs; i++) {
+ for (i = 0; i < nr_reg_args; i++) {
emit_ld(RV_REG_A0 + i, -args_off, RV_REG_FP, ctx);
args_off -= 8;
}
}
+static void restore_stack_args(int nr_stack_args, int args_off, int stk_arg_off,
+ struct rv_jit_context *ctx)
+{
+ int i;
+
+ for (i = 0; i < nr_stack_args; i++) {
+ emit_ld(RV_REG_T1, -(args_off - RV_MAX_REG_ARGS * 8), RV_REG_FP, ctx);
+ emit_sd(RV_REG_FP, -stk_arg_off, RV_REG_T1, ctx);
+ args_off -= 8;
+ stk_arg_off -= 8;
+ }
+}
+
static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_off,
int run_ctx_off, bool save_ret, struct rv_jit_context *ctx)
{
@@ -782,8 +804,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
{
int i, ret, offset;
int *branches_off = NULL;
- int stack_size = 0, nregs = m->nr_args;
- int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off;
+ int stack_size = 0, nr_arg_slots = 0;
+ int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off, stk_arg_off;
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -829,20 +851,21 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
* FP - sreg_off [ callee saved reg ]
*
* [ pads ] pads for 16 bytes alignment
+ *
+ * [ stack_argN ]
+ * [ ... ]
+ * FP - stk_arg_off [ stack_arg1 ] BPF_TRAMP_F_CALL_ORIG
*/
if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
return -ENOTSUPP;
- /* extra regiters for struct arguments */
- for (i = 0; i < m->nr_args; i++)
- if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
- nregs += round_up(m->arg_size[i], 8) / 8 - 1;
-
- /* 8 arguments passed by registers */
- if (nregs > 8)
+ if (m->nr_args > MAX_BPF_FUNC_ARGS)
return -ENOTSUPP;
+ for (i = 0; i < m->nr_args; i++)
+ nr_arg_slots += round_up(m->arg_size[i], 8) / 8;
+
/* room of trampoline frame to store return address and frame pointer */
stack_size += 16;
@@ -852,7 +875,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
retval_off = stack_size;
}
- stack_size += nregs * 8;
+ stack_size += nr_arg_slots * 8;
args_off = stack_size;
stack_size += 8;
@@ -869,8 +892,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
stack_size += 8;
sreg_off = stack_size;
+ if ((flags & BPF_TRAMP_F_CALL_ORIG) && (nr_arg_slots - RV_MAX_REG_ARGS > 0))
+ stack_size += (nr_arg_slots - RV_MAX_REG_ARGS) * 8;
+
stack_size = round_up(stack_size, STACK_ALIGN);
+ /* room for args on stack must be at the top of stack */
+ stk_arg_off = stack_size;
+
if (!is_struct_ops) {
/* For the trampoline called from function entry,
* the frame of traced function and the frame of
@@ -906,17 +935,17 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
emit_sd(RV_REG_FP, -ip_off, RV_REG_T1, ctx);
}
- emit_li(RV_REG_T1, nregs, ctx);
+ emit_li(RV_REG_T1, nr_arg_slots, ctx);
emit_sd(RV_REG_FP, -nregs_off, RV_REG_T1, ctx);
- store_args(nregs, args_off, ctx);
+ store_args(nr_arg_slots, args_off, ctx);
/* skip to actual body of traced function */
if (flags & BPF_TRAMP_F_SKIP_FRAME)
orig_call += RV_FENTRY_NINSNS * 4;
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- emit_imm(RV_REG_A0, (const s64)im, ctx);
+ emit_imm(RV_REG_A0, ctx->insns ? (const s64)im : RV_MAX_COUNT_IMM, ctx);
ret = emit_call((const u64)__bpf_tramp_enter, true, ctx);
if (ret)
return ret;
@@ -949,13 +978,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
}
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- restore_args(nregs, args_off, ctx);
+ restore_args(min_t(int, nr_arg_slots, RV_MAX_REG_ARGS), args_off, ctx);
+ restore_stack_args(nr_arg_slots - RV_MAX_REG_ARGS, args_off, stk_arg_off, ctx);
ret = emit_call((const u64)orig_call, true, ctx);
if (ret)
goto out;
emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx);
emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx);
- im->ip_after_call = ctx->insns + ctx->ninsns;
+ im->ip_after_call = ctx->ro_insns + ctx->ninsns;
/* 2 nops reserved for auipc+jalr pair */
emit(rv_nop(), ctx);
emit(rv_nop(), ctx);
@@ -976,15 +1006,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
}
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- im->ip_epilogue = ctx->insns + ctx->ninsns;
- emit_imm(RV_REG_A0, (const s64)im, ctx);
+ im->ip_epilogue = ctx->ro_insns + ctx->ninsns;
+ emit_imm(RV_REG_A0, ctx->insns ? (const s64)im : RV_MAX_COUNT_IMM, ctx);
ret = emit_call((const u64)__bpf_tramp_exit, true, ctx);
if (ret)
goto out;
}
if (flags & BPF_TRAMP_F_RESTORE_REGS)
- restore_args(nregs, args_off, ctx);
+ restore_args(min_t(int, nr_arg_slots, RV_MAX_REG_ARGS), args_off, ctx);
if (save_ret) {
emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx);
@@ -1039,31 +1069,52 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns);
}
-int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
- void *image_end, const struct btf_func_model *m,
+void *arch_alloc_bpf_trampoline(unsigned int size)
+{
+ return bpf_prog_pack_alloc(size, bpf_fill_ill_insns);
+}
+
+void arch_free_bpf_trampoline(void *image, unsigned int size)
+{
+ bpf_prog_pack_free(image, size);
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
+ void *ro_image_end, const struct btf_func_model *m,
u32 flags, struct bpf_tramp_links *tlinks,
void *func_addr)
{
int ret;
+ void *image, *res;
struct rv_jit_context ctx;
+ u32 size = ro_image_end - ro_image;
+
+ image = kvmalloc(size, GFP_KERNEL);
+ if (!image)
+ return -ENOMEM;
ctx.ninsns = 0;
- /*
- * The bpf_int_jit_compile() uses a RW buffer (ctx.insns) to write the
- * JITed instructions and later copies it to a RX region (ctx.ro_insns).
- * It also uses ctx.ro_insns to calculate offsets for jumps etc. As the
- * trampoline image uses the same memory area for writing and execution,
- * both ctx.insns and ctx.ro_insns can be set to image.
- */
ctx.insns = image;
- ctx.ro_insns = image;
+ ctx.ro_insns = ro_image;
ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
if (ret < 0)
- return ret;
+ goto out;
- bpf_flush_icache(ctx.insns, ctx.insns + ctx.ninsns);
+ if (WARN_ON(size < ninsns_rvoff(ctx.ninsns))) {
+ ret = -E2BIG;
+ goto out;
+ }
- return ninsns_rvoff(ret);
+ res = bpf_arch_text_copy(ro_image, image, size);
+ if (IS_ERR(res)) {
+ ret = PTR_ERR(res);
+ goto out;
+ }
+
+ bpf_flush_icache(ro_image, ro_image_end);
+out:
+ kvfree(image);
+ return ret < 0 ? ret : size;
}
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 0a96abdaca65..6de753c667f4 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -178,8 +178,7 @@ skip_init_ctx:
prog->jited_len = prog_size - cfi_get_offset();
if (!prog->is_func || extra_pass) {
- if (WARN_ON(bpf_jit_binary_pack_finalize(prog, jit_data->ro_header,
- jit_data->header))) {
+ if (WARN_ON(bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header))) {
/* ro_header has been freed */
jit_data->ro_header = NULL;
prog = orig_prog;
@@ -258,7 +257,7 @@ void bpf_jit_free(struct bpf_prog *prog)
* before freeing it.
*/
if (jit_data) {
- bpf_jit_binary_pack_finalize(prog, jit_data->ro_header, jit_data->header);
+ bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header);
kfree(jit_data);
}
hdr = bpf_jit_binary_pack_hdr(prog);
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 48ef5fe5c08a..5a36d5538dae 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -170,11 +170,14 @@ static void kaslr_adjust_got(unsigned long offset)
u64 *entry;
/*
- * Even without -fPIE, Clang still uses a global offset table for some
- * reason. Adjust the GOT entries.
+ * Adjust GOT entries, except for ones for undefined weak symbols
+ * that resolved to zero. This also skips the first three reserved
+ * entries on s390x that are zero.
*/
- for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++)
- *entry += offset - __START_KERNEL;
+ for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) {
+ if (*entry)
+ *entry += offset - __START_KERNEL;
+ }
}
/*
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 8c4adece8911..f3602414a961 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -601,17 +601,16 @@ CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=m
CONFIG_DIAG288_WATCHDOG=m
+CONFIG_DRM=m
+CONFIG_DRM_VIRTIO_GPU=m
CONFIG_FB=y
# CONFIG_FB_DEVICE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_MLX5_INFINIBAND=m
-CONFIG_SYNC_FILE=y
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
CONFIG_MLX5_VFIO_PCI=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 6dd11d3b6aaa..d0d8925fdf09 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -592,17 +592,16 @@ CONFIG_WATCHDOG_CORE=y
CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=m
CONFIG_DIAG288_WATCHDOG=m
+CONFIG_DRM=m
+CONFIG_DRM_VIRTIO_GPU=m
CONFIG_FB=y
# CONFIG_FB_DEVICE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_MLX5_INFINIBAND=m
-CONFIG_SYNC_FILE=y
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
CONFIG_MLX5_VFIO_PCI=m
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 7f5004065e8a..35555c944630 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -54,7 +54,7 @@ static __always_inline void arch_exit_to_user_mode(void)
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
unsigned long ti_work)
{
- choose_random_kstack_offset(get_tod_clock_fast() & 0xff);
+ choose_random_kstack_offset(get_tod_clock_fast());
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 95990461888f..9281063636a7 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -427,6 +427,7 @@ struct kvm_vcpu_stat {
u64 instruction_io_other;
u64 instruction_lpsw;
u64 instruction_lpswe;
+ u64 instruction_lpswey;
u64 instruction_pfmf;
u64 instruction_ptff;
u64 instruction_sck;
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index dc2355c623d6..50cbcbbaa03d 100644
--- a/arch/s390/kernel/syscall.c
+++ b/arch/s390/kernel/syscall.c
@@ -38,33 +38,6 @@
#include "entry.h"
-/*
- * Perform the mmap() system call. Linux for S/390 isn't able to handle more
- * than 5 system call parameters, so this system call uses a memory block
- * for parameter passing.
- */
-
-struct s390_mmap_arg_struct {
- unsigned long addr;
- unsigned long len;
- unsigned long prot;
- unsigned long flags;
- unsigned long fd;
- unsigned long offset;
-};
-
-SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
-{
- struct s390_mmap_arg_struct a;
- int error = -EFAULT;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- goto out;
- error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
-out:
- return error;
-}
-
#ifdef CONFIG_SYSVIPC
/*
* sys_ipc() is the de-multiplexer for the SysV IPC calls.
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index bd0fee24ad10..01071182763e 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -418,7 +418,7 @@
412 32 utimensat_time64 - sys_utimensat
413 32 pselect6_time64 - compat_sys_pselect6_time64
414 32 ppoll_time64 - compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 - sys_io_pgetevents
+416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 - sys_mq_timedsend
419 32 mq_timedreceive_time64 - sys_mq_timedreceive
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 82e9631cd9ef..54b5b2565df8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -132,6 +132,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, instruction_io_other),
STATS_DESC_COUNTER(VCPU, instruction_lpsw),
STATS_DESC_COUNTER(VCPU, instruction_lpswe),
+ STATS_DESC_COUNTER(VCPU, instruction_lpswey),
STATS_DESC_COUNTER(VCPU, instruction_pfmf),
STATS_DESC_COUNTER(VCPU, instruction_ptff),
STATS_DESC_COUNTER(VCPU, instruction_sck),
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 111eb5c74784..bf8534218af3 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -138,6 +138,21 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar)
return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}
+static inline u64 kvm_s390_get_base_disp_siy(struct kvm_vcpu *vcpu, u8 *ar)
+{
+ u32 base1 = vcpu->arch.sie_block->ipb >> 28;
+ s64 disp1;
+
+ /* The displacement is a 20bit _SIGNED_ value */
+ disp1 = sign_extend64(((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+ ((vcpu->arch.sie_block->ipb & 0xff00) << 4), 19);
+
+ if (ar)
+ *ar = base1;
+
+ return (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
+}
+
static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
u64 *address1, u64 *address2,
u8 *ar_b1, u8 *ar_b2)
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 1be19cc9d73c..1a49b89706f8 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -797,6 +797,36 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
return 0;
}
+static int handle_lpswey(struct kvm_vcpu *vcpu)
+{
+ psw_t new_psw;
+ u64 addr;
+ int rc;
+ u8 ar;
+
+ vcpu->stat.instruction_lpswey++;
+
+ if (!test_kvm_facility(vcpu->kvm, 193))
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
+ if (addr & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
+ vcpu->arch.sie_block->gpsw = new_psw;
+ if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ return 0;
+}
+
static int handle_stidp(struct kvm_vcpu *vcpu)
{
u64 stidp_data = vcpu->kvm->arch.model.cpuid;
@@ -1462,6 +1492,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
case 0x61:
case 0x62:
return handle_ri(vcpu);
+ case 0x71:
+ return handle_lpswey(vcpu);
default:
return -EOPNOTSUPP;
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index abb629d7e131..7e3e767ab87d 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -55,6 +55,8 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
+ if (!table)
+ return;
pagetable_free(virt_to_ptdesc(table));
}
@@ -262,6 +264,8 @@ static unsigned long *base_crst_alloc(unsigned long val)
static void base_crst_free(unsigned long *table)
{
+ if (!table)
+ return;
pagetable_free(virt_to_ptdesc(table));
}
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 4be8f5cadd02..9d440a0b729e 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -31,11 +31,12 @@
#include <asm/nospec-branch.h>
#include <asm/set_memory.h>
#include <asm/text-patching.h>
+#include <asm/unwind.h>
#include "bpf_jit.h"
struct bpf_jit {
u32 seen; /* Flags to remember seen eBPF instructions */
- u32 seen_reg[16]; /* Array to remember which registers are used */
+ u16 seen_regs; /* Mask to remember which registers are used */
u32 *addrs; /* Array with relative instruction addresses */
u8 *prg_buf; /* Start of program */
int size; /* Size of program and literal pool */
@@ -53,6 +54,8 @@ struct bpf_jit {
int excnt; /* Number of exception table entries */
int prologue_plt_ret; /* Return address for prologue hotpatch PLT */
int prologue_plt; /* Start of prologue hotpatch PLT */
+ int kern_arena; /* Pool offset of kernel arena address */
+ u64 user_arena; /* User arena address */
};
#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
@@ -60,6 +63,8 @@ struct bpf_jit {
#define SEEN_FUNC BIT(2) /* calls C functions */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM)
+#define NVREGS 0xffc0 /* %r6-%r15 */
+
/*
* s390 registers
*/
@@ -118,8 +123,8 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
{
u32 r1 = reg2hex[b1];
- if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1])
- jit->seen_reg[r1] = 1;
+ if (r1 >= 6 && r1 <= 15)
+ jit->seen_regs |= (1 << r1);
}
#define REG_SET_SEEN(b1) \
@@ -127,8 +132,6 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
reg_set_seen(jit, b1); \
})
-#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]]
-
/*
* EMIT macros for code generation
*/
@@ -436,12 +439,12 @@ static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
/*
* Return first seen register (from start)
*/
-static int get_start(struct bpf_jit *jit, int start)
+static int get_start(u16 seen_regs, int start)
{
int i;
for (i = start; i <= 15; i++) {
- if (jit->seen_reg[i])
+ if (seen_regs & (1 << i))
return i;
}
return 0;
@@ -450,15 +453,15 @@ static int get_start(struct bpf_jit *jit, int start)
/*
* Return last seen register (from start) (gap >= 2)
*/
-static int get_end(struct bpf_jit *jit, int start)
+static int get_end(u16 seen_regs, int start)
{
int i;
for (i = start; i < 15; i++) {
- if (!jit->seen_reg[i] && !jit->seen_reg[i + 1])
+ if (!(seen_regs & (3 << i)))
return i - 1;
}
- return jit->seen_reg[15] ? 15 : 14;
+ return (seen_regs & (1 << 15)) ? 15 : 14;
}
#define REGS_SAVE 1
@@ -467,8 +470,10 @@ static int get_end(struct bpf_jit *jit, int start)
* Save and restore clobbered registers (6-15) on stack.
* We save/restore registers in chunks with gap >= 2 registers.
*/
-static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
+static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth,
+ u16 extra_regs)
{
+ u16 seen_regs = jit->seen_regs | extra_regs;
const int last = 15, save_restore_size = 6;
int re = 6, rs;
@@ -482,10 +487,10 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
}
do {
- rs = get_start(jit, re);
+ rs = get_start(seen_regs, re);
if (!rs)
break;
- re = get_end(jit, rs + 1);
+ re = get_end(seen_regs, rs + 1);
if (op == REGS_SAVE)
save_regs(jit, rs, re);
else
@@ -570,8 +575,21 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
}
/* Tail calls have to skip above initialization */
jit->tail_call_start = jit->prg;
- /* Save registers */
- save_restore_regs(jit, REGS_SAVE, stack_depth);
+ if (fp->aux->exception_cb) {
+ /*
+ * Switch stack, the new address is in the 2nd parameter.
+ *
+ * Arrange the restoration of %r6-%r15 in the epilogue.
+ * Do not restore them now, the prog does not need them.
+ */
+ /* lgr %r15,%r3 */
+ EMIT4(0xb9040000, REG_15, REG_3);
+ jit->seen_regs |= NVREGS;
+ } else {
+ /* Save registers */
+ save_restore_regs(jit, REGS_SAVE, stack_depth,
+ fp->aux->exception_boundary ? NVREGS : 0);
+ }
/* Setup literal pool */
if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
if (!is_first_pass(jit) &&
@@ -647,7 +665,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
/* Load exit code: lgr %r2,%b0 */
EMIT4(0xb9040000, REG_2, BPF_REG_0);
/* Restore registers */
- save_restore_regs(jit, REGS_RESTORE, stack_depth);
+ save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
if (nospec_uses_trampoline()) {
jit->r14_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r14 thunk */
@@ -667,50 +685,111 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
jit->prg += sizeof(struct bpf_plt);
}
-static int get_probe_mem_regno(const u8 *insn)
-{
- /*
- * insn must point to llgc, llgh, llgf, lg, lgb, lgh or lgf, which have
- * destination register at the same position.
- */
- if (insn[0] != 0xe3) /* common prefix */
- return -1;
- if (insn[5] != 0x90 && /* llgc */
- insn[5] != 0x91 && /* llgh */
- insn[5] != 0x16 && /* llgf */
- insn[5] != 0x04 && /* lg */
- insn[5] != 0x77 && /* lgb */
- insn[5] != 0x15 && /* lgh */
- insn[5] != 0x14) /* lgf */
- return -1;
- return insn[1] >> 4;
-}
-
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
regs->psw.addr = extable_fixup(x);
- regs->gprs[x->data] = 0;
+ if (x->data != -1)
+ regs->gprs[x->data] = 0;
return true;
}
-static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
- int probe_prg, int nop_prg)
+/*
+ * A single BPF probe instruction
+ */
+struct bpf_jit_probe {
+ int prg; /* JITed instruction offset */
+ int nop_prg; /* JITed nop offset */
+ int reg; /* Register to clear on exception */
+ int arena_reg; /* Register to use for arena addressing */
+};
+
+static void bpf_jit_probe_init(struct bpf_jit_probe *probe)
+{
+ probe->prg = -1;
+ probe->nop_prg = -1;
+ probe->reg = -1;
+ probe->arena_reg = REG_0;
+}
+
+/*
+ * Handlers of certain exceptions leave psw.addr pointing to the instruction
+ * directly after the failing one. Therefore, create two exception table
+ * entries and also add a nop in case two probing instructions come directly
+ * after each other.
+ */
+static void bpf_jit_probe_emit_nop(struct bpf_jit *jit,
+ struct bpf_jit_probe *probe)
+{
+ if (probe->prg == -1 || probe->nop_prg != -1)
+ /* The probe is not armed or nop is already emitted. */
+ return;
+
+ probe->nop_prg = jit->prg;
+ /* bcr 0,%0 */
+ _EMIT2(0x0700);
+}
+
+static void bpf_jit_probe_load_pre(struct bpf_jit *jit, struct bpf_insn *insn,
+ struct bpf_jit_probe *probe)
+{
+ if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
+ BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
+ BPF_MODE(insn->code) != BPF_PROBE_MEM32)
+ return;
+
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+ /* lgrl %r1,kern_arena */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
+ probe->arena_reg = REG_W1;
+ }
+ probe->prg = jit->prg;
+ probe->reg = reg2hex[insn->dst_reg];
+}
+
+static void bpf_jit_probe_store_pre(struct bpf_jit *jit, struct bpf_insn *insn,
+ struct bpf_jit_probe *probe)
+{
+ if (BPF_MODE(insn->code) != BPF_PROBE_MEM32)
+ return;
+
+ /* lgrl %r1,kern_arena */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
+ probe->arena_reg = REG_W1;
+ probe->prg = jit->prg;
+}
+
+static void bpf_jit_probe_atomic_pre(struct bpf_jit *jit,
+ struct bpf_insn *insn,
+ struct bpf_jit_probe *probe)
+{
+ if (BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
+ return;
+
+ /* lgrl %r1,kern_arena */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
+ /* agr %r1,%dst */
+ EMIT4(0xb9080000, REG_W1, insn->dst_reg);
+ probe->arena_reg = REG_W1;
+ probe->prg = jit->prg;
+}
+
+static int bpf_jit_probe_post(struct bpf_jit *jit, struct bpf_prog *fp,
+ struct bpf_jit_probe *probe)
{
struct exception_table_entry *ex;
- int reg, prg;
+ int i, prg;
s64 delta;
u8 *insn;
- int i;
+ if (probe->prg == -1)
+ /* The probe is not armed. */
+ return 0;
+ bpf_jit_probe_emit_nop(jit, probe);
if (!fp->aux->extable)
/* Do nothing during early JIT passes. */
return 0;
- insn = jit->prg_buf + probe_prg;
- reg = get_probe_mem_regno(insn);
- if (WARN_ON_ONCE(reg < 0))
- /* JIT bug - unexpected probe instruction. */
- return -1;
- if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg))
+ insn = jit->prg_buf + probe->prg;
+ if (WARN_ON_ONCE(probe->prg + insn_length(*insn) != probe->nop_prg))
/* JIT bug - gap between probe and nop instructions. */
return -1;
for (i = 0; i < 2; i++) {
@@ -719,23 +798,24 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
return -1;
ex = &fp->aux->extable[jit->excnt];
/* Add extable entries for probe and nop instructions. */
- prg = i == 0 ? probe_prg : nop_prg;
+ prg = i == 0 ? probe->prg : probe->nop_prg;
delta = jit->prg_buf + prg - (u8 *)&ex->insn;
if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
/* JIT bug - code and extable must be close. */
return -1;
ex->insn = delta;
/*
- * Always land on the nop. Note that extable infrastructure
- * ignores fixup field, it is handled by ex_handler_bpf().
+ * Land on the current instruction. Note that the extable
+ * infrastructure ignores the fixup field; it is handled by
+ * ex_handler_bpf().
*/
- delta = jit->prg_buf + nop_prg - (u8 *)&ex->fixup;
+ delta = jit->prg_buf + jit->prg - (u8 *)&ex->fixup;
if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
/* JIT bug - landing pad and extable must be close. */
return -1;
ex->fixup = delta;
ex->type = EX_TYPE_BPF;
- ex->data = reg;
+ ex->data = probe->reg;
jit->excnt++;
}
return 0;
@@ -782,19 +862,15 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
s32 branch_oc_off = insn->off;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
+ struct bpf_jit_probe probe;
int last, insn_count = 1;
u32 *addrs = jit->addrs;
s32 imm = insn->imm;
s16 off = insn->off;
- int probe_prg = -1;
unsigned int mask;
- int nop_prg;
int err;
- if (BPF_CLASS(insn->code) == BPF_LDX &&
- (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
- BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
- probe_prg = jit->prg;
+ bpf_jit_probe_init(&probe);
switch (insn->code) {
/*
@@ -823,6 +899,22 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
}
break;
case BPF_ALU64 | BPF_MOV | BPF_X:
+ if (insn_is_cast_user(insn)) {
+ int patch_brc;
+
+ /* ltgr %dst,%src */
+ EMIT4(0xb9020000, dst_reg, src_reg);
+ /* brc 8,0f */
+ patch_brc = jit->prg;
+ EMIT4_PCREL_RIC(0xa7040000, 8, 0);
+ /* iihf %dst,user_arena>>32 */
+ EMIT6_IMM(0xc0080000, dst_reg, jit->user_arena >> 32);
+ /* 0: */
+ if (jit->prg_buf)
+ *(u16 *)(jit->prg_buf + patch_brc + 2) =
+ (jit->prg - patch_brc) >> 1;
+ break;
+ }
switch (insn->off) {
case 0: /* DST = SRC */
/* lgr %dst,%src */
@@ -1366,51 +1458,99 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
* BPF_ST(X)
*/
case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */
- /* stcy %src,off(%dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg, REG_0, off);
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* stcy %src,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
- /* sthy %src,off(%dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg, REG_0, off);
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* sthy %src,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
- /* sty %src,off(%dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg, REG_0, off);
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* sty %src,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
- /* stg %src,off(%dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg, REG_0, off);
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* stg %src,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
/* lhi %w0,imm */
EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
- /* stcy %w0,off(dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg, REG_0, off);
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* stcy %w0,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
/* lhi %w0,imm */
EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
- /* sthy %w0,off(dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg, REG_0, off);
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* sthy %w0,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
/* llilf %w0,imm */
EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
- /* sty %w0,off(%dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg, REG_0, off);
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* sty %w0,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
/* lgfi %w0,imm */
EMIT6_IMM(0xc0010000, REG_W0, imm);
- /* stg %w0,off(%dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg, REG_0, off);
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* stg %w0,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
/*
@@ -1418,15 +1558,30 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
*/
case BPF_STX | BPF_ATOMIC | BPF_DW:
case BPF_STX | BPF_ATOMIC | BPF_W:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
{
bool is32 = BPF_SIZE(insn->code) == BPF_W;
+ /*
+ * Unlike loads and stores, atomics have only a base register,
+ * but no index register. For the non-arena case, simply use
+ * %dst as a base. For the arena case, use the work register
+ * %r1: first, load the arena base into it, and then add %dst
+ * to it.
+ */
+ probe.arena_reg = dst_reg;
+
switch (insn->imm) {
-/* {op32|op64} {%w0|%src},%src,off(%dst) */
#define EMIT_ATOMIC(op32, op64) do { \
+ bpf_jit_probe_atomic_pre(jit, insn, &probe); \
+ /* {op32|op64} {%w0|%src},%src,off(%arena) */ \
EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64), \
(insn->imm & BPF_FETCH) ? src_reg : REG_W0, \
- src_reg, dst_reg, off); \
+ src_reg, probe.arena_reg, off); \
+ err = bpf_jit_probe_post(jit, fp, &probe); \
+ if (err < 0) \
+ return err; \
if (insn->imm & BPF_FETCH) { \
/* bcr 14,0 - see atomic_fetch_{add,and,or,xor}() */ \
_EMIT2(0x07e0); \
@@ -1455,25 +1610,50 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT_ATOMIC(0x00f7, 0x00e7);
break;
#undef EMIT_ATOMIC
- case BPF_XCHG:
- /* {ly|lg} %w0,off(%dst) */
+ case BPF_XCHG: {
+ struct bpf_jit_probe load_probe = probe;
+ int loop_start;
+
+ bpf_jit_probe_atomic_pre(jit, insn, &load_probe);
+ /* {ly|lg} %w0,off(%arena) */
EMIT6_DISP_LH(0xe3000000,
is32 ? 0x0058 : 0x0004, REG_W0, REG_0,
- dst_reg, off);
- /* 0: {csy|csg} %w0,%src,off(%dst) */
+ load_probe.arena_reg, off);
+ bpf_jit_probe_emit_nop(jit, &load_probe);
+ /* Reuse {ly|lg}'s arena_reg for {csy|csg}. */
+ if (load_probe.prg != -1) {
+ probe.prg = jit->prg;
+ probe.arena_reg = load_probe.arena_reg;
+ }
+ loop_start = jit->prg;
+ /* 0: {csy|csg} %w0,%src,off(%arena) */
EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
- REG_W0, src_reg, dst_reg, off);
+ REG_W0, src_reg, probe.arena_reg, off);
+ bpf_jit_probe_emit_nop(jit, &probe);
/* brc 4,0b */
- EMIT4_PCREL_RIC(0xa7040000, 4, jit->prg - 6);
+ EMIT4_PCREL_RIC(0xa7040000, 4, loop_start);
/* {llgfr|lgr} %src,%w0 */
EMIT4(is32 ? 0xb9160000 : 0xb9040000, src_reg, REG_W0);
+ /* Both probes should land here on exception. */
+ err = bpf_jit_probe_post(jit, fp, &load_probe);
+ if (err < 0)
+ return err;
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
if (is32 && insn_is_zext(&insn[1]))
insn_count = 2;
break;
+ }
case BPF_CMPXCHG:
- /* 0: {csy|csg} %b0,%src,off(%dst) */
+ bpf_jit_probe_atomic_pre(jit, insn, &probe);
+ /* 0: {csy|csg} %b0,%src,off(%arena) */
EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
- BPF_REG_0, src_reg, dst_reg, off);
+ BPF_REG_0, src_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
break;
default:
pr_err("Unknown atomic operation %02x\n", insn->imm);
@@ -1488,51 +1668,87 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
*/
case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
- /* llgc %dst,0(off,%src) */
- EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+ bpf_jit_probe_load_pre(jit, insn, &probe);
+ /* llgc %dst,off(%src,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
case BPF_LDX | BPF_MEMSX | BPF_B: /* dst = *(s8 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
- /* lgb %dst,0(off,%src) */
+ bpf_jit_probe_load_pre(jit, insn, &probe);
+ /* lgb %dst,off(%src) */
EMIT6_DISP_LH(0xe3000000, 0x0077, dst_reg, src_reg, REG_0, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
- /* llgh %dst,0(off,%src) */
- EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+ bpf_jit_probe_load_pre(jit, insn, &probe);
+ /* llgh %dst,off(%src,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
case BPF_LDX | BPF_MEMSX | BPF_H: /* dst = *(s16 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
- /* lgh %dst,0(off,%src) */
+ bpf_jit_probe_load_pre(jit, insn, &probe);
+ /* lgh %dst,off(%src) */
EMIT6_DISP_LH(0xe3000000, 0x0015, dst_reg, src_reg, REG_0, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+ bpf_jit_probe_load_pre(jit, insn, &probe);
/* llgf %dst,off(%src) */
jit->seen |= SEEN_MEM;
- EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
+ EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
case BPF_LDX | BPF_MEMSX | BPF_W: /* dst = *(s32 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
+ bpf_jit_probe_load_pre(jit, insn, &probe);
/* lgf %dst,off(%src) */
jit->seen |= SEEN_MEM;
EMIT6_DISP_LH(0xe3000000, 0x0014, dst_reg, src_reg, REG_0, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
break;
case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
- /* lg %dst,0(off,%src) */
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+ bpf_jit_probe_load_pre(jit, insn, &probe);
+ /* lg %dst,off(%src,%arena) */
jit->seen |= SEEN_MEM;
- EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
+ EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
break;
/*
* BPF_JMP / CALL
@@ -1647,7 +1863,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/*
* Restore registers before calling function
*/
- save_restore_regs(jit, REGS_RESTORE, stack_depth);
+ save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
/*
* goto *(prog->bpf_func + tail_call_start);
@@ -1897,22 +2113,6 @@ branch_oc:
return -1;
}
- if (probe_prg != -1) {
- /*
- * Handlers of certain exceptions leave psw.addr pointing to
- * the instruction directly after the failing one. Therefore,
- * create two exception table entries and also add a nop in
- * case two probing instructions come directly after each
- * other.
- */
- nop_prg = jit->prg;
- /* bcr 0,%0 */
- _EMIT2(0x0700);
- err = bpf_jit_probe_mem(jit, fp, probe_prg, nop_prg);
- if (err < 0)
- return err;
- }
-
return insn_count;
}
@@ -1958,12 +2158,18 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
bool extra_pass, u32 stack_depth)
{
int i, insn_count, lit32_size, lit64_size;
+ u64 kern_arena;
jit->lit32 = jit->lit32_start;
jit->lit64 = jit->lit64_start;
jit->prg = 0;
jit->excnt = 0;
+ kern_arena = bpf_arena_get_kern_vm_start(fp->aux->arena);
+ if (kern_arena)
+ jit->kern_arena = _EMIT_CONST_U64(kern_arena);
+ jit->user_arena = bpf_arena_get_user_vm_start(fp->aux->arena);
+
bpf_jit_prologue(jit, fp, stack_depth);
if (bpf_set_addr(jit, 0) < 0)
return -1;
@@ -2011,9 +2217,25 @@ static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
struct bpf_prog *fp)
{
struct bpf_binary_header *header;
+ struct bpf_insn *insn;
u32 extable_size;
u32 code_size;
+ int i;
+
+ for (i = 0; i < fp->len; i++) {
+ insn = &fp->insnsi[i];
+ if (BPF_CLASS(insn->code) == BPF_STX &&
+ BPF_MODE(insn->code) == BPF_PROBE_ATOMIC &&
+ (BPF_SIZE(insn->code) == BPF_DW ||
+ BPF_SIZE(insn->code) == BPF_W) &&
+ insn->imm == BPF_XCHG)
+ /*
+ * bpf_jit_insn() emits a load and a compare-and-swap,
+ * both of which need to be probed.
+ */
+ fp->aux->num_exentries += 1;
+ }
/* We need two entries per insn. */
fp->aux->num_exentries *= 2;
@@ -2689,3 +2911,52 @@ bool bpf_jit_supports_subprog_tailcalls(void)
{
return true;
}
+
+bool bpf_jit_supports_arena(void)
+{
+ return true;
+}
+
+bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
+{
+ /*
+ * Currently the verifier uses this function only to check which
+ * atomic stores to arena are supported, and they all are.
+ */
+ return true;
+}
+
+bool bpf_jit_supports_exceptions(void)
+{
+ /*
+ * Exceptions require unwinding support, which is always available,
+ * because the kernel is always built with backchain.
+ */
+ return true;
+}
+
+void arch_bpf_stack_walk(bool (*consume_fn)(void *, u64, u64, u64),
+ void *cookie)
+{
+ unsigned long addr, prev_addr = 0;
+ struct unwind_state state;
+
+ unwind_for_each_frame(&state, NULL, NULL, 0) {
+ addr = unwind_get_return_address(&state);
+ if (!addr)
+ break;
+ /*
+ * addr is a return address and state.sp is the value of %r15
+ * at this address. exception_cb needs %r15 at entry to the
+ * function containing addr, so take the next state.sp.
+ *
+ * There is no bp, and the exception_cb prog does not need one
+ * to perform a quasi-longjmp. The common code requires a
+ * non-zero bp, so pass sp there as well.
+ */
+ if (prev_addr && !consume_fn(cookie, prev_addr, state.sp,
+ state.sp))
+ break;
+ prev_addr = addr;
+ }
+}
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index ff8f24854c64..0ef83b6ac0db 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -410,7 +410,7 @@ static void __init cpu_enable_directed_irq(void *unused)
union zpci_sic_iib iib = {{0}};
union zpci_sic_iib ziib = {{0}};
- iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;
+ iib.cdiib.dibv_addr = virt_to_phys(zpci_ibv[smp_processor_id()]->vector);
zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib);
diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c
index 9dca568509a5..d6f4afcb0e87 100644
--- a/arch/sh/kernel/sys_sh32.c
+++ b/arch/sh/kernel/sys_sh32.c
@@ -59,3 +59,14 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1,
(u64)len0 << 32 | len1, advice);
#endif
}
+
+/*
+ * swap the arguments the way that libc wants them instead of
+ * moving flags ahead of the 64-bit nbytes argument
+ */
+SYSCALL_DEFINE6(sh_sync_file_range6, int, fd, SC_ARG64(offset),
+ SC_ARG64(nbytes), unsigned int, flags)
+{
+ return ksys_sync_file_range(fd, SC_VAL64(loff_t, offset),
+ SC_VAL64(loff_t, nbytes), flags);
+}
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index bbf83a2db986..c55fd7696d40 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -321,7 +321,7 @@
311 common set_robust_list sys_set_robust_list
312 common get_robust_list sys_get_robust_list
313 common splice sys_splice
-314 common sync_file_range sys_sync_file_range
+314 common sync_file_range sys_sh_sync_file_range6
315 common tee sys_tee
316 common vmsplice sys_vmsplice
317 common move_pages sys_move_pages
@@ -395,6 +395,7 @@
385 common pkey_alloc sys_pkey_alloc
386 common pkey_free sys_pkey_free
387 common rseq sys_rseq
+388 common sync_file_range2 sys_sync_file_range2
# room for arch specific syscalls
393 common semget sys_semget
394 common semctl sys_semctl
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index a45f0f31fe51..a3d308f2043e 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -18,224 +18,3 @@ sys32_mmap2:
sethi %hi(sys_mmap), %g1
jmpl %g1 + %lo(sys_mmap), %g0
sllx %o5, 12, %o5
-
- .align 32
- .globl sys32_socketcall
-sys32_socketcall: /* %o0=call, %o1=args */
- cmp %o0, 1
- bl,pn %xcc, do_einval
- cmp %o0, 18
- bg,pn %xcc, do_einval
- sub %o0, 1, %o0
- sllx %o0, 5, %o0
- sethi %hi(__socketcall_table_begin), %g2
- or %g2, %lo(__socketcall_table_begin), %g2
- jmpl %g2 + %o0, %g0
- nop
-do_einval:
- retl
- mov -EINVAL, %o0
-
- .align 32
-__socketcall_table_begin:
-
- /* Each entry is exactly 32 bytes. */
-do_sys_socket: /* sys_socket(int, int, int) */
-1: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_socket), %g1
-2: ldswa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_socket), %g0
-3: ldswa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_bind: /* sys_bind(int fd, struct sockaddr *, int) */
-4: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_bind), %g1
-5: ldswa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_bind), %g0
-6: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_connect: /* sys_connect(int, struct sockaddr *, int) */
-7: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_connect), %g1
-8: ldswa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_connect), %g0
-9: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_listen: /* sys_listen(int, int) */
-10: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_listen), %g1
- jmpl %g1 + %lo(sys_listen), %g0
-11: ldswa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
- nop
-do_sys_accept: /* sys_accept(int, struct sockaddr *, int *) */
-12: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_accept), %g1
-13: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_accept), %g0
-14: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_getsockname: /* sys_getsockname(int, struct sockaddr *, int *) */
-15: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_getsockname), %g1
-16: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_getsockname), %g0
-17: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_getpeername: /* sys_getpeername(int, struct sockaddr *, int *) */
-18: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_getpeername), %g1
-19: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(sys_getpeername), %g0
-20: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_socketpair: /* sys_socketpair(int, int, int, int *) */
-21: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_socketpair), %g1
-22: ldswa [%o1 + 0x8] %asi, %o2
-23: lduwa [%o1 + 0xc] %asi, %o3
- jmpl %g1 + %lo(sys_socketpair), %g0
-24: ldswa [%o1 + 0x4] %asi, %o1
- nop
- nop
-do_sys_send: /* sys_send(int, void *, size_t, unsigned int) */
-25: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_send), %g1
-26: lduwa [%o1 + 0x8] %asi, %o2
-27: lduwa [%o1 + 0xc] %asi, %o3
- jmpl %g1 + %lo(sys_send), %g0
-28: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
-do_sys_recv: /* sys_recv(int, void *, size_t, unsigned int) */
-29: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_recv), %g1
-30: lduwa [%o1 + 0x8] %asi, %o2
-31: lduwa [%o1 + 0xc] %asi, %o3
- jmpl %g1 + %lo(sys_recv), %g0
-32: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
-do_sys_sendto: /* sys_sendto(int, u32, compat_size_t, unsigned int, u32, int) */
-33: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_sendto), %g1
-34: lduwa [%o1 + 0x8] %asi, %o2
-35: lduwa [%o1 + 0xc] %asi, %o3
-36: lduwa [%o1 + 0x10] %asi, %o4
-37: ldswa [%o1 + 0x14] %asi, %o5
- jmpl %g1 + %lo(sys_sendto), %g0
-38: lduwa [%o1 + 0x4] %asi, %o1
-do_sys_recvfrom: /* sys_recvfrom(int, u32, compat_size_t, unsigned int, u32, u32) */
-39: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_recvfrom), %g1
-40: lduwa [%o1 + 0x8] %asi, %o2
-41: lduwa [%o1 + 0xc] %asi, %o3
-42: lduwa [%o1 + 0x10] %asi, %o4
-43: lduwa [%o1 + 0x14] %asi, %o5
- jmpl %g1 + %lo(sys_recvfrom), %g0
-44: lduwa [%o1 + 0x4] %asi, %o1
-do_sys_shutdown: /* sys_shutdown(int, int) */
-45: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_shutdown), %g1
- jmpl %g1 + %lo(sys_shutdown), %g0
-46: ldswa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
- nop
-do_sys_setsockopt: /* sys_setsockopt(int, int, int, char *, int) */
-47: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_setsockopt), %g1
-48: ldswa [%o1 + 0x8] %asi, %o2
-49: lduwa [%o1 + 0xc] %asi, %o3
-50: ldswa [%o1 + 0x10] %asi, %o4
- jmpl %g1 + %lo(sys_setsockopt), %g0
-51: ldswa [%o1 + 0x4] %asi, %o1
- nop
-do_sys_getsockopt: /* sys_getsockopt(int, int, int, u32, u32) */
-52: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_getsockopt), %g1
-53: ldswa [%o1 + 0x8] %asi, %o2
-54: lduwa [%o1 + 0xc] %asi, %o3
-55: lduwa [%o1 + 0x10] %asi, %o4
- jmpl %g1 + %lo(sys_getsockopt), %g0
-56: ldswa [%o1 + 0x4] %asi, %o1
- nop
-do_sys_sendmsg: /* compat_sys_sendmsg(int, struct compat_msghdr *, unsigned int) */
-57: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(compat_sys_sendmsg), %g1
-58: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(compat_sys_sendmsg), %g0
-59: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int) */
-60: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(compat_sys_recvmsg), %g1
-61: lduwa [%o1 + 0x8] %asi, %o2
- jmpl %g1 + %lo(compat_sys_recvmsg), %g0
-62: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
- nop
-do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */
-63: ldswa [%o1 + 0x0] %asi, %o0
- sethi %hi(sys_accept4), %g1
-64: lduwa [%o1 + 0x8] %asi, %o2
-65: ldswa [%o1 + 0xc] %asi, %o3
- jmpl %g1 + %lo(sys_accept4), %g0
-66: lduwa [%o1 + 0x4] %asi, %o1
- nop
- nop
-
- .section __ex_table,"a"
- .align 4
- .word 1b, __retl_efault, 2b, __retl_efault
- .word 3b, __retl_efault, 4b, __retl_efault
- .word 5b, __retl_efault, 6b, __retl_efault
- .word 7b, __retl_efault, 8b, __retl_efault
- .word 9b, __retl_efault, 10b, __retl_efault
- .word 11b, __retl_efault, 12b, __retl_efault
- .word 13b, __retl_efault, 14b, __retl_efault
- .word 15b, __retl_efault, 16b, __retl_efault
- .word 17b, __retl_efault, 18b, __retl_efault
- .word 19b, __retl_efault, 20b, __retl_efault
- .word 21b, __retl_efault, 22b, __retl_efault
- .word 23b, __retl_efault, 24b, __retl_efault
- .word 25b, __retl_efault, 26b, __retl_efault
- .word 27b, __retl_efault, 28b, __retl_efault
- .word 29b, __retl_efault, 30b, __retl_efault
- .word 31b, __retl_efault, 32b, __retl_efault
- .word 33b, __retl_efault, 34b, __retl_efault
- .word 35b, __retl_efault, 36b, __retl_efault
- .word 37b, __retl_efault, 38b, __retl_efault
- .word 39b, __retl_efault, 40b, __retl_efault
- .word 41b, __retl_efault, 42b, __retl_efault
- .word 43b, __retl_efault, 44b, __retl_efault
- .word 45b, __retl_efault, 46b, __retl_efault
- .word 47b, __retl_efault, 48b, __retl_efault
- .word 49b, __retl_efault, 50b, __retl_efault
- .word 51b, __retl_efault, 52b, __retl_efault
- .word 53b, __retl_efault, 54b, __retl_efault
- .word 55b, __retl_efault, 56b, __retl_efault
- .word 57b, __retl_efault, 58b, __retl_efault
- .word 59b, __retl_efault, 60b, __retl_efault
- .word 61b, __retl_efault, 62b, __retl_efault
- .word 63b, __retl_efault, 64b, __retl_efault
- .word 65b, __retl_efault, 66b, __retl_efault
- .previous
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index ac6c281ccfe0..cfdfb3707c16 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -117,7 +117,7 @@
90 common dup2 sys_dup2
91 32 setfsuid32 sys_setfsuid
92 common fcntl sys_fcntl compat_sys_fcntl
-93 common select sys_select
+93 common select sys_select compat_sys_select
94 32 setfsgid32 sys_setfsgid
95 common fsync sys_fsync
96 common setpriority sys_setpriority
@@ -155,7 +155,7 @@
123 32 fchown sys_fchown16
123 64 fchown sys_fchown
124 common fchmod sys_fchmod
-125 common recvfrom sys_recvfrom
+125 common recvfrom sys_recvfrom compat_sys_recvfrom
126 32 setreuid sys_setreuid16
126 64 setreuid sys_setreuid
127 32 setregid sys_setregid16
@@ -247,7 +247,7 @@
204 32 readdir sys_old_readdir compat_sys_old_readdir
204 64 readdir sys_nis_syscall
205 common readahead sys_readahead compat_sys_readahead
-206 common socketcall sys_socketcall sys32_socketcall
+206 common socketcall sys_socketcall compat_sys_socketcall
207 common syslog sys_syslog
208 common lookup_dcookie sys_ni_syscall
209 common fadvise64 sys_fadvise64 compat_sys_fadvise64
@@ -461,7 +461,7 @@
412 32 utimensat_time64 sys_utimensat sys_utimensat
413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
+416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 7fd1f57ad3d3..d6ebcab1d8b2 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -420,7 +420,7 @@
412 i386 utimensat_time64 sys_utimensat
413 i386 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
414 i386 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
-416 i386 io_pgetevents_time64 sys_io_pgetevents
+416 i386 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
417 i386 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
418 i386 mq_timedsend_time64 sys_mq_timedsend
419 i386 mq_timedreceive_time64 sys_mq_timedreceive
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index ed2797f132ce..62cef2113ca7 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -93,10 +93,9 @@ static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp,
\
asm volatile(ALTERNATIVE(_lock_loc \
"call cmpxchg8b_emu", \
- _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
- : [ptr] "+m" (*(_ptr)), \
- "+a" (o.low), "+d" (o.high) \
- : "b" (n.low), "c" (n.high), "S" (_ptr) \
+ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
+ : "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \
: "memory"); \
\
o.full; \
@@ -122,12 +121,11 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64
\
asm volatile(ALTERNATIVE(_lock_loc \
"call cmpxchg8b_emu", \
- _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
+ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
CC_SET(e) \
: CC_OUT(e) (ret), \
- [ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
- : "b" (n.low), "c" (n.high), "S" (_ptr) \
+ : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \
: "memory"); \
\
if (unlikely(!ret)) \
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 7e523bb3d2d3..fb2809b20b0a 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -73,19 +73,16 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
#endif
/*
- * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
- * but not enough for x86 stack utilization comfort. To keep
- * reasonable stack head room, reduce the maximum offset to 8 bits.
- *
- * The actual entropy will be further reduced by the compiler when
- * applying stack alignment constraints (see cc_stack_align4/8 in
+ * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+ * bits. The actual entropy will be further reduced by the compiler
+ * when applying stack alignment constraints (see cc_stack_align4/8 in
* arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
* low bits from any entropy chosen here.
*
- * Therefore, final stack offset entropy will be 5 (x86_64) or
- * 6 (ia32) bits.
+ * Therefore, final stack offset entropy will be 7 (x86_64) or
+ * 8 (ia32) bits.
*/
- choose_random_kstack_offset(rdtsc() & 0xFF);
+ choose_random_kstack_offset(rdtsc());
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index e42faa792c07..52e1f3f0b361 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -27,25 +27,7 @@
unsigned long profile_pc(struct pt_regs *regs)
{
- unsigned long pc = instruction_pointer(regs);
-
- if (!user_mode(regs) && in_lock_functions(pc)) {
-#ifdef CONFIG_FRAME_POINTER
- return *(unsigned long *)(regs->bp + sizeof(long));
-#else
- unsigned long *sp = (unsigned long *)regs->sp;
- /*
- * Return address is either directly at stack pointer
- * or above a saved flags. Eflags has bits 22-31 zero,
- * kernel addresses don't.
- */
- if (sp[0] >> 22)
- return sp[0];
- if (sp[1] >> 22)
- return sp[1];
-#endif
- }
- return pc;
+ return instruction_pointer(regs);
}
EXPORT_SYMBOL(profile_pc);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5159c7a22922..d25d81c8ecc0 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1234,13 +1234,11 @@ bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
}
static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt,
- bool *regs_used, bool *tail_call_seen)
+ bool *regs_used)
{
int i;
for (i = 1; i <= insn_cnt; i++, insn++) {
- if (insn->code == (BPF_JMP | BPF_TAIL_CALL))
- *tail_call_seen = true;
if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6)
regs_used[0] = true;
if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7)
@@ -1324,7 +1322,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
struct bpf_insn *insn = bpf_prog->insnsi;
bool callee_regs_used[4] = {};
int insn_cnt = bpf_prog->len;
- bool tail_call_seen = false;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
u64 arena_vm_start, user_vm_start;
@@ -1336,11 +1333,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
- detect_reg_usage(insn, insn_cnt, callee_regs_used,
- &tail_call_seen);
-
- /* tail call's presence in current prog implies it is reachable */
- tail_call_reachable |= tail_call_seen;
+ detect_reg_usage(insn, insn_cnt, callee_regs_used);
emit_prologue(&prog, bpf_prog->aux->stack_depth,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
@@ -3363,7 +3356,7 @@ out_image:
*
* Both cases are serious bugs and justify WARN_ON.
*/
- if (WARN_ON(bpf_jit_binary_pack_finalize(prog, header, rw_header))) {
+ if (WARN_ON(bpf_jit_binary_pack_finalize(header, rw_header))) {
/* header has been freed */
header = NULL;
goto out_image;
@@ -3442,7 +3435,7 @@ void bpf_jit_free(struct bpf_prog *prog)
* before freeing it.
*/
if (jit_data) {
- bpf_jit_binary_pack_finalize(prog, jit_data->header,
+ bpf_jit_binary_pack_finalize(jit_data->header,
jit_data->rw_header);
kvfree(jit_data->addrs);
kfree(jit_data);
diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h
index 08010dbf5e09..df275d554788 100644
--- a/arch/xtensa/include/asm/current.h
+++ b/arch/xtensa/include/asm/current.h
@@ -19,7 +19,7 @@
struct task_struct;
-static inline struct task_struct *get_current(void)
+static __always_inline struct task_struct *get_current(void)
{
return current_thread_info()->task;
}
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index 326db1c1d5d8..e0dffcc43b9e 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -91,7 +91,7 @@ struct thread_info {
}
/* how to get the thread information struct from C */
-static inline struct thread_info *current_thread_info(void)
+static __always_inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
__asm__("extui %0, a1, 0, "__stringify(CURRENT_SHIFT)"\n\t"
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index bd6a7857ce05..831fa4a12159 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -16,7 +16,6 @@
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/sched.h> /* need_resched() */
-#include <linux/sort.h>
#include <linux/tick.h>
#include <linux/cpuidle.h>
#include <linux/cpu.h>
@@ -386,25 +385,24 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
}
-static int acpi_cst_latency_cmp(const void *a, const void *b)
+static void acpi_cst_latency_sort(struct acpi_processor_cx *states, size_t length)
{
- const struct acpi_processor_cx *x = a, *y = b;
+ int i, j, k;
- if (!(x->valid && y->valid))
- return 0;
- if (x->latency > y->latency)
- return 1;
- if (x->latency < y->latency)
- return -1;
- return 0;
-}
-static void acpi_cst_latency_swap(void *a, void *b, int n)
-{
- struct acpi_processor_cx *x = a, *y = b;
+ for (i = 1; i < length; i++) {
+ if (!states[i].valid)
+ continue;
- if (!(x->valid && y->valid))
- return;
- swap(x->latency, y->latency);
+ for (j = i - 1, k = i; j >= 0; j--) {
+ if (!states[j].valid)
+ continue;
+
+ if (states[j].latency > states[k].latency)
+ swap(states[j].latency, states[k].latency);
+
+ k = j;
+ }
+ }
}
static int acpi_processor_power_verify(struct acpi_processor *pr)
@@ -449,10 +447,7 @@ static int acpi_processor_power_verify(struct acpi_processor *pr)
if (buggy_latency) {
pr_notice("FW issue: working around C-state latencies out of order\n");
- sort(&pr->power.states[1], max_cstate,
- sizeof(struct acpi_processor_cx),
- acpi_cst_latency_cmp,
- acpi_cst_latency_swap);
+ acpi_cst_latency_sort(&pr->power.states[1], max_cstate);
}
lapic_timer_propagate_broadcast(pr);
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 5eb38fbbbecd..fc6fd583faf8 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1975,8 +1975,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
n_ports = max(ahci_nr_ports(hpriv->cap), fls(hpriv->port_map));
host = ata_host_alloc_pinfo(&pdev->dev, ppi, n_ports);
- if (!host)
- return -ENOMEM;
+ if (!host) {
+ rc = -ENOMEM;
+ goto err_rm_sysfs_file;
+ }
host->private_data = hpriv;
if (ahci_init_msi(pdev, n_ports, hpriv) < 0) {
@@ -2031,11 +2033,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* initialize adapter */
rc = ahci_configure_dma_masks(pdev, hpriv);
if (rc)
- return rc;
+ goto err_rm_sysfs_file;
rc = ahci_pci_reset_controller(host);
if (rc)
- return rc;
+ goto err_rm_sysfs_file;
ahci_pci_init_controller(host);
ahci_pci_print_info(host);
@@ -2044,10 +2046,15 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rc = ahci_host_activate(host, &ahci_sht);
if (rc)
- return rc;
+ goto err_rm_sysfs_file;
pm_runtime_put_noidle(&pdev->dev);
return 0;
+
+err_rm_sysfs_file:
+ sysfs_remove_file_from_group(&pdev->dev.kobj,
+ &dev_attr_remapped_nvme.attr, NULL);
+ return rc;
}
static void ahci_shutdown_one(struct pci_dev *pdev)
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index e1bf8a19b3c8..74b59b78d278 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4137,8 +4137,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
{ "PIONEER BD-RW BDR-205", NULL, ATA_HORKAGE_NOLPM },
/* Crucial devices with broken LPM support */
- { "CT500BX100SSD1", NULL, ATA_HORKAGE_NOLPM },
- { "CT240BX500SSD1", NULL, ATA_HORKAGE_NOLPM },
+ { "CT*0BX*00SSD1", NULL, ATA_HORKAGE_NOLPM },
/* 512GB MX100 with MU01 firmware has both queued TRIM and LPM issues */
{ "Crucial_CT512MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
@@ -5490,6 +5489,18 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
return ap;
}
+void ata_port_free(struct ata_port *ap)
+{
+ if (!ap)
+ return;
+
+ kfree(ap->pmp_link);
+ kfree(ap->slave_link);
+ kfree(ap->ncq_sense_buf);
+ kfree(ap);
+}
+EXPORT_SYMBOL_GPL(ata_port_free);
+
static void ata_devres_release(struct device *gendev, void *res)
{
struct ata_host *host = dev_get_drvdata(gendev);
@@ -5516,12 +5527,7 @@ static void ata_host_release(struct kref *kref)
int i;
for (i = 0; i < host->n_ports; i++) {
- struct ata_port *ap = host->ports[i];
-
- kfree(ap->pmp_link);
- kfree(ap->slave_link);
- kfree(ap->ncq_sense_buf);
- kfree(ap);
+ ata_port_free(host->ports[i]);
host->ports[i] = NULL;
}
kfree(host);
@@ -5571,8 +5577,10 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports)
if (!host)
return NULL;
- if (!devres_open_group(dev, NULL, GFP_KERNEL))
- goto err_free;
+ if (!devres_open_group(dev, NULL, GFP_KERNEL)) {
+ kfree(host);
+ return NULL;
+ }
dr = devres_alloc(ata_devres_release, 0, GFP_KERNEL);
if (!dr)
@@ -5604,8 +5612,6 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports)
err_out:
devres_release_group(dev, NULL);
- err_free:
- kfree(host);
return NULL;
}
EXPORT_SYMBOL_GPL(ata_host_alloc);
@@ -5904,7 +5910,7 @@ int ata_host_register(struct ata_host *host, const struct scsi_host_template *sh
* allocation time.
*/
for (i = host->n_ports; host->ports[i]; i++)
- kfree(host->ports[i]);
+ ata_port_free(host->ports[i]);
/* give ports names and add SCSI hosts */
for (i = 0; i < host->n_ports; i++) {
diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c
index 5b6805d87fcf..dd3c0626c72d 100644
--- a/drivers/bluetooth/btintel_pcie.c
+++ b/drivers/bluetooth/btintel_pcie.c
@@ -382,7 +382,7 @@ static int btintel_pcie_recv_frame(struct btintel_pcie_data *data,
/* The first 4 bytes indicates the Intel PCIe specific packet type */
pdata = skb_pull_data(skb, BTINTEL_PCIE_HCI_TYPE_LEN);
- if (!data) {
+ if (!pdata) {
bt_dev_err(hdev, "Corrupted packet received");
ret = -EILSEQ;
goto exit_error;
diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c
index 9d0c7e278114..9bfa9a6ad56c 100644
--- a/drivers/bluetooth/btnxpuart.c
+++ b/drivers/bluetooth/btnxpuart.c
@@ -281,7 +281,7 @@ static u8 crc8_table[CRC8_TABLE_SIZE];
/* Default configurations */
#define DEFAULT_H2C_WAKEUP_MODE WAKEUP_METHOD_BREAK
-#define DEFAULT_PS_MODE PS_MODE_DISABLE
+#define DEFAULT_PS_MODE PS_MODE_ENABLE
#define FW_INIT_BAUDRATE HCI_NXP_PRI_BAUDRATE
static struct sk_buff *nxp_drv_send_cmd(struct hci_dev *hdev, u16 opcode,
diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c
index 0c2f15235b4c..d90858ea2fe5 100644
--- a/drivers/bluetooth/hci_bcm4377.c
+++ b/drivers/bluetooth/hci_bcm4377.c
@@ -495,6 +495,10 @@ struct bcm4377_data;
* extended scanning
* broken_mws_transport_config: Set to true if the chip erroneously claims to
* support MWS Transport Configuration
+ * broken_le_ext_adv_report_phy: Set to true if this chip stuffs flags inside
+ * reserved bits of Primary/Secondary_PHY inside
+ * LE Extended Advertising Report events which
+ * have to be ignored
* send_calibration: Optional callback to send calibration data
* send_ptb: Callback to send "PTB" regulatory/calibration data
*/
@@ -513,6 +517,7 @@ struct bcm4377_hw {
unsigned long broken_ext_scan : 1;
unsigned long broken_mws_transport_config : 1;
unsigned long broken_le_coded : 1;
+ unsigned long broken_le_ext_adv_report_phy : 1;
int (*send_calibration)(struct bcm4377_data *bcm4377);
int (*send_ptb)(struct bcm4377_data *bcm4377,
@@ -716,7 +721,7 @@ static void bcm4377_handle_ack(struct bcm4377_data *bcm4377,
ring->events[msgid] = NULL;
}
- bitmap_release_region(ring->msgids, msgid, ring->n_entries);
+ bitmap_release_region(ring->msgids, msgid, 0);
unlock:
spin_unlock_irqrestore(&ring->lock, flags);
@@ -2373,6 +2378,8 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id)
set_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &hdev->quirks);
if (bcm4377->hw->broken_le_coded)
set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks);
+ if (bcm4377->hw->broken_le_ext_adv_report_phy)
+ set_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, &hdev->quirks);
pci_set_drvdata(pdev, bcm4377);
hci_set_drvdata(hdev, bcm4377);
@@ -2477,6 +2484,7 @@ static const struct bcm4377_hw bcm4377_hw_variants[] = {
.clear_pciecfg_subsystem_ctrl_bit19 = true,
.broken_mws_transport_config = true,
.broken_le_coded = true,
+ .broken_le_ext_adv_report_phy = true,
.send_calibration = bcm4387_send_calibration,
.send_ptb = bcm4378_send_ptb,
},
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 0c9c9ee56592..9a0bc86f9aac 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -2450,15 +2450,27 @@ static void qca_serdev_shutdown(struct device *dev)
struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
struct hci_uart *hu = &qcadev->serdev_hu;
struct hci_dev *hdev = hu->hdev;
- struct qca_data *qca = hu->priv;
const u8 ibs_wake_cmd[] = { 0xFD };
const u8 edl_reset_soc_cmd[] = { 0x01, 0x00, 0xFC, 0x01, 0x05 };
if (qcadev->btsoc_type == QCA_QCA6390) {
- if (test_bit(QCA_BT_OFF, &qca->flags) ||
- !test_bit(HCI_RUNNING, &hdev->flags))
+ /* The purpose of sending the VSC is to reset SOC into a initial
+ * state and the state will ensure next hdev->setup() success.
+ * if HCI_QUIRK_NON_PERSISTENT_SETUP is set, it means that
+ * hdev->setup() can do its job regardless of SoC state, so
+ * don't need to send the VSC.
+ * if HCI_SETUP is set, it means that hdev->setup() was never
+ * invoked and the SOC is already in the initial state, so
+ * don't also need to send the VSC.
+ */
+ if (test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks) ||
+ hci_dev_test_flag(hdev, HCI_SETUP))
return;
+ /* The serdev must be in open state when conrol logic arrives
+ * here, so also fix the use-after-free issue caused by that
+ * the serdev is flushed or wrote after it is closed.
+ */
serdev_device_write_flush(serdev);
ret = serdev_device_write_buf(serdev, ibs_wake_cmd,
sizeof(ibs_wake_cmd));
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index 4c695b0388f3..9bb142c75243 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -16,8 +16,8 @@ tpm-y += eventlog/common.o
tpm-y += eventlog/tpm1.o
tpm-y += eventlog/tpm2.o
tpm-y += tpm-buf.o
+tpm-y += tpm2-sessions.o
-tpm-$(CONFIG_TCG_TPM2_HMAC) += tpm2-sessions.o
tpm-$(CONFIG_ACPI) += tpm_ppi.o eventlog/acpi.o
tpm-$(CONFIG_EFI) += eventlog/efi.o
tpm-$(CONFIG_OF) += eventlog/of.o
diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
index 907ac9956a78..2281d55df545 100644
--- a/drivers/char/tpm/tpm2-sessions.c
+++ b/drivers/char/tpm/tpm2-sessions.c
@@ -83,9 +83,6 @@
#define AES_KEY_BYTES AES_KEYSIZE_128
#define AES_KEY_BITS (AES_KEY_BYTES*8)
-static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy,
- u32 *handle, u8 *name);
-
/*
* This is the structure that carries all the auth information (like
* session handle, nonces, session key and auth) from use to use it is
@@ -148,6 +145,7 @@ struct tpm2_auth {
u8 name[AUTH_MAX_NAMES][2 + SHA512_DIGEST_SIZE];
};
+#ifdef CONFIG_TCG_TPM2_HMAC
/*
* Name Size based on TPM algorithm (assumes no hash bigger than 255)
*/
@@ -163,6 +161,226 @@ static u8 name_size(const u8 *name)
return size_map[alg] + 2;
}
+static int tpm2_parse_read_public(char *name, struct tpm_buf *buf)
+{
+ struct tpm_header *head = (struct tpm_header *)buf->data;
+ off_t offset = TPM_HEADER_SIZE;
+ u32 tot_len = be32_to_cpu(head->length);
+ u32 val;
+
+ /* we're starting after the header so adjust the length */
+ tot_len -= TPM_HEADER_SIZE;
+
+ /* skip public */
+ val = tpm_buf_read_u16(buf, &offset);
+ if (val > tot_len)
+ return -EINVAL;
+ offset += val;
+ /* name */
+ val = tpm_buf_read_u16(buf, &offset);
+ if (val != name_size(&buf->data[offset]))
+ return -EINVAL;
+ memcpy(name, &buf->data[offset], val);
+ /* forget the rest */
+ return 0;
+}
+
+static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name)
+{
+ struct tpm_buf buf;
+ int rc;
+
+ rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC);
+ if (rc)
+ return rc;
+
+ tpm_buf_append_u32(&buf, handle);
+ rc = tpm_transmit_cmd(chip, &buf, 0, "read public");
+ if (rc == TPM2_RC_SUCCESS)
+ rc = tpm2_parse_read_public(name, &buf);
+
+ tpm_buf_destroy(&buf);
+
+ return rc;
+}
+#endif /* CONFIG_TCG_TPM2_HMAC */
+
+/**
+ * tpm_buf_append_name() - add a handle area to the buffer
+ * @chip: the TPM chip structure
+ * @buf: The buffer to be appended
+ * @handle: The handle to be appended
+ * @name: The name of the handle (may be NULL)
+ *
+ * In order to compute session HMACs, we need to know the names of the
+ * objects pointed to by the handles. For most objects, this is simply
+ * the actual 4 byte handle or an empty buf (in these cases @name
+ * should be NULL) but for volatile objects, permanent objects and NV
+ * areas, the name is defined as the hash (according to the name
+ * algorithm which should be set to sha256) of the public area to
+ * which the two byte algorithm id has been appended. For these
+ * objects, the @name pointer should point to this. If a name is
+ * required but @name is NULL, then TPM2_ReadPublic() will be called
+ * on the handle to obtain the name.
+ *
+ * As with most tpm_buf operations, success is assumed because failure
+ * will be caused by an incorrect programming model and indicated by a
+ * kernel message.
+ */
+void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
+ u32 handle, u8 *name)
+{
+#ifdef CONFIG_TCG_TPM2_HMAC
+ enum tpm2_mso_type mso = tpm2_handle_mso(handle);
+ struct tpm2_auth *auth;
+ int slot;
+#endif
+
+ if (!tpm2_chip_auth(chip)) {
+ tpm_buf_append_u32(buf, handle);
+ /* count the number of handles in the upper bits of flags */
+ buf->handles++;
+ return;
+ }
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+ slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE) / 4;
+ if (slot >= AUTH_MAX_NAMES) {
+ dev_err(&chip->dev, "TPM: too many handles\n");
+ return;
+ }
+ auth = chip->auth;
+ WARN(auth->session != tpm_buf_length(buf),
+ "name added in wrong place\n");
+ tpm_buf_append_u32(buf, handle);
+ auth->session += 4;
+
+ if (mso == TPM2_MSO_PERSISTENT ||
+ mso == TPM2_MSO_VOLATILE ||
+ mso == TPM2_MSO_NVRAM) {
+ if (!name)
+ tpm2_read_public(chip, handle, auth->name[slot]);
+ } else {
+ if (name)
+ dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n");
+ }
+
+ auth->name_h[slot] = handle;
+ if (name)
+ memcpy(auth->name[slot], name, name_size(name));
+#endif
+}
+EXPORT_SYMBOL_GPL(tpm_buf_append_name);
+
+/**
+ * tpm_buf_append_hmac_session() - Append a TPM session element
+ * @chip: the TPM chip structure
+ * @buf: The buffer to be appended
+ * @attributes: The session attributes
+ * @passphrase: The session authority (NULL if none)
+ * @passphrase_len: The length of the session authority (0 if none)
+ *
+ * This fills in a session structure in the TPM command buffer, except
+ * for the HMAC which cannot be computed until the command buffer is
+ * complete. The type of session is controlled by the @attributes,
+ * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the
+ * session won't terminate after tpm_buf_check_hmac_response(),
+ * TPM2_SA_DECRYPT which means this buffers first parameter should be
+ * encrypted with a session key and TPM2_SA_ENCRYPT, which means the
+ * response buffer's first parameter needs to be decrypted (confusing,
+ * but the defines are written from the point of view of the TPM).
+ *
+ * Any session appended by this command must be finalized by calling
+ * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect
+ * and the TPM will reject the command.
+ *
+ * As with most tpm_buf operations, success is assumed because failure
+ * will be caused by an incorrect programming model and indicated by a
+ * kernel message.
+ */
+void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
+ u8 attributes, u8 *passphrase,
+ int passphrase_len)
+{
+#ifdef CONFIG_TCG_TPM2_HMAC
+ u8 nonce[SHA256_DIGEST_SIZE];
+ struct tpm2_auth *auth;
+ u32 len;
+#endif
+
+ if (!tpm2_chip_auth(chip)) {
+ /* offset tells us where the sessions area begins */
+ int offset = buf->handles * 4 + TPM_HEADER_SIZE;
+ u32 len = 9 + passphrase_len;
+
+ if (tpm_buf_length(buf) != offset) {
+ /* not the first session so update the existing length */
+ len += get_unaligned_be32(&buf->data[offset]);
+ put_unaligned_be32(len, &buf->data[offset]);
+ } else {
+ tpm_buf_append_u32(buf, len);
+ }
+ /* auth handle */
+ tpm_buf_append_u32(buf, TPM2_RS_PW);
+ /* nonce */
+ tpm_buf_append_u16(buf, 0);
+ /* attributes */
+ tpm_buf_append_u8(buf, 0);
+ /* passphrase */
+ tpm_buf_append_u16(buf, passphrase_len);
+ tpm_buf_append(buf, passphrase, passphrase_len);
+ return;
+ }
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+ /*
+ * The Architecture Guide requires us to strip trailing zeros
+ * before computing the HMAC
+ */
+ while (passphrase && passphrase_len > 0 && passphrase[passphrase_len - 1] == '\0')
+ passphrase_len--;
+
+ auth = chip->auth;
+ auth->attrs = attributes;
+ auth->passphrase_len = passphrase_len;
+ if (passphrase_len)
+ memcpy(auth->passphrase, passphrase, passphrase_len);
+
+ if (auth->session != tpm_buf_length(buf)) {
+ /* we're not the first session */
+ len = get_unaligned_be32(&buf->data[auth->session]);
+ if (4 + len + auth->session != tpm_buf_length(buf)) {
+ WARN(1, "session length mismatch, cannot append");
+ return;
+ }
+
+ /* add our new session */
+ len += 9 + 2 * SHA256_DIGEST_SIZE;
+ put_unaligned_be32(len, &buf->data[auth->session]);
+ } else {
+ tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE);
+ }
+
+ /* random number for our nonce */
+ get_random_bytes(nonce, sizeof(nonce));
+ memcpy(auth->our_nonce, nonce, sizeof(nonce));
+ tpm_buf_append_u32(buf, auth->handle);
+ /* our new nonce */
+ tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
+ tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
+ tpm_buf_append_u8(buf, auth->attrs);
+ /* and put a placeholder for the hmac */
+ tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
+ tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
+#endif
+}
+EXPORT_SYMBOL_GPL(tpm_buf_append_hmac_session);
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+
+static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy,
+ u32 *handle, u8 *name);
+
/*
* It turns out the crypto hmac(sha256) is hard for us to consume
* because it assumes a fixed key and the TPM seems to change the key
@@ -344,82 +562,6 @@ static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip)
}
/**
- * tpm_buf_append_hmac_session() - Append a TPM session element
- * @chip: the TPM chip structure
- * @buf: The buffer to be appended
- * @attributes: The session attributes
- * @passphrase: The session authority (NULL if none)
- * @passphrase_len: The length of the session authority (0 if none)
- *
- * This fills in a session structure in the TPM command buffer, except
- * for the HMAC which cannot be computed until the command buffer is
- * complete. The type of session is controlled by the @attributes,
- * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the
- * session won't terminate after tpm_buf_check_hmac_response(),
- * TPM2_SA_DECRYPT which means this buffers first parameter should be
- * encrypted with a session key and TPM2_SA_ENCRYPT, which means the
- * response buffer's first parameter needs to be decrypted (confusing,
- * but the defines are written from the point of view of the TPM).
- *
- * Any session appended by this command must be finalized by calling
- * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect
- * and the TPM will reject the command.
- *
- * As with most tpm_buf operations, success is assumed because failure
- * will be caused by an incorrect programming model and indicated by a
- * kernel message.
- */
-void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
- u8 attributes, u8 *passphrase,
- int passphrase_len)
-{
- u8 nonce[SHA256_DIGEST_SIZE];
- u32 len;
- struct tpm2_auth *auth = chip->auth;
-
- /*
- * The Architecture Guide requires us to strip trailing zeros
- * before computing the HMAC
- */
- while (passphrase && passphrase_len > 0
- && passphrase[passphrase_len - 1] == '\0')
- passphrase_len--;
-
- auth->attrs = attributes;
- auth->passphrase_len = passphrase_len;
- if (passphrase_len)
- memcpy(auth->passphrase, passphrase, passphrase_len);
-
- if (auth->session != tpm_buf_length(buf)) {
- /* we're not the first session */
- len = get_unaligned_be32(&buf->data[auth->session]);
- if (4 + len + auth->session != tpm_buf_length(buf)) {
- WARN(1, "session length mismatch, cannot append");
- return;
- }
-
- /* add our new session */
- len += 9 + 2 * SHA256_DIGEST_SIZE;
- put_unaligned_be32(len, &buf->data[auth->session]);
- } else {
- tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE);
- }
-
- /* random number for our nonce */
- get_random_bytes(nonce, sizeof(nonce));
- memcpy(auth->our_nonce, nonce, sizeof(nonce));
- tpm_buf_append_u32(buf, auth->handle);
- /* our new nonce */
- tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
- tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
- tpm_buf_append_u8(buf, auth->attrs);
- /* and put a placeholder for the hmac */
- tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
- tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
-}
-EXPORT_SYMBOL(tpm_buf_append_hmac_session);
-
-/**
* tpm_buf_fill_hmac_session() - finalize the session HMAC
* @chip: the TPM chip structure
* @buf: The buffer to be appended
@@ -449,6 +591,9 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf)
u8 cphash[SHA256_DIGEST_SIZE];
struct sha256_state sctx;
+ if (!auth)
+ return;
+
/* save the command code in BE format */
auth->ordinal = head->ordinal;
@@ -567,104 +712,6 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf)
}
EXPORT_SYMBOL(tpm_buf_fill_hmac_session);
-static int tpm2_parse_read_public(char *name, struct tpm_buf *buf)
-{
- struct tpm_header *head = (struct tpm_header *)buf->data;
- off_t offset = TPM_HEADER_SIZE;
- u32 tot_len = be32_to_cpu(head->length);
- u32 val;
-
- /* we're starting after the header so adjust the length */
- tot_len -= TPM_HEADER_SIZE;
-
- /* skip public */
- val = tpm_buf_read_u16(buf, &offset);
- if (val > tot_len)
- return -EINVAL;
- offset += val;
- /* name */
- val = tpm_buf_read_u16(buf, &offset);
- if (val != name_size(&buf->data[offset]))
- return -EINVAL;
- memcpy(name, &buf->data[offset], val);
- /* forget the rest */
- return 0;
-}
-
-static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name)
-{
- struct tpm_buf buf;
- int rc;
-
- rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC);
- if (rc)
- return rc;
-
- tpm_buf_append_u32(&buf, handle);
- rc = tpm_transmit_cmd(chip, &buf, 0, "read public");
- if (rc == TPM2_RC_SUCCESS)
- rc = tpm2_parse_read_public(name, &buf);
-
- tpm_buf_destroy(&buf);
-
- return rc;
-}
-
-/**
- * tpm_buf_append_name() - add a handle area to the buffer
- * @chip: the TPM chip structure
- * @buf: The buffer to be appended
- * @handle: The handle to be appended
- * @name: The name of the handle (may be NULL)
- *
- * In order to compute session HMACs, we need to know the names of the
- * objects pointed to by the handles. For most objects, this is simply
- * the actual 4 byte handle or an empty buf (in these cases @name
- * should be NULL) but for volatile objects, permanent objects and NV
- * areas, the name is defined as the hash (according to the name
- * algorithm which should be set to sha256) of the public area to
- * which the two byte algorithm id has been appended. For these
- * objects, the @name pointer should point to this. If a name is
- * required but @name is NULL, then TPM2_ReadPublic() will be called
- * on the handle to obtain the name.
- *
- * As with most tpm_buf operations, success is assumed because failure
- * will be caused by an incorrect programming model and indicated by a
- * kernel message.
- */
-void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
- u32 handle, u8 *name)
-{
- enum tpm2_mso_type mso = tpm2_handle_mso(handle);
- struct tpm2_auth *auth = chip->auth;
- int slot;
-
- slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE)/4;
- if (slot >= AUTH_MAX_NAMES) {
- dev_err(&chip->dev, "TPM: too many handles\n");
- return;
- }
- WARN(auth->session != tpm_buf_length(buf),
- "name added in wrong place\n");
- tpm_buf_append_u32(buf, handle);
- auth->session += 4;
-
- if (mso == TPM2_MSO_PERSISTENT ||
- mso == TPM2_MSO_VOLATILE ||
- mso == TPM2_MSO_NVRAM) {
- if (!name)
- tpm2_read_public(chip, handle, auth->name[slot]);
- } else {
- if (name)
- dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n");
- }
-
- auth->name_h[slot] = handle;
- if (name)
- memcpy(auth->name[slot], name, name_size(name));
-}
-EXPORT_SYMBOL(tpm_buf_append_name);
-
/**
* tpm_buf_check_hmac_response() - check the TPM return HMAC for correctness
* @chip: the TPM chip structure
@@ -705,6 +752,9 @@ int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
u32 cc = be32_to_cpu(auth->ordinal);
int parm_len, len, i, handles;
+ if (!auth)
+ return rc;
+
if (auth->session >= TPM_HEADER_SIZE) {
WARN(1, "tpm session not filled correctly\n");
goto out;
@@ -824,8 +874,13 @@ EXPORT_SYMBOL(tpm_buf_check_hmac_response);
*/
void tpm2_end_auth_session(struct tpm_chip *chip)
{
- tpm2_flush_context(chip, chip->auth->handle);
- memzero_explicit(chip->auth, sizeof(*chip->auth));
+ struct tpm2_auth *auth = chip->auth;
+
+ if (!auth)
+ return;
+
+ tpm2_flush_context(chip, auth->handle);
+ memzero_explicit(auth, sizeof(*auth));
}
EXPORT_SYMBOL(tpm2_end_auth_session);
@@ -907,6 +962,11 @@ int tpm2_start_auth_session(struct tpm_chip *chip)
int rc;
u32 null_key;
+ if (!auth) {
+ dev_warn_once(&chip->dev, "auth session is not active\n");
+ return 0;
+ }
+
rc = tpm2_load_null(chip, &null_key);
if (rc)
goto out;
@@ -1301,3 +1361,4 @@ int tpm2_sessions_init(struct tpm_chip *chip)
return rc;
}
+#endif /* CONFIG_TCG_TPM2_HMAC */
diff --git a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c
index ba504e19d420..62d876e150e1 100644
--- a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c
+++ b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c
@@ -29,6 +29,7 @@ static const struct mtk_gate mfg_clks[] = {
static const struct mtk_clk_desc mfg_desc = {
.clks = mfg_clks,
.num_clks = ARRAY_SIZE(mfg_clks),
+ .need_runtime_pm = true,
};
static const struct of_device_id of_match_clk_mt8183_mfg[] = {
diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c
index bd37ab4d1a9b..ba1d1c495bc2 100644
--- a/drivers/clk/mediatek/clk-mtk.c
+++ b/drivers/clk/mediatek/clk-mtk.c
@@ -496,14 +496,16 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev,
}
- devm_pm_runtime_enable(&pdev->dev);
- /*
- * Do a pm_runtime_resume_and_get() to workaround a possible
- * deadlock between clk_register() and the genpd framework.
- */
- r = pm_runtime_resume_and_get(&pdev->dev);
- if (r)
- return r;
+ if (mcd->need_runtime_pm) {
+ devm_pm_runtime_enable(&pdev->dev);
+ /*
+ * Do a pm_runtime_resume_and_get() to workaround a possible
+ * deadlock between clk_register() and the genpd framework.
+ */
+ r = pm_runtime_resume_and_get(&pdev->dev);
+ if (r)
+ return r;
+ }
/* Calculate how many clk_hw_onecell_data entries to allocate */
num_clks = mcd->num_clks + mcd->num_composite_clks;
@@ -585,7 +587,8 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev,
goto unregister_clks;
}
- pm_runtime_put(&pdev->dev);
+ if (mcd->need_runtime_pm)
+ pm_runtime_put(&pdev->dev);
return r;
@@ -618,7 +621,8 @@ free_base:
if (mcd->shared_io && base)
iounmap(base);
- pm_runtime_put(&pdev->dev);
+ if (mcd->need_runtime_pm)
+ pm_runtime_put(&pdev->dev);
return r;
}
diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h
index 22096501a60a..c17fe1c2d732 100644
--- a/drivers/clk/mediatek/clk-mtk.h
+++ b/drivers/clk/mediatek/clk-mtk.h
@@ -237,6 +237,8 @@ struct mtk_clk_desc {
int (*clk_notifier_func)(struct device *dev, struct clk *clk);
unsigned int mfg_clk_idx;
+
+ bool need_runtime_pm;
};
int mtk_clk_pdev_probe(struct platform_device *pdev);
diff --git a/drivers/clk/qcom/apss-ipq-pll.c b/drivers/clk/qcom/apss-ipq-pll.c
index 5f7f537e4ecb..e8632db2c542 100644
--- a/drivers/clk/qcom/apss-ipq-pll.c
+++ b/drivers/clk/qcom/apss-ipq-pll.c
@@ -70,7 +70,6 @@ static struct clk_alpha_pll ipq_pll_stromer_plus = {
static const struct alpha_pll_config ipq5018_pll_config = {
.l = 0x2a,
.config_ctl_val = 0x4001075b,
- .config_ctl_hi_val = 0x304,
.main_output_mask = BIT(0),
.aux_output_mask = BIT(1),
.early_output_mask = BIT(3),
@@ -84,7 +83,6 @@ static const struct alpha_pll_config ipq5018_pll_config = {
static const struct alpha_pll_config ipq5332_pll_config = {
.l = 0x2d,
.config_ctl_val = 0x4001075b,
- .config_ctl_hi_val = 0x304,
.main_output_mask = BIT(0),
.aux_output_mask = BIT(1),
.early_output_mask = BIT(3),
diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c
index d4227909d1fe..c51647e37df8 100644
--- a/drivers/clk/qcom/clk-alpha-pll.c
+++ b/drivers/clk/qcom/clk-alpha-pll.c
@@ -2574,6 +2574,9 @@ static int clk_alpha_pll_stromer_plus_set_rate(struct clk_hw *hw,
regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL_U(pll),
a >> ALPHA_BITWIDTH);
+ regmap_update_bits(pll->clkr.regmap, PLL_USER_CTL(pll),
+ PLL_ALPHA_EN, PLL_ALPHA_EN);
+
regmap_write(pll->clkr.regmap, PLL_MODE(pll), PLL_BYPASSNL);
/* Wait five micro seconds or more */
diff --git a/drivers/clk/qcom/gcc-ipq9574.c b/drivers/clk/qcom/gcc-ipq9574.c
index 0a3f846695b8..f8b9a1e93bef 100644
--- a/drivers/clk/qcom/gcc-ipq9574.c
+++ b/drivers/clk/qcom/gcc-ipq9574.c
@@ -2140,9 +2140,10 @@ static struct clk_rcg2 pcnoc_bfdcd_clk_src = {
static struct clk_branch gcc_crypto_axi_clk = {
.halt_reg = 0x16010,
+ .halt_check = BRANCH_HALT_VOTED,
.clkr = {
- .enable_reg = 0x16010,
- .enable_mask = BIT(0),
+ .enable_reg = 0xb004,
+ .enable_mask = BIT(15),
.hw.init = &(const struct clk_init_data) {
.name = "gcc_crypto_axi_clk",
.parent_hws = (const struct clk_hw *[]) {
@@ -2156,9 +2157,10 @@ static struct clk_branch gcc_crypto_axi_clk = {
static struct clk_branch gcc_crypto_ahb_clk = {
.halt_reg = 0x16014,
+ .halt_check = BRANCH_HALT_VOTED,
.clkr = {
- .enable_reg = 0x16014,
- .enable_mask = BIT(0),
+ .enable_reg = 0xb004,
+ .enable_mask = BIT(16),
.hw.init = &(const struct clk_init_data) {
.name = "gcc_crypto_ahb_clk",
.parent_hws = (const struct clk_hw *[]) {
diff --git a/drivers/clk/qcom/gcc-sm6350.c b/drivers/clk/qcom/gcc-sm6350.c
index cf4a7b6e0b23..0559a33faf00 100644
--- a/drivers/clk/qcom/gcc-sm6350.c
+++ b/drivers/clk/qcom/gcc-sm6350.c
@@ -100,8 +100,8 @@ static struct clk_alpha_pll gpll6 = {
.enable_mask = BIT(6),
.hw.init = &(struct clk_init_data){
.name = "gpll6",
- .parent_hws = (const struct clk_hw*[]){
- &gpll0.clkr.hw,
+ .parent_data = &(const struct clk_parent_data){
+ .fw_name = "bi_tcxo",
},
.num_parents = 1,
.ops = &clk_alpha_pll_fixed_fabia_ops,
@@ -124,7 +124,7 @@ static struct clk_alpha_pll_postdiv gpll6_out_even = {
.clkr.hw.init = &(struct clk_init_data){
.name = "gpll6_out_even",
.parent_hws = (const struct clk_hw*[]){
- &gpll0.clkr.hw,
+ &gpll6.clkr.hw,
},
.num_parents = 1,
.ops = &clk_alpha_pll_postdiv_fabia_ops,
@@ -139,8 +139,8 @@ static struct clk_alpha_pll gpll7 = {
.enable_mask = BIT(7),
.hw.init = &(struct clk_init_data){
.name = "gpll7",
- .parent_hws = (const struct clk_hw*[]){
- &gpll0.clkr.hw,
+ .parent_data = &(const struct clk_parent_data){
+ .fw_name = "bi_tcxo",
},
.num_parents = 1,
.ops = &clk_alpha_pll_fixed_fabia_ops,
diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c
index ac0091b4ce24..be375ce0149c 100644
--- a/drivers/clk/sunxi-ng/ccu_common.c
+++ b/drivers/clk/sunxi-ng/ccu_common.c
@@ -132,7 +132,6 @@ static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev,
for (i = 0; i < desc->hw_clks->num ; i++) {
struct clk_hw *hw = desc->hw_clks->hws[i];
- struct ccu_common *common = hw_to_ccu_common(hw);
const char *name;
if (!hw)
@@ -147,14 +146,21 @@ static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev,
pr_err("Couldn't register clock %d - %s\n", i, name);
goto err_clk_unreg;
}
+ }
+
+ for (i = 0; i < desc->num_ccu_clks; i++) {
+ struct ccu_common *cclk = desc->ccu_clks[i];
+
+ if (!cclk)
+ continue;
- if (common->max_rate)
- clk_hw_set_rate_range(hw, common->min_rate,
- common->max_rate);
+ if (cclk->max_rate)
+ clk_hw_set_rate_range(&cclk->hw, cclk->min_rate,
+ cclk->max_rate);
else
- WARN(common->min_rate,
+ WARN(cclk->min_rate,
"No max_rate, ignoring min_rate of clock %d - %s\n",
- i, name);
+ i, clk_hw_get_name(&cclk->hw));
}
ret = of_clk_add_hw_provider(node, of_clk_hw_onecell_get,
diff --git a/drivers/counter/ti-eqep.c b/drivers/counter/ti-eqep.c
index 072b11fd6b32..825ae22c3ebc 100644
--- a/drivers/counter/ti-eqep.c
+++ b/drivers/counter/ti-eqep.c
@@ -6,6 +6,7 @@
*/
#include <linux/bitops.h>
+#include <linux/clk.h>
#include <linux/counter.h>
#include <linux/kernel.h>
#include <linux/mod_devicetable.h>
@@ -376,6 +377,7 @@ static int ti_eqep_probe(struct platform_device *pdev)
struct counter_device *counter;
struct ti_eqep_cnt *priv;
void __iomem *base;
+ struct clk *clk;
int err;
counter = devm_counter_alloc(dev, sizeof(*priv));
@@ -415,6 +417,10 @@ static int ti_eqep_probe(struct platform_device *pdev)
pm_runtime_enable(dev);
pm_runtime_get_sync(dev);
+ clk = devm_clk_get_enabled(dev, NULL);
+ if (IS_ERR(clk))
+ return dev_err_probe(dev, PTR_ERR(clk), "failed to enable clock\n");
+
err = counter_add(counter);
if (err < 0) {
pm_runtime_put_sync(dev);
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 37f1cdf46d29..4ac3a35dcd98 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -890,8 +890,10 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency)
pr_warn(FW_WARN "P-state 0 is not max freq\n");
- if (acpi_cpufreq_driver.set_boost)
+ if (acpi_cpufreq_driver.set_boost) {
set_boost(policy, acpi_cpufreq_driver.boost_enabled);
+ policy->boost_enabled = acpi_cpufreq_driver.boost_enabled;
+ }
return result;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a45aac17c20f..9e5060b27864 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1431,7 +1431,8 @@ static int cpufreq_online(unsigned int cpu)
}
/* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
- policy->boost_enabled = cpufreq_boost_enabled() && policy_has_boost_freq(policy);
+ if (cpufreq_boost_enabled() && policy_has_boost_freq(policy))
+ policy->boost_enabled = true;
/*
* The initialization has succeeded and the policy is online.
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 15de5e3d96fd..c31914a9876f 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -355,15 +355,14 @@ static void intel_pstate_set_itmt_prio(int cpu)
int ret;
ret = cppc_get_perf_caps(cpu, &cppc_perf);
- if (ret)
- return;
-
/*
- * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff.
- * In this case we can't use CPPC.highest_perf to enable ITMT.
- * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide.
+ * If CPPC is not available, fall back to MSR_HWP_CAPABILITIES bits [8:0].
+ *
+ * Also, on some systems with overclocking enabled, CPPC.highest_perf is
+ * hardcoded to 0xff, so CPPC.highest_perf cannot be used to enable ITMT.
+ * Fall back to MSR_HWP_CAPABILITIES then too.
*/
- if (cppc_perf.highest_perf == CPPC_MAX_PERF)
+ if (ret || cppc_perf.highest_perf == CPPC_MAX_PERF)
cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
/*
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index c631f99e415f..05210a0edb8a 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -10,7 +10,7 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API_DESC
config CRYPTO_DEV_FSL_CAAM
tristate "Freescale CAAM-Multicore platform driver backend"
- depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE
+ depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE || COMPILE_TEST
select SOC_BUS
select CRYPTO_DEV_FSL_CAAM_COMMON
imply FSL_MC_BUS
diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c
index a4f6884416a0..207dc422785a 100644
--- a/drivers/crypto/caam/caamalg_qi2.c
+++ b/drivers/crypto/caam/caamalg_qi2.c
@@ -4990,11 +4990,23 @@ err_dma_map:
return err;
}
+static void free_dpaa2_pcpu_netdev(struct dpaa2_caam_priv *priv, const cpumask_t *cpus)
+{
+ struct dpaa2_caam_priv_per_cpu *ppriv;
+ int i;
+
+ for_each_cpu(i, cpus) {
+ ppriv = per_cpu_ptr(priv->ppriv, i);
+ free_netdev(ppriv->net_dev);
+ }
+}
+
static int __cold dpaa2_dpseci_setup(struct fsl_mc_device *ls_dev)
{
struct device *dev = &ls_dev->dev;
struct dpaa2_caam_priv *priv;
struct dpaa2_caam_priv_per_cpu *ppriv;
+ cpumask_t clean_mask;
int err, cpu;
u8 i;
@@ -5073,6 +5085,7 @@ static int __cold dpaa2_dpseci_setup(struct fsl_mc_device *ls_dev)
}
}
+ cpumask_clear(&clean_mask);
i = 0;
for_each_online_cpu(cpu) {
u8 j;
@@ -5096,15 +5109,23 @@ static int __cold dpaa2_dpseci_setup(struct fsl_mc_device *ls_dev)
priv->rx_queue_attr[j].fqid,
priv->tx_queue_attr[j].fqid);
- ppriv->net_dev.dev = *dev;
- INIT_LIST_HEAD(&ppriv->net_dev.napi_list);
- netif_napi_add_tx_weight(&ppriv->net_dev, &ppriv->napi,
+ ppriv->net_dev = alloc_netdev_dummy(0);
+ if (!ppriv->net_dev) {
+ err = -ENOMEM;
+ goto err_alloc_netdev;
+ }
+ cpumask_set_cpu(cpu, &clean_mask);
+ ppriv->net_dev->dev = *dev;
+
+ netif_napi_add_tx_weight(ppriv->net_dev, &ppriv->napi,
dpaa2_dpseci_poll,
DPAA2_CAAM_NAPI_WEIGHT);
}
return 0;
+err_alloc_netdev:
+ free_dpaa2_pcpu_netdev(priv, &clean_mask);
err_get_rx_queue:
dpaa2_dpseci_congestion_free(priv);
err_get_vers:
@@ -5153,6 +5174,7 @@ static int __cold dpaa2_dpseci_disable(struct dpaa2_caam_priv *priv)
ppriv = per_cpu_ptr(priv->ppriv, i);
napi_disable(&ppriv->napi);
netif_napi_del(&ppriv->napi);
+ free_netdev(ppriv->net_dev);
}
return 0;
diff --git a/drivers/crypto/caam/caamalg_qi2.h b/drivers/crypto/caam/caamalg_qi2.h
index abb502bb675c..61d1219a202f 100644
--- a/drivers/crypto/caam/caamalg_qi2.h
+++ b/drivers/crypto/caam/caamalg_qi2.h
@@ -81,7 +81,7 @@ struct dpaa2_caam_priv {
*/
struct dpaa2_caam_priv_per_cpu {
struct napi_struct napi;
- struct net_device net_dev;
+ struct net_device *net_dev;
int req_fqid;
int rsp_fqid;
int prio;
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index bd418dea586d..d4b39184dbdb 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -80,6 +80,7 @@ static void build_deinstantiation_desc(u32 *desc, int handle)
append_jump(desc, JUMP_CLASS_CLASS1 | JUMP_TYPE_HALT);
}
+#ifdef CONFIG_OF
static const struct of_device_id imx8m_machine_match[] = {
{ .compatible = "fsl,imx8mm", },
{ .compatible = "fsl,imx8mn", },
@@ -88,6 +89,7 @@ static const struct of_device_id imx8m_machine_match[] = {
{ .compatible = "fsl,imx8ulp", },
{ }
};
+#endif
/*
* run_descriptor_deco0 - runs a descriptor on DECO0, under direct control of
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 46a083849a8e..ba8fb5d8a7b2 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -57,7 +57,7 @@ struct caam_napi {
*/
struct caam_qi_pcpu_priv {
struct caam_napi caam_napi;
- struct net_device net_dev;
+ struct net_device *net_dev;
struct qman_fq *rsp_fq;
} ____cacheline_aligned;
@@ -144,7 +144,7 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq,
{
const struct qm_fd *fd;
struct caam_drv_req *drv_req;
- struct device *qidev = &(raw_cpu_ptr(&pcpu_qipriv)->net_dev.dev);
+ struct device *qidev = &(raw_cpu_ptr(&pcpu_qipriv)->net_dev->dev);
struct caam_drv_private *priv = dev_get_drvdata(qidev);
fd = &msg->ern.fd;
@@ -530,6 +530,7 @@ static void caam_qi_shutdown(void *data)
if (kill_fq(qidev, per_cpu(pcpu_qipriv.rsp_fq, i)))
dev_err(qidev, "Rsp FQ kill failed, cpu: %d\n", i);
+ free_netdev(per_cpu(pcpu_qipriv.net_dev, i));
}
qman_delete_cgr_safe(&priv->cgr);
@@ -573,7 +574,7 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
struct caam_napi *caam_napi = raw_cpu_ptr(&pcpu_qipriv.caam_napi);
struct caam_drv_req *drv_req;
const struct qm_fd *fd;
- struct device *qidev = &(raw_cpu_ptr(&pcpu_qipriv)->net_dev.dev);
+ struct device *qidev = &(raw_cpu_ptr(&pcpu_qipriv)->net_dev->dev);
struct caam_drv_private *priv = dev_get_drvdata(qidev);
u32 status;
@@ -718,12 +719,24 @@ static void free_rsp_fqs(void)
kfree(per_cpu(pcpu_qipriv.rsp_fq, i));
}
+static void free_caam_qi_pcpu_netdev(const cpumask_t *cpus)
+{
+ struct caam_qi_pcpu_priv *priv;
+ int i;
+
+ for_each_cpu(i, cpus) {
+ priv = per_cpu_ptr(&pcpu_qipriv, i);
+ free_netdev(priv->net_dev);
+ }
+}
+
int caam_qi_init(struct platform_device *caam_pdev)
{
int err, i;
struct device *ctrldev = &caam_pdev->dev, *qidev;
struct caam_drv_private *ctrlpriv;
const cpumask_t *cpus = qman_affine_cpus();
+ cpumask_t clean_mask;
ctrlpriv = dev_get_drvdata(ctrldev);
qidev = ctrldev;
@@ -743,6 +756,8 @@ int caam_qi_init(struct platform_device *caam_pdev)
return err;
}
+ cpumask_clear(&clean_mask);
+
/*
* Enable the NAPI contexts on each of the core which has an affine
* portal.
@@ -751,10 +766,16 @@ int caam_qi_init(struct platform_device *caam_pdev)
struct caam_qi_pcpu_priv *priv = per_cpu_ptr(&pcpu_qipriv, i);
struct caam_napi *caam_napi = &priv->caam_napi;
struct napi_struct *irqtask = &caam_napi->irqtask;
- struct net_device *net_dev = &priv->net_dev;
+ struct net_device *net_dev;
+ net_dev = alloc_netdev_dummy(0);
+ if (!net_dev) {
+ err = -ENOMEM;
+ goto fail;
+ }
+ cpumask_set_cpu(i, &clean_mask);
+ priv->net_dev = net_dev;
net_dev->dev = *qidev;
- INIT_LIST_HEAD(&net_dev->napi_list);
netif_napi_add_tx_weight(net_dev, irqtask, caam_qi_poll,
CAAM_NAPI_WEIGHT);
@@ -766,16 +787,22 @@ int caam_qi_init(struct platform_device *caam_pdev)
dma_get_cache_alignment(), 0, NULL);
if (!qi_cache) {
dev_err(qidev, "Can't allocate CAAM cache\n");
- free_rsp_fqs();
- return -ENOMEM;
+ err = -ENOMEM;
+ goto fail2;
}
caam_debugfs_qi_init(ctrlpriv);
err = devm_add_action_or_reset(qidev, caam_qi_shutdown, ctrlpriv);
if (err)
- return err;
+ goto fail2;
dev_info(qidev, "Linux CAAM Queue I/F driver initialised\n");
return 0;
+
+fail2:
+ free_rsp_fqs();
+fail:
+ free_caam_qi_pcpu_netdev(&clean_mask);
+ return err;
}
diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile
index 6f9266edc9f1..eac73cbfdd38 100644
--- a/drivers/crypto/intel/qat/qat_common/Makefile
+++ b/drivers/crypto/intel/qat/qat_common/Makefile
@@ -39,7 +39,8 @@ intel_qat-objs := adf_cfg.o \
adf_sysfs_rl.o \
qat_uclo.o \
qat_hal.o \
- qat_bl.o
+ qat_bl.o \
+ qat_mig_dev.o
intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \
adf_fw_counters.o \
@@ -56,6 +57,6 @@ intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \
intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \
adf_pfvf_pf_msg.o adf_pfvf_pf_proto.o \
adf_pfvf_vf_msg.o adf_pfvf_vf_proto.o \
- adf_gen2_pfvf.o adf_gen4_pfvf.o qat_mig_dev.o
+ adf_gen2_pfvf.o adf_gen4_pfvf.o
intel_qat-$(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION) += adf_heartbeat_inject.o
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 784843fa2a22..3df10517a327 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -52,6 +52,14 @@ int devm_cxl_add_passthrough_decoder(struct cxl_port *port)
struct cxl_dport *dport = NULL;
int single_port_map[1];
unsigned long index;
+ struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
+
+ /*
+ * Capability checks are moot for passthrough decoders, support
+ * any and all possibilities.
+ */
+ cxlhdm->interleave_mask = ~0U;
+ cxlhdm->iw_cap_mask = ~0UL;
cxlsd = cxl_switch_decoder_alloc(port, 1);
if (IS_ERR(cxlsd))
@@ -79,6 +87,11 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
cxlhdm->interleave_mask |= GENMASK(11, 8);
if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap))
cxlhdm->interleave_mask |= GENMASK(14, 12);
+ cxlhdm->iw_cap_mask = BIT(1) | BIT(2) | BIT(4) | BIT(8);
+ if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY, hdm_cap))
+ cxlhdm->iw_cap_mask |= BIT(3) | BIT(6) | BIT(12);
+ if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_16_WAY, hdm_cap))
+ cxlhdm->iw_cap_mask |= BIT(16);
}
static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index e69625a8d6a1..c00f3a933164 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -62,10 +62,14 @@ static int match_nvdimm_bridge(struct device *dev, void *data)
return is_cxl_nvdimm_bridge(dev);
}
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd)
+/**
+ * cxl_find_nvdimm_bridge() - find a bridge device relative to a port
+ * @port: any descendant port of an nvdimm-bridge associated
+ * root-cxl-port
+ */
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port)
{
- struct cxl_root *cxl_root __free(put_cxl_root) =
- find_cxl_root(cxlmd->endpoint);
+ struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
struct device *dev;
if (!cxl_root)
@@ -242,18 +246,20 @@ static void cxlmd_release_nvdimm(void *_cxlmd)
/**
* devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm
+ * @parent_port: parent port for the (to be added) @cxlmd endpoint port
* @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations
*
* Return: 0 on success negative error code on failure.
*/
-int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)
+int devm_cxl_add_nvdimm(struct cxl_port *parent_port,
+ struct cxl_memdev *cxlmd)
{
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_nvdimm *cxl_nvd;
struct device *dev;
int rc;
- cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
+ cxl_nvb = cxl_find_nvdimm_bridge(parent_port);
if (!cxl_nvb)
return -ENODEV;
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 3c2b6144be23..538ebd5a64fd 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -1101,6 +1101,26 @@ static int cxl_port_attach_region(struct cxl_port *port,
}
cxld = cxl_rr->decoder;
+ /*
+ * the number of targets should not exceed the target_count
+ * of the decoder
+ */
+ if (is_switch_decoder(&cxld->dev)) {
+ struct cxl_switch_decoder *cxlsd;
+
+ cxlsd = to_cxl_switch_decoder(&cxld->dev);
+ if (cxl_rr->nr_targets > cxlsd->nr_targets) {
+ dev_dbg(&cxlr->dev,
+ "%s:%s %s add: %s:%s @ %d overflows targets: %d\n",
+ dev_name(port->uport_dev), dev_name(&port->dev),
+ dev_name(&cxld->dev), dev_name(&cxlmd->dev),
+ dev_name(&cxled->cxld.dev), pos,
+ cxlsd->nr_targets);
+ rc = -ENXIO;
+ goto out_erase;
+ }
+ }
+
rc = cxl_rr_ep_add(cxl_rr, cxled);
if (rc) {
dev_dbg(&cxlr->dev,
@@ -1210,6 +1230,50 @@ static int check_last_peer(struct cxl_endpoint_decoder *cxled,
return 0;
}
+static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
+{
+ struct cxl_port *port = to_cxl_port(cxld->dev.parent);
+ struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
+ unsigned int interleave_mask;
+ u8 eiw;
+ u16 eig;
+ int high_pos, low_pos;
+
+ if (!test_bit(iw, &cxlhdm->iw_cap_mask))
+ return -ENXIO;
+ /*
+ * Per CXL specification r3.1(8.2.4.20.13 Decoder Protection),
+ * if eiw < 8:
+ * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + 8 + eiw]
+ * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0]
+ *
+ * when the eiw is 0, all the bits of HPAOFFSET[51: 0] are used, the
+ * interleave bits are none.
+ *
+ * if eiw >= 8:
+ * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + eiw] / 3
+ * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0]
+ *
+ * when the eiw is 8, all the bits of HPAOFFSET[51: 0] are used, the
+ * interleave bits are none.
+ */
+ ways_to_eiw(iw, &eiw);
+ if (eiw == 0 || eiw == 8)
+ return 0;
+
+ granularity_to_eig(ig, &eig);
+ if (eiw > 8)
+ high_pos = eiw + eig - 1;
+ else
+ high_pos = eiw + eig + 7;
+ low_pos = eig + 8;
+ interleave_mask = GENMASK(high_pos, low_pos);
+ if (interleave_mask & ~cxlhdm->interleave_mask)
+ return -ENXIO;
+
+ return 0;
+}
+
static int cxl_port_setup_targets(struct cxl_port *port,
struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled)
@@ -1360,6 +1424,15 @@ static int cxl_port_setup_targets(struct cxl_port *port,
return -ENXIO;
}
} else {
+ rc = check_interleave_cap(cxld, iw, ig);
+ if (rc) {
+ dev_dbg(&cxlr->dev,
+ "%s:%s iw: %d ig: %d is not supported\n",
+ dev_name(port->uport_dev),
+ dev_name(&port->dev), iw, ig);
+ return rc;
+ }
+
cxld->interleave_ways = iw;
cxld->interleave_granularity = ig;
cxld->hpa_range = (struct range) {
@@ -1796,6 +1869,15 @@ static int cxl_region_attach(struct cxl_region *cxlr,
struct cxl_dport *dport;
int rc = -ENXIO;
+ rc = check_interleave_cap(&cxled->cxld, p->interleave_ways,
+ p->interleave_granularity);
+ if (rc) {
+ dev_dbg(&cxlr->dev, "%s iw: %d ig: %d is not supported\n",
+ dev_name(&cxled->cxld.dev), p->interleave_ways,
+ p->interleave_granularity);
+ return rc;
+ }
+
if (cxled->mode != cxlr->mode) {
dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
@@ -2688,22 +2770,33 @@ static int __cxl_dpa_to_region(struct device *dev, void *arg)
{
struct cxl_dpa_to_region_context *ctx = arg;
struct cxl_endpoint_decoder *cxled;
+ struct cxl_region *cxlr;
u64 dpa = ctx->dpa;
if (!is_endpoint_decoder(dev))
return 0;
cxled = to_cxl_endpoint_decoder(dev);
- if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
+ if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res))
return 0;
if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
return 0;
- dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
- dev_name(&cxled->cxld.region->dev));
+ /*
+ * Stop the region search (return 1) when an endpoint mapping is
+ * found. The region may not be fully constructed so offering
+ * the cxlr in the context structure is not guaranteed.
+ */
+ cxlr = cxled->cxld.region;
+ if (cxlr)
+ dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
+ dev_name(&cxlr->dev));
+ else
+ dev_dbg(dev, "dpa:0x%llx mapped in endpoint:%s\n", dpa,
+ dev_name(dev));
- ctx->cxlr = cxled->cxld.region;
+ ctx->cxlr = cxlr;
return 1;
}
@@ -2847,7 +2940,7 @@ static int cxl_pmem_region_alloc(struct cxl_region *cxlr)
* bridge for one device is the same for all.
*/
if (i == 0) {
- cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
+ cxl_nvb = cxl_find_nvdimm_bridge(cxlmd->endpoint);
if (!cxl_nvb)
return -ENODEV;
cxlr->cxl_nvb = cxl_nvb;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 603c0120cff8..a6613a6f8923 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -47,6 +47,8 @@ extern const struct nvdimm_security_ops *cxl_security_ops;
#define CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4)
#define CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8)
#define CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9)
+#define CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11)
+#define CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12)
#define CXL_HDM_DECODER_CTRL_OFFSET 0x4
#define CXL_HDM_DECODER_ENABLE BIT(1)
#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10)
@@ -855,8 +857,8 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm_bridge(struct device *dev);
-int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd);
+int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct cxl_memdev *cxlmd);
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port);
#ifdef CONFIG_CXL_REGION
bool is_cxl_pmem_region(struct device *dev);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 19aba81cdf13..af8169ccdbc0 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -395,9 +395,9 @@ enum cxl_devtype {
/**
* struct cxl_dpa_perf - DPA performance property entry
- * @dpa_range - range for DPA address
- * @coord - QoS performance data (i.e. latency, bandwidth)
- * @qos_class - QoS Class cookies
+ * @dpa_range: range for DPA address
+ * @coord: QoS performance data (i.e. latency, bandwidth)
+ * @qos_class: QoS Class cookies
*/
struct cxl_dpa_perf {
struct range dpa_range;
@@ -464,13 +464,14 @@ struct cxl_dev_state {
* @active_persistent_bytes: sum of hard + soft persistent
* @next_volatile_bytes: volatile capacity change pending device reset
* @next_persistent_bytes: persistent capacity change pending device reset
+ * @ram_perf: performance data entry matched to RAM partition
+ * @pmem_perf: performance data entry matched to PMEM partition
* @event: event log driver state
* @poison: poison driver state info
* @security: security driver state info
* @fw: firmware upload / activation state
+ * @mbox_wait: RCU wait for mbox send completely
* @mbox_send: @dev specific transport for transmitting mailbox commands
- * @ram_perf: performance data entry matched to RAM partition
- * @pmem_perf: performance data entry matched to PMEM partition
*
* See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for
* details on capacity parameters.
@@ -851,11 +852,21 @@ static inline void cxl_mem_active_dec(void)
int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd);
+/**
+ * struct cxl_hdm - HDM Decoder registers and cached / decoded capabilities
+ * @regs: mapped registers, see devm_cxl_setup_hdm()
+ * @decoder_count: number of decoders for this port
+ * @target_count: for switch decoders, max downstream port targets
+ * @interleave_mask: interleave granularity capability, see check_interleave_cap()
+ * @iw_cap_mask: bitmask of supported interleave ways, see check_interleave_cap()
+ * @port: mapped cxl_port, see devm_cxl_setup_hdm()
+ */
struct cxl_hdm {
struct cxl_component_regs regs;
unsigned int decoder_count;
unsigned int target_count;
unsigned int interleave_mask;
+ unsigned long iw_cap_mask;
struct cxl_port *port;
};
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 0c79d9ce877c..2f1b49bfe162 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -152,6 +152,15 @@ static int cxl_mem_probe(struct device *dev)
return -ENXIO;
}
+ if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) {
+ rc = devm_cxl_add_nvdimm(parent_port, cxlmd);
+ if (rc) {
+ if (rc == -ENODEV)
+ dev_info(dev, "PMEM disabled by platform\n");
+ return rc;
+ }
+ }
+
if (dport->rch)
endpoint_parent = parent_port->uport_dev;
else
@@ -174,14 +183,6 @@ unlock:
if (rc)
return rc;
- if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) {
- rc = devm_cxl_add_nvdimm(cxlmd);
- if (rc == -ENODEV)
- dev_info(dev, "PMEM disabled by platform\n");
- else
- return rc;
- }
-
/*
* The kernel may be operating out of CXL memory on this device,
* there is no spec defined way to determine whether this device
diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c
index 880ffcb50088..921f61507ae8 100644
--- a/drivers/firmware/sysfb.c
+++ b/drivers/firmware/sysfb.c
@@ -101,8 +101,10 @@ static __init struct device *sysfb_parent_dev(const struct screen_info *si)
if (IS_ERR(pdev)) {
return ERR_CAST(pdev);
} else if (pdev) {
- if (!sysfb_pci_dev_is_enabled(pdev))
+ if (!sysfb_pci_dev_is_enabled(pdev)) {
+ pci_dev_put(pdev);
return ERR_PTR(-ENODEV);
+ }
return &pdev->dev;
}
@@ -137,7 +139,7 @@ static __init int sysfb_init(void)
if (compatible) {
pd = sysfb_create_simplefb(si, &mode, parent);
if (!IS_ERR(pd))
- goto unlock_mutex;
+ goto put_device;
}
/* if the FB is incompatible, create a legacy framebuffer device */
@@ -155,7 +157,7 @@ static __init int sysfb_init(void)
pd = platform_device_alloc(name, 0);
if (!pd) {
ret = -ENOMEM;
- goto unlock_mutex;
+ goto put_device;
}
pd->dev.parent = parent;
@@ -170,9 +172,11 @@ static __init int sysfb_init(void)
if (ret)
goto err;
- goto unlock_mutex;
+ goto put_device;
err:
platform_device_put(pd);
+put_device:
+ put_device(parent);
unlock_mutex:
mutex_unlock(&disable_lock);
return ret;
diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index bb499e362912..1d0175d6350b 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -225,6 +225,11 @@ static int davinci_gpio_probe(struct platform_device *pdev)
else
nirq = DIV_ROUND_UP(ngpio, 16);
+ if (nirq > MAX_INT_PER_BANK) {
+ dev_err(dev, "Too many IRQs!\n");
+ return -EINVAL;
+ }
+
chips = devm_kzalloc(dev, sizeof(*chips), GFP_KERNEL);
if (!chips)
return -ENOMEM;
diff --git a/drivers/gpio/gpio-graniterapids.c b/drivers/gpio/gpio-graniterapids.c
index c693fe05d50f..f2e911a3d2ca 100644
--- a/drivers/gpio/gpio-graniterapids.c
+++ b/drivers/gpio/gpio-graniterapids.c
@@ -296,6 +296,8 @@ static int gnr_gpio_probe(struct platform_device *pdev)
if (!priv)
return -ENOMEM;
+ raw_spin_lock_init(&priv->lock);
+
regs = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(regs))
return PTR_ERR(regs);
diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c
index 71e1af7c2184..d89e78f0ead3 100644
--- a/drivers/gpio/gpio-mmio.c
+++ b/drivers/gpio/gpio-mmio.c
@@ -619,8 +619,6 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev,
ret = gpiochip_get_ngpios(gc, dev);
if (ret)
gc->ngpio = gc->bgpio_bits;
- else
- gc->bgpio_bits = roundup_pow_of_two(round_up(gc->ngpio, 8));
ret = bgpio_setup_io(gc, dat, set, clr, flags);
if (ret)
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 77a2812f2974..732a6964748c 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -758,6 +758,8 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d)
int level;
if (chip->driver_data & PCA_PCAL) {
+ guard(mutex)(&chip->i2c_lock);
+
/* Enable latch on interrupt-enabled inputs */
pca953x_write_regs(chip, PCAL953X_IN_LATCH, chip->irq_mask);
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 9dad67ea2597..5639abce6ec5 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -89,6 +89,10 @@ struct linehandle_state {
GPIOHANDLE_REQUEST_OPEN_DRAIN | \
GPIOHANDLE_REQUEST_OPEN_SOURCE)
+#define GPIOHANDLE_REQUEST_DIRECTION_FLAGS \
+ (GPIOHANDLE_REQUEST_INPUT | \
+ GPIOHANDLE_REQUEST_OUTPUT)
+
static int linehandle_validate_flags(u32 flags)
{
/* Return an error if an unknown flag is set */
@@ -169,21 +173,21 @@ static long linehandle_set_config(struct linehandle_state *lh,
if (ret)
return ret;
+ /* Lines must be reconfigured explicitly as input or output. */
+ if (!(lflags & GPIOHANDLE_REQUEST_DIRECTION_FLAGS))
+ return -EINVAL;
+
for (i = 0; i < lh->num_descs; i++) {
desc = lh->descs[i];
- linehandle_flags_to_desc_flags(gcnf.flags, &desc->flags);
+ linehandle_flags_to_desc_flags(lflags, &desc->flags);
- /*
- * Lines have to be requested explicitly for input
- * or output, else the line will be treated "as is".
- */
if (lflags & GPIOHANDLE_REQUEST_OUTPUT) {
int val = !!gcnf.default_values[i];
ret = gpiod_direction_output(desc, val);
if (ret)
return ret;
- } else if (lflags & GPIOHANDLE_REQUEST_INPUT) {
+ } else {
ret = gpiod_direction_input(desc);
if (ret)
return ret;
@@ -1530,12 +1534,14 @@ static long linereq_set_config(struct linereq *lr, void __user *ip)
line = &lr->lines[i];
desc = lr->lines[i].desc;
flags = gpio_v2_line_config_flags(&lc, i);
- gpio_v2_line_config_flags_to_desc_flags(flags, &desc->flags);
- edflags = flags & GPIO_V2_LINE_EDGE_DETECTOR_FLAGS;
/*
- * Lines have to be requested explicitly for input
- * or output, else the line will be treated "as is".
+ * Lines not explicitly reconfigured as input or output
+ * are left unchanged.
*/
+ if (!(flags & GPIO_V2_LINE_DIRECTION_FLAGS))
+ continue;
+ gpio_v2_line_config_flags_to_desc_flags(flags, &desc->flags);
+ edflags = flags & GPIO_V2_LINE_EDGE_DETECTOR_FLAGS;
if (flags & GPIO_V2_LINE_FLAG_OUTPUT) {
int val = gpio_v2_line_config_output_value(&lc, i);
@@ -1543,7 +1549,7 @@ static long linereq_set_config(struct linereq *lr, void __user *ip)
ret = gpiod_direction_output(desc, val);
if (ret)
return ret;
- } else if (flags & GPIO_V2_LINE_FLAG_INPUT) {
+ } else {
ret = gpiod_direction_input(desc);
if (ret)
return ret;
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index d75f6ee37028..89d5e64cf68b 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -203,6 +203,24 @@ static void of_gpio_try_fixup_polarity(const struct device_node *np,
*/
{ "qi,lb60", "rb-gpios", true },
#endif
+#if IS_ENABLED(CONFIG_PCI_LANTIQ)
+ /*
+ * According to the PCI specification, the RST# pin is an
+ * active-low signal. However, most of the device trees that
+ * have been widely used for a long time incorrectly describe
+ * reset GPIO as active-high, and were also using wrong name
+ * for the property.
+ */
+ { "lantiq,pci-xway", "gpio-reset", false },
+#endif
+#if IS_ENABLED(CONFIG_TOUCHSCREEN_TSC2005)
+ /*
+ * DTS for Nokia N900 incorrectly specified "active high"
+ * polarity for the reset line, while the chip actually
+ * treats it as "active low".
+ */
+ { "ti,tsc2005", "reset-gpios", false },
+#endif
};
unsigned int i;
@@ -504,9 +522,9 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np,
{ "reset", "reset-n-io", "marvell,nfc-uart" },
{ "reset", "reset-n-io", "mrvl,nfc-uart" },
#endif
-#if !IS_ENABLED(CONFIG_PCI_LANTIQ)
+#if IS_ENABLED(CONFIG_PCI_LANTIQ)
/* MIPS Lantiq PCI */
- { "reset", "gpios-reset", "lantiq,pci-xway" },
+ { "reset", "gpio-reset", "lantiq,pci-xway" },
#endif
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 108003bdf1e9..2e13c7c4b2b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -400,7 +400,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
mem_channel_number = vram_info->v30.channel_num;
mem_channel_width = vram_info->v30.channel_width;
if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
+ *vram_width = mem_channel_number * 16;
break;
default:
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 932dc93b2e63..33f791d92ddf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5220,11 +5220,14 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
dev_info(adev->dev, "GPU mode1 reset\n");
+ /* Cache the state before bus master disable. The saved config space
+ * values are used in other cases like restore after mode-2 reset.
+ */
+ amdgpu_device_cache_pci_state(adev->pdev);
+
/* disable BM */
pci_clear_master(adev->pdev);
- amdgpu_device_cache_pci_state(adev->pdev);
-
if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
dev_info(adev->dev, "GPU smu mode1 reset\n");
ret = amdgpu_dpm_mode1_reset(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 4bd4602d11b1..cef9dd0a012b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -640,6 +640,20 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id)
}
}
+static bool psp_err_warn(struct psp_context *psp)
+{
+ struct psp_gfx_cmd_resp *cmd = psp->cmd_buf_mem;
+
+ /* This response indicates reg list is already loaded */
+ if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ cmd->cmd_id == GFX_CMD_ID_LOAD_IP_FW &&
+ cmd->cmd.cmd_load_ip_fw.fw_type == GFX_FW_TYPE_REG_LIST &&
+ cmd->resp.status == TEE_ERROR_CANCEL)
+ return false;
+
+ return true;
+}
+
static int
psp_cmd_submit_buf(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
@@ -699,10 +713,13 @@ psp_cmd_submit_buf(struct psp_context *psp,
dev_warn(psp->adev->dev,
"failed to load ucode %s(0x%X) ",
amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id);
- dev_warn(psp->adev->dev,
- "psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
- psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id,
- psp->cmd_buf_mem->resp.status);
+ if (psp_err_warn(psp))
+ dev_warn(
+ psp->adev->dev,
+ "psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
+ psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id),
+ psp->cmd_buf_mem->cmd_id,
+ psp->cmd_buf_mem->resp.status);
/* If any firmware (including CAP) load fails under SRIOV, it should
* return failure to stop the VF from initializing.
* Also return failure in case of timeout
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index e30eecd02ae1..fde66225c481 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -3,6 +3,7 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_edid.h>
#include <drm/drm_simple_kms_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_vblank.h>
#include "amdgpu.h"
@@ -314,7 +315,13 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
return 0;
}
afb = to_amdgpu_framebuffer(new_state->fb);
- obj = new_state->fb->obj[0];
+
+ obj = drm_gem_fb_get_obj(new_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return -EINVAL;
+ }
+
rbo = gem_to_amdgpu_bo(obj);
adev = amdgpu_ttm_adev(rbo->tbo.bdev);
@@ -368,12 +375,19 @@ static void amdgpu_vkms_cleanup_fb(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
struct amdgpu_bo *rbo;
+ struct drm_gem_object *obj;
int r;
if (!old_state->fb)
return;
- rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]);
+ obj = drm_gem_fb_get_obj(old_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return;
+ }
+
+ rbo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(rbo, false);
if (unlikely(r)) {
DRM_ERROR("failed to reserve rbo before unpin\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 7566973ed8f5..37b5ddd6f13b 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -464,8 +464,9 @@ struct psp_gfx_rb_frame
#define PSP_ERR_UNKNOWN_COMMAND 0x00000100
enum tee_error_code {
- TEE_SUCCESS = 0x00000000,
- TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
+ TEE_SUCCESS = 0x00000000,
+ TEE_ERROR_CANCEL = 0xFFFF0002,
+ TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
};
#endif /* _PSP_TEE_GFX_IF_H_ */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e9ac20bed0f2..3cdcadd41be1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10048,6 +10048,7 @@ skip_modeset:
}
/* Update Freesync settings. */
+ reset_freesync_config_for_crtc(dm_new_crtc_state);
get_freesync_config_for_crtc(dm_new_crtc_state,
dm_new_conn_state);
@@ -11181,6 +11182,49 @@ static bool parse_edid_cea(struct amdgpu_dm_connector *aconnector,
return ret;
}
+static void parse_edid_displayid_vrr(struct drm_connector *connector,
+ struct edid *edid)
+{
+ u8 *edid_ext = NULL;
+ int i;
+ int j = 0;
+ u16 min_vfreq;
+ u16 max_vfreq;
+
+ if (edid == NULL || edid->extensions == 0)
+ return;
+
+ /* Find DisplayID extension */
+ for (i = 0; i < edid->extensions; i++) {
+ edid_ext = (void *)(edid + (i + 1));
+ if (edid_ext[0] == DISPLAYID_EXT)
+ break;
+ }
+
+ if (edid_ext == NULL)
+ return;
+
+ while (j < EDID_LENGTH) {
+ /* Get dynamic video timing range from DisplayID if available */
+ if (EDID_LENGTH - j > 13 && edid_ext[j] == 0x25 &&
+ (edid_ext[j+1] & 0xFE) == 0 && (edid_ext[j+2] == 9)) {
+ min_vfreq = edid_ext[j+9];
+ if (edid_ext[j+1] & 7)
+ max_vfreq = edid_ext[j+10] + ((edid_ext[j+11] & 3) << 8);
+ else
+ max_vfreq = edid_ext[j+10];
+
+ if (max_vfreq && min_vfreq) {
+ connector->display_info.monitor_range.max_vfreq = max_vfreq;
+ connector->display_info.monitor_range.min_vfreq = min_vfreq;
+
+ return;
+ }
+ }
+ j++;
+ }
+}
+
static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector,
struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info)
{
@@ -11302,6 +11346,11 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
if (!adev->dm.freesync_module)
goto update;
+ /* Some eDP panels only have the refresh rate range info in DisplayID */
+ if ((connector->display_info.monitor_range.min_vfreq == 0 ||
+ connector->display_info.monitor_range.max_vfreq == 0))
+ parse_edid_displayid_vrr(connector, edid);
+
if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT ||
sink->sink_signal == SIGNAL_TYPE_EDP)) {
bool edid_check_required = false;
@@ -11309,9 +11358,11 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
if (is_dp_capable_without_timing_msa(adev->dm.dc,
amdgpu_dm_connector)) {
if (edid->features & DRM_EDID_FEATURE_CONTINUOUS_FREQ) {
- freesync_capable = true;
amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq;
amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq;
+ if (amdgpu_dm_connector->max_vfreq -
+ amdgpu_dm_connector->min_vfreq > 10)
+ freesync_capable = true;
} else {
edid_check_required = edid->version > 1 ||
(edid->version == 1 &&
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 6c84b0fa40f4..0782a34689a0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -3364,6 +3364,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
&mode_lib->vba.UrgentBurstFactorLumaPre[k],
&mode_lib->vba.UrgentBurstFactorChromaPre[k],
&mode_lib->vba.NotUrgentLatencyHidingPre[k]);
+
+ v->cursor_bw_pre[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] /
+ 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * v->VRatioPreY[i][j][k];
}
{
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index a41812598ce8..8ecc972dbffd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -234,6 +234,7 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s
out->round_trip_ping_latency_dcfclk_cycles = 106;
out->smn_latency_us = 2;
out->dispclk_dppclk_vco_speed_mhz = 3600;
+ out->pct_ideal_dram_bw_after_urgent_pixel_only = 65.0;
break;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
index 0f8b3336e26d..cbd1c1f26b7a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
@@ -294,7 +294,7 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont
context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = (unsigned int)in_ctx->v20.dml_core_ctx.mp.DCFCLKDeepSleep * 1000;
context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
- if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx] == dml_fclock_change_unsupported)
+ if (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported)
context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false;
else
context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true;
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
index a01d0842bf8e..d487dfcd219b 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
@@ -1590,9 +1590,17 @@ static bool retrieve_link_cap(struct dc_link *link)
return false;
}
- if (dp_is_lttpr_present(link))
+ if (dp_is_lttpr_present(link)) {
configure_lttpr_mode_transparent(link);
+ // Echo TOTAL_LTTPR_CNT back downstream
+ core_link_write_dpcd(
+ link,
+ DP_TOTAL_LTTPR_CNT,
+ &link->dpcd_caps.lttpr_caps.phy_repeater_cnt,
+ sizeof(link->dpcd_caps.lttpr_caps.phy_repeater_cnt));
+ }
+
/* Read DP tunneling information. */
status = dpcd_get_tunneling_device_data(link);
diff --git a/drivers/gpu/drm/amd/display/include/dpcd_defs.h b/drivers/gpu/drm/amd/display/include/dpcd_defs.h
index 914f28e9f224..aee5170f5fb2 100644
--- a/drivers/gpu/drm/amd/display/include/dpcd_defs.h
+++ b/drivers/gpu/drm/amd/display/include/dpcd_defs.h
@@ -177,4 +177,9 @@ enum dpcd_psr_sink_states {
#define DP_SINK_PR_PIXEL_DEVIATION_PER_LINE 0x379
#define DP_SINK_PR_MAX_NUMBER_OF_DEVIATION_LINE 0x37A
+/* Remove once drm_dp_helper.h is updated upstream */
+#ifndef DP_TOTAL_LTTPR_CNT
+#define DP_TOTAL_LTTPR_CNT 0xF000A /* 2.1 */
+#endif
+
#endif /* __DAL_DPCD_DEFS_H__ */
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index 571691837200..09cbc3afd6d8 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -734,7 +734,7 @@ struct atom_gpio_pin_lut_v2_1
{
struct atom_common_table_header table_header;
/*the real number of this included in the structure is calcualted by using the (whole structure size - the header size)/size of atom_gpio_pin_lut */
- struct atom_gpio_pin_assignment gpio_pin[8];
+ struct atom_gpio_pin_assignment gpio_pin[];
};
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 7789b313285c..e1796ecf9c05 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -324,6 +324,18 @@ static int smu_dpm_set_umsch_mm_enable(struct smu_context *smu,
return ret;
}
+static int smu_set_mall_enable(struct smu_context *smu)
+{
+ int ret = 0;
+
+ if (!smu->ppt_funcs->set_mall_enable)
+ return 0;
+
+ ret = smu->ppt_funcs->set_mall_enable(smu);
+
+ return ret;
+}
+
/**
* smu_dpm_set_power_gate - power gate/ungate the specific IP block
*
@@ -1791,6 +1803,7 @@ static int smu_hw_init(void *handle)
smu_dpm_set_jpeg_enable(smu, true);
smu_dpm_set_vpe_enable(smu, true);
smu_dpm_set_umsch_mm_enable(smu, true);
+ smu_set_mall_enable(smu);
smu_set_gfx_cgpg(smu, true);
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 0917dec8efe3..64ccdb5f14ea 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -1395,6 +1395,11 @@ struct pptable_funcs {
int (*dpm_set_umsch_mm_enable)(struct smu_context *smu, bool enable);
/**
+ * @set_mall_enable: Init MALL power gating control.
+ */
+ int (*set_mall_enable)(struct smu_context *smu);
+
+ /**
* @notify_rlc_state: Notify RLC power state to SMU.
*/
int (*notify_rlc_state)(struct smu_context *smu, bool en);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
index c4dc5881d8df..e7f5ef49049f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
@@ -106,8 +106,8 @@
#define PPSMC_MSG_DisableLSdma 0x35 ///< Disable LSDMA
#define PPSMC_MSG_SetSoftMaxVpe 0x36 ///<
#define PPSMC_MSG_SetSoftMinVpe 0x37 ///<
-#define PPSMC_MSG_AllocMALLCache 0x38 ///< Allocating MALL Cache
-#define PPSMC_MSG_ReleaseMALLCache 0x39 ///< Releasing MALL Cache
+#define PPSMC_MSG_MALLPowerController 0x38 ///< Set MALL control
+#define PPSMC_MSG_MALLPowerState 0x39 ///< Enter/Exit MALL PG
#define PPSMC_Message_Count 0x3A ///< Total number of PPSMC messages
/** @}*/
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index c48214e3dc8e..2e32b085824a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -272,7 +272,9 @@
__SMU_DUMMY_MAP(SetSoftMinVpe), \
__SMU_DUMMY_MAP(GetMetricsVersion), \
__SMU_DUMMY_MAP(EnableUCLKShadow), \
- __SMU_DUMMY_MAP(RmaDueToBadPageThreshold),
+ __SMU_DUMMY_MAP(RmaDueToBadPageThreshold), \
+ __SMU_DUMMY_MAP(MALLPowerController), \
+ __SMU_DUMMY_MAP(MALLPowerState),
#undef __SMU_DUMMY_MAP
#define __SMU_DUMMY_MAP(type) SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
index e4419e1561ef..18abfbd6d059 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
@@ -52,6 +52,19 @@
#define mmMP1_SMN_C2PMSG_90 0x029a
#define mmMP1_SMN_C2PMSG_90_BASE_IDX 0
+/* MALLPowerController message arguments (Defines for the Cache mode control) */
+#define SMU_MALL_PMFW_CONTROL 0
+#define SMU_MALL_DRIVER_CONTROL 1
+
+/*
+ * MALLPowerState message arguments
+ * (Defines for the Allocate/Release Cache mode if in driver mode)
+ */
+#define SMU_MALL_EXIT_PG 0
+#define SMU_MALL_ENTER_PG 1
+
+#define SMU_MALL_PG_CONFIG_DEFAULT SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_ON
+
#define FEATURE_MASK(feature) (1ULL << feature)
#define SMC_DPM_FEATURE ( \
FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \
@@ -66,6 +79,12 @@
FEATURE_MASK(FEATURE_GFX_DPM_BIT) | \
FEATURE_MASK(FEATURE_VPE_DPM_BIT))
+enum smu_mall_pg_config {
+ SMU_MALL_PG_CONFIG_PMFW_CONTROL = 0,
+ SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_ON = 1,
+ SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_OFF = 2,
+};
+
static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] = {
MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1),
MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1),
@@ -113,6 +132,8 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(PowerDownUmsch, PPSMC_MSG_PowerDownUmsch, 1),
MSG_MAP(SetSoftMaxVpe, PPSMC_MSG_SetSoftMaxVpe, 1),
MSG_MAP(SetSoftMinVpe, PPSMC_MSG_SetSoftMinVpe, 1),
+ MSG_MAP(MALLPowerController, PPSMC_MSG_MALLPowerController, 1),
+ MSG_MAP(MALLPowerState, PPSMC_MSG_MALLPowerState, 1),
};
static struct cmn2asic_mapping smu_v14_0_0_feature_mask_map[SMU_FEATURE_COUNT] = {
@@ -1423,6 +1444,57 @@ static int smu_v14_0_common_get_dpm_table(struct smu_context *smu, struct dpm_cl
return 0;
}
+static int smu_v14_0_1_init_mall_power_gating(struct smu_context *smu, enum smu_mall_pg_config pg_config)
+{
+ struct amdgpu_device *adev = smu->adev;
+ int ret = 0;
+
+ if (pg_config == SMU_MALL_PG_CONFIG_PMFW_CONTROL) {
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerController,
+ SMU_MALL_PMFW_CONTROL, NULL);
+ if (ret) {
+ dev_err(adev->dev, "Init MALL PMFW CONTROL Failure\n");
+ return ret;
+ }
+ } else {
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerController,
+ SMU_MALL_DRIVER_CONTROL, NULL);
+ if (ret) {
+ dev_err(adev->dev, "Init MALL Driver CONTROL Failure\n");
+ return ret;
+ }
+
+ if (pg_config == SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_ON) {
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerState,
+ SMU_MALL_EXIT_PG, NULL);
+ if (ret) {
+ dev_err(adev->dev, "EXIT MALL PG Failure\n");
+ return ret;
+ }
+ } else if (pg_config == SMU_MALL_PG_CONFIG_DRIVER_CONTROL_ALWAYS_OFF) {
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_MALLPowerState,
+ SMU_MALL_ENTER_PG, NULL);
+ if (ret) {
+ dev_err(adev->dev, "Enter MALL PG Failure\n");
+ return ret;
+ }
+ }
+ }
+
+ return ret;
+}
+
+static int smu_v14_0_common_set_mall_enable(struct smu_context *smu)
+{
+ enum smu_mall_pg_config pg_config = SMU_MALL_PG_CONFIG_DEFAULT;
+ int ret = 0;
+
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ ret = smu_v14_0_1_init_mall_power_gating(smu, pg_config);
+
+ return ret;
+}
+
static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
.check_fw_status = smu_v14_0_check_fw_status,
.check_fw_version = smu_v14_0_check_fw_version,
@@ -1454,6 +1526,7 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
.dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable,
.dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable,
.get_dpm_clock_table = smu_v14_0_common_get_dpm_table,
+ .set_mall_enable = smu_v14_0_common_set_mall_enable,
};
static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu)
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index d612133e2cf7..117237d3528b 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -524,6 +524,9 @@ struct fb_info *drm_fb_helper_alloc_info(struct drm_fb_helper *fb_helper)
if (!info)
return ERR_PTR(-ENOMEM);
+ if (!drm_leak_fbdev_smem)
+ info->flags |= FBINFO_HIDE_SMEM_START;
+
ret = fb_alloc_cmap(&info->cmap, 256, 0);
if (ret)
goto err_release;
@@ -1860,9 +1863,6 @@ __drm_fb_helper_initial_config_and_unlock(struct drm_fb_helper *fb_helper)
info = fb_helper->info;
info->var.pixclock = 0;
- if (!drm_leak_fbdev_smem)
- info->flags |= FBINFO_HIDE_SMEM_START;
-
/* Need to drop locks to avoid recursive deadlock in
* register_framebuffer. This is ok because the only thing left to do is
* register the fbdev emulation instance in kernel_fb_helper_list. */
diff --git a/drivers/gpu/drm/drm_fbdev_dma.c b/drivers/gpu/drm/drm_fbdev_dma.c
index 6c9427bb4053..13cd754af311 100644
--- a/drivers/gpu/drm/drm_fbdev_dma.c
+++ b/drivers/gpu/drm/drm_fbdev_dma.c
@@ -130,7 +130,10 @@ static int drm_fbdev_dma_helper_fb_probe(struct drm_fb_helper *fb_helper,
info->flags |= FBINFO_READS_FAST; /* signal caching */
info->screen_size = sizes->surface_height * fb->pitches[0];
info->screen_buffer = map.vaddr;
- info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer));
+ if (!(info->flags & FBINFO_HIDE_SMEM_START)) {
+ if (!drm_WARN_ON(dev, is_vmalloc_addr(info->screen_buffer)))
+ info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer));
+ }
info->fix.smem_len = info->screen_size;
return 0;
diff --git a/drivers/gpu/drm/drm_fbdev_generic.c b/drivers/gpu/drm/drm_fbdev_generic.c
index 97e579c33d84..1e200d815e1a 100644
--- a/drivers/gpu/drm/drm_fbdev_generic.c
+++ b/drivers/gpu/drm/drm_fbdev_generic.c
@@ -84,7 +84,8 @@ static int drm_fbdev_generic_helper_fb_probe(struct drm_fb_helper *fb_helper,
sizes->surface_width, sizes->surface_height,
sizes->surface_bpp);
- format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth);
+ format = drm_driver_legacy_fb_format(dev, sizes->surface_bpp,
+ sizes->surface_depth);
buffer = drm_client_framebuffer_create(client, sizes->surface_width,
sizes->surface_height, format);
if (IS_ERR(buffer))
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 638ffa4444f5..714e42b05108 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -469,14 +469,12 @@ void drm_file_update_pid(struct drm_file *filp)
dev = filp->minor->dev;
mutex_lock(&dev->filelist_mutex);
+ get_pid(pid);
old = rcu_replace_pointer(filp->pid, pid, 1);
mutex_unlock(&dev->filelist_mutex);
- if (pid != old) {
- get_pid(pid);
- synchronize_rcu();
- put_pid(old);
- }
+ synchronize_rcu();
+ put_pid(old);
}
/**
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index 2166208a961d..3860a8ce1e2d 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -420,13 +420,20 @@ static const struct dmi_system_id orientation_data[] = {
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galaxy Book 10.6"),
},
.driver_data = (void *)&lcd1280x1920_rightside_up,
- }, { /* Valve Steam Deck */
+ }, { /* Valve Steam Deck (Jupiter) */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"),
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jupiter"),
DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"),
},
.driver_data = (void *)&lcd800x1280_rightside_up,
+ }, { /* Valve Steam Deck (Galileo) */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galileo"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"),
+ },
+ .driver_data = (void *)&lcd800x1280_rightside_up,
}, { /* VIOS LTH17 */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "VIOS"),
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 3c3fc53376ce..6bff169fa8d4 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2088,6 +2088,9 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port,
u32 ln0, ln1, pin_assignment;
u8 width;
+ if (DISPLAY_VER(dev_priv) >= 14)
+ return;
+
if (!intel_encoder_is_tc(&dig_port->base) ||
intel_tc_port_in_tbt_alt_mode(dig_port))
return;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index 40371b8a9bbb..93bc1cc1ee7e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -298,6 +298,7 @@ void i915_vma_revoke_fence(struct i915_vma *vma)
return;
GEM_BUG_ON(fence->vma != vma);
+ i915_active_wait(&fence->active);
GEM_BUG_ON(!i915_active_is_idle(&fence->active));
GEM_BUG_ON(atomic_read(&fence->pin_count));
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
index 670c9739e5e1..2033214c4b78 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
@@ -209,6 +209,8 @@ static int nv17_tv_get_ld_modes(struct drm_encoder *encoder,
struct drm_display_mode *mode;
mode = drm_mode_duplicate(encoder->dev, tv_mode);
+ if (!mode)
+ continue;
mode->clock = tv_norm->tv_enc_mode.vrefresh *
mode->htotal / 1000 *
@@ -258,6 +260,8 @@ static int nv17_tv_get_hd_modes(struct drm_encoder *encoder,
if (modes[i].hdisplay == output_mode->hdisplay &&
modes[i].vdisplay == output_mode->vdisplay) {
mode = drm_mode_duplicate(encoder->dev, output_mode);
+ if (!mode)
+ continue;
mode->type |= DRM_MODE_TYPE_PREFERRED;
} else {
@@ -265,6 +269,8 @@ static int nv17_tv_get_hd_modes(struct drm_encoder *encoder,
modes[i].vdisplay, 60, false,
(output_mode->flags &
DRM_MODE_FLAG_INTERLACE), false);
+ if (!mode)
+ continue;
}
/* CVT modes are sometimes unsuitable... */
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 856b3ef5edb8..0c71d761d378 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -1001,6 +1001,9 @@ nouveau_connector_get_modes(struct drm_connector *connector)
struct drm_display_mode *mode;
mode = drm_mode_duplicate(dev, nv_connector->native_mode);
+ if (!mode)
+ return 0;
+
drm_mode_probed_add(connector, mode);
ret = 1;
}
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index dcb6d0b6ced0..c8cdc8356c58 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -2752,6 +2752,7 @@ static const struct display_timing koe_tx26d202vm0bwa_timing = {
.vfront_porch = { 3, 5, 10 },
.vback_porch = { 2, 5, 10 },
.vsync_len = { 5, 5, 5 },
+ .flags = DISPLAY_FLAGS_DE_HIGH,
};
static const struct panel_desc koe_tx26d202vm0bwa = {
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index b8a84f26b3ef..b5e7b919f241 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -86,15 +86,15 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
int ret = 0;
void *out_alloc;
+ if (!in->count)
+ return NULL;
+
/* User stride must be at least the minimum object size, otherwise it might
* lack useful information.
*/
if (in->stride < min_stride)
return ERR_PTR(-EINVAL);
- if (!in->count)
- return NULL;
-
out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL);
if (!out_alloc)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 79ffcbc41d78..9a0ff48f7061 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -459,6 +459,16 @@ struct panthor_queue {
atomic64_t seqno;
/**
+ * @last_fence: Fence of the last submitted job.
+ *
+ * We return this fence when we get an empty command stream.
+ * This way, we are guaranteed that all earlier jobs have completed
+ * when drm_sched_job::s_fence::finished without having to feed
+ * the CS ring buffer with a dummy job that only signals the fence.
+ */
+ struct dma_fence *last_fence;
+
+ /**
* @in_flight_jobs: List containing all in-flight jobs.
*
* Used to keep track and signal panthor_job::done_fence when the
@@ -829,6 +839,9 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
panthor_kernel_bo_destroy(queue->ringbuf);
panthor_kernel_bo_destroy(queue->iface.mem);
+ /* Release the last_fence we were holding, if any. */
+ dma_fence_put(queue->fence_ctx.last_fence);
+
kfree(queue);
}
@@ -2784,9 +2797,6 @@ static void group_sync_upd_work(struct work_struct *work)
spin_lock(&queue->fence_ctx.lock);
list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
- if (!job->call_info.size)
- continue;
-
if (syncobj->seqno < job->done_fence->seqno)
break;
@@ -2865,11 +2875,14 @@ queue_run_job(struct drm_sched_job *sched_job)
static_assert(sizeof(call_instrs) % 64 == 0,
"call_instrs is not aligned on a cacheline");
- /* Stream size is zero, nothing to do => return a NULL fence and let
- * drm_sched signal the parent.
+ /* Stream size is zero, nothing to do except making sure all previously
+ * submitted jobs are done before we signal the
+ * drm_sched_job::s_fence::finished fence.
*/
- if (!job->call_info.size)
- return NULL;
+ if (!job->call_info.size) {
+ job->done_fence = dma_fence_get(queue->fence_ctx.last_fence);
+ return dma_fence_get(job->done_fence);
+ }
ret = pm_runtime_resume_and_get(ptdev->base.dev);
if (drm_WARN_ON(&ptdev->base, ret))
@@ -2928,6 +2941,10 @@ queue_run_job(struct drm_sched_job *sched_job)
}
}
+ /* Update the last fence. */
+ dma_fence_put(queue->fence_ctx.last_fence);
+ queue->fence_ctx.last_fence = dma_fence_get(job->done_fence);
+
done_fence = dma_fence_get(job->done_fence);
out_unlock:
@@ -3378,10 +3395,15 @@ panthor_job_create(struct panthor_file *pfile,
goto err_put_job;
}
- job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
- if (!job->done_fence) {
- ret = -ENOMEM;
- goto err_put_job;
+ /* Empty command streams don't need a fence, they'll pick the one from
+ * the previously submitted job.
+ */
+ if (job->call_info.size) {
+ job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
+ if (!job->done_fence) {
+ ret = -ENOMEM;
+ goto err_put_job;
+ }
}
ret = drm_sched_job_init(&job->base,
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 2ef201a072f1..e66a230331ee 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -642,7 +642,7 @@ static void radeon_gem_va_update_vm(struct radeon_device *rdev,
if (r)
goto error_unlock;
- if (bo_va->it.start)
+ if (bo_va->it.start && bo_va->bo)
r = radeon_vm_bo_update(rdev, bo_va, bo_va->bo->tbo.resource);
error_unlock:
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6396dece0db1..2427be8bc97f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -346,6 +346,7 @@ static void ttm_bo_release(struct kref *kref)
if (!dma_resv_test_signaled(bo->base.resv,
DMA_RESV_USAGE_BOOKKEEP) ||
(want_init_on_free() && (bo->ttm != NULL)) ||
+ bo->type == ttm_bo_type_sg ||
!dma_resv_trylock(bo->base.resv)) {
/* The BO is not idle, resurrect it for delayed destroy */
ttm_bo_flush_all_fences(bo);
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 577bd7043740..0443e07880a0 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -342,7 +342,7 @@ static void init_steering_oaddrm(struct xe_gt *gt)
else
gt->steering[OADDRM].group_target = 1;
- gt->steering[DSS].instance_target = 0; /* unused */
+ gt->steering[OADDRM].instance_target = 0; /* unused */
}
static void init_steering_sqidi_psmi(struct xe_gt *gt)
@@ -357,8 +357,8 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt)
static void init_steering_inst0(struct xe_gt *gt)
{
- gt->steering[DSS].group_target = 0; /* unused */
- gt->steering[DSS].instance_target = 0; /* unused */
+ gt->steering[INSTANCE0].group_target = 0; /* unused */
+ gt->steering[INSTANCE0].instance_target = 0; /* unused */
}
static const struct {
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 65e5a3f4c340..198f5c2189cb 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -1334,7 +1334,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
GFP_KERNEL, true, 0);
if (IS_ERR(sa_bo)) {
err = PTR_ERR(sa_bo);
- goto err;
+ goto err_bb;
}
ppgtt_ofs = NUM_KERNEL_PDE +
@@ -1385,7 +1385,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
update_idx);
if (IS_ERR(job)) {
err = PTR_ERR(job);
- goto err_bb;
+ goto err_sa;
}
/* Wait on BO move */
@@ -1434,12 +1434,12 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
err_job:
xe_sched_job_put(job);
+err_sa:
+ drm_suballoc_free(sa_bo, NULL);
err_bb:
if (!q)
mutex_unlock(&m->job_mutex);
xe_bb_free(bb, NULL);
-err:
- drm_suballoc_free(sa_bo, NULL);
return ERR_PTR(err);
}
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 3d65934f5eb4..78d0561339e5 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -29,8 +29,7 @@ obj-$(CONFIG_I2C_SIS630) += i2c-sis630.o
obj-$(CONFIG_I2C_SIS96X) += i2c-sis96x.o
obj-$(CONFIG_I2C_VIA) += i2c-via.o
obj-$(CONFIG_I2C_VIAPRO) += i2c-viapro.o
-i2c-zhaoxin-objs := i2c-viai2c-zhaoxin.o i2c-viai2c-common.o
-obj-$(CONFIG_I2C_ZHAOXIN) += i2c-zhaoxin.o
+obj-$(CONFIG_I2C_ZHAOXIN) += i2c-viai2c-zhaoxin.o i2c-viai2c-common.o
# Mac SMBus host controller drivers
obj-$(CONFIG_I2C_HYDRA) += i2c-hydra.o
@@ -120,8 +119,7 @@ obj-$(CONFIG_I2C_TEGRA_BPMP) += i2c-tegra-bpmp.o
obj-$(CONFIG_I2C_UNIPHIER) += i2c-uniphier.o
obj-$(CONFIG_I2C_UNIPHIER_F) += i2c-uniphier-f.o
obj-$(CONFIG_I2C_VERSATILE) += i2c-versatile.o
-i2c-wmt-objs := i2c-viai2c-wmt.o i2c-viai2c-common.o
-obj-$(CONFIG_I2C_WMT) += i2c-wmt.o
+obj-$(CONFIG_I2C_WMT) += i2c-viai2c-wmt.o i2c-viai2c-common.o
i2c-octeon-objs := i2c-octeon-core.o i2c-octeon-platdrv.o
obj-$(CONFIG_I2C_OCTEON) += i2c-octeon.o
i2c-thunderx-objs := i2c-octeon-core.o i2c-thunderx-pcidrv.o
diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index a12525b3186b..f448505d5468 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -15,7 +15,6 @@
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/i2c.h>
-#include <linux/timer.h>
#include <linux/completion.h>
#include <linux/platform_device.h>
#include <linux/io.h>
@@ -32,7 +31,6 @@ struct i2c_pnx_mif {
int ret; /* Return value */
int mode; /* Interface mode */
struct completion complete; /* I/O completion */
- struct timer_list timer; /* Timeout */
u8 * buf; /* Data buffer */
int len; /* Length of data buffer */
int order; /* RX Bytes to order via TX */
@@ -117,24 +115,6 @@ static inline int wait_reset(struct i2c_pnx_algo_data *data)
return (timeout <= 0);
}
-static inline void i2c_pnx_arm_timer(struct i2c_pnx_algo_data *alg_data)
-{
- struct timer_list *timer = &alg_data->mif.timer;
- unsigned long expires = msecs_to_jiffies(alg_data->timeout);
-
- if (expires <= 1)
- expires = 2;
-
- del_timer_sync(timer);
-
- dev_dbg(&alg_data->adapter.dev, "Timer armed at %lu plus %lu jiffies.\n",
- jiffies, expires);
-
- timer->expires = jiffies + expires;
-
- add_timer(timer);
-}
-
/**
* i2c_pnx_start - start a device
* @slave_addr: slave address
@@ -259,8 +239,6 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data)
~(mcntrl_afie | mcntrl_naie | mcntrl_drmie),
I2C_REG_CTL(alg_data));
- del_timer_sync(&alg_data->mif.timer);
-
dev_dbg(&alg_data->adapter.dev,
"%s(): Waking up xfer routine.\n",
__func__);
@@ -276,8 +254,6 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data)
~(mcntrl_afie | mcntrl_naie | mcntrl_drmie),
I2C_REG_CTL(alg_data));
- /* Stop timer. */
- del_timer_sync(&alg_data->mif.timer);
dev_dbg(&alg_data->adapter.dev,
"%s(): Waking up xfer routine after zero-xfer.\n",
__func__);
@@ -364,8 +340,6 @@ static int i2c_pnx_master_rcv(struct i2c_pnx_algo_data *alg_data)
mcntrl_drmie | mcntrl_daie);
iowrite32(ctl, I2C_REG_CTL(alg_data));
- /* Kill timer. */
- del_timer_sync(&alg_data->mif.timer);
complete(&alg_data->mif.complete);
}
}
@@ -400,8 +374,6 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id)
mcntrl_drmie);
iowrite32(ctl, I2C_REG_CTL(alg_data));
- /* Stop timer, to prevent timeout. */
- del_timer_sync(&alg_data->mif.timer);
complete(&alg_data->mif.complete);
} else if (stat & mstatus_nai) {
/* Slave did not acknowledge, generate a STOP */
@@ -419,8 +391,6 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id)
/* Our return value. */
alg_data->mif.ret = -EIO;
- /* Stop timer, to prevent timeout. */
- del_timer_sync(&alg_data->mif.timer);
complete(&alg_data->mif.complete);
} else {
/*
@@ -453,9 +423,8 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void i2c_pnx_timeout(struct timer_list *t)
+static void i2c_pnx_timeout(struct i2c_pnx_algo_data *alg_data)
{
- struct i2c_pnx_algo_data *alg_data = from_timer(alg_data, t, mif.timer);
u32 ctl;
dev_err(&alg_data->adapter.dev,
@@ -472,7 +441,6 @@ static void i2c_pnx_timeout(struct timer_list *t)
iowrite32(ctl, I2C_REG_CTL(alg_data));
wait_reset(alg_data);
alg_data->mif.ret = -EIO;
- complete(&alg_data->mif.complete);
}
static inline void bus_reset_if_active(struct i2c_pnx_algo_data *alg_data)
@@ -514,6 +482,7 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
struct i2c_msg *pmsg;
int rc = 0, completed = 0, i;
struct i2c_pnx_algo_data *alg_data = adap->algo_data;
+ unsigned long time_left;
u32 stat;
dev_dbg(&alg_data->adapter.dev,
@@ -548,7 +517,6 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
dev_dbg(&alg_data->adapter.dev, "%s(): mode %d, %d bytes\n",
__func__, alg_data->mif.mode, alg_data->mif.len);
- i2c_pnx_arm_timer(alg_data);
/* initialize the completion var */
init_completion(&alg_data->mif.complete);
@@ -564,7 +532,10 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
break;
/* Wait for completion */
- wait_for_completion(&alg_data->mif.complete);
+ time_left = wait_for_completion_timeout(&alg_data->mif.complete,
+ alg_data->timeout);
+ if (time_left == 0)
+ i2c_pnx_timeout(alg_data);
if (!(rc = alg_data->mif.ret))
completed++;
@@ -653,7 +624,10 @@ static int i2c_pnx_probe(struct platform_device *pdev)
alg_data->adapter.algo_data = alg_data;
alg_data->adapter.nr = pdev->id;
- alg_data->timeout = I2C_PNX_TIMEOUT_DEFAULT;
+ alg_data->timeout = msecs_to_jiffies(I2C_PNX_TIMEOUT_DEFAULT);
+ if (alg_data->timeout <= 1)
+ alg_data->timeout = 2;
+
#ifdef CONFIG_OF
alg_data->adapter.dev.of_node = of_node_get(pdev->dev.of_node);
if (pdev->dev.of_node) {
@@ -673,8 +647,6 @@ static int i2c_pnx_probe(struct platform_device *pdev)
if (IS_ERR(alg_data->clk))
return PTR_ERR(alg_data->clk);
- timer_setup(&alg_data->mif.timer, i2c_pnx_timeout, 0);
-
snprintf(alg_data->adapter.name, sizeof(alg_data->adapter.name),
"%s", pdev->name);
diff --git a/drivers/i2c/busses/i2c-viai2c-common.c b/drivers/i2c/busses/i2c-viai2c-common.c
index 1844d13f1f79..162b31306cba 100644
--- a/drivers/i2c/busses/i2c-viai2c-common.c
+++ b/drivers/i2c/busses/i2c-viai2c-common.c
@@ -17,6 +17,7 @@ int viai2c_wait_bus_not_busy(struct viai2c *i2c)
return 0;
}
+EXPORT_SYMBOL_GPL(viai2c_wait_bus_not_busy);
static int viai2c_write(struct viai2c *i2c, struct i2c_msg *pmsg, int last)
{
@@ -121,6 +122,7 @@ int viai2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
return (ret < 0) ? ret : i;
}
+EXPORT_SYMBOL_GPL(viai2c_xfer);
/*
* Main process of the byte mode xfer
@@ -130,7 +132,7 @@ int viai2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
* 0: there is still data that needs to be transferred
* -EIO: error occurred
*/
-static int viai2c_irq_xfer(struct viai2c *i2c)
+int viai2c_irq_xfer(struct viai2c *i2c)
{
u16 val;
struct i2c_msg *msg = i2c->msg;
@@ -171,51 +173,11 @@ static int viai2c_irq_xfer(struct viai2c *i2c)
return i2c->xfered_len == msg->len;
}
-
-int __weak viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq)
-{
- return 0;
-}
-
-static irqreturn_t viai2c_isr(int irq, void *data)
-{
- struct viai2c *i2c = data;
- u8 status;
-
- /* save the status and write-clear it */
- status = readw(i2c->base + VIAI2C_REG_ISR);
- if (!status && i2c->platform == VIAI2C_PLAT_ZHAOXIN)
- return IRQ_NONE;
-
- writew(status, i2c->base + VIAI2C_REG_ISR);
-
- i2c->ret = 0;
- if (status & VIAI2C_ISR_NACK_ADDR)
- i2c->ret = -EIO;
-
- if (i2c->platform == VIAI2C_PLAT_WMT && (status & VIAI2C_ISR_SCL_TIMEOUT))
- i2c->ret = -ETIMEDOUT;
-
- if (!i2c->ret) {
- if (i2c->mode == VIAI2C_BYTE_MODE)
- i2c->ret = viai2c_irq_xfer(i2c);
- else
- i2c->ret = viai2c_fifo_irq_xfer(i2c, true);
- }
-
- /* All the data has been successfully transferred or error occurred */
- if (i2c->ret)
- complete(&i2c->complete);
-
- return IRQ_HANDLED;
-}
+EXPORT_SYMBOL_GPL(viai2c_irq_xfer);
int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat)
{
- int err;
- int irq_flags;
struct viai2c *i2c;
- struct device_node *np = pdev->dev.of_node;
i2c = devm_kzalloc(&pdev->dev, sizeof(*i2c), GFP_KERNEL);
if (!i2c)
@@ -225,28 +187,8 @@ int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat)
if (IS_ERR(i2c->base))
return PTR_ERR(i2c->base);
- if (plat == VIAI2C_PLAT_WMT) {
- irq_flags = 0;
- i2c->irq = irq_of_parse_and_map(np, 0);
- if (!i2c->irq)
- return -EINVAL;
- } else if (plat == VIAI2C_PLAT_ZHAOXIN) {
- irq_flags = IRQF_SHARED;
- i2c->irq = platform_get_irq(pdev, 0);
- if (i2c->irq < 0)
- return i2c->irq;
- } else {
- return dev_err_probe(&pdev->dev, -EINVAL, "wrong platform type\n");
- }
-
i2c->platform = plat;
- err = devm_request_irq(&pdev->dev, i2c->irq, viai2c_isr,
- irq_flags, pdev->name, i2c);
- if (err)
- return dev_err_probe(&pdev->dev, err,
- "failed to request irq %i\n", i2c->irq);
-
i2c->dev = &pdev->dev;
init_completion(&i2c->complete);
platform_set_drvdata(pdev, i2c);
@@ -254,3 +196,8 @@ int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat)
*pi2c = i2c;
return 0;
}
+EXPORT_SYMBOL_GPL(viai2c_init);
+
+MODULE_DESCRIPTION("Via/Wondermedia/Zhaoxin I2C master-mode bus adapter");
+MODULE_AUTHOR("Tony Prisk <[email protected]>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/i2c/busses/i2c-viai2c-common.h b/drivers/i2c/busses/i2c-viai2c-common.h
index 81e827c54434..00f17733223c 100644
--- a/drivers/i2c/busses/i2c-viai2c-common.h
+++ b/drivers/i2c/busses/i2c-viai2c-common.h
@@ -80,6 +80,6 @@ struct viai2c {
int viai2c_wait_bus_not_busy(struct viai2c *i2c);
int viai2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num);
int viai2c_init(struct platform_device *pdev, struct viai2c **pi2c, int plat);
-int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq);
+int viai2c_irq_xfer(struct viai2c *i2c);
#endif
diff --git a/drivers/i2c/busses/i2c-viai2c-wmt.c b/drivers/i2c/busses/i2c-viai2c-wmt.c
index e1988f946026..420fd10fe3aa 100644
--- a/drivers/i2c/busses/i2c-viai2c-wmt.c
+++ b/drivers/i2c/busses/i2c-viai2c-wmt.c
@@ -72,6 +72,32 @@ static int wmt_i2c_reset_hardware(struct viai2c *i2c)
return 0;
}
+static irqreturn_t wmt_i2c_isr(int irq, void *data)
+{
+ struct viai2c *i2c = data;
+ u8 status;
+
+ /* save the status and write-clear it */
+ status = readw(i2c->base + VIAI2C_REG_ISR);
+ writew(status, i2c->base + VIAI2C_REG_ISR);
+
+ i2c->ret = 0;
+ if (status & VIAI2C_ISR_NACK_ADDR)
+ i2c->ret = -EIO;
+
+ if (status & VIAI2C_ISR_SCL_TIMEOUT)
+ i2c->ret = -ETIMEDOUT;
+
+ if (!i2c->ret)
+ i2c->ret = viai2c_irq_xfer(i2c);
+
+ /* All the data has been successfully transferred or error occurred */
+ if (i2c->ret)
+ complete(&i2c->complete);
+
+ return IRQ_HANDLED;
+}
+
static int wmt_i2c_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
@@ -84,6 +110,16 @@ static int wmt_i2c_probe(struct platform_device *pdev)
if (err)
return err;
+ i2c->irq = platform_get_irq(pdev, 0);
+ if (i2c->irq < 0)
+ return i2c->irq;
+
+ err = devm_request_irq(&pdev->dev, i2c->irq, wmt_i2c_isr,
+ 0, pdev->name, i2c);
+ if (err)
+ return dev_err_probe(&pdev->dev, err,
+ "failed to request irq %i\n", i2c->irq);
+
i2c->clk = of_clk_get(np, 0);
if (IS_ERR(i2c->clk)) {
dev_err(&pdev->dev, "unable to request clock\n");
diff --git a/drivers/i2c/busses/i2c-viai2c-zhaoxin.c b/drivers/i2c/busses/i2c-viai2c-zhaoxin.c
index 7e3ac2a3e1fd..ab3e44e147e9 100644
--- a/drivers/i2c/busses/i2c-viai2c-zhaoxin.c
+++ b/drivers/i2c/busses/i2c-viai2c-zhaoxin.c
@@ -49,8 +49,7 @@ struct viai2c_zhaoxin {
u16 xfer_len;
};
-/* 'irq == true' means in interrupt context */
-int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq)
+static int viai2c_fifo_xfer(struct viai2c *i2c)
{
u16 i;
u8 tmp;
@@ -59,17 +58,6 @@ int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq)
bool read = !!(msg->flags & I2C_M_RD);
struct viai2c_zhaoxin *priv = i2c->pltfm_priv;
- if (irq) {
- /* get the received data */
- if (read)
- for (i = 0; i < priv->xfer_len; i++)
- msg->buf[i2c->xfered_len + i] = ioread8(base + ZXI2C_REG_HRDR);
-
- i2c->xfered_len += priv->xfer_len;
- if (i2c->xfered_len == msg->len)
- return 1;
- }
-
/* reset fifo buffer */
tmp = ioread8(base + ZXI2C_REG_HCR);
iowrite8(tmp | ZXI2C_HCR_RST_FIFO, base + ZXI2C_REG_HCR);
@@ -92,18 +80,59 @@ int viai2c_fifo_irq_xfer(struct viai2c *i2c, bool irq)
iowrite8(tmp, base + VIAI2C_REG_CR);
}
- if (irq) {
- /* continue transmission */
- tmp = ioread8(base + VIAI2C_REG_CR);
- iowrite8(tmp |= VIAI2C_CR_CPU_RDY, base + VIAI2C_REG_CR);
+ u16 tcr_val = i2c->tcr;
+
+ /* start transmission */
+ tcr_val |= read ? VIAI2C_TCR_READ : 0;
+ writew(tcr_val | msg->addr, base + VIAI2C_REG_TCR);
+
+ return 0;
+}
+
+static int viai2c_fifo_irq_xfer(struct viai2c *i2c)
+{
+ u16 i;
+ u8 tmp;
+ struct i2c_msg *msg = i2c->msg;
+ void __iomem *base = i2c->base;
+ bool read = !!(msg->flags & I2C_M_RD);
+ struct viai2c_zhaoxin *priv = i2c->pltfm_priv;
+
+ /* get the received data */
+ if (read)
+ for (i = 0; i < priv->xfer_len; i++)
+ msg->buf[i2c->xfered_len + i] = ioread8(base + ZXI2C_REG_HRDR);
+
+ i2c->xfered_len += priv->xfer_len;
+ if (i2c->xfered_len == msg->len)
+ return 1;
+
+ /* reset fifo buffer */
+ tmp = ioread8(base + ZXI2C_REG_HCR);
+ iowrite8(tmp | ZXI2C_HCR_RST_FIFO, base + ZXI2C_REG_HCR);
+
+ /* set xfer len */
+ priv->xfer_len = min_t(u16, msg->len - i2c->xfered_len, ZXI2C_FIFO_SIZE);
+ if (read) {
+ iowrite8(priv->xfer_len - 1, base + ZXI2C_REG_HRLR);
} else {
- u16 tcr_val = i2c->tcr;
+ iowrite8(priv->xfer_len - 1, base + ZXI2C_REG_HTLR);
+ /* set write data */
+ for (i = 0; i < priv->xfer_len; i++)
+ iowrite8(msg->buf[i2c->xfered_len + i], base + ZXI2C_REG_HTDR);
+ }
- /* start transmission */
- tcr_val |= read ? VIAI2C_TCR_READ : 0;
- writew(tcr_val | msg->addr, base + VIAI2C_REG_TCR);
+ /* prepare to stop transmission */
+ if (priv->hrv && msg->len == (i2c->xfered_len + priv->xfer_len)) {
+ tmp = ioread8(base + VIAI2C_REG_CR);
+ tmp |= read ? VIAI2C_CR_RX_END : VIAI2C_CR_TX_END;
+ iowrite8(tmp, base + VIAI2C_REG_CR);
}
+ /* continue transmission */
+ tmp = ioread8(base + VIAI2C_REG_CR);
+ iowrite8(tmp |= VIAI2C_CR_CPU_RDY, base + VIAI2C_REG_CR);
+
return 0;
}
@@ -135,7 +164,7 @@ static int zxi2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int
priv->xfer_len = 0;
i2c->xfered_len = 0;
- viai2c_fifo_irq_xfer(i2c, 0);
+ viai2c_fifo_xfer(i2c);
if (!wait_for_completion_timeout(&i2c->complete, VIAI2C_TIMEOUT))
return -ETIMEDOUT;
@@ -228,6 +257,36 @@ static void zxi2c_get_bus_speed(struct viai2c *i2c)
dev_info(i2c->dev, "speed mode is %s\n", i2c_freq_mode_string(params[0]));
}
+static irqreturn_t zxi2c_isr(int irq, void *data)
+{
+ struct viai2c *i2c = data;
+ u8 status;
+
+ /* save the status and write-clear it */
+ status = readw(i2c->base + VIAI2C_REG_ISR);
+ if (!status)
+ return IRQ_NONE;
+
+ writew(status, i2c->base + VIAI2C_REG_ISR);
+
+ i2c->ret = 0;
+ if (status & VIAI2C_ISR_NACK_ADDR)
+ i2c->ret = -EIO;
+
+ if (!i2c->ret) {
+ if (i2c->mode == VIAI2C_BYTE_MODE)
+ i2c->ret = viai2c_irq_xfer(i2c);
+ else
+ i2c->ret = viai2c_fifo_irq_xfer(i2c);
+ }
+
+ /* All the data has been successfully transferred or error occurred */
+ if (i2c->ret)
+ complete(&i2c->complete);
+
+ return IRQ_HANDLED;
+}
+
static int zxi2c_probe(struct platform_device *pdev)
{
int error;
@@ -239,6 +298,16 @@ static int zxi2c_probe(struct platform_device *pdev)
if (error)
return error;
+ i2c->irq = platform_get_irq(pdev, 0);
+ if (i2c->irq < 0)
+ return i2c->irq;
+
+ error = devm_request_irq(&pdev->dev, i2c->irq, zxi2c_isr,
+ IRQF_SHARED, pdev->name, i2c);
+ if (error)
+ return dev_err_probe(&pdev->dev, error,
+ "failed to request irq %i\n", i2c->irq);
+
priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
diff --git a/drivers/i2c/i2c-slave-testunit.c b/drivers/i2c/i2c-slave-testunit.c
index a49642bbae4b..ca43e98cae1b 100644
--- a/drivers/i2c/i2c-slave-testunit.c
+++ b/drivers/i2c/i2c-slave-testunit.c
@@ -118,9 +118,12 @@ static int i2c_slave_testunit_slave_cb(struct i2c_client *client,
queue_delayed_work(system_long_wq, &tu->worker,
msecs_to_jiffies(10 * tu->regs[TU_REG_DELAY]));
}
- fallthrough;
+ break;
case I2C_SLAVE_WRITE_REQUESTED:
+ if (test_bit(TU_FLAG_IN_PROCESS, &tu->flags))
+ return -EBUSY;
+
memset(tu->regs, 0, TU_NUM_REGS);
tu->reg_idx = 0;
break;
diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig
index c2da5066e9a7..80b57d3ee3a7 100644
--- a/drivers/iio/accel/Kconfig
+++ b/drivers/iio/accel/Kconfig
@@ -330,6 +330,8 @@ config DMARD10
config FXLS8962AF
tristate
depends on I2C || !I2C # cannot be built-in for modular I2C
+ select IIO_BUFFER
+ select IIO_KFIFO_BUF
config FXLS8962AF_I2C
tristate "NXP FXLS8962AF/FXLS8964AF Accelerometer I2C Driver"
diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c
index 353a97f9c086..13ea8a1073d2 100644
--- a/drivers/iio/adc/ad7266.c
+++ b/drivers/iio/adc/ad7266.c
@@ -157,6 +157,8 @@ static int ad7266_read_raw(struct iio_dev *indio_dev,
ret = ad7266_read_single(st, val, chan->address);
iio_device_release_direct_mode(indio_dev);
+ if (ret < 0)
+ return ret;
*val = (*val >> 2) & 0xfff;
if (chan->scan_type.sign == 's')
*val = sign_extend32(*val,
diff --git a/drivers/iio/adc/xilinx-ams.c b/drivers/iio/adc/xilinx-ams.c
index f0b71a1220e0..f52abf759260 100644
--- a/drivers/iio/adc/xilinx-ams.c
+++ b/drivers/iio/adc/xilinx-ams.c
@@ -414,8 +414,12 @@ static void ams_enable_channel_sequence(struct iio_dev *indio_dev)
/* Run calibration of PS & PL as part of the sequence */
scan_mask = BIT(0) | BIT(AMS_PS_SEQ_MAX);
- for (i = 0; i < indio_dev->num_channels; i++)
- scan_mask |= BIT_ULL(indio_dev->channels[i].scan_index);
+ for (i = 0; i < indio_dev->num_channels; i++) {
+ const struct iio_chan_spec *chan = &indio_dev->channels[i];
+
+ if (chan->scan_index < AMS_CTRL_SEQ_BASE)
+ scan_mask |= BIT_ULL(chan->scan_index);
+ }
if (ams->ps_base) {
/* put sysmon in a soft reset to change the sequence */
diff --git a/drivers/iio/chemical/bme680.h b/drivers/iio/chemical/bme680.h
index 4edc5d21cb9f..f959252a4fe6 100644
--- a/drivers/iio/chemical/bme680.h
+++ b/drivers/iio/chemical/bme680.h
@@ -54,7 +54,9 @@
#define BME680_NB_CONV_MASK GENMASK(3, 0)
#define BME680_REG_MEAS_STAT_0 0x1D
+#define BME680_NEW_DATA_BIT BIT(7)
#define BME680_GAS_MEAS_BIT BIT(6)
+#define BME680_MEAS_BIT BIT(5)
/* Calibration Parameters */
#define BME680_T2_LSB_REG 0x8A
diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c
index ef5e0e46fd34..500f56834b01 100644
--- a/drivers/iio/chemical/bme680_core.c
+++ b/drivers/iio/chemical/bme680_core.c
@@ -10,6 +10,7 @@
*/
#include <linux/acpi.h>
#include <linux/bitfield.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/log2.h>
@@ -38,7 +39,7 @@ struct bme680_calib {
s8 par_h3;
s8 par_h4;
s8 par_h5;
- s8 par_h6;
+ u8 par_h6;
s8 par_h7;
s8 par_gh1;
s16 par_gh2;
@@ -342,10 +343,10 @@ static s16 bme680_compensate_temp(struct bme680_data *data,
if (!calib->par_t2)
bme680_read_calib(data, calib);
- var1 = (adc_temp >> 3) - (calib->par_t1 << 1);
+ var1 = (adc_temp >> 3) - ((s32)calib->par_t1 << 1);
var2 = (var1 * calib->par_t2) >> 11;
var3 = ((var1 >> 1) * (var1 >> 1)) >> 12;
- var3 = (var3 * (calib->par_t3 << 4)) >> 14;
+ var3 = (var3 * ((s32)calib->par_t3 << 4)) >> 14;
data->t_fine = var2 + var3;
calc_temp = (data->t_fine * 5 + 128) >> 8;
@@ -368,9 +369,9 @@ static u32 bme680_compensate_press(struct bme680_data *data,
var1 = (data->t_fine >> 1) - 64000;
var2 = ((((var1 >> 2) * (var1 >> 2)) >> 11) * calib->par_p6) >> 2;
var2 = var2 + (var1 * calib->par_p5 << 1);
- var2 = (var2 >> 2) + (calib->par_p4 << 16);
+ var2 = (var2 >> 2) + ((s32)calib->par_p4 << 16);
var1 = (((((var1 >> 2) * (var1 >> 2)) >> 13) *
- (calib->par_p3 << 5)) >> 3) +
+ ((s32)calib->par_p3 << 5)) >> 3) +
((calib->par_p2 * var1) >> 1);
var1 = var1 >> 18;
var1 = ((32768 + var1) * calib->par_p1) >> 15;
@@ -388,7 +389,7 @@ static u32 bme680_compensate_press(struct bme680_data *data,
var3 = ((press_comp >> 8) * (press_comp >> 8) *
(press_comp >> 8) * calib->par_p10) >> 17;
- press_comp += (var1 + var2 + var3 + (calib->par_p7 << 7)) >> 4;
+ press_comp += (var1 + var2 + var3 + ((s32)calib->par_p7 << 7)) >> 4;
return press_comp;
}
@@ -414,7 +415,7 @@ static u32 bme680_compensate_humid(struct bme680_data *data,
(((temp_scaled * ((temp_scaled * calib->par_h5) / 100))
>> 6) / 100) + (1 << 14))) >> 10;
var3 = var1 * var2;
- var4 = calib->par_h6 << 7;
+ var4 = (s32)calib->par_h6 << 7;
var4 = (var4 + ((temp_scaled * calib->par_h7) / 100)) >> 4;
var5 = ((var3 >> 14) * (var3 >> 14)) >> 10;
var6 = (var4 * var5) >> 1;
@@ -532,6 +533,43 @@ static u8 bme680_oversampling_to_reg(u8 val)
return ilog2(val) + 1;
}
+/*
+ * Taken from Bosch BME680 API:
+ * https://github.com/boschsensortec/BME68x_SensorAPI/blob/v4.4.8/bme68x.c#L490
+ */
+static int bme680_wait_for_eoc(struct bme680_data *data)
+{
+ struct device *dev = regmap_get_device(data->regmap);
+ unsigned int check;
+ int ret;
+ /*
+ * (Sum of oversampling ratios * time per oversampling) +
+ * TPH measurement + gas measurement + wait transition from forced mode
+ * + heater duration
+ */
+ int wait_eoc_us = ((data->oversampling_temp + data->oversampling_press +
+ data->oversampling_humid) * 1936) + (477 * 4) +
+ (477 * 5) + 1000 + (data->heater_dur * 1000);
+
+ usleep_range(wait_eoc_us, wait_eoc_us + 100);
+
+ ret = regmap_read(data->regmap, BME680_REG_MEAS_STAT_0, &check);
+ if (ret) {
+ dev_err(dev, "failed to read measurement status register.\n");
+ return ret;
+ }
+ if (check & BME680_MEAS_BIT) {
+ dev_err(dev, "Device measurement cycle incomplete.\n");
+ return -EBUSY;
+ }
+ if (!(check & BME680_NEW_DATA_BIT)) {
+ dev_err(dev, "No new data available from the device.\n");
+ return -ENODATA;
+ }
+
+ return 0;
+}
+
static int bme680_chip_config(struct bme680_data *data)
{
struct device *dev = regmap_get_device(data->regmap);
@@ -622,6 +660,10 @@ static int bme680_read_temp(struct bme680_data *data, int *val)
if (ret < 0)
return ret;
+ ret = bme680_wait_for_eoc(data);
+ if (ret)
+ return ret;
+
ret = regmap_bulk_read(data->regmap, BME680_REG_TEMP_MSB,
&tmp, 3);
if (ret < 0) {
@@ -678,7 +720,7 @@ static int bme680_read_press(struct bme680_data *data,
}
*val = bme680_compensate_press(data, adc_press);
- *val2 = 100;
+ *val2 = 1000;
return IIO_VAL_FRACTIONAL;
}
@@ -738,6 +780,10 @@ static int bme680_read_gas(struct bme680_data *data,
if (ret < 0)
return ret;
+ ret = bme680_wait_for_eoc(data);
+ if (ret)
+ return ret;
+
ret = regmap_read(data->regmap, BME680_REG_MEAS_STAT_0, &check);
if (check & BME680_GAS_MEAS_BIT) {
dev_err(dev, "gas measurement incomplete\n");
diff --git a/drivers/iio/dac/Kconfig b/drivers/iio/dac/Kconfig
index 3c2bf620f00f..ee0d9798d8b4 100644
--- a/drivers/iio/dac/Kconfig
+++ b/drivers/iio/dac/Kconfig
@@ -133,7 +133,7 @@ config AD5624R_SPI
config AD9739A
tristate "Analog Devices AD9739A RF DAC spi driver"
- depends on SPI || COMPILE_TEST
+ depends on SPI
select REGMAP_SPI
select IIO_BACKEND
help
diff --git a/drivers/iio/humidity/hdc3020.c b/drivers/iio/humidity/hdc3020.c
index cdc4789213ba..a82dcc3da421 100644
--- a/drivers/iio/humidity/hdc3020.c
+++ b/drivers/iio/humidity/hdc3020.c
@@ -19,6 +19,7 @@
#include <linux/i2c.h>
#include <linux/init.h>
#include <linux/interrupt.h>
+#include <linux/math64.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pm.h>
@@ -66,8 +67,10 @@
#define HDC3020_CRC8_POLYNOMIAL 0x31
-#define HDC3020_MIN_TEMP -40
-#define HDC3020_MAX_TEMP 125
+#define HDC3020_MIN_TEMP_MICRO -39872968
+#define HDC3020_MAX_TEMP_MICRO 124875639
+#define HDC3020_MAX_TEMP_HYST_MICRO 164748607
+#define HDC3020_MAX_HUM_MICRO 99220264
struct hdc3020_data {
struct i2c_client *client;
@@ -368,6 +371,105 @@ static int hdc3020_write_raw(struct iio_dev *indio_dev,
return -EINVAL;
}
+static int hdc3020_thresh_get_temp(u16 thresh)
+{
+ int temp;
+
+ /*
+ * Get the temperature threshold from 9 LSBs, shift them to get
+ * the truncated temperature threshold representation and
+ * calculate the threshold according to the formula in the
+ * datasheet. Result is degree celsius scaled by 65535.
+ */
+ temp = FIELD_GET(HDC3020_THRESH_TEMP_MASK, thresh) <<
+ HDC3020_THRESH_TEMP_TRUNC_SHIFT;
+
+ return -2949075 + (175 * temp);
+}
+
+static int hdc3020_thresh_get_hum(u16 thresh)
+{
+ int hum;
+
+ /*
+ * Get the humidity threshold from 7 MSBs, shift them to get the
+ * truncated humidity threshold representation and calculate the
+ * threshold according to the formula in the datasheet. Result is
+ * percent scaled by 65535.
+ */
+ hum = FIELD_GET(HDC3020_THRESH_HUM_MASK, thresh) <<
+ HDC3020_THRESH_HUM_TRUNC_SHIFT;
+
+ return hum * 100;
+}
+
+static u16 hdc3020_thresh_set_temp(int s_temp, u16 curr_thresh)
+{
+ u64 temp;
+ u16 thresh;
+
+ /*
+ * Calculate temperature threshold, shift it down to get the
+ * truncated threshold representation in the 9LSBs while keeping
+ * the current humidity threshold in the 7 MSBs.
+ */
+ temp = (u64)(s_temp + 45000000) * 65535ULL;
+ temp = div_u64(temp, 1000000 * 175) >> HDC3020_THRESH_TEMP_TRUNC_SHIFT;
+ thresh = FIELD_PREP(HDC3020_THRESH_TEMP_MASK, temp);
+ thresh |= (FIELD_GET(HDC3020_THRESH_HUM_MASK, curr_thresh) <<
+ HDC3020_THRESH_HUM_TRUNC_SHIFT);
+
+ return thresh;
+}
+
+static u16 hdc3020_thresh_set_hum(int s_hum, u16 curr_thresh)
+{
+ u64 hum;
+ u16 thresh;
+
+ /*
+ * Calculate humidity threshold, shift it down and up to get the
+ * truncated threshold representation in the 7MSBs while keeping
+ * the current temperature threshold in the 9 LSBs.
+ */
+ hum = (u64)(s_hum) * 65535ULL;
+ hum = div_u64(hum, 1000000 * 100) >> HDC3020_THRESH_HUM_TRUNC_SHIFT;
+ thresh = FIELD_PREP(HDC3020_THRESH_HUM_MASK, hum);
+ thresh |= FIELD_GET(HDC3020_THRESH_TEMP_MASK, curr_thresh);
+
+ return thresh;
+}
+
+static
+int hdc3020_thresh_clr(s64 s_thresh, s64 s_hyst, enum iio_event_direction dir)
+{
+ s64 s_clr;
+
+ /*
+ * Include directions when calculation the clear value,
+ * since hysteresis is unsigned by definition and the
+ * clear value is an absolute value which is signed.
+ */
+ if (dir == IIO_EV_DIR_RISING)
+ s_clr = s_thresh - s_hyst;
+ else
+ s_clr = s_thresh + s_hyst;
+
+ /* Divide by 65535 to get units of micro */
+ return div_s64(s_clr, 65535);
+}
+
+static int _hdc3020_write_thresh(struct hdc3020_data *data, u16 reg, u16 val)
+{
+ u8 buf[5];
+
+ put_unaligned_be16(reg, buf);
+ put_unaligned_be16(val, buf + 2);
+ buf[4] = crc8(hdc3020_crc8_table, buf + 2, 2, CRC8_INIT_VALUE);
+
+ return hdc3020_write_bytes(data, buf, 5);
+}
+
static int hdc3020_write_thresh(struct iio_dev *indio_dev,
const struct iio_chan_spec *chan,
enum iio_event_type type,
@@ -376,67 +478,126 @@ static int hdc3020_write_thresh(struct iio_dev *indio_dev,
int val, int val2)
{
struct hdc3020_data *data = iio_priv(indio_dev);
- u8 buf[5];
- u64 tmp;
- u16 reg;
- int ret;
-
- /* Supported temperature range is from –40 to 125 degree celsius */
- if (val < HDC3020_MIN_TEMP || val > HDC3020_MAX_TEMP)
- return -EINVAL;
-
- /* Select threshold register */
- if (info == IIO_EV_INFO_VALUE) {
- if (dir == IIO_EV_DIR_RISING)
- reg = HDC3020_S_T_RH_THRESH_HIGH;
- else
- reg = HDC3020_S_T_RH_THRESH_LOW;
+ u16 reg, reg_val, reg_thresh_rd, reg_clr_rd, reg_thresh_wr, reg_clr_wr;
+ s64 s_thresh, s_hyst, s_clr;
+ int s_val, thresh, clr, ret;
+
+ /* Select threshold registers */
+ if (dir == IIO_EV_DIR_RISING) {
+ reg_thresh_rd = HDC3020_R_T_RH_THRESH_HIGH;
+ reg_thresh_wr = HDC3020_S_T_RH_THRESH_HIGH;
+ reg_clr_rd = HDC3020_R_T_RH_THRESH_HIGH_CLR;
+ reg_clr_wr = HDC3020_S_T_RH_THRESH_HIGH_CLR;
} else {
- if (dir == IIO_EV_DIR_RISING)
- reg = HDC3020_S_T_RH_THRESH_HIGH_CLR;
- else
- reg = HDC3020_S_T_RH_THRESH_LOW_CLR;
+ reg_thresh_rd = HDC3020_R_T_RH_THRESH_LOW;
+ reg_thresh_wr = HDC3020_S_T_RH_THRESH_LOW;
+ reg_clr_rd = HDC3020_R_T_RH_THRESH_LOW_CLR;
+ reg_clr_wr = HDC3020_S_T_RH_THRESH_LOW_CLR;
}
guard(mutex)(&data->lock);
- ret = hdc3020_read_be16(data, reg);
+ ret = hdc3020_read_be16(data, reg_thresh_rd);
+ if (ret < 0)
+ return ret;
+
+ thresh = ret;
+ ret = hdc3020_read_be16(data, reg_clr_rd);
if (ret < 0)
return ret;
+ clr = ret;
+ /* Scale value to include decimal part into calculations */
+ s_val = (val < 0) ? (val * 1000000 - val2) : (val * 1000000 + val2);
switch (chan->type) {
case IIO_TEMP:
- /*
- * Calculate temperature threshold, shift it down to get the
- * truncated threshold representation in the 9LSBs while keeping
- * the current humidity threshold in the 7 MSBs.
- */
- tmp = ((u64)(((val + 45) * MICRO) + val2)) * 65535ULL;
- tmp = div_u64(tmp, MICRO * 175);
- val = tmp >> HDC3020_THRESH_TEMP_TRUNC_SHIFT;
- val = FIELD_PREP(HDC3020_THRESH_TEMP_MASK, val);
- val |= (FIELD_GET(HDC3020_THRESH_HUM_MASK, ret) <<
- HDC3020_THRESH_HUM_TRUNC_SHIFT);
+ switch (info) {
+ case IIO_EV_INFO_VALUE:
+ s_val = max(s_val, HDC3020_MIN_TEMP_MICRO);
+ s_val = min(s_val, HDC3020_MAX_TEMP_MICRO);
+ reg = reg_thresh_wr;
+ reg_val = hdc3020_thresh_set_temp(s_val, thresh);
+ ret = _hdc3020_write_thresh(data, reg, reg_val);
+ if (ret < 0)
+ return ret;
+
+ /* Calculate old hysteresis */
+ s_thresh = (s64)hdc3020_thresh_get_temp(thresh) * 1000000;
+ s_clr = (s64)hdc3020_thresh_get_temp(clr) * 1000000;
+ s_hyst = div_s64(abs(s_thresh - s_clr), 65535);
+ /* Set new threshold */
+ thresh = reg_val;
+ /* Set old hysteresis */
+ s_val = s_hyst;
+ fallthrough;
+ case IIO_EV_INFO_HYSTERESIS:
+ /*
+ * Function hdc3020_thresh_get_temp returns temperature
+ * in degree celsius scaled by 65535. Scale by 1000000
+ * to be able to subtract scaled hysteresis value.
+ */
+ s_thresh = (s64)hdc3020_thresh_get_temp(thresh) * 1000000;
+ /*
+ * Units of s_val are in micro degree celsius, scale by
+ * 65535 to get same units as s_thresh.
+ */
+ s_val = min(abs(s_val), HDC3020_MAX_TEMP_HYST_MICRO);
+ s_hyst = (s64)s_val * 65535;
+ s_clr = hdc3020_thresh_clr(s_thresh, s_hyst, dir);
+ s_clr = max(s_clr, HDC3020_MIN_TEMP_MICRO);
+ s_clr = min(s_clr, HDC3020_MAX_TEMP_MICRO);
+ reg = reg_clr_wr;
+ reg_val = hdc3020_thresh_set_temp(s_clr, clr);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
break;
case IIO_HUMIDITYRELATIVE:
- /*
- * Calculate humidity threshold, shift it down and up to get the
- * truncated threshold representation in the 7MSBs while keeping
- * the current temperature threshold in the 9 LSBs.
- */
- tmp = ((u64)((val * MICRO) + val2)) * 65535ULL;
- tmp = div_u64(tmp, MICRO * 100);
- val = tmp >> HDC3020_THRESH_HUM_TRUNC_SHIFT;
- val = FIELD_PREP(HDC3020_THRESH_HUM_MASK, val);
- val |= FIELD_GET(HDC3020_THRESH_TEMP_MASK, ret);
+ s_val = (s_val < 0) ? 0 : min(s_val, HDC3020_MAX_HUM_MICRO);
+ switch (info) {
+ case IIO_EV_INFO_VALUE:
+ reg = reg_thresh_wr;
+ reg_val = hdc3020_thresh_set_hum(s_val, thresh);
+ ret = _hdc3020_write_thresh(data, reg, reg_val);
+ if (ret < 0)
+ return ret;
+
+ /* Calculate old hysteresis */
+ s_thresh = (s64)hdc3020_thresh_get_hum(thresh) * 1000000;
+ s_clr = (s64)hdc3020_thresh_get_hum(clr) * 1000000;
+ s_hyst = div_s64(abs(s_thresh - s_clr), 65535);
+ /* Set new threshold */
+ thresh = reg_val;
+ /* Try to set old hysteresis */
+ s_val = min(abs(s_hyst), HDC3020_MAX_HUM_MICRO);
+ fallthrough;
+ case IIO_EV_INFO_HYSTERESIS:
+ /*
+ * Function hdc3020_thresh_get_hum returns relative
+ * humidity in percent scaled by 65535. Scale by 1000000
+ * to be able to subtract scaled hysteresis value.
+ */
+ s_thresh = (s64)hdc3020_thresh_get_hum(thresh) * 1000000;
+ /*
+ * Units of s_val are in micro percent, scale by 65535
+ * to get same units as s_thresh.
+ */
+ s_hyst = (s64)s_val * 65535;
+ s_clr = hdc3020_thresh_clr(s_thresh, s_hyst, dir);
+ s_clr = max(s_clr, 0);
+ s_clr = min(s_clr, HDC3020_MAX_HUM_MICRO);
+ reg = reg_clr_wr;
+ reg_val = hdc3020_thresh_set_hum(s_clr, clr);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
break;
default:
return -EOPNOTSUPP;
}
- put_unaligned_be16(reg, buf);
- put_unaligned_be16(val, buf + 2);
- buf[4] = crc8(hdc3020_crc8_table, buf + 2, 2, CRC8_INIT_VALUE);
- return hdc3020_write_bytes(data, buf, 5);
+ return _hdc3020_write_thresh(data, reg, reg_val);
}
static int hdc3020_read_thresh(struct iio_dev *indio_dev,
@@ -447,48 +608,60 @@ static int hdc3020_read_thresh(struct iio_dev *indio_dev,
int *val, int *val2)
{
struct hdc3020_data *data = iio_priv(indio_dev);
- u16 reg;
- int ret;
+ u16 reg_thresh, reg_clr;
+ int thresh, clr, ret;
- /* Select threshold register */
- if (info == IIO_EV_INFO_VALUE) {
- if (dir == IIO_EV_DIR_RISING)
- reg = HDC3020_R_T_RH_THRESH_HIGH;
- else
- reg = HDC3020_R_T_RH_THRESH_LOW;
+ /* Select threshold registers */
+ if (dir == IIO_EV_DIR_RISING) {
+ reg_thresh = HDC3020_R_T_RH_THRESH_HIGH;
+ reg_clr = HDC3020_R_T_RH_THRESH_HIGH_CLR;
} else {
- if (dir == IIO_EV_DIR_RISING)
- reg = HDC3020_R_T_RH_THRESH_HIGH_CLR;
- else
- reg = HDC3020_R_T_RH_THRESH_LOW_CLR;
+ reg_thresh = HDC3020_R_T_RH_THRESH_LOW;
+ reg_clr = HDC3020_R_T_RH_THRESH_LOW_CLR;
}
guard(mutex)(&data->lock);
- ret = hdc3020_read_be16(data, reg);
+ ret = hdc3020_read_be16(data, reg_thresh);
if (ret < 0)
return ret;
switch (chan->type) {
case IIO_TEMP:
- /*
- * Get the temperature threshold from 9 LSBs, shift them to get
- * the truncated temperature threshold representation and
- * calculate the threshold according to the formula in the
- * datasheet.
- */
- *val = FIELD_GET(HDC3020_THRESH_TEMP_MASK, ret);
- *val = *val << HDC3020_THRESH_TEMP_TRUNC_SHIFT;
- *val = -2949075 + (175 * (*val));
+ thresh = hdc3020_thresh_get_temp(ret);
+ switch (info) {
+ case IIO_EV_INFO_VALUE:
+ *val = thresh;
+ break;
+ case IIO_EV_INFO_HYSTERESIS:
+ ret = hdc3020_read_be16(data, reg_clr);
+ if (ret < 0)
+ return ret;
+
+ clr = hdc3020_thresh_get_temp(ret);
+ *val = abs(thresh - clr);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
*val2 = 65535;
return IIO_VAL_FRACTIONAL;
case IIO_HUMIDITYRELATIVE:
- /*
- * Get the humidity threshold from 7 MSBs, shift them to get the
- * truncated humidity threshold representation and calculate the
- * threshold according to the formula in the datasheet.
- */
- *val = FIELD_GET(HDC3020_THRESH_HUM_MASK, ret);
- *val = (*val << HDC3020_THRESH_HUM_TRUNC_SHIFT) * 100;
+ thresh = hdc3020_thresh_get_hum(ret);
+ switch (info) {
+ case IIO_EV_INFO_VALUE:
+ *val = thresh;
+ break;
+ case IIO_EV_INFO_HYSTERESIS:
+ ret = hdc3020_read_be16(data, reg_clr);
+ if (ret < 0)
+ return ret;
+
+ clr = hdc3020_thresh_get_hum(ret);
+ *val = abs(thresh - clr);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
*val2 = 65535;
return IIO_VAL_FRACTIONAL;
default:
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 161248067776..c89d85b54a1a 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -2743,6 +2743,7 @@ static void early_enable_iommu(struct amd_iommu *iommu)
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
iommu_set_exclusion_range(iommu);
+ iommu_enable_gt(iommu);
iommu_enable_ga(iommu);
iommu_enable_xt(iommu);
iommu_enable_irtcachedis(iommu);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index c2703599bb16..b19e8c0f48fa 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2061,6 +2061,12 @@ static void do_detach(struct iommu_dev_data *dev_data)
struct protection_domain *domain = dev_data->domain;
struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
+ /* Clear DTE and flush the entry */
+ amd_iommu_dev_update_dte(dev_data, false);
+
+ /* Flush IOTLB and wait for the flushes to finish */
+ amd_iommu_domain_flush_all(domain);
+
/* Clear GCR3 table */
if (pdom_is_sva_capable(domain))
destroy_gcr3_table(dev_data, domain);
@@ -2069,12 +2075,6 @@ static void do_detach(struct iommu_dev_data *dev_data)
dev_data->domain = NULL;
list_del(&dev_data->list);
- /* Clear DTE and flush the entry */
- amd_iommu_dev_update_dte(dev_data, false);
-
- /* Flush IOTLB and wait for the flushes to finish */
- amd_iommu_domain_flush_all(domain);
-
/* decrease reference counters - needs to happen after the flushes */
domain->dev_iommu[iommu->index] -= 1;
domain->dev_cnt -= 1;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 2e9811bf2a4e..fd11a080380c 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2114,12 +2114,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
if (ret)
return ret;
- ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
- if (ret) {
- domain_detach_iommu(domain, iommu);
- return ret;
- }
-
info->domain = domain;
spin_lock_irqsave(&domain->lock, flags);
list_add(&info->link, &domain->devices);
@@ -2137,15 +2131,21 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
else
ret = intel_pasid_setup_second_level(iommu, domain, dev, IOMMU_NO_PASID);
- if (ret) {
- device_block_translation(dev);
- return ret;
- }
+ if (ret)
+ goto out_block_translation;
if (sm_supported(info->iommu) || !domain_type_is_si(info->domain))
iommu_enable_pci_caps(info);
+ ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
+ if (ret)
+ goto out_block_translation;
+
return 0;
+
+out_block_translation:
+ device_block_translation(dev);
+ return ret;
}
/**
diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index c7ddebf312ad..b1f2080be2be 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -15,6 +15,7 @@
#include <linux/irqchip/chained_irq.h>
#include <linux/kernel.h>
#include <linux/syscore_ops.h>
+#include <asm/numa.h>
#define EIOINTC_REG_NODEMAP 0x14a0
#define EIOINTC_REG_IPMAP 0x14c0
@@ -339,7 +340,7 @@ static int __init pch_msi_parse_madt(union acpi_subtable_headers *header,
int node;
if (cpu_has_flatmode)
- node = cpu_to_node(eiointc_priv[nr_pics - 1]->node * CORES_PER_EIO_NODE);
+ node = early_cpu_to_node(eiointc_priv[nr_pics - 1]->node * CORES_PER_EIO_NODE);
else
node = eiointc_priv[nr_pics - 1]->node;
@@ -431,7 +432,7 @@ int __init eiointc_acpi_init(struct irq_domain *parent,
goto out_free_handle;
if (cpu_has_flatmode)
- node = cpu_to_node(acpi_eiointc->node * CORES_PER_EIO_NODE);
+ node = early_cpu_to_node(acpi_eiointc->node * CORES_PER_EIO_NODE);
else
node = acpi_eiointc->node;
acpi_set_vec_parent(node, priv->eiointc_domain, pch_group);
diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c
index e4b33aed1c97..7c4fe7ab4b83 100644
--- a/drivers/irqchip/irq-loongson-liointc.c
+++ b/drivers/irqchip/irq-loongson-liointc.c
@@ -28,7 +28,7 @@
#define LIOINTC_INTC_CHIP_START 0x20
-#define LIOINTC_REG_INTC_STATUS (LIOINTC_INTC_CHIP_START + 0x20)
+#define LIOINTC_REG_INTC_STATUS(core) (LIOINTC_INTC_CHIP_START + 0x20 + (core) * 8)
#define LIOINTC_REG_INTC_EN_STATUS (LIOINTC_INTC_CHIP_START + 0x04)
#define LIOINTC_REG_INTC_ENABLE (LIOINTC_INTC_CHIP_START + 0x08)
#define LIOINTC_REG_INTC_DISABLE (LIOINTC_INTC_CHIP_START + 0x0c)
@@ -217,7 +217,7 @@ static int liointc_init(phys_addr_t addr, unsigned long size, int revision,
goto out_free_priv;
for (i = 0; i < LIOINTC_NUM_CORES; i++)
- priv->core_isr[i] = base + LIOINTC_REG_INTC_STATUS;
+ priv->core_isr[i] = base + LIOINTC_REG_INTC_STATUS(i);
for (i = 0; i < LIOINTC_NUM_PARENT; i++)
priv->handler[i].parent_int_map = parent_int_map[i];
diff --git a/drivers/media/pci/intel/ipu6/ipu6-isys-video.c b/drivers/media/pci/intel/ipu6/ipu6-isys-video.c
index c8a33e1e910c..06090cc0a476 100644
--- a/drivers/media/pci/intel/ipu6/ipu6-isys-video.c
+++ b/drivers/media/pci/intel/ipu6/ipu6-isys-video.c
@@ -943,7 +943,7 @@ ipu6_isys_query_stream_by_source(struct ipu6_isys *isys, int source, u8 vc)
return NULL;
if (source < 0) {
- dev_err(&stream->isys->adev->auxdev.dev,
+ dev_err(&isys->adev->auxdev.dev,
"query stream with invalid port number\n");
return NULL;
}
diff --git a/drivers/media/pci/intel/ipu6/ipu6-isys.c b/drivers/media/pci/intel/ipu6/ipu6-isys.c
index 8b9b77719bb1..c4aff2e2009b 100644
--- a/drivers/media/pci/intel/ipu6/ipu6-isys.c
+++ b/drivers/media/pci/intel/ipu6/ipu6-isys.c
@@ -799,7 +799,7 @@ static int isys_register_devices(struct ipu6_isys *isys)
isys->v4l2_dev.mdev = &isys->media_dev;
isys->v4l2_dev.ctrl_handler = NULL;
- ret = v4l2_device_register(&pdev->dev, &isys->v4l2_dev);
+ ret = v4l2_device_register(dev, &isys->v4l2_dev);
if (ret < 0)
goto out_media_device_unregister;
diff --git a/drivers/media/pci/intel/ivsc/Kconfig b/drivers/media/pci/intel/ivsc/Kconfig
index 407a800c81bc..a7d9607ecdc6 100644
--- a/drivers/media/pci/intel/ivsc/Kconfig
+++ b/drivers/media/pci/intel/ivsc/Kconfig
@@ -4,6 +4,7 @@
config INTEL_VSC
tristate "Intel Visual Sensing Controller"
depends on INTEL_MEI && ACPI && VIDEO_DEV
+ depends on IPU_BRIDGE || !IPU_BRIDGE
select MEDIA_CONTROLLER
select VIDEO_V4L2_SUBDEV_API
select V4L2_FWNODE
diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
index 9a5f75163aca..8ede4ce93271 100644
--- a/drivers/mmc/host/moxart-mmc.c
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -131,10 +131,12 @@ struct moxart_host {
struct dma_async_tx_descriptor *tx_desc;
struct mmc_host *mmc;
struct mmc_request *mrq;
+ struct scatterlist *cur_sg;
struct completion dma_complete;
struct completion pio_complete;
- struct sg_mapping_iter sg_miter;
+ u32 num_sg;
+ u32 data_remain;
u32 data_len;
u32 fifo_width;
u32 timeout;
@@ -146,6 +148,35 @@ struct moxart_host {
bool is_removed;
};
+static inline void moxart_init_sg(struct moxart_host *host,
+ struct mmc_data *data)
+{
+ host->cur_sg = data->sg;
+ host->num_sg = data->sg_len;
+ host->data_remain = host->cur_sg->length;
+
+ if (host->data_remain > host->data_len)
+ host->data_remain = host->data_len;
+}
+
+static inline int moxart_next_sg(struct moxart_host *host)
+{
+ int remain;
+ struct mmc_data *data = host->mrq->cmd->data;
+
+ host->cur_sg++;
+ host->num_sg--;
+
+ if (host->num_sg > 0) {
+ host->data_remain = host->cur_sg->length;
+ remain = host->data_len - data->bytes_xfered;
+ if (remain > 0 && remain < host->data_remain)
+ host->data_remain = remain;
+ }
+
+ return host->num_sg;
+}
+
static int moxart_wait_for_status(struct moxart_host *host,
u32 mask, u32 *status)
{
@@ -278,29 +309,14 @@ static void moxart_transfer_dma(struct mmc_data *data, struct moxart_host *host)
static void moxart_transfer_pio(struct moxart_host *host)
{
- struct sg_mapping_iter *sgm = &host->sg_miter;
struct mmc_data *data = host->mrq->cmd->data;
u32 *sgp, len = 0, remain, status;
if (host->data_len == data->bytes_xfered)
return;
- /*
- * By updating sgm->consumes this will get a proper pointer into the
- * buffer at any time.
- */
- if (!sg_miter_next(sgm)) {
- /* This shold not happen */
- dev_err(mmc_dev(host->mmc), "ran out of scatterlist prematurely\n");
- data->error = -EINVAL;
- complete(&host->pio_complete);
- return;
- }
- sgp = sgm->addr;
- remain = sgm->length;
- if (remain > host->data_len)
- remain = host->data_len;
- sgm->consumed = 0;
+ sgp = sg_virt(host->cur_sg);
+ remain = host->data_remain;
if (data->flags & MMC_DATA_WRITE) {
while (remain > 0) {
@@ -315,7 +331,6 @@ static void moxart_transfer_pio(struct moxart_host *host)
sgp++;
len += 4;
}
- sgm->consumed += len;
remain -= len;
}
@@ -332,22 +347,22 @@ static void moxart_transfer_pio(struct moxart_host *host)
sgp++;
len += 4;
}
- sgm->consumed += len;
remain -= len;
}
}
- data->bytes_xfered += sgm->consumed;
- if (host->data_len == data->bytes_xfered) {
+ data->bytes_xfered += host->data_remain - remain;
+ host->data_remain = remain;
+
+ if (host->data_len != data->bytes_xfered)
+ moxart_next_sg(host);
+ else
complete(&host->pio_complete);
- return;
- }
}
static void moxart_prepare_data(struct moxart_host *host)
{
struct mmc_data *data = host->mrq->cmd->data;
- unsigned int flags = SG_MITER_ATOMIC; /* Used from IRQ */
u32 datactrl;
int blksz_bits;
@@ -358,19 +373,15 @@ static void moxart_prepare_data(struct moxart_host *host)
blksz_bits = ffs(data->blksz) - 1;
BUG_ON(1 << blksz_bits != data->blksz);
+ moxart_init_sg(host, data);
+
datactrl = DCR_DATA_EN | (blksz_bits & DCR_BLK_SIZE);
- if (data->flags & MMC_DATA_WRITE) {
- flags |= SG_MITER_FROM_SG;
+ if (data->flags & MMC_DATA_WRITE)
datactrl |= DCR_DATA_WRITE;
- } else {
- flags |= SG_MITER_TO_SG;
- }
if (moxart_use_dma(host))
datactrl |= DCR_DMA_EN;
- else
- sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags);
writel(DCR_DATA_FIFO_RESET, host->base + REG_DATA_CONTROL);
writel(MASK_DATA | FIFO_URUN | FIFO_ORUN, host->base + REG_CLEAR);
@@ -443,9 +454,6 @@ static void moxart_request(struct mmc_host *mmc, struct mmc_request *mrq)
}
request_done:
- if (!moxart_use_dma(host))
- sg_miter_stop(&host->sg_miter);
-
spin_unlock_irqrestore(&host->lock, flags);
mmc_request_done(host->mmc, mrq);
}
diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c
index 9053526fa212..150fb477b7cc 100644
--- a/drivers/mmc/host/sdhci-brcmstb.c
+++ b/drivers/mmc/host/sdhci-brcmstb.c
@@ -24,6 +24,7 @@
#define BRCMSTB_MATCH_FLAGS_NO_64BIT BIT(0)
#define BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT BIT(1)
#define BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE BIT(2)
+#define BRCMSTB_MATCH_FLAGS_USE_CARD_BUSY BIT(4)
#define BRCMSTB_PRIV_FLAGS_HAS_CQE BIT(0)
#define BRCMSTB_PRIV_FLAGS_GATE_CLOCK BIT(1)
@@ -384,6 +385,9 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev)
if (match_priv->flags & BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT)
host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
+ if (!(match_priv->flags & BRCMSTB_MATCH_FLAGS_USE_CARD_BUSY))
+ host->mmc_host_ops.card_busy = NULL;
+
/* Change the base clock frequency if the DT property exists */
if (device_property_read_u32(&pdev->dev, "clock-frequency",
&priv->base_freq_hz) != 0)
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index ef89ec382bfe..23e6ba70144c 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -1326,7 +1326,7 @@ static int jmicron_pmos(struct sdhci_pci_chip *chip, int on)
ret = pci_read_config_byte(chip->pdev, 0xAE, &scratch);
if (ret)
- return ret;
+ goto fail;
/*
* Turn PMOS on [bit 0], set over current detection to 2.4 V
@@ -1337,7 +1337,10 @@ static int jmicron_pmos(struct sdhci_pci_chip *chip, int on)
else
scratch &= ~0x47;
- return pci_write_config_byte(chip->pdev, 0xAE, scratch);
+ ret = pci_write_config_byte(chip->pdev, 0xAE, scratch);
+
+fail:
+ return pcibios_err_to_errno(ret);
}
static int jmicron_probe(struct sdhci_pci_chip *chip)
@@ -2202,7 +2205,7 @@ static int sdhci_pci_probe(struct pci_dev *pdev,
ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &slots);
if (ret)
- return ret;
+ return pcibios_err_to_errno(ret);
slots = PCI_SLOT_INFO_SLOTS(slots) + 1;
dev_dbg(&pdev->dev, "found %d slot(s)\n", slots);
@@ -2211,7 +2214,7 @@ static int sdhci_pci_probe(struct pci_dev *pdev,
ret = pci_read_config_byte(pdev, PCI_SLOT_INFO, &first_bar);
if (ret)
- return ret;
+ return pcibios_err_to_errno(ret);
first_bar &= PCI_SLOT_INFO_FIRST_BAR_MASK;
diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c
index d4a02184784a..058bef1c7e41 100644
--- a/drivers/mmc/host/sdhci-pci-o2micro.c
+++ b/drivers/mmc/host/sdhci-pci-o2micro.c
@@ -823,7 +823,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch &= 0x7f;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
@@ -834,7 +834,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_CLKREQ, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch |= 0x20;
pci_write_config_byte(chip->pdev, O2_SD_CLKREQ, scratch);
@@ -843,7 +843,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
*/
ret = pci_read_config_byte(chip->pdev, O2_SD_CAPS, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch |= 0x01;
pci_write_config_byte(chip->pdev, O2_SD_CAPS, scratch);
pci_write_config_byte(chip->pdev, O2_SD_CAPS, 0x73);
@@ -856,7 +856,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_INF_MOD, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch |= 0x08;
pci_write_config_byte(chip->pdev, O2_SD_INF_MOD, scratch);
@@ -864,7 +864,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch |= 0x80;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
break;
@@ -875,7 +875,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch &= 0x7f;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
@@ -886,7 +886,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
O2_SD_FUNC_REG0,
&scratch_32);
if (ret)
- return ret;
+ goto read_fail;
scratch_32 = ((scratch_32 & 0xFF000000) >> 24);
/* Check Whether subId is 0x11 or 0x12 */
@@ -898,7 +898,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
O2_SD_FUNC_REG4,
&scratch_32);
if (ret)
- return ret;
+ goto read_fail;
/* Enable Base Clk setting change */
scratch_32 |= O2_SD_FREG4_ENABLE_CLK_SET;
@@ -921,7 +921,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_dword(chip->pdev,
O2_SD_CLK_SETTING, &scratch_32);
if (ret)
- return ret;
+ goto read_fail;
scratch_32 &= ~(0xFF00);
scratch_32 |= 0x07E0C800;
@@ -931,14 +931,14 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_dword(chip->pdev,
O2_SD_CLKREQ, &scratch_32);
if (ret)
- return ret;
+ goto read_fail;
scratch_32 |= 0x3;
pci_write_config_dword(chip->pdev, O2_SD_CLKREQ, scratch_32);
ret = pci_read_config_dword(chip->pdev,
O2_SD_PLL_SETTING, &scratch_32);
if (ret)
- return ret;
+ goto read_fail;
scratch_32 &= ~(0x1F3F070E);
scratch_32 |= 0x18270106;
@@ -949,7 +949,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_dword(chip->pdev,
O2_SD_CAP_REG2, &scratch_32);
if (ret)
- return ret;
+ goto read_fail;
scratch_32 &= ~(0xE0);
pci_write_config_dword(chip->pdev,
O2_SD_CAP_REG2, scratch_32);
@@ -961,7 +961,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch |= 0x80;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
break;
@@ -971,7 +971,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch &= 0x7f;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
@@ -979,7 +979,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_dword(chip->pdev,
O2_SD_PLL_SETTING, &scratch_32);
if (ret)
- return ret;
+ goto read_fail;
if ((scratch_32 & 0xff000000) == 0x01000000) {
scratch_32 &= 0x0000FFFF;
@@ -998,7 +998,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
O2_SD_FUNC_REG4,
&scratch_32);
if (ret)
- return ret;
+ goto read_fail;
scratch_32 |= (1 << 22);
pci_write_config_dword(chip->pdev,
O2_SD_FUNC_REG4, scratch_32);
@@ -1017,7 +1017,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
ret = pci_read_config_byte(chip->pdev,
O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch |= 0x80;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
break;
@@ -1028,7 +1028,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
/* UnLock WP */
ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch &= 0x7f;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
@@ -1057,13 +1057,16 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
/* Lock WP */
ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch);
if (ret)
- return ret;
+ goto read_fail;
scratch |= 0x80;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
break;
}
return 0;
+
+read_fail:
+ return pcibios_err_to_errno(ret);
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 746f4cf7ab03..112584aa0772 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2515,26 +2515,29 @@ EXPORT_SYMBOL_GPL(sdhci_get_cd_nogpio);
static int sdhci_check_ro(struct sdhci_host *host)
{
- unsigned long flags;
+ bool allow_invert = false;
int is_readonly;
- spin_lock_irqsave(&host->lock, flags);
-
- if (host->flags & SDHCI_DEVICE_DEAD)
+ if (host->flags & SDHCI_DEVICE_DEAD) {
is_readonly = 0;
- else if (host->ops->get_ro)
+ } else if (host->ops->get_ro) {
is_readonly = host->ops->get_ro(host);
- else if (mmc_can_gpio_ro(host->mmc))
+ } else if (mmc_can_gpio_ro(host->mmc)) {
is_readonly = mmc_gpio_get_ro(host->mmc);
- else
+ /* Do not invert twice */
+ allow_invert = !(host->mmc->caps2 & MMC_CAP2_RO_ACTIVE_HIGH);
+ } else {
is_readonly = !(sdhci_readl(host, SDHCI_PRESENT_STATE)
& SDHCI_WRITE_PROTECT);
+ allow_invert = true;
+ }
- spin_unlock_irqrestore(&host->lock, flags);
+ if (is_readonly >= 0 &&
+ allow_invert &&
+ (host->quirks & SDHCI_QUIRK_INVERTED_WRITE_PROTECT))
+ is_readonly = !is_readonly;
- /* This quirk needs to be replaced by a callback-function later */
- return host->quirks & SDHCI_QUIRK_INVERTED_WRITE_PROTECT ?
- !is_readonly : is_readonly;
+ return is_readonly;
}
#define SAMPLE_COUNT 5
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index d7dbbd469b89..53e16d39af4b 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -1093,28 +1093,32 @@ static int nand_fill_column_cycles(struct nand_chip *chip, u8 *addrs,
unsigned int offset_in_page)
{
struct mtd_info *mtd = nand_to_mtd(chip);
+ bool ident_stage = !mtd->writesize;
- /* Make sure the offset is less than the actual page size. */
- if (offset_in_page > mtd->writesize + mtd->oobsize)
- return -EINVAL;
+ /* Bypass all checks during NAND identification */
+ if (likely(!ident_stage)) {
+ /* Make sure the offset is less than the actual page size. */
+ if (offset_in_page > mtd->writesize + mtd->oobsize)
+ return -EINVAL;
- /*
- * On small page NANDs, there's a dedicated command to access the OOB
- * area, and the column address is relative to the start of the OOB
- * area, not the start of the page. Asjust the address accordingly.
- */
- if (mtd->writesize <= 512 && offset_in_page >= mtd->writesize)
- offset_in_page -= mtd->writesize;
+ /*
+ * On small page NANDs, there's a dedicated command to access the OOB
+ * area, and the column address is relative to the start of the OOB
+ * area, not the start of the page. Asjust the address accordingly.
+ */
+ if (mtd->writesize <= 512 && offset_in_page >= mtd->writesize)
+ offset_in_page -= mtd->writesize;
- /*
- * The offset in page is expressed in bytes, if the NAND bus is 16-bit
- * wide, then it must be divided by 2.
- */
- if (chip->options & NAND_BUSWIDTH_16) {
- if (WARN_ON(offset_in_page % 2))
- return -EINVAL;
+ /*
+ * The offset in page is expressed in bytes, if the NAND bus is 16-bit
+ * wide, then it must be divided by 2.
+ */
+ if (chip->options & NAND_BUSWIDTH_16) {
+ if (WARN_ON(offset_in_page % 2))
+ return -EINVAL;
- offset_in_page /= 2;
+ offset_in_page /= 2;
+ }
}
addrs[0] = offset_in_page;
@@ -1123,7 +1127,7 @@ static int nand_fill_column_cycles(struct nand_chip *chip, u8 *addrs,
* Small page NANDs use 1 cycle for the columns, while large page NANDs
* need 2
*/
- if (mtd->writesize <= 512)
+ if (!ident_stage && mtd->writesize <= 512)
return 1;
addrs[1] = offset_in_page >> 8;
@@ -1436,16 +1440,19 @@ int nand_change_read_column_op(struct nand_chip *chip,
unsigned int len, bool force_8bit)
{
struct mtd_info *mtd = nand_to_mtd(chip);
+ bool ident_stage = !mtd->writesize;
if (len && !buf)
return -EINVAL;
- if (offset_in_page + len > mtd->writesize + mtd->oobsize)
- return -EINVAL;
+ if (!ident_stage) {
+ if (offset_in_page + len > mtd->writesize + mtd->oobsize)
+ return -EINVAL;
- /* Small page NANDs do not support column change. */
- if (mtd->writesize <= 512)
- return -ENOTSUPP;
+ /* Small page NANDs do not support column change. */
+ if (mtd->writesize <= 512)
+ return -ENOTSUPP;
+ }
if (nand_has_exec_op(chip)) {
const struct nand_interface_config *conf =
@@ -2173,7 +2180,7 @@ EXPORT_SYMBOL_GPL(nand_reset_op);
int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len,
bool force_8bit, bool check_only)
{
- if (!len || !buf)
+ if (!len || (!check_only && !buf))
return -EINVAL;
if (nand_has_exec_op(chip)) {
@@ -6301,6 +6308,7 @@ static const struct nand_ops rawnand_ops = {
static int nand_scan_tail(struct nand_chip *chip)
{
struct mtd_info *mtd = nand_to_mtd(chip);
+ struct nand_device *base = &chip->base;
struct nand_ecc_ctrl *ecc = &chip->ecc;
int ret, i;
@@ -6445,9 +6453,13 @@ static int nand_scan_tail(struct nand_chip *chip)
if (!ecc->write_oob_raw)
ecc->write_oob_raw = ecc->write_oob;
- /* propagate ecc info to mtd_info */
+ /* Propagate ECC info to the generic NAND and MTD layers */
mtd->ecc_strength = ecc->strength;
+ if (!base->ecc.ctx.conf.strength)
+ base->ecc.ctx.conf.strength = ecc->strength;
mtd->ecc_step_size = ecc->size;
+ if (!base->ecc.ctx.conf.step_size)
+ base->ecc.ctx.conf.step_size = ecc->size;
/*
* Set the number of read / write steps for one page depending on ECC
@@ -6455,6 +6467,8 @@ static int nand_scan_tail(struct nand_chip *chip)
*/
if (!ecc->steps)
ecc->steps = mtd->writesize / ecc->size;
+ if (!base->ecc.ctx.nsteps)
+ base->ecc.ctx.nsteps = ecc->steps;
if (ecc->steps * ecc->size != mtd->writesize) {
WARN(1, "Invalid ECC parameters\n");
ret = -EINVAL;
diff --git a/drivers/mtd/nand/raw/rockchip-nand-controller.c b/drivers/mtd/nand/raw/rockchip-nand-controller.c
index 7baaef69d70a..55580447633b 100644
--- a/drivers/mtd/nand/raw/rockchip-nand-controller.c
+++ b/drivers/mtd/nand/raw/rockchip-nand-controller.c
@@ -420,13 +420,13 @@ static int rk_nfc_setup_interface(struct nand_chip *chip, int target,
u32 rate, tc2rw, trwpw, trw2c;
u32 temp;
- if (target < 0)
- return 0;
-
timings = nand_get_sdr_timings(conf);
if (IS_ERR(timings))
return -EOPNOTSUPP;
+ if (target < 0)
+ return 0;
+
if (IS_ERR(nfc->nfc_clk))
rate = clk_get_rate(nfc->ahb_clk);
else
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 0cacd7027e35..bc80fb6397dc 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1214,9 +1214,9 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond,
__be32 target;
if (newval->string) {
- if (!in4_pton(newval->string+1, -1, (u8 *)&target, -1, NULL)) {
- netdev_err(bond->dev, "invalid ARP target %pI4 specified\n",
- &target);
+ if (strlen(newval->string) < 1 ||
+ !in4_pton(newval->string + 1, -1, (u8 *)&target, -1, NULL)) {
+ netdev_err(bond->dev, "invalid ARP target specified\n");
return ret;
}
if (newval->string[0] == '+')
diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h
index 3a9edc292593..92b2bd8628e6 100644
--- a/drivers/net/can/m_can/m_can.h
+++ b/drivers/net/can/m_can/m_can.h
@@ -91,7 +91,7 @@ struct m_can_classdev {
ktime_t irq_timer_wait;
- struct m_can_ops *ops;
+ const struct m_can_ops *ops;
int version;
u32 irqstatus;
diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c
index 45400de4163d..d72fe771dfc7 100644
--- a/drivers/net/can/m_can/m_can_pci.c
+++ b/drivers/net/can/m_can/m_can_pci.c
@@ -77,7 +77,7 @@ static int iomap_write_fifo(struct m_can_classdev *cdev, int offset,
return 0;
}
-static struct m_can_ops m_can_pci_ops = {
+static const struct m_can_ops m_can_pci_ops = {
.read_reg = iomap_read_reg,
.write_reg = iomap_write_reg,
.write_fifo = iomap_write_fifo,
diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c
index df0367124b4c..983ab80260dd 100644
--- a/drivers/net/can/m_can/m_can_platform.c
+++ b/drivers/net/can/m_can/m_can_platform.c
@@ -68,7 +68,7 @@ static int iomap_write_fifo(struct m_can_classdev *cdev, int offset,
return 0;
}
-static struct m_can_ops m_can_plat_ops = {
+static const struct m_can_ops m_can_plat_ops = {
.read_reg = iomap_read_reg,
.write_reg = iomap_write_reg,
.write_fifo = iomap_write_fifo,
diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c
index d723206ac7c9..2f73bf3abad8 100644
--- a/drivers/net/can/m_can/tcan4x5x-core.c
+++ b/drivers/net/can/m_can/tcan4x5x-core.c
@@ -357,7 +357,7 @@ static int tcan4x5x_get_gpios(struct m_can_classdev *cdev,
return 0;
}
-static struct m_can_ops tcan4x5x_ops = {
+static const struct m_can_ops tcan4x5x_ops = {
.init = tcan4x5x_init,
.read_reg = tcan4x5x_read_reg,
.write_reg = tcan4x5x_write_reg,
diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c
index b82842718735..c919668bbe7a 100644
--- a/drivers/net/can/rcar/rcar_canfd.c
+++ b/drivers/net/can/rcar/rcar_canfd.c
@@ -508,12 +508,6 @@
*/
#define RCANFD_CFFIFO_IDX 0
-/* fCAN clock select register settings */
-enum rcar_canfd_fcanclk {
- RCANFD_CANFDCLK = 0, /* CANFD clock */
- RCANFD_EXTCLK, /* Externally input clock */
-};
-
struct rcar_canfd_global;
struct rcar_canfd_hw_info {
@@ -545,8 +539,8 @@ struct rcar_canfd_global {
struct platform_device *pdev; /* Respective platform device */
struct clk *clkp; /* Peripheral clock */
struct clk *can_clk; /* fCAN clock */
- enum rcar_canfd_fcanclk fcan; /* CANFD or Ext clock */
unsigned long channels_mask; /* Enabled channels mask */
+ bool extclk; /* CANFD or Ext clock */
bool fdmode; /* CAN FD or Classical CAN only mode */
struct reset_control *rstc1;
struct reset_control *rstc2;
@@ -633,28 +627,28 @@ static inline void rcar_canfd_update(u32 mask, u32 val, u32 __iomem *reg)
static inline u32 rcar_canfd_read(void __iomem *base, u32 offset)
{
- return readl(base + (offset));
+ return readl(base + offset);
}
static inline void rcar_canfd_write(void __iomem *base, u32 offset, u32 val)
{
- writel(val, base + (offset));
+ writel(val, base + offset);
}
static void rcar_canfd_set_bit(void __iomem *base, u32 reg, u32 val)
{
- rcar_canfd_update(val, val, base + (reg));
+ rcar_canfd_update(val, val, base + reg);
}
static void rcar_canfd_clear_bit(void __iomem *base, u32 reg, u32 val)
{
- rcar_canfd_update(val, 0, base + (reg));
+ rcar_canfd_update(val, 0, base + reg);
}
static void rcar_canfd_update_bit(void __iomem *base, u32 reg,
u32 mask, u32 val)
{
- rcar_canfd_update(mask, val, base + (reg));
+ rcar_canfd_update(mask, val, base + reg);
}
static void rcar_canfd_get_data(struct rcar_canfd_channel *priv,
@@ -665,7 +659,7 @@ static void rcar_canfd_get_data(struct rcar_canfd_channel *priv,
lwords = DIV_ROUND_UP(cf->len, sizeof(u32));
for (i = 0; i < lwords; i++)
*((u32 *)cf->data + i) =
- rcar_canfd_read(priv->base, off + (i * sizeof(u32)));
+ rcar_canfd_read(priv->base, off + i * sizeof(u32));
}
static void rcar_canfd_put_data(struct rcar_canfd_channel *priv,
@@ -675,7 +669,7 @@ static void rcar_canfd_put_data(struct rcar_canfd_channel *priv,
lwords = DIV_ROUND_UP(cf->len, sizeof(u32));
for (i = 0; i < lwords; i++)
- rcar_canfd_write(priv->base, off + (i * sizeof(u32)),
+ rcar_canfd_write(priv->base, off + i * sizeof(u32),
*((u32 *)cf->data + i));
}
@@ -777,7 +771,7 @@ static void rcar_canfd_configure_controller(struct rcar_canfd_global *gpriv)
cfg |= RCANFD_GCFG_CMPOC;
/* Set External Clock if selected */
- if (gpriv->fcan != RCANFD_CANFDCLK)
+ if (gpriv->extclk)
cfg |= RCANFD_GCFG_DCS;
rcar_canfd_set_bit(gpriv->base, RCANFD_GCFG, cfg);
@@ -1941,16 +1935,12 @@ static int rcar_canfd_probe(struct platform_device *pdev)
return dev_err_probe(dev, PTR_ERR(gpriv->can_clk),
"cannot get canfd clock\n");
- gpriv->fcan = RCANFD_CANFDCLK;
-
+ /* CANFD clock may be further divided within the IP */
+ fcan_freq = clk_get_rate(gpriv->can_clk) / info->postdiv;
} else {
- gpriv->fcan = RCANFD_EXTCLK;
+ fcan_freq = clk_get_rate(gpriv->can_clk);
+ gpriv->extclk = true;
}
- fcan_freq = clk_get_rate(gpriv->can_clk);
-
- if (gpriv->fcan == RCANFD_CANFDCLK)
- /* CANFD clock is further divided by (1/2) within the IP */
- fcan_freq /= info->postdiv;
addr = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(addr)) {
@@ -2059,8 +2049,9 @@ static int rcar_canfd_probe(struct platform_device *pdev)
}
platform_set_drvdata(pdev, gpriv);
- dev_info(dev, "global operational state (clk %d, fdmode %d)\n",
- gpriv->fcan, gpriv->fdmode);
+ dev_info(dev, "global operational state (%s clk, %s mode)\n",
+ gpriv->extclk ? "ext" : "canfd",
+ gpriv->fdmode ? "fd" : "classical");
return 0;
fail_channel:
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index c116d3255207..3e7526274e34 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -2,7 +2,7 @@
//
// mcp251xfd - Microchip MCP251xFD Family CAN controller driver
//
-// Copyright (c) 2019, 2020, 2021 Pengutronix,
+// Copyright (c) 2019, 2020, 2021, 2023 Pengutronix,
// Marc Kleine-Budde <[email protected]>
//
// Based on:
@@ -744,6 +744,7 @@ static void mcp251xfd_chip_stop(struct mcp251xfd_priv *priv,
mcp251xfd_chip_interrupts_disable(priv);
mcp251xfd_chip_rx_int_disable(priv);
+ mcp251xfd_timestamp_stop(priv);
mcp251xfd_chip_sleep(priv);
}
@@ -763,6 +764,8 @@ static int mcp251xfd_chip_start(struct mcp251xfd_priv *priv)
if (err)
goto out_chip_stop;
+ mcp251xfd_timestamp_start(priv);
+
err = mcp251xfd_set_bittiming(priv);
if (err)
goto out_chip_stop;
@@ -791,7 +794,7 @@ static int mcp251xfd_chip_start(struct mcp251xfd_priv *priv)
return 0;
- out_chip_stop:
+out_chip_stop:
mcp251xfd_dump(priv);
mcp251xfd_chip_stop(priv, CAN_STATE_STOPPED);
@@ -867,18 +870,18 @@ static int mcp251xfd_get_berr_counter(const struct net_device *ndev,
static struct sk_buff *
mcp251xfd_alloc_can_err_skb(struct mcp251xfd_priv *priv,
- struct can_frame **cf, u32 *timestamp)
+ struct can_frame **cf, u32 *ts_raw)
{
struct sk_buff *skb;
int err;
- err = mcp251xfd_get_timestamp(priv, timestamp);
+ err = mcp251xfd_get_timestamp_raw(priv, ts_raw);
if (err)
return NULL;
skb = alloc_can_err_skb(priv->ndev, cf);
if (skb)
- mcp251xfd_skb_set_timestamp(priv, skb, *timestamp);
+ mcp251xfd_skb_set_timestamp_raw(priv, skb, *ts_raw);
return skb;
}
@@ -889,7 +892,7 @@ static int mcp251xfd_handle_rxovif(struct mcp251xfd_priv *priv)
struct mcp251xfd_rx_ring *ring;
struct sk_buff *skb;
struct can_frame *cf;
- u32 timestamp, rxovif;
+ u32 ts_raw, rxovif;
int err, i;
stats->rx_over_errors++;
@@ -924,14 +927,14 @@ static int mcp251xfd_handle_rxovif(struct mcp251xfd_priv *priv)
return err;
}
- skb = mcp251xfd_alloc_can_err_skb(priv, &cf, &timestamp);
+ skb = mcp251xfd_alloc_can_err_skb(priv, &cf, &ts_raw);
if (!skb)
return 0;
cf->can_id |= CAN_ERR_CRTL;
cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
- err = can_rx_offload_queue_timestamp(&priv->offload, skb, timestamp);
+ err = can_rx_offload_queue_timestamp(&priv->offload, skb, ts_raw);
if (err)
stats->rx_fifo_errors++;
@@ -948,12 +951,12 @@ static int mcp251xfd_handle_txatif(struct mcp251xfd_priv *priv)
static int mcp251xfd_handle_ivmif(struct mcp251xfd_priv *priv)
{
struct net_device_stats *stats = &priv->ndev->stats;
- u32 bdiag1, timestamp;
+ u32 bdiag1, ts_raw;
struct sk_buff *skb;
struct can_frame *cf = NULL;
int err;
- err = mcp251xfd_get_timestamp(priv, &timestamp);
+ err = mcp251xfd_get_timestamp_raw(priv, &ts_raw);
if (err)
return err;
@@ -1035,8 +1038,8 @@ static int mcp251xfd_handle_ivmif(struct mcp251xfd_priv *priv)
if (!cf)
return 0;
- mcp251xfd_skb_set_timestamp(priv, skb, timestamp);
- err = can_rx_offload_queue_timestamp(&priv->offload, skb, timestamp);
+ mcp251xfd_skb_set_timestamp_raw(priv, skb, ts_raw);
+ err = can_rx_offload_queue_timestamp(&priv->offload, skb, ts_raw);
if (err)
stats->rx_fifo_errors++;
@@ -1049,7 +1052,7 @@ static int mcp251xfd_handle_cerrif(struct mcp251xfd_priv *priv)
struct sk_buff *skb;
struct can_frame *cf = NULL;
enum can_state new_state, rx_state, tx_state;
- u32 trec, timestamp;
+ u32 trec, ts_raw;
int err;
err = regmap_read(priv->map_reg, MCP251XFD_REG_TREC, &trec);
@@ -1079,7 +1082,7 @@ static int mcp251xfd_handle_cerrif(struct mcp251xfd_priv *priv)
/* The skb allocation might fail, but can_change_state()
* handles cf == NULL.
*/
- skb = mcp251xfd_alloc_can_err_skb(priv, &cf, &timestamp);
+ skb = mcp251xfd_alloc_can_err_skb(priv, &cf, &ts_raw);
can_change_state(priv->ndev, cf, tx_state, rx_state);
if (new_state == CAN_STATE_BUS_OFF) {
@@ -1110,7 +1113,7 @@ static int mcp251xfd_handle_cerrif(struct mcp251xfd_priv *priv)
cf->data[7] = bec.rxerr;
}
- err = can_rx_offload_queue_timestamp(&priv->offload, skb, timestamp);
+ err = can_rx_offload_queue_timestamp(&priv->offload, skb, ts_raw);
if (err)
stats->rx_fifo_errors++;
@@ -1135,7 +1138,7 @@ mcp251xfd_handle_modif(const struct mcp251xfd_priv *priv, bool *set_normal_mode)
return 0;
}
- /* According to MCP2517FD errata DS80000792B 1., during a TX
+ /* According to MCP2517FD errata DS80000792C 1., during a TX
* MAB underflow, the controller will transition to Restricted
* Operation Mode or Listen Only Mode (depending on SERR2LOM).
*
@@ -1180,7 +1183,7 @@ static int mcp251xfd_handle_serrif(struct mcp251xfd_priv *priv)
/* TX MAB underflow
*
- * According to MCP2517FD Errata DS80000792B 1. a TX MAB
+ * According to MCP2517FD Errata DS80000792C 1. a TX MAB
* underflow is indicated by SERRIF and MODIF.
*
* In addition to the effects mentioned in the Errata, there
@@ -1224,7 +1227,7 @@ static int mcp251xfd_handle_serrif(struct mcp251xfd_priv *priv)
/* RX MAB overflow
*
- * According to MCP2517FD Errata DS80000792B 1. a RX MAB
+ * According to MCP2517FD Errata DS80000792C 1. a RX MAB
* overflow is indicated by SERRIF.
*
* In addition to the effects mentioned in the Errata, (most
@@ -1331,7 +1334,8 @@ mcp251xfd_handle_eccif(struct mcp251xfd_priv *priv, bool set_normal_mode)
return err;
/* Errata Reference:
- * mcp2517fd: DS80000789B, mcp2518fd: DS80000792C 2.
+ * mcp2517fd: DS80000789C 3., mcp2518fd: DS80000792E 2.,
+ * mcp251863: DS80000984A 2.
*
* ECC single error correction does not work in all cases:
*
@@ -1576,7 +1580,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
handled = IRQ_HANDLED;
} while (1);
- out_fail:
+out_fail:
can_rx_offload_threaded_irq_finish(&priv->offload);
netdev_err(priv->ndev, "IRQ handler returned %d (intf=0x%08x).\n",
@@ -1610,11 +1614,12 @@ static int mcp251xfd_open(struct net_device *ndev)
if (err)
goto out_mcp251xfd_ring_free;
+ mcp251xfd_timestamp_init(priv);
+
err = mcp251xfd_chip_start(priv);
if (err)
goto out_transceiver_disable;
- mcp251xfd_timestamp_init(priv);
clear_bit(MCP251XFD_FLAGS_DOWN, priv->flags);
can_rx_offload_enable(&priv->offload);
@@ -1641,22 +1646,21 @@ static int mcp251xfd_open(struct net_device *ndev)
return 0;
- out_free_irq:
+out_free_irq:
free_irq(spi->irq, priv);
- out_destroy_workqueue:
+out_destroy_workqueue:
destroy_workqueue(priv->wq);
- out_can_rx_offload_disable:
+out_can_rx_offload_disable:
can_rx_offload_disable(&priv->offload);
set_bit(MCP251XFD_FLAGS_DOWN, priv->flags);
- mcp251xfd_timestamp_stop(priv);
- out_transceiver_disable:
+out_transceiver_disable:
mcp251xfd_transceiver_disable(priv);
- out_mcp251xfd_ring_free:
+out_mcp251xfd_ring_free:
mcp251xfd_ring_free(priv);
- out_pm_runtime_put:
+out_pm_runtime_put:
mcp251xfd_chip_stop(priv, CAN_STATE_STOPPED);
pm_runtime_put(ndev->dev.parent);
- out_close_candev:
+out_close_candev:
close_candev(ndev);
return err;
@@ -1674,7 +1678,6 @@ static int mcp251xfd_stop(struct net_device *ndev)
free_irq(ndev->irq, priv);
destroy_workqueue(priv->wq);
can_rx_offload_disable(&priv->offload);
- mcp251xfd_timestamp_stop(priv);
mcp251xfd_chip_stop(priv, CAN_STATE_STOPPED);
mcp251xfd_transceiver_disable(priv);
mcp251xfd_ring_free(priv);
@@ -1820,9 +1823,9 @@ mcp251xfd_register_get_dev_id(const struct mcp251xfd_priv *priv, u32 *dev_id,
*effective_speed_hz_slow = xfer[0].effective_speed_hz;
*effective_speed_hz_fast = xfer[1].effective_speed_hz;
- out_kfree_buf_tx:
+out_kfree_buf_tx:
kfree(buf_tx);
- out_kfree_buf_rx:
+out_kfree_buf_rx:
kfree(buf_rx);
return err;
@@ -1936,13 +1939,13 @@ static int mcp251xfd_register(struct mcp251xfd_priv *priv)
return 0;
- out_unregister_candev:
+out_unregister_candev:
unregister_candev(ndev);
- out_chip_sleep:
+out_chip_sleep:
mcp251xfd_chip_sleep(priv);
- out_runtime_disable:
+out_runtime_disable:
pm_runtime_disable(ndev->dev.parent);
- out_runtime_put_noidle:
+out_runtime_put_noidle:
pm_runtime_put_noidle(ndev->dev.parent);
mcp251xfd_clks_and_vdd_disable(priv);
@@ -2095,7 +2098,8 @@ static int mcp251xfd_probe(struct spi_device *spi)
priv->devtype_data = *(struct mcp251xfd_devtype_data *)spi_get_device_match_data(spi);
/* Errata Reference:
- * mcp2517fd: DS80000792C 5., mcp2518fd: DS80000789C 4.
+ * mcp2517fd: DS80000792C 5., mcp2518fd: DS80000789E 4.,
+ * mcp251863: DS80000984A 4.
*
* The SPI can write corrupted data to the RAM at fast SPI
* speeds:
@@ -2155,9 +2159,9 @@ static int mcp251xfd_probe(struct spi_device *spi)
return 0;
- out_can_rx_offload_del:
+out_can_rx_offload_del:
can_rx_offload_del(&priv->offload);
- out_free_candev:
+out_free_candev:
spi->max_speed_hz = priv->spi_max_speed_hz_orig;
free_candev(ndev);
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c
index 004eaf96262b..050321345304 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c
@@ -94,7 +94,7 @@ static void mcp251xfd_dump_registers(const struct mcp251xfd_priv *priv,
kfree(buf);
}
- out:
+out:
mcp251xfd_dump_header(iter, MCP251XFD_DUMP_OBJECT_TYPE_REG, reg);
}
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
index 92b7bc7f14b9..65150e762007 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
@@ -397,7 +397,7 @@ mcp251xfd_regmap_crc_read(void *context,
return err;
}
- out:
+out:
memcpy(val_buf, buf_rx->data, val_len);
return 0;
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c
index bfe4caa0c99d..7bd2bcb5cf87 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c
@@ -206,6 +206,7 @@ mcp251xfd_ring_init_rx(struct mcp251xfd_priv *priv, u16 *base, u8 *fifo_nr)
int i, j;
mcp251xfd_for_each_rx_ring(priv, rx_ring, i) {
+ rx_ring->last_valid = timecounter_read(&priv->tc);
rx_ring->head = 0;
rx_ring->tail = 0;
rx_ring->base = *base;
@@ -485,6 +486,8 @@ int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv)
clear_bit(MCP251XFD_FLAGS_FD_MODE, priv->flags);
}
+ tx_ring->obj_num_shift_to_u8 = BITS_PER_TYPE(tx_ring->obj_num) -
+ ilog2(tx_ring->obj_num);
tx_ring->obj_size = tx_obj_size;
rem = priv->rx_obj_num;
@@ -507,6 +510,8 @@ int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv)
}
rx_ring->obj_num = rx_obj_num;
+ rx_ring->obj_num_shift_to_u8 = BITS_PER_TYPE(rx_ring->obj_num_shift_to_u8) -
+ ilog2(rx_obj_num);
rx_ring->obj_size = rx_obj_size;
priv->rx[i] = rx_ring;
}
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
index ced8d9c81f8c..fe897f3e4c12 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c
@@ -2,7 +2,7 @@
//
// mcp251xfd - Microchip MCP251xFD Family CAN controller driver
//
-// Copyright (c) 2019, 2020, 2021 Pengutronix,
+// Copyright (c) 2019, 2020, 2021, 2023 Pengutronix,
// Marc Kleine-Budde <[email protected]>
//
// Based on:
@@ -16,23 +16,14 @@
#include "mcp251xfd.h"
-static inline int
-mcp251xfd_rx_head_get_from_chip(const struct mcp251xfd_priv *priv,
- const struct mcp251xfd_rx_ring *ring,
- u8 *rx_head, bool *fifo_empty)
+static inline bool mcp251xfd_rx_fifo_sta_empty(const u32 fifo_sta)
{
- u32 fifo_sta;
- int err;
-
- err = regmap_read(priv->map_reg, MCP251XFD_REG_FIFOSTA(ring->fifo_nr),
- &fifo_sta);
- if (err)
- return err;
-
- *rx_head = FIELD_GET(MCP251XFD_REG_FIFOSTA_FIFOCI_MASK, fifo_sta);
- *fifo_empty = !(fifo_sta & MCP251XFD_REG_FIFOSTA_TFNRFNIF);
+ return !(fifo_sta & MCP251XFD_REG_FIFOSTA_TFNRFNIF);
+}
- return 0;
+static inline bool mcp251xfd_rx_fifo_sta_full(const u32 fifo_sta)
+{
+ return fifo_sta & MCP251XFD_REG_FIFOSTA_TFERFFIF;
}
static inline int
@@ -80,29 +71,49 @@ mcp251xfd_check_rx_tail(const struct mcp251xfd_priv *priv,
}
static int
-mcp251xfd_rx_ring_update(const struct mcp251xfd_priv *priv,
- struct mcp251xfd_rx_ring *ring)
+mcp251xfd_get_rx_len(const struct mcp251xfd_priv *priv,
+ const struct mcp251xfd_rx_ring *ring,
+ u8 *len_p)
{
- u32 new_head;
- u8 chip_rx_head;
- bool fifo_empty;
+ const u8 shift = ring->obj_num_shift_to_u8;
+ u8 chip_head, tail, len;
+ u32 fifo_sta;
int err;
- err = mcp251xfd_rx_head_get_from_chip(priv, ring, &chip_rx_head,
- &fifo_empty);
- if (err || fifo_empty)
+ err = regmap_read(priv->map_reg, MCP251XFD_REG_FIFOSTA(ring->fifo_nr),
+ &fifo_sta);
+ if (err)
+ return err;
+
+ if (mcp251xfd_rx_fifo_sta_empty(fifo_sta)) {
+ *len_p = 0;
+ return 0;
+ }
+
+ if (mcp251xfd_rx_fifo_sta_full(fifo_sta)) {
+ *len_p = ring->obj_num;
+ return 0;
+ }
+
+ chip_head = FIELD_GET(MCP251XFD_REG_FIFOSTA_FIFOCI_MASK, fifo_sta);
+
+ err = mcp251xfd_check_rx_tail(priv, ring);
+ if (err)
return err;
+ tail = mcp251xfd_get_rx_tail(ring);
- /* chip_rx_head, is the next RX-Object filled by the HW.
- * The new RX head must be >= the old head.
+ /* First shift to full u8. The subtraction works on signed
+ * values, that keeps the difference steady around the u8
+ * overflow. The right shift acts on len, which is an u8.
*/
- new_head = round_down(ring->head, ring->obj_num) + chip_rx_head;
- if (new_head <= ring->head)
- new_head += ring->obj_num;
+ BUILD_BUG_ON(sizeof(ring->obj_num) != sizeof(chip_head));
+ BUILD_BUG_ON(sizeof(ring->obj_num) != sizeof(tail));
+ BUILD_BUG_ON(sizeof(ring->obj_num) != sizeof(len));
- ring->head = new_head;
+ len = (chip_head << shift) - (tail << shift);
+ *len_p = len >> shift;
- return mcp251xfd_check_rx_tail(priv, ring);
+ return 0;
}
static void
@@ -148,8 +159,6 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv,
if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR))
memcpy(cfd->data, hw_rx_obj->data, cfd->len);
-
- mcp251xfd_skb_set_timestamp(priv, skb, hw_rx_obj->ts);
}
static int
@@ -160,8 +169,26 @@ mcp251xfd_handle_rxif_one(struct mcp251xfd_priv *priv,
struct net_device_stats *stats = &priv->ndev->stats;
struct sk_buff *skb;
struct canfd_frame *cfd;
+ u64 timestamp;
int err;
+ /* According to mcp2518fd erratum DS80000789E 6. the FIFOCI
+ * bits of a FIFOSTA register, here the RX FIFO head index
+ * might be corrupted and we might process past the RX FIFO's
+ * head into old CAN frames.
+ *
+ * Compare the timestamp of currently processed CAN frame with
+ * last valid frame received. Abort with -EBADMSG if an old
+ * CAN frame is detected.
+ */
+ timestamp = timecounter_cyc2time(&priv->tc, hw_rx_obj->ts);
+ if (timestamp <= ring->last_valid) {
+ stats->rx_fifo_errors++;
+
+ return -EBADMSG;
+ }
+ ring->last_valid = timestamp;
+
if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_FDF)
skb = alloc_canfd_skb(priv->ndev, &cfd);
else
@@ -172,6 +199,7 @@ mcp251xfd_handle_rxif_one(struct mcp251xfd_priv *priv,
return 0;
}
+ mcp251xfd_skb_set_timestamp(skb, timestamp);
mcp251xfd_hw_rx_obj_to_skb(priv, hw_rx_obj, skb);
err = can_rx_offload_queue_timestamp(&priv->offload, skb, hw_rx_obj->ts);
if (err)
@@ -198,51 +226,80 @@ mcp251xfd_rx_obj_read(const struct mcp251xfd_priv *priv,
}
static int
+mcp251xfd_handle_rxif_ring_uinc(const struct mcp251xfd_priv *priv,
+ struct mcp251xfd_rx_ring *ring,
+ u8 len)
+{
+ int offset;
+ int err;
+
+ if (!len)
+ return 0;
+
+ ring->head += len;
+
+ /* Increment the RX FIFO tail pointer 'len' times in a
+ * single SPI message.
+ *
+ * Note:
+ * Calculate offset, so that the SPI transfer ends on
+ * the last message of the uinc_xfer array, which has
+ * "cs_change == 0", to properly deactivate the chip
+ * select.
+ */
+ offset = ARRAY_SIZE(ring->uinc_xfer) - len;
+ err = spi_sync_transfer(priv->spi,
+ ring->uinc_xfer + offset, len);
+ if (err)
+ return err;
+
+ ring->tail += len;
+
+ return 0;
+}
+
+static int
mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv,
struct mcp251xfd_rx_ring *ring)
{
struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj = ring->obj;
- u8 rx_tail, len;
+ u8 rx_tail, len, l;
int err, i;
- err = mcp251xfd_rx_ring_update(priv, ring);
+ err = mcp251xfd_get_rx_len(priv, ring, &len);
if (err)
return err;
- while ((len = mcp251xfd_get_rx_linear_len(ring))) {
- int offset;
-
+ while ((l = mcp251xfd_get_rx_linear_len(ring, len))) {
rx_tail = mcp251xfd_get_rx_tail(ring);
err = mcp251xfd_rx_obj_read(priv, ring, hw_rx_obj,
- rx_tail, len);
+ rx_tail, l);
if (err)
return err;
- for (i = 0; i < len; i++) {
+ for (i = 0; i < l; i++) {
err = mcp251xfd_handle_rxif_one(priv, ring,
(void *)hw_rx_obj +
i * ring->obj_size);
- if (err)
+
+ /* -EBADMSG means we're affected by mcp2518fd
+ * erratum DS80000789E 6., i.e. the timestamp
+ * in the RX object is older that the last
+ * valid received CAN frame. Don't process any
+ * further and mark processed frames as good.
+ */
+ if (err == -EBADMSG)
+ return mcp251xfd_handle_rxif_ring_uinc(priv, ring, i);
+ else if (err)
return err;
}
- /* Increment the RX FIFO tail pointer 'len' times in a
- * single SPI message.
- *
- * Note:
- * Calculate offset, so that the SPI transfer ends on
- * the last message of the uinc_xfer array, which has
- * "cs_change == 0", to properly deactivate the chip
- * select.
- */
- offset = ARRAY_SIZE(ring->uinc_xfer) - len;
- err = spi_sync_transfer(priv->spi,
- ring->uinc_xfer + offset, len);
+ err = mcp251xfd_handle_rxif_ring_uinc(priv, ring, l);
if (err)
return err;
- ring->tail += len;
+ len -= l;
}
return 0;
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c
index e5bd57b65aaf..f732556d233a 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c
@@ -2,7 +2,7 @@
//
// mcp251xfd - Microchip MCP251xFD Family CAN controller driver
//
-// Copyright (c) 2019, 2020, 2021 Pengutronix,
+// Copyright (c) 2019, 2020, 2021, 2023 Pengutronix,
// Marc Kleine-Budde <[email protected]>
//
// Based on:
@@ -16,6 +16,11 @@
#include "mcp251xfd.h"
+static inline bool mcp251xfd_tx_fifo_sta_full(u32 fifo_sta)
+{
+ return !(fifo_sta & MCP251XFD_REG_FIFOSTA_TFNRFNIF);
+}
+
static inline int
mcp251xfd_tef_tail_get_from_chip(const struct mcp251xfd_priv *priv,
u8 *tef_tail)
@@ -56,60 +61,43 @@ static int mcp251xfd_check_tef_tail(const struct mcp251xfd_priv *priv)
}
static int
-mcp251xfd_handle_tefif_recover(const struct mcp251xfd_priv *priv, const u32 seq)
-{
- const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
- u32 tef_sta;
- int err;
-
- err = regmap_read(priv->map_reg, MCP251XFD_REG_TEFSTA, &tef_sta);
- if (err)
- return err;
-
- if (tef_sta & MCP251XFD_REG_TEFSTA_TEFOVIF) {
- netdev_err(priv->ndev,
- "Transmit Event FIFO buffer overflow.\n");
- return -ENOBUFS;
- }
-
- netdev_info(priv->ndev,
- "Transmit Event FIFO buffer %s. (seq=0x%08x, tef_tail=0x%08x, tef_head=0x%08x, tx_head=0x%08x).\n",
- tef_sta & MCP251XFD_REG_TEFSTA_TEFFIF ?
- "full" : tef_sta & MCP251XFD_REG_TEFSTA_TEFNEIF ?
- "not empty" : "empty",
- seq, priv->tef->tail, priv->tef->head, tx_ring->head);
-
- /* The Sequence Number in the TEF doesn't match our tef_tail. */
- return -EAGAIN;
-}
-
-static int
mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
const struct mcp251xfd_hw_tef_obj *hw_tef_obj,
unsigned int *frame_len_ptr)
{
struct net_device_stats *stats = &priv->ndev->stats;
+ u32 seq, tef_tail_masked, tef_tail;
struct sk_buff *skb;
- u32 seq, seq_masked, tef_tail_masked, tef_tail;
- seq = FIELD_GET(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK,
+ /* Use the MCP2517FD mask on the MCP2518FD, too. We only
+ * compare 7 bits, this is enough to detect old TEF objects.
+ */
+ seq = FIELD_GET(MCP251XFD_OBJ_FLAGS_SEQ_MCP2517FD_MASK,
hw_tef_obj->flags);
-
- /* Use the MCP2517FD mask on the MCP2518FD, too. We only
- * compare 7 bits, this should be enough to detect
- * net-yet-completed, i.e. old TEF objects.
- */
- seq_masked = seq &
- field_mask(MCP251XFD_OBJ_FLAGS_SEQ_MCP2517FD_MASK);
tef_tail_masked = priv->tef->tail &
field_mask(MCP251XFD_OBJ_FLAGS_SEQ_MCP2517FD_MASK);
- if (seq_masked != tef_tail_masked)
- return mcp251xfd_handle_tefif_recover(priv, seq);
+
+ /* According to mcp2518fd erratum DS80000789E 6. the FIFOCI
+ * bits of a FIFOSTA register, here the TX FIFO tail index
+ * might be corrupted and we might process past the TEF FIFO's
+ * head into old CAN frames.
+ *
+ * Compare the sequence number of the currently processed CAN
+ * frame with the expected sequence number. Abort with
+ * -EBADMSG if an old CAN frame is detected.
+ */
+ if (seq != tef_tail_masked) {
+ netdev_dbg(priv->ndev, "%s: chip=0x%02x ring=0x%02x\n", __func__,
+ seq, tef_tail_masked);
+ stats->tx_fifo_errors++;
+
+ return -EBADMSG;
+ }
tef_tail = mcp251xfd_get_tef_tail(priv);
skb = priv->can.echo_skb[tef_tail];
if (skb)
- mcp251xfd_skb_set_timestamp(priv, skb, hw_tef_obj->ts);
+ mcp251xfd_skb_set_timestamp_raw(priv, skb, hw_tef_obj->ts);
stats->tx_bytes +=
can_rx_offload_get_echo_skb_queue_timestamp(&priv->offload,
tef_tail, hw_tef_obj->ts,
@@ -120,28 +108,44 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
return 0;
}
-static int mcp251xfd_tef_ring_update(struct mcp251xfd_priv *priv)
+static int
+mcp251xfd_get_tef_len(struct mcp251xfd_priv *priv, u8 *len_p)
{
const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
- unsigned int new_head;
- u8 chip_tx_tail;
+ const u8 shift = tx_ring->obj_num_shift_to_u8;
+ u8 chip_tx_tail, tail, len;
+ u32 fifo_sta;
int err;
- err = mcp251xfd_tx_tail_get_from_chip(priv, &chip_tx_tail);
+ err = regmap_read(priv->map_reg, MCP251XFD_REG_FIFOSTA(priv->tx->fifo_nr),
+ &fifo_sta);
if (err)
return err;
- /* chip_tx_tail, is the next TX-Object send by the HW.
- * The new TEF head must be >= the old head, ...
+ if (mcp251xfd_tx_fifo_sta_full(fifo_sta)) {
+ *len_p = tx_ring->obj_num;
+ return 0;
+ }
+
+ chip_tx_tail = FIELD_GET(MCP251XFD_REG_FIFOSTA_FIFOCI_MASK, fifo_sta);
+
+ err = mcp251xfd_check_tef_tail(priv);
+ if (err)
+ return err;
+ tail = mcp251xfd_get_tef_tail(priv);
+
+ /* First shift to full u8. The subtraction works on signed
+ * values, that keeps the difference steady around the u8
+ * overflow. The right shift acts on len, which is an u8.
*/
- new_head = round_down(priv->tef->head, tx_ring->obj_num) + chip_tx_tail;
- if (new_head <= priv->tef->head)
- new_head += tx_ring->obj_num;
+ BUILD_BUG_ON(sizeof(tx_ring->obj_num) != sizeof(chip_tx_tail));
+ BUILD_BUG_ON(sizeof(tx_ring->obj_num) != sizeof(tail));
+ BUILD_BUG_ON(sizeof(tx_ring->obj_num) != sizeof(len));
- /* ... but it cannot exceed the TX head. */
- priv->tef->head = min(new_head, tx_ring->head);
+ len = (chip_tx_tail << shift) - (tail << shift);
+ *len_p = len >> shift;
- return mcp251xfd_check_tef_tail(priv);
+ return 0;
}
static inline int
@@ -182,13 +186,12 @@ int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
u8 tef_tail, len, l;
int err, i;
- err = mcp251xfd_tef_ring_update(priv);
+ err = mcp251xfd_get_tef_len(priv, &len);
if (err)
return err;
tef_tail = mcp251xfd_get_tef_tail(priv);
- len = mcp251xfd_get_tef_len(priv);
- l = mcp251xfd_get_tef_linear_len(priv);
+ l = mcp251xfd_get_tef_linear_len(priv, len);
err = mcp251xfd_tef_obj_read(priv, hw_tef_obj, tef_tail, l);
if (err)
return err;
@@ -203,12 +206,12 @@ int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
unsigned int frame_len = 0;
err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i], &frame_len);
- /* -EAGAIN means the Sequence Number in the TEF
- * doesn't match our tef_tail. This can happen if we
- * read the TEF objects too early. Leave loop let the
- * interrupt handler call us again.
+ /* -EBADMSG means we're affected by mcp2518fd erratum
+ * DS80000789E 6., i.e. the Sequence Number in the TEF
+ * doesn't match our tef_tail. Don't process any
+ * further and mark processed frames as good.
*/
- if (err == -EAGAIN)
+ if (err == -EBADMSG)
goto out_netif_wake_queue;
if (err)
return err;
@@ -216,13 +219,15 @@ int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
total_frame_len += frame_len;
}
- out_netif_wake_queue:
+out_netif_wake_queue:
len = i; /* number of handled goods TEFs */
if (len) {
struct mcp251xfd_tef_ring *ring = priv->tef;
struct mcp251xfd_tx_ring *tx_ring = priv->tx;
int offset;
+ ring->head += len;
+
/* Increment the TEF FIFO tail pointer 'len' times in
* a single SPI message.
*
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c
index 712e09186987..202ca0d24d03 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c
@@ -2,7 +2,7 @@
//
// mcp251xfd - Microchip MCP251xFD Family CAN controller driver
//
-// Copyright (c) 2021 Pengutronix,
+// Copyright (c) 2021, 2023 Pengutronix,
// Marc Kleine-Budde <[email protected]>
//
@@ -11,20 +11,20 @@
#include "mcp251xfd.h"
-static u64 mcp251xfd_timestamp_read(const struct cyclecounter *cc)
+static u64 mcp251xfd_timestamp_raw_read(const struct cyclecounter *cc)
{
const struct mcp251xfd_priv *priv;
- u32 timestamp = 0;
+ u32 ts_raw = 0;
int err;
priv = container_of(cc, struct mcp251xfd_priv, cc);
- err = mcp251xfd_get_timestamp(priv, &timestamp);
+ err = mcp251xfd_get_timestamp_raw(priv, &ts_raw);
if (err)
netdev_err(priv->ndev,
"Error %d while reading timestamp. HW timestamps may be inaccurate.",
err);
- return timestamp;
+ return ts_raw;
}
static void mcp251xfd_timestamp_work(struct work_struct *work)
@@ -39,28 +39,21 @@ static void mcp251xfd_timestamp_work(struct work_struct *work)
MCP251XFD_TIMESTAMP_WORK_DELAY_SEC * HZ);
}
-void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv,
- struct sk_buff *skb, u32 timestamp)
-{
- struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
- u64 ns;
-
- ns = timecounter_cyc2time(&priv->tc, timestamp);
- hwtstamps->hwtstamp = ns_to_ktime(ns);
-}
-
void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv)
{
struct cyclecounter *cc = &priv->cc;
- cc->read = mcp251xfd_timestamp_read;
+ cc->read = mcp251xfd_timestamp_raw_read;
cc->mask = CYCLECOUNTER_MASK(32);
cc->shift = 1;
cc->mult = clocksource_hz2mult(priv->can.clock.freq, cc->shift);
- timecounter_init(&priv->tc, &priv->cc, ktime_get_real_ns());
-
INIT_DELAYED_WORK(&priv->timestamp, mcp251xfd_timestamp_work);
+}
+
+void mcp251xfd_timestamp_start(struct mcp251xfd_priv *priv)
+{
+ timecounter_init(&priv->tc, &priv->cc, ktime_get_real_ns());
schedule_delayed_work(&priv->timestamp,
MCP251XFD_TIMESTAMP_WORK_DELAY_SEC * HZ);
}
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
index b35bfebd23f2..dcbbd2b2fae8 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
@@ -2,7 +2,7 @@
*
* mcp251xfd - Microchip MCP251xFD Family CAN controller driver
*
- * Copyright (c) 2019, 2020, 2021 Pengutronix,
+ * Copyright (c) 2019, 2020, 2021, 2023 Pengutronix,
* Marc Kleine-Budde <[email protected]>
* Copyright (c) 2019 Martin Sperl <[email protected]>
*/
@@ -524,6 +524,7 @@ struct mcp251xfd_tef_ring {
/* u8 obj_num equals tx_ring->obj_num */
/* u8 obj_size equals sizeof(struct mcp251xfd_hw_tef_obj) */
+ /* u8 obj_num_shift_to_u8 equals tx_ring->obj_num_shift_to_u8 */
union mcp251xfd_write_reg_buf irq_enable_buf;
struct spi_transfer irq_enable_xfer;
@@ -542,6 +543,7 @@ struct mcp251xfd_tx_ring {
u8 nr;
u8 fifo_nr;
u8 obj_num;
+ u8 obj_num_shift_to_u8;
u8 obj_size;
struct mcp251xfd_tx_obj obj[MCP251XFD_TX_OBJ_NUM_MAX];
@@ -552,10 +554,14 @@ struct mcp251xfd_rx_ring {
unsigned int head;
unsigned int tail;
+ /* timestamp of the last valid received CAN frame */
+ u64 last_valid;
+
u16 base;
u8 nr;
u8 fifo_nr;
u8 obj_num;
+ u8 obj_num_shift_to_u8;
u8 obj_size;
union mcp251xfd_write_reg_buf irq_enable_buf;
@@ -809,10 +815,27 @@ mcp251xfd_spi_cmd_write(const struct mcp251xfd_priv *priv,
return data;
}
-static inline int mcp251xfd_get_timestamp(const struct mcp251xfd_priv *priv,
- u32 *timestamp)
+static inline int mcp251xfd_get_timestamp_raw(const struct mcp251xfd_priv *priv,
+ u32 *ts_raw)
+{
+ return regmap_read(priv->map_reg, MCP251XFD_REG_TBC, ts_raw);
+}
+
+static inline void mcp251xfd_skb_set_timestamp(struct sk_buff *skb, u64 ns)
{
- return regmap_read(priv->map_reg, MCP251XFD_REG_TBC, timestamp);
+ struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
+
+ hwtstamps->hwtstamp = ns_to_ktime(ns);
+}
+
+static inline
+void mcp251xfd_skb_set_timestamp_raw(const struct mcp251xfd_priv *priv,
+ struct sk_buff *skb, u32 ts_raw)
+{
+ u64 ns;
+
+ ns = timecounter_cyc2time(&priv->tc, ts_raw);
+ mcp251xfd_skb_set_timestamp(skb, ns);
}
static inline u16 mcp251xfd_get_tef_obj_addr(u8 n)
@@ -861,17 +884,8 @@ static inline u8 mcp251xfd_get_tef_tail(const struct mcp251xfd_priv *priv)
return priv->tef->tail & (priv->tx->obj_num - 1);
}
-static inline u8 mcp251xfd_get_tef_len(const struct mcp251xfd_priv *priv)
+static inline u8 mcp251xfd_get_tef_linear_len(const struct mcp251xfd_priv *priv, u8 len)
{
- return priv->tef->head - priv->tef->tail;
-}
-
-static inline u8 mcp251xfd_get_tef_linear_len(const struct mcp251xfd_priv *priv)
-{
- u8 len;
-
- len = mcp251xfd_get_tef_len(priv);
-
return min_t(u8, len, priv->tx->obj_num - mcp251xfd_get_tef_tail(priv));
}
@@ -914,18 +928,9 @@ static inline u8 mcp251xfd_get_rx_tail(const struct mcp251xfd_rx_ring *ring)
return ring->tail & (ring->obj_num - 1);
}
-static inline u8 mcp251xfd_get_rx_len(const struct mcp251xfd_rx_ring *ring)
-{
- return ring->head - ring->tail;
-}
-
static inline u8
-mcp251xfd_get_rx_linear_len(const struct mcp251xfd_rx_ring *ring)
+mcp251xfd_get_rx_linear_len(const struct mcp251xfd_rx_ring *ring, u8 len)
{
- u8 len;
-
- len = mcp251xfd_get_rx_len(ring);
-
return min_t(u8, len, ring->obj_num - mcp251xfd_get_rx_tail(ring));
}
@@ -951,9 +956,8 @@ void mcp251xfd_ring_free(struct mcp251xfd_priv *priv);
int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv);
int mcp251xfd_handle_rxif(struct mcp251xfd_priv *priv);
int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv);
-void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv,
- struct sk_buff *skb, u32 timestamp);
void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv);
+void mcp251xfd_timestamp_start(struct mcp251xfd_priv *priv);
void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv);
void mcp251xfd_tx_obj_write_sync(struct work_struct *work);
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 65c962f76898..340297e3bec7 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -40,6 +40,9 @@
#define USB_ABE_CANDEBUGGER_FD_VENDOR_ID 0x16d0
#define USB_ABE_CANDEBUGGER_FD_PRODUCT_ID 0x10b8
+#define USB_XYLANTA_SAINT3_VENDOR_ID 0x16d0
+#define USB_XYLANTA_SAINT3_PRODUCT_ID 0x0f30
+
#define GS_USB_ENDPOINT_IN 1
#define GS_USB_ENDPOINT_OUT 2
@@ -1530,6 +1533,8 @@ static const struct usb_device_id gs_usb_table[] = {
USB_CES_CANEXT_FD_PRODUCT_ID, 0) },
{ USB_DEVICE_INTERFACE_NUMBER(USB_ABE_CANDEBUGGER_FD_VENDOR_ID,
USB_ABE_CANDEBUGGER_FD_PRODUCT_ID, 0) },
+ { USB_DEVICE_INTERFACE_NUMBER(USB_XYLANTA_SAINT3_VENDOR_ID,
+ USB_XYLANTA_SAINT3_PRODUCT_ID, 0) },
{} /* Terminating entry */
};
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
index 7faec0eef1ca..daa34b532aa8 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
@@ -128,6 +128,7 @@ static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err_liste
static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leafimx = {
.quirks = 0,
+ .family = KVASER_LEAF,
.ops = &kvaser_usb_leaf_dev_ops,
};
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index 02f07b870f10..268949939636 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -1047,31 +1047,31 @@ static int lan9303_get_sset_count(struct dsa_switch *ds, int port, int sset)
return ARRAY_SIZE(lan9303_mib);
}
-static int lan9303_phy_read(struct dsa_switch *ds, int phy, int regnum)
+static int lan9303_phy_read(struct dsa_switch *ds, int port, int regnum)
{
struct lan9303 *chip = ds->priv;
int phy_base = chip->phy_addr_base;
- if (phy == phy_base)
+ if (port == 0)
return lan9303_virt_phy_reg_read(chip, regnum);
- if (phy > phy_base + 2)
+ if (port > 2)
return -ENODEV;
- return chip->ops->phy_read(chip, phy, regnum);
+ return chip->ops->phy_read(chip, phy_base + port, regnum);
}
-static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum,
+static int lan9303_phy_write(struct dsa_switch *ds, int port, int regnum,
u16 val)
{
struct lan9303 *chip = ds->priv;
int phy_base = chip->phy_addr_base;
- if (phy == phy_base)
+ if (port == 0)
return lan9303_virt_phy_reg_write(chip, regnum, val);
- if (phy > phy_base + 2)
+ if (port > 2)
return -ENODEV;
- return chip->ops->phy_write(chip, phy, regnum, val);
+ return chip->ops->phy_write(chip, phy_base + port, regnum, val);
}
static int lan9303_port_enable(struct dsa_switch *ds, int port,
@@ -1099,7 +1099,7 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port)
vlan_vid_del(dsa_port_to_conduit(dp), htons(ETH_P_8021Q), port);
lan9303_disable_processing_port(chip, port);
- lan9303_phy_write(ds, chip->phy_addr_base + port, MII_BMCR, BMCR_PDOWN);
+ lan9303_phy_write(ds, port, MII_BMCR, BMCR_PDOWN);
}
static int lan9303_port_bridge_join(struct dsa_switch *ds, int port,
@@ -1374,8 +1374,6 @@ static const struct dsa_switch_ops lan9303_switch_ops = {
static int lan9303_register_switch(struct lan9303 *chip)
{
- int base;
-
chip->ds = devm_kzalloc(chip->dev, sizeof(*chip->ds), GFP_KERNEL);
if (!chip->ds)
return -ENOMEM;
@@ -1385,8 +1383,7 @@ static int lan9303_register_switch(struct lan9303 *chip)
chip->ds->priv = chip;
chip->ds->ops = &lan9303_switch_ops;
chip->ds->phylink_mac_ops = &lan9303_phylink_mac_ops;
- base = chip->phy_addr_base;
- chip->ds->phys_mii_mask = GENMASK(LAN9303_NUM_PORTS - 1 + base, base);
+ chip->ds->phys_mii_mask = GENMASK(LAN9303_NUM_PORTS - 1, 0);
return dsa_register_switch(chip->ds);
}
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index baa1eeb9a1b0..b074b4bb0629 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -3116,7 +3116,8 @@ static void ksz_set_xmii(struct ksz_device *dev, int port,
/* On KSZ9893, disable RGMII in-band status support */
if (dev->chip_id == KSZ9893_CHIP_ID ||
dev->chip_id == KSZ8563_CHIP_ID ||
- dev->chip_id == KSZ9563_CHIP_ID)
+ dev->chip_id == KSZ9563_CHIP_ID ||
+ is_lan937x(dev))
data8 &= ~P_MII_MAC_MODE;
break;
default:
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index ee7db46e469d..5f0a628b9849 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -22,6 +22,7 @@
/* all KSZ switches count ports from 1 */
#define KSZ_PORT_1 0
#define KSZ_PORT_2 1
+#define KSZ_PORT_4 3
struct ksz_device;
struct ksz_port;
@@ -637,6 +638,12 @@ static inline int is_lan937x(struct ksz_device *dev)
dev->chip_id == LAN9374_CHIP_ID;
}
+static inline bool is_lan937x_tx_phy(struct ksz_device *dev, int port)
+{
+ return (dev->chip_id == LAN9371_CHIP_ID ||
+ dev->chip_id == LAN9372_CHIP_ID) && port == KSZ_PORT_4;
+}
+
/* STP State Defines */
#define PORT_TX_ENABLE BIT(2)
#define PORT_RX_ENABLE BIT(1)
diff --git a/drivers/net/dsa/microchip/lan937x_main.c b/drivers/net/dsa/microchip/lan937x_main.c
index b479a628b1ae..824d9309a3d3 100644
--- a/drivers/net/dsa/microchip/lan937x_main.c
+++ b/drivers/net/dsa/microchip/lan937x_main.c
@@ -55,6 +55,9 @@ static int lan937x_vphy_ind_addr_wr(struct ksz_device *dev, int addr, int reg)
u16 addr_base = REG_PORT_T1_PHY_CTRL_BASE;
u16 temp;
+ if (is_lan937x_tx_phy(dev, addr))
+ addr_base = REG_PORT_TX_PHY_CTRL_BASE;
+
/* get register address based on the logical port */
temp = PORT_CTRL_ADDR(addr, (addr_base + (reg << 2)));
@@ -320,6 +323,9 @@ void lan937x_phylink_get_caps(struct ksz_device *dev, int port,
/* MII/RMII/RGMII ports */
config->mac_capabilities |= MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
MAC_100HD | MAC_10 | MAC_1000FD;
+ } else if (is_lan937x_tx_phy(dev, port)) {
+ config->mac_capabilities |= MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_100HD | MAC_10;
}
}
@@ -370,23 +376,33 @@ int lan937x_setup(struct dsa_switch *ds)
ds->vlan_filtering_is_global = true;
/* Enable aggressive back off for half duplex & UNH mode */
- lan937x_cfg(dev, REG_SW_MAC_CTRL_0,
- (SW_PAUSE_UNH_MODE | SW_NEW_BACKOFF | SW_AGGR_BACKOFF),
- true);
+ ret = lan937x_cfg(dev, REG_SW_MAC_CTRL_0, (SW_PAUSE_UNH_MODE |
+ SW_NEW_BACKOFF |
+ SW_AGGR_BACKOFF), true);
+ if (ret < 0)
+ return ret;
/* If NO_EXC_COLLISION_DROP bit is set, the switch will not drop
* packets when 16 or more collisions occur
*/
- lan937x_cfg(dev, REG_SW_MAC_CTRL_1, NO_EXC_COLLISION_DROP, true);
+ ret = lan937x_cfg(dev, REG_SW_MAC_CTRL_1, NO_EXC_COLLISION_DROP, true);
+ if (ret < 0)
+ return ret;
/* enable global MIB counter freeze function */
- lan937x_cfg(dev, REG_SW_MAC_CTRL_6, SW_MIB_COUNTER_FREEZE, true);
+ ret = lan937x_cfg(dev, REG_SW_MAC_CTRL_6, SW_MIB_COUNTER_FREEZE, true);
+ if (ret < 0)
+ return ret;
/* disable CLK125 & CLK25, 1: disable, 0: enable */
- lan937x_cfg(dev, REG_SW_GLOBAL_OUTPUT_CTRL__1,
- (SW_CLK125_ENB | SW_CLK25_ENB), true);
+ ret = lan937x_cfg(dev, REG_SW_GLOBAL_OUTPUT_CTRL__1,
+ (SW_CLK125_ENB | SW_CLK25_ENB), true);
+ if (ret < 0)
+ return ret;
- return 0;
+ /* Disable global VPHY support. Related to CPU interface only? */
+ return ksz_rmw32(dev, REG_SW_CFG_STRAP_OVR, SW_VPHY_DISABLE,
+ SW_VPHY_DISABLE);
}
void lan937x_teardown(struct dsa_switch *ds)
diff --git a/drivers/net/dsa/microchip/lan937x_reg.h b/drivers/net/dsa/microchip/lan937x_reg.h
index 45b606b6429f..2f22a9d01de3 100644
--- a/drivers/net/dsa/microchip/lan937x_reg.h
+++ b/drivers/net/dsa/microchip/lan937x_reg.h
@@ -37,6 +37,10 @@
#define SW_CLK125_ENB BIT(1)
#define SW_CLK25_ENB BIT(0)
+/* 2 - PHY Control */
+#define REG_SW_CFG_STRAP_OVR 0x0214
+#define SW_VPHY_DISABLE BIT(31)
+
/* 3 - Operation Control */
#define REG_SW_OPERATION 0x0300
@@ -147,6 +151,7 @@
/* 1 - Phy */
#define REG_PORT_T1_PHY_CTRL_BASE 0x0100
+#define REG_PORT_TX_PHY_CTRL_BASE 0x0280
/* 3 - xMII */
#define PORT_SGMII_SEL BIT(7)
diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c
index 968cb81088bf..e9f2c67bc15f 100644
--- a/drivers/net/dsa/qca/ar9331.c
+++ b/drivers/net/dsa/qca/ar9331.c
@@ -1021,7 +1021,7 @@ static const struct regmap_config ar9331_mdio_regmap_config = {
.cache_type = REGCACHE_MAPLE,
};
-static struct regmap_bus ar9331_sw_bus = {
+static const struct regmap_bus ar9331_sw_bus = {
.reg_format_endian_default = REGMAP_ENDIAN_NATIVE,
.val_format_endian_default = REGMAP_ENDIAN_NATIVE,
.read = ar9331_mdio_read,
diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c
index b3c27cf538e8..f8d8c70642c4 100644
--- a/drivers/net/dsa/qca/qca8k-8xxx.c
+++ b/drivers/net/dsa/qca/qca8k-8xxx.c
@@ -565,7 +565,7 @@ qca8k_regmap_update_bits(void *ctx, uint32_t reg, uint32_t mask, uint32_t write_
return qca8k_regmap_update_bits_mii(priv, reg, mask, write_val);
}
-static struct regmap_config qca8k_regmap_config = {
+static const struct regmap_config qca8k_regmap_config = {
.reg_bits = 16,
.val_bits = 32,
.reg_stride = 4,
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
index a806dadc4196..20c6529ec135 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
@@ -1380,6 +1380,7 @@ static int bcmasp_probe(struct platform_device *pdev)
dev_err(dev, "Cannot create eth interface %d\n", i);
bcmasp_remove_intfs(priv);
of_node_put(intf_node);
+ ret = -ENOMEM;
goto of_put_exit;
}
list_add_tail(&intf->list, &priv->intfs);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index e2a4e1088b7f..9580ab83d387 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1262,7 +1262,7 @@ enum {
struct bnx2x_fw_stats_req {
struct stats_query_header hdr;
- struct stats_query_entry query[FP_SB_MAX_E1x+
+ struct stats_query_entry query[FP_SB_MAX_E2 +
BNX2X_FIRST_QUEUE_QUERY_IDX];
};
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 1bd0c5973252..80fce0aaad66 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -456,8 +456,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
dma_addr_t mapping;
unsigned int length, pad = 0;
u32 len, free_size, vlan_tag_flags, cfa_action, flags;
- u16 prod, last_frag;
+ struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
struct pci_dev *pdev = bp->pdev;
+ u16 prod, last_frag, txts_prod;
struct bnxt_tx_ring_info *txr;
struct bnxt_sw_tx_bd *tx_buf;
__le32 lflags = 0;
@@ -509,27 +510,29 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
vlan_tag_flags |= 1 << TX_BD_CFA_META_TPID_SHIFT;
}
- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
- struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && ptp &&
+ ptp->tx_tstamp_en) {
+ if (bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP) {
+ lflags |= cpu_to_le32(TX_BD_FLAGS_STAMP);
+ tx_buf->is_ts_pkt = 1;
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+ } else if (!skb_is_gso(skb)) {
+ u16 seq_id, hdr_off;
- if (ptp && ptp->tx_tstamp_en && !skb_is_gso(skb)) {
- if (atomic_dec_if_positive(&ptp->tx_avail) < 0) {
- atomic64_inc(&ptp->stats.ts_err);
- goto tx_no_ts;
- }
- if (!bnxt_ptp_parse(skb, &ptp->tx_seqid,
- &ptp->tx_hdr_off)) {
+ if (!bnxt_ptp_parse(skb, &seq_id, &hdr_off) &&
+ !bnxt_ptp_get_txts_prod(ptp, &txts_prod)) {
if (vlan_tag_flags)
- ptp->tx_hdr_off += VLAN_HLEN;
+ hdr_off += VLAN_HLEN;
lflags |= cpu_to_le32(TX_BD_FLAGS_STAMP);
+ tx_buf->is_ts_pkt = 1;
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
- } else {
- atomic_inc(&bp->ptp_cfg->tx_avail);
+
+ ptp->txts_req[txts_prod].tx_seqid = seq_id;
+ ptp->txts_req[txts_prod].tx_hdr_off = hdr_off;
+ tx_buf->txts_prod = txts_prod;
}
}
}
-
-tx_no_ts:
if (unlikely(skb->no_fcs))
lflags |= cpu_to_le32(TX_BD_FLAGS_NO_CRC);
@@ -758,8 +761,11 @@ tx_free:
dev_kfree_skb_any(skb);
tx_kick_pending:
if (BNXT_TX_PTP_IS_SET(lflags)) {
+ txr->tx_buf_ring[txr->tx_prod].is_ts_pkt = 0;
atomic64_inc(&bp->ptp_cfg->stats.ts_err);
- atomic_inc(&bp->ptp_cfg->tx_avail);
+ if (!(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP))
+ /* set SKB to err so PTP worker will clean up */
+ ptp->txts_req[txts_prod].tx_skb = ERR_PTR(-EIO);
}
if (txr->kick_pending)
bnxt_txr_db_kick(bp, txr, txr->tx_prod);
@@ -768,7 +774,8 @@ tx_kick_pending:
return NETDEV_TX_OK;
}
-static void __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
+/* Returns true if some remaining TX packets not processed. */
+static bool __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
int budget)
{
struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txr->txq_index);
@@ -777,24 +784,33 @@ static void __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
unsigned int tx_bytes = 0;
u16 cons = txr->tx_cons;
int tx_pkts = 0;
+ bool rc = false;
while (RING_TX(bp, cons) != hw_cons) {
struct bnxt_sw_tx_bd *tx_buf;
struct sk_buff *skb;
+ bool is_ts_pkt;
int j, last;
tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)];
- cons = NEXT_TX(cons);
skb = tx_buf->skb;
- tx_buf->skb = NULL;
if (unlikely(!skb)) {
bnxt_sched_reset_txr(bp, txr, cons);
- return;
+ return rc;
}
+ is_ts_pkt = tx_buf->is_ts_pkt;
+ if (is_ts_pkt && (bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) {
+ rc = true;
+ break;
+ }
+
+ cons = NEXT_TX(cons);
tx_pkts++;
tx_bytes += skb->len;
+ tx_buf->skb = NULL;
+ tx_buf->is_ts_pkt = 0;
if (tx_buf->is_push) {
tx_buf->is_push = 0;
@@ -814,15 +830,11 @@ static void __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
skb_frag_size(&skb_shinfo(skb)->frags[j]),
DMA_TO_DEVICE);
}
- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
+ if (unlikely(is_ts_pkt)) {
if (BNXT_CHIP_P5(bp)) {
/* PTP worker takes ownership of the skb */
- if (!bnxt_get_tx_ts_p5(bp, skb)) {
- skb = NULL;
- } else {
- atomic64_inc(&bp->ptp_cfg->stats.ts_err);
- atomic_inc(&bp->ptp_cfg->tx_avail);
- }
+ bnxt_get_tx_ts_p5(bp, skb, tx_buf->txts_prod);
+ skb = NULL;
}
}
@@ -837,18 +849,22 @@ next_tx_int:
__netif_txq_completed_wake(txq, tx_pkts, tx_bytes,
bnxt_tx_avail(bp, txr), bp->tx_wake_thresh,
READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING);
+
+ return rc;
}
static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
{
struct bnxt_tx_ring_info *txr;
+ bool more = false;
int i;
bnxt_for_each_napi_tx(i, bnapi, txr) {
if (txr->tx_hw_cons != RING_TX(bp, txr->tx_cons))
- __bnxt_tx_int(bp, txr, budget);
+ more |= __bnxt_tx_int(bp, txr, budget);
}
- bnapi->events &= ~BNXT_TX_CMP_EVENT;
+ if (!more)
+ bnapi->events &= ~BNXT_TX_CMP_EVENT;
}
static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
@@ -2914,6 +2930,8 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
cpr->has_more_work = 1;
break;
}
+ } else if (cmp_type == CMP_TYPE_TX_L2_PKT_TS_CMP) {
+ bnxt_tx_ts_cmp(bp, bnapi, (struct tx_ts_cmp *)txcmp);
} else if (cmp_type >= CMP_TYPE_RX_L2_CMP &&
cmp_type <= CMP_TYPE_RX_L2_TPA_START_V3_CMP) {
if (likely(budget))
@@ -2945,8 +2963,10 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
}
}
- if (event & BNXT_REDIRECT_EVENT)
+ if (event & BNXT_REDIRECT_EVENT) {
xdp_do_flush();
+ event &= ~BNXT_REDIRECT_EVENT;
+ }
if (event & BNXT_TX_EVENT) {
struct bnxt_tx_ring_info *txr = bnapi->tx_ring[0];
@@ -2956,6 +2976,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
wmb();
bnxt_db_write_relaxed(bp, &txr->tx_db, prod);
+ event &= ~BNXT_TX_EVENT;
}
cpr->cp_raw_cons = raw_cons;
@@ -2973,13 +2994,14 @@ static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi,
struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
+ bnapi->events &= ~BNXT_RX_EVENT;
}
if (bnapi->events & BNXT_AGG_EVENT) {
struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
+ bnapi->events &= ~BNXT_AGG_EVENT;
}
- bnapi->events &= BNXT_TX_CMP_EVENT;
}
static int bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
@@ -6260,6 +6282,21 @@ static u16 bnxt_get_max_rss_ring(struct bnxt *bp)
return max_ring;
}
+u16 bnxt_get_max_rss_ctx_ring(struct bnxt *bp)
+{
+ u16 i, tbl_size, max_ring = 0;
+ struct bnxt_rss_ctx *rss_ctx;
+
+ tbl_size = bnxt_get_rxfh_indir_size(bp->dev);
+
+ list_for_each_entry(rss_ctx, &bp->rss_ctx_list, list) {
+ for (i = 0; i < tbl_size; i++)
+ max_ring = max(max_ring, rss_ctx->rss_indir_tbl[i]);
+ }
+
+ return max_ring;
+}
+
int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings)
{
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
@@ -6788,6 +6825,7 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
switch (ring_type) {
case HWRM_RING_ALLOC_TX: {
struct bnxt_tx_ring_info *txr;
+ u16 flags = 0;
txr = container_of(ring, struct bnxt_tx_ring_info,
tx_ring_struct);
@@ -6801,6 +6839,9 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
if (bp->flags & BNXT_FLAG_TX_COAL_CMPL)
req->cmpl_coal_cnt =
RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_64;
+ if ((bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP) && bp->ptp_cfg)
+ flags |= RING_ALLOC_REQ_FLAGS_TX_PKT_TS_CMPL_ENABLE;
+ req->flags = cpu_to_le16(flags);
break;
}
case HWRM_RING_ALLOC_RX:
@@ -8981,7 +9022,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
u8 flags;
int rc;
- if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5(bp)) {
+ if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5_PLUS(bp)) {
rc = -ENODEV;
goto no_ptp;
}
@@ -8997,7 +9038,8 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
goto exit;
flags = resp->flags;
- if (!(flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS)) {
+ if (BNXT_CHIP_P5_AND_MINUS(bp) &&
+ !(flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS)) {
rc = -ENODEV;
goto exit;
}
@@ -9010,10 +9052,13 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
ptp->bp = bp;
bp->ptp_cfg = ptp;
}
- if (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK) {
+
+ if (flags &
+ (PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK |
+ PORT_MAC_PTP_QCFG_RESP_FLAGS_64B_PHC_TIME)) {
ptp->refclk_regs[0] = le32_to_cpu(resp->ts_ref_clock_reg_lower);
ptp->refclk_regs[1] = le32_to_cpu(resp->ts_ref_clock_reg_upper);
- } else if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
+ } else if (BNXT_CHIP_P5(bp)) {
ptp->refclk_regs[0] = BNXT_TS_REG_TIMESYNC_TS0_LOWER;
ptp->refclk_regs[1] = BNXT_TS_REG_TIMESYNC_TS0_UPPER;
} else {
@@ -9095,6 +9140,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
bp->fw_cap |= BNXT_FW_CAP_RX_ALL_PKT_TS;
if (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_UDP_GSO_SUPPORTED)
bp->flags |= BNXT_FLAG_UDP_GSO_CAP;
+ if (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_TX_PKT_TS_CMPL_SUPPORTED)
+ bp->fw_cap |= BNXT_FW_CAP_TX_TS_CMP;
bp->tx_push_thresh = 0;
if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) &&
@@ -12136,8 +12183,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
/* VF-reps may need to be re-opened after the PF is re-opened */
if (BNXT_PF(bp))
bnxt_vf_reps_open(bp);
- if (bp->ptp_cfg)
- atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS);
+ if (bp->ptp_cfg && !(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP))
+ WRITE_ONCE(bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS);
bnxt_ptp_init_rtc(bp, true);
bnxt_ptp_cfg_tstamp_filters(bp);
if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp))
@@ -12819,7 +12866,11 @@ bool bnxt_rfs_capable(struct bnxt *bp, bool new_rss_ctx)
if (!BNXT_NEW_RM(bp))
return true;
- if (hwr.vnic == bp->hw_resc.resv_vnics &&
+ /* Do not reduce VNIC and RSS ctx reservations. There is a FW
+ * issue that will mess up the default VNIC if we reduce the
+ * reservations.
+ */
+ if (hwr.vnic <= bp->hw_resc.resv_vnics &&
hwr.rss_ctx <= bp->hw_resc.resv_rsscos_ctxs)
return true;
@@ -15049,11 +15100,6 @@ static void bnxt_queue_mem_free(struct net_device *dev, void *qmem)
bnxt_free_one_rx_ring(bp, rxr);
bnxt_free_one_rx_agg_ring(bp, rxr);
- /* At this point, this NAPI instance has another page pool associated
- * with it. Disconnect here before freeing the old page pool to avoid
- * warnings.
- */
- rxr->page_pool->p.napi = NULL;
page_pool_destroy(rxr->page_pool);
rxr->page_pool = NULL;
@@ -15173,6 +15219,7 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx)
bnxt_hwrm_rx_ring_free(bp, rxr, false);
bnxt_hwrm_rx_agg_ring_free(bp, rxr, false);
rxr->rx_next_cons = 0;
+ page_pool_disable_direct_recycling(rxr->page_pool);
memcpy(qmem, rxr, sizeof(*rxr));
bnxt_init_rx_ring_struct(bp, qmem);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 9cf0acfa04e5..3c8826875ceb 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -181,6 +181,32 @@ struct tx_cmp {
#define TX_CMP_SQ_CONS_IDX(txcmp) \
(le32_to_cpu((txcmp)->sq_cons_idx) & TX_CMP_SQ_CONS_IDX_MASK)
+struct tx_ts_cmp {
+ __le32 tx_ts_cmp_flags_type;
+ #define TX_TS_CMP_FLAGS_ERROR (1 << 6)
+ #define TX_TS_CMP_FLAGS_TS_TYPE (1 << 7)
+ #define TX_TS_CMP_FLAGS_TS_TYPE_PM (0 << 7)
+ #define TX_TS_CMP_FLAGS_TS_TYPE_PA (1 << 7)
+ #define TX_TS_CMP_FLAGS_TS_FALLBACK (1 << 8)
+ #define TX_TS_CMP_TS_SUB_NS (0xf << 12)
+ #define TX_TS_CMP_TS_NS_MID (0xffff << 16)
+ #define TX_TS_CMP_TS_NS_MID_SFT 16
+ u32 tx_ts_cmp_opaque;
+ __le32 tx_ts_cmp_errors_v;
+ #define TX_TS_CMP_V (1 << 0)
+ #define TX_TS_CMP_TS_INVALID_ERR (1 << 10)
+ __le32 tx_ts_cmp_ts_ns_lo;
+};
+
+#define BNXT_GET_TX_TS_48B_NS(tscmp) \
+ (le32_to_cpu((tscmp)->tx_ts_cmp_ts_ns_lo) | \
+ ((u64)(le32_to_cpu((tscmp)->tx_ts_cmp_flags_type) & \
+ TX_TS_CMP_TS_NS_MID) << TX_TS_CMP_TS_NS_MID_SFT))
+
+#define BNXT_TX_TS_ERR(tscmp) \
+ (((tscmp)->tx_ts_cmp_flags_type & cpu_to_le32(TX_TS_CMP_FLAGS_ERROR)) &&\
+ ((tscmp)->tx_ts_cmp_errors_v & cpu_to_le32(TX_TS_CMP_TS_INVALID_ERR)))
+
struct rx_cmp {
__le32 rx_cmp_len_flags_type;
#define RX_CMP_CMP_TYPE (0x3f << 0)
@@ -848,11 +874,14 @@ struct bnxt_sw_tx_bd {
DEFINE_DMA_UNMAP_ADDR(mapping);
DEFINE_DMA_UNMAP_LEN(len);
struct page *page;
- u8 is_gso;
+ u8 is_ts_pkt;
u8 is_push;
u8 action;
unsigned short nr_frags;
- u16 rx_prod;
+ union {
+ u16 rx_prod;
+ u16 txts_prod;
+ };
};
struct bnxt_sw_rx_bd {
@@ -2237,9 +2266,17 @@ struct bnxt {
(BNXT_CHIP_NUM_58700((bp)->chip_num) && \
!BNXT_CHIP_TYPE_NITRO_A0(bp)))
+/* Chip class phase 3.x */
+#define BNXT_CHIP_P3(bp) \
+ (BNXT_CHIP_NUM_57X0X((bp)->chip_num) || \
+ BNXT_CHIP_TYPE_NITRO_A0(bp))
+
#define BNXT_CHIP_P4_PLUS(bp) \
(BNXT_CHIP_P4(bp) || BNXT_CHIP_P5_PLUS(bp))
+#define BNXT_CHIP_P5_AND_MINUS(bp) \
+ (BNXT_CHIP_P3(bp) || BNXT_CHIP_P4(bp) || BNXT_CHIP_P5(bp))
+
struct bnxt_aux_priv *aux_priv;
struct bnxt_en_dev *edev;
@@ -2384,6 +2421,7 @@ struct bnxt {
#define BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2 BIT_ULL(16)
#define BNXT_FW_CAP_PCIE_STATS_SUPPORTED BIT_ULL(17)
#define BNXT_FW_CAP_EXT_STATS_SUPPORTED BIT_ULL(18)
+ #define BNXT_FW_CAP_TX_TS_CMP BIT_ULL(19)
#define BNXT_FW_CAP_ERR_RECOVER_RELOAD BIT_ULL(20)
#define BNXT_FW_CAP_HOT_RESET BIT_ULL(21)
#define BNXT_FW_CAP_PTP_RTC BIT_ULL(22)
@@ -2776,6 +2814,7 @@ int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, struct bnxt_vnic_info *vnic,
void bnxt_fill_ipv6_mask(__be32 mask[4]);
int bnxt_alloc_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx);
void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx);
+u16 bnxt_get_max_rss_ctx_ring(struct bnxt *bp);
int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings);
int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic);
int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index bf157f6cc042..4d53ec7adc61 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -961,6 +961,12 @@ static int bnxt_set_channels(struct net_device *dev,
return rc;
}
+ if (req_rx_rings < bp->rx_nr_rings &&
+ req_rx_rings <= bnxt_get_max_rss_ctx_ring(bp)) {
+ netdev_warn(dev, "Can't deactivate rings used by RSS contexts\n");
+ return -EINVAL;
+ }
+
if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) !=
bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) &&
netif_is_rxfh_configured(dev)) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index a14d46b9bfdf..37d42423459c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -110,7 +110,7 @@ static void bnxt_ptp_get_current_time(struct bnxt *bp)
}
static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts,
- u32 txts_tmo)
+ u32 txts_tmo, int slot)
{
struct hwrm_port_ts_query_output *resp;
struct hwrm_port_ts_query_input *req;
@@ -123,11 +123,12 @@ static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts,
req->flags = cpu_to_le32(flags);
if ((flags & PORT_TS_QUERY_REQ_FLAGS_PATH) ==
PORT_TS_QUERY_REQ_FLAGS_PATH_TX) {
+ struct bnxt_ptp_tx_req *txts_req = &bp->ptp_cfg->txts_req[slot];
u32 tmo_us = txts_tmo * 1000;
req->enables = cpu_to_le16(BNXT_PTP_QTS_TX_ENABLES);
- req->ptp_seq_id = cpu_to_le32(bp->ptp_cfg->tx_seqid);
- req->ptp_hdr_offset = cpu_to_le16(bp->ptp_cfg->tx_hdr_off);
+ req->ptp_seq_id = cpu_to_le32(txts_req->tx_seqid);
+ req->ptp_hdr_offset = cpu_to_le16(txts_req->tx_hdr_off);
if (!tmo_us)
tmo_us = BNXT_PTP_QTS_TIMEOUT;
tmo_us = min(tmo_us, BNXT_PTP_QTS_MAX_TMO_US);
@@ -656,6 +657,14 @@ static int bnxt_map_ptp_regs(struct bnxt *bp)
(ptp->refclk_regs[i] & BNXT_GRC_OFFSET_MASK);
return 0;
}
+ if (bp->flags & BNXT_FLAG_CHIP_P7) {
+ for (i = 0; i < 2; i++) {
+ if (reg_arr[i] & BNXT_GRC_BASE_MASK)
+ return -EINVAL;
+ ptp->refclk_mapped_regs[i] = reg_arr[i];
+ }
+ return 0;
+ }
return -ENODEV;
}
@@ -674,43 +683,44 @@ static u64 bnxt_cc_read(const struct cyclecounter *cc)
return ns;
}
-static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb)
+static int bnxt_stamp_tx_skb(struct bnxt *bp, int slot)
{
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
struct skb_shared_hwtstamps timestamp;
+ struct bnxt_ptp_tx_req *txts_req;
unsigned long now = jiffies;
u64 ts = 0, ns = 0;
u32 tmo = 0;
int rc;
- if (!ptp->txts_pending)
- ptp->abs_txts_tmo = now + msecs_to_jiffies(ptp->txts_tmo);
- if (!time_after_eq(now, ptp->abs_txts_tmo))
- tmo = jiffies_to_msecs(ptp->abs_txts_tmo - now);
+ txts_req = &ptp->txts_req[slot];
+ /* make sure bnxt_get_tx_ts_p5() has updated abs_txts_tmo */
+ smp_rmb();
+ if (!time_after_eq(now, txts_req->abs_txts_tmo))
+ tmo = jiffies_to_msecs(txts_req->abs_txts_tmo - now);
rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_PATH_TX, &ts,
- tmo);
+ tmo, slot);
if (!rc) {
memset(&timestamp, 0, sizeof(timestamp));
spin_lock_bh(&ptp->ptp_lock);
ns = timecounter_cyc2time(&ptp->tc, ts);
spin_unlock_bh(&ptp->ptp_lock);
timestamp.hwtstamp = ns_to_ktime(ns);
- skb_tstamp_tx(ptp->tx_skb, &timestamp);
+ skb_tstamp_tx(txts_req->tx_skb, &timestamp);
ptp->stats.ts_pkts++;
} else {
- if (!time_after_eq(jiffies, ptp->abs_txts_tmo)) {
- ptp->txts_pending = true;
- return;
- }
+ if (!time_after_eq(jiffies, txts_req->abs_txts_tmo))
+ return -EAGAIN;
+
ptp->stats.ts_lost++;
netdev_warn_once(bp->dev,
"TS query for TX timer failed rc = %x\n", rc);
}
- dev_kfree_skb_any(ptp->tx_skb);
- ptp->tx_skb = NULL;
- atomic_inc(&ptp->tx_avail);
- ptp->txts_pending = false;
+ dev_kfree_skb_any(txts_req->tx_skb);
+ txts_req->tx_skb = NULL;
+
+ return 0;
}
static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info)
@@ -719,12 +729,30 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info)
ptp_info);
unsigned long now = jiffies;
struct bnxt *bp = ptp->bp;
+ u16 cons = ptp->txts_cons;
+ u32 num_requests;
+ int rc = 0;
+
+ num_requests = BNXT_MAX_TX_TS - READ_ONCE(ptp->tx_avail);
+ while (num_requests--) {
+ if (IS_ERR(ptp->txts_req[cons].tx_skb))
+ goto next_slot;
+ if (!ptp->txts_req[cons].tx_skb)
+ break;
+ rc = bnxt_stamp_tx_skb(bp, cons);
+ if (rc == -EAGAIN)
+ break;
+next_slot:
+ BNXT_PTP_INC_TX_AVAIL(ptp);
+ cons = NEXT_TXTS(cons);
+ }
+ ptp->txts_cons = cons;
- if (ptp->tx_skb)
- bnxt_stamp_tx_skb(bp, ptp->tx_skb);
-
- if (!time_after_eq(now, ptp->next_period))
+ if (!time_after_eq(now, ptp->next_period)) {
+ if (rc == -EAGAIN)
+ return 0;
return ptp->next_period - now;
+ }
bnxt_ptp_get_current_time(bp);
ptp->next_period = now + HZ;
@@ -734,22 +762,37 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info)
spin_unlock_bh(&ptp->ptp_lock);
ptp->next_overflow_check = now + BNXT_PHC_OVERFLOW_PERIOD;
}
- if (ptp->txts_pending)
+ if (rc == -EAGAIN)
return 0;
return HZ;
}
-int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb)
+int bnxt_ptp_get_txts_prod(struct bnxt_ptp_cfg *ptp, u16 *prod)
+{
+ spin_lock_bh(&ptp->ptp_tx_lock);
+ if (ptp->tx_avail) {
+ *prod = ptp->txts_prod;
+ ptp->txts_prod = NEXT_TXTS(*prod);
+ ptp->tx_avail--;
+ spin_unlock_bh(&ptp->ptp_tx_lock);
+ return 0;
+ }
+ spin_unlock_bh(&ptp->ptp_tx_lock);
+ atomic64_inc(&ptp->stats.ts_err);
+ return -ENOSPC;
+}
+
+void bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb, u16 prod)
{
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+ struct bnxt_ptp_tx_req *txts_req;
- if (ptp->tx_skb) {
- netdev_err(bp->dev, "deferring skb:one SKB is still outstanding\n");
- return -EBUSY;
- }
- ptp->tx_skb = skb;
+ txts_req = &ptp->txts_req[prod];
+ txts_req->abs_txts_tmo = jiffies + msecs_to_jiffies(ptp->txts_tmo);
+ /* make sure abs_txts_tmo is written first */
+ smp_wmb();
+ txts_req->tx_skb = skb;
ptp_schedule_worker(ptp->ptp_clock, 0);
- return 0;
}
int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts)
@@ -768,6 +811,38 @@ int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts)
return 0;
}
+void bnxt_tx_ts_cmp(struct bnxt *bp, struct bnxt_napi *bnapi,
+ struct tx_ts_cmp *tscmp)
+{
+ struct skb_shared_hwtstamps timestamp = {};
+ struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+ u32 opaque = tscmp->tx_ts_cmp_opaque;
+ struct bnxt_tx_ring_info *txr;
+ struct bnxt_sw_tx_bd *tx_buf;
+ u64 ts, ns;
+ u16 cons;
+
+ txr = bnapi->tx_ring[TX_OPAQUE_RING(opaque)];
+ ts = BNXT_GET_TX_TS_48B_NS(tscmp);
+ cons = TX_OPAQUE_IDX(opaque);
+ tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)];
+ if (tx_buf->is_ts_pkt) {
+ if (BNXT_TX_TS_ERR(tscmp)) {
+ netdev_err(bp->dev,
+ "timestamp completion error 0x%x 0x%x\n",
+ le32_to_cpu(tscmp->tx_ts_cmp_flags_type),
+ le32_to_cpu(tscmp->tx_ts_cmp_errors_v));
+ } else {
+ spin_lock_bh(&ptp->ptp_lock);
+ ns = timecounter_cyc2time(&ptp->tc, ts);
+ spin_unlock_bh(&ptp->ptp_lock);
+ timestamp.hwtstamp = ns_to_ktime(ns);
+ skb_tstamp_tx(tx_buf->skb, &timestamp);
+ }
+ tx_buf->is_ts_pkt = 0;
+ }
+}
+
static const struct ptp_clock_info bnxt_ptp_caps = {
.owner = THIS_MODULE,
.name = "bnxt clock",
@@ -914,7 +989,7 @@ int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg)
return rc;
} else {
rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME,
- &ns, 0);
+ &ns, 0, 0);
if (rc)
return rc;
}
@@ -954,8 +1029,9 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
bnxt_ptp_free(bp);
- atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS);
+ WRITE_ONCE(ptp->tx_avail, BNXT_MAX_TX_TS);
spin_lock_init(&ptp->ptp_lock);
+ spin_lock_init(&ptp->ptp_tx_lock);
if (BNXT_PTP_USE_RTC(bp)) {
bnxt_ptp_timecounter_init(bp, false);
@@ -986,7 +1062,7 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
ptp->stats.ts_lost = 0;
atomic64_set(&ptp->stats.ts_err, 0);
- if (BNXT_CHIP_P5(bp)) {
+ if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
spin_lock_bh(&ptp->ptp_lock);
bnxt_refclk_read(bp, NULL, &ptp->current_time);
WRITE_ONCE(ptp->old_time, ptp->current_time);
@@ -1005,6 +1081,7 @@ out:
void bnxt_ptp_clear(struct bnxt *bp)
{
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+ int i;
if (!ptp)
return;
@@ -1016,9 +1093,11 @@ void bnxt_ptp_clear(struct bnxt *bp)
kfree(ptp->ptp_info.pin_config);
ptp->ptp_info.pin_config = NULL;
- if (ptp->tx_skb) {
- dev_kfree_skb_any(ptp->tx_skb);
- ptp->tx_skb = NULL;
+ for (i = 0; i < BNXT_MAX_TX_TS; i++) {
+ if (ptp->txts_req[i].tx_skb) {
+ dev_kfree_skb_any(ptp->txts_req[i].tx_skb);
+ ptp->txts_req[i].tx_skb = NULL;
+ }
}
bnxt_unmap_ptp_regs(bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
index 8c30b428a428..a9a2f9a18c9c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
@@ -85,6 +85,16 @@ struct bnxt_ptp_stats {
atomic64_t ts_err;
};
+#define BNXT_MAX_TX_TS 4
+#define NEXT_TXTS(idx) (((idx) + 1) & (BNXT_MAX_TX_TS - 1))
+
+struct bnxt_ptp_tx_req {
+ struct sk_buff *tx_skb;
+ u16 tx_seqid;
+ u16 tx_hdr_off;
+ unsigned long abs_txts_tmo;
+};
+
struct bnxt_ptp_cfg {
struct ptp_clock_info ptp_info;
struct ptp_clock *ptp_clock;
@@ -93,7 +103,8 @@ struct bnxt_ptp_cfg {
struct bnxt_pps pps_info;
/* serialize timecounter access */
spinlock_t ptp_lock;
- struct sk_buff *tx_skb;
+ /* serialize ts tx request queuing */
+ spinlock_t ptp_tx_lock;
u64 current_time;
u64 old_time;
unsigned long next_period;
@@ -102,11 +113,10 @@ struct bnxt_ptp_cfg {
/* a 23b shift cyclecounter will overflow in ~36 mins. Check overflow every 18 mins. */
#define BNXT_PHC_OVERFLOW_PERIOD (18 * 60 * HZ)
- u16 tx_seqid;
- u16 tx_hdr_off;
+ struct bnxt_ptp_tx_req txts_req[BNXT_MAX_TX_TS];
+
struct bnxt *bp;
- atomic_t tx_avail;
-#define BNXT_MAX_TX_TS 1
+ u32 tx_avail;
u16 rxctl;
#define BNXT_PTP_MSG_SYNC (1 << 0)
#define BNXT_PTP_MSG_DELAY_REQ (1 << 1)
@@ -123,14 +133,14 @@ struct bnxt_ptp_cfg {
BNXT_PTP_MSG_PDELAY_REQ | \
BNXT_PTP_MSG_PDELAY_RESP)
u8 tx_tstamp_en:1;
- u8 txts_pending:1;
int rx_filter;
u32 tstamp_filters;
u32 refclk_regs[2];
u32 refclk_mapped_regs[2];
u32 txts_tmo;
- unsigned long abs_txts_tmo;
+ u16 txts_prod;
+ u16 txts_cons;
struct bnxt_ptp_stats stats;
};
@@ -147,6 +157,13 @@ do { \
((dst) = READ_ONCE(src))
#endif
+#define BNXT_PTP_INC_TX_AVAIL(ptp) \
+do { \
+ spin_lock_bh(&(ptp)->ptp_tx_lock); \
+ (ptp)->tx_avail++; \
+ spin_unlock_bh(&(ptp)->ptp_tx_lock); \
+} while (0)
+
int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off);
void bnxt_ptp_update_current_time(struct bnxt *bp);
void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2);
@@ -154,8 +171,11 @@ int bnxt_ptp_cfg_tstamp_filters(struct bnxt *bp);
void bnxt_ptp_reapply_pps(struct bnxt *bp);
int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr);
int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr);
-int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb);
+int bnxt_ptp_get_txts_prod(struct bnxt_ptp_cfg *ptp, u16 *prod);
+void bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb, u16 prod);
int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts);
+void bnxt_tx_ts_cmp(struct bnxt *bp, struct bnxt_napi *bnapi,
+ struct tx_ts_cmp *tscmp);
void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns);
int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg);
int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg);
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index 241906697019..a42f3f280f3e 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -608,6 +608,28 @@ static int enic_get_ts_info(struct net_device *netdev,
return 0;
}
+static void enic_get_channels(struct net_device *netdev,
+ struct ethtool_channels *channels)
+{
+ struct enic *enic = netdev_priv(netdev);
+
+ switch (vnic_dev_get_intr_mode(enic->vdev)) {
+ case VNIC_DEV_INTR_MODE_MSIX:
+ channels->max_rx = ENIC_RQ_MAX;
+ channels->max_tx = ENIC_WQ_MAX;
+ channels->rx_count = enic->rq_count;
+ channels->tx_count = enic->wq_count;
+ break;
+ case VNIC_DEV_INTR_MODE_MSI:
+ case VNIC_DEV_INTR_MODE_INTX:
+ channels->max_combined = 1;
+ channels->combined_count = 1;
+ break;
+ default:
+ break;
+ }
+}
+
static const struct ethtool_ops enic_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
ETHTOOL_COALESCE_USE_ADAPTIVE_RX |
@@ -632,6 +654,7 @@ static const struct ethtool_ops enic_ethtool_ops = {
.set_rxfh = enic_set_rxfh,
.get_link_ksettings = enic_get_ksettings,
.get_ts_info = enic_get_ts_info,
+ .get_channels = enic_get_channels,
};
void enic_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 2e98a2a0bead..ce227b56cf72 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1109,6 +1109,46 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link)
}
/**
+ * e1000e_force_smbus - Force interfaces to transition to SMBUS mode.
+ * @hw: pointer to the HW structure
+ *
+ * Force the MAC and the PHY to SMBUS mode. Assumes semaphore already
+ * acquired.
+ *
+ * Return: 0 on success, negative errno on failure.
+ **/
+static s32 e1000e_force_smbus(struct e1000_hw *hw)
+{
+ u16 smb_ctrl = 0;
+ u32 ctrl_ext;
+ s32 ret_val;
+
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
+ /* Force SMBus mode in the PHY */
+ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &smb_ctrl);
+ if (ret_val) {
+ e1000e_enable_phy_retry(hw);
+ return ret_val;
+ }
+
+ smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS;
+ e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, smb_ctrl);
+
+ e1000e_enable_phy_retry(hw);
+
+ /* Force SMBus mode in the MAC */
+ ctrl_ext = er32(CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS;
+ ew32(CTRL_EXT, ctrl_ext);
+
+ return 0;
+}
+
+/**
* e1000_enable_ulp_lpt_lp - configure Ultra Low Power mode for LynxPoint-LP
* @hw: pointer to the HW structure
* @to_sx: boolean indicating a system power state transition to Sx
@@ -1165,6 +1205,14 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
if (ret_val)
goto out;
+ if (hw->mac.type != e1000_pch_mtp) {
+ ret_val = e1000e_force_smbus(hw);
+ if (ret_val) {
+ e_dbg("Failed to force SMBUS: %d\n", ret_val);
+ goto release;
+ }
+ }
+
/* Si workaround for ULP entry flow on i127/rev6 h/w. Enable
* LPLU and disable Gig speed when entering ULP
*/
@@ -1225,27 +1273,12 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
}
release:
- /* Switching PHY interface always returns MDI error
- * so disable retry mechanism to avoid wasting time
- */
- e1000e_disable_phy_retry(hw);
-
- /* Force SMBus mode in PHY */
- ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
- if (ret_val) {
- e1000e_enable_phy_retry(hw);
- hw->phy.ops.release(hw);
- goto out;
+ if (hw->mac.type == e1000_pch_mtp) {
+ ret_val = e1000e_force_smbus(hw);
+ if (ret_val)
+ e_dbg("Failed to force SMBUS over MTL system: %d\n",
+ ret_val);
}
- phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
- e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
-
- e1000e_enable_phy_retry(hw);
-
- /* Force SMBus mode in MAC */
- mac_reg = er32(CTRL_EXT);
- mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
- ew32(CTRL_EXT, mac_reg);
hw->phy.ops.release(hw);
out:
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index da5c59daf8ba..3cd161c6672b 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6363,49 +6363,49 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter)
mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG;
ew32(EXTCNF_CTRL, mac_data);
- /* Enable the Dynamic Power Gating in the MAC */
- mac_data = er32(FEXTNVM7);
- mac_data |= BIT(22);
- ew32(FEXTNVM7, mac_data);
-
/* Disable disconnected cable conditioning for Power Gating */
mac_data = er32(DPGFR);
mac_data |= BIT(2);
ew32(DPGFR, mac_data);
- /* Don't wake from dynamic Power Gating with clock request */
- mac_data = er32(FEXTNVM12);
- mac_data |= BIT(12);
- ew32(FEXTNVM12, mac_data);
-
- /* Ungate PGCB clock */
- mac_data = er32(FEXTNVM9);
- mac_data &= ~BIT(28);
- ew32(FEXTNVM9, mac_data);
-
- /* Enable K1 off to enable mPHY Power Gating */
- mac_data = er32(FEXTNVM6);
- mac_data |= BIT(31);
- ew32(FEXTNVM6, mac_data);
-
- /* Enable mPHY power gating for any link and speed */
- mac_data = er32(FEXTNVM8);
- mac_data |= BIT(9);
- ew32(FEXTNVM8, mac_data);
-
/* Enable the Dynamic Clock Gating in the DMA and MAC */
mac_data = er32(CTRL_EXT);
mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN;
ew32(CTRL_EXT, mac_data);
-
- /* No MAC DPG gating SLP_S0 in modern standby
- * Switch the logic of the lanphypc to use PMC counter
- */
- mac_data = er32(FEXTNVM5);
- mac_data |= BIT(7);
- ew32(FEXTNVM5, mac_data);
}
+ /* Enable the Dynamic Power Gating in the MAC */
+ mac_data = er32(FEXTNVM7);
+ mac_data |= BIT(22);
+ ew32(FEXTNVM7, mac_data);
+
+ /* Don't wake from dynamic Power Gating with clock request */
+ mac_data = er32(FEXTNVM12);
+ mac_data |= BIT(12);
+ ew32(FEXTNVM12, mac_data);
+
+ /* Ungate PGCB clock */
+ mac_data = er32(FEXTNVM9);
+ mac_data &= ~BIT(28);
+ ew32(FEXTNVM9, mac_data);
+
+ /* Enable K1 off to enable mPHY Power Gating */
+ mac_data = er32(FEXTNVM6);
+ mac_data |= BIT(31);
+ ew32(FEXTNVM6, mac_data);
+
+ /* Enable mPHY power gating for any link and speed */
+ mac_data = er32(FEXTNVM8);
+ mac_data |= BIT(9);
+ ew32(FEXTNVM8, mac_data);
+
+ /* No MAC DPG gating SLP_S0 in modern standby
+ * Switch the logic of the lanphypc to use PMC counter
+ */
+ mac_data = er32(FEXTNVM5);
+ mac_data |= BIT(7);
+ ew32(FEXTNVM5, mac_data);
+
/* Disable the time synchronization clock */
mac_data = er32(FEXTNVM7);
mac_data |= BIT(31);
@@ -6498,33 +6498,6 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
} else {
/* Request driver unconfigure the device from S0ix */
- /* Disable the Dynamic Power Gating in the MAC */
- mac_data = er32(FEXTNVM7);
- mac_data &= 0xFFBFFFFF;
- ew32(FEXTNVM7, mac_data);
-
- /* Disable mPHY power gating for any link and speed */
- mac_data = er32(FEXTNVM8);
- mac_data &= ~BIT(9);
- ew32(FEXTNVM8, mac_data);
-
- /* Disable K1 off */
- mac_data = er32(FEXTNVM6);
- mac_data &= ~BIT(31);
- ew32(FEXTNVM6, mac_data);
-
- /* Disable Ungate PGCB clock */
- mac_data = er32(FEXTNVM9);
- mac_data |= BIT(28);
- ew32(FEXTNVM9, mac_data);
-
- /* Cancel not waking from dynamic
- * Power Gating with clock request
- */
- mac_data = er32(FEXTNVM12);
- mac_data &= ~BIT(12);
- ew32(FEXTNVM12, mac_data);
-
/* Cancel disable disconnected cable conditioning
* for Power Gating
*/
@@ -6537,13 +6510,6 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
mac_data &= 0xFFF7FFFF;
ew32(CTRL_EXT, mac_data);
- /* Revert the lanphypc logic to use the internal Gbe counter
- * and not the PMC counter
- */
- mac_data = er32(FEXTNVM5);
- mac_data &= 0xFFFFFF7F;
- ew32(FEXTNVM5, mac_data);
-
/* Enable the periodic inband message,
* Request PCIe clock in K1 page770_17[10:9] =01b
*/
@@ -6581,6 +6547,40 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
mac_data &= ~BIT(31);
mac_data |= BIT(0);
ew32(FEXTNVM7, mac_data);
+
+ /* Disable the Dynamic Power Gating in the MAC */
+ mac_data = er32(FEXTNVM7);
+ mac_data &= 0xFFBFFFFF;
+ ew32(FEXTNVM7, mac_data);
+
+ /* Disable mPHY power gating for any link and speed */
+ mac_data = er32(FEXTNVM8);
+ mac_data &= ~BIT(9);
+ ew32(FEXTNVM8, mac_data);
+
+ /* Disable K1 off */
+ mac_data = er32(FEXTNVM6);
+ mac_data &= ~BIT(31);
+ ew32(FEXTNVM6, mac_data);
+
+ /* Disable Ungate PGCB clock */
+ mac_data = er32(FEXTNVM9);
+ mac_data |= BIT(28);
+ ew32(FEXTNVM9, mac_data);
+
+ /* Cancel not waking from dynamic
+ * Power Gating with clock request
+ */
+ mac_data = er32(FEXTNVM12);
+ mac_data &= ~BIT(12);
+ ew32(FEXTNVM12, mac_data);
+
+ /* Revert the lanphypc logic to use the internal Gbe counter
+ * and not the PMC counter
+ */
+ mac_data = er32(FEXTNVM5);
+ mac_data &= 0xFFFFFF7F;
+ ew32(FEXTNVM5, mac_data);
}
static int e1000e_pm_freeze(struct device *dev)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 284c3fad5a6e..310513d9321b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -13293,6 +13293,10 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
bool need_reset;
int i;
+ /* VSI shall be deleted in a moment, block loading new programs */
+ if (prog && test_bit(__I40E_IN_REMOVE, pf->state))
+ return -EINVAL;
+
/* Don't allow frames that span over multiple buffers */
if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) {
NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags");
@@ -13301,14 +13305,9 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
/* When turning XDP on->off/off->on we reset and rebuild the rings. */
need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog);
-
if (need_reset)
i40e_prep_for_reset(pf);
- /* VSI shall be deleted in a moment, just return EINVAL */
- if (test_bit(__I40E_IN_REMOVE, pf->state))
- return -EINVAL;
-
old_prog = xchg(&vsi->xdp_prog, prog);
if (need_reset) {
diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c
index c9fbeebf7fb9..00fed5a61d62 100644
--- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c
+++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c
@@ -373,6 +373,62 @@ void ice_devlink_destroy_pf_port(struct ice_pf *pf)
}
/**
+ * ice_devlink_port_get_vf_fn_mac - .port_fn_hw_addr_get devlink handler
+ * @port: devlink port structure
+ * @hw_addr: MAC address of the port
+ * @hw_addr_len: length of MAC address
+ * @extack: extended netdev ack structure
+ *
+ * Callback for the devlink .port_fn_hw_addr_get operation
+ * Return: zero on success or an error code on failure.
+ */
+static int ice_devlink_port_get_vf_fn_mac(struct devlink_port *port,
+ u8 *hw_addr, int *hw_addr_len,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_vf *vf = container_of(port, struct ice_vf, devlink_port);
+
+ ether_addr_copy(hw_addr, vf->dev_lan_addr);
+ *hw_addr_len = ETH_ALEN;
+
+ return 0;
+}
+
+/**
+ * ice_devlink_port_set_vf_fn_mac - .port_fn_hw_addr_set devlink handler
+ * @port: devlink port structure
+ * @hw_addr: MAC address of the port
+ * @hw_addr_len: length of MAC address
+ * @extack: extended netdev ack structure
+ *
+ * Callback for the devlink .port_fn_hw_addr_set operation
+ * Return: zero on success or an error code on failure.
+ */
+static int ice_devlink_port_set_vf_fn_mac(struct devlink_port *port,
+ const u8 *hw_addr,
+ int hw_addr_len,
+ struct netlink_ext_ack *extack)
+
+{
+ struct devlink_port_attrs *attrs = &port->attrs;
+ struct devlink_port_pci_vf_attrs *pci_vf;
+ struct devlink *devlink = port->devlink;
+ struct ice_pf *pf;
+ u16 vf_id;
+
+ pf = devlink_priv(devlink);
+ pci_vf = &attrs->pci_vf;
+ vf_id = pci_vf->vf;
+
+ return __ice_set_vf_mac(pf, vf_id, hw_addr);
+}
+
+static const struct devlink_port_ops ice_devlink_vf_port_ops = {
+ .port_fn_hw_addr_get = ice_devlink_port_get_vf_fn_mac,
+ .port_fn_hw_addr_set = ice_devlink_port_set_vf_fn_mac,
+};
+
+/**
* ice_devlink_create_vf_port - Create a devlink port for this VF
* @vf: the VF to create a port for
*
@@ -407,7 +463,8 @@ int ice_devlink_create_vf_port(struct ice_vf *vf)
devlink_port_attrs_set(devlink_port, &attrs);
devlink = priv_to_devlink(pf);
- err = devl_port_register(devlink, devlink_port, vsi->idx);
+ err = devl_port_register_with_ops(devlink, devlink_port, vsi->idx,
+ &ice_devlink_vf_port_ops);
if (err) {
dev_err(dev, "Failed to create devlink port for VF %d, error %d\n",
vf->vf_id, err);
diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
index 52d15ef7f4b1..ad84d8ad49a6 100644
--- a/drivers/net/ethernet/intel/ice/ice_adapter.c
+++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
@@ -11,6 +11,7 @@
#include "ice_adapter.h"
static DEFINE_XARRAY(ice_adapters);
+static DEFINE_MUTEX(ice_adapters_mutex);
/* PCI bus number is 8 bits. Slot is 5 bits. Domain can have the rest. */
#define INDEX_FIELD_DOMAIN GENMASK(BITS_PER_LONG - 1, 13)
@@ -47,8 +48,6 @@ static void ice_adapter_free(struct ice_adapter *adapter)
kfree(adapter);
}
-DEFINE_FREE(ice_adapter_free, struct ice_adapter*, if (_T) ice_adapter_free(_T))
-
/**
* ice_adapter_get - Get a shared ice_adapter structure.
* @pdev: Pointer to the pci_dev whose driver is getting the ice_adapter.
@@ -64,27 +63,26 @@ DEFINE_FREE(ice_adapter_free, struct ice_adapter*, if (_T) ice_adapter_free(_T))
*/
struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
{
- struct ice_adapter *ret, __free(ice_adapter_free) *adapter = NULL;
unsigned long index = ice_adapter_index(pdev);
-
- adapter = ice_adapter_new();
- if (!adapter)
- return ERR_PTR(-ENOMEM);
-
- xa_lock(&ice_adapters);
- ret = __xa_cmpxchg(&ice_adapters, index, NULL, adapter, GFP_KERNEL);
- if (xa_is_err(ret)) {
- ret = ERR_PTR(xa_err(ret));
- goto unlock;
- }
- if (ret) {
- refcount_inc(&ret->refcount);
- goto unlock;
+ struct ice_adapter *adapter;
+ int err;
+
+ scoped_guard(mutex, &ice_adapters_mutex) {
+ err = xa_insert(&ice_adapters, index, NULL, GFP_KERNEL);
+ if (err == -EBUSY) {
+ adapter = xa_load(&ice_adapters, index);
+ refcount_inc(&adapter->refcount);
+ return adapter;
+ }
+ if (err)
+ return ERR_PTR(err);
+
+ adapter = ice_adapter_new();
+ if (!adapter)
+ return ERR_PTR(-ENOMEM);
+ xa_store(&ice_adapters, index, adapter, GFP_KERNEL);
}
- ret = no_free_ptr(adapter);
-unlock:
- xa_unlock(&ice_adapters);
- return ret;
+ return adapter;
}
/**
@@ -94,23 +92,21 @@ unlock:
* Releases the reference to ice_adapter previously obtained with
* ice_adapter_get.
*
- * Context: Any.
+ * Context: Process, may sleep.
*/
void ice_adapter_put(const struct pci_dev *pdev)
{
unsigned long index = ice_adapter_index(pdev);
struct ice_adapter *adapter;
- xa_lock(&ice_adapters);
- adapter = xa_load(&ice_adapters, index);
- if (WARN_ON(!adapter))
- goto unlock;
+ scoped_guard(mutex, &ice_adapters_mutex) {
+ adapter = xa_load(&ice_adapters, index);
+ if (WARN_ON(!adapter))
+ return;
+ if (!refcount_dec_and_test(&adapter->refcount))
+ return;
- if (!refcount_dec_and_test(&adapter->refcount))
- goto unlock;
-
- WARN_ON(__xa_erase(&ice_adapters, index) != adapter);
+ WARN_ON(xa_erase(&ice_adapters, index) != adapter);
+ }
ice_adapter_free(adapter);
-unlock:
- xa_unlock(&ice_adapters);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 621a2ca7093e..b70d4ca43443 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1461,6 +1461,55 @@ struct ice_aqc_get_sensor_reading_resp {
} data;
};
+/* DNL call command (indirect 0x0682)
+ * Struct is used for both command and response
+ */
+struct ice_aqc_dnl_call_command {
+ u8 ctx; /* Used in command, reserved in response */
+ u8 reserved;
+ __le16 activity_id;
+#define ICE_AQC_ACT_ID_DNL 0x1129
+ __le32 reserved1;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ice_aqc_dnl_equa_param {
+ __le16 data_in;
+#define ICE_AQC_RX_EQU_SHIFT 8
+#define ICE_AQC_RX_EQU_PRE2 (0x10 << ICE_AQC_RX_EQU_SHIFT)
+#define ICE_AQC_RX_EQU_PRE1 (0x11 << ICE_AQC_RX_EQU_SHIFT)
+#define ICE_AQC_RX_EQU_POST1 (0x12 << ICE_AQC_RX_EQU_SHIFT)
+#define ICE_AQC_RX_EQU_BFLF (0x13 << ICE_AQC_RX_EQU_SHIFT)
+#define ICE_AQC_RX_EQU_BFHF (0x14 << ICE_AQC_RX_EQU_SHIFT)
+#define ICE_AQC_RX_EQU_DRATE (0x15 << ICE_AQC_RX_EQU_SHIFT)
+#define ICE_AQC_TX_EQU_PRE1 0x0
+#define ICE_AQC_TX_EQU_PRE3 0x3
+#define ICE_AQC_TX_EQU_ATTEN 0x4
+#define ICE_AQC_TX_EQU_POST1 0x8
+#define ICE_AQC_TX_EQU_PRE2 0xC
+ __le16 op_code_serdes_sel;
+#define ICE_AQC_OP_CODE_SHIFT 4
+#define ICE_AQC_OP_CODE_RX_EQU (0x9 << ICE_AQC_OP_CODE_SHIFT)
+#define ICE_AQC_OP_CODE_TX_EQU (0x10 << ICE_AQC_OP_CODE_SHIFT)
+ __le32 reserved[3];
+};
+
+struct ice_aqc_dnl_equa_respon {
+ /* Equalization value can be negative */
+ int val;
+ __le32 reserved[3];
+};
+
+/* DNL call command/response buffer (indirect 0x0682) */
+struct ice_aqc_dnl_call {
+ union {
+ struct ice_aqc_dnl_equa_param txrx_equa_reqs;
+ __le32 stores[4];
+ struct ice_aqc_dnl_equa_respon txrx_equa_resp;
+ } sto;
+};
+
struct ice_aqc_link_topo_params {
u8 lport_num;
u8 lport_num_valid;
@@ -2564,6 +2613,7 @@ struct ice_aq_desc {
struct ice_aqc_get_link_status get_link_status;
struct ice_aqc_event_lan_overflow lan_overflow;
struct ice_aqc_get_link_topo get_link_topo;
+ struct ice_aqc_dnl_call_command dnl_call;
struct ice_aqc_i2c read_write_i2c;
struct ice_aqc_read_i2c_resp read_i2c_resp;
struct ice_aqc_get_set_tx_topo get_set_tx_topo;
@@ -2688,6 +2738,7 @@ enum ice_adminq_opc {
ice_aqc_opc_set_phy_rec_clk_out = 0x0630,
ice_aqc_opc_get_phy_rec_clk_out = 0x0631,
ice_aqc_opc_get_sensor_reading = 0x0632,
+ ice_aqc_opc_dnl_call = 0x0682,
ice_aqc_opc_get_link_topo = 0x06E0,
ice_aqc_opc_read_i2c = 0x06E2,
ice_aqc_opc_write_i2c = 0x06E3,
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 9ae61cd8923e..e311a41a74fa 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1497,8 +1497,9 @@ ice_sbq_send_cmd(struct ice_hw *hw, struct ice_sbq_cmd_desc *desc,
* ice_sbq_rw_reg - Fill Sideband Queue command
* @hw: pointer to the HW struct
* @in: message info to be filled in descriptor
+ * @flags: control queue descriptor flags
*/
-int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in)
+int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in, u16 flags)
{
struct ice_sbq_cmd_desc desc = {0};
struct ice_sbq_msg_req msg = {0};
@@ -1522,7 +1523,7 @@ int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in)
*/
msg_len -= sizeof(msg.data);
- desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD);
+ desc.flags = cpu_to_le16(flags);
desc.opcode = cpu_to_le16(ice_sbq_opc_neigh_dev_req);
desc.param0.cmd_len = cpu_to_le16(msg_len);
status = ice_sbq_send_cmd(hw, &desc, &msg, msg_len, NULL);
@@ -3371,6 +3372,100 @@ int ice_update_link_info(struct ice_port_info *pi)
}
/**
+ * ice_aq_get_phy_equalization - function to read serdes equaliser
+ * value from firmware using admin queue command.
+ * @hw: pointer to the HW struct
+ * @data_in: represents the serdes equalization parameter requested
+ * @op_code: represents the serdes number and flag to represent tx or rx
+ * @serdes_num: represents the serdes number
+ * @output: pointer to the caller-supplied buffer to return serdes equaliser
+ *
+ * Return: non-zero status on error and 0 on success.
+ */
+int ice_aq_get_phy_equalization(struct ice_hw *hw, u16 data_in, u16 op_code,
+ u8 serdes_num, int *output)
+{
+ struct ice_aqc_dnl_call_command *cmd;
+ struct ice_aqc_dnl_call buf = {};
+ struct ice_aq_desc desc;
+ int err;
+
+ buf.sto.txrx_equa_reqs.data_in = cpu_to_le16(data_in);
+ buf.sto.txrx_equa_reqs.op_code_serdes_sel =
+ cpu_to_le16(op_code | (serdes_num & 0xF));
+ cmd = &desc.params.dnl_call;
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_dnl_call);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_BUF |
+ ICE_AQ_FLAG_RD |
+ ICE_AQ_FLAG_SI);
+ desc.datalen = cpu_to_le16(sizeof(struct ice_aqc_dnl_call));
+ cmd->activity_id = cpu_to_le16(ICE_AQC_ACT_ID_DNL);
+
+ err = ice_aq_send_cmd(hw, &desc, &buf, sizeof(struct ice_aqc_dnl_call),
+ NULL);
+ *output = err ? 0 : buf.sto.txrx_equa_resp.val;
+
+ return err;
+}
+
+#define FEC_REG_PORT(port) { \
+ FEC_CORR_LOW_REG_PORT##port, \
+ FEC_CORR_HIGH_REG_PORT##port, \
+ FEC_UNCORR_LOW_REG_PORT##port, \
+ FEC_UNCORR_HIGH_REG_PORT##port, \
+}
+
+static const u32 fec_reg[][ICE_FEC_MAX] = {
+ FEC_REG_PORT(0),
+ FEC_REG_PORT(1),
+ FEC_REG_PORT(2),
+ FEC_REG_PORT(3)
+};
+
+/**
+ * ice_aq_get_fec_stats - reads fec stats from phy
+ * @hw: pointer to the HW struct
+ * @pcs_quad: represents pcsquad of user input serdes
+ * @pcs_port: represents the pcs port number part of above pcs quad
+ * @fec_type: represents FEC stats type
+ * @output: pointer to the caller-supplied buffer to return requested fec stats
+ *
+ * Return: non-zero status on error and 0 on success.
+ */
+int ice_aq_get_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port,
+ enum ice_fec_stats_types fec_type, u32 *output)
+{
+ u16 flag = (ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF | ICE_AQ_FLAG_SI);
+ struct ice_sbq_msg_input msg = {};
+ u32 receiver_id, reg_offset;
+ int err;
+
+ if (pcs_port > 3)
+ return -EINVAL;
+
+ reg_offset = fec_reg[pcs_port][fec_type];
+
+ if (pcs_quad == 0)
+ receiver_id = FEC_RECEIVER_ID_PCS0;
+ else if (pcs_quad == 1)
+ receiver_id = FEC_RECEIVER_ID_PCS1;
+ else
+ return -EINVAL;
+
+ msg.msg_addr_low = lower_16_bits(reg_offset);
+ msg.msg_addr_high = receiver_id;
+ msg.opcode = ice_sbq_msg_rd;
+ msg.dest_dev = rmn_0;
+
+ err = ice_sbq_rw_reg(hw, &msg, flag);
+ if (err)
+ return err;
+
+ *output = msg.data;
+ return 0;
+}
+
+/**
* ice_cache_phy_user_req
* @pi: port information structure
* @cache_data: PHY logging data
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 86cc1df469dd..66f29bac783a 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -17,13 +17,34 @@
#define ICE_SQ_SEND_DELAY_TIME_MS 10
#define ICE_SQ_SEND_MAX_EXECUTE 3
+#define FEC_REG_SHIFT 2
+#define FEC_RECV_ID_SHIFT 4
+#define FEC_CORR_LOW_REG_PORT0 (0x02 << FEC_REG_SHIFT)
+#define FEC_CORR_HIGH_REG_PORT0 (0x03 << FEC_REG_SHIFT)
+#define FEC_UNCORR_LOW_REG_PORT0 (0x04 << FEC_REG_SHIFT)
+#define FEC_UNCORR_HIGH_REG_PORT0 (0x05 << FEC_REG_SHIFT)
+#define FEC_CORR_LOW_REG_PORT1 (0x42 << FEC_REG_SHIFT)
+#define FEC_CORR_HIGH_REG_PORT1 (0x43 << FEC_REG_SHIFT)
+#define FEC_UNCORR_LOW_REG_PORT1 (0x44 << FEC_REG_SHIFT)
+#define FEC_UNCORR_HIGH_REG_PORT1 (0x45 << FEC_REG_SHIFT)
+#define FEC_CORR_LOW_REG_PORT2 (0x4A << FEC_REG_SHIFT)
+#define FEC_CORR_HIGH_REG_PORT2 (0x4B << FEC_REG_SHIFT)
+#define FEC_UNCORR_LOW_REG_PORT2 (0x4C << FEC_REG_SHIFT)
+#define FEC_UNCORR_HIGH_REG_PORT2 (0x4D << FEC_REG_SHIFT)
+#define FEC_CORR_LOW_REG_PORT3 (0x52 << FEC_REG_SHIFT)
+#define FEC_CORR_HIGH_REG_PORT3 (0x53 << FEC_REG_SHIFT)
+#define FEC_UNCORR_LOW_REG_PORT3 (0x54 << FEC_REG_SHIFT)
+#define FEC_UNCORR_HIGH_REG_PORT3 (0x55 << FEC_REG_SHIFT)
+#define FEC_RECEIVER_ID_PCS0 (0x33 << FEC_RECV_ID_SHIFT)
+#define FEC_RECEIVER_ID_PCS1 (0x34 << FEC_RECV_ID_SHIFT)
+
int ice_init_hw(struct ice_hw *hw);
void ice_deinit_hw(struct ice_hw *hw);
int ice_check_reset(struct ice_hw *hw);
int ice_reset(struct ice_hw *hw, enum ice_reset_req req);
int ice_create_all_ctrlq(struct ice_hw *hw);
int ice_init_all_ctrlq(struct ice_hw *hw);
-void ice_shutdown_all_ctrlq(struct ice_hw *hw);
+void ice_shutdown_all_ctrlq(struct ice_hw *hw, bool unloading);
void ice_destroy_all_ctrlq(struct ice_hw *hw);
int
ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
@@ -121,6 +142,11 @@ int
ice_get_link_default_override(struct ice_link_default_override_tlv *ldo,
struct ice_port_info *pi);
bool ice_is_phy_caps_an_enabled(struct ice_aqc_get_phy_caps_data *caps);
+int ice_aq_get_phy_equalization(struct ice_hw *hw, u16 data_in, u16 op_code,
+ u8 serdes_num, int *output);
+int
+ice_aq_get_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port,
+ enum ice_fec_stats_types fec_type, u32 *output);
enum ice_fc_mode ice_caps_to_fc_mode(u8 caps);
enum ice_fec_mode ice_caps_to_fec_mode(u8 caps, u8 fec_options);
@@ -201,7 +227,7 @@ int ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle);
void ice_replay_post(struct ice_hw *hw);
struct ice_q_ctx *
ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle);
-int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in);
+int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in, u16 flag);
int
ice_aq_get_cgu_abilities(struct ice_hw *hw,
struct ice_aqc_get_cgu_abilities *abilities);
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index ffe660f34992..ffaa6511c455 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -510,16 +510,19 @@ shutdown_sq_out:
*/
static bool ice_aq_ver_check(struct ice_hw *hw)
{
- if (hw->api_maj_ver > EXP_FW_API_VER_MAJOR) {
+ u8 exp_fw_api_ver_major = EXP_FW_API_VER_MAJOR_BY_MAC(hw);
+ u8 exp_fw_api_ver_minor = EXP_FW_API_VER_MINOR_BY_MAC(hw);
+
+ if (hw->api_maj_ver > exp_fw_api_ver_major) {
/* Major API version is newer than expected, don't load */
dev_warn(ice_hw_to_dev(hw),
"The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
return false;
- } else if (hw->api_maj_ver == EXP_FW_API_VER_MAJOR) {
- if (hw->api_min_ver > (EXP_FW_API_VER_MINOR + 2))
+ } else if (hw->api_maj_ver == exp_fw_api_ver_major) {
+ if (hw->api_min_ver > (exp_fw_api_ver_minor + 2))
dev_info(ice_hw_to_dev(hw),
"The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n");
- else if ((hw->api_min_ver + 2) < EXP_FW_API_VER_MINOR)
+ else if ((hw->api_min_ver + 2) < exp_fw_api_ver_minor)
dev_info(ice_hw_to_dev(hw),
"The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
} else {
@@ -684,10 +687,12 @@ struct ice_ctl_q_info *ice_get_sbq(struct ice_hw *hw)
* ice_shutdown_ctrlq - shutdown routine for any control queue
* @hw: pointer to the hardware structure
* @q_type: specific Control queue type
+ * @unloading: is the driver unloading itself
*
* NOTE: this function does not destroy the control queue locks.
*/
-static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type,
+ bool unloading)
{
struct ice_ctl_q_info *cq;
@@ -695,7 +700,7 @@ static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
case ICE_CTL_Q_ADMIN:
cq = &hw->adminq;
if (ice_check_sq_alive(hw, cq))
- ice_aq_q_shutdown(hw, true);
+ ice_aq_q_shutdown(hw, unloading);
break;
case ICE_CTL_Q_SB:
cq = &hw->sbq;
@@ -714,20 +719,21 @@ static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
/**
* ice_shutdown_all_ctrlq - shutdown routine for all control queues
* @hw: pointer to the hardware structure
+ * @unloading: is the driver unloading itself
*
* NOTE: this function does not destroy the control queue locks. The driver
* may call this at runtime to shutdown and later restart control queues, such
* as in response to a reset event.
*/
-void ice_shutdown_all_ctrlq(struct ice_hw *hw)
+void ice_shutdown_all_ctrlq(struct ice_hw *hw, bool unloading)
{
/* Shutdown FW admin queue */
- ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+ ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN, unloading);
/* Shutdown PHY Sideband */
if (ice_is_sbq_supported(hw))
- ice_shutdown_ctrlq(hw, ICE_CTL_Q_SB);
+ ice_shutdown_ctrlq(hw, ICE_CTL_Q_SB, unloading);
/* Shutdown PF-VF Mailbox */
- ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
+ ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX, unloading);
}
/**
@@ -759,7 +765,7 @@ int ice_init_all_ctrlq(struct ice_hw *hw)
break;
ice_debug(hw, ICE_DBG_AQ_MSG, "Retry Admin Queue init due to FW critical error\n");
- ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+ ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN, true);
msleep(ICE_CTL_Q_ADMIN_INIT_MSEC);
} while (retry++ < ICE_CTL_Q_ADMIN_INIT_TIMEOUT);
@@ -840,7 +846,7 @@ static void ice_destroy_ctrlq_locks(struct ice_ctl_q_info *cq)
void ice_destroy_all_ctrlq(struct ice_hw *hw)
{
/* shut down all the control queues first */
- ice_shutdown_all_ctrlq(hw);
+ ice_shutdown_all_ctrlq(hw, true);
ice_destroy_ctrlq_locks(&hw->adminq);
if (ice_is_sbq_supported(hw))
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
index 8f2fd1613a95..1d54b1cdb1c5 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -21,9 +21,18 @@
/* Defines that help manage the driver vs FW API checks.
* Take a look at ice_aq_ver_check in ice_controlq.c for actual usage.
*/
-#define EXP_FW_API_VER_BRANCH 0x00
-#define EXP_FW_API_VER_MAJOR 0x01
-#define EXP_FW_API_VER_MINOR 0x05
+#define EXP_FW_API_VER_MAJOR_E810 0x01
+#define EXP_FW_API_VER_MINOR_E810 0x05
+
+#define EXP_FW_API_VER_MAJOR_E830 0x01
+#define EXP_FW_API_VER_MINOR_E830 0x07
+
+#define EXP_FW_API_VER_MAJOR_BY_MAC(hw) ((hw)->mac_type == ICE_MAC_E830 ? \
+ EXP_FW_API_VER_MAJOR_E830 : \
+ EXP_FW_API_VER_MAJOR_E810)
+#define EXP_FW_API_VER_MINOR_BY_MAC(hw) ((hw)->mac_type == ICE_MAC_E830 ? \
+ EXP_FW_API_VER_MINOR_E830 : \
+ EXP_FW_API_VER_MINOR_E810)
/* Different control queue types: These are mainly for SW consumption. */
enum ice_ctl_q {
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 62c8205fceba..2b4f6f07737c 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -463,7 +463,354 @@ ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
static int ice_get_regs_len(struct net_device __always_unused *netdev)
{
- return sizeof(ice_regs_dump_list);
+ return (sizeof(ice_regs_dump_list) +
+ sizeof(struct ice_regdump_to_ethtool));
+}
+
+/**
+ * ice_ethtool_get_maxspeed - Get the max speed for given lport
+ * @hw: pointer to the HW struct
+ * @lport: logical port for which max speed is requested
+ * @max_speed: return max speed for input lport
+ *
+ * Return: 0 on success, negative on failure.
+ */
+static int ice_ethtool_get_maxspeed(struct ice_hw *hw, u8 lport, u8 *max_speed)
+{
+ struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX] = {};
+ bool active_valid = false, pending_valid = true;
+ u8 option_count = ICE_AQC_PORT_OPT_MAX;
+ u8 active_idx = 0, pending_idx = 0;
+ int status;
+
+ status = ice_aq_get_port_options(hw, options, &option_count, lport,
+ true, &active_idx, &active_valid,
+ &pending_idx, &pending_valid);
+ if (status)
+ return -EIO;
+ if (!active_valid)
+ return -EINVAL;
+
+ *max_speed = options[active_idx].max_lane_speed & ICE_AQC_PORT_OPT_MAX_LANE_M;
+ return 0;
+}
+
+/**
+ * ice_is_serdes_muxed - returns whether serdes is muxed in hardware
+ * @hw: pointer to the HW struct
+ *
+ * Return: true when serdes is muxed, false when serdes is not muxed.
+ */
+static bool ice_is_serdes_muxed(struct ice_hw *hw)
+{
+ u32 reg_value = rd32(hw, GLGEN_SWITCH_MODE_CONFIG);
+
+ return FIELD_GET(GLGEN_SWITCH_MODE_CONFIG_25X4_QUAD_M, reg_value);
+}
+
+static int ice_map_port_topology_for_sfp(struct ice_port_topology *port_topology,
+ u8 lport, bool is_muxed)
+{
+ switch (lport) {
+ case 0:
+ port_topology->pcs_quad_select = 0;
+ port_topology->pcs_port = 0;
+ port_topology->primary_serdes_lane = 0;
+ break;
+ case 1:
+ port_topology->pcs_quad_select = 1;
+ port_topology->pcs_port = 0;
+ if (is_muxed)
+ port_topology->primary_serdes_lane = 2;
+ else
+ port_topology->primary_serdes_lane = 4;
+ break;
+ case 2:
+ port_topology->pcs_quad_select = 0;
+ port_topology->pcs_port = 1;
+ port_topology->primary_serdes_lane = 1;
+ break;
+ case 3:
+ port_topology->pcs_quad_select = 1;
+ port_topology->pcs_port = 1;
+ if (is_muxed)
+ port_topology->primary_serdes_lane = 3;
+ else
+ port_topology->primary_serdes_lane = 5;
+ break;
+ case 4:
+ port_topology->pcs_quad_select = 0;
+ port_topology->pcs_port = 2;
+ port_topology->primary_serdes_lane = 2;
+ break;
+ case 5:
+ port_topology->pcs_quad_select = 1;
+ port_topology->pcs_port = 2;
+ port_topology->primary_serdes_lane = 6;
+ break;
+ case 6:
+ port_topology->pcs_quad_select = 0;
+ port_topology->pcs_port = 3;
+ port_topology->primary_serdes_lane = 3;
+ break;
+ case 7:
+ port_topology->pcs_quad_select = 1;
+ port_topology->pcs_port = 3;
+ port_topology->primary_serdes_lane = 7;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ice_map_port_topology_for_qsfp(struct ice_port_topology *port_topology,
+ u8 lport, bool is_muxed)
+{
+ switch (lport) {
+ case 0:
+ port_topology->pcs_quad_select = 0;
+ port_topology->pcs_port = 0;
+ port_topology->primary_serdes_lane = 0;
+ break;
+ case 1:
+ port_topology->pcs_quad_select = 1;
+ port_topology->pcs_port = 0;
+ if (is_muxed)
+ port_topology->primary_serdes_lane = 2;
+ else
+ port_topology->primary_serdes_lane = 4;
+ break;
+ case 2:
+ port_topology->pcs_quad_select = 0;
+ port_topology->pcs_port = 1;
+ port_topology->primary_serdes_lane = 1;
+ break;
+ case 3:
+ port_topology->pcs_quad_select = 1;
+ port_topology->pcs_port = 1;
+ if (is_muxed)
+ port_topology->primary_serdes_lane = 3;
+ else
+ port_topology->primary_serdes_lane = 5;
+ break;
+ case 4:
+ port_topology->pcs_quad_select = 0;
+ port_topology->pcs_port = 2;
+ port_topology->primary_serdes_lane = 2;
+ break;
+ case 5:
+ port_topology->pcs_quad_select = 1;
+ port_topology->pcs_port = 2;
+ port_topology->primary_serdes_lane = 6;
+ break;
+ case 6:
+ port_topology->pcs_quad_select = 0;
+ port_topology->pcs_port = 3;
+ port_topology->primary_serdes_lane = 3;
+ break;
+ case 7:
+ port_topology->pcs_quad_select = 1;
+ port_topology->pcs_port = 3;
+ port_topology->primary_serdes_lane = 7;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_get_port_topology - returns physical topology like pcsquad, pcsport,
+ * serdes number
+ * @hw: pointer to the HW struct
+ * @lport: logical port for which physical info requested
+ * @port_topology: buffer to hold port topology
+ *
+ * Return: 0 on success, negative on failure.
+ */
+static int ice_get_port_topology(struct ice_hw *hw, u8 lport,
+ struct ice_port_topology *port_topology)
+{
+ struct ice_aqc_get_link_topo cmd = {};
+ u16 node_handle = 0;
+ u8 cage_type = 0;
+ bool is_muxed;
+ int err;
+ u8 ctx;
+
+ ctx = ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE << ICE_AQC_LINK_TOPO_NODE_TYPE_S;
+ ctx |= ICE_AQC_LINK_TOPO_NODE_CTX_PORT << ICE_AQC_LINK_TOPO_NODE_CTX_S;
+ cmd.addr.topo_params.node_type_ctx = ctx;
+
+ err = ice_aq_get_netlist_node(hw, &cmd, &cage_type, &node_handle);
+ if (err)
+ return -EINVAL;
+
+ is_muxed = ice_is_serdes_muxed(hw);
+
+ if (cage_type == 0x11 || /* SFP+ */
+ cage_type == 0x12) { /* SFP28 */
+ port_topology->serdes_lane_count = 1;
+ err = ice_map_port_topology_for_sfp(port_topology, lport, is_muxed);
+ if (err)
+ return err;
+ } else if (cage_type == 0x13 || /* QSFP */
+ cage_type == 0x14) { /* QSFP28 */
+ u8 max_speed = 0;
+
+ err = ice_ethtool_get_maxspeed(hw, lport, &max_speed);
+ if (err)
+ return err;
+
+ if (max_speed == ICE_AQC_PORT_OPT_MAX_LANE_100G)
+ port_topology->serdes_lane_count = 4;
+ else if (max_speed == ICE_AQC_PORT_OPT_MAX_LANE_50G)
+ port_topology->serdes_lane_count = 2;
+ else
+ port_topology->serdes_lane_count = 1;
+
+ err = ice_map_port_topology_for_qsfp(port_topology, lport, is_muxed);
+ if (err)
+ return err;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_get_tx_rx_equa - read serdes tx rx equaliser param
+ * @hw: pointer to the HW struct
+ * @serdes_num: represents the serdes number
+ * @ptr: structure to read all serdes parameter for given serdes
+ *
+ * Return: all serdes equalization parameter supported per serdes number
+ */
+static int ice_get_tx_rx_equa(struct ice_hw *hw, u8 serdes_num,
+ struct ice_serdes_equalization_to_ethtool *ptr)
+{
+ int err;
+
+ err = ice_aq_get_phy_equalization(hw, ICE_AQC_TX_EQU_PRE1,
+ ICE_AQC_OP_CODE_TX_EQU, serdes_num,
+ &ptr->tx_equalization_pre1);
+ if (err)
+ return err;
+
+ err = ice_aq_get_phy_equalization(hw, ICE_AQC_TX_EQU_PRE3,
+ ICE_AQC_OP_CODE_TX_EQU, serdes_num,
+ &ptr->tx_equalization_pre3);
+ if (err)
+ return err;
+
+ err = ice_aq_get_phy_equalization(hw, ICE_AQC_TX_EQU_ATTEN,
+ ICE_AQC_OP_CODE_TX_EQU, serdes_num,
+ &ptr->tx_equalization_atten);
+ if (err)
+ return err;
+
+ err = ice_aq_get_phy_equalization(hw, ICE_AQC_TX_EQU_POST1,
+ ICE_AQC_OP_CODE_TX_EQU, serdes_num,
+ &ptr->tx_equalization_post1);
+ if (err)
+ return err;
+
+ err = ice_aq_get_phy_equalization(hw, ICE_AQC_TX_EQU_PRE2,
+ ICE_AQC_OP_CODE_TX_EQU, serdes_num,
+ &ptr->tx_equalization_pre2);
+ if (err)
+ return err;
+
+ err = ice_aq_get_phy_equalization(hw, ICE_AQC_RX_EQU_PRE2,
+ ICE_AQC_OP_CODE_RX_EQU, serdes_num,
+ &ptr->rx_equalization_pre2);
+ if (err)
+ return err;
+
+ err = ice_aq_get_phy_equalization(hw, ICE_AQC_RX_EQU_PRE1,
+ ICE_AQC_OP_CODE_RX_EQU, serdes_num,
+ &ptr->rx_equalization_pre1);
+ if (err)
+ return err;
+
+ err = ice_aq_get_phy_equalization(hw, ICE_AQC_RX_EQU_POST1,
+ ICE_AQC_OP_CODE_RX_EQU, serdes_num,
+ &ptr->rx_equalization_post1);
+ if (err)
+ return err;
+
+ err = ice_aq_get_phy_equalization(hw, ICE_AQC_RX_EQU_BFLF,
+ ICE_AQC_OP_CODE_RX_EQU, serdes_num,
+ &ptr->rx_equalization_bflf);
+ if (err)
+ return err;
+
+ err = ice_aq_get_phy_equalization(hw, ICE_AQC_RX_EQU_BFHF,
+ ICE_AQC_OP_CODE_RX_EQU, serdes_num,
+ &ptr->rx_equalization_bfhf);
+ if (err)
+ return err;
+
+ err = ice_aq_get_phy_equalization(hw, ICE_AQC_RX_EQU_DRATE,
+ ICE_AQC_OP_CODE_RX_EQU, serdes_num,
+ &ptr->rx_equalization_drate);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * ice_get_extended_regs - returns FEC correctable, uncorrectable stats per
+ * pcsquad, pcsport
+ * @netdev: pointer to net device structure
+ * @p: output buffer to fill requested register dump
+ *
+ * Return: 0 on success, negative on failure.
+ */
+static int ice_get_extended_regs(struct net_device *netdev, void *p)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_regdump_to_ethtool *ice_prv_regs_buf;
+ struct ice_port_topology port_topology = {};
+ struct ice_port_info *pi;
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ unsigned int i;
+ int err;
+
+ pf = np->vsi->back;
+ hw = &pf->hw;
+ pi = np->vsi->port_info;
+
+ /* Serdes parameters are not supported if not the PF VSI */
+ if (np->vsi->type != ICE_VSI_PF || !pi)
+ return -EINVAL;
+
+ err = ice_get_port_topology(hw, pi->lport, &port_topology);
+ if (err)
+ return -EINVAL;
+ if (port_topology.serdes_lane_count > 4)
+ return -EINVAL;
+
+ ice_prv_regs_buf = p;
+
+ /* Get serdes equalization parameter for available serdes */
+ for (i = 0; i < port_topology.serdes_lane_count; i++) {
+ u8 serdes_num = 0;
+
+ serdes_num = port_topology.primary_serdes_lane + i;
+ err = ice_get_tx_rx_equa(hw, serdes_num,
+ &ice_prv_regs_buf->equalization[i]);
+ if (err)
+ return -EINVAL;
+ }
+
+ return 0;
}
static void
@@ -475,10 +822,12 @@ ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p)
u32 *regs_buf = (u32 *)p;
unsigned int i;
- regs->version = 1;
+ regs->version = 2;
for (i = 0; i < ARRAY_SIZE(ice_regs_dump_list); ++i)
regs_buf[i] = rd32(hw, ice_regs_dump_list[i]);
+
+ ice_get_extended_regs(netdev, (void *)&regs_buf[i]);
}
static u32 ice_get_msglevel(struct net_device *netdev)
@@ -4282,6 +4631,94 @@ ice_get_module_eeprom(struct net_device *netdev,
return 0;
}
+/**
+ * ice_get_port_fec_stats - returns FEC correctable, uncorrectable stats per
+ * pcsquad, pcsport
+ * @hw: pointer to the HW struct
+ * @pcs_quad: pcsquad for input port
+ * @pcs_port: pcsport for input port
+ * @fec_stats: buffer to hold FEC statistics for given port
+ *
+ * Return: 0 on success, negative on failure.
+ */
+static int ice_get_port_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port,
+ struct ethtool_fec_stats *fec_stats)
+{
+ u32 fec_uncorr_low_val = 0, fec_uncorr_high_val = 0;
+ u32 fec_corr_low_val = 0, fec_corr_high_val = 0;
+ int err;
+
+ if (pcs_quad > 1 || pcs_port > 3)
+ return -EINVAL;
+
+ err = ice_aq_get_fec_stats(hw, pcs_quad, pcs_port, ICE_FEC_CORR_LOW,
+ &fec_corr_low_val);
+ if (err)
+ return err;
+
+ err = ice_aq_get_fec_stats(hw, pcs_quad, pcs_port, ICE_FEC_CORR_HIGH,
+ &fec_corr_high_val);
+ if (err)
+ return err;
+
+ err = ice_aq_get_fec_stats(hw, pcs_quad, pcs_port,
+ ICE_FEC_UNCORR_LOW,
+ &fec_uncorr_low_val);
+ if (err)
+ return err;
+
+ err = ice_aq_get_fec_stats(hw, pcs_quad, pcs_port,
+ ICE_FEC_UNCORR_HIGH,
+ &fec_uncorr_high_val);
+ if (err)
+ return err;
+
+ fec_stats->uncorrectable_blocks.total = (fec_corr_high_val << 16) +
+ fec_corr_low_val;
+ fec_stats->corrected_blocks.total = (fec_uncorr_high_val << 16) +
+ fec_uncorr_low_val;
+ return 0;
+}
+
+/**
+ * ice_get_fec_stats - returns FEC correctable, uncorrectable stats per netdev
+ * @netdev: network interface device structure
+ * @fec_stats: buffer to hold FEC statistics for given port
+ *
+ */
+static void ice_get_fec_stats(struct net_device *netdev,
+ struct ethtool_fec_stats *fec_stats)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_port_topology port_topology;
+ struct ice_port_info *pi;
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ int err;
+
+ pf = np->vsi->back;
+ hw = &pf->hw;
+ pi = np->vsi->port_info;
+
+ /* Serdes parameters are not supported if not the PF VSI */
+ if (np->vsi->type != ICE_VSI_PF || !pi)
+ return;
+
+ err = ice_get_port_topology(hw, pi->lport, &port_topology);
+ if (err) {
+ netdev_info(netdev, "Extended register dump failed Lport %d\n",
+ pi->lport);
+ return;
+ }
+
+ /* Get FEC correctable, uncorrectable counter */
+ err = ice_get_port_fec_stats(hw, port_topology.pcs_quad_select,
+ port_topology.pcs_port, fec_stats);
+ if (err)
+ netdev_info(netdev, "FEC stats get failed Lport %d Err %d\n",
+ pi->lport, err);
+}
+
static const struct ethtool_ops ice_ethtool_ops = {
.cap_rss_ctx_supported = true,
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
@@ -4290,6 +4727,7 @@ static const struct ethtool_ops ice_ethtool_ops = {
.cap_rss_sym_xor_supported = true,
.get_link_ksettings = ice_get_link_ksettings,
.set_link_ksettings = ice_set_link_ksettings,
+ .get_fec_stats = ice_get_fec_stats,
.get_drvinfo = ice_get_drvinfo,
.get_regs_len = ice_get_regs_len,
.get_regs = ice_get_regs,
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.h b/drivers/net/ethernet/intel/ice/ice_ethtool.h
index b88e3da06f13..9acccae38625 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.h
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.h
@@ -9,6 +9,35 @@ struct ice_phy_type_to_ethtool {
u8 link_mode;
};
+struct ice_serdes_equalization_to_ethtool {
+ int rx_equalization_pre2;
+ int rx_equalization_pre1;
+ int rx_equalization_post1;
+ int rx_equalization_bflf;
+ int rx_equalization_bfhf;
+ int rx_equalization_drate;
+ int tx_equalization_pre1;
+ int tx_equalization_pre3;
+ int tx_equalization_atten;
+ int tx_equalization_post1;
+ int tx_equalization_pre2;
+};
+
+struct ice_regdump_to_ethtool {
+ /* A multilane port can have max 4 serdes */
+ struct ice_serdes_equalization_to_ethtool equalization[4];
+};
+
+/* Port topology from lport i.e.
+ * serdes mapping, pcsquad, macport, cage etc...
+ */
+struct ice_port_topology {
+ u16 pcs_port;
+ u16 primary_serdes_lane;
+ u16 serdes_lane_count;
+ u16 pcs_quad_select;
+};
+
/* Macro to make PHY type to Ethtool link mode table entry.
* The index is the PHY type.
*/
diff --git a/drivers/net/ethernet/intel/ice/ice_hwmon.c b/drivers/net/ethernet/intel/ice/ice_hwmon.c
index e4c2c1bff6c0..b7aa6812510a 100644
--- a/drivers/net/ethernet/intel/ice/ice_hwmon.c
+++ b/drivers/net/ethernet/intel/ice/ice_hwmon.c
@@ -96,7 +96,7 @@ static bool ice_is_internal_reading_supported(struct ice_pf *pf)
unsigned long sensors = pf->hw.dev_caps.supported_sensors;
- return _test_bit(ICE_SENSOR_SUPPORT_E810_INT_TEMP_BIT, &sensors);
+ return test_bit(ICE_SENSOR_SUPPORT_E810_INT_TEMP_BIT, &sensors);
};
void ice_hwmon_init(struct ice_pf *pf)
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 080efb7473aa..14ec4ebcd9af 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -623,7 +623,7 @@ skip:
if (hw->port_info)
ice_sched_clear_port(hw->port_info);
- ice_shutdown_all_ctrlq(hw);
+ ice_shutdown_all_ctrlq(hw, false);
set_bit(ICE_PREPARED_FOR_RESET, pf->state);
}
@@ -4158,13 +4158,17 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked)
/* set for the next time the netdev is started */
if (!netif_running(vsi->netdev)) {
- ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
+ err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
+ if (err)
+ goto rebuild_err;
dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n");
goto done;
}
ice_vsi_close(vsi);
- ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
+ err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
+ if (err)
+ goto rebuild_err;
ice_for_each_traffic_class(i) {
if (vsi->tc_cfg.ena_tc & BIT(i))
@@ -4175,6 +4179,11 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked)
}
ice_pf_dcb_recfg(pf, locked);
ice_vsi_open(vsi);
+ goto done;
+
+rebuild_err:
+ dev_err(ice_pf_to_dev(pf), "Error during VSI rebuild: %d. Unload and reload the driver.\n",
+ err);
done:
clear_bit(ICE_CFG_BUSY, pf->state);
return err;
@@ -5490,7 +5499,7 @@ static void ice_prepare_for_shutdown(struct ice_pf *pf)
if (pf->vsi[v])
pf->vsi[v]->vsi_num = 0;
- ice_shutdown_all_ctrlq(hw);
+ ice_shutdown_all_ctrlq(hw, true);
}
/**
@@ -7750,7 +7759,7 @@ err_vsi_rebuild:
err_sched_init_port:
ice_sched_cleanup_all(hw);
err_init_ctrlq:
- ice_shutdown_all_ctrlq(hw);
+ ice_shutdown_all_ctrlq(hw, false);
set_bit(ICE_RESET_FAILED, pf->state);
clear_recovery:
/* set this bit in PF state to control service task scheduling */
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index b7ab6fdf710d..f6e26e1b5ab9 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -1618,6 +1618,10 @@ void ice_ptp_extts_event(struct ice_pf *pf)
u8 chan, tmr_idx;
u32 hi, lo;
+ /* Don't process timestamp events if PTP is not ready */
+ if (pf->ptp.state != ICE_PTP_READY)
+ return;
+
tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
/* Event time is captured by one of the two matched registers
* GLTSYN_EVNT_L: 32 LSB of sampled time event
@@ -1643,27 +1647,33 @@ void ice_ptp_extts_event(struct ice_pf *pf)
/**
* ice_ptp_cfg_extts - Configure EXTTS pin and channel
* @pf: Board private structure
- * @ena: true to enable; false to disable
* @chan: GPIO channel (0-3)
- * @gpio_pin: GPIO pin
- * @extts_flags: request flags from the ptp_extts_request.flags
+ * @config: desired EXTTS configuration.
+ * @store: If set to true, the values will be stored
+ *
+ * Configure an external timestamp event on the requested channel.
+ *
+ * Return: 0 on success, -EOPNOTUSPP on unsupported flags
*/
-static int
-ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin,
- unsigned int extts_flags)
+static int ice_ptp_cfg_extts(struct ice_pf *pf, unsigned int chan,
+ struct ice_extts_channel *config, bool store)
{
u32 func, aux_reg, gpio_reg, irq_reg;
struct ice_hw *hw = &pf->hw;
u8 tmr_idx;
- if (chan > (unsigned int)pf->ptp.info.n_ext_ts)
- return -EINVAL;
+ /* Reject requests with unsupported flags */
+ if (config->flags & ~(PTP_ENABLE_FEATURE |
+ PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS))
+ return -EOPNOTSUPP;
tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
irq_reg = rd32(hw, PFINT_OICR_ENA);
- if (ena) {
+ if (config->ena) {
/* Enable the interrupt */
irq_reg |= PFINT_OICR_TSYN_EVNT_M;
aux_reg = GLTSYN_AUX_IN_0_INT_ENA_M;
@@ -1672,9 +1682,9 @@ ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin,
#define GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE BIT(1)
/* set event level to requested edge */
- if (extts_flags & PTP_FALLING_EDGE)
+ if (config->flags & PTP_FALLING_EDGE)
aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE;
- if (extts_flags & PTP_RISING_EDGE)
+ if (config->flags & PTP_RISING_EDGE)
aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_RISING_EDGE;
/* Write GPIO CTL reg.
@@ -1695,12 +1705,52 @@ ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin,
wr32(hw, PFINT_OICR_ENA, irq_reg);
wr32(hw, GLTSYN_AUX_IN(chan, tmr_idx), aux_reg);
- wr32(hw, GLGEN_GPIO_CTL(gpio_pin), gpio_reg);
+ wr32(hw, GLGEN_GPIO_CTL(config->gpio_pin), gpio_reg);
+
+ if (store)
+ memcpy(&pf->ptp.extts_channels[chan], config, sizeof(*config));
return 0;
}
/**
+ * ice_ptp_disable_all_extts - Disable all EXTTS channels
+ * @pf: Board private structure
+ */
+static void ice_ptp_disable_all_extts(struct ice_pf *pf)
+{
+ struct ice_extts_channel extts_cfg = {};
+ int i;
+
+ for (i = 0; i < pf->ptp.info.n_ext_ts; i++) {
+ if (pf->ptp.extts_channels[i].ena) {
+ extts_cfg.gpio_pin = pf->ptp.extts_channels[i].gpio_pin;
+ extts_cfg.ena = false;
+ ice_ptp_cfg_extts(pf, i, &extts_cfg, false);
+ }
+ }
+
+ synchronize_irq(pf->oicr_irq.virq);
+}
+
+/**
+ * ice_ptp_enable_all_extts - Enable all EXTTS channels
+ * @pf: Board private structure
+ *
+ * Called during reset to restore user configuration.
+ */
+static void ice_ptp_enable_all_extts(struct ice_pf *pf)
+{
+ int i;
+
+ for (i = 0; i < pf->ptp.info.n_ext_ts; i++) {
+ if (pf->ptp.extts_channels[i].ena)
+ ice_ptp_cfg_extts(pf, i, &pf->ptp.extts_channels[i],
+ false);
+ }
+}
+
+/**
* ice_ptp_cfg_clkout - Configure clock to generate periodic wave
* @pf: Board private structure
* @chan: GPIO channel (0-3)
@@ -1718,6 +1768,9 @@ static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan,
u32 func, val, gpio_pin;
u8 tmr_idx;
+ if (config && config->flags & ~PTP_PEROUT_PHASE)
+ return -EOPNOTSUPP;
+
tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
/* 0. Reset mode & out_en in AUX_OUT */
@@ -1853,17 +1906,18 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info,
struct ptp_clock_request *rq, int on)
{
struct ice_pf *pf = ptp_info_to_pf(info);
- struct ice_perout_channel clk_cfg = {0};
bool sma_pres = false;
unsigned int chan;
u32 gpio_pin;
- int err;
if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL))
sma_pres = true;
switch (rq->type) {
case PTP_CLK_REQ_PEROUT:
+ {
+ struct ice_perout_channel clk_cfg = {};
+
chan = rq->perout.index;
if (sma_pres) {
if (chan == ice_pin_desc_e810t[SMA1].chan)
@@ -1883,15 +1937,19 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info,
clk_cfg.gpio_pin = chan;
}
+ clk_cfg.flags = rq->perout.flags;
clk_cfg.period = ((rq->perout.period.sec * NSEC_PER_SEC) +
rq->perout.period.nsec);
clk_cfg.start_time = ((rq->perout.start.sec * NSEC_PER_SEC) +
rq->perout.start.nsec);
clk_cfg.ena = !!on;
- err = ice_ptp_cfg_clkout(pf, chan, &clk_cfg, true);
- break;
+ return ice_ptp_cfg_clkout(pf, chan, &clk_cfg, true);
+ }
case PTP_CLK_REQ_EXTTS:
+ {
+ struct ice_extts_channel extts_cfg = {};
+
chan = rq->extts.index;
if (sma_pres) {
if (chan < ice_pin_desc_e810t[SMA2].chan)
@@ -1907,14 +1965,15 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info,
gpio_pin = chan;
}
- err = ice_ptp_cfg_extts(pf, !!on, chan, gpio_pin,
- rq->extts.flags);
- break;
+ extts_cfg.flags = rq->extts.flags;
+ extts_cfg.gpio_pin = gpio_pin;
+ extts_cfg.ena = !!on;
+
+ return ice_ptp_cfg_extts(pf, chan, &extts_cfg, true);
+ }
default:
return -EOPNOTSUPP;
}
-
- return err;
}
/**
@@ -1927,26 +1986,32 @@ static int ice_ptp_gpio_enable_e823(struct ptp_clock_info *info,
struct ptp_clock_request *rq, int on)
{
struct ice_pf *pf = ptp_info_to_pf(info);
- struct ice_perout_channel clk_cfg = {0};
- int err;
switch (rq->type) {
case PTP_CLK_REQ_PPS:
+ {
+ struct ice_perout_channel clk_cfg = {};
+
+ clk_cfg.flags = rq->perout.flags;
clk_cfg.gpio_pin = PPS_PIN_INDEX;
clk_cfg.period = NSEC_PER_SEC;
clk_cfg.ena = !!on;
- err = ice_ptp_cfg_clkout(pf, PPS_CLK_GEN_CHAN, &clk_cfg, true);
- break;
+ return ice_ptp_cfg_clkout(pf, PPS_CLK_GEN_CHAN, &clk_cfg, true);
+ }
case PTP_CLK_REQ_EXTTS:
- err = ice_ptp_cfg_extts(pf, !!on, rq->extts.index,
- TIME_SYNC_PIN_INDEX, rq->extts.flags);
- break;
+ {
+ struct ice_extts_channel extts_cfg = {};
+
+ extts_cfg.flags = rq->extts.flags;
+ extts_cfg.gpio_pin = TIME_SYNC_PIN_INDEX;
+ extts_cfg.ena = !!on;
+
+ return ice_ptp_cfg_extts(pf, rq->extts.index, &extts_cfg, true);
+ }
default:
return -EOPNOTSUPP;
}
-
- return err;
}
/**
@@ -2781,6 +2846,10 @@ static int ice_ptp_rebuild_owner(struct ice_pf *pf)
ice_ptp_restart_all_phy(pf);
}
+ /* Re-enable all periodic outputs and external timestamp events */
+ ice_ptp_enable_all_clkout(pf);
+ ice_ptp_enable_all_extts(pf);
+
return 0;
}
@@ -3340,6 +3409,8 @@ void ice_ptp_release(struct ice_pf *pf)
ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx);
+ ice_ptp_disable_all_extts(pf);
+
kthread_cancel_delayed_work_sync(&pf->ptp.work);
ice_ptp_port_phy_stop(&pf->ptp.port);
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index e0c23aaedc12..2db2257a0fb2 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -29,10 +29,17 @@ enum ice_ptp_pin_e810t {
struct ice_perout_channel {
bool ena;
u32 gpio_pin;
+ u32 flags;
u64 period;
u64 start_time;
};
+struct ice_extts_channel {
+ bool ena;
+ u32 gpio_pin;
+ u32 flags;
+};
+
/* The ice hardware captures Tx hardware timestamps in the PHY. The timestamp
* is stored in a buffer of registers. Depending on the specific hardware,
* this buffer might be shared across multiple PHY ports.
@@ -227,6 +234,7 @@ enum ice_ptp_state {
* @ext_ts_irq: the external timestamp IRQ in use
* @kworker: kwork thread for handling periodic work
* @perout_channels: periodic output data
+ * @extts_channels: channels for external timestamps
* @info: structure defining PTP hardware capabilities
* @clock: pointer to registered PTP clock device
* @tstamp_config: hardware timestamping configuration
@@ -250,6 +258,7 @@ struct ice_ptp {
u8 ext_ts_irq;
struct kthread_worker *kworker;
struct ice_perout_channel perout_channels[GLTSYN_TGT_H_IDX_MAX];
+ struct ice_extts_channel extts_channels[GLTSYN_TGT_H_IDX_MAX];
struct ptp_clock_info info;
struct ptp_clock *clock;
struct hwtstamp_config tstamp_config;
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
index 1e9a4ccd0ea2..3a33e6b9b313 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -247,7 +247,7 @@ static int ice_read_cgu_reg_e82x(struct ice_hw *hw, u32 addr, u32 *val)
};
int err;
- err = ice_sbq_rw_reg(hw, &cgu_msg);
+ err = ice_sbq_rw_reg(hw, &cgu_msg, ICE_AQ_FLAG_RD);
if (err) {
ice_debug(hw, ICE_DBG_PTP, "Failed to read CGU register 0x%04x, err %d\n",
addr, err);
@@ -280,7 +280,7 @@ static int ice_write_cgu_reg_e82x(struct ice_hw *hw, u32 addr, u32 val)
};
int err;
- err = ice_sbq_rw_reg(hw, &cgu_msg);
+ err = ice_sbq_rw_reg(hw, &cgu_msg, ICE_AQ_FLAG_RD);
if (err) {
ice_debug(hw, ICE_DBG_PTP, "Failed to write CGU register 0x%04x, err %d\n",
addr, err);
@@ -902,7 +902,7 @@ static int ice_write_phy_eth56g(struct ice_hw *hw, u8 phy_idx, u32 addr,
phy_msg.data = val;
phy_msg.dest_dev = hw->ptp.phy.eth56g.phy_addr[phy_idx];
- err = ice_sbq_rw_reg(hw, &phy_msg);
+ err = ice_sbq_rw_reg(hw, &phy_msg, ICE_AQ_FLAG_RD);
if (err)
ice_debug(hw, ICE_DBG_PTP, "PTP failed to send msg to phy %d\n",
@@ -934,7 +934,7 @@ static int ice_read_phy_eth56g(struct ice_hw *hw, u8 phy_idx, u32 addr,
phy_msg.data = 0;
phy_msg.dest_dev = hw->ptp.phy.eth56g.phy_addr[phy_idx];
- err = ice_sbq_rw_reg(hw, &phy_msg);
+ err = ice_sbq_rw_reg(hw, &phy_msg, ICE_AQ_FLAG_RD);
if (err) {
ice_debug(hw, ICE_DBG_PTP, "PTP failed to send msg to phy %d\n",
err);
@@ -2855,7 +2855,7 @@ ice_read_phy_reg_e82x(struct ice_hw *hw, u8 port, u16 offset, u32 *val)
ice_fill_phy_msg_e82x(hw, &msg, port, offset);
msg.opcode = ice_sbq_msg_rd;
- err = ice_sbq_rw_reg(hw, &msg);
+ err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD);
if (err) {
ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
err);
@@ -2933,7 +2933,7 @@ ice_write_phy_reg_e82x(struct ice_hw *hw, u8 port, u16 offset, u32 val)
msg.opcode = ice_sbq_msg_wr;
msg.data = val;
- err = ice_sbq_rw_reg(hw, &msg);
+ err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD);
if (err) {
ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
err);
@@ -3094,7 +3094,7 @@ ice_read_quad_reg_e82x(struct ice_hw *hw, u8 quad, u16 offset, u32 *val)
msg.opcode = ice_sbq_msg_rd;
- err = ice_sbq_rw_reg(hw, &msg);
+ err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD);
if (err) {
ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
err);
@@ -3129,7 +3129,7 @@ ice_write_quad_reg_e82x(struct ice_hw *hw, u8 quad, u16 offset, u32 val)
msg.opcode = ice_sbq_msg_wr;
msg.data = val;
- err = ice_sbq_rw_reg(hw, &msg);
+ err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD);
if (err) {
ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
err);
@@ -4780,7 +4780,7 @@ static int ice_read_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 *val)
msg.opcode = ice_sbq_msg_rd;
msg.dest_dev = rmn_0;
- err = ice_sbq_rw_reg(hw, &msg);
+ err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD);
if (err) {
ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
err);
@@ -4811,7 +4811,7 @@ static int ice_write_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 val)
msg.dest_dev = rmn_0;
msg.data = val;
- err = ice_sbq_rw_reg(hw, &msg);
+ err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD);
if (err) {
ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
err);
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index 067712f4923f..55ef33208456 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -1416,21 +1416,23 @@ out_put_vf:
}
/**
- * ice_set_vf_mac
- * @netdev: network interface device structure
+ * __ice_set_vf_mac - program VF MAC address
+ * @pf: PF to be configure
* @vf_id: VF identifier
* @mac: MAC address
*
* program VF MAC address
+ * Return: zero on success or an error code on failure
*/
-int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
+int __ice_set_vf_mac(struct ice_pf *pf, u16 vf_id, const u8 *mac)
{
- struct ice_pf *pf = ice_netdev_to_pf(netdev);
+ struct device *dev;
struct ice_vf *vf;
int ret;
+ dev = ice_pf_to_dev(pf);
if (is_multicast_ether_addr(mac)) {
- netdev_err(netdev, "%pM not a valid unicast address\n", mac);
+ dev_err(dev, "%pM not a valid unicast address\n", mac);
return -EINVAL;
}
@@ -1459,13 +1461,13 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
if (is_zero_ether_addr(mac)) {
/* VF will send VIRTCHNL_OP_ADD_ETH_ADDR message with its MAC */
vf->pf_set_mac = false;
- netdev_info(netdev, "Removing MAC on VF %d. VF driver will be reinitialized\n",
- vf->vf_id);
+ dev_info(dev, "Removing MAC on VF %d. VF driver will be reinitialized\n",
+ vf->vf_id);
} else {
/* PF will add MAC rule for the VF */
vf->pf_set_mac = true;
- netdev_info(netdev, "Setting MAC %pM on VF %d. VF driver will be reinitialized\n",
- mac, vf_id);
+ dev_info(dev, "Setting MAC %pM on VF %d. VF driver will be reinitialized\n",
+ mac, vf_id);
}
ice_reset_vf(vf, ICE_VF_RESET_NOTIFY);
@@ -1477,6 +1479,20 @@ out_put_vf:
}
/**
+ * ice_set_vf_mac - .ndo_set_vf_mac handler
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @mac: MAC address
+ *
+ * program VF MAC address
+ * Return: zero on success or an error code on failure
+ */
+int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
+{
+ return __ice_set_vf_mac(ice_netdev_to_pf(netdev), vf_id, mac);
+}
+
+/**
* ice_set_vf_trust
* @netdev: network interface device structure
* @vf_id: VF identifier
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h
index 8f22313474d6..96549ca5c52c 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.h
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.h
@@ -28,6 +28,7 @@
#ifdef CONFIG_PCI_IOV
void ice_process_vflr_event(struct ice_pf *pf);
int ice_sriov_configure(struct pci_dev *pdev, int num_vfs);
+int __ice_set_vf_mac(struct ice_pf *pf, u16 vf_id, const u8 *mac);
int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac);
int
ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi);
@@ -81,6 +82,13 @@ ice_sriov_configure(struct pci_dev __always_unused *pdev,
}
static inline int
+__ice_set_vf_mac(struct ice_pf __always_unused *pf,
+ u16 __always_unused vf_id, const u8 __always_unused *mac)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
ice_set_vf_mac(struct net_device __always_unused *netdev,
int __always_unused vf_id, u8 __always_unused *mac)
{
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index aac59c85a911..e26ae79578ba 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -71,6 +71,14 @@ enum ice_aq_res_ids {
ICE_GLOBAL_CFG_LOCK_RES_ID
};
+enum ice_fec_stats_types {
+ ICE_FEC_CORR_LOW,
+ ICE_FEC_CORR_HIGH,
+ ICE_FEC_UNCORR_LOW,
+ ICE_FEC_UNCORR_HIGH,
+ ICE_FEC_MAX
+};
+
/* FW update timeout definitions are in milliseconds */
#define ICE_NVM_TIMEOUT 180000
#define ICE_CHANGE_LOCK_TIMEOUT 1000
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 5352fee62d2b..0b9982804370 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -217,9 +217,9 @@ ltq_etop_free_channel(struct net_device *dev, struct ltq_etop_chan *ch)
if (ch->dma.irq)
free_irq(ch->dma.irq, priv);
if (IS_RX(ch->idx)) {
- int desc;
+ struct ltq_dma_channel *dma = &ch->dma;
- for (desc = 0; desc < LTQ_DESC_NUM; desc++)
+ for (dma->desc = 0; dma->desc < LTQ_DESC_NUM; dma->desc++)
dev_kfree_skb_any(ch->skb[ch->dma.desc]);
}
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 4a77f6fe2622..ebf84c240d6d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -139,6 +139,7 @@ M(MSIX_OFFSET, 0x005, msix_offset, msg_req, msix_offset_rsp) \
M(VF_FLR, 0x006, vf_flr, msg_req, msg_rsp) \
M(PTP_OP, 0x007, ptp_op, ptp_req, ptp_rsp) \
M(GET_HW_CAP, 0x008, get_hw_cap, msg_req, get_hw_cap_rsp) \
+M(NDC_SYNC_OP, 0x009, ndc_sync_op, ndc_sync_op, msg_rsp) \
M(LMTST_TBL_SETUP, 0x00a, lmtst_tbl_setup, lmtst_tbl_setup_req, \
msg_rsp) \
M(SET_VF_PERM, 0x00b, set_vf_perm, set_vf_perm, msg_rsp) \
@@ -1716,6 +1717,13 @@ struct lmtst_tbl_setup_req {
u64 rsvd[4];
};
+struct ndc_sync_op {
+ struct mbox_msghdr hdr;
+ u8 nix_lf_tx_sync;
+ u8 nix_lf_rx_sync;
+ u8 npa_lf_sync;
+};
+
/* CPT mailbox error codes
* Range 901 - 1000.
*/
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index ff78251f92d4..ac7ee3f3598c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -1643,7 +1643,7 @@ static int rvu_check_rsrc_availability(struct rvu *rvu,
if (req->ssow > block->lf.max) {
dev_err(&rvu->pdev->dev,
"Func 0x%x: Invalid SSOW req, %d > max %d\n",
- pcifunc, req->sso, block->lf.max);
+ pcifunc, req->ssow, block->lf.max);
return -EINVAL;
}
mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->addr);
@@ -2014,6 +2014,13 @@ int rvu_mbox_handler_vf_flr(struct rvu *rvu, struct msg_req *req,
return 0;
}
+int rvu_ndc_sync(struct rvu *rvu, int lfblkaddr, int lfidx, u64 lfoffset)
+{
+ /* Sync cached info for this LF in NDC to LLC/DRAM */
+ rvu_write64(rvu, lfblkaddr, lfoffset, BIT_ULL(12) | lfidx);
+ return rvu_poll_reg(rvu, lfblkaddr, lfoffset, BIT_ULL(12), true);
+}
+
int rvu_mbox_handler_get_hw_cap(struct rvu *rvu, struct msg_req *req,
struct get_hw_cap_rsp *rsp)
{
@@ -2068,6 +2075,65 @@ int rvu_mbox_handler_set_vf_perm(struct rvu *rvu, struct set_vf_perm *req,
return 0;
}
+int rvu_mbox_handler_ndc_sync_op(struct rvu *rvu,
+ struct ndc_sync_op *req,
+ struct msg_rsp *rsp)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ u16 pcifunc = req->hdr.pcifunc;
+ int err, lfidx, lfblkaddr;
+
+ if (req->npa_lf_sync) {
+ /* Get NPA LF data */
+ lfblkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc);
+ if (lfblkaddr < 0)
+ return NPA_AF_ERR_AF_LF_INVALID;
+
+ lfidx = rvu_get_lf(rvu, &hw->block[lfblkaddr], pcifunc, 0);
+ if (lfidx < 0)
+ return NPA_AF_ERR_AF_LF_INVALID;
+
+ /* Sync NPA NDC */
+ err = rvu_ndc_sync(rvu, lfblkaddr,
+ lfidx, NPA_AF_NDC_SYNC);
+ if (err)
+ dev_err(rvu->dev,
+ "NDC-NPA sync failed for LF %u\n", lfidx);
+ }
+
+ if (!req->nix_lf_tx_sync && !req->nix_lf_rx_sync)
+ return 0;
+
+ /* Get NIX LF data */
+ lfblkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+ if (lfblkaddr < 0)
+ return NIX_AF_ERR_AF_LF_INVALID;
+
+ lfidx = rvu_get_lf(rvu, &hw->block[lfblkaddr], pcifunc, 0);
+ if (lfidx < 0)
+ return NIX_AF_ERR_AF_LF_INVALID;
+
+ if (req->nix_lf_tx_sync) {
+ /* Sync NIX TX NDC */
+ err = rvu_ndc_sync(rvu, lfblkaddr,
+ lfidx, NIX_AF_NDC_TX_SYNC);
+ if (err)
+ dev_err(rvu->dev,
+ "NDC-NIX-TX sync fail for LF %u\n", lfidx);
+ }
+
+ if (req->nix_lf_rx_sync) {
+ /* Sync NIX RX NDC */
+ err = rvu_ndc_sync(rvu, lfblkaddr,
+ lfidx, NIX_AF_NDC_RX_SYNC);
+ if (err)
+ dev_err(rvu->dev,
+ "NDC-NIX-RX sync failed for LF %u\n", lfidx);
+ }
+
+ return 0;
+}
+
static int rvu_process_mbox_msg(struct otx2_mbox *mbox, int devid,
struct mbox_msghdr *req)
{
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 3063a84a45ef..03ee93fd9e94 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -800,6 +800,7 @@ int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf);
int rvu_get_blkaddr(struct rvu *rvu, int blktype, u16 pcifunc);
int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero);
int rvu_get_num_lbk_chans(void);
+int rvu_ndc_sync(struct rvu *rvu, int lfblkid, int lfidx, u64 lfoffset);
int rvu_get_blkaddr_from_slot(struct rvu *rvu, int blktype, u16 pcifunc,
u16 global_slot, u16 *slot_in_block);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 00af8888e329..4dbbaa719cbf 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -2497,9 +2497,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc)
}
mutex_unlock(&rvu->rsrc_lock);
- /* Sync cached info for this LF in NDC-TX to LLC/DRAM */
- rvu_write64(rvu, blkaddr, NIX_AF_NDC_TX_SYNC, BIT_ULL(12) | nixlf);
- err = rvu_poll_reg(rvu, blkaddr, NIX_AF_NDC_TX_SYNC, BIT_ULL(12), true);
+ err = rvu_ndc_sync(rvu, blkaddr, nixlf, NIX_AF_NDC_TX_SYNC);
if (err)
dev_err(rvu->dev, "NDC-TX sync failed for NIXLF %d\n", nixlf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
index 5ec92654e7ad..d56be5fb7eb4 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
@@ -121,6 +121,7 @@
#define NPA_AF_LF_RST (0x0020)
#define NPA_AF_GEN_CFG (0x0030)
#define NPA_AF_NDC_CFG (0x0040)
+#define NPA_AF_NDC_SYNC (0x0050)
#define NPA_AF_INP_CTL (0x00D0)
#define NPA_AF_ACTIVE_CYCLES_PC (0x00F0)
#define NPA_AF_AVG_DELAY (0x0100)
@@ -239,6 +240,7 @@
#define NIX_AF_RX_CPTX_INST_ADDR (0x0310)
#define NIX_AF_RX_CPTX_INST_QSEL(a) (0x0320ull | (uint64_t)(a) << 3)
#define NIX_AF_RX_CPTX_CREDIT(a) (0x0360ull | (uint64_t)(a) << 3)
+#define NIX_AF_NDC_RX_SYNC (0x03E0)
#define NIX_AF_NDC_TX_SYNC (0x03F0)
#define NIX_AF_AQ_CFG (0x0400)
#define NIX_AF_AQ_BASE (0x0410)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index ff05ea20409a..5492dea547a1 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -3245,6 +3245,29 @@ static int otx2_sriov_configure(struct pci_dev *pdev, int numvfs)
return otx2_sriov_enable(pdev, numvfs);
}
+static void otx2_ndc_sync(struct otx2_nic *pf)
+{
+ struct mbox *mbox = &pf->mbox;
+ struct ndc_sync_op *req;
+
+ mutex_lock(&mbox->lock);
+
+ req = otx2_mbox_alloc_msg_ndc_sync_op(mbox);
+ if (!req) {
+ mutex_unlock(&mbox->lock);
+ return;
+ }
+
+ req->nix_lf_tx_sync = 1;
+ req->nix_lf_rx_sync = 1;
+ req->npa_lf_sync = 1;
+
+ if (!otx2_sync_mbox_msg(mbox))
+ dev_err(pf->dev, "NDC sync operation failed\n");
+
+ mutex_unlock(&mbox->lock);
+}
+
static void otx2_remove(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
@@ -3293,6 +3316,7 @@ static void otx2_remove(struct pci_dev *pdev)
otx2_mcam_flow_del(pf);
otx2_shutdown_tc(pf);
otx2_shutdown_qos(pf);
+ otx2_ndc_sync(pf);
otx2_detach_resources(&pf->mbox);
if (pf->hw.lmt_info)
free_percpu(pf->hw.lmt_info);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 13d78d9b3197..0cc2dd85652f 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -3396,7 +3396,7 @@ static int mtk_open(struct net_device *dev)
for (i = 0; i < MTK_MAX_DEVS; i++) {
if (!eth->netdev[i])
- break;
+ continue;
target_mac = netdev_priv(eth->netdev[i]);
if (!soc->offload_version) {
@@ -4464,6 +4464,20 @@ static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
return ret;
}
+static void mtk_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+
+ phylink_ethtool_get_pauseparam(mac->phylink, pause);
+}
+
+static int mtk_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+
+ return phylink_ethtool_set_pauseparam(mac->phylink, pause);
+}
+
static u16 mtk_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev)
{
@@ -4492,8 +4506,10 @@ static const struct ethtool_ops mtk_ethtool_ops = {
.get_strings = mtk_get_strings,
.get_sset_count = mtk_get_sset_count,
.get_ethtool_stats = mtk_get_ethtool_stats,
+ .get_pauseparam = mtk_get_pauseparam,
+ .set_pauseparam = mtk_set_pauseparam,
.get_rxnfc = mtk_get_rxnfc,
- .set_rxnfc = mtk_set_rxnfc,
+ .set_rxnfc = mtk_set_rxnfc,
};
static const struct net_device_ops mtk_netdev_ops = {
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
index 691806bca372..223f709e2704 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
@@ -8,7 +8,7 @@
#include <linux/bitfield.h>
#include <linux/rhashtable.h>
-#define MTK_PPE_ENTRIES_SHIFT 3
+#define MTK_PPE_ENTRIES_SHIFT 4
#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT)
#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1)
#define MTK_PPE_WAIT_TIMEOUT_US 1000000
diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index 31aebeb2e285..25989c79c92e 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -1524,6 +1524,7 @@ static int mtk_star_probe(struct platform_device *pdev)
{
struct device_node *of_node;
struct mtk_star_priv *priv;
+ struct phy_device *phydev;
struct net_device *ndev;
struct device *dev;
void __iomem *base;
@@ -1649,6 +1650,12 @@ static int mtk_star_probe(struct platform_device *pdev)
netif_napi_add(ndev, &priv->rx_napi, mtk_star_rx_poll);
netif_napi_add_tx(ndev, &priv->tx_napi, mtk_star_tx_poll);
+ phydev = of_phy_find_device(priv->phy_node);
+ if (phydev) {
+ phydev->mac_managed_pm = true;
+ put_device(&phydev->mdio.dev);
+ }
+
return devm_register_netdev(dev, ndev);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index b49d87a51f21..8cf8ba2622f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -1145,7 +1145,7 @@ mlx5_tc_ct_entry_replace_rules(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_ct_entry *entry,
u8 zone_restore_id)
{
- int err;
+ int err = 0;
if (mlx5_tc_ct_entry_in_ct_table(entry)) {
err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, false,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index c54fd01ea635..3d274599015b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -989,7 +989,12 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x)
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
struct net *net = dev_net(x->xso.dev);
+ u64 trailer_packets = 0, trailer_bytes = 0;
+ u64 replay_packets = 0, replay_bytes = 0;
+ u64 auth_packets = 0, auth_bytes = 0;
+ u64 success_packets, success_bytes;
u64 packets, bytes, lastuse;
+ size_t headers;
lockdep_assert(lockdep_is_held(&x->lock) ||
lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex) ||
@@ -999,26 +1004,43 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x)
return;
if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
- mlx5_fc_query_cached(ipsec_rule->auth.fc, &bytes, &packets, &lastuse);
- x->stats.integrity_failed += packets;
- XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, packets);
-
- mlx5_fc_query_cached(ipsec_rule->trailer.fc, &bytes, &packets, &lastuse);
- XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, packets);
+ mlx5_fc_query_cached(ipsec_rule->auth.fc, &auth_bytes,
+ &auth_packets, &lastuse);
+ x->stats.integrity_failed += auth_packets;
+ XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, auth_packets);
+
+ mlx5_fc_query_cached(ipsec_rule->trailer.fc, &trailer_bytes,
+ &trailer_packets, &lastuse);
+ XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, trailer_packets);
}
if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
return;
- mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
- x->curlft.packets += packets;
- x->curlft.bytes += bytes;
-
if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
- mlx5_fc_query_cached(ipsec_rule->replay.fc, &bytes, &packets, &lastuse);
- x->stats.replay += packets;
- XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, packets);
+ mlx5_fc_query_cached(ipsec_rule->replay.fc, &replay_bytes,
+ &replay_packets, &lastuse);
+ x->stats.replay += replay_packets;
+ XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, replay_packets);
}
+
+ mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
+ success_packets = packets - auth_packets - trailer_packets - replay_packets;
+ x->curlft.packets += success_packets;
+ /* NIC counts all bytes passed through flow steering and doesn't have
+ * an ability to count payload data size which is needed for SA.
+ *
+ * To overcome HW limitestion, let's approximate the payload size
+ * by removing always available headers.
+ */
+ headers = sizeof(struct ethhdr);
+ if (sa_entry->attrs.family == AF_INET)
+ headers += sizeof(struct iphdr);
+ else
+ headers += sizeof(struct ipv6hdr);
+
+ success_bytes = bytes - auth_bytes - trailer_bytes - replay_bytes;
+ x->curlft.bytes += success_bytes - headers * success_packets;
}
static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0eba4c5bb2ec..ff335527c10a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -6034,6 +6034,11 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
kfree(priv->htb_qos_sq_stats[i]);
kvfree(priv->htb_qos_sq_stats);
+ if (priv->mqprio_rl) {
+ mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
+ mlx5e_mqprio_rl_free(priv->mqprio_rl);
+ }
+
memset(priv, 0, sizeof(*priv));
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index db1cac68292f..e7a3290a708a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -142,6 +142,8 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_bytes) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_skbs) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_large_hds) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_hds_nodata_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_hds_nodata_bytes) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 5693986ae656..ac1565c0c8af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -1197,9 +1197,7 @@ static int get_num_eqs(struct mlx5_core_dev *dev)
if (!mlx5_core_is_eth_enabled(dev) && mlx5_eth_supported(dev))
return 1;
- max_dev_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
- MLX5_CAP_GEN(dev, max_num_eqs) :
- 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ max_dev_eqs = mlx5_max_eq_cap_get(dev);
num_eqs = min_t(int, mlx5_irq_table_get_num_comp(eq_table->irq_table),
max_dev_eqs - MLX5_MAX_ASYNC_EQS);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
index 50d2ea323979..a436ce895e45 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
@@ -6,6 +6,9 @@
#include "helper.h"
#include "ofld.h"
+static int
+acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
static bool
esw_acl_ingress_prio_tag_enabled(struct mlx5_eswitch *esw,
const struct mlx5_vport *vport)
@@ -123,18 +126,31 @@ static int esw_acl_ingress_src_port_drop_create(struct mlx5_eswitch *esw,
{
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_handle *flow_rule;
+ bool created = false;
int err = 0;
+ if (!vport->ingress.acl) {
+ err = acl_ingress_ofld_setup(esw, vport);
+ if (err)
+ return err;
+ created = true;
+ }
+
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
flow_act.fg = vport->ingress.offloads.drop_grp;
flow_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
- goto out;
+ goto err_out;
}
vport->ingress.offloads.drop_rule = flow_rule;
-out:
+
+ return 0;
+err_out:
+ /* Only destroy ingress acl created in this function. */
+ if (created)
+ esw_acl_ingress_ofld_cleanup(esw, vport);
return err;
}
@@ -299,16 +315,12 @@ static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport)
}
}
-int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
+static int
+acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
int num_ftes = 0;
int err;
- if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
- !esw_acl_ingress_prio_tag_enabled(esw, vport))
- return 0;
-
esw_acl_ingress_allow_rule_destroy(vport);
if (mlx5_eswitch_vport_match_metadata_enabled(esw))
@@ -347,6 +359,15 @@ group_err:
return err;
}
+int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ !esw_acl_ingress_prio_tag_enabled(esw, vport))
+ return 0;
+
+ return acl_ingress_ofld_setup(esw, vport);
+}
+
void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 592143d5e1da..72949cb85244 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -4600,20 +4600,26 @@ mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, u32 *max_io_eqs,
return -EOPNOTSUPP;
}
+ if (!MLX5_CAP_GEN_2(esw->dev, max_num_eqs_24b)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Device doesn't support getting the max number of EQs");
+ return -EOPNOTSUPP;
+ }
+
query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
if (!query_ctx)
return -ENOMEM;
mutex_lock(&esw->state_lock);
err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
- MLX5_CAP_GENERAL);
+ MLX5_CAP_GENERAL_2);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
goto out;
}
hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
- max_eqs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_eqs);
+ max_eqs = MLX5_GET(cmd_hca_cap_2, hca_caps, max_num_eqs_24b);
if (max_eqs < MLX5_ESW_MAX_CTRL_EQS)
*max_io_eqs = 0;
else
@@ -4644,6 +4650,12 @@ mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 max_io_eqs,
return -EOPNOTSUPP;
}
+ if (!MLX5_CAP_GEN_2(esw->dev, max_num_eqs_24b)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Device doesn't support changing the max number of EQs");
+ return -EOPNOTSUPP;
+ }
+
if (check_add_overflow(max_io_eqs, MLX5_ESW_MAX_CTRL_EQS, &max_eqs)) {
NL_SET_ERR_MSG_MOD(extack, "Supplied value out of range");
return -EINVAL;
@@ -4655,17 +4667,17 @@ mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 max_io_eqs,
mutex_lock(&esw->state_lock);
err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
- MLX5_CAP_GENERAL);
+ MLX5_CAP_GENERAL_2);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
goto out;
}
hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
- MLX5_SET(cmd_hca_cap, hca_caps, max_num_eqs, max_eqs);
+ MLX5_SET(cmd_hca_cap_2, hca_caps, max_num_eqs_24b, max_eqs);
err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num,
- MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2);
if (err)
NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA caps");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index c38342b9f320..a7fd18888b6e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -383,4 +383,14 @@ static inline int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vpo
: vport;
}
+static inline int mlx5_max_eq_cap_get(const struct mlx5_core_dev *dev)
+{
+ if (MLX5_CAP_GEN_2(dev, max_num_eqs_24b))
+ return MLX5_CAP_GEN_2(dev, max_num_eqs_24b);
+
+ if (MLX5_CAP_GEN(dev, max_num_eqs))
+ return MLX5_CAP_GEN(dev, max_num_eqs);
+
+ return 1 << MLX5_CAP_GEN(dev, log_max_eq);
+}
#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index fb8787e30d3f..401d39069680 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -711,9 +711,7 @@ int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
int mlx5_irq_table_create(struct mlx5_core_dev *dev)
{
- int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
- MLX5_CAP_GEN(dev, max_num_eqs) :
- 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int num_eqs = mlx5_max_eq_cap_get(dev);
int total_vec;
int pcif_vec;
int req_vec;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 81eff6c410ce..7618c6147f86 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -1379,6 +1379,11 @@ int mlx5dr_cmd_create_modify_header_arg(struct mlx5_core_dev *dev,
void mlx5dr_cmd_destroy_modify_header_arg(struct mlx5_core_dev *dev,
u32 obj_id);
+int mlx5dr_definer_get(struct mlx5dr_domain *dmn, u16 format_id,
+ u8 *dw_selectors, u8 *byte_selectors,
+ u8 *match_mask, u32 *definer_id);
+void mlx5dr_definer_put(struct mlx5dr_domain *dmn, u32 definer_id);
+
struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
enum mlx5dr_icm_type icm_type);
void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index 89fced86936f..3ac7dc67509f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -153,11 +153,6 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action);
u32 mlx5dr_action_get_pkt_reformat_id(struct mlx5dr_action *action);
-int mlx5dr_definer_get(struct mlx5dr_domain *dmn, u16 format_id,
- u8 *dw_selectors, u8 *byte_selectors,
- u8 *match_mask, u32 *definer_id);
-void mlx5dr_definer_put(struct mlx5dr_domain *dmn, u32 definer_id);
-
static inline bool
mlx5dr_is_supported(struct mlx5_core_dev *dev)
{
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index 6c06b0592760..294e758f1067 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -513,6 +513,63 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core,
}
EXPORT_SYMBOL(mlxsw_env_get_module_eeprom_by_page);
+int
+mlxsw_env_set_module_eeprom_by_page(struct mlxsw_core *mlxsw_core,
+ u8 slot_index, u8 module,
+ const struct ethtool_module_eeprom *page,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core);
+ u32 bytes_written = 0;
+ u16 device_addr;
+ int err;
+
+ if (!mlxsw_env_linecard_is_active(mlxsw_env, slot_index)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot write to EEPROM of a module on an inactive line card");
+ return -EIO;
+ }
+
+ err = mlxsw_env_validate_module_type(mlxsw_core, slot_index, module);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "EEPROM is not equipped on port module type");
+ return err;
+ }
+
+ device_addr = page->offset;
+
+ while (bytes_written < page->length) {
+ char mcia_pl[MLXSW_REG_MCIA_LEN];
+ char eeprom_tmp[128] = {};
+ u8 size;
+
+ size = min_t(u8, page->length - bytes_written,
+ mlxsw_env->max_eeprom_len);
+
+ mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, page->page,
+ device_addr + bytes_written, size,
+ page->i2c_address);
+ mlxsw_reg_mcia_bank_number_set(mcia_pl, page->bank);
+ memcpy(eeprom_tmp, page->data + bytes_written, size);
+ mlxsw_reg_mcia_eeprom_memcpy_to(mcia_pl, eeprom_tmp);
+
+ err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcia), mcia_pl);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to access module's EEPROM");
+ return err;
+ }
+
+ err = mlxsw_env_mcia_status_process(mcia_pl, extack);
+ if (err)
+ return err;
+
+ bytes_written += size;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_env_set_module_eeprom_by_page);
+
static int mlxsw_env_module_reset(struct mlxsw_core *mlxsw_core, u8 slot_index,
u8 module)
{
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h
index a197e3ae069c..e4ff17869400 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h
@@ -28,6 +28,12 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core,
const struct ethtool_module_eeprom *page,
struct netlink_ext_ack *extack);
+int
+mlxsw_env_set_module_eeprom_by_page(struct mlxsw_core *mlxsw_core,
+ u8 slot_index, u8 module,
+ const struct ethtool_module_eeprom *page,
+ struct netlink_ext_ack *extack);
+
int mlxsw_env_reset_module(struct net_device *netdev,
struct mlxsw_core *mlxsw_core, u8 slot_index,
u8 module, u32 *flags);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
index 025e0db983fe..b032d5a4b3b8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
@@ -1484,6 +1484,7 @@ err_type_file_file_validate:
vfree(types_info->data);
err_data_alloc:
kfree(types_info);
+ linecards->types_info = NULL;
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index 5c511e1a8efa..d61478c0c632 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -100,6 +100,12 @@ static const struct mlxsw_cooling_states default_cooling_states[] = {
struct mlxsw_thermal;
+struct mlxsw_thermal_cooling_device {
+ struct mlxsw_thermal *thermal;
+ struct thermal_cooling_device *cdev;
+ unsigned int idx;
+};
+
struct mlxsw_thermal_module {
struct mlxsw_thermal *parent;
struct thermal_zone_device *tzdev;
@@ -123,7 +129,7 @@ struct mlxsw_thermal {
const struct mlxsw_bus_info *bus_info;
struct thermal_zone_device *tzdev;
int polling_delay;
- struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
+ struct mlxsw_thermal_cooling_device cdevs[MLXSW_MFCR_PWMS_MAX];
struct thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
struct mlxsw_cooling_states cooling_states[MLXSW_THERMAL_NUM_TRIPS];
struct mlxsw_thermal_area line_cards[];
@@ -147,7 +153,7 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
int i;
for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
- if (thermal->cdevs[i] == cdev)
+ if (thermal->cdevs[i].cdev == cdev)
return i;
/* Allow mlxsw thermal zone binding to an external cooling device */
@@ -352,17 +358,14 @@ static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev,
unsigned long *p_state)
{
- struct mlxsw_thermal *thermal = cdev->devdata;
+ struct mlxsw_thermal_cooling_device *mlxsw_cdev = cdev->devdata;
+ struct mlxsw_thermal *thermal = mlxsw_cdev->thermal;
struct device *dev = thermal->bus_info->dev;
char mfsc_pl[MLXSW_REG_MFSC_LEN];
- int err, idx;
u8 duty;
+ int err;
- idx = mlxsw_get_cooling_device_idx(thermal, cdev);
- if (idx < 0)
- return idx;
-
- mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
+ mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_cdev->idx, 0);
err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
if (err) {
dev_err(dev, "Failed to query PWM duty\n");
@@ -378,22 +381,19 @@ static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev,
unsigned long state)
{
- struct mlxsw_thermal *thermal = cdev->devdata;
+ struct mlxsw_thermal_cooling_device *mlxsw_cdev = cdev->devdata;
+ struct mlxsw_thermal *thermal = mlxsw_cdev->thermal;
struct device *dev = thermal->bus_info->dev;
char mfsc_pl[MLXSW_REG_MFSC_LEN];
- int idx;
int err;
if (state > MLXSW_THERMAL_MAX_STATE)
return -EINVAL;
- idx = mlxsw_get_cooling_device_idx(thermal, cdev);
- if (idx < 0)
- return idx;
-
/* Normalize the state to the valid speed range. */
state = max_t(unsigned long, MLXSW_THERMAL_MIN_STATE, state);
- mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
+ mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_cdev->idx,
+ mlxsw_state_to_duty(state));
err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
if (err) {
dev_err(dev, "Failed to write PWM duty\n");
@@ -753,17 +753,21 @@ int mlxsw_thermal_init(struct mlxsw_core *core,
}
for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
if (pwm_active & BIT(i)) {
+ struct mlxsw_thermal_cooling_device *mlxsw_cdev;
struct thermal_cooling_device *cdev;
+ mlxsw_cdev = &thermal->cdevs[i];
+ mlxsw_cdev->thermal = thermal;
+ mlxsw_cdev->idx = i;
cdev = thermal_cooling_device_register("mlxsw_fan",
- thermal,
+ mlxsw_cdev,
&mlxsw_cooling_ops);
if (IS_ERR(cdev)) {
err = PTR_ERR(cdev);
dev_err(dev, "Failed to register cooling device\n");
goto err_thermal_cooling_device_register;
}
- thermal->cdevs[i] = cdev;
+ mlxsw_cdev->cdev = cdev;
}
}
@@ -824,8 +828,7 @@ err_thermal_modules_init:
err_thermal_zone_device_register:
err_thermal_cooling_device_register:
for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
- if (thermal->cdevs[i])
- thermal_cooling_device_unregister(thermal->cdevs[i]);
+ thermal_cooling_device_unregister(thermal->cdevs[i].cdev);
err_reg_write:
err_reg_query:
kfree(thermal);
@@ -847,12 +850,8 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
thermal->tzdev = NULL;
}
- for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
- if (thermal->cdevs[i]) {
- thermal_cooling_device_unregister(thermal->cdevs[i]);
- thermal->cdevs[i] = NULL;
- }
- }
+ for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
+ thermal_cooling_device_unregister(thermal->cdevs[i].cdev);
kfree(thermal);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/item.h b/drivers/net/ethernet/mellanox/mlxsw/item.h
index cfafbeb42586..a619a0736bd1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/item.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/item.h
@@ -218,6 +218,10 @@ __mlxsw_item_bit_array_offset(const struct mlxsw_item *item,
}
max_index = (item->size.bytes << 3) / item->element_size - 1;
+ if (WARN_ONCE(index > max_index,
+ "name=%s,index=%u,max_index=%u\n", item->name, index,
+ max_index))
+ index = 0;
be_index = max_index - index;
offset = be_index * item->element_size >> 3;
in_byte_index = index % (BITS_PER_BYTE / item->element_size);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
index 431accdc6213..828c65036a4c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -140,6 +140,20 @@ mlxsw_m_get_module_eeprom_by_page(struct net_device *netdev,
page, extack);
}
+static int
+mlxsw_m_set_module_eeprom_by_page(struct net_device *netdev,
+ const struct ethtool_module_eeprom *page,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
+ struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
+
+ return mlxsw_env_set_module_eeprom_by_page(core,
+ mlxsw_m_port->slot_index,
+ mlxsw_m_port->module,
+ page, extack);
+}
+
static int mlxsw_m_reset(struct net_device *netdev, u32 *flags)
{
struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
@@ -181,6 +195,7 @@ static const struct ethtool_ops mlxsw_m_port_ethtool_ops = {
.get_module_info = mlxsw_m_get_module_info,
.get_module_eeprom = mlxsw_m_get_module_eeprom,
.get_module_eeprom_by_page = mlxsw_m_get_module_eeprom_by_page,
+ .set_module_eeprom_by_page = mlxsw_m_set_module_eeprom_by_page,
.reset = mlxsw_m_reset,
.get_module_power_mode = mlxsw_m_get_module_power_mode,
.set_module_power_mode = mlxsw_m_set_module_power_mode,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 0320dabd1380..060e5b939211 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1784,6 +1784,7 @@ static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci,
{
struct pci_dev *pdev = mlxsw_pci->pdev;
char mrsr_pl[MLXSW_REG_MRSR_LEN];
+ struct pci_dev *bridge;
int err;
if (!pci_reset_sbr_supported) {
@@ -1800,6 +1801,9 @@ static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci,
sbr:
device_lock_assert(&pdev->dev);
+ bridge = pci_upstream_bridge(pdev);
+ if (bridge)
+ pci_cfg_access_lock(bridge);
pci_cfg_access_lock(pdev);
pci_save_state(pdev);
@@ -1809,6 +1813,8 @@ sbr:
pci_restore_state(pdev);
pci_cfg_access_unlock(pdev);
+ if (bridge)
+ pci_cfg_access_unlock(bridge);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
index a755b0a901d3..c79da1411d33 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
@@ -1068,6 +1068,20 @@ mlxsw_sp_get_module_eeprom_by_page(struct net_device *dev,
}
static int
+mlxsw_sp_set_module_eeprom_by_page(struct net_device *dev,
+ const struct ethtool_module_eeprom *page,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 slot_index = mlxsw_sp_port->mapping.slot_index;
+ u8 module = mlxsw_sp_port->mapping.module;
+
+ return mlxsw_env_set_module_eeprom_by_page(mlxsw_sp->core, slot_index,
+ module, page, extack);
+}
+
+static int
mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
@@ -1256,6 +1270,7 @@ const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
.get_module_info = mlxsw_sp_get_module_info,
.get_module_eeprom = mlxsw_sp_get_module_eeprom,
.get_module_eeprom_by_page = mlxsw_sp_get_module_eeprom_by_page,
+ .set_module_eeprom_by_page = mlxsw_sp_set_module_eeprom_by_page,
.get_ts_info = mlxsw_sp_get_ts_info,
.get_eth_phy_stats = mlxsw_sp_get_eth_phy_stats,
.get_eth_mac_stats = mlxsw_sp_get_eth_mac_stats,
diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
index 6453c92f0fa7..7fa1820db9cc 100644
--- a/drivers/net/ethernet/micrel/ks8851_common.c
+++ b/drivers/net/ethernet/micrel/ks8851_common.c
@@ -352,11 +352,11 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
netif_dbg(ks, intr, ks->netdev,
"%s: txspace %d\n", __func__, tx_space);
- spin_lock(&ks->statelock);
+ spin_lock_bh(&ks->statelock);
ks->tx_space = tx_space;
if (netif_queue_stopped(ks->netdev))
netif_wake_queue(ks->netdev);
- spin_unlock(&ks->statelock);
+ spin_unlock_bh(&ks->statelock);
}
if (status & IRQ_SPIBEI) {
@@ -482,6 +482,7 @@ static int ks8851_net_open(struct net_device *dev)
ks8851_wrreg16(ks, KS_IER, ks->rc_ier);
ks->queued_len = 0;
+ ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR);
netif_start_queue(ks->netdev);
netif_dbg(ks, ifup, ks->netdev, "network device up\n");
@@ -635,14 +636,14 @@ static void ks8851_set_rx_mode(struct net_device *dev)
/* schedule work to do the actual set of the data if needed */
- spin_lock(&ks->statelock);
+ spin_lock_bh(&ks->statelock);
if (memcmp(&rxctrl, &ks->rxctrl, sizeof(rxctrl)) != 0) {
memcpy(&ks->rxctrl, &rxctrl, sizeof(ks->rxctrl));
schedule_work(&ks->rxctrl_work);
}
- spin_unlock(&ks->statelock);
+ spin_unlock_bh(&ks->statelock);
}
static int ks8851_set_mac_address(struct net_device *dev, void *addr)
@@ -1101,7 +1102,6 @@ int ks8851_probe_common(struct net_device *netdev, struct device *dev,
int ret;
ks->netdev = netdev;
- ks->tx_space = 6144;
ks->gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
ret = PTR_ERR_OR_ZERO(ks->gpio);
diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c
index 670c1de966db..3062cc0f9199 100644
--- a/drivers/net/ethernet/micrel/ks8851_spi.c
+++ b/drivers/net/ethernet/micrel/ks8851_spi.c
@@ -340,10 +340,10 @@ static void ks8851_tx_work(struct work_struct *work)
tx_space = ks8851_rdreg16_spi(ks, KS_TXMIR);
- spin_lock(&ks->statelock);
+ spin_lock_bh(&ks->statelock);
ks->queued_len -= dequeued_len;
ks->tx_space = tx_space;
- spin_unlock(&ks->statelock);
+ spin_unlock_bh(&ks->statelock);
ks8851_unlock_spi(ks, &flags);
}
diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c
index 3885d6fbace1..26b00e66d912 100644
--- a/drivers/net/ethernet/microchip/encx24j600-regmap.c
+++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c
@@ -474,13 +474,13 @@ static struct regmap_config regcfg = {
.unlock = regmap_unlock_mutex,
};
-static struct regmap_bus regmap_encx24j600 = {
+static const struct regmap_bus regmap_encx24j600 = {
.write = regmap_encx24j600_write,
.read = regmap_encx24j600_read,
.reg_update_bits = regmap_encx24j600_reg_update_bits,
};
-static struct regmap_config phycfg = {
+static const struct regmap_config phycfg = {
.name = "phy",
.reg_bits = 8,
.val_bits = 16,
@@ -492,7 +492,7 @@ static struct regmap_config phycfg = {
.volatile_reg = encx24j600_phymap_volatile,
};
-static struct regmap_bus phymap_encx24j600 = {
+static const struct regmap_bus phymap_encx24j600 = {
.reg_write = regmap_encx24j600_phy_reg_write,
.reg_read = regmap_encx24j600_phy_reg_read,
};
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
index 2fcbcecb41d1..fef4b2b0b1f2 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
@@ -571,9 +571,6 @@ static u64 ctx_addr_sig_regs[][3] = {
#define CRB_CTX_ADDR_REG_HI(FUNC_ID) (ctx_addr_sig_regs[FUNC_ID][2])
#define CRB_CTX_SIGNATURE_REG(FUNC_ID) (ctx_addr_sig_regs[FUNC_ID][1])
-#define lower32(x) ((u32)((x) & 0xffffffff))
-#define upper32(x) ((u32)(((u64)(x) >> 32) & 0xffffffff))
-
static struct netxen_recv_crb recv_crb_registers[] = {
/* Instance 0 */
{
@@ -723,9 +720,9 @@ netxen_init_old_ctx(struct netxen_adapter *adapter)
NETXEN_CTX_SIGNATURE_V2 : NETXEN_CTX_SIGNATURE;
NXWR32(adapter, CRB_CTX_ADDR_REG_LO(port),
- lower32(recv_ctx->phys_addr));
+ lower_32_bits(recv_ctx->phys_addr));
NXWR32(adapter, CRB_CTX_ADDR_REG_HI(port),
- upper32(recv_ctx->phys_addr));
+ upper_32_bits(recv_ctx->phys_addr));
NXWR32(adapter, CRB_CTX_SIGNATURE_REG(port),
signature | port);
return 0;
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 9246ea2118ff..714d2e804694 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1608,7 +1608,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
if (!tp->dash_enabled) {
rtl_set_d3_pll_down(tp, !wolopts);
- tp->dev->wol_enabled = wolopts ? 1 : 0;
+ tp->dev->ethtool->wol_enabled = wolopts ? 1 : 0;
}
}
@@ -5478,7 +5478,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rtl_set_d3_pll_down(tp, true);
} else {
rtl_set_d3_pll_down(tp, false);
- dev->wol_enabled = 1;
+ dev->ethtool->wol_enabled = 1;
}
jumbo_max = rtl_jumbo_max(tp);
diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index dcab638c57fe..24c90d8f5a44 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -871,13 +871,13 @@ static void rswitch_tx_free(struct net_device *ndev)
dma_rmb();
skb = gq->skbs[gq->dirty];
if (skb) {
+ rdev->ndev->stats.tx_packets++;
+ rdev->ndev->stats.tx_bytes += skb->len;
dma_unmap_single(ndev->dev.parent,
gq->unmap_addrs[gq->dirty],
skb->len, DMA_TO_DEVICE);
dev_kfree_skb_any(gq->skbs[gq->dirty]);
gq->skbs[gq->dirty] = NULL;
- rdev->ndev->stats.tx_packets++;
- rdev->ndev->stats.tx_bytes += skb->len;
}
desc->desc.die_dt = DT_EEMPTY;
}
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 8fa6c0e9195b..7d69302ffa0a 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -1396,7 +1396,7 @@ static void efx_ef10_table_reset_mc_allocations(struct efx_nic *efx)
efx_mcdi_filter_table_reset_mc_allocations(efx);
nic_data->must_restore_piobufs = true;
efx_ef10_forget_old_piobufs(efx);
- efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
+ efx->rss_context.priv.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
/* Driver-created vswitches and vports must be re-created */
nic_data->must_probe_vswitching = true;
diff --git a/drivers/net/ethernet/sfc/ef100_ethtool.c b/drivers/net/ethernet/sfc/ef100_ethtool.c
index cf55202b3a7b..896ffca4aee2 100644
--- a/drivers/net/ethernet/sfc/ef100_ethtool.c
+++ b/drivers/net/ethernet/sfc/ef100_ethtool.c
@@ -59,8 +59,12 @@ const struct ethtool_ops ef100_ethtool_ops = {
.get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size,
.get_rxfh_key_size = efx_ethtool_get_rxfh_key_size,
+ .rxfh_priv_size = sizeof(struct efx_rss_context_priv),
.get_rxfh = efx_ethtool_get_rxfh,
.set_rxfh = efx_ethtool_set_rxfh,
+ .create_rxfh_context = efx_ethtool_create_rxfh_context,
+ .modify_rxfh_context = efx_ethtool_modify_rxfh_context,
+ .remove_rxfh_context = efx_ethtool_remove_rxfh_context,
.get_module_info = efx_ethtool_get_module_info,
.get_module_eeprom = efx_ethtool_get_module_eeprom,
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index e9d9de8e648a..6f1a01ded7d4 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -299,7 +299,7 @@ static int efx_probe_nic(struct efx_nic *efx)
if (efx->n_channels > 1)
netdev_rss_key_fill(efx->rss_context.rx_hash_key,
sizeof(efx->rss_context.rx_hash_key));
- efx_set_default_rx_indir_table(efx, &efx->rss_context);
+ efx_set_default_rx_indir_table(efx, efx->rss_context.rx_indir_table);
/* Initialise the interrupt moderation settings */
efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000);
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 48d3623735ba..7a6cab883d66 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -158,7 +158,7 @@ static inline s32 efx_filter_get_rx_ids(struct efx_nic *efx,
}
/* RSS contexts */
-static inline bool efx_rss_active(struct efx_rss_context *ctx)
+static inline bool efx_rss_active(struct efx_rss_context_priv *ctx)
{
return ctx->context_id != EFX_MCDI_RSS_CONTEXT_INVALID;
}
diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c
index 4ebd5ae23eca..13cf647051af 100644
--- a/drivers/net/ethernet/sfc/efx_common.c
+++ b/drivers/net/ethernet/sfc/efx_common.c
@@ -714,7 +714,7 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method)
mutex_lock(&efx->mac_lock);
down_write(&efx->filter_sem);
- mutex_lock(&efx->rss_lock);
+ mutex_lock(&efx->net_dev->ethtool->rss_lock);
efx->type->fini(efx);
}
@@ -777,7 +777,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
if (efx->type->rx_restore_rss_contexts)
efx->type->rx_restore_rss_contexts(efx);
- mutex_unlock(&efx->rss_lock);
+ mutex_unlock(&efx->net_dev->ethtool->rss_lock);
efx->type->filter_table_restore(efx);
up_write(&efx->filter_sem);
@@ -793,7 +793,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
fail:
efx->port_initialized = false;
- mutex_unlock(&efx->rss_lock);
+ mutex_unlock(&efx->net_dev->ethtool->rss_lock);
up_write(&efx->filter_sem);
mutex_unlock(&efx->mac_lock);
@@ -1000,9 +1000,7 @@ int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev)
efx->type->rx_hash_offset - efx->type->rx_prefix_size;
efx->rx_packet_ts_offset =
efx->type->rx_ts_offset - efx->type->rx_prefix_size;
- INIT_LIST_HEAD(&efx->rss_context.list);
- efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
- mutex_init(&efx->rss_lock);
+ efx->rss_context.priv.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
efx->vport_id = EVB_PORT_ID_ASSIGNED;
spin_lock_init(&efx->stats_lock);
efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 37c69c8d90b1..0f5c68b8bab7 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -268,8 +268,12 @@ const struct ethtool_ops efx_ethtool_ops = {
.set_rxnfc = efx_ethtool_set_rxnfc,
.get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size,
.get_rxfh_key_size = efx_ethtool_get_rxfh_key_size,
+ .rxfh_priv_size = sizeof(struct efx_rss_context_priv),
.get_rxfh = efx_ethtool_get_rxfh,
.set_rxfh = efx_ethtool_set_rxfh,
+ .create_rxfh_context = efx_ethtool_create_rxfh_context,
+ .modify_rxfh_context = efx_ethtool_modify_rxfh_context,
+ .remove_rxfh_context = efx_ethtool_remove_rxfh_context,
.get_ts_info = efx_ethtool_get_ts_info,
.get_module_info = efx_ethtool_get_module_info,
.get_module_eeprom = efx_ethtool_get_module_eeprom,
diff --git a/drivers/net/ethernet/sfc/ethtool_common.c b/drivers/net/ethernet/sfc/ethtool_common.c
index 7d5e5db4eac5..6ded44b86052 100644
--- a/drivers/net/ethernet/sfc/ethtool_common.c
+++ b/drivers/net/ethernet/sfc/ethtool_common.c
@@ -820,10 +820,10 @@ int efx_ethtool_get_rxnfc(struct net_device *net_dev,
return 0;
case ETHTOOL_GRXFH: {
- struct efx_rss_context *ctx = &efx->rss_context;
+ struct efx_rss_context_priv *ctx = &efx->rss_context.priv;
__u64 data;
- mutex_lock(&efx->rss_lock);
+ mutex_lock(&net_dev->ethtool->rss_lock);
if (info->flow_type & FLOW_RSS && info->rss_context) {
ctx = efx_find_rss_context_entry(efx, info->rss_context);
if (!ctx) {
@@ -864,7 +864,7 @@ int efx_ethtool_get_rxnfc(struct net_device *net_dev,
out_setdata_unlock:
info->data = data;
out_unlock:
- mutex_unlock(&efx->rss_lock);
+ mutex_unlock(&net_dev->ethtool->rss_lock);
return rc;
}
@@ -1163,46 +1163,14 @@ u32 efx_ethtool_get_rxfh_key_size(struct net_device *net_dev)
return efx->type->rx_hash_key_size;
}
-static int efx_ethtool_get_rxfh_context(struct net_device *net_dev,
- struct ethtool_rxfh_param *rxfh)
-{
- struct efx_nic *efx = efx_netdev_priv(net_dev);
- struct efx_rss_context *ctx;
- int rc = 0;
-
- if (!efx->type->rx_pull_rss_context_config)
- return -EOPNOTSUPP;
-
- mutex_lock(&efx->rss_lock);
- ctx = efx_find_rss_context_entry(efx, rxfh->rss_context);
- if (!ctx) {
- rc = -ENOENT;
- goto out_unlock;
- }
- rc = efx->type->rx_pull_rss_context_config(efx, ctx);
- if (rc)
- goto out_unlock;
-
- rxfh->hfunc = ETH_RSS_HASH_TOP;
- if (rxfh->indir)
- memcpy(rxfh->indir, ctx->rx_indir_table,
- sizeof(ctx->rx_indir_table));
- if (rxfh->key)
- memcpy(rxfh->key, ctx->rx_hash_key,
- efx->type->rx_hash_key_size);
-out_unlock:
- mutex_unlock(&efx->rss_lock);
- return rc;
-}
-
int efx_ethtool_get_rxfh(struct net_device *net_dev,
struct ethtool_rxfh_param *rxfh)
{
struct efx_nic *efx = efx_netdev_priv(net_dev);
int rc;
- if (rxfh->rss_context)
- return efx_ethtool_get_rxfh_context(net_dev, rxfh);
+ if (rxfh->rss_context) /* core should never call us for these */
+ return -EINVAL;
rc = efx->type->rx_pull_rss_config(efx);
if (rc)
@@ -1218,68 +1186,85 @@ int efx_ethtool_get_rxfh(struct net_device *net_dev,
return 0;
}
-static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
- struct ethtool_rxfh_param *rxfh,
- struct netlink_ext_ack *extack)
+int efx_ethtool_modify_rxfh_context(struct net_device *net_dev,
+ struct ethtool_rxfh_context *ctx,
+ const struct ethtool_rxfh_param *rxfh,
+ struct netlink_ext_ack *extack)
{
struct efx_nic *efx = efx_netdev_priv(net_dev);
- u32 *rss_context = &rxfh->rss_context;
- struct efx_rss_context *ctx;
- u32 *indir = rxfh->indir;
- bool allocated = false;
- u8 *key = rxfh->key;
- int rc;
+ struct efx_rss_context_priv *priv;
+ const u32 *indir = rxfh->indir;
+ const u8 *key = rxfh->key;
- if (!efx->type->rx_push_rss_context_config)
+ if (!efx->type->rx_push_rss_context_config) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "NIC type does not support custom contexts");
return -EOPNOTSUPP;
-
- mutex_lock(&efx->rss_lock);
-
- if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
- if (rxfh->rss_delete) {
- /* alloc + delete == Nothing to do */
- rc = -EINVAL;
- goto out_unlock;
- }
- ctx = efx_alloc_rss_context_entry(efx);
- if (!ctx) {
- rc = -ENOMEM;
- goto out_unlock;
- }
- ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
- /* Initialise indir table and key to defaults */
- efx_set_default_rx_indir_table(efx, ctx);
- netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key));
- allocated = true;
- } else {
- ctx = efx_find_rss_context_entry(efx, *rss_context);
- if (!ctx) {
- rc = -ENOENT;
- goto out_unlock;
- }
}
-
- if (rxfh->rss_delete) {
- /* delete this context */
- rc = efx->type->rx_push_rss_context_config(efx, ctx, NULL, NULL);
- if (!rc)
- efx_free_rss_context_entry(ctx);
- goto out_unlock;
+ /* Hash function is Toeplitz, cannot be changed */
+ if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
+ rxfh->hfunc != ETH_RSS_HASH_TOP) {
+ NL_SET_ERR_MSG_MOD(extack, "Only Toeplitz hash is supported");
+ return -EOPNOTSUPP;
}
+ priv = ethtool_rxfh_context_priv(ctx);
+
if (!key)
- key = ctx->rx_hash_key;
+ key = ethtool_rxfh_context_key(ctx);
if (!indir)
- indir = ctx->rx_indir_table;
+ indir = ethtool_rxfh_context_indir(ctx);
- rc = efx->type->rx_push_rss_context_config(efx, ctx, indir, key);
- if (rc && allocated)
- efx_free_rss_context_entry(ctx);
- else
- *rss_context = ctx->user_id;
-out_unlock:
- mutex_unlock(&efx->rss_lock);
- return rc;
+ return efx->type->rx_push_rss_context_config(efx, priv, indir, key,
+ false);
+}
+
+int efx_ethtool_create_rxfh_context(struct net_device *net_dev,
+ struct ethtool_rxfh_context *ctx,
+ const struct ethtool_rxfh_param *rxfh,
+ struct netlink_ext_ack *extack)
+{
+ struct efx_nic *efx = efx_netdev_priv(net_dev);
+ struct efx_rss_context_priv *priv;
+
+ priv = ethtool_rxfh_context_priv(ctx);
+
+ priv->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
+ priv->rx_hash_udp_4tuple = false;
+ /* Generate default indir table and/or key if not specified.
+ * We use ctx as a place to store these; this is fine because
+ * we're doing a create, so if we fail then the ctx will just
+ * be deleted.
+ */
+ if (!rxfh->indir)
+ efx_set_default_rx_indir_table(efx, ethtool_rxfh_context_indir(ctx));
+ if (!rxfh->key)
+ netdev_rss_key_fill(ethtool_rxfh_context_key(ctx),
+ ctx->key_size);
+ if (rxfh->hfunc == ETH_RSS_HASH_NO_CHANGE)
+ ctx->hfunc = ETH_RSS_HASH_TOP;
+ if (rxfh->input_xfrm == RXH_XFRM_NO_CHANGE)
+ ctx->input_xfrm = 0;
+ return efx_ethtool_modify_rxfh_context(net_dev, ctx, rxfh, extack);
+}
+
+int efx_ethtool_remove_rxfh_context(struct net_device *net_dev,
+ struct ethtool_rxfh_context *ctx,
+ u32 rss_context,
+ struct netlink_ext_ack *extack)
+{
+ struct efx_nic *efx = efx_netdev_priv(net_dev);
+ struct efx_rss_context_priv *priv;
+
+ if (!efx->type->rx_push_rss_context_config) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "NIC type does not support custom contexts");
+ return -EOPNOTSUPP;
+ }
+
+ priv = ethtool_rxfh_context_priv(ctx);
+ return efx->type->rx_push_rss_context_config(efx, priv, NULL, NULL,
+ true);
}
int efx_ethtool_set_rxfh(struct net_device *net_dev,
@@ -1295,8 +1280,9 @@ int efx_ethtool_set_rxfh(struct net_device *net_dev,
rxfh->hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;
- if (rxfh->rss_context)
- return efx_ethtool_set_rxfh_context(net_dev, rxfh, extack);
+ /* Custom contexts should use new API */
+ if (WARN_ON_ONCE(rxfh->rss_context))
+ return -EIO;
if (!indir && !key)
return 0;
diff --git a/drivers/net/ethernet/sfc/ethtool_common.h b/drivers/net/ethernet/sfc/ethtool_common.h
index a680e5980213..fc52e891637d 100644
--- a/drivers/net/ethernet/sfc/ethtool_common.h
+++ b/drivers/net/ethernet/sfc/ethtool_common.h
@@ -49,6 +49,18 @@ int efx_ethtool_get_rxfh(struct net_device *net_dev,
int efx_ethtool_set_rxfh(struct net_device *net_dev,
struct ethtool_rxfh_param *rxfh,
struct netlink_ext_ack *extack);
+int efx_ethtool_create_rxfh_context(struct net_device *net_dev,
+ struct ethtool_rxfh_context *ctx,
+ const struct ethtool_rxfh_param *rxfh,
+ struct netlink_ext_ack *extack);
+int efx_ethtool_modify_rxfh_context(struct net_device *net_dev,
+ struct ethtool_rxfh_context *ctx,
+ const struct ethtool_rxfh_param *rxfh,
+ struct netlink_ext_ack *extack);
+int efx_ethtool_remove_rxfh_context(struct net_device *net_dev,
+ struct ethtool_rxfh_context *ctx,
+ u32 rss_context,
+ struct netlink_ext_ack *extack);
int efx_ethtool_reset(struct net_device *net_dev, u32 *flags);
int efx_ethtool_get_module_eeprom(struct net_device *net_dev,
struct ethtool_eeprom *ee,
diff --git a/drivers/net/ethernet/sfc/mcdi_filters.c b/drivers/net/ethernet/sfc/mcdi_filters.c
index 4ff6586116ee..6ef96292909a 100644
--- a/drivers/net/ethernet/sfc/mcdi_filters.c
+++ b/drivers/net/ethernet/sfc/mcdi_filters.c
@@ -194,7 +194,7 @@ efx_mcdi_filter_push_prep_set_match_fields(struct efx_nic *efx,
static void efx_mcdi_filter_push_prep(struct efx_nic *efx,
const struct efx_filter_spec *spec,
efx_dword_t *inbuf, u64 handle,
- struct efx_rss_context *ctx,
+ struct efx_rss_context_priv *ctx,
bool replacing)
{
u32 flags = spec->flags;
@@ -245,7 +245,7 @@ static void efx_mcdi_filter_push_prep(struct efx_nic *efx,
static int efx_mcdi_filter_push(struct efx_nic *efx,
const struct efx_filter_spec *spec, u64 *handle,
- struct efx_rss_context *ctx, bool replacing)
+ struct efx_rss_context_priv *ctx, bool replacing)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN);
@@ -345,9 +345,9 @@ static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx,
bool replace_equal)
{
DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
+ struct efx_rss_context_priv *ctx = NULL;
struct efx_mcdi_filter_table *table;
struct efx_filter_spec *saved_spec;
- struct efx_rss_context *ctx = NULL;
unsigned int match_pri, hash;
unsigned int priv_flags;
bool rss_locked = false;
@@ -380,12 +380,12 @@ static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx,
bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
- mutex_lock(&efx->rss_lock);
+ mutex_lock(&efx->net_dev->ethtool->rss_lock);
rss_locked = true;
if (spec->rss_context)
ctx = efx_find_rss_context_entry(efx, spec->rss_context);
else
- ctx = &efx->rss_context;
+ ctx = &efx->rss_context.priv;
if (!ctx) {
rc = -ENOENT;
goto out_unlock;
@@ -548,7 +548,7 @@ static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx,
out_unlock:
if (rss_locked)
- mutex_unlock(&efx->rss_lock);
+ mutex_unlock(&efx->net_dev->ethtool->rss_lock);
up_write(&table->lock);
return rc;
}
@@ -611,13 +611,13 @@ static int efx_mcdi_filter_remove_internal(struct efx_nic *efx,
new_spec.priority = EFX_FILTER_PRI_AUTO;
new_spec.flags = (EFX_FILTER_FLAG_RX |
- (efx_rss_active(&efx->rss_context) ?
+ (efx_rss_active(&efx->rss_context.priv) ?
EFX_FILTER_FLAG_RX_RSS : 0));
new_spec.dmaq_id = 0;
new_spec.rss_context = 0;
rc = efx_mcdi_filter_push(efx, &new_spec,
&table->entry[filter_idx].handle,
- &efx->rss_context,
+ &efx->rss_context.priv,
true);
if (rc == 0)
@@ -764,7 +764,7 @@ static int efx_mcdi_filter_insert_addr_list(struct efx_nic *efx,
ids = vlan->uc;
}
- filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0;
+ filter_flags = efx_rss_active(&efx->rss_context.priv) ? EFX_FILTER_FLAG_RX_RSS : 0;
/* Insert/renew filters */
for (i = 0; i < addr_count; i++) {
@@ -833,7 +833,7 @@ static int efx_mcdi_filter_insert_def(struct efx_nic *efx,
int rc;
u16 *id;
- filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0;
+ filter_flags = efx_rss_active(&efx->rss_context.priv) ? EFX_FILTER_FLAG_RX_RSS : 0;
efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
@@ -1375,8 +1375,8 @@ void efx_mcdi_filter_table_restore(struct efx_nic *efx)
struct efx_mcdi_filter_table *table = efx->filter_state;
unsigned int invalid_filters = 0, failed = 0;
struct efx_mcdi_filter_vlan *vlan;
+ struct efx_rss_context_priv *ctx;
struct efx_filter_spec *spec;
- struct efx_rss_context *ctx;
unsigned int filter_idx;
u32 mcdi_flags;
int match_pri;
@@ -1388,7 +1388,7 @@ void efx_mcdi_filter_table_restore(struct efx_nic *efx)
return;
down_write(&table->lock);
- mutex_lock(&efx->rss_lock);
+ mutex_lock(&efx->net_dev->ethtool->rss_lock);
for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) {
spec = efx_mcdi_filter_entry_spec(table, filter_idx);
@@ -1407,7 +1407,7 @@ void efx_mcdi_filter_table_restore(struct efx_nic *efx)
if (spec->rss_context)
ctx = efx_find_rss_context_entry(efx, spec->rss_context);
else
- ctx = &efx->rss_context;
+ ctx = &efx->rss_context.priv;
if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
if (!ctx) {
netif_warn(efx, drv, efx->net_dev,
@@ -1444,7 +1444,7 @@ not_restored:
}
}
- mutex_unlock(&efx->rss_lock);
+ mutex_unlock(&efx->net_dev->ethtool->rss_lock);
up_write(&table->lock);
/*
@@ -1861,7 +1861,8 @@ out_unlock:
RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\
RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN)
-int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, u32 *flags)
+static int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context,
+ u32 *flags)
{
/*
* Firmware had a bug (sfc bug 61952) where it would not actually
@@ -1909,8 +1910,8 @@ int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, u32 *flags)
* Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we
* just need to set the UDP ports flags (for both IP versions).
*/
-void efx_mcdi_set_rss_context_flags(struct efx_nic *efx,
- struct efx_rss_context *ctx)
+static void efx_mcdi_set_rss_context_flags(struct efx_nic *efx,
+ struct efx_rss_context_priv *ctx)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN);
u32 flags;
@@ -1931,7 +1932,7 @@ void efx_mcdi_set_rss_context_flags(struct efx_nic *efx,
}
static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive,
- struct efx_rss_context *ctx,
+ struct efx_rss_context_priv *ctx,
unsigned *context_size)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN);
@@ -2032,25 +2033,26 @@ void efx_mcdi_rx_free_indir_table(struct efx_nic *efx)
{
int rc;
- if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) {
- rc = efx_mcdi_filter_free_rss_context(efx, efx->rss_context.context_id);
+ if (efx->rss_context.priv.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) {
+ rc = efx_mcdi_filter_free_rss_context(efx, efx->rss_context.priv.context_id);
WARN_ON(rc != 0);
}
- efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
+ efx->rss_context.priv.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
}
static int efx_mcdi_filter_rx_push_shared_rss_config(struct efx_nic *efx,
unsigned *context_size)
{
struct efx_mcdi_filter_table *table = efx->filter_state;
- int rc = efx_mcdi_filter_alloc_rss_context(efx, false, &efx->rss_context,
- context_size);
+ int rc = efx_mcdi_filter_alloc_rss_context(efx, false,
+ &efx->rss_context.priv,
+ context_size);
if (rc != 0)
return rc;
table->rx_rss_context_exclusive = false;
- efx_set_default_rx_indir_table(efx, &efx->rss_context);
+ efx_set_default_rx_indir_table(efx, efx->rss_context.rx_indir_table);
return 0;
}
@@ -2058,26 +2060,27 @@ static int efx_mcdi_filter_rx_push_exclusive_rss_config(struct efx_nic *efx,
const u32 *rx_indir_table,
const u8 *key)
{
+ u32 old_rx_rss_context = efx->rss_context.priv.context_id;
struct efx_mcdi_filter_table *table = efx->filter_state;
- u32 old_rx_rss_context = efx->rss_context.context_id;
int rc;
- if (efx->rss_context.context_id == EFX_MCDI_RSS_CONTEXT_INVALID ||
+ if (efx->rss_context.priv.context_id == EFX_MCDI_RSS_CONTEXT_INVALID ||
!table->rx_rss_context_exclusive) {
- rc = efx_mcdi_filter_alloc_rss_context(efx, true, &efx->rss_context,
- NULL);
+ rc = efx_mcdi_filter_alloc_rss_context(efx, true,
+ &efx->rss_context.priv,
+ NULL);
if (rc == -EOPNOTSUPP)
return rc;
else if (rc != 0)
goto fail1;
}
- rc = efx_mcdi_filter_populate_rss_table(efx, efx->rss_context.context_id,
- rx_indir_table, key);
+ rc = efx_mcdi_filter_populate_rss_table(efx, efx->rss_context.priv.context_id,
+ rx_indir_table, key);
if (rc != 0)
goto fail2;
- if (efx->rss_context.context_id != old_rx_rss_context &&
+ if (efx->rss_context.priv.context_id != old_rx_rss_context &&
old_rx_rss_context != EFX_MCDI_RSS_CONTEXT_INVALID)
WARN_ON(efx_mcdi_filter_free_rss_context(efx, old_rx_rss_context) != 0);
table->rx_rss_context_exclusive = true;
@@ -2091,9 +2094,9 @@ static int efx_mcdi_filter_rx_push_exclusive_rss_config(struct efx_nic *efx,
return 0;
fail2:
- if (old_rx_rss_context != efx->rss_context.context_id) {
- WARN_ON(efx_mcdi_filter_free_rss_context(efx, efx->rss_context.context_id) != 0);
- efx->rss_context.context_id = old_rx_rss_context;
+ if (old_rx_rss_context != efx->rss_context.priv.context_id) {
+ WARN_ON(efx_mcdi_filter_free_rss_context(efx, efx->rss_context.priv.context_id) != 0);
+ efx->rss_context.priv.context_id = old_rx_rss_context;
}
fail1:
netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
@@ -2101,33 +2104,28 @@ fail1:
}
int efx_mcdi_rx_push_rss_context_config(struct efx_nic *efx,
- struct efx_rss_context *ctx,
+ struct efx_rss_context_priv *ctx,
const u32 *rx_indir_table,
- const u8 *key)
+ const u8 *key, bool delete)
{
int rc;
- WARN_ON(!mutex_is_locked(&efx->rss_lock));
+ WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock));
if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) {
+ if (delete)
+ /* already wasn't in HW, nothing to do */
+ return 0;
rc = efx_mcdi_filter_alloc_rss_context(efx, true, ctx, NULL);
if (rc)
return rc;
}
- if (!rx_indir_table) /* Delete this context */
+ if (delete) /* Delete this context */
return efx_mcdi_filter_free_rss_context(efx, ctx->context_id);
- rc = efx_mcdi_filter_populate_rss_table(efx, ctx->context_id,
- rx_indir_table, key);
- if (rc)
- return rc;
-
- memcpy(ctx->rx_indir_table, rx_indir_table,
- sizeof(efx->rss_context.rx_indir_table));
- memcpy(ctx->rx_hash_key, key, efx->type->rx_hash_key_size);
-
- return 0;
+ return efx_mcdi_filter_populate_rss_table(efx, ctx->context_id,
+ rx_indir_table, key);
}
int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx,
@@ -2139,16 +2137,16 @@ int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx,
size_t outlen;
int rc, i;
- WARN_ON(!mutex_is_locked(&efx->rss_lock));
+ WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock));
BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN !=
MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN);
- if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID)
+ if (ctx->priv.context_id == EFX_MCDI_RSS_CONTEXT_INVALID)
return -ENOENT;
MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID,
- ctx->context_id);
+ ctx->priv.context_id);
BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_indir_table) !=
MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE_LEN);
rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_TABLE, inbuf, sizeof(inbuf),
@@ -2164,7 +2162,7 @@ int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx,
RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE)[i];
MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_KEY_IN_RSS_CONTEXT_ID,
- ctx->context_id);
+ ctx->priv.context_id);
BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_hash_key) !=
MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_KEY, inbuf, sizeof(inbuf),
@@ -2186,35 +2184,42 @@ int efx_mcdi_rx_pull_rss_config(struct efx_nic *efx)
{
int rc;
- mutex_lock(&efx->rss_lock);
+ mutex_lock(&efx->net_dev->ethtool->rss_lock);
rc = efx_mcdi_rx_pull_rss_context_config(efx, &efx->rss_context);
- mutex_unlock(&efx->rss_lock);
+ mutex_unlock(&efx->net_dev->ethtool->rss_lock);
return rc;
}
void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx)
{
struct efx_mcdi_filter_table *table = efx->filter_state;
- struct efx_rss_context *ctx;
+ struct ethtool_rxfh_context *ctx;
+ unsigned long context;
int rc;
- WARN_ON(!mutex_is_locked(&efx->rss_lock));
+ WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock));
if (!table->must_restore_rss_contexts)
return;
- list_for_each_entry(ctx, &efx->rss_context.list, list) {
+ xa_for_each(&efx->net_dev->ethtool->rss_ctx, context, ctx) {
+ struct efx_rss_context_priv *priv;
+ u32 *indir;
+ u8 *key;
+
+ priv = ethtool_rxfh_context_priv(ctx);
/* previous NIC RSS context is gone */
- ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
+ priv->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
/* so try to allocate a new one */
- rc = efx_mcdi_rx_push_rss_context_config(efx, ctx,
- ctx->rx_indir_table,
- ctx->rx_hash_key);
+ indir = ethtool_rxfh_context_indir(ctx);
+ key = ethtool_rxfh_context_key(ctx);
+ rc = efx_mcdi_rx_push_rss_context_config(efx, priv, indir, key,
+ false);
if (rc)
netif_warn(efx, probe, efx->net_dev,
- "failed to restore RSS context %u, rc=%d"
+ "failed to restore RSS context %lu, rc=%d"
"; RSS filters may fail to be applied\n",
- ctx->user_id, rc);
+ context, rc);
}
table->must_restore_rss_contexts = false;
}
@@ -2276,7 +2281,7 @@ int efx_mcdi_vf_rx_push_rss_config(struct efx_nic *efx, bool user,
{
if (user)
return -EOPNOTSUPP;
- if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID)
+ if (efx->rss_context.priv.context_id != EFX_MCDI_RSS_CONTEXT_INVALID)
return 0;
return efx_mcdi_filter_rx_push_shared_rss_config(efx, NULL);
}
@@ -2295,7 +2300,7 @@ int efx_mcdi_push_default_indir_table(struct efx_nic *efx,
efx_mcdi_rx_free_indir_table(efx);
if (rss_spread > 1) {
- efx_set_default_rx_indir_table(efx, &efx->rss_context);
+ efx_set_default_rx_indir_table(efx, efx->rss_context.rx_indir_table);
rc = efx->type->rx_push_rss_config(efx, false,
efx->rss_context.rx_indir_table, NULL);
}
diff --git a/drivers/net/ethernet/sfc/mcdi_filters.h b/drivers/net/ethernet/sfc/mcdi_filters.h
index c0d6558b9fd2..11b9f87ed9e1 100644
--- a/drivers/net/ethernet/sfc/mcdi_filters.h
+++ b/drivers/net/ethernet/sfc/mcdi_filters.h
@@ -145,9 +145,9 @@ void efx_mcdi_filter_del_vlan(struct efx_nic *efx, u16 vid);
void efx_mcdi_rx_free_indir_table(struct efx_nic *efx);
int efx_mcdi_rx_push_rss_context_config(struct efx_nic *efx,
- struct efx_rss_context *ctx,
+ struct efx_rss_context_priv *ctx,
const u32 *rx_indir_table,
- const u8 *key);
+ const u8 *key, bool delete);
int efx_mcdi_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
const u32 *rx_indir_table,
const u8 *key);
@@ -161,10 +161,6 @@ int efx_mcdi_push_default_indir_table(struct efx_nic *efx,
int efx_mcdi_rx_pull_rss_config(struct efx_nic *efx);
int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx,
struct efx_rss_context *ctx);
-int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context,
- u32 *flags);
-void efx_mcdi_set_rss_context_flags(struct efx_nic *efx,
- struct efx_rss_context *ctx);
void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx);
static inline void efx_mcdi_update_rx_scatter(struct efx_nic *efx)
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index f2dd7feb0e0c..b85c51cbe7f9 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -737,21 +737,24 @@ struct vfdi_status;
/* The reserved RSS context value */
#define EFX_MCDI_RSS_CONTEXT_INVALID 0xffffffff
/**
- * struct efx_rss_context - A user-defined RSS context for filtering
- * @list: node of linked list on which this struct is stored
+ * struct efx_rss_context_priv - driver private data for an RSS context
* @context_id: the RSS_CONTEXT_ID returned by MC firmware, or
* %EFX_MCDI_RSS_CONTEXT_INVALID if this context is not present on the NIC.
- * For Siena, 0 if RSS is active, else %EFX_MCDI_RSS_CONTEXT_INVALID.
- * @user_id: the rss_context ID exposed to userspace over ethtool.
* @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
+ */
+struct efx_rss_context_priv {
+ u32 context_id;
+ bool rx_hash_udp_4tuple;
+};
+
+/**
+ * struct efx_rss_context - an RSS context
+ * @priv: hardware-specific state
* @rx_hash_key: Toeplitz hash key for this RSS context
* @indir_table: Indirection table for this RSS context
*/
struct efx_rss_context {
- struct list_head list;
- u32 context_id;
- u32 user_id;
- bool rx_hash_udp_4tuple;
+ struct efx_rss_context_priv priv;
u8 rx_hash_key[40];
u32 rx_indir_table[128];
};
@@ -883,9 +886,7 @@ struct efx_mae;
* @rx_packet_ts_offset: Offset of timestamp from start of packet data
* (valid only if channel->sync_timestamps_enabled; always negative)
* @rx_scatter: Scatter mode enabled for receives
- * @rss_context: Main RSS context. Its @list member is the head of the list of
- * RSS contexts created by user requests
- * @rss_lock: Protects custom RSS context software state in @rss_context.list
+ * @rss_context: Main RSS context.
* @vport_id: The function's vport ID, only relevant for PFs
* @int_error_count: Number of internal errors seen recently
* @int_error_expire: Time at which error count will be expired
@@ -1052,7 +1053,6 @@ struct efx_nic {
int rx_packet_ts_offset;
bool rx_scatter;
struct efx_rss_context rss_context;
- struct mutex rss_lock;
u32 vport_id;
unsigned int_error_count;
@@ -1416,9 +1416,9 @@ struct efx_nic_type {
const u32 *rx_indir_table, const u8 *key);
int (*rx_pull_rss_config)(struct efx_nic *efx);
int (*rx_push_rss_context_config)(struct efx_nic *efx,
- struct efx_rss_context *ctx,
+ struct efx_rss_context_priv *ctx,
const u32 *rx_indir_table,
- const u8 *key);
+ const u8 *key, bool delete);
int (*rx_pull_rss_context_config)(struct efx_nic *efx,
struct efx_rss_context *ctx);
void (*rx_restore_rss_contexts)(struct efx_nic *efx);
diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c
index dcd901eccfc8..0b7dc75c40f9 100644
--- a/drivers/net/ethernet/sfc/rx_common.c
+++ b/drivers/net/ethernet/sfc/rx_common.c
@@ -557,69 +557,25 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
napi_gro_frags(napi);
}
-/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because
- * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
- */
-struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx)
+struct efx_rss_context_priv *efx_find_rss_context_entry(struct efx_nic *efx,
+ u32 id)
{
- struct list_head *head = &efx->rss_context.list;
- struct efx_rss_context *ctx, *new;
- u32 id = 1; /* Don't use zero, that refers to the master RSS context */
-
- WARN_ON(!mutex_is_locked(&efx->rss_lock));
+ struct ethtool_rxfh_context *ctx;
- /* Search for first gap in the numbering */
- list_for_each_entry(ctx, head, list) {
- if (ctx->user_id != id)
- break;
- id++;
- /* Check for wrap. If this happens, we have nearly 2^32
- * allocated RSS contexts, which seems unlikely.
- */
- if (WARN_ON_ONCE(!id))
- return NULL;
- }
+ WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock));
- /* Create the new entry */
- new = kmalloc(sizeof(*new), GFP_KERNEL);
- if (!new)
+ ctx = xa_load(&efx->net_dev->ethtool->rss_ctx, id);
+ if (!ctx)
return NULL;
- new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
- new->rx_hash_udp_4tuple = false;
-
- /* Insert the new entry into the gap */
- new->user_id = id;
- list_add_tail(&new->list, &ctx->list);
- return new;
-}
-
-struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id)
-{
- struct list_head *head = &efx->rss_context.list;
- struct efx_rss_context *ctx;
-
- WARN_ON(!mutex_is_locked(&efx->rss_lock));
-
- list_for_each_entry(ctx, head, list)
- if (ctx->user_id == id)
- return ctx;
- return NULL;
-}
-
-void efx_free_rss_context_entry(struct efx_rss_context *ctx)
-{
- list_del(&ctx->list);
- kfree(ctx);
+ return ethtool_rxfh_context_priv(ctx);
}
-void efx_set_default_rx_indir_table(struct efx_nic *efx,
- struct efx_rss_context *ctx)
+void efx_set_default_rx_indir_table(struct efx_nic *efx, u32 *indir)
{
size_t i;
- for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
- ctx->rx_indir_table[i] =
- ethtool_rxfh_indir_default(i, efx->rss_spread);
+ for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_indir_table); i++)
+ indir[i] = ethtool_rxfh_indir_default(i, efx->rss_spread);
}
/**
diff --git a/drivers/net/ethernet/sfc/rx_common.h b/drivers/net/ethernet/sfc/rx_common.h
index fbd2769307f9..75fa84192362 100644
--- a/drivers/net/ethernet/sfc/rx_common.h
+++ b/drivers/net/ethernet/sfc/rx_common.h
@@ -84,11 +84,9 @@ void
efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
unsigned int n_frags, u8 *eh, __wsum csum);
-struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx);
-struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id);
-void efx_free_rss_context_entry(struct efx_rss_context *ctx);
-void efx_set_default_rx_indir_table(struct efx_nic *efx,
- struct efx_rss_context *ctx);
+struct efx_rss_context_priv *efx_find_rss_context_entry(struct efx_nic *efx,
+ u32 id);
+void efx_set_default_rx_indir_table(struct efx_nic *efx, u32 *indir);
bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
bool efx_filter_spec_equal(const struct efx_filter_spec *left,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 094d34c4193c..2dbfbca606af 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -595,7 +595,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
/* Intel mgbe SGMII interface uses pcs-xcps */
if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII ||
plat->phy_interface == PHY_INTERFACE_MODE_1000BASEX) {
- plat->mdio_bus_data->has_xpcs = true;
+ plat->mdio_bus_data->pcs_mask = BIT(INTEL_MGBE_XPCS_ADDR);
plat->mdio_bus_data->default_an_inband = true;
plat->select_pcs = intel_mgbe_select_pcs;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index 80eb72bc6311..901a3c1959fa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -21,6 +21,7 @@
#define RGMII_IO_MACRO_CONFIG2 0x1C
#define RGMII_IO_MACRO_DEBUG1 0x20
#define EMAC_SYSTEM_LOW_POWER_DEBUG 0x28
+#define EMAC_WRAPPER_SGMII_PHY_CNTRL1 0xf4
/* RGMII_IO_MACRO_CONFIG fields */
#define RGMII_CONFIG_FUNC_CLK_EN BIT(30)
@@ -79,6 +80,9 @@
#define ETHQOS_MAC_CTRL_SPEED_MODE BIT(14)
#define ETHQOS_MAC_CTRL_PORT_SEL BIT(15)
+/* EMAC_WRAPPER_SGMII_PHY_CNTRL1 bits */
+#define SGMII_PHY_CNTRL1_SGMII_TX_TO_RX_LOOPBACK_EN BIT(3)
+
#define SGMII_10M_RX_CLK_DVDR 0x31
struct ethqos_emac_por {
@@ -95,6 +99,7 @@ struct ethqos_emac_driver_data {
bool has_integrated_pcs;
u32 dma_addr_width;
struct dwmac4_addrs dwmac4_addrs;
+ bool needs_sgmii_loopback;
};
struct qcom_ethqos {
@@ -114,6 +119,7 @@ struct qcom_ethqos {
unsigned int num_por;
bool rgmii_config_loopback_en;
bool has_emac_ge_3;
+ bool needs_sgmii_loopback;
};
static int rgmii_readl(struct qcom_ethqos *ethqos, unsigned int offset)
@@ -191,8 +197,22 @@ ethqos_update_link_clk(struct qcom_ethqos *ethqos, unsigned int speed)
clk_set_rate(ethqos->link_clk, ethqos->link_clk_rate);
}
+static void
+qcom_ethqos_set_sgmii_loopback(struct qcom_ethqos *ethqos, bool enable)
+{
+ if (!ethqos->needs_sgmii_loopback ||
+ ethqos->phy_mode != PHY_INTERFACE_MODE_2500BASEX)
+ return;
+
+ rgmii_updatel(ethqos,
+ SGMII_PHY_CNTRL1_SGMII_TX_TO_RX_LOOPBACK_EN,
+ enable ? SGMII_PHY_CNTRL1_SGMII_TX_TO_RX_LOOPBACK_EN : 0,
+ EMAC_WRAPPER_SGMII_PHY_CNTRL1);
+}
+
static void ethqos_set_func_clk_en(struct qcom_ethqos *ethqos)
{
+ qcom_ethqos_set_sgmii_loopback(ethqos, true);
rgmii_updatel(ethqos, RGMII_CONFIG_FUNC_CLK_EN,
RGMII_CONFIG_FUNC_CLK_EN, RGMII_IO_MACRO_CONFIG);
}
@@ -272,11 +292,12 @@ static const struct ethqos_emac_por emac_v4_0_0_por[] = {
static const struct ethqos_emac_driver_data emac_v4_0_0_data = {
.por = emac_v4_0_0_por,
- .num_por = ARRAY_SIZE(emac_v3_0_0_por),
+ .num_por = ARRAY_SIZE(emac_v4_0_0_por),
.rgmii_config_loopback_en = false,
.has_emac_ge_3 = true,
.link_clk_name = "phyaux",
.has_integrated_pcs = true,
+ .needs_sgmii_loopback = true,
.dma_addr_width = 36,
.dwmac4_addrs = {
.dma_chan = 0x00008100,
@@ -665,6 +686,14 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos)
return val;
}
+static void qcom_ethqos_speed_mode_2500(struct net_device *ndev, void *data)
+{
+ struct stmmac_priv *priv = netdev_priv(ndev);
+
+ priv->plat->max_speed = 2500;
+ priv->plat->phy_interface = PHY_INTERFACE_MODE_2500BASEX;
+}
+
static int ethqos_configure(struct qcom_ethqos *ethqos)
{
return ethqos->configure_func(ethqos);
@@ -674,6 +703,7 @@ static void ethqos_fix_mac_speed(void *priv, unsigned int speed, unsigned int mo
{
struct qcom_ethqos *ethqos = priv;
+ qcom_ethqos_set_sgmii_loopback(ethqos, false);
ethqos->speed = speed;
ethqos_update_link_clk(ethqos, speed);
ethqos_configure(ethqos);
@@ -787,6 +817,9 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
case PHY_INTERFACE_MODE_RGMII_TXID:
ethqos->configure_func = ethqos_configure_rgmii;
break;
+ case PHY_INTERFACE_MODE_2500BASEX:
+ plat_dat->speed_mode_2500 = qcom_ethqos_speed_mode_2500;
+ fallthrough;
case PHY_INTERFACE_MODE_SGMII:
ethqos->configure_func = ethqos_configure_sgmii;
break;
@@ -809,6 +842,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
ethqos->num_por = data->num_por;
ethqos->rgmii_config_loopback_en = data->rgmii_config_loopback_en;
ethqos->has_emac_ge_3 = data->has_emac_ge_3;
+ ethqos->needs_sgmii_loopback = data->needs_sgmii_loopback;
ethqos->link_clk = devm_clk_get(dev, data->link_clk_name ?: "rgmii");
if (IS_ERR(ethqos->link_clk))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index 23cf0a5b047f..c1732955a697 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -206,6 +206,9 @@ static int stm32mp1_validate_ethck_rate(struct plat_stmmacenet_data *plat_dat)
struct stm32_dwmac *dwmac = plat_dat->bsp_priv;
const u32 clk_rate = clk_get_rate(dwmac->clk_eth_ck);
+ if (!dwmac->enable_eth_ck)
+ return 0;
+
switch (plat_dat->mac_interface) {
case PHY_INTERFACE_MODE_MII:
case PHY_INTERFACE_MODE_GMII:
@@ -432,10 +435,12 @@ static int stm32_dwmac_parse_data(struct stm32_dwmac *dwmac,
dwmac->mode_mask = SYSCFG_MP1_ETH_MASK;
err = of_property_read_u32_index(np, "st,syscon", 2, &dwmac->mode_mask);
if (err) {
- if (dwmac->ops->is_mp13)
+ if (dwmac->ops->is_mp13) {
dev_err(dev, "Sysconfig register mask must be set (%d)\n", err);
- else
+ } else {
dev_dbg(dev, "Warning sysconfig register mask not set\n");
+ err = 0;
+ }
}
return err;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 83b654b7a9fd..4b6a359e5a94 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -7663,9 +7663,10 @@ int stmmac_dvr_probe(struct device *device,
#ifdef STMMAC_VLAN_TAG_USED
/* Both mac100 and gmac support receive VLAN tag detection */
ndev->features |= NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX;
- ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
- priv->hw->hw_vlan_en = true;
-
+ if (priv->plat->has_gmac4) {
+ ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+ priv->hw->hw_vlan_en = true;
+ }
if (priv->dma_cap.vlhash) {
ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
ndev->features |= NETIF_F_HW_VLAN_STAG_FILTER;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index aa43117134d3..03f90676b3ad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -497,35 +497,33 @@ int stmmac_mdio_reset(struct mii_bus *bus)
int stmmac_pcs_setup(struct net_device *ndev)
{
+ struct fwnode_handle *devnode, *pcsnode;
struct dw_xpcs *xpcs = NULL;
struct stmmac_priv *priv;
- int ret = -ENODEV;
- int mode, addr;
+ int addr, mode, ret;
priv = netdev_priv(ndev);
mode = priv->plat->phy_interface;
+ devnode = priv->plat->port_node;
if (priv->plat->pcs_init) {
ret = priv->plat->pcs_init(priv);
+ } else if (fwnode_property_present(devnode, "pcs-handle")) {
+ pcsnode = fwnode_find_reference(devnode, "pcs-handle", 0);
+ xpcs = xpcs_create_fwnode(pcsnode, mode);
+ fwnode_handle_put(pcsnode);
+ ret = PTR_ERR_OR_ZERO(xpcs);
} else if (priv->plat->mdio_bus_data &&
- priv->plat->mdio_bus_data->has_xpcs) {
- /* Try to probe the XPCS by scanning all addresses */
- for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
- xpcs = xpcs_create_mdiodev(priv->mii, addr, mode);
- if (IS_ERR(xpcs))
- continue;
-
- ret = 0;
- break;
- }
+ priv->plat->mdio_bus_data->pcs_mask) {
+ addr = ffs(priv->plat->mdio_bus_data->pcs_mask) - 1;
+ xpcs = xpcs_create_mdiodev(priv->mii, addr, mode);
+ ret = PTR_ERR_OR_ZERO(xpcs);
} else {
return 0;
}
- if (ret) {
- dev_warn(priv->device, "No xPCS found\n");
- return ret;
- }
+ if (ret)
+ return dev_err_probe(priv->device, ret, "No xPCS found\n");
priv->hw->xpcs = xpcs;
@@ -610,7 +608,7 @@ int stmmac_mdio_register(struct net_device *ndev)
snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%x",
new_bus->name, priv->plat->bus_id);
new_bus->priv = ndev;
- new_bus->phy_mask = mdio_bus_data->phy_mask;
+ new_bus->phy_mask = mdio_bus_data->phy_mask | mdio_bus_data->pcs_mask;
new_bus->parent = priv->device;
err = of_mdiobus_register(new_bus, mdio_node);
diff --git a/drivers/net/ethernet/tehuti/tn40.c b/drivers/net/ethernet/tehuti/tn40.c
index 11db9fde11fe..259bdac24cf2 100644
--- a/drivers/net/ethernet/tehuti/tn40.c
+++ b/drivers/net/ethernet/tehuti/tn40.c
@@ -10,6 +10,7 @@
#include <linux/pci.h>
#include <linux/phylink.h>
#include <linux/vmalloc.h>
+#include <net/netdev_queues.h>
#include <net/page_pool/helpers.h>
#include "tn40.h"
@@ -378,6 +379,7 @@ static int tn40_rx_receive(struct tn40_priv *priv, int budget)
if (!skb) {
u64_stats_update_begin(&priv->syncp);
priv->stats.rx_dropped++;
+ priv->alloc_fail++;
u64_stats_update_end(&priv->syncp);
tn40_recycle_rx_buffer(priv, rxdd);
break;
@@ -1571,6 +1573,66 @@ static const struct net_device_ops tn40_netdev_ops = {
.ndo_vlan_rx_kill_vid = tn40_vlan_rx_kill_vid,
};
+static int tn40_ethtool_get_link_ksettings(struct net_device *ndev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct tn40_priv *priv = netdev_priv(ndev);
+
+ return phylink_ethtool_ksettings_get(priv->phylink, cmd);
+}
+
+static const struct ethtool_ops tn40_ethtool_ops = {
+ .get_link = ethtool_op_get_link,
+ .get_link_ksettings = tn40_ethtool_get_link_ksettings,
+};
+
+static void tn40_get_queue_stats_rx(struct net_device *ndev, int idx,
+ struct netdev_queue_stats_rx *stats)
+{
+ struct tn40_priv *priv = netdev_priv(ndev);
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin(&priv->syncp);
+
+ stats->packets = priv->stats.rx_packets;
+ stats->bytes = priv->stats.rx_bytes;
+ stats->alloc_fail = priv->alloc_fail;
+ } while (u64_stats_fetch_retry(&priv->syncp, start));
+}
+
+static void tn40_get_queue_stats_tx(struct net_device *ndev, int idx,
+ struct netdev_queue_stats_tx *stats)
+{
+ struct tn40_priv *priv = netdev_priv(ndev);
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin(&priv->syncp);
+
+ stats->packets = priv->stats.tx_packets;
+ stats->bytes = priv->stats.tx_bytes;
+ } while (u64_stats_fetch_retry(&priv->syncp, start));
+}
+
+static void tn40_get_base_stats(struct net_device *ndev,
+ struct netdev_queue_stats_rx *rx,
+ struct netdev_queue_stats_tx *tx)
+{
+ rx->packets = 0;
+ rx->bytes = 0;
+ rx->alloc_fail = 0;
+
+ tx->packets = 0;
+ tx->bytes = 0;
+}
+
+static const struct netdev_stat_ops tn40_stat_ops = {
+ .get_queue_stats_rx = tn40_get_queue_stats_rx,
+ .get_queue_stats_tx = tn40_get_queue_stats_tx,
+ .get_base_stats = tn40_get_base_stats,
+};
+
static int tn40_priv_init(struct tn40_priv *priv)
{
int ret;
@@ -1599,6 +1661,8 @@ static struct net_device *tn40_netdev_alloc(struct pci_dev *pdev)
if (!ndev)
return NULL;
ndev->netdev_ops = &tn40_netdev_ops;
+ ndev->ethtool_ops = &tn40_ethtool_ops;
+ ndev->stat_ops = &tn40_stat_ops;
ndev->tx_queue_len = TN40_NDEV_TXQ_LEN;
ndev->mem_start = pci_resource_start(pdev, 0);
ndev->mem_end = pci_resource_end(pdev, 0);
diff --git a/drivers/net/ethernet/tehuti/tn40.h b/drivers/net/ethernet/tehuti/tn40.h
index 10368264f7b7..490781fe5120 100644
--- a/drivers/net/ethernet/tehuti/tn40.h
+++ b/drivers/net/ethernet/tehuti/tn40.h
@@ -123,6 +123,7 @@ struct tn40_priv {
int stats_flag;
struct rtnl_link_stats64 stats;
+ u64 alloc_fail;
struct u64_stats_sync syncp;
u8 txd_size;
diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index f160a3b71499..0d5a862cd78a 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -188,6 +188,7 @@ config TI_ICSSG_PRUETH
select TI_ICSS_IEP
select TI_K3_CPPI_DESC_POOL
depends on PRU_REMOTEPROC
+ depends on NET_SWITCHDEV
depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER
depends on PTP_1588_CLOCK_OPTIONAL
help
diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c
index 003668dee738..75c294ce6fb6 100644
--- a/drivers/net/ethernet/ti/icssg/icss_iep.c
+++ b/drivers/net/ethernet/ti/icssg/icss_iep.c
@@ -95,7 +95,7 @@ enum {
* @flags: Flags to represent IEP properties
*/
struct icss_iep_plat_data {
- struct regmap_config *config;
+ const struct regmap_config *config;
u32 reg_offs[ICSS_IEP_MAX_REGS];
u32 flags;
};
@@ -952,7 +952,7 @@ static int icss_iep_regmap_read(void *context, unsigned int reg,
return 0;
}
-static struct regmap_config am654_icss_iep_regmap_config = {
+static const struct regmap_config am654_icss_iep_regmap_config = {
.name = "icss iep",
.reg_stride = 1,
.reg_write = icss_iep_regmap_write,
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index 44cd7a5866c1..1bf9c38e4125 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -1978,6 +1978,7 @@ int wx_sw_init(struct wx *wx)
bitmap_zero(wx->state, WX_STATE_NBITS);
bitmap_zero(wx->flags, WX_PF_FLAGS_NBITS);
+ wx->misc_irq_domain = false;
return 0;
}
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index ac0e1d42fe55..1eecba984f3b 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -1711,6 +1711,7 @@ static int wx_set_interrupt_capability(struct wx *wx)
}
pdev->irq = pci_irq_vector(pdev, 0);
+ wx->num_q_vectors = 1;
return 0;
}
@@ -2021,7 +2022,8 @@ void wx_free_irq(struct wx *wx)
int vector;
if (!(pdev->msix_enabled)) {
- free_irq(pdev->irq, wx);
+ if (!wx->misc_irq_domain)
+ free_irq(pdev->irq, wx);
return;
}
@@ -2036,7 +2038,7 @@ void wx_free_irq(struct wx *wx)
free_irq(entry->vector, q_vector);
}
- if (wx->mac.type == wx_mac_em)
+ if (!wx->misc_irq_domain)
free_irq(wx->msix_entry->vector, wx);
}
EXPORT_SYMBOL(wx_free_irq);
@@ -2051,6 +2053,9 @@ int wx_setup_isb_resources(struct wx *wx)
{
struct pci_dev *pdev = wx->pdev;
+ if (wx->isb_mem)
+ return 0;
+
wx->isb_mem = dma_alloc_coherent(&pdev->dev,
sizeof(u32) * 4,
&wx->isb_dma,
@@ -2410,7 +2415,6 @@ static void wx_free_all_tx_resources(struct wx *wx)
void wx_free_resources(struct wx *wx)
{
- wx_free_isb_resources(wx);
wx_free_all_rx_resources(wx);
wx_free_all_tx_resources(wx);
}
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index e0b7866f96ec..1d57b047817b 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -1109,6 +1109,7 @@ struct wx {
dma_addr_t isb_dma;
u32 *isb_mem;
u32 isb_tag[WX_ISB_MAX];
+ bool misc_irq_domain;
#define WX_MAX_RETA_ENTRIES 128
#define WX_RSS_INDIR_TBL_MAX 64
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
index 46a5a3e95202..e868f7ef4920 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
@@ -37,9 +37,9 @@ static int ngbe_set_wol(struct net_device *netdev,
wx->wol = 0;
if (wol->wolopts & WAKE_MAGIC)
wx->wol = WX_PSR_WKUP_CTL_MAG;
- netdev->wol_enabled = !!(wx->wol);
+ netdev->ethtool->wol_enabled = !!(wx->wol);
wr32(wx, WX_PSR_WKUP_CTL, wx->wol);
- device_set_wakeup_enable(&pdev->dev, netdev->wol_enabled);
+ device_set_wakeup_enable(&pdev->dev, netdev->ethtool->wol_enabled);
return 0;
}
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index e894e01d030d..53aeae2f884b 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -387,6 +387,7 @@ err_dis_phy:
err_free_irq:
wx_free_irq(wx);
err_free_resources:
+ wx_free_isb_resources(wx);
wx_free_resources(wx);
return err;
}
@@ -408,6 +409,7 @@ static int ngbe_close(struct net_device *netdev)
ngbe_down(wx);
wx_free_irq(wx);
+ wx_free_isb_resources(wx);
wx_free_resources(wx);
phylink_disconnect_phy(wx->phylink);
wx_control_hw(wx, false);
@@ -650,7 +652,7 @@ static int ngbe_probe(struct pci_dev *pdev,
if (wx->wol_hw_supported)
wx->wol = NGBE_PSR_WKUP_CTL_MAG;
- netdev->wol_enabled = !!(wx->wol);
+ netdev->ethtool->wol_enabled = !!(wx->wol);
wr32(wx, NGBE_PSR_WKUP_CTL, wx->wol);
device_set_wakeup_enable(&pdev->dev, wx->wol);
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
index b3e3605d1edb..a4cf682dca65 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
@@ -27,57 +27,19 @@ void txgbe_irq_enable(struct wx *wx, bool queues)
}
/**
- * txgbe_intr - msi/legacy mode Interrupt Handler
- * @irq: interrupt number
- * @data: pointer to a network interface device structure
- **/
-static irqreturn_t txgbe_intr(int __always_unused irq, void *data)
-{
- struct wx_q_vector *q_vector;
- struct wx *wx = data;
- struct pci_dev *pdev;
- u32 eicr;
-
- q_vector = wx->q_vector[0];
- pdev = wx->pdev;
-
- eicr = wx_misc_isb(wx, WX_ISB_VEC0);
- if (!eicr) {
- /* shared interrupt alert!
- * the interrupt that we masked before the ICR read.
- */
- if (netif_running(wx->netdev))
- txgbe_irq_enable(wx, true);
- return IRQ_NONE; /* Not our interrupt */
- }
- wx->isb_mem[WX_ISB_VEC0] = 0;
- if (!(pdev->msi_enabled))
- wr32(wx, WX_PX_INTA, 1);
-
- wx->isb_mem[WX_ISB_MISC] = 0;
- /* would disable interrupts here but it is auto disabled */
- napi_schedule_irqoff(&q_vector->napi);
-
- /* re-enable link(maybe) and non-queue interrupts, no flush.
- * txgbe_poll will re-enable the queue interrupts
- */
- if (netif_running(wx->netdev))
- txgbe_irq_enable(wx, false);
-
- return IRQ_HANDLED;
-}
-
-/**
- * txgbe_request_msix_irqs - Initialize MSI-X interrupts
+ * txgbe_request_queue_irqs - Initialize MSI-X queue interrupts
* @wx: board private structure
*
- * Allocate MSI-X vectors and request interrupts from the kernel.
+ * Allocate MSI-X queue vectors and request interrupts from the kernel.
**/
-static int txgbe_request_msix_irqs(struct wx *wx)
+int txgbe_request_queue_irqs(struct wx *wx)
{
struct net_device *netdev = wx->netdev;
int vector, err;
+ if (!wx->pdev->msix_enabled)
+ return 0;
+
for (vector = 0; vector < wx->num_q_vectors; vector++) {
struct wx_q_vector *q_vector = wx->q_vector[vector];
struct msix_entry *entry = &wx->msix_q_entries[vector];
@@ -110,34 +72,6 @@ free_queue_irqs:
return err;
}
-/**
- * txgbe_request_irq - initialize interrupts
- * @wx: board private structure
- *
- * Attempt to configure interrupts using the best available
- * capabilities of the hardware and kernel.
- **/
-int txgbe_request_irq(struct wx *wx)
-{
- struct net_device *netdev = wx->netdev;
- struct pci_dev *pdev = wx->pdev;
- int err;
-
- if (pdev->msix_enabled)
- err = txgbe_request_msix_irqs(wx);
- else if (pdev->msi_enabled)
- err = request_irq(wx->pdev->irq, &txgbe_intr, 0,
- netdev->name, wx);
- else
- err = request_irq(wx->pdev->irq, &txgbe_intr, IRQF_SHARED,
- netdev->name, wx);
-
- if (err)
- wx_err(wx, "request_irq failed, Error %d\n", err);
-
- return err;
-}
-
static int txgbe_request_gpio_irq(struct txgbe *txgbe)
{
txgbe->gpio_irq = irq_find_mapping(txgbe->misc.domain, TXGBE_IRQ_GPIO);
@@ -178,6 +112,36 @@ static const struct irq_domain_ops txgbe_misc_irq_domain_ops = {
static irqreturn_t txgbe_misc_irq_handle(int irq, void *data)
{
+ struct wx_q_vector *q_vector;
+ struct txgbe *txgbe = data;
+ struct wx *wx = txgbe->wx;
+ u32 eicr;
+
+ if (wx->pdev->msix_enabled)
+ return IRQ_WAKE_THREAD;
+
+ eicr = wx_misc_isb(wx, WX_ISB_VEC0);
+ if (!eicr) {
+ /* shared interrupt alert!
+ * the interrupt that we masked before the ICR read.
+ */
+ if (netif_running(wx->netdev))
+ txgbe_irq_enable(wx, true);
+ return IRQ_NONE; /* Not our interrupt */
+ }
+ wx->isb_mem[WX_ISB_VEC0] = 0;
+ if (!(wx->pdev->msi_enabled))
+ wr32(wx, WX_PX_INTA, 1);
+
+ /* would disable interrupts here but it is auto disabled */
+ q_vector = wx->q_vector[0];
+ napi_schedule_irqoff(&q_vector->napi);
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data)
+{
struct txgbe *txgbe = data;
struct wx *wx = txgbe->wx;
unsigned int nhandled = 0;
@@ -223,6 +187,7 @@ void txgbe_free_misc_irq(struct txgbe *txgbe)
int txgbe_setup_misc_irq(struct txgbe *txgbe)
{
+ unsigned long flags = IRQF_ONESHOT;
struct wx *wx = txgbe->wx;
int hwirq, err;
@@ -236,14 +201,17 @@ int txgbe_setup_misc_irq(struct txgbe *txgbe)
irq_create_mapping(txgbe->misc.domain, hwirq);
txgbe->misc.chip = txgbe_irq_chip;
- if (wx->pdev->msix_enabled)
+ if (wx->pdev->msix_enabled) {
txgbe->misc.irq = wx->msix_entry->vector;
- else
+ } else {
txgbe->misc.irq = wx->pdev->irq;
+ if (!wx->pdev->msi_enabled)
+ flags |= IRQF_SHARED;
+ }
- err = request_threaded_irq(txgbe->misc.irq, NULL,
- txgbe_misc_irq_handle,
- IRQF_ONESHOT,
+ err = request_threaded_irq(txgbe->misc.irq, txgbe_misc_irq_handle,
+ txgbe_misc_irq_thread_fn,
+ flags,
wx->netdev->name, txgbe);
if (err)
goto del_misc_irq;
@@ -256,6 +224,8 @@ int txgbe_setup_misc_irq(struct txgbe *txgbe)
if (err)
goto free_gpio_irq;
+ wx->misc_irq_domain = true;
+
return 0;
free_gpio_irq:
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h
index b77945e7a0f2..e6285b94625e 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h
@@ -2,6 +2,6 @@
/* Copyright (c) 2015 - 2024 Beijing WangXun Technology Co., Ltd. */
void txgbe_irq_enable(struct wx *wx, bool queues);
-int txgbe_request_irq(struct wx *wx);
+int txgbe_request_queue_irqs(struct wx *wx);
void txgbe_free_misc_irq(struct txgbe *txgbe);
int txgbe_setup_misc_irq(struct txgbe *txgbe);
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index 41e9ebf11e41..93180225a6f1 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -309,9 +309,9 @@ static int txgbe_open(struct net_device *netdev)
wx_configure(wx);
- err = txgbe_request_irq(wx);
+ err = txgbe_request_queue_irqs(wx);
if (err)
- goto err_free_isb;
+ goto err_free_resources;
/* Notify the stack of the actual queue counts. */
err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues);
@@ -328,8 +328,8 @@ static int txgbe_open(struct net_device *netdev)
err_free_irq:
wx_free_irq(wx);
-err_free_isb:
- wx_free_isb_resources(wx);
+err_free_resources:
+ wx_free_resources(wx);
err_reset:
txgbe_reset(wx);
@@ -747,6 +747,7 @@ static void txgbe_remove(struct pci_dev *pdev)
txgbe_remove_phy(txgbe);
txgbe_free_misc_irq(txgbe);
+ wx_free_isb_resources(wx);
pci_release_selected_regions(pdev,
pci_select_bars(pdev, IORESOURCE_MEM));
diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index ffe7f463e16e..ef6df0e37bea 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -119,7 +119,7 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
skb->protocol = eth_type_trans(skb, ndev);
skb->ip_summed = CHECKSUM_NONE;
- if (__netif_rx(skb) == NET_RX_DROP) {
+ if (netif_rx(skb) == NET_RX_DROP) {
ndev->stats.rx_errors++;
ndev->stats.rx_dropped++;
} else {
diff --git a/drivers/net/pcs/Kconfig b/drivers/net/pcs/Kconfig
index 87cf308fc6d8..f6aa437473de 100644
--- a/drivers/net/pcs/Kconfig
+++ b/drivers/net/pcs/Kconfig
@@ -6,11 +6,11 @@
menu "PCS device drivers"
config PCS_XPCS
- tristate
+ tristate "Synopsys DesignWare Ethernet XPCS"
select PHYLINK
help
- This module provides helper functions for Synopsys DesignWare XPCS
- controllers.
+ This module provides a driver and helper functions for Synopsys
+ DesignWare XPCS controllers.
config PCS_LYNX
tristate
diff --git a/drivers/net/pcs/Makefile b/drivers/net/pcs/Makefile
index fb1694192ae6..4f7920618b90 100644
--- a/drivers/net/pcs/Makefile
+++ b/drivers/net/pcs/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for Linux PCS drivers
-pcs_xpcs-$(CONFIG_PCS_XPCS) := pcs-xpcs.o pcs-xpcs-nxp.o pcs-xpcs-wx.o
+pcs_xpcs-$(CONFIG_PCS_XPCS) := pcs-xpcs.o pcs-xpcs-plat.o \
+ pcs-xpcs-nxp.o pcs-xpcs-wx.o
obj-$(CONFIG_PCS_XPCS) += pcs_xpcs.o
obj-$(CONFIG_PCS_LYNX) += pcs-lynx.o
diff --git a/drivers/net/pcs/pcs-xpcs-plat.c b/drivers/net/pcs/pcs-xpcs-plat.c
new file mode 100644
index 000000000000..629315f1e57c
--- /dev/null
+++ b/drivers/net/pcs/pcs-xpcs-plat.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Synopsys DesignWare XPCS platform device driver
+ *
+ * Copyright (C) 2024 Serge Semin
+ */
+
+#include <linux/atomic.h>
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mdio.h>
+#include <linux/module.h>
+#include <linux/pcs/pcs-xpcs.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/sizes.h>
+
+#include "pcs-xpcs.h"
+
+/* Page select register for the indirect MMIO CSRs access */
+#define DW_VR_CSR_VIEWPORT 0xff
+
+struct dw_xpcs_plat {
+ struct platform_device *pdev;
+ struct mii_bus *bus;
+ bool reg_indir;
+ int reg_width;
+ void __iomem *reg_base;
+ struct clk *cclk;
+};
+
+static ptrdiff_t xpcs_mmio_addr_format(int dev, int reg)
+{
+ return FIELD_PREP(0x1f0000, dev) | FIELD_PREP(0xffff, reg);
+}
+
+static u16 xpcs_mmio_addr_page(ptrdiff_t csr)
+{
+ return FIELD_GET(0x1fff00, csr);
+}
+
+static ptrdiff_t xpcs_mmio_addr_offset(ptrdiff_t csr)
+{
+ return FIELD_GET(0xff, csr);
+}
+
+static int xpcs_mmio_read_reg_indirect(struct dw_xpcs_plat *pxpcs,
+ int dev, int reg)
+{
+ ptrdiff_t csr, ofs;
+ u16 page;
+ int ret;
+
+ csr = xpcs_mmio_addr_format(dev, reg);
+ page = xpcs_mmio_addr_page(csr);
+ ofs = xpcs_mmio_addr_offset(csr);
+
+ ret = pm_runtime_resume_and_get(&pxpcs->pdev->dev);
+ if (ret)
+ return ret;
+
+ switch (pxpcs->reg_width) {
+ case 4:
+ writel(page, pxpcs->reg_base + (DW_VR_CSR_VIEWPORT << 2));
+ ret = readl(pxpcs->reg_base + (ofs << 2));
+ break;
+ default:
+ writew(page, pxpcs->reg_base + (DW_VR_CSR_VIEWPORT << 1));
+ ret = readw(pxpcs->reg_base + (ofs << 1));
+ break;
+ }
+
+ pm_runtime_put(&pxpcs->pdev->dev);
+
+ return ret;
+}
+
+static int xpcs_mmio_write_reg_indirect(struct dw_xpcs_plat *pxpcs,
+ int dev, int reg, u16 val)
+{
+ ptrdiff_t csr, ofs;
+ u16 page;
+ int ret;
+
+ csr = xpcs_mmio_addr_format(dev, reg);
+ page = xpcs_mmio_addr_page(csr);
+ ofs = xpcs_mmio_addr_offset(csr);
+
+ ret = pm_runtime_resume_and_get(&pxpcs->pdev->dev);
+ if (ret)
+ return ret;
+
+ switch (pxpcs->reg_width) {
+ case 4:
+ writel(page, pxpcs->reg_base + (DW_VR_CSR_VIEWPORT << 2));
+ writel(val, pxpcs->reg_base + (ofs << 2));
+ break;
+ default:
+ writew(page, pxpcs->reg_base + (DW_VR_CSR_VIEWPORT << 1));
+ writew(val, pxpcs->reg_base + (ofs << 1));
+ break;
+ }
+
+ pm_runtime_put(&pxpcs->pdev->dev);
+
+ return 0;
+}
+
+static int xpcs_mmio_read_reg_direct(struct dw_xpcs_plat *pxpcs,
+ int dev, int reg)
+{
+ ptrdiff_t csr;
+ int ret;
+
+ csr = xpcs_mmio_addr_format(dev, reg);
+
+ ret = pm_runtime_resume_and_get(&pxpcs->pdev->dev);
+ if (ret)
+ return ret;
+
+ switch (pxpcs->reg_width) {
+ case 4:
+ ret = readl(pxpcs->reg_base + (csr << 2));
+ break;
+ default:
+ ret = readw(pxpcs->reg_base + (csr << 1));
+ break;
+ }
+
+ pm_runtime_put(&pxpcs->pdev->dev);
+
+ return ret;
+}
+
+static int xpcs_mmio_write_reg_direct(struct dw_xpcs_plat *pxpcs,
+ int dev, int reg, u16 val)
+{
+ ptrdiff_t csr;
+ int ret;
+
+ csr = xpcs_mmio_addr_format(dev, reg);
+
+ ret = pm_runtime_resume_and_get(&pxpcs->pdev->dev);
+ if (ret)
+ return ret;
+
+ switch (pxpcs->reg_width) {
+ case 4:
+ writel(val, pxpcs->reg_base + (csr << 2));
+ break;
+ default:
+ writew(val, pxpcs->reg_base + (csr << 1));
+ break;
+ }
+
+ pm_runtime_put(&pxpcs->pdev->dev);
+
+ return 0;
+}
+
+static int xpcs_mmio_read_c22(struct mii_bus *bus, int addr, int reg)
+{
+ struct dw_xpcs_plat *pxpcs = bus->priv;
+
+ if (addr != 0)
+ return -ENODEV;
+
+ if (pxpcs->reg_indir)
+ return xpcs_mmio_read_reg_indirect(pxpcs, MDIO_MMD_VEND2, reg);
+ else
+ return xpcs_mmio_read_reg_direct(pxpcs, MDIO_MMD_VEND2, reg);
+}
+
+static int xpcs_mmio_write_c22(struct mii_bus *bus, int addr, int reg, u16 val)
+{
+ struct dw_xpcs_plat *pxpcs = bus->priv;
+
+ if (addr != 0)
+ return -ENODEV;
+
+ if (pxpcs->reg_indir)
+ return xpcs_mmio_write_reg_indirect(pxpcs, MDIO_MMD_VEND2, reg, val);
+ else
+ return xpcs_mmio_write_reg_direct(pxpcs, MDIO_MMD_VEND2, reg, val);
+}
+
+static int xpcs_mmio_read_c45(struct mii_bus *bus, int addr, int dev, int reg)
+{
+ struct dw_xpcs_plat *pxpcs = bus->priv;
+
+ if (addr != 0)
+ return -ENODEV;
+
+ if (pxpcs->reg_indir)
+ return xpcs_mmio_read_reg_indirect(pxpcs, dev, reg);
+ else
+ return xpcs_mmio_read_reg_direct(pxpcs, dev, reg);
+}
+
+static int xpcs_mmio_write_c45(struct mii_bus *bus, int addr, int dev,
+ int reg, u16 val)
+{
+ struct dw_xpcs_plat *pxpcs = bus->priv;
+
+ if (addr != 0)
+ return -ENODEV;
+
+ if (pxpcs->reg_indir)
+ return xpcs_mmio_write_reg_indirect(pxpcs, dev, reg, val);
+ else
+ return xpcs_mmio_write_reg_direct(pxpcs, dev, reg, val);
+}
+
+static struct dw_xpcs_plat *xpcs_plat_create_data(struct platform_device *pdev)
+{
+ struct dw_xpcs_plat *pxpcs;
+
+ pxpcs = devm_kzalloc(&pdev->dev, sizeof(*pxpcs), GFP_KERNEL);
+ if (!pxpcs)
+ return ERR_PTR(-ENOMEM);
+
+ pxpcs->pdev = pdev;
+
+ dev_set_drvdata(&pdev->dev, pxpcs);
+
+ return pxpcs;
+}
+
+static int xpcs_plat_init_res(struct dw_xpcs_plat *pxpcs)
+{
+ struct platform_device *pdev = pxpcs->pdev;
+ struct device *dev = &pdev->dev;
+ resource_size_t spc_size;
+ struct resource *res;
+
+ if (!device_property_read_u32(dev, "reg-io-width", &pxpcs->reg_width)) {
+ if (pxpcs->reg_width != 2 && pxpcs->reg_width != 4) {
+ dev_err(dev, "Invalid reg-space data width\n");
+ return -EINVAL;
+ }
+ } else {
+ pxpcs->reg_width = 2;
+ }
+
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "direct") ?:
+ platform_get_resource_byname(pdev, IORESOURCE_MEM, "indirect");
+ if (!res) {
+ dev_err(dev, "No reg-space found\n");
+ return -EINVAL;
+ }
+
+ if (!strcmp(res->name, "indirect"))
+ pxpcs->reg_indir = true;
+
+ if (pxpcs->reg_indir)
+ spc_size = pxpcs->reg_width * SZ_256;
+ else
+ spc_size = pxpcs->reg_width * SZ_2M;
+
+ if (resource_size(res) < spc_size) {
+ dev_err(dev, "Invalid reg-space size\n");
+ return -EINVAL;
+ }
+
+ pxpcs->reg_base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(pxpcs->reg_base)) {
+ dev_err(dev, "Failed to map reg-space\n");
+ return PTR_ERR(pxpcs->reg_base);
+ }
+
+ return 0;
+}
+
+static int xpcs_plat_init_clk(struct dw_xpcs_plat *pxpcs)
+{
+ struct device *dev = &pxpcs->pdev->dev;
+ int ret;
+
+ pxpcs->cclk = devm_clk_get(dev, "csr");
+ if (IS_ERR(pxpcs->cclk))
+ return dev_err_probe(dev, PTR_ERR(pxpcs->cclk),
+ "Failed to get CSR clock\n");
+
+ pm_runtime_set_active(dev);
+ ret = devm_pm_runtime_enable(dev);
+ if (ret) {
+ dev_err(dev, "Failed to enable runtime-PM\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int xpcs_plat_init_bus(struct dw_xpcs_plat *pxpcs)
+{
+ struct device *dev = &pxpcs->pdev->dev;
+ static atomic_t id = ATOMIC_INIT(-1);
+ int ret;
+
+ pxpcs->bus = devm_mdiobus_alloc_size(dev, 0);
+ if (!pxpcs->bus)
+ return -ENOMEM;
+
+ pxpcs->bus->name = "DW XPCS MCI/APB3";
+ pxpcs->bus->read = xpcs_mmio_read_c22;
+ pxpcs->bus->write = xpcs_mmio_write_c22;
+ pxpcs->bus->read_c45 = xpcs_mmio_read_c45;
+ pxpcs->bus->write_c45 = xpcs_mmio_write_c45;
+ pxpcs->bus->phy_mask = ~0;
+ pxpcs->bus->parent = dev;
+ pxpcs->bus->priv = pxpcs;
+
+ snprintf(pxpcs->bus->id, MII_BUS_ID_SIZE,
+ "dwxpcs-%x", atomic_inc_return(&id));
+
+ /* MDIO-bus here serves as just a back-end engine abstracting out
+ * the MDIO and MCI/APB3 IO interfaces utilized for the DW XPCS CSRs
+ * access.
+ */
+ ret = devm_mdiobus_register(dev, pxpcs->bus);
+ if (ret) {
+ dev_err(dev, "Failed to create MDIO bus\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Note there is no need in the next function antagonist because the MDIO-bus
+ * de-registration will effectively remove and destroy all the MDIO-devices
+ * registered on the bus.
+ */
+static int xpcs_plat_init_dev(struct dw_xpcs_plat *pxpcs)
+{
+ struct device *dev = &pxpcs->pdev->dev;
+ struct mdio_device *mdiodev;
+ int ret;
+
+ /* There is a single memory-mapped DW XPCS device */
+ mdiodev = mdio_device_create(pxpcs->bus, 0);
+ if (IS_ERR(mdiodev))
+ return PTR_ERR(mdiodev);
+
+ /* Associate the FW-node with the device structure so it can be looked
+ * up later. Make sure DD-core is aware of the OF-node being re-used.
+ */
+ device_set_node(&mdiodev->dev, fwnode_handle_get(dev_fwnode(dev)));
+ mdiodev->dev.of_node_reused = true;
+
+ /* Pass the data further so the DW XPCS driver core could use it */
+ mdiodev->dev.platform_data = (void *)device_get_match_data(dev);
+
+ ret = mdio_device_register(mdiodev);
+ if (ret) {
+ dev_err(dev, "Failed to register MDIO device\n");
+ goto err_clean_data;
+ }
+
+ return 0;
+
+err_clean_data:
+ mdiodev->dev.platform_data = NULL;
+
+ fwnode_handle_put(dev_fwnode(&mdiodev->dev));
+ device_set_node(&mdiodev->dev, NULL);
+
+ mdio_device_free(mdiodev);
+
+ return ret;
+}
+
+static int xpcs_plat_probe(struct platform_device *pdev)
+{
+ struct dw_xpcs_plat *pxpcs;
+ int ret;
+
+ pxpcs = xpcs_plat_create_data(pdev);
+ if (IS_ERR(pxpcs))
+ return PTR_ERR(pxpcs);
+
+ ret = xpcs_plat_init_res(pxpcs);
+ if (ret)
+ return ret;
+
+ ret = xpcs_plat_init_clk(pxpcs);
+ if (ret)
+ return ret;
+
+ ret = xpcs_plat_init_bus(pxpcs);
+ if (ret)
+ return ret;
+
+ ret = xpcs_plat_init_dev(pxpcs);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int __maybe_unused xpcs_plat_pm_runtime_suspend(struct device *dev)
+{
+ struct dw_xpcs_plat *pxpcs = dev_get_drvdata(dev);
+
+ clk_disable_unprepare(pxpcs->cclk);
+
+ return 0;
+}
+
+static int __maybe_unused xpcs_plat_pm_runtime_resume(struct device *dev)
+{
+ struct dw_xpcs_plat *pxpcs = dev_get_drvdata(dev);
+
+ return clk_prepare_enable(pxpcs->cclk);
+}
+
+static const struct dev_pm_ops xpcs_plat_pm_ops = {
+ SET_RUNTIME_PM_OPS(xpcs_plat_pm_runtime_suspend,
+ xpcs_plat_pm_runtime_resume,
+ NULL)
+};
+
+DW_XPCS_INFO_DECLARE(xpcs_generic, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_ID_NATIVE);
+DW_XPCS_INFO_DECLARE(xpcs_pma_gen1_3g, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_GEN1_3G_ID);
+DW_XPCS_INFO_DECLARE(xpcs_pma_gen2_3g, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_GEN2_3G_ID);
+DW_XPCS_INFO_DECLARE(xpcs_pma_gen2_6g, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_GEN2_6G_ID);
+DW_XPCS_INFO_DECLARE(xpcs_pma_gen4_3g, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_GEN4_3G_ID);
+DW_XPCS_INFO_DECLARE(xpcs_pma_gen4_6g, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_GEN4_6G_ID);
+DW_XPCS_INFO_DECLARE(xpcs_pma_gen5_10g, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_GEN5_10G_ID);
+DW_XPCS_INFO_DECLARE(xpcs_pma_gen5_12g, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_GEN5_12G_ID);
+
+static const struct of_device_id xpcs_of_ids[] = {
+ { .compatible = "snps,dw-xpcs", .data = &xpcs_generic },
+ { .compatible = "snps,dw-xpcs-gen1-3g", .data = &xpcs_pma_gen1_3g },
+ { .compatible = "snps,dw-xpcs-gen2-3g", .data = &xpcs_pma_gen2_3g },
+ { .compatible = "snps,dw-xpcs-gen2-6g", .data = &xpcs_pma_gen2_6g },
+ { .compatible = "snps,dw-xpcs-gen4-3g", .data = &xpcs_pma_gen4_3g },
+ { .compatible = "snps,dw-xpcs-gen4-6g", .data = &xpcs_pma_gen4_6g },
+ { .compatible = "snps,dw-xpcs-gen5-10g", .data = &xpcs_pma_gen5_10g },
+ { .compatible = "snps,dw-xpcs-gen5-12g", .data = &xpcs_pma_gen5_12g },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, xpcs_of_ids);
+
+static struct platform_driver xpcs_plat_driver = {
+ .probe = xpcs_plat_probe,
+ .driver = {
+ .name = "dwxpcs",
+ .pm = &xpcs_plat_pm_ops,
+ .of_match_table = xpcs_of_ids,
+ },
+};
+module_platform_driver(xpcs_plat_driver);
+
+MODULE_DESCRIPTION("Synopsys DesignWare XPCS platform device driver");
+MODULE_AUTHOR("Signed-off-by: Serge Semin <[email protected]>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 31525fe9c32e..82463f9d50c8 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -6,10 +6,13 @@
* Author: Jose Abreu <[email protected]>
*/
+#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/pcs/pcs-xpcs.h>
#include <linux/mdio.h>
+#include <linux/phy.h>
#include <linux/phylink.h>
+#include <linux/property.h>
#include "pcs-xpcs.h"
@@ -143,7 +146,7 @@ enum {
DW_XPCS_INTERFACE_MAX,
};
-struct xpcs_compat {
+struct dw_xpcs_compat {
const int *supported;
const phy_interface_t *interface;
int num_interfaces;
@@ -151,19 +154,19 @@ struct xpcs_compat {
int (*pma_config)(struct dw_xpcs *xpcs);
};
-struct xpcs_id {
+struct dw_xpcs_desc {
u32 id;
u32 mask;
- const struct xpcs_compat *compat;
+ const struct dw_xpcs_compat *compat;
};
-static const struct xpcs_compat *xpcs_find_compat(const struct xpcs_id *id,
- phy_interface_t interface)
+static const struct dw_xpcs_compat *
+xpcs_find_compat(const struct dw_xpcs_desc *desc, phy_interface_t interface)
{
int i, j;
for (i = 0; i < DW_XPCS_INTERFACE_MAX; i++) {
- const struct xpcs_compat *compat = &id->compat[i];
+ const struct dw_xpcs_compat *compat = &desc->compat[i];
for (j = 0; j < compat->num_interfaces; j++)
if (compat->interface[j] == interface)
@@ -175,9 +178,9 @@ static const struct xpcs_compat *xpcs_find_compat(const struct xpcs_id *id,
int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface)
{
- const struct xpcs_compat *compat;
+ const struct dw_xpcs_compat *compat;
- compat = xpcs_find_compat(xpcs->id, interface);
+ compat = xpcs_find_compat(xpcs->desc, interface);
if (!compat)
return -ENODEV;
@@ -185,7 +188,7 @@ int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface)
}
EXPORT_SYMBOL_GPL(xpcs_get_an_mode);
-static bool __xpcs_linkmode_supported(const struct xpcs_compat *compat,
+static bool __xpcs_linkmode_supported(const struct dw_xpcs_compat *compat,
enum ethtool_link_mode_bit_indices linkmode)
{
int i;
@@ -237,29 +240,6 @@ int xpcs_write_vpcs(struct dw_xpcs *xpcs, int reg, u16 val)
return xpcs_write_vendor(xpcs, MDIO_MMD_PCS, reg, val);
}
-static int xpcs_dev_flag(struct dw_xpcs *xpcs)
-{
- int ret, oui;
-
- ret = xpcs_read(xpcs, MDIO_MMD_PMAPMD, MDIO_DEVID1);
- if (ret < 0)
- return ret;
-
- oui = ret;
-
- ret = xpcs_read(xpcs, MDIO_MMD_PMAPMD, MDIO_DEVID2);
- if (ret < 0)
- return ret;
-
- ret = (ret >> 10) & 0x3F;
- oui |= ret << 16;
-
- if (oui == DW_OUI_WX)
- xpcs->dev_flag = DW_DEV_TXGBE;
-
- return 0;
-}
-
static int xpcs_poll_reset(struct dw_xpcs *xpcs, int dev)
{
/* Poll until the reset bit clears (50ms per retry == 0.6 sec) */
@@ -277,7 +257,7 @@ static int xpcs_poll_reset(struct dw_xpcs *xpcs, int dev)
}
static int xpcs_soft_reset(struct dw_xpcs *xpcs,
- const struct xpcs_compat *compat)
+ const struct dw_xpcs_compat *compat)
{
int ret, dev;
@@ -418,7 +398,7 @@ out:
}
static int _xpcs_config_aneg_c73(struct dw_xpcs *xpcs,
- const struct xpcs_compat *compat)
+ const struct dw_xpcs_compat *compat)
{
int ret, adv;
@@ -463,7 +443,7 @@ static int _xpcs_config_aneg_c73(struct dw_xpcs *xpcs,
}
static int xpcs_config_aneg_c73(struct dw_xpcs *xpcs,
- const struct xpcs_compat *compat)
+ const struct dw_xpcs_compat *compat)
{
int ret;
@@ -482,7 +462,7 @@ static int xpcs_config_aneg_c73(struct dw_xpcs *xpcs,
static int xpcs_aneg_done_c73(struct dw_xpcs *xpcs,
struct phylink_link_state *state,
- const struct xpcs_compat *compat, u16 an_stat1)
+ const struct dw_xpcs_compat *compat, u16 an_stat1)
{
int ret;
@@ -607,12 +587,12 @@ static int xpcs_validate(struct phylink_pcs *pcs, unsigned long *supported,
const struct phylink_link_state *state)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported) = { 0, };
- const struct xpcs_compat *compat;
+ const struct dw_xpcs_compat *compat;
struct dw_xpcs *xpcs;
int i;
xpcs = phylink_pcs_to_xpcs(pcs);
- compat = xpcs_find_compat(xpcs->id, state->interface);
+ compat = xpcs_find_compat(xpcs->desc, state->interface);
if (!compat)
return -EINVAL;
@@ -633,7 +613,7 @@ void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces)
int i, j;
for (i = 0; i < DW_XPCS_INTERFACE_MAX; i++) {
- const struct xpcs_compat *compat = &xpcs->id->compat[i];
+ const struct dw_xpcs_compat *compat = &xpcs->desc->compat[i];
for (j = 0; j < compat->num_interfaces; j++)
__set_bit(compat->interface[j], interfaces);
@@ -684,7 +664,7 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs,
{
int ret, mdio_ctrl, tx_conf;
- if (xpcs->dev_flag == DW_DEV_TXGBE)
+ if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID)
xpcs_write_vpcs(xpcs, DW_VR_XS_PCS_DIG_CTRL1, DW_CL37_BP | DW_EN_VSMMD1);
/* For AN for C37 SGMII mode, the settings are :-
@@ -722,7 +702,7 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs,
ret |= (DW_VR_MII_PCS_MODE_C37_SGMII <<
DW_VR_MII_AN_CTRL_PCS_MODE_SHIFT &
DW_VR_MII_PCS_MODE_MASK);
- if (xpcs->dev_flag == DW_DEV_TXGBE) {
+ if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) {
ret |= DW_VR_MII_AN_CTRL_8BIT;
/* Hardware requires it to be PHY side SGMII */
tx_conf = DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII;
@@ -744,7 +724,7 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs,
else
ret &= ~DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW;
- if (xpcs->dev_flag == DW_DEV_TXGBE)
+ if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID)
ret |= DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL;
ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL1, ret);
@@ -766,7 +746,7 @@ static int xpcs_config_aneg_c37_1000basex(struct dw_xpcs *xpcs,
int ret, mdio_ctrl, adv;
bool changed = 0;
- if (xpcs->dev_flag == DW_DEV_TXGBE)
+ if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID)
xpcs_write_vpcs(xpcs, DW_VR_XS_PCS_DIG_CTRL1, DW_CL37_BP | DW_EN_VSMMD1);
/* According to Chap 7.12, to set 1000BASE-X C37 AN, AN must
@@ -850,14 +830,14 @@ static int xpcs_config_2500basex(struct dw_xpcs *xpcs)
int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
const unsigned long *advertising, unsigned int neg_mode)
{
- const struct xpcs_compat *compat;
+ const struct dw_xpcs_compat *compat;
int ret;
- compat = xpcs_find_compat(xpcs->id, interface);
+ compat = xpcs_find_compat(xpcs->desc, interface);
if (!compat)
return -ENODEV;
- if (xpcs->dev_flag == DW_DEV_TXGBE) {
+ if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) {
ret = txgbe_xpcs_switch_mode(xpcs, interface);
if (ret)
return ret;
@@ -915,7 +895,7 @@ static int xpcs_config(struct phylink_pcs *pcs, unsigned int neg_mode,
static int xpcs_get_state_c73(struct dw_xpcs *xpcs,
struct phylink_link_state *state,
- const struct xpcs_compat *compat)
+ const struct dw_xpcs_compat *compat)
{
bool an_enabled;
int pcs_stat1;
@@ -1115,10 +1095,10 @@ static void xpcs_get_state(struct phylink_pcs *pcs,
struct phylink_link_state *state)
{
struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs);
- const struct xpcs_compat *compat;
+ const struct dw_xpcs_compat *compat;
int ret;
- compat = xpcs_find_compat(xpcs->id, state->interface);
+ compat = xpcs_find_compat(xpcs->desc, state->interface);
if (!compat)
return;
@@ -1229,47 +1209,73 @@ static void xpcs_an_restart(struct phylink_pcs *pcs)
}
}
-static u32 xpcs_get_id(struct dw_xpcs *xpcs)
+static int xpcs_get_id(struct dw_xpcs *xpcs)
{
int ret;
u32 id;
- /* First, search C73 PCS using PCS MMD */
+ /* First, search C73 PCS using PCS MMD 3. Return ENODEV if communication
+ * failed indicating that device couldn't be reached.
+ */
ret = xpcs_read(xpcs, MDIO_MMD_PCS, MII_PHYSID1);
if (ret < 0)
- return 0xffffffff;
+ return -ENODEV;
id = ret << 16;
ret = xpcs_read(xpcs, MDIO_MMD_PCS, MII_PHYSID2);
if (ret < 0)
- return 0xffffffff;
+ return ret;
+
+ id |= ret;
- /* If Device IDs are not all zeros or all ones,
- * we found C73 AN-type device
+ /* If Device IDs are not all zeros or ones, then 10GBase-X/R or C73
+ * KR/KX4 PCS found. Otherwise fallback to detecting 1000Base-X or C37
+ * PCS in MII MMD 31.
*/
- if ((id | ret) && (id | ret) != 0xffffffff)
- return id | ret;
+ if (!id || id == 0xffffffff) {
+ ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_PHYSID1);
+ if (ret < 0)
+ return ret;
+
+ id = ret << 16;
+
+ ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_PHYSID2);
+ if (ret < 0)
+ return ret;
- /* Next, search C37 PCS using Vendor-Specific MII MMD */
- ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_PHYSID1);
+ id |= ret;
+ }
+
+ /* Set the PCS ID if it hasn't been pre-initialized */
+ if (xpcs->info.pcs == DW_XPCS_ID_NATIVE)
+ xpcs->info.pcs = id;
+
+ /* Find out PMA/PMD ID from MMD 1 device ID registers */
+ ret = xpcs_read(xpcs, MDIO_MMD_PMAPMD, MDIO_DEVID1);
if (ret < 0)
- return 0xffffffff;
+ return ret;
- id = ret << 16;
+ id = ret;
- ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_PHYSID2);
+ ret = xpcs_read(xpcs, MDIO_MMD_PMAPMD, MDIO_DEVID2);
if (ret < 0)
- return 0xffffffff;
+ return ret;
+
+ /* Note the inverted dword order and masked out Model/Revision numbers
+ * with respect to what is done with the PCS ID...
+ */
+ ret = (ret >> 10) & 0x3F;
+ id |= ret << 16;
- /* If Device IDs are not all zeros, we found C37 AN-type device */
- if (id | ret)
- return id | ret;
+ /* Set the PMA ID if it hasn't been pre-initialized */
+ if (xpcs->info.pma == DW_XPCS_PMA_ID_NATIVE)
+ xpcs->info.pma = id;
- return 0xffffffff;
+ return 0;
}
-static const struct xpcs_compat synopsys_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
+static const struct dw_xpcs_compat synopsys_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
[DW_XPCS_USXGMII] = {
.supported = xpcs_usxgmii_features,
.interface = xpcs_usxgmii_interfaces,
@@ -1314,7 +1320,7 @@ static const struct xpcs_compat synopsys_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
},
};
-static const struct xpcs_compat nxp_sja1105_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
+static const struct dw_xpcs_compat nxp_sja1105_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
[DW_XPCS_SGMII] = {
.supported = xpcs_sgmii_features,
.interface = xpcs_sgmii_interfaces,
@@ -1324,7 +1330,7 @@ static const struct xpcs_compat nxp_sja1105_xpcs_compat[DW_XPCS_INTERFACE_MAX] =
},
};
-static const struct xpcs_compat nxp_sja1110_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
+static const struct dw_xpcs_compat nxp_sja1110_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
[DW_XPCS_SGMII] = {
.supported = xpcs_sgmii_features,
.interface = xpcs_sgmii_interfaces,
@@ -1341,18 +1347,18 @@ static const struct xpcs_compat nxp_sja1110_xpcs_compat[DW_XPCS_INTERFACE_MAX] =
},
};
-static const struct xpcs_id xpcs_id_list[] = {
+static const struct dw_xpcs_desc xpcs_desc_list[] = {
{
- .id = SYNOPSYS_XPCS_ID,
- .mask = SYNOPSYS_XPCS_MASK,
+ .id = DW_XPCS_ID,
+ .mask = DW_XPCS_ID_MASK,
.compat = synopsys_xpcs_compat,
}, {
.id = NXP_SJA1105_XPCS_ID,
- .mask = SYNOPSYS_XPCS_MASK,
+ .mask = DW_XPCS_ID_MASK,
.compat = nxp_sja1105_xpcs_compat,
}, {
.id = NXP_SJA1110_XPCS_ID,
- .mask = SYNOPSYS_XPCS_MASK,
+ .mask = DW_XPCS_ID_MASK,
.compat = nxp_sja1110_xpcs_compat,
},
};
@@ -1365,12 +1371,9 @@ static const struct phylink_pcs_ops xpcs_phylink_ops = {
.pcs_link_up = xpcs_link_up,
};
-static struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
- phy_interface_t interface)
+static struct dw_xpcs *xpcs_create_data(struct mdio_device *mdiodev)
{
struct dw_xpcs *xpcs;
- u32 xpcs_id;
- int i, ret;
xpcs = kzalloc(sizeof(*xpcs), GFP_KERNEL);
if (!xpcs)
@@ -1378,59 +1381,142 @@ static struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
mdio_device_get(mdiodev);
xpcs->mdiodev = mdiodev;
+ xpcs->pcs.ops = &xpcs_phylink_ops;
+ xpcs->pcs.neg_mode = true;
+ xpcs->pcs.poll = true;
+
+ return xpcs;
+}
- xpcs_id = xpcs_get_id(xpcs);
+static void xpcs_free_data(struct dw_xpcs *xpcs)
+{
+ mdio_device_put(xpcs->mdiodev);
+ kfree(xpcs);
+}
- for (i = 0; i < ARRAY_SIZE(xpcs_id_list); i++) {
- const struct xpcs_id *entry = &xpcs_id_list[i];
- const struct xpcs_compat *compat;
+static int xpcs_init_clks(struct dw_xpcs *xpcs)
+{
+ static const char *ids[DW_XPCS_NUM_CLKS] = {
+ [DW_XPCS_CORE_CLK] = "core",
+ [DW_XPCS_PAD_CLK] = "pad",
+ };
+ struct device *dev = &xpcs->mdiodev->dev;
+ int ret, i;
- if ((xpcs_id & entry->mask) != entry->id)
- continue;
+ for (i = 0; i < DW_XPCS_NUM_CLKS; ++i)
+ xpcs->clks[i].id = ids[i];
- xpcs->id = entry;
+ ret = clk_bulk_get_optional(dev, DW_XPCS_NUM_CLKS, xpcs->clks);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to get clocks\n");
- compat = xpcs_find_compat(entry, interface);
- if (!compat) {
- ret = -ENODEV;
- goto out;
- }
+ ret = clk_bulk_prepare_enable(DW_XPCS_NUM_CLKS, xpcs->clks);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to enable clocks\n");
- ret = xpcs_dev_flag(xpcs);
- if (ret)
- goto out;
+ return 0;
+}
- xpcs->pcs.ops = &xpcs_phylink_ops;
- xpcs->pcs.neg_mode = true;
+static void xpcs_clear_clks(struct dw_xpcs *xpcs)
+{
+ clk_bulk_disable_unprepare(DW_XPCS_NUM_CLKS, xpcs->clks);
- if (xpcs->dev_flag != DW_DEV_TXGBE) {
- xpcs->pcs.poll = true;
+ clk_bulk_put(DW_XPCS_NUM_CLKS, xpcs->clks);
+}
- ret = xpcs_soft_reset(xpcs, compat);
- if (ret)
- goto out;
- }
+static int xpcs_init_id(struct dw_xpcs *xpcs)
+{
+ const struct dw_xpcs_info *info;
+ int i, ret;
- return xpcs;
+ info = dev_get_platdata(&xpcs->mdiodev->dev);
+ if (!info) {
+ xpcs->info.pcs = DW_XPCS_ID_NATIVE;
+ xpcs->info.pma = DW_XPCS_PMA_ID_NATIVE;
+ } else {
+ xpcs->info = *info;
}
- ret = -ENODEV;
+ ret = xpcs_get_id(xpcs);
+ if (ret < 0)
+ return ret;
-out:
- mdio_device_put(mdiodev);
- kfree(xpcs);
+ for (i = 0; i < ARRAY_SIZE(xpcs_desc_list); i++) {
+ const struct dw_xpcs_desc *desc = &xpcs_desc_list[i];
- return ERR_PTR(ret);
+ if ((xpcs->info.pcs & desc->mask) != desc->id)
+ continue;
+
+ xpcs->desc = desc;
+
+ break;
+ }
+
+ if (!xpcs->desc)
+ return -ENODEV;
+
+ return 0;
}
-void xpcs_destroy(struct dw_xpcs *xpcs)
+static int xpcs_init_iface(struct dw_xpcs *xpcs, phy_interface_t interface)
{
- if (xpcs)
- mdio_device_put(xpcs->mdiodev);
- kfree(xpcs);
+ const struct dw_xpcs_compat *compat;
+
+ compat = xpcs_find_compat(xpcs->desc, interface);
+ if (!compat)
+ return -EINVAL;
+
+ if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) {
+ xpcs->pcs.poll = false;
+ return 0;
+ }
+
+ return xpcs_soft_reset(xpcs, compat);
}
-EXPORT_SYMBOL_GPL(xpcs_destroy);
+static struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
+ phy_interface_t interface)
+{
+ struct dw_xpcs *xpcs;
+ int ret;
+
+ xpcs = xpcs_create_data(mdiodev);
+ if (IS_ERR(xpcs))
+ return xpcs;
+
+ ret = xpcs_init_clks(xpcs);
+ if (ret)
+ goto out_free_data;
+
+ ret = xpcs_init_id(xpcs);
+ if (ret)
+ goto out_clear_clks;
+
+ ret = xpcs_init_iface(xpcs, interface);
+ if (ret)
+ goto out_clear_clks;
+
+ return xpcs;
+
+out_clear_clks:
+ xpcs_clear_clks(xpcs);
+
+out_free_data:
+ xpcs_free_data(xpcs);
+
+ return ERR_PTR(ret);
+}
+
+/**
+ * xpcs_create_mdiodev() - create a DW xPCS instance with the MDIO @addr
+ * @bus: pointer to the MDIO-bus descriptor for the device to be looked at
+ * @addr: device MDIO-bus ID
+ * @interface: requested PHY interface
+ *
+ * Return: a pointer to the DW XPCS handle if successful, otherwise -ENODEV if
+ * the PCS device couldn't be found on the bus and other negative errno related
+ * to the data allocation and MDIO-bus communications.
+ */
struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr,
phy_interface_t interface)
{
@@ -1455,5 +1541,54 @@ struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr,
}
EXPORT_SYMBOL_GPL(xpcs_create_mdiodev);
+/**
+ * xpcs_create_fwnode() - Create a DW xPCS instance from @fwnode
+ * @fwnode: fwnode handle poining to the DW XPCS device
+ * @interface: requested PHY interface
+ *
+ * Return: a pointer to the DW XPCS handle if successful, otherwise -ENODEV if
+ * the fwnode device is unavailable or the PCS device couldn't be found on the
+ * bus, -EPROBE_DEFER if the respective MDIO-device instance couldn't be found,
+ * other negative errno related to the data allocations and MDIO-bus
+ * communications.
+ */
+struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode,
+ phy_interface_t interface)
+{
+ struct mdio_device *mdiodev;
+ struct dw_xpcs *xpcs;
+
+ if (!fwnode_device_is_available(fwnode))
+ return ERR_PTR(-ENODEV);
+
+ mdiodev = fwnode_mdio_find_device(fwnode);
+ if (!mdiodev)
+ return ERR_PTR(-EPROBE_DEFER);
+
+ xpcs = xpcs_create(mdiodev, interface);
+
+ /* xpcs_create() has taken a refcount on the mdiodev if it was
+ * successful. If xpcs_create() fails, this will free the mdio
+ * device here. In any case, we don't need to hold our reference
+ * anymore, and putting it here will allow mdio_device_put() in
+ * xpcs_destroy() to automatically free the mdio device.
+ */
+ mdio_device_put(mdiodev);
+
+ return xpcs;
+}
+EXPORT_SYMBOL_GPL(xpcs_create_fwnode);
+
+void xpcs_destroy(struct dw_xpcs *xpcs)
+{
+ if (!xpcs)
+ return;
+
+ xpcs_clear_clks(xpcs);
+
+ xpcs_free_data(xpcs);
+}
+EXPORT_SYMBOL_GPL(xpcs_destroy);
+
MODULE_DESCRIPTION("Synopsys DesignWare XPCS library");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h
index 96c36b32ca99..fa05adfae220 100644
--- a/drivers/net/pcs/pcs-xpcs.h
+++ b/drivers/net/pcs/pcs-xpcs.h
@@ -6,8 +6,8 @@
* Author: Jose Abreu <[email protected]>
*/
-#define SYNOPSYS_XPCS_ID 0x7996ced0
-#define SYNOPSYS_XPCS_MASK 0xffffffff
+#include <linux/bits.h>
+#include <linux/pcs/pcs-xpcs.h>
/* Vendor regs access */
#define DW_VENDOR BIT(15)
@@ -120,6 +120,9 @@
/* VR MII EEE Control 1 defines */
#define DW_VR_MII_EEE_TRN_LPI BIT(0) /* Transparent Mode Enable */
+#define DW_XPCS_INFO_DECLARE(_name, _pcs, _pma) \
+ static const struct dw_xpcs_info _name = { .pcs = _pcs, .pma = _pma }
+
int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg);
int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val);
int xpcs_read_vpcs(struct dw_xpcs *xpcs, int reg);
diff --git a/drivers/net/phy/aquantia/aquantia.h b/drivers/net/phy/aquantia/aquantia.h
index c0e1fd9d7152..2465345081f8 100644
--- a/drivers/net/phy/aquantia/aquantia.h
+++ b/drivers/net/phy/aquantia/aquantia.h
@@ -6,6 +6,9 @@
* Author: Heiner Kallweit <[email protected]>
*/
+#ifndef AQUANTIA_H
+#define AQUANTIA_H
+
#include <linux/device.h>
#include <linux/phy.h>
@@ -198,3 +201,6 @@ int aqr_phy_led_hw_control_set(struct phy_device *phydev, u8 index,
int aqr_phy_led_active_low_set(struct phy_device *phydev, int index, bool enable);
int aqr_phy_led_polarity_set(struct phy_device *phydev, int index,
unsigned long modes);
+int aqr_wait_reset_complete(struct phy_device *phydev);
+
+#endif /* AQUANTIA_H */
diff --git a/drivers/net/phy/aquantia/aquantia_firmware.c b/drivers/net/phy/aquantia/aquantia_firmware.c
index 0c9640ef153b..524627a36c6f 100644
--- a/drivers/net/phy/aquantia/aquantia_firmware.c
+++ b/drivers/net/phy/aquantia/aquantia_firmware.c
@@ -353,6 +353,10 @@ int aqr_firmware_load(struct phy_device *phydev)
{
int ret;
+ ret = aqr_wait_reset_complete(phydev);
+ if (ret)
+ return ret;
+
/* Check if the firmware is not already loaded by pooling
* the current version returned by the PHY. If 0 is returned,
* no firmware is loaded.
diff --git a/drivers/net/phy/aquantia/aquantia_main.c b/drivers/net/phy/aquantia/aquantia_main.c
index 6c14355744b7..d12e35374231 100644
--- a/drivers/net/phy/aquantia/aquantia_main.c
+++ b/drivers/net/phy/aquantia/aquantia_main.c
@@ -29,6 +29,7 @@
#define PHY_ID_AQR113 0x31c31c40
#define PHY_ID_AQR113C 0x31c31c12
#define PHY_ID_AQR114C 0x31c31c22
+#define PHY_ID_AQR115C 0x31c31c33
#define PHY_ID_AQR813 0x31c31cb2
#define MDIO_PHYXS_VEND_IF_STATUS 0xe812
@@ -441,7 +442,7 @@ static int aqr107_set_tunable(struct phy_device *phydev,
* The chip also provides a "reset completed" bit, but it's cleared after
* read. Therefore function would time out if called again.
*/
-static int aqr107_wait_reset_complete(struct phy_device *phydev)
+int aqr_wait_reset_complete(struct phy_device *phydev)
{
int val;
@@ -494,7 +495,7 @@ static int aqr107_config_init(struct phy_device *phydev)
WARN(phydev->interface == PHY_INTERFACE_MODE_XGMII,
"Your devicetree is out of date, please update it. The AQR107 family doesn't support XGMII, maybe you mean USXGMII.\n");
- ret = aqr107_wait_reset_complete(phydev);
+ ret = aqr_wait_reset_complete(phydev);
if (!ret)
aqr107_chip_info(phydev);
@@ -522,7 +523,7 @@ static int aqcs109_config_init(struct phy_device *phydev)
phydev->interface != PHY_INTERFACE_MODE_2500BASEX)
return -ENODEV;
- ret = aqr107_wait_reset_complete(phydev);
+ ret = aqr_wait_reset_complete(phydev);
if (!ret)
aqr107_chip_info(phydev);
@@ -652,7 +653,13 @@ static int aqr107_fill_interface_modes(struct phy_device *phydev)
unsigned long *possible = phydev->possible_interfaces;
unsigned int serdes_mode, rate_adapt;
phy_interface_t interface;
- int i, val;
+ int i, val, ret;
+
+ ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1,
+ VEND1_GLOBAL_CFG_10M, val, val != 0,
+ 1000, 100000, false);
+ if (ret)
+ return ret;
/* Walk the media-speed configuration registers to determine which
* host-side serdes modes may be used by the PHY depending on the
@@ -1000,6 +1007,30 @@ static struct phy_driver aqr_driver[] = {
.led_polarity_set = aqr_phy_led_polarity_set,
},
{
+ PHY_ID_MATCH_MODEL(PHY_ID_AQR115C),
+ .name = "Aquantia AQR115C",
+ .probe = aqr107_probe,
+ .get_rate_matching = aqr107_get_rate_matching,
+ .config_init = aqr113c_config_init,
+ .config_aneg = aqr_config_aneg,
+ .config_intr = aqr_config_intr,
+ .handle_interrupt = aqr_handle_interrupt,
+ .read_status = aqr107_read_status,
+ .get_tunable = aqr107_get_tunable,
+ .set_tunable = aqr107_set_tunable,
+ .suspend = aqr107_suspend,
+ .resume = aqr107_resume,
+ .get_sset_count = aqr107_get_sset_count,
+ .get_strings = aqr107_get_strings,
+ .get_stats = aqr107_get_stats,
+ .link_change_notify = aqr107_link_change_notify,
+ .led_brightness_set = aqr_phy_led_brightness_set,
+ .led_hw_is_supported = aqr_phy_led_hw_is_supported,
+ .led_hw_control_set = aqr_phy_led_hw_control_set,
+ .led_hw_control_get = aqr_phy_led_hw_control_get,
+ .led_polarity_set = aqr_phy_led_polarity_set,
+},
+{
PHY_ID_MATCH_MODEL(PHY_ID_AQR813),
.name = "Aquantia AQR813",
.probe = aqr107_probe,
@@ -1042,6 +1073,7 @@ static struct mdio_device_id __maybe_unused aqr_tbl[] = {
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR113) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR113C) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR114C) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_AQR115C) },
{ PHY_ID_MATCH_MODEL(PHY_ID_AQR813) },
{ }
};
diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
index 0b88635f4fbc..d3273bc0da4a 100644
--- a/drivers/net/phy/microchip.c
+++ b/drivers/net/phy/microchip.c
@@ -12,8 +12,14 @@
#include <linux/of.h>
#include <dt-bindings/net/microchip-lan78xx.h>
+#define PHY_ID_LAN937X_TX 0x0007c190
+
+#define LAN937X_MODE_CTRL_STATUS_REG 0x11
+#define LAN937X_AUTOMDIX_EN BIT(7)
+#define LAN937X_MDI_MODE BIT(6)
+
#define DRIVER_AUTHOR "WOOJUNG HUH <[email protected]>"
-#define DRIVER_DESC "Microchip LAN88XX PHY driver"
+#define DRIVER_DESC "Microchip LAN88XX/LAN937X TX PHY driver"
struct lan88xx_priv {
int chip_id;
@@ -373,6 +379,115 @@ static void lan88xx_link_change_notify(struct phy_device *phydev)
}
}
+/**
+ * lan937x_tx_read_mdix_status - Read the MDIX status for the LAN937x TX PHY.
+ * @phydev: Pointer to the phy_device structure.
+ *
+ * This function reads the MDIX status of the LAN937x TX PHY and sets the
+ * mdix_ctrl and mdix fields of the phy_device structure accordingly.
+ * Note that MDIX status is not supported in AUTO mode, and will be set
+ * to invalid in such cases.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+static int lan937x_tx_read_mdix_status(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = phy_read(phydev, LAN937X_MODE_CTRL_STATUS_REG);
+ if (ret < 0)
+ return ret;
+
+ if (ret & LAN937X_AUTOMDIX_EN) {
+ phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
+ /* MDI/MDIX status is unknown */
+ phydev->mdix = ETH_TP_MDI_INVALID;
+ } else if (ret & LAN937X_MDI_MODE) {
+ phydev->mdix_ctrl = ETH_TP_MDI_X;
+ phydev->mdix = ETH_TP_MDI_X;
+ } else {
+ phydev->mdix_ctrl = ETH_TP_MDI;
+ phydev->mdix = ETH_TP_MDI;
+ }
+
+ return 0;
+}
+
+/**
+ * lan937x_tx_read_status - Read the status for the LAN937x TX PHY.
+ * @phydev: Pointer to the phy_device structure.
+ *
+ * This function reads the status of the LAN937x TX PHY and updates the
+ * phy_device structure accordingly.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+static int lan937x_tx_read_status(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = genphy_read_status(phydev);
+ if (ret < 0)
+ return ret;
+
+ return lan937x_tx_read_mdix_status(phydev);
+}
+
+/**
+ * lan937x_tx_set_mdix - Set the MDIX mode for the LAN937x TX PHY.
+ * @phydev: Pointer to the phy_device structure.
+ *
+ * This function configures the MDIX mode of the LAN937x TX PHY based on the
+ * mdix_ctrl field of the phy_device structure. The MDIX mode can be set to
+ * MDI (straight-through), MDIX (crossover), or AUTO (auto-MDIX). If the mode
+ * is not recognized, it returns 0 without making any changes.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+static int lan937x_tx_set_mdix(struct phy_device *phydev)
+{
+ u16 val;
+
+ switch (phydev->mdix_ctrl) {
+ case ETH_TP_MDI:
+ val = 0;
+ break;
+ case ETH_TP_MDI_X:
+ val = LAN937X_MDI_MODE;
+ break;
+ case ETH_TP_MDI_AUTO:
+ val = LAN937X_AUTOMDIX_EN;
+ break;
+ default:
+ return 0;
+ }
+
+ return phy_modify(phydev, LAN937X_MODE_CTRL_STATUS_REG,
+ LAN937X_AUTOMDIX_EN | LAN937X_MDI_MODE, val);
+}
+
+/**
+ * lan937x_tx_config_aneg - Configure auto-negotiation and fixed modes for the
+ * LAN937x TX PHY.
+ * @phydev: Pointer to the phy_device structure.
+ *
+ * This function configures the MDIX mode for the LAN937x TX PHY and then
+ * proceeds to configure the auto-negotiation or fixed mode settings
+ * based on the phy_device structure.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+static int lan937x_tx_config_aneg(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = lan937x_tx_set_mdix(phydev);
+ if (ret < 0)
+ return ret;
+
+ return genphy_config_aneg(phydev);
+}
+
static struct phy_driver microchip_phy_driver[] = {
{
.phy_id = 0x0007c132,
@@ -400,12 +515,21 @@ static struct phy_driver microchip_phy_driver[] = {
.set_wol = lan88xx_set_wol,
.read_page = lan88xx_read_page,
.write_page = lan88xx_write_page,
+},
+{
+ PHY_ID_MATCH_MODEL(PHY_ID_LAN937X_TX),
+ .name = "Microchip LAN937x TX",
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .config_aneg = lan937x_tx_config_aneg,
+ .read_status = lan937x_tx_read_status,
} };
module_phy_driver(microchip_phy_driver);
static struct mdio_device_id __maybe_unused microchip_tbl[] = {
{ 0x0007c132, 0xfffffff2 },
+ { PHY_ID_MATCH_MODEL(PHY_ID_LAN937X_TX) },
{ }
};
diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c
index a838b61cd844..a35528497a57 100644
--- a/drivers/net/phy/microchip_t1.c
+++ b/drivers/net/phy/microchip_t1.c
@@ -748,7 +748,7 @@ static int lan87xx_cable_test_report(struct phy_device *phydev)
ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
lan87xx_cable_test_report_trans(detect));
- return 0;
+ return phy_init_hw(phydev);
}
static int lan87xx_cable_test_get_status(struct phy_device *phydev,
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index c4236564c1cd..785182fa5fe0 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -1309,7 +1309,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
if (netdev) {
struct device *parent = netdev->dev.parent;
- if (netdev->wol_enabled)
+ if (netdev->ethtool->wol_enabled)
pm_system_wakeup();
else if (device_may_wakeup(&netdev->dev))
pm_wakeup_dev_event(&netdev->dev, 0, true);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 6c6ec9475709..70b07e621fb2 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -296,7 +296,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
if (!netdev)
goto out;
- if (netdev->wol_enabled)
+ if (netdev->ethtool->wol_enabled)
return false;
/* As long as not all affected network drivers support the
@@ -1980,16 +1980,17 @@ int phy_suspend(struct phy_device *phydev)
const struct phy_driver *phydrv = phydev->drv;
int ret;
- if (phydev->suspended)
+ if (phydev->suspended || !phydrv)
return 0;
phy_ethtool_get_wol(phydev, &wol);
- phydev->wol_enabled = wol.wolopts || (netdev && netdev->wol_enabled);
+ phydev->wol_enabled = wol.wolopts ||
+ (netdev && netdev->ethtool->wol_enabled);
/* If the device has WOL enabled, we cannot suspend the PHY */
if (phydev->wol_enabled && !(phydrv->flags & PHY_ALWAYS_CALL_SUSPEND))
return -EBUSY;
- if (!phydrv || !phydrv->suspend)
+ if (!phydrv->suspend)
return 0;
ret = phydrv->suspend(phydev);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 6c24c48dcf0f..51c526d227fa 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -2282,7 +2282,7 @@ void phylink_suspend(struct phylink *pl, bool mac_wol)
{
ASSERT_RTNL();
- if (mac_wol && (!pl->netdev || pl->netdev->wol_enabled)) {
+ if (mac_wol && (!pl->netdev || pl->netdev->ethtool->wol_enabled)) {
/* Wake-on-Lan enabled, MAC handling */
mutex_lock(&pl->state_mutex);
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 2174893c974f..bed839237fb5 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -32,6 +32,15 @@
#define RTL8211F_PHYCR2 0x19
#define RTL8211F_INSR 0x1d
+#define RTL8211F_LEDCR 0x10
+#define RTL8211F_LEDCR_MODE BIT(15)
+#define RTL8211F_LEDCR_ACT_TXRX BIT(4)
+#define RTL8211F_LEDCR_LINK_1000 BIT(3)
+#define RTL8211F_LEDCR_LINK_100 BIT(1)
+#define RTL8211F_LEDCR_LINK_10 BIT(0)
+#define RTL8211F_LEDCR_MASK GENMASK(4, 0)
+#define RTL8211F_LEDCR_SHIFT 5
+
#define RTL8211F_TX_DELAY BIT(8)
#define RTL8211F_RX_DELAY BIT(3)
@@ -87,6 +96,8 @@
#define RTL_8221B_VN_CG 0x001cc84a
#define RTL_8251B 0x001cc862
+#define RTL8211F_LED_COUNT 3
+
MODULE_DESCRIPTION("Realtek PHY driver");
MODULE_AUTHOR("Johnson Leung");
MODULE_LICENSE("GPL");
@@ -476,6 +487,98 @@ static int rtl821x_resume(struct phy_device *phydev)
return 0;
}
+static int rtl8211f_led_hw_is_supported(struct phy_device *phydev, u8 index,
+ unsigned long rules)
+{
+ const unsigned long mask = BIT(TRIGGER_NETDEV_LINK_10) |
+ BIT(TRIGGER_NETDEV_LINK_100) |
+ BIT(TRIGGER_NETDEV_LINK_1000) |
+ BIT(TRIGGER_NETDEV_RX) |
+ BIT(TRIGGER_NETDEV_TX);
+
+ /* The RTL8211F PHY supports these LED settings on up to three LEDs:
+ * - Link: Configurable subset of 10/100/1000 link rates
+ * - Active: Blink on activity, RX or TX is not differentiated
+ * The Active option has two modes, A and B:
+ * - A: Link and Active indication at configurable, but matching,
+ * subset of 10/100/1000 link rates
+ * - B: Link indication at configurable subset of 10/100/1000 link
+ * rates and Active indication always at all three 10+100+1000
+ * link rates.
+ * This code currently uses mode B only.
+ */
+
+ if (index >= RTL8211F_LED_COUNT)
+ return -EINVAL;
+
+ /* Filter out any other unsupported triggers. */
+ if (rules & ~mask)
+ return -EOPNOTSUPP;
+
+ /* RX and TX are not differentiated, either both are set or not set. */
+ if (!(rules & BIT(TRIGGER_NETDEV_RX)) ^ !(rules & BIT(TRIGGER_NETDEV_TX)))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int rtl8211f_led_hw_control_get(struct phy_device *phydev, u8 index,
+ unsigned long *rules)
+{
+ int val;
+
+ val = phy_read_paged(phydev, 0xd04, RTL8211F_LEDCR);
+ if (val < 0)
+ return val;
+
+ val >>= RTL8211F_LEDCR_SHIFT * index;
+ val &= RTL8211F_LEDCR_MASK;
+
+ if (val & RTL8211F_LEDCR_LINK_10)
+ set_bit(TRIGGER_NETDEV_LINK_10, rules);
+
+ if (val & RTL8211F_LEDCR_LINK_100)
+ set_bit(TRIGGER_NETDEV_LINK_100, rules);
+
+ if (val & RTL8211F_LEDCR_LINK_1000)
+ set_bit(TRIGGER_NETDEV_LINK_1000, rules);
+
+ if (val & RTL8211F_LEDCR_ACT_TXRX) {
+ set_bit(TRIGGER_NETDEV_RX, rules);
+ set_bit(TRIGGER_NETDEV_TX, rules);
+ }
+
+ return 0;
+}
+
+static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index,
+ unsigned long rules)
+{
+ const u16 mask = RTL8211F_LEDCR_MASK << (RTL8211F_LEDCR_SHIFT * index);
+ u16 reg = RTL8211F_LEDCR_MODE; /* Mode B */
+
+ if (index >= RTL8211F_LED_COUNT)
+ return -EINVAL;
+
+ if (test_bit(TRIGGER_NETDEV_LINK_10, &rules))
+ reg |= RTL8211F_LEDCR_LINK_10;
+
+ if (test_bit(TRIGGER_NETDEV_LINK_100, &rules))
+ reg |= RTL8211F_LEDCR_LINK_100;
+
+ if (test_bit(TRIGGER_NETDEV_LINK_1000, &rules))
+ reg |= RTL8211F_LEDCR_LINK_1000;
+
+ if (test_bit(TRIGGER_NETDEV_RX, &rules) ||
+ test_bit(TRIGGER_NETDEV_TX, &rules)) {
+ reg |= RTL8211F_LEDCR_ACT_TXRX;
+ }
+
+ reg <<= RTL8211F_LEDCR_SHIFT * index;
+
+ return phy_modify_paged(phydev, 0xd04, RTL8211F_LEDCR, mask, reg);
+}
+
static int rtl8211e_config_init(struct phy_device *phydev)
{
int ret = 0, oldpage;
@@ -1192,6 +1295,9 @@ static struct phy_driver realtek_drvs[] = {
.read_page = rtl821x_read_page,
.write_page = rtl821x_write_page,
.flags = PHY_ALWAYS_CALL_SUSPEND,
+ .led_hw_is_supported = rtl8211f_led_hw_is_supported,
+ .led_hw_control_get = rtl8211f_led_hw_control_get,
+ .led_hw_control_set = rtl8211f_led_hw_control_set,
}, {
PHY_ID_MATCH_EXACT(RTL_8211FVD_PHYID),
.name = "RTL8211F-VD Gigabit Ethernet",
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 0a65b6d690fe..eb9acfcaeb09 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -70,6 +70,7 @@
#define MPHDRLEN_SSN 4 /* ditto with short sequence numbers */
#define PPP_PROTO_LEN 2
+#define PPP_LCP_HDRLEN 4
/*
* An instance of /dev/ppp can be associated with either a ppp
@@ -493,6 +494,15 @@ static ssize_t ppp_read(struct file *file, char __user *buf,
return ret;
}
+static bool ppp_check_packet(struct sk_buff *skb, size_t count)
+{
+ /* LCP packets must include LCP header which 4 bytes long:
+ * 1-byte code, 1-byte identifier, and 2-byte length.
+ */
+ return get_unaligned_be16(skb->data) != PPP_LCP ||
+ count >= PPP_PROTO_LEN + PPP_LCP_HDRLEN;
+}
+
static ssize_t ppp_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
@@ -515,6 +525,11 @@ static ssize_t ppp_write(struct file *file, const char __user *buf,
kfree_skb(skb);
goto out;
}
+ ret = -EINVAL;
+ if (unlikely(!ppp_check_packet(skb, count))) {
+ kfree_skb(skb);
+ goto out;
+ }
switch (pf->kind) {
case INTERFACE:
diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c
index 820358b71f0f..0af7db80b2f8 100644
--- a/drivers/net/pse-pd/pd692x0.c
+++ b/drivers/net/pse-pd/pd692x0.c
@@ -73,6 +73,9 @@ enum {
PD692X0_MSG_SET_PORT_PARAM,
PD692X0_MSG_GET_PORT_STATUS,
PD692X0_MSG_DOWNLOAD_CMD,
+ PD692X0_MSG_GET_PORT_CLASS,
+ PD692X0_MSG_GET_PORT_MEAS,
+ PD692X0_MSG_GET_PORT_PARAM,
/* add new message above here */
PD692X0_MSG_CNT
@@ -134,7 +137,7 @@ static const struct pd692x0_msg pd692x0_msg_template_list[PD692X0_MSG_CNT] = {
[PD692X0_MSG_SET_PORT_PARAM] = {
.key = PD692X0_KEY_CMD,
.sub = {0x05, 0xc0},
- .data = { 0, 0xff, 0xff, 0xff,
+ .data = { 0xf, 0xff, 0xff, 0xff,
0x4e, 0x4e, 0x4e, 0x4e},
},
[PD692X0_MSG_GET_PORT_STATUS] = {
@@ -149,6 +152,24 @@ static const struct pd692x0_msg pd692x0_msg_template_list[PD692X0_MSG_CNT] = {
.data = {0x16, 0x16, 0x99, 0x4e,
0x4e, 0x4e, 0x4e, 0x4e},
},
+ [PD692X0_MSG_GET_PORT_CLASS] = {
+ .key = PD692X0_KEY_REQ,
+ .sub = {0x05, 0xc4},
+ .data = {0x4e, 0x4e, 0x4e, 0x4e,
+ 0x4e, 0x4e, 0x4e, 0x4e},
+ },
+ [PD692X0_MSG_GET_PORT_MEAS] = {
+ .key = PD692X0_KEY_REQ,
+ .sub = {0x05, 0xc5},
+ .data = {0x4e, 0x4e, 0x4e, 0x4e,
+ 0x4e, 0x4e, 0x4e, 0x4e},
+ },
+ [PD692X0_MSG_GET_PORT_PARAM] = {
+ .key = PD692X0_KEY_REQ,
+ .sub = {0x05, 0xc0},
+ .data = {0x4e, 0x4e, 0x4e, 0x4e,
+ 0x4e, 0x4e, 0x4e, 0x4e},
+ },
};
static u8 pd692x0_build_msg(struct pd692x0_msg *msg, u8 echo)
@@ -435,6 +456,184 @@ static int pd692x0_pi_is_enabled(struct pse_controller_dev *pcdev, int id)
}
}
+struct pd692x0_pse_ext_state_mapping {
+ u32 status_code;
+ enum ethtool_c33_pse_ext_state pse_ext_state;
+ u32 pse_ext_substate;
+};
+
+static const struct pd692x0_pse_ext_state_mapping
+pd692x0_pse_ext_state_map[] = {
+ {0x06, ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_HIGH_VOLTAGE},
+ {0x07, ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_LOW_VOLTAGE},
+ {0x08, ETHTOOL_C33_PSE_EXT_STATE_MR_PSE_ENABLE,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_PSE_ENABLE_DISABLE_PIN_ACTIVE},
+ {0x0C, ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_NON_EXISTING_PORT},
+ {0x11, ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNDEFINED_PORT},
+ {0x12, ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT},
+ {0x1B, ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_DET_IN_PROCESS},
+ {0x1C, ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS},
+ {0x1E, ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_DETECTED_UNDERLOAD},
+ {0x1F, ETHTOOL_C33_PSE_EXT_STATE_OVLD_DETECTED,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_OVLD_DETECTED_OVERLOAD},
+ {0x20, ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_BUDGET_EXCEEDED},
+ {0x21, ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT},
+ {0x22, ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_CONFIG_CHANGE},
+ {0x24, ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_VOLTAGE_INJECTION},
+ {0x25, ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS},
+ {0x34, ETHTOOL_C33_PSE_EXT_STATE_SHORT_DETECTED,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_SHORT_DETECTED_SHORT_CONDITION},
+ {0x35, ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP},
+ {0x36, ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP},
+ {0x37, ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS},
+ {0x3C, ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PORT_PW_LIMIT_EXCEEDS_CONTROLLER_BUDGET},
+ {0x3D, ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PD_REQUEST_EXCEEDS_PORT_LIMIT},
+ {0x41, ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_HW_PW_LIMIT},
+ {0x43, ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS},
+ {0xA7, ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_CONNECTION_CHECK_ERROR},
+ {0xA8, ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_CONNECTION_OPEN},
+ { /* sentinel */ }
+};
+
+static void
+pd692x0_get_ext_state(struct ethtool_c33_pse_ext_state_info *c33_ext_state_info,
+ u32 status_code)
+{
+ const struct pd692x0_pse_ext_state_mapping *ext_state_map;
+
+ ext_state_map = pd692x0_pse_ext_state_map;
+ while (ext_state_map->status_code) {
+ if (ext_state_map->status_code == status_code) {
+ c33_ext_state_info->c33_pse_ext_state = ext_state_map->pse_ext_state;
+ c33_ext_state_info->__c33_pse_ext_substate = ext_state_map->pse_ext_substate;
+ return;
+ }
+ ext_state_map++;
+ }
+}
+
+struct pd692x0_class_pw {
+ int class;
+ int class_cfg_value;
+ int class_pw;
+ int max_added_class_pw;
+};
+
+#define PD692X0_CLASS_PW_TABLE_SIZE 4
+/* 4/2 pairs class configuration power table in compliance mode.
+ * Need to be arranged in ascending order of power support.
+ */
+static const struct pd692x0_class_pw
+pd692x0_class_pw_table[PD692X0_CLASS_PW_TABLE_SIZE] = {
+ {.class = 3, .class_cfg_value = 0x3, .class_pw = 15000, .max_added_class_pw = 3100},
+ {.class = 4, .class_cfg_value = 0x2, .class_pw = 30000, .max_added_class_pw = 8000},
+ {.class = 6, .class_cfg_value = 0x1, .class_pw = 60000, .max_added_class_pw = 5000},
+ {.class = 8, .class_cfg_value = 0x0, .class_pw = 90000, .max_added_class_pw = 7500},
+};
+
+static int pd692x0_pi_get_pw_from_table(int op_mode, int added_pw)
+{
+ const struct pd692x0_class_pw *pw_table;
+ int i;
+
+ pw_table = pd692x0_class_pw_table;
+ for (i = 0; i < PD692X0_CLASS_PW_TABLE_SIZE; i++, pw_table++) {
+ if (pw_table->class_cfg_value == op_mode)
+ return pw_table->class_pw + added_pw * 100;
+ }
+
+ return -ERANGE;
+}
+
+static int pd692x0_pi_set_pw_from_table(struct device *dev,
+ struct pd692x0_msg *msg, int pw)
+{
+ const struct pd692x0_class_pw *pw_table;
+ int i;
+
+ pw_table = pd692x0_class_pw_table;
+ if (pw < pw_table->class_pw) {
+ dev_err(dev,
+ "Power limit %dmW not supported. Ranges minimal available: [%d-%d]\n",
+ pw,
+ pw_table->class_pw,
+ pw_table->class_pw + pw_table->max_added_class_pw);
+ return -ERANGE;
+ }
+
+ for (i = 0; i < PD692X0_CLASS_PW_TABLE_SIZE; i++, pw_table++) {
+ if (pw > (pw_table->class_pw + pw_table->max_added_class_pw))
+ continue;
+
+ if (pw < pw_table->class_pw) {
+ dev_err(dev,
+ "Power limit %dmW not supported. Ranges available: [%d-%d] or [%d-%d]\n",
+ pw,
+ (pw_table - 1)->class_pw,
+ (pw_table - 1)->class_pw + (pw_table - 1)->max_added_class_pw,
+ pw_table->class_pw,
+ pw_table->class_pw + pw_table->max_added_class_pw);
+ return -ERANGE;
+ }
+
+ msg->data[2] = pw_table->class_cfg_value;
+ msg->data[3] = (pw - pw_table->class_pw) / 100;
+ return 0;
+ }
+
+ pw_table--;
+ dev_warn(dev,
+ "Power limit %dmW not supported. Set to highest power limit %dmW\n",
+ pw, pw_table->class_pw + pw_table->max_added_class_pw);
+ msg->data[2] = pw_table->class_cfg_value;
+ msg->data[3] = pw_table->max_added_class_pw / 100;
+ return 0;
+}
+
+static int
+pd692x0_pi_get_pw_ranges(struct pse_control_status *st)
+{
+ const struct pd692x0_class_pw *pw_table;
+ int i;
+
+ pw_table = pd692x0_class_pw_table;
+ st->c33_pw_limit_ranges = kcalloc(PD692X0_CLASS_PW_TABLE_SIZE,
+ sizeof(struct ethtool_c33_pse_pw_limit_range),
+ GFP_KERNEL);
+ if (!st->c33_pw_limit_ranges)
+ return -ENOMEM;
+
+ for (i = 0; i < PD692X0_CLASS_PW_TABLE_SIZE; i++, pw_table++) {
+ st->c33_pw_limit_ranges[i].min = pw_table->class_pw;
+ st->c33_pw_limit_ranges[i].max = pw_table->class_pw + pw_table->max_added_class_pw;
+ }
+
+ st->c33_pw_limit_nb_ranges = i;
+ return 0;
+}
+
static int pd692x0_ethtool_get_status(struct pse_controller_dev *pcdev,
unsigned long id,
struct netlink_ext_ack *extack,
@@ -442,6 +641,7 @@ static int pd692x0_ethtool_get_status(struct pse_controller_dev *pcdev,
{
struct pd692x0_priv *priv = to_pd692x0_priv(pcdev);
struct pd692x0_msg msg, buf = {0};
+ u32 class;
int ret;
ret = pd692x0_fw_unavailable(priv);
@@ -471,6 +671,36 @@ static int pd692x0_ethtool_get_status(struct pse_controller_dev *pcdev,
priv->admin_state[id] = status->c33_admin_state;
+ pd692x0_get_ext_state(&status->c33_ext_state_info, buf.sub[0]);
+ status->c33_actual_pw = (buf.data[0] << 4 | buf.data[1]) * 100;
+
+ msg = pd692x0_msg_template_list[PD692X0_MSG_GET_PORT_PARAM];
+ msg.sub[2] = id;
+ memset(&buf, 0, sizeof(buf));
+ ret = pd692x0_sendrecv_msg(priv, &msg, &buf);
+ if (ret < 0)
+ return ret;
+
+ ret = pd692x0_pi_get_pw_from_table(buf.data[0], buf.data[1]);
+ if (ret < 0)
+ return ret;
+ status->c33_avail_pw_limit = ret;
+
+ memset(&buf, 0, sizeof(buf));
+ msg = pd692x0_msg_template_list[PD692X0_MSG_GET_PORT_CLASS];
+ msg.sub[2] = id;
+ ret = pd692x0_sendrecv_msg(priv, &msg, &buf);
+ if (ret < 0)
+ return ret;
+
+ class = buf.data[3] >> 4;
+ if (class <= 8)
+ status->c33_pw_class = class;
+
+ ret = pd692x0_pi_get_pw_ranges(status);
+ if (ret < 0)
+ return ret;
+
return 0;
}
@@ -749,12 +979,97 @@ out:
return ret;
}
+static int pd692x0_pi_get_voltage(struct pse_controller_dev *pcdev, int id)
+{
+ struct pd692x0_priv *priv = to_pd692x0_priv(pcdev);
+ struct pd692x0_msg msg, buf = {0};
+ int ret;
+
+ ret = pd692x0_fw_unavailable(priv);
+ if (ret)
+ return ret;
+
+ msg = pd692x0_msg_template_list[PD692X0_MSG_GET_PORT_MEAS];
+ msg.sub[2] = id;
+ ret = pd692x0_sendrecv_msg(priv, &msg, &buf);
+ if (ret < 0)
+ return ret;
+
+ /* Convert 0.1V unit to uV */
+ return (buf.sub[0] << 8 | buf.sub[1]) * 100000;
+}
+
+static int pd692x0_pi_get_current_limit(struct pse_controller_dev *pcdev,
+ int id)
+{
+ struct pd692x0_priv *priv = to_pd692x0_priv(pcdev);
+ struct pd692x0_msg msg, buf = {0};
+ int mW, uV, uA, ret;
+ s64 tmp_64;
+
+ msg = pd692x0_msg_template_list[PD692X0_MSG_GET_PORT_PARAM];
+ msg.sub[2] = id;
+ ret = pd692x0_sendrecv_msg(priv, &msg, &buf);
+ if (ret < 0)
+ return ret;
+
+ ret = pd692x0_pi_get_pw_from_table(buf.data[2], buf.data[3]);
+ if (ret < 0)
+ return ret;
+ mW = ret;
+
+ ret = pd692x0_pi_get_voltage(pcdev, id);
+ if (ret < 0)
+ return ret;
+ uV = ret;
+
+ tmp_64 = mW;
+ tmp_64 *= 1000000000ull;
+ /* uA = mW * 1000000000 / uV */
+ uA = DIV_ROUND_CLOSEST_ULL(tmp_64, uV);
+ return uA;
+}
+
+static int pd692x0_pi_set_current_limit(struct pse_controller_dev *pcdev,
+ int id, int max_uA)
+{
+ struct pd692x0_priv *priv = to_pd692x0_priv(pcdev);
+ struct device *dev = &priv->client->dev;
+ struct pd692x0_msg msg, buf = {0};
+ int uV, ret, mW;
+ s64 tmp_64;
+
+ ret = pd692x0_fw_unavailable(priv);
+ if (ret)
+ return ret;
+
+ ret = pd692x0_pi_get_voltage(pcdev, id);
+ if (ret < 0)
+ return ret;
+ uV = ret;
+
+ msg = pd692x0_msg_template_list[PD692X0_MSG_SET_PORT_PARAM];
+ msg.sub[2] = id;
+ tmp_64 = uV;
+ tmp_64 *= max_uA;
+ /* mW = uV * uA / 1000000000 */
+ mW = DIV_ROUND_CLOSEST_ULL(tmp_64, 1000000000);
+ ret = pd692x0_pi_set_pw_from_table(dev, &msg, mW);
+ if (ret)
+ return ret;
+
+ return pd692x0_sendrecv_msg(priv, &msg, &buf);
+}
+
static const struct pse_controller_ops pd692x0_ops = {
.setup_pi_matrix = pd692x0_setup_pi_matrix,
.ethtool_get_status = pd692x0_ethtool_get_status,
.pi_enable = pd692x0_pi_enable,
.pi_disable = pd692x0_pi_disable,
.pi_is_enabled = pd692x0_pi_is_enabled,
+ .pi_get_voltage = pd692x0_pi_get_voltage,
+ .pi_get_current_limit = pd692x0_pi_get_current_limit,
+ .pi_set_current_limit = pd692x0_pi_set_current_limit,
};
#define PD692X0_FW_LINE_MAX_SZ 0xff
diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c
index 795ab264eaf2..16f364b3473b 100644
--- a/drivers/net/pse-pd/pse_core.c
+++ b/drivers/net/pse-pd/pse_core.c
@@ -265,10 +265,113 @@ static int pse_pi_disable(struct regulator_dev *rdev)
return ret;
}
+static int _pse_pi_get_voltage(struct regulator_dev *rdev)
+{
+ struct pse_controller_dev *pcdev = rdev_get_drvdata(rdev);
+ const struct pse_controller_ops *ops;
+ int id;
+
+ ops = pcdev->ops;
+ if (!ops->pi_get_voltage)
+ return -EOPNOTSUPP;
+
+ id = rdev_get_id(rdev);
+ return ops->pi_get_voltage(pcdev, id);
+}
+
+static int pse_pi_get_voltage(struct regulator_dev *rdev)
+{
+ struct pse_controller_dev *pcdev = rdev_get_drvdata(rdev);
+ int ret;
+
+ mutex_lock(&pcdev->lock);
+ ret = _pse_pi_get_voltage(rdev);
+ mutex_unlock(&pcdev->lock);
+
+ return ret;
+}
+
+static int _pse_ethtool_get_status(struct pse_controller_dev *pcdev,
+ int id,
+ struct netlink_ext_ack *extack,
+ struct pse_control_status *status);
+
+static int pse_pi_get_current_limit(struct regulator_dev *rdev)
+{
+ struct pse_controller_dev *pcdev = rdev_get_drvdata(rdev);
+ const struct pse_controller_ops *ops;
+ struct netlink_ext_ack extack = {};
+ struct pse_control_status st = {};
+ int id, uV, ret;
+ s64 tmp_64;
+
+ ops = pcdev->ops;
+ id = rdev_get_id(rdev);
+ mutex_lock(&pcdev->lock);
+ if (ops->pi_get_current_limit) {
+ ret = ops->pi_get_current_limit(pcdev, id);
+ goto out;
+ }
+
+ /* If pi_get_current_limit() callback not populated get voltage
+ * from pi_get_voltage() and power limit from ethtool_get_status()
+ * to calculate current limit.
+ */
+ ret = _pse_pi_get_voltage(rdev);
+ if (!ret) {
+ dev_err(pcdev->dev, "Voltage null\n");
+ ret = -ERANGE;
+ goto out;
+ }
+ if (ret < 0)
+ goto out;
+ uV = ret;
+
+ ret = _pse_ethtool_get_status(pcdev, id, &extack, &st);
+ if (ret)
+ goto out;
+
+ if (!st.c33_avail_pw_limit) {
+ ret = -ENODATA;
+ goto out;
+ }
+
+ tmp_64 = st.c33_avail_pw_limit;
+ tmp_64 *= 1000000000ull;
+ /* uA = mW * 1000000000 / uV */
+ ret = DIV_ROUND_CLOSEST_ULL(tmp_64, uV);
+
+out:
+ mutex_unlock(&pcdev->lock);
+ return ret;
+}
+
+static int pse_pi_set_current_limit(struct regulator_dev *rdev, int min_uA,
+ int max_uA)
+{
+ struct pse_controller_dev *pcdev = rdev_get_drvdata(rdev);
+ const struct pse_controller_ops *ops;
+ int id, ret;
+
+ ops = pcdev->ops;
+ if (!ops->pi_set_current_limit)
+ return -EOPNOTSUPP;
+
+ id = rdev_get_id(rdev);
+ mutex_lock(&pcdev->lock);
+ ret = ops->pi_set_current_limit(pcdev, id, max_uA);
+ mutex_unlock(&pcdev->lock);
+
+ return ret;
+}
+
static const struct regulator_ops pse_pi_ops = {
.is_enabled = pse_pi_is_enabled,
.enable = pse_pi_enable,
.disable = pse_pi_disable,
+ .get_voltage = pse_pi_get_voltage,
+ .get_current_limit = pse_pi_get_current_limit,
+ .set_current_limit = pse_pi_set_current_limit,
};
static int
@@ -298,7 +401,9 @@ devm_pse_pi_regulator_register(struct pse_controller_dev *pcdev,
rdesc->ops = &pse_pi_ops;
rdesc->owner = pcdev->owner;
- rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS;
+ rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS |
+ REGULATOR_CHANGE_CURRENT;
+ rinit_data->constraints.max_uA = MAX_PI_CURRENT;
rinit_data->supply_regulator = "vpwr";
rconfig.dev = pcdev->dev;
@@ -626,6 +731,23 @@ out:
}
EXPORT_SYMBOL_GPL(of_pse_control_get);
+static int _pse_ethtool_get_status(struct pse_controller_dev *pcdev,
+ int id,
+ struct netlink_ext_ack *extack,
+ struct pse_control_status *status)
+{
+ const struct pse_controller_ops *ops;
+
+ ops = pcdev->ops;
+ if (!ops->ethtool_get_status) {
+ NL_SET_ERR_MSG(extack,
+ "PSE driver does not support status report");
+ return -EOPNOTSUPP;
+ }
+
+ return ops->ethtool_get_status(pcdev, id, extack, status);
+}
+
/**
* pse_ethtool_get_status - get status of PSE control
* @psec: PSE control pointer
@@ -638,19 +760,10 @@ int pse_ethtool_get_status(struct pse_control *psec,
struct netlink_ext_ack *extack,
struct pse_control_status *status)
{
- const struct pse_controller_ops *ops;
int err;
- ops = psec->pcdev->ops;
-
- if (!ops->ethtool_get_status) {
- NL_SET_ERR_MSG(extack,
- "PSE driver does not support status report");
- return -EOPNOTSUPP;
- }
-
mutex_lock(&psec->pcdev->lock);
- err = ops->ethtool_get_status(psec->pcdev, psec->id, extack, status);
+ err = _pse_ethtool_get_status(psec->pcdev, psec->id, extack, status);
mutex_unlock(&psec->pcdev->lock);
return err;
@@ -732,6 +845,43 @@ int pse_ethtool_set_config(struct pse_control *psec,
}
EXPORT_SYMBOL_GPL(pse_ethtool_set_config);
+/**
+ * pse_ethtool_set_pw_limit - set PSE control power limit
+ * @psec: PSE control pointer
+ * @extack: extack for reporting useful error messages
+ * @pw_limit: power limit value in mW
+ *
+ * Return: 0 on success and failure value on error
+ */
+int pse_ethtool_set_pw_limit(struct pse_control *psec,
+ struct netlink_ext_ack *extack,
+ const unsigned int pw_limit)
+{
+ int uV, uA, ret;
+ s64 tmp_64;
+
+ ret = regulator_get_voltage(psec->ps);
+ if (!ret) {
+ NL_SET_ERR_MSG(extack,
+ "Can't calculate the current, PSE voltage read is 0");
+ return -ERANGE;
+ }
+ if (ret < 0) {
+ NL_SET_ERR_MSG(extack,
+ "Error reading PSE voltage");
+ return ret;
+ }
+ uV = ret;
+
+ tmp_64 = pw_limit;
+ tmp_64 *= 1000000000ull;
+ /* uA = mW * 1000000000 / uV */
+ uA = DIV_ROUND_CLOSEST_ULL(tmp_64, uV);
+
+ return regulator_set_current_limit(psec->ps, 0, uA);
+}
+EXPORT_SYMBOL_GPL(pse_ethtool_set_pw_limit);
+
bool pse_has_podl(struct pse_control *psec)
{
return psec->pcdev->types & ETHTOOL_PSE_PODL;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9254bca2813d..9b24861464bc 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1661,6 +1661,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
int len, int *skb_xdp)
{
struct page_frag *alloc_frag = &current->task_frag;
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct bpf_prog *xdp_prog;
int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
char *buf;
@@ -1700,6 +1701,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
local_bh_disable();
rcu_read_lock();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog) {
struct xdp_buff xdp;
@@ -1728,12 +1730,14 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
pad = xdp.data - xdp.data_hard_start;
len = xdp.data_end - xdp.data;
}
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
out:
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
return NULL;
@@ -2566,6 +2570,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
ctl && ctl->type == TUN_MSG_PTR) {
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct tun_page tpage;
int n = ctl->num;
int flush = 0, queued = 0;
@@ -2574,6 +2579,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
local_bh_disable();
rcu_read_lock();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
for (i = 0; i < n; i++) {
xdp = &((struct xdp_buff *)ctl->ptr)[i];
@@ -2588,6 +2594,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (tfile->napi_enabled && queued > 0)
napi_schedule(&tfile->napi);
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
index 0ba714ca5185..4b8528206cc8 100644
--- a/drivers/net/wireguard/allowedips.c
+++ b/drivers/net/wireguard/allowedips.c
@@ -15,8 +15,8 @@ static void swap_endian(u8 *dst, const u8 *src, u8 bits)
if (bits == 32) {
*(u32 *)dst = be32_to_cpu(*(const __be32 *)src);
} else if (bits == 128) {
- ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]);
- ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]);
+ ((u64 *)dst)[0] = get_unaligned_be64(src);
+ ((u64 *)dst)[1] = get_unaligned_be64(src + 8);
}
}
diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
index 1ea4f874e367..7eb76724b3ed 100644
--- a/drivers/net/wireguard/queueing.h
+++ b/drivers/net/wireguard/queueing.h
@@ -124,10 +124,10 @@ static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
*/
static inline int wg_cpumask_next_online(int *last_cpu)
{
- int cpu = cpumask_next(*last_cpu, cpu_online_mask);
+ int cpu = cpumask_next(READ_ONCE(*last_cpu), cpu_online_mask);
if (cpu >= nr_cpu_ids)
cpu = cpumask_first(cpu_online_mask);
- *last_cpu = cpu;
+ WRITE_ONCE(*last_cpu, cpu);
return cpu;
}
diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
index 0d48e0f4a1ba..26e09c30d596 100644
--- a/drivers/net/wireguard/send.c
+++ b/drivers/net/wireguard/send.c
@@ -222,7 +222,7 @@ void wg_packet_send_keepalive(struct wg_peer *peer)
{
struct sk_buff *skb;
- if (skb_queue_empty(&peer->staged_packet_queue)) {
+ if (skb_queue_empty_lockless(&peer->staged_packet_queue)) {
skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH,
GFP_ATOMIC);
if (unlikely(!skb))
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 41aba087c1a6..835a05b91833 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -660,7 +660,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
hw->wiphy->features |= NL80211_FEATURE_WFA_TPC_IE_IN_PROBES;
if (iwl_fw_lookup_cmd_ver(mvm->fw, WOWLAN_KEK_KCK_MATERIAL,
- IWL_FW_CMD_VER_UNKNOWN) == 3)
+ IWL_FW_CMD_VER_UNKNOWN) >= 3)
hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK;
if (fw_has_api(&mvm->fw->ucode_capa,
@@ -1692,7 +1692,8 @@ static void iwl_mvm_prevent_esr_done_wk(struct wiphy *wiphy,
struct iwl_mvm_vif *mvmvif =
container_of(wk, struct iwl_mvm_vif, prevent_esr_done_wk.work);
struct iwl_mvm *mvm = mvmvif->mvm;
- struct ieee80211_vif *vif = iwl_mvm_get_bss_vif(mvm);
+ struct ieee80211_vif *vif =
+ container_of((void *)mvmvif, struct ieee80211_vif, drv_priv);
guard(mvm)(mvm);
iwl_mvm_unblock_esr(mvm, vif, IWL_MVM_ESR_BLOCKED_PREVENTION);
@@ -1714,7 +1715,8 @@ static void iwl_mvm_unblock_esr_tpt(struct wiphy *wiphy, struct wiphy_work *wk)
struct iwl_mvm_vif *mvmvif =
container_of(wk, struct iwl_mvm_vif, unblock_esr_tpt_wk);
struct iwl_mvm *mvm = mvmvif->mvm;
- struct ieee80211_vif *vif = iwl_mvm_get_bss_vif(mvm);
+ struct ieee80211_vif *vif =
+ container_of((void *)mvmvif, struct ieee80211_vif, drv_priv);
guard(mvm)(mvm);
iwl_mvm_unblock_esr(mvm, vif, IWL_MVM_ESR_BLOCKED_TPT);
@@ -6406,11 +6408,9 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
if (sync) {
lockdep_assert_held(&mvm->mutex);
ret = wait_event_timeout(mvm->rx_sync_waitq,
- READ_ONCE(mvm->queue_sync_state) == 0 ||
- iwl_mvm_is_radio_hw_killed(mvm),
+ READ_ONCE(mvm->queue_sync_state) == 0,
SYNC_RX_QUEUE_TIMEOUT);
- WARN_ONCE(!ret && !iwl_mvm_is_radio_hw_killed(mvm),
- "queue sync: failed to sync, state is 0x%lx, cookie %d\n",
+ WARN_ONCE(!ret, "queue sync: failed to sync, state is 0x%lx, cookie %d\n",
mvm->queue_sync_state,
mvm->queue_sync_cookie);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index df0de859fed4..b7dcae76a05d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -149,7 +149,7 @@ static void iwl_mvm_rx_esr_mode_notif(struct iwl_mvm *mvm,
struct ieee80211_vif *vif = iwl_mvm_get_bss_vif(mvm);
/* FW recommendations is only for entering EMLSR */
- if (!vif || iwl_mvm_vif_from_mac80211(vif)->esr_active)
+ if (IS_ERR_OR_NULL(vif) || iwl_mvm_vif_from_mac80211(vif)->esr_active)
return;
if (le32_to_cpu(notif->action) == ESR_RECOMMEND_ENTER)
@@ -1898,12 +1898,10 @@ static bool iwl_mvm_set_hw_rfkill_state(struct iwl_op_mode *op_mode, bool state)
bool rfkill_safe_init_done = READ_ONCE(mvm->rfkill_safe_init_done);
bool unified = iwl_mvm_has_unified_ucode(mvm);
- if (state) {
+ if (state)
set_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status);
- wake_up(&mvm->rx_sync_waitq);
- } else {
+ else
clear_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status);
- }
iwl_mvm_set_rfkill_state(mvm);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
index b7185ddcca87..151289e13308 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
@@ -557,12 +557,10 @@ struct iwl_mvm_stat_data_all_macs {
};
static void iwl_mvm_update_link_sig(struct ieee80211_vif *vif, int sig,
- struct iwl_mvm_vif_link_info *link_info)
+ struct iwl_mvm_vif_link_info *link_info,
+ struct ieee80211_bss_conf *bss_conf)
{
struct iwl_mvm *mvm = iwl_mvm_vif_from_mac80211(vif)->mvm;
- struct ieee80211_bss_conf *bss_conf =
- iwl_mvm_rcu_fw_link_id_to_link_conf(mvm, link_info->fw_link_id,
- false);
int thold = bss_conf->cqm_rssi_thold;
int hyst = bss_conf->cqm_rssi_hyst;
int last_event;
@@ -670,7 +668,7 @@ static void iwl_mvm_stat_iterator(void *_data, u8 *mac,
mvmvif->deflink.beacon_stats.num_beacons;
/* This is used in pre-MLO API so use deflink */
- iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink);
+ iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink, &vif->bss_conf);
}
static void iwl_mvm_stat_iterator_all_macs(void *_data, u8 *mac,
@@ -705,7 +703,7 @@ static void iwl_mvm_stat_iterator_all_macs(void *_data, u8 *mac,
sig = -le32_to_cpu(mac_stats->beacon_filter_average_energy);
/* This is used in pre-MLO API so use deflink */
- iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink);
+ iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink, &vif->bss_conf);
}
static inline void
@@ -921,7 +919,8 @@ iwl_mvm_stat_iterator_all_links(struct iwl_mvm *mvm,
mvmvif->link[link_id]->beacon_stats.num_beacons;
sig = -le32_to_cpu(link_stats->beacon_filter_average_energy);
- iwl_mvm_update_link_sig(bss_conf->vif, sig, link_info);
+ iwl_mvm_update_link_sig(bss_conf->vif, sig, link_info,
+ bss_conf);
if (WARN_ONCE(mvmvif->id >= MAC_INDEX_AUX,
"invalid mvmvif id: %d", mvmvif->id))
@@ -967,7 +966,7 @@ static void iwl_mvm_update_esr_mode_tpt(struct iwl_mvm *mvm)
lockdep_assert_held(&mvm->mutex);
- if (!bss_vif)
+ if (IS_ERR_OR_NULL(bss_vif))
return;
mvmvif = iwl_mvm_vif_from_mac80211(bss_vif);
diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c
index d67293142ffb..3c48e1a57b24 100644
--- a/drivers/net/wireless/microchip/wilc1000/hif.c
+++ b/drivers/net/wireless/microchip/wilc1000/hif.c
@@ -382,7 +382,8 @@ wilc_parse_join_bss_param(struct cfg80211_bss *bss,
struct ieee80211_p2p_noa_attr noa_attr;
const struct cfg80211_bss_ies *ies;
struct wilc_join_bss_param *param;
- u8 rates_len = 0, ies_len;
+ u8 rates_len = 0;
+ int ies_len;
int ret;
param = kzalloc(sizeof(*param), GFP_KERNEL);
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index dd6ec0865141..0cfa39361d3b 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1602,4 +1602,5 @@ static struct platform_driver apple_nvme_driver = {
module_platform_driver(apple_nvme_driver);
MODULE_AUTHOR("Sven Peter <[email protected]>");
+MODULE_DESCRIPTION("Apple ANS NVM Express device driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index f3a41133ac3f..68b400f9c42d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -502,7 +502,7 @@ static inline bool nvme_ns_head_multipath(struct nvme_ns_head *head)
enum nvme_ns_features {
NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */
NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */
- NVME_NS_DEAC, /* DEAC bit in Write Zeores supported */
+ NVME_NS_DEAC = 1 << 2, /* DEAC bit in Write Zeores supported */
};
struct nvme_ns {
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index bd87dfd173a4..685e89b35d33 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -410,7 +410,29 @@ static ssize_t nvmet_addr_tsas_show(struct config_item *item,
return sprintf(page, "%s\n", nvmet_addr_tsas_rdma[i].name);
}
}
- return sprintf(page, "reserved\n");
+ return sprintf(page, "\n");
+}
+
+static u8 nvmet_addr_tsas_rdma_store(const char *page)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_rdma); i++) {
+ if (sysfs_streq(page, nvmet_addr_tsas_rdma[i].name))
+ return nvmet_addr_tsas_rdma[i].type;
+ }
+ return NVMF_RDMA_QPTYPE_INVALID;
+}
+
+static u8 nvmet_addr_tsas_tcp_store(const char *page)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) {
+ if (sysfs_streq(page, nvmet_addr_tsas_tcp[i].name))
+ return nvmet_addr_tsas_tcp[i].type;
+ }
+ return NVMF_TCP_SECTYPE_INVALID;
}
static ssize_t nvmet_addr_tsas_store(struct config_item *item,
@@ -418,20 +440,19 @@ static ssize_t nvmet_addr_tsas_store(struct config_item *item,
{
struct nvmet_port *port = to_nvmet_port(item);
u8 treq = nvmet_port_disc_addr_treq_mask(port);
- u8 sectype;
- int i;
+ u8 sectype, qptype;
if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- if (port->disc_addr.trtype != NVMF_TRTYPE_TCP)
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) {
- if (sysfs_streq(page, nvmet_addr_tsas_tcp[i].name)) {
- sectype = nvmet_addr_tsas_tcp[i].type;
+ if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) {
+ qptype = nvmet_addr_tsas_rdma_store(page);
+ if (qptype == port->disc_addr.tsas.rdma.qptype)
+ return count;
+ } else if (port->disc_addr.trtype == NVMF_TRTYPE_TCP) {
+ sectype = nvmet_addr_tsas_tcp_store(page);
+ if (sectype != NVMF_TCP_SECTYPE_INVALID)
goto found;
- }
}
pr_err("Invalid value '%s' for tsas\n", page);
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 337ee1cb09ae..381b4394731f 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -148,7 +148,7 @@ struct nvmet_fc_tgt_queue {
struct workqueue_struct *work_q;
struct kref ref;
/* array of fcp_iods */
- struct nvmet_fc_fcp_iod fod[] __counted_by(sqsize);
+ struct nvmet_fc_fcp_iod fod[] /* __counted_by(sqsize) */;
} __aligned(sizeof(unsigned long long));
struct nvmet_fc_hostport {
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 462375b293e4..c94203ce65bb 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -81,7 +81,8 @@ EXPORT_SYMBOL_GPL(of_irq_find_parent);
/*
* These interrupt controllers abuse interrupt-map for unspeakable
* reasons and rely on the core code to *ignore* it (the drivers do
- * their own parsing of the property).
+ * their own parsing of the property). The PAsemi entry covers a
+ * non-sensical interrupt-map that is better left ignored.
*
* If you think of adding to the list for something *new*, think
* again. There is a high chance that you will be sent back to the
@@ -95,6 +96,7 @@ static const char * const of_irq_imap_abusers[] = {
"fsl,ls1043a-extirq",
"fsl,ls1088a-extirq",
"renesas,rza1-irqc",
+ "pasemi,rootbus",
NULL,
};
@@ -293,20 +295,8 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
imaplen -= imap - oldimap;
pr_debug(" -> imaplen=%d\n", imaplen);
}
- if (!match) {
- if (intc) {
- /*
- * The PASEMI Nemo is a known offender, so
- * let's only warn for anyone else.
- */
- WARN(!IS_ENABLED(CONFIG_PPC_PASEMI),
- "%pOF interrupt-map failed, using interrupt-controller\n",
- ipar);
- return 0;
- }
-
+ if (!match)
goto fail;
- }
/*
* Successfully parsed an interrupt-map translation; copy new
diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
index c5625dd9bf49..3a45879d85db 100644
--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -352,7 +352,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
struct irq_affinity *affd)
{
struct irq_affinity_desc *masks = NULL;
- struct msi_desc *entry;
+ struct msi_desc *entry, desc;
int ret;
/* Reject multi-MSI early on irq domain enabled architectures */
@@ -377,6 +377,12 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
/* All MSIs are unmasked by default; mask them all */
entry = msi_first_desc(&dev->dev, MSI_DESC_ALL);
pci_msi_mask(entry, msi_multi_mask(entry));
+ /*
+ * Copy the MSI descriptor for the error path because
+ * pci_msi_setup_msi_irqs() will free it for the hierarchical
+ * interrupt domain case.
+ */
+ memcpy(&desc, entry, sizeof(desc));
/* Configure MSI capability structure */
ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
@@ -396,7 +402,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec,
goto unlock;
err:
- pci_msi_unmask(entry, msi_multi_mask(entry));
+ pci_msi_unmask(&desc, msi_multi_mask(&desc));
pci_free_msi_irqs(dev);
fail:
dev->msi_enabled = 0;
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index 78c490e0505a..0a02e85a8951 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -167,7 +167,7 @@ u64 riscv_pmu_event_update(struct perf_event *event)
unsigned long cmask;
u64 oldval, delta;
- if (!rvpmu->ctr_read)
+ if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE))
return 0;
cmask = riscv_pmu_ctr_get_width_mask(event);
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index a2e4005e1fd0..4e842dcedfba 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -20,6 +20,7 @@
#include <linux/cpu_pm.h>
#include <linux/sched/clock.h>
#include <linux/soc/andes/irq.h>
+#include <linux/workqueue.h>
#include <asm/errata_list.h>
#include <asm/sbi.h>
@@ -114,7 +115,7 @@ struct sbi_pmu_event_data {
};
};
-static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
+static struct sbi_pmu_event_data pmu_hw_event_map[] = {
[PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = {
SBI_PMU_HW_CPU_CYCLES,
SBI_PMU_EVENT_TYPE_HW, 0}},
@@ -148,7 +149,7 @@ static const struct sbi_pmu_event_data pmu_hw_event_map[] = {
};
#define C(x) PERF_COUNT_HW_CACHE_##x
-static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
+static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
@@ -293,6 +294,34 @@ static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_M
},
};
+static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ 0, cmask, 0, edata->event_idx, 0, 0);
+ if (!ret.error) {
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
+ ret.value, 0x1, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
+ } else if (ret.error == SBI_ERR_NOT_SUPPORTED) {
+ /* This event cannot be monitored by any counter */
+ edata->event_idx = -EINVAL;
+ }
+}
+
+static void pmu_sbi_check_std_events(struct work_struct *work)
+{
+ for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++)
+ pmu_sbi_check_event(&pmu_hw_event_map[i]);
+
+ for (int i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++)
+ for (int j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++)
+ for (int k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++)
+ pmu_sbi_check_event(&pmu_cache_event_map[i][j][k]);
+}
+
+static DECLARE_WORK(check_std_events_work, pmu_sbi_check_std_events);
+
static int pmu_sbi_ctr_get_width(int idx)
{
return pmu_ctr_list[idx].width;
@@ -478,6 +507,12 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
u64 raw_config_val;
int ret;
+ /*
+ * Ensure we are finished checking standard hardware events for
+ * validity before allowing userspace to configure any events.
+ */
+ flush_work(&check_std_events_work);
+
switch (type) {
case PERF_TYPE_HARDWARE:
if (config >= PERF_COUNT_HW_MAX)
@@ -762,7 +797,7 @@ static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
* which may include counters that are not enabled yet.
*/
sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
- 0, pmu->cmask, 0, 0, 0, 0);
+ 0, pmu->cmask, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
}
static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
@@ -1359,6 +1394,9 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
if (ret)
goto out_unregister;
+ /* Asynchronously check which standard events are available */
+ schedule_work(&check_std_events_work);
+
return 0;
out_unregister:
diff --git a/drivers/platform/mellanox/nvsw-sn2201.c b/drivers/platform/mellanox/nvsw-sn2201.c
index 3ef655591424..abe7be602f84 100644
--- a/drivers/platform/mellanox/nvsw-sn2201.c
+++ b/drivers/platform/mellanox/nvsw-sn2201.c
@@ -1198,6 +1198,7 @@ static int nvsw_sn2201_config_pre_init(struct nvsw_sn2201 *nvsw_sn2201)
static int nvsw_sn2201_probe(struct platform_device *pdev)
{
struct nvsw_sn2201 *nvsw_sn2201;
+ int ret;
nvsw_sn2201 = devm_kzalloc(&pdev->dev, sizeof(*nvsw_sn2201), GFP_KERNEL);
if (!nvsw_sn2201)
@@ -1205,8 +1206,10 @@ static int nvsw_sn2201_probe(struct platform_device *pdev)
nvsw_sn2201->dev = &pdev->dev;
platform_set_drvdata(pdev, nvsw_sn2201);
- platform_device_add_resources(pdev, nvsw_sn2201_lpc_io_resources,
+ ret = platform_device_add_resources(pdev, nvsw_sn2201_lpc_io_resources,
ARRAY_SIZE(nvsw_sn2201_lpc_io_resources));
+ if (ret)
+ return ret;
nvsw_sn2201->main_mux_deferred_nr = NVSW_SN2201_MAIN_MUX_DEFER_NR;
nvsw_sn2201->main_mux_devs = nvsw_sn2201_main_mux_brdinfo;
diff --git a/drivers/platform/x86/amilo-rfkill.c b/drivers/platform/x86/amilo-rfkill.c
index efcf909786a5..2423dc91debb 100644
--- a/drivers/platform/x86/amilo-rfkill.c
+++ b/drivers/platform/x86/amilo-rfkill.c
@@ -171,6 +171,7 @@ static void __exit amilo_rfkill_exit(void)
}
MODULE_AUTHOR("Ben Hutchings <[email protected]>");
+MODULE_DESCRIPTION("Fujitsu-Siemens Amilo rfkill support");
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(dmi, amilo_rfkill_id_table);
diff --git a/drivers/platform/x86/firmware_attributes_class.c b/drivers/platform/x86/firmware_attributes_class.c
index dd8240009565..182a07d8ae3d 100644
--- a/drivers/platform/x86/firmware_attributes_class.c
+++ b/drivers/platform/x86/firmware_attributes_class.c
@@ -49,4 +49,5 @@ int fw_attributes_class_put(void)
EXPORT_SYMBOL_GPL(fw_attributes_class_put);
MODULE_AUTHOR("Mark Pearson <[email protected]>");
+MODULE_DESCRIPTION("Firmware attributes class helper module");
MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c
index 1d4bbae115f1..231b37909801 100644
--- a/drivers/platform/x86/ibm_rtl.c
+++ b/drivers/platform/x86/ibm_rtl.c
@@ -29,6 +29,7 @@ static bool debug;
module_param(debug, bool, 0644);
MODULE_PARM_DESC(debug, "Show debug output");
+MODULE_DESCRIPTION("IBM Premium Real Time Mode (PRTM) driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Keith Mannthey <[email protected]>");
MODULE_AUTHOR("Vernon Mauery <[email protected]>");
diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c
index c7a827645864..10cd65497cc1 100644
--- a/drivers/platform/x86/intel/hid.c
+++ b/drivers/platform/x86/intel/hid.c
@@ -38,6 +38,7 @@ MODULE_PARM_DESC(enable_sw_tablet_mode,
/* When NOT in tablet mode, VGBS returns with the flag 0x40 */
#define TABLET_MODE_FLAG BIT(6)
+MODULE_DESCRIPTION("Intel HID Event hotkey driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Alex Hung");
diff --git a/drivers/platform/x86/intel/pmc/pltdrv.c b/drivers/platform/x86/intel/pmc/pltdrv.c
index ddfba38c2104..f2cb87dc2d37 100644
--- a/drivers/platform/x86/intel/pmc/pltdrv.c
+++ b/drivers/platform/x86/intel/pmc/pltdrv.c
@@ -86,4 +86,5 @@ static void __exit pmc_core_platform_exit(void)
module_init(pmc_core_platform_init);
module_exit(pmc_core_platform_exit);
+MODULE_DESCRIPTION("Intel PMC Core platform driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel/rst.c b/drivers/platform/x86/intel/rst.c
index 6bc9c4a603e0..f3a60e14d4c1 100644
--- a/drivers/platform/x86/intel/rst.c
+++ b/drivers/platform/x86/intel/rst.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/slab.h>
+MODULE_DESCRIPTION("Intel Rapid Start Technology Driver");
MODULE_LICENSE("GPL");
static ssize_t irst_show_wakeup_events(struct device *dev,
diff --git a/drivers/platform/x86/intel/smartconnect.c b/drivers/platform/x86/intel/smartconnect.c
index cd25d0585324..31019a1a6d5e 100644
--- a/drivers/platform/x86/intel/smartconnect.c
+++ b/drivers/platform/x86/intel/smartconnect.c
@@ -6,6 +6,7 @@
#include <linux/acpi.h>
#include <linux/module.h>
+MODULE_DESCRIPTION("Intel Smart Connect disabling driver");
MODULE_LICENSE("GPL");
static int smartconnect_acpi_init(struct acpi_device *acpi)
diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c
index 84c1353eb12b..9b7ce03ba085 100644
--- a/drivers/platform/x86/intel/vbtn.c
+++ b/drivers/platform/x86/intel/vbtn.c
@@ -24,6 +24,7 @@
#define VGBS_TABLET_MODE_FLAGS (VGBS_TABLET_MODE_FLAG | VGBS_TABLET_MODE_FLAG_ALT)
+MODULE_DESCRIPTION("Intel Virtual Button driver");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("AceLan Kao");
diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c
index d0fee5d375d7..9c7857842caf 100644
--- a/drivers/platform/x86/lg-laptop.c
+++ b/drivers/platform/x86/lg-laptop.c
@@ -39,8 +39,6 @@ MODULE_LICENSE("GPL");
#define WMI_METHOD_WMBB "2B4F501A-BD3C-4394-8DCF-00A7D2BC8210"
#define WMI_EVENT_GUID WMI_EVENT_GUID0
-#define WMAB_METHOD "\\XINI.WMAB"
-#define WMBB_METHOD "\\XINI.WMBB"
#define SB_GGOV_METHOD "\\_SB.GGOV"
#define GOV_TLED 0x2020008
#define WM_GET 1
@@ -74,7 +72,7 @@ static u32 inited;
static int battery_limit_use_wmbb;
static struct led_classdev kbd_backlight;
-static enum led_brightness get_kbd_backlight_level(void);
+static enum led_brightness get_kbd_backlight_level(struct device *dev);
static const struct key_entry wmi_keymap[] = {
{KE_KEY, 0x70, {KEY_F15} }, /* LG control panel (F1) */
@@ -84,7 +82,6 @@ static const struct key_entry wmi_keymap[] = {
* this key both sends an event and
* changes backlight level.
*/
- {KE_KEY, 0x80, {KEY_RFKILL} },
{KE_END, 0}
};
@@ -128,11 +125,10 @@ static int ggov(u32 arg0)
return res;
}
-static union acpi_object *lg_wmab(u32 method, u32 arg1, u32 arg2)
+static union acpi_object *lg_wmab(struct device *dev, u32 method, u32 arg1, u32 arg2)
{
union acpi_object args[3];
acpi_status status;
- acpi_handle handle;
struct acpi_object_list arg;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -143,29 +139,22 @@ static union acpi_object *lg_wmab(u32 method, u32 arg1, u32 arg2)
args[2].type = ACPI_TYPE_INTEGER;
args[2].integer.value = arg2;
- status = acpi_get_handle(NULL, (acpi_string) WMAB_METHOD, &handle);
- if (ACPI_FAILURE(status)) {
- pr_err("Cannot get handle");
- return NULL;
- }
-
arg.count = 3;
arg.pointer = args;
- status = acpi_evaluate_object(handle, NULL, &arg, &buffer);
+ status = acpi_evaluate_object(ACPI_HANDLE(dev), "WMAB", &arg, &buffer);
if (ACPI_FAILURE(status)) {
- acpi_handle_err(handle, "WMAB: call failed.\n");
+ dev_err(dev, "WMAB: call failed.\n");
return NULL;
}
return buffer.pointer;
}
-static union acpi_object *lg_wmbb(u32 method_id, u32 arg1, u32 arg2)
+static union acpi_object *lg_wmbb(struct device *dev, u32 method_id, u32 arg1, u32 arg2)
{
union acpi_object args[3];
acpi_status status;
- acpi_handle handle;
struct acpi_object_list arg;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
u8 buf[32];
@@ -181,18 +170,12 @@ static union acpi_object *lg_wmbb(u32 method_id, u32 arg1, u32 arg2)
args[2].buffer.length = 32;
args[2].buffer.pointer = buf;
- status = acpi_get_handle(NULL, (acpi_string)WMBB_METHOD, &handle);
- if (ACPI_FAILURE(status)) {
- pr_err("Cannot get handle");
- return NULL;
- }
-
arg.count = 3;
arg.pointer = args;
- status = acpi_evaluate_object(handle, NULL, &arg, &buffer);
+ status = acpi_evaluate_object(ACPI_HANDLE(dev), "WMBB", &arg, &buffer);
if (ACPI_FAILURE(status)) {
- acpi_handle_err(handle, "WMAB: call failed.\n");
+ dev_err(dev, "WMBB: call failed.\n");
return NULL;
}
@@ -223,7 +206,7 @@ static void wmi_notify(u32 value, void *context)
if (eventcode == 0x10000000) {
led_classdev_notify_brightness_hw_changed(
- &kbd_backlight, get_kbd_backlight_level());
+ &kbd_backlight, get_kbd_backlight_level(kbd_backlight.dev->parent));
} else {
key = sparse_keymap_entry_from_scancode(
wmi_input_dev, eventcode);
@@ -272,14 +255,7 @@ static void wmi_input_setup(void)
static void acpi_notify(struct acpi_device *device, u32 event)
{
- struct key_entry *key;
-
acpi_handle_debug(device->handle, "notify: %d\n", event);
- if (inited & INIT_SPARSE_KEYMAP) {
- key = sparse_keymap_entry_from_scancode(wmi_input_dev, 0x80);
- if (key && key->type == KE_KEY)
- sparse_keymap_report_entry(wmi_input_dev, key, 1, true);
- }
}
static ssize_t fan_mode_store(struct device *dev,
@@ -295,7 +271,7 @@ static ssize_t fan_mode_store(struct device *dev,
if (ret)
return ret;
- r = lg_wmab(WM_FAN_MODE, WM_GET, 0);
+ r = lg_wmab(dev, WM_FAN_MODE, WM_GET, 0);
if (!r)
return -EIO;
@@ -306,9 +282,9 @@ static ssize_t fan_mode_store(struct device *dev,
m = r->integer.value;
kfree(r);
- r = lg_wmab(WM_FAN_MODE, WM_SET, (m & 0xffffff0f) | (value << 4));
+ r = lg_wmab(dev, WM_FAN_MODE, WM_SET, (m & 0xffffff0f) | (value << 4));
kfree(r);
- r = lg_wmab(WM_FAN_MODE, WM_SET, (m & 0xfffffff0) | value);
+ r = lg_wmab(dev, WM_FAN_MODE, WM_SET, (m & 0xfffffff0) | value);
kfree(r);
return count;
@@ -320,7 +296,7 @@ static ssize_t fan_mode_show(struct device *dev,
unsigned int status;
union acpi_object *r;
- r = lg_wmab(WM_FAN_MODE, WM_GET, 0);
+ r = lg_wmab(dev, WM_FAN_MODE, WM_GET, 0);
if (!r)
return -EIO;
@@ -347,7 +323,7 @@ static ssize_t usb_charge_store(struct device *dev,
if (ret)
return ret;
- r = lg_wmbb(WMBB_USB_CHARGE, WM_SET, value);
+ r = lg_wmbb(dev, WMBB_USB_CHARGE, WM_SET, value);
if (!r)
return -EIO;
@@ -361,7 +337,7 @@ static ssize_t usb_charge_show(struct device *dev,
unsigned int status;
union acpi_object *r;
- r = lg_wmbb(WMBB_USB_CHARGE, WM_GET, 0);
+ r = lg_wmbb(dev, WMBB_USB_CHARGE, WM_GET, 0);
if (!r)
return -EIO;
@@ -389,7 +365,7 @@ static ssize_t reader_mode_store(struct device *dev,
if (ret)
return ret;
- r = lg_wmab(WM_READER_MODE, WM_SET, value);
+ r = lg_wmab(dev, WM_READER_MODE, WM_SET, value);
if (!r)
return -EIO;
@@ -403,7 +379,7 @@ static ssize_t reader_mode_show(struct device *dev,
unsigned int status;
union acpi_object *r;
- r = lg_wmab(WM_READER_MODE, WM_GET, 0);
+ r = lg_wmab(dev, WM_READER_MODE, WM_GET, 0);
if (!r)
return -EIO;
@@ -431,7 +407,7 @@ static ssize_t fn_lock_store(struct device *dev,
if (ret)
return ret;
- r = lg_wmab(WM_FN_LOCK, WM_SET, value);
+ r = lg_wmab(dev, WM_FN_LOCK, WM_SET, value);
if (!r)
return -EIO;
@@ -445,7 +421,7 @@ static ssize_t fn_lock_show(struct device *dev,
unsigned int status;
union acpi_object *r;
- r = lg_wmab(WM_FN_LOCK, WM_GET, 0);
+ r = lg_wmab(dev, WM_FN_LOCK, WM_GET, 0);
if (!r)
return -EIO;
@@ -475,9 +451,9 @@ static ssize_t charge_control_end_threshold_store(struct device *dev,
union acpi_object *r;
if (battery_limit_use_wmbb)
- r = lg_wmbb(WMBB_BATT_LIMIT, WM_SET, value);
+ r = lg_wmbb(&pf_device->dev, WMBB_BATT_LIMIT, WM_SET, value);
else
- r = lg_wmab(WM_BATT_LIMIT, WM_SET, value);
+ r = lg_wmab(&pf_device->dev, WM_BATT_LIMIT, WM_SET, value);
if (!r)
return -EIO;
@@ -496,7 +472,7 @@ static ssize_t charge_control_end_threshold_show(struct device *device,
union acpi_object *r;
if (battery_limit_use_wmbb) {
- r = lg_wmbb(WMBB_BATT_LIMIT, WM_GET, 0);
+ r = lg_wmbb(&pf_device->dev, WMBB_BATT_LIMIT, WM_GET, 0);
if (!r)
return -EIO;
@@ -507,7 +483,7 @@ static ssize_t charge_control_end_threshold_show(struct device *device,
status = r->buffer.pointer[0x10];
} else {
- r = lg_wmab(WM_BATT_LIMIT, WM_GET, 0);
+ r = lg_wmab(&pf_device->dev, WM_BATT_LIMIT, WM_GET, 0);
if (!r)
return -EIO;
@@ -586,7 +562,7 @@ static void tpad_led_set(struct led_classdev *cdev,
{
union acpi_object *r;
- r = lg_wmab(WM_TLED, WM_SET, brightness > LED_OFF);
+ r = lg_wmab(cdev->dev->parent, WM_TLED, WM_SET, brightness > LED_OFF);
kfree(r);
}
@@ -608,16 +584,16 @@ static void kbd_backlight_set(struct led_classdev *cdev,
val = 0;
if (brightness >= LED_FULL)
val = 0x24;
- r = lg_wmab(WM_KEY_LIGHT, WM_SET, val);
+ r = lg_wmab(cdev->dev->parent, WM_KEY_LIGHT, WM_SET, val);
kfree(r);
}
-static enum led_brightness get_kbd_backlight_level(void)
+static enum led_brightness get_kbd_backlight_level(struct device *dev)
{
union acpi_object *r;
int val;
- r = lg_wmab(WM_KEY_LIGHT, WM_GET, 0);
+ r = lg_wmab(dev, WM_KEY_LIGHT, WM_GET, 0);
if (!r)
return LED_OFF;
@@ -645,7 +621,7 @@ static enum led_brightness get_kbd_backlight_level(void)
static enum led_brightness kbd_backlight_get(struct led_classdev *cdev)
{
- return get_kbd_backlight_level();
+ return get_kbd_backlight_level(cdev->dev->parent);
}
static LED_DEVICE(kbd_backlight, 255, LED_BRIGHT_HW_CHANGED);
@@ -672,6 +648,11 @@ static struct platform_driver pf_driver = {
static int acpi_add(struct acpi_device *device)
{
+ struct platform_device_info pdev_info = {
+ .fwnode = acpi_fwnode_handle(device),
+ .name = PLATFORM_NAME,
+ .id = PLATFORM_DEVID_NONE,
+ };
int ret;
const char *product;
int year = 2017;
@@ -683,9 +664,7 @@ static int acpi_add(struct acpi_device *device)
if (ret)
return ret;
- pf_device = platform_device_register_simple(PLATFORM_NAME,
- PLATFORM_DEVID_NONE,
- NULL, 0);
+ pf_device = platform_device_register_full(&pdev_info);
if (IS_ERR(pf_device)) {
ret = PTR_ERR(pf_device);
pf_device = NULL;
@@ -776,7 +755,7 @@ static void acpi_remove(struct acpi_device *device)
}
static const struct acpi_device_id device_ids[] = {
- {"LGEX0815", 0},
+ {"LGEX0820", 0},
{"", 0}
};
MODULE_DEVICE_TABLE(acpi, device_ids);
diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c b/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c
index 31a139d87d9a..5edc294de6e4 100644
--- a/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c
+++ b/drivers/platform/x86/siemens/simatic-ipc-batt-apollolake.c
@@ -45,6 +45,7 @@ static struct platform_driver simatic_ipc_batt_driver = {
module_platform_driver(simatic_ipc_batt_driver);
+MODULE_DESCRIPTION("CMOS Battery monitoring for Simatic IPCs based on Apollo Lake GPIO");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:" KBUILD_MODNAME);
MODULE_SOFTDEP("pre: simatic-ipc-batt platform:apollolake-pinctrl");
diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c b/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c
index a7676f224075..e6a56d14b505 100644
--- a/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c
+++ b/drivers/platform/x86/siemens/simatic-ipc-batt-elkhartlake.c
@@ -45,6 +45,7 @@ static struct platform_driver simatic_ipc_batt_driver = {
module_platform_driver(simatic_ipc_batt_driver);
+MODULE_DESCRIPTION("CMOS Battery monitoring for Simatic IPCs based on Elkhart Lake GPIO");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:" KBUILD_MODNAME);
MODULE_SOFTDEP("pre: simatic-ipc-batt platform:elkhartlake-pinctrl");
diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c b/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c
index 5e77e05fdb5d..f8849d0e48a8 100644
--- a/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c
+++ b/drivers/platform/x86/siemens/simatic-ipc-batt-f7188x.c
@@ -81,6 +81,7 @@ static struct platform_driver simatic_ipc_batt_driver = {
module_platform_driver(simatic_ipc_batt_driver);
+MODULE_DESCRIPTION("CMOS Battery monitoring for Simatic IPCs based on Nuvoton GPIO");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:" KBUILD_MODNAME);
MODULE_SOFTDEP("pre: simatic-ipc-batt gpio_f7188x platform:elkhartlake-pinctrl platform:alderlake-pinctrl");
diff --git a/drivers/platform/x86/siemens/simatic-ipc-batt.c b/drivers/platform/x86/siemens/simatic-ipc-batt.c
index c6dd263b4ee3..d9aff10608cf 100644
--- a/drivers/platform/x86/siemens/simatic-ipc-batt.c
+++ b/drivers/platform/x86/siemens/simatic-ipc-batt.c
@@ -247,6 +247,7 @@ static struct platform_driver simatic_ipc_batt_driver = {
module_platform_driver(simatic_ipc_batt_driver);
+MODULE_DESCRIPTION("CMOS core battery driver for Siemens Simatic IPCs");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:" KBUILD_MODNAME);
MODULE_AUTHOR("Henning Schild <[email protected]>");
diff --git a/drivers/platform/x86/siemens/simatic-ipc.c b/drivers/platform/x86/siemens/simatic-ipc.c
index 8ca6e277fa03..7039874d8f11 100644
--- a/drivers/platform/x86/siemens/simatic-ipc.c
+++ b/drivers/platform/x86/siemens/simatic-ipc.c
@@ -231,6 +231,7 @@ static void __exit simatic_ipc_exit_module(void)
module_init(simatic_ipc_init_module);
module_exit(simatic_ipc_exit_module);
+MODULE_DESCRIPTION("Siemens SIMATIC IPC platform driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Gerd Haeussler <[email protected]>");
MODULE_ALIAS("dmi:*:svnSIEMENSAG:*");
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 3a8d8df89186..78a5aac2dcfd 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -3271,7 +3271,7 @@ static const char *find_hci_method(acpi_handle handle)
*/
#define QUIRK_HCI_HOTKEY_QUICKSTART BIT(1)
-static const struct dmi_system_id toshiba_dmi_quirks[] = {
+static const struct dmi_system_id toshiba_dmi_quirks[] __initconst = {
{
/* Toshiba Portégé R700 */
/* https://bugzilla.kernel.org/show_bug.cgi?id=21012 */
@@ -3299,6 +3299,7 @@ static const struct dmi_system_id toshiba_dmi_quirks[] = {
},
.driver_data = (void *)(QUIRK_TURN_ON_PANEL_ON_RESUME | QUIRK_HCI_HOTKEY_QUICKSTART),
},
+ { }
};
static int toshiba_acpi_add(struct acpi_device *acpi_dev)
@@ -3306,8 +3307,6 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev)
struct toshiba_acpi_dev *dev;
const char *hci_method;
u32 dummy;
- const struct dmi_system_id *dmi_id;
- long quirks = 0;
int ret = 0;
if (toshiba_acpi)
@@ -3460,16 +3459,6 @@ iio_error:
}
#endif
- dmi_id = dmi_first_match(toshiba_dmi_quirks);
- if (dmi_id)
- quirks = (long)dmi_id->driver_data;
-
- if (turn_on_panel_on_resume == -1)
- turn_on_panel_on_resume = !!(quirks & QUIRK_TURN_ON_PANEL_ON_RESUME);
-
- if (hci_hotkey_quickstart == -1)
- hci_hotkey_quickstart = !!(quirks & QUIRK_HCI_HOTKEY_QUICKSTART);
-
toshiba_wwan_available(dev);
if (dev->wwan_supported)
toshiba_acpi_setup_wwan_rfkill(dev);
@@ -3618,10 +3607,27 @@ static struct acpi_driver toshiba_acpi_driver = {
.drv.pm = &toshiba_acpi_pm,
};
+static void __init toshiba_dmi_init(void)
+{
+ const struct dmi_system_id *dmi_id;
+ long quirks = 0;
+
+ dmi_id = dmi_first_match(toshiba_dmi_quirks);
+ if (dmi_id)
+ quirks = (long)dmi_id->driver_data;
+
+ if (turn_on_panel_on_resume == -1)
+ turn_on_panel_on_resume = !!(quirks & QUIRK_TURN_ON_PANEL_ON_RESUME);
+
+ if (hci_hotkey_quickstart == -1)
+ hci_hotkey_quickstart = !!(quirks & QUIRK_HCI_HOTKEY_QUICKSTART);
+}
+
static int __init toshiba_acpi_init(void)
{
int ret;
+ toshiba_dmi_init();
toshiba_proc_dir = proc_mkdir(PROC_TOSHIBA, acpi_root_dir);
if (!toshiba_proc_dir) {
pr_err("Unable to create proc dir " PROC_TOSHIBA "\n");
diff --git a/drivers/platform/x86/uv_sysfs.c b/drivers/platform/x86/uv_sysfs.c
index 37372d7cc54a..f6a0627f36db 100644
--- a/drivers/platform/x86/uv_sysfs.c
+++ b/drivers/platform/x86/uv_sysfs.c
@@ -929,4 +929,5 @@ module_init(uv_sysfs_init);
module_exit(uv_sysfs_exit);
MODULE_AUTHOR("Hewlett Packard Enterprise");
+MODULE_DESCRIPTION("Sysfs structure for HPE UV systems");
MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/wireless-hotkey.c b/drivers/platform/x86/wireless-hotkey.c
index e95cdbbfb708..a220fe4f9ef8 100644
--- a/drivers/platform/x86/wireless-hotkey.c
+++ b/drivers/platform/x86/wireless-hotkey.c
@@ -14,11 +14,13 @@
#include <linux/acpi.h>
#include <acpi/acpi_bus.h>
+MODULE_DESCRIPTION("Airplane mode button for AMD, HP & Xiaomi laptops");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Alex Hung");
MODULE_ALIAS("acpi*:HPQ6001:*");
MODULE_ALIAS("acpi*:WSTADEF:*");
MODULE_ALIAS("acpi*:AMDI0051:*");
+MODULE_ALIAS("acpi*:LGEX0815:*");
struct wl_button {
struct input_dev *input_dev;
@@ -29,6 +31,7 @@ static const struct acpi_device_id wl_ids[] = {
{"HPQ6001", 0},
{"WSTADEF", 0},
{"AMDI0051", 0},
+ {"LGEX0815", 0},
{"", 0},
};
diff --git a/drivers/platform/x86/xo1-rfkill.c b/drivers/platform/x86/xo1-rfkill.c
index e64d5646b4c7..5fe68296501c 100644
--- a/drivers/platform/x86/xo1-rfkill.c
+++ b/drivers/platform/x86/xo1-rfkill.c
@@ -74,5 +74,6 @@ static struct platform_driver xo1_rfkill_driver = {
module_platform_driver(xo1_rfkill_driver);
MODULE_AUTHOR("Daniel Drake <[email protected]>");
+MODULE_DESCRIPTION("OLPC XO-1 software RF kill switch");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:xo1-rfkill");
diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig
index 7112f5932609..6bb5d9e372e4 100644
--- a/drivers/reset/Kconfig
+++ b/drivers/reset/Kconfig
@@ -68,6 +68,7 @@ config RESET_BRCMSTB_RESCAL
config RESET_GPIO
tristate "GPIO reset controller"
+ depends on GPIOLIB
help
This enables a generic reset controller for resets attached via
GPIOs. Typically for OF platforms this driver expects "reset-gpios"
diff --git a/drivers/reset/hisilicon/hi6220_reset.c b/drivers/reset/hisilicon/hi6220_reset.c
index 5c3267acd2b1..65aa5ff5ed82 100644
--- a/drivers/reset/hisilicon/hi6220_reset.c
+++ b/drivers/reset/hisilicon/hi6220_reset.c
@@ -219,4 +219,5 @@ static int __init hi6220_reset_init(void)
postcore_initcall(hi6220_reset_init);
+MODULE_DESCRIPTION("Hisilicon Hi6220 reset controller driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 2f16f543079b..a76c6af9ea63 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -4906,7 +4906,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req)
ccw++;
if (dst) {
if (ccw->flags & CCW_FLAG_IDA)
- cda = *((char **)dma32_to_virt(ccw->cda));
+ cda = dma64_to_virt(*((dma64_t *)dma32_to_virt(ccw->cda)));
else
cda = dma32_to_virt(ccw->cda);
if (dst != cda) {
@@ -5525,7 +5525,7 @@ dasd_eckd_dump_ccw_range(struct dasd_device *device, struct ccw1 *from,
/* get pointer to data (consider IDALs) */
if (from->flags & CCW_FLAG_IDA)
- datap = (char *)*((addr_t *)dma32_to_virt(from->cda));
+ datap = dma64_to_virt(*((dma64_t *)dma32_to_virt(from->cda)));
else
datap = dma32_to_virt(from->cda);
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index 361e9bd75257..9f2023a077c2 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -585,7 +585,7 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req)
ccw++;
if (dst) {
if (ccw->flags & CCW_FLAG_IDA)
- cda = *((char **)dma32_to_virt(ccw->cda));
+ cda = dma64_to_virt(*((dma64_t *)dma32_to_virt(ccw->cda)));
else
cda = dma32_to_virt(ccw->cda);
if (dst != cda) {
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index d53ee34d398f..fbe29cabcbb8 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -1293,6 +1293,7 @@ sclp_init(void)
fail_unregister_reboot_notifier:
unregister_reboot_notifier(&sclp_reboot_notifier);
fail_init_state_uninitialized:
+ list_del(&sclp_state_change_event.list);
sclp_init_state = sclp_init_state_uninitialized;
free_page((unsigned long) sclp_read_sccb);
free_page((unsigned long) sclp_init_sccb);
diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
index 6e5c508b1e07..5f6e10225627 100644
--- a/drivers/s390/cio/vfio_ccw_cp.c
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -490,13 +490,14 @@ static int ccwchain_fetch_tic(struct ccw1 *ccw,
struct channel_program *cp)
{
struct ccwchain *iter;
- u32 cda, ccw_head;
+ u32 offset, ccw_head;
list_for_each_entry(iter, &cp->ccwchain_list, next) {
ccw_head = iter->ch_iova;
if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) {
- cda = (u64)iter->ch_ccw + dma32_to_u32(ccw->cda) - ccw_head;
- ccw->cda = u32_to_dma32(cda);
+ /* Calculate offset of TIC target */
+ offset = dma32_to_u32(ccw->cda) - ccw_head;
+ ccw->cda = virt_to_dma32((void *)iter->ch_ccw + offset);
return 0;
}
}
@@ -914,7 +915,7 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
* in the ioctl directly. Path status changes etc.
*/
list_for_each_entry(chain, &cp->ccwchain_list, next) {
- ccw_head = (u32)(u64)chain->ch_ccw;
+ ccw_head = dma32_to_u32(virt_to_dma32(chain->ch_ccw));
/*
* On successful execution, cpa points just beyond the end
* of the chain.
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index d7569f395559..d6491fc84e8c 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -698,6 +698,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
dma64_t *indicatorp = NULL;
int ret, i, queue_idx = 0;
struct ccw1 *ccw;
+ dma32_t indicatorp_dma = 0;
ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw), NULL);
if (!ccw)
@@ -725,7 +726,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
*/
indicatorp = ccw_device_dma_zalloc(vcdev->cdev,
sizeof(*indicatorp),
- &ccw->cda);
+ &indicatorp_dma);
if (!indicatorp)
goto out;
*indicatorp = indicators_dma(vcdev);
@@ -735,6 +736,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
/* no error, just fall back to legacy interrupts */
vcdev->is_thinint = false;
}
+ ccw->cda = indicatorp_dma;
if (!vcdev->is_thinint) {
/* Register queue indicators with host. */
*indicators(vcdev) = 0;
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 4c69fc63c119..cbbe43d8ef87 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -610,15 +610,15 @@ int sas_ata_init(struct domain_device *found_dev)
rc = ata_sas_tport_add(ata_host->dev, ap);
if (rc)
- goto destroy_port;
+ goto free_port;
found_dev->sata_dev.ata_host = ata_host;
found_dev->sata_dev.ap = ap;
return 0;
-destroy_port:
- kfree(ap);
+free_port:
+ ata_port_free(ap);
free_host:
ata_host_put(ata_host);
return rc;
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 8fb7c41c0962..48d975c6dbf2 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -301,7 +301,7 @@ void sas_free_device(struct kref *kref)
if (dev_is_sata(dev) && dev->sata_dev.ap) {
ata_sas_tport_delete(dev->sata_dev.ap);
- kfree(dev->sata_dev.ap);
+ ata_port_free(dev->sata_dev.ap);
ata_host_put(dev->sata_dev.ata_host);
dev->sata_dev.ata_host = NULL;
dev->sata_dev.ap = NULL;
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index 85948963fb97..03d6ec1eb970 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -145,6 +145,20 @@ static inline void sas_fail_probe(struct domain_device *dev, const char *func, i
func, dev->parent ? "exp-attached" :
"direct-attached",
SAS_ADDR(dev->sas_addr), err);
+
+ /*
+ * If the device probe failed, the expander phy attached address
+ * needs to be reset so that the phy will not be treated as flutter
+ * in the next revalidation
+ */
+ if (dev->parent && !dev_is_expander(dev->dev_type)) {
+ struct sas_phy *phy = dev->phy;
+ struct domain_device *parent = dev->parent;
+ struct ex_phy *ex_phy = &parent->ex_dev.ex_phy[phy->number];
+
+ memset(ex_phy->attached_sas_addr, 0, SAS_ADDR_SIZE);
+ }
+
sas_unregister_dev(dev->port, dev);
}
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index acf0592d63da..91f022fb8d0c 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -926,6 +926,7 @@ static const int device_qfull_result =
static const int condition_met_result = SAM_STAT_CONDITION_MET;
static struct dentry *sdebug_debugfs_root;
+static ASYNC_DOMAIN_EXCLUSIVE(sdebug_async_domain);
static void sdebug_err_free(struct rcu_head *head)
{
@@ -1148,6 +1149,8 @@ static int sdebug_target_alloc(struct scsi_target *starget)
if (!targetip)
return -ENOMEM;
+ async_synchronize_full_domain(&sdebug_async_domain);
+
targetip->debugfs_entry = debugfs_create_dir(dev_name(&starget->dev),
sdebug_debugfs_root);
@@ -1174,7 +1177,8 @@ static void sdebug_target_destroy(struct scsi_target *starget)
targetip = (struct sdebug_target_info *)starget->hostdata;
if (targetip) {
starget->hostdata = NULL;
- async_schedule(sdebug_tartget_cleanup_async, targetip);
+ async_schedule_domain(sdebug_tartget_cleanup_async, targetip,
+ &sdebug_async_domain);
}
}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 6b64af7d4927..1b7561abe05d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -4119,8 +4119,6 @@ static int sd_resume(struct device *dev)
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
- sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
-
if (opal_unlock_from_suspend(sdkp->opal_dev)) {
sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n");
return -EIO;
@@ -4137,12 +4135,13 @@ static int sd_resume_common(struct device *dev, bool runtime)
if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */
return 0;
+ sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+
if (!sd_do_start_stop(sdkp->device, runtime)) {
sdkp->suspended = false;
return 0;
}
- sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
ret = sd_start_stop_device(sdkp, 1);
if (!ret) {
sd_resume(dev);
diff --git a/drivers/soc/litex/Kconfig b/drivers/soc/litex/Kconfig
index e6ba3573a772..f3f869639588 100644
--- a/drivers/soc/litex/Kconfig
+++ b/drivers/soc/litex/Kconfig
@@ -7,7 +7,7 @@ config LITEX
config LITEX_SOC_CONTROLLER
tristate "Enable LiteX SoC Controller driver"
- depends on OF || COMPILE_TEST
+ depends on OF
depends on HAS_IOMEM
select LITEX
help
diff --git a/drivers/soc/litex/litex_soc_ctrl.c b/drivers/soc/litex/litex_soc_ctrl.c
index 10813299aa10..72c44119dd54 100644
--- a/drivers/soc/litex/litex_soc_ctrl.c
+++ b/drivers/soc/litex/litex_soc_ctrl.c
@@ -82,13 +82,11 @@ static int litex_reset_handler(struct notifier_block *this, unsigned long mode,
return NOTIFY_DONE;
}
-#ifdef CONFIG_OF
static const struct of_device_id litex_soc_ctrl_of_match[] = {
{.compatible = "litex,soc-controller"},
{},
};
MODULE_DEVICE_TABLE(of, litex_soc_ctrl_of_match);
-#endif /* CONFIG_OF */
static int litex_soc_ctrl_probe(struct platform_device *pdev)
{
@@ -130,7 +128,7 @@ static void litex_soc_ctrl_remove(struct platform_device *pdev)
static struct platform_driver litex_soc_ctrl_driver = {
.driver = {
.name = "litex-soc-controller",
- .of_match_table = of_match_ptr(litex_soc_ctrl_of_match)
+ .of_match_table = litex_soc_ctrl_of_match,
},
.probe = litex_soc_ctrl_probe,
.remove_new = litex_soc_ctrl_remove,
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
index 69daeba974f2..5f518e5a9273 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
@@ -707,7 +707,7 @@ int vchiq_initialise(struct vchiq_state *state, struct vchiq_instance **instance
* block forever.
*/
for (i = 0; i < VCHIQ_INIT_RETRIES; i++) {
- if (state)
+ if (vchiq_remote_initialised(state))
break;
usleep_range(500, 600);
}
@@ -1202,7 +1202,7 @@ void vchiq_dump_platform_instances(struct vchiq_state *state, struct seq_file *f
{
int i;
- if (!state)
+ if (!vchiq_remote_initialised(state))
return;
/*
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h
index 8af209e34fb2..382ec08f6a14 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.h
@@ -413,6 +413,11 @@ struct vchiq_state {
struct opaque_platform_state *platform_state;
};
+static inline bool vchiq_remote_initialised(const struct vchiq_state *state)
+{
+ return state->remote && state->remote->initialised;
+}
+
struct bulk_waiter {
struct vchiq_bulk *bulk;
struct completion event;
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c
index 1f74d0bb33ba..d5f7f61c5626 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_debugfs.c
@@ -138,7 +138,7 @@ void vchiq_debugfs_deinit(void)
#else /* CONFIG_DEBUG_FS */
-void vchiq_debugfs_init(void)
+void vchiq_debugfs_init(struct vchiq_state *state)
{
}
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c
index 3c63347d2d08..430f2ed2ccd3 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c
@@ -1170,6 +1170,11 @@ static int vchiq_open(struct inode *inode, struct file *file)
dev_dbg(state->dev, "arm: vchiq open\n");
+ if (!vchiq_remote_initialised(state)) {
+ dev_dbg(state->dev, "arm: vchiq has no connection to VideoCore\n");
+ return -ENOTCONN;
+ }
+
instance = kzalloc(sizeof(*instance), GFP_KERNEL);
if (!instance)
return -ENOMEM;
@@ -1200,7 +1205,7 @@ static int vchiq_release(struct inode *inode, struct file *file)
dev_dbg(state->dev, "arm: instance=%p\n", instance);
- if (!state) {
+ if (!vchiq_remote_initialised(state)) {
ret = -EPERM;
goto out;
}
diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c
index 3235e1c719e8..3e73efa51bba 100644
--- a/drivers/tee/optee/ffa_abi.c
+++ b/drivers/tee/optee/ffa_abi.c
@@ -660,7 +660,9 @@ static bool optee_ffa_api_is_compatbile(struct ffa_device *ffa_dev,
const struct ffa_ops *ops)
{
const struct ffa_msg_ops *msg_ops = ops->msg_ops;
- struct ffa_send_direct_data data = { OPTEE_FFA_GET_API_VERSION };
+ struct ffa_send_direct_data data = {
+ .data0 = OPTEE_FFA_GET_API_VERSION,
+ };
int rc;
msg_ops->mode_32bit_set(ffa_dev);
@@ -677,7 +679,9 @@ static bool optee_ffa_api_is_compatbile(struct ffa_device *ffa_dev,
return false;
}
- data = (struct ffa_send_direct_data){ OPTEE_FFA_GET_OS_VERSION };
+ data = (struct ffa_send_direct_data){
+ .data0 = OPTEE_FFA_GET_OS_VERSION,
+ };
rc = msg_ops->sync_send_receive(ffa_dev, &data);
if (rc) {
pr_err("Unexpected error %d\n", rc);
@@ -698,7 +702,9 @@ static bool optee_ffa_exchange_caps(struct ffa_device *ffa_dev,
unsigned int *rpc_param_count,
unsigned int *max_notif_value)
{
- struct ffa_send_direct_data data = { OPTEE_FFA_EXCHANGE_CAPABILITIES };
+ struct ffa_send_direct_data data = {
+ .data0 = OPTEE_FFA_EXCHANGE_CAPABILITIES,
+ };
int rc;
rc = ops->msg_ops->sync_send_receive(ffa_dev, &data);
diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
index 45f04a25255a..1b2345a697c5 100644
--- a/drivers/thermal/gov_power_allocator.c
+++ b/drivers/thermal/gov_power_allocator.c
@@ -759,6 +759,9 @@ static void power_allocator_manage(struct thermal_zone_device *tz)
return;
}
+ if (!params->trip_max)
+ return;
+
allocate_power(tz, params->trip_max->temperature);
params->update_cdevs = true;
}
diff --git a/drivers/thermal/gov_step_wise.c b/drivers/thermal/gov_step_wise.c
index 65974fe8be0d..fd5527188cf9 100644
--- a/drivers/thermal/gov_step_wise.c
+++ b/drivers/thermal/gov_step_wise.c
@@ -55,7 +55,11 @@ static unsigned long get_target_state(struct thermal_instance *instance,
if (cur_state <= instance->lower)
return THERMAL_NO_TARGET;
- return clamp(cur_state - 1, instance->lower, instance->upper);
+ /*
+ * If 'throttle' is false, no mitigation is necessary, so
+ * request the lower state for this instance.
+ */
+ return instance->lower;
}
return instance->target;
@@ -93,23 +97,6 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz,
if (instance->initialized && old_target == instance->target)
continue;
- if (trip->type == THERMAL_TRIP_PASSIVE) {
- /*
- * If the target state for this thermal instance
- * changes from THERMAL_NO_TARGET to something else,
- * ensure that the zone temperature will be updated
- * (assuming enabled passive cooling) until it becomes
- * THERMAL_NO_TARGET again, or the cooling device may
- * not be reset to its initial state.
- */
- if (old_target == THERMAL_NO_TARGET &&
- instance->target != THERMAL_NO_TARGET)
- tz->passive++;
- else if (old_target != THERMAL_NO_TARGET &&
- instance->target == THERMAL_NO_TARGET)
- tz->passive--;
- }
-
instance->initialized = true;
mutex_lock(&instance->cdev->lock);
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 1b0ab2790860..ecc748d15eb7 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -300,6 +300,8 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz)
thermal_zone_device_set_polling(tz, tz->passive_delay_jiffies);
else if (tz->polling_delay_jiffies)
thermal_zone_device_set_polling(tz, tz->polling_delay_jiffies);
+ else if (tz->temperature == THERMAL_TEMP_INVALID)
+ thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS));
}
static struct thermal_governor *thermal_get_tz_governor(struct thermal_zone_device *tz)
@@ -482,16 +484,14 @@ static void thermal_trip_crossed(struct thermal_zone_device *tz,
thermal_governor_trip_crossed(governor, tz, trip, crossed_up);
}
-static int thermal_trip_notify_cmp(void *ascending, const struct list_head *a,
+static int thermal_trip_notify_cmp(void *not_used, const struct list_head *a,
const struct list_head *b)
{
struct thermal_trip_desc *tda = container_of(a, struct thermal_trip_desc,
notify_list_node);
struct thermal_trip_desc *tdb = container_of(b, struct thermal_trip_desc,
notify_list_node);
- int ret = tdb->notify_temp - tda->notify_temp;
-
- return ascending ? ret : -ret;
+ return tda->notify_temp - tdb->notify_temp;
}
void __thermal_zone_device_update(struct thermal_zone_device *tz,
@@ -511,7 +511,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
update_temperature(tz);
if (tz->temperature == THERMAL_TEMP_INVALID)
- return;
+ goto monitor;
__thermal_zone_set_trips(tz);
@@ -520,12 +520,12 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
for_each_trip_desc(tz, td)
handle_thermal_trip(tz, td, &way_up_list, &way_down_list);
- list_sort(&way_up_list, &way_up_list, thermal_trip_notify_cmp);
+ list_sort(NULL, &way_up_list, thermal_trip_notify_cmp);
list_for_each_entry(td, &way_up_list, notify_list_node)
thermal_trip_crossed(tz, &td->trip, governor, true);
list_sort(NULL, &way_down_list, thermal_trip_notify_cmp);
- list_for_each_entry(td, &way_down_list, notify_list_node)
+ list_for_each_entry_reverse(td, &way_down_list, notify_list_node)
thermal_trip_crossed(tz, &td->trip, governor, false);
if (governor->manage)
@@ -533,6 +533,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
thermal_debug_update_trip_stats(tz);
+monitor:
monitor_thermal_zone(tz);
}
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 66f67e54e0c8..94eeb4011a48 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -133,6 +133,12 @@ struct thermal_zone_device {
struct thermal_trip_desc trips[] __counted_by(num_trips);
};
+/*
+ * Default delay after a failing thermal zone temperature check before
+ * attempting to check it again.
+ */
+#define THERMAL_RECHECK_DELAY_MS 250
+
/* Default Thermal Governor */
#if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE)
#define DEFAULT_THERMAL_GOVERNOR "step_wise"
diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c
index 458bb1280ebf..5b97e420a95f 100644
--- a/drivers/tty/mxser.c
+++ b/drivers/tty/mxser.c
@@ -288,7 +288,7 @@ struct mxser_board {
enum mxser_must_hwid must_hwid;
speed_t max_baud;
- struct mxser_port ports[] __counted_by(nports);
+ struct mxser_port ports[] /* __counted_by(nports) */;
};
static DECLARE_BITMAP(mxser_boards, MXSER_BOARDS);
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index ff15022369e4..b0adafc44747 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -15,7 +15,6 @@
*/
#include <linux/acpi.h>
-#include <linux/cleanup.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/ioport.h>
@@ -42,8 +41,6 @@
#include <asm/irq.h>
-#include "../serial_base.h" /* For serial_base_add_isa_preferred_console() */
-
#include "8250.h"
/*
@@ -563,8 +560,6 @@ static void __init serial8250_isa_init_ports(void)
port->irqflags |= irqflag;
if (serial8250_isa_config != NULL)
serial8250_isa_config(i, &up->port, &up->capabilities);
-
- serial_base_add_isa_preferred_console(serial8250_reg.dev_name, i);
}
}
diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index 170639d12b2a..ddac0a13cf84 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -115,6 +115,10 @@
/* RX FIFO occupancy indicator */
#define UART_OMAP_RX_LVL 0x19
+/* Timeout low and High */
+#define UART_OMAP_TO_L 0x26
+#define UART_OMAP_TO_H 0x27
+
/*
* Copy of the genpd flags for the console.
* Only used if console suspend is disabled
@@ -663,13 +667,24 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id)
/*
* On K3 SoCs, it is observed that RX TIMEOUT is signalled after
- * FIFO has been drained, in which case a dummy read of RX FIFO
- * is required to clear RX TIMEOUT condition.
+ * FIFO has been drained or erroneously.
+ * So apply solution of Errata i2310 as mentioned in
+ * https://www.ti.com/lit/pdf/sprz536
*/
if (priv->habit & UART_RX_TIMEOUT_QUIRK &&
- (iir & UART_IIR_RX_TIMEOUT) == UART_IIR_RX_TIMEOUT &&
- serial_port_in(port, UART_OMAP_RX_LVL) == 0) {
- serial_port_in(port, UART_RX);
+ (iir & UART_IIR_RX_TIMEOUT) == UART_IIR_RX_TIMEOUT) {
+ unsigned char efr2, timeout_h, timeout_l;
+
+ efr2 = serial_in(up, UART_OMAP_EFR2);
+ timeout_h = serial_in(up, UART_OMAP_TO_H);
+ timeout_l = serial_in(up, UART_OMAP_TO_L);
+ serial_out(up, UART_OMAP_TO_H, 0xFF);
+ serial_out(up, UART_OMAP_TO_L, 0xFF);
+ serial_out(up, UART_OMAP_EFR2, UART_OMAP_EFR2_TIMEOUT_BEHAVE);
+ serial_in(up, UART_IIR);
+ serial_out(up, UART_OMAP_EFR2, efr2);
+ serial_out(up, UART_OMAP_TO_H, timeout_h);
+ serial_out(up, UART_OMAP_TO_L, timeout_l);
}
/* Stop processing interrupts on input overrun */
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 40af74b55933..e1d7aa2fa347 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -1985,6 +1985,17 @@ enum {
MOXA_SUPP_RS485 = BIT(2),
};
+static unsigned short moxa_get_nports(unsigned short device)
+{
+ switch (device) {
+ case PCI_DEVICE_ID_MOXA_CP116E_A_A:
+ case PCI_DEVICE_ID_MOXA_CP116E_A_B:
+ return 8;
+ }
+
+ return FIELD_GET(0x00F0, device);
+}
+
static bool pci_moxa_is_mini_pcie(unsigned short device)
{
if (device == PCI_DEVICE_ID_MOXA_CP102N ||
@@ -2038,7 +2049,7 @@ static int pci_moxa_init(struct pci_dev *dev)
{
unsigned short device = dev->device;
resource_size_t iobar_addr = pci_resource_start(dev, 2);
- unsigned int num_ports = (device & 0x00F0) >> 4, i;
+ unsigned int i, num_ports = moxa_get_nports(device);
u8 val, init_mode = MOXA_RS232;
if (!(pci_moxa_supported_rs(dev) & MOXA_SUPP_RS232)) {
diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c
index 34801a6f300b..b88cc28c94e3 100644
--- a/drivers/tty/serial/bcm63xx_uart.c
+++ b/drivers/tty/serial/bcm63xx_uart.c
@@ -308,8 +308,8 @@ static void bcm_uart_do_tx(struct uart_port *port)
val = bcm_uart_readl(port, UART_MCTL_REG);
val = (val & UART_MCTL_TXFIFOFILL_MASK) >> UART_MCTL_TXFIFOFILL_SHIFT;
-
- pending = uart_port_tx_limited(port, ch, port->fifosize - val,
+ pending = uart_port_tx_limited_flags(port, ch, UART_TX_NOSTOP,
+ port->fifosize - val,
true,
bcm_uart_writel(port, ch, UART_FIFO_REG),
({}));
@@ -320,6 +320,9 @@ static void bcm_uart_do_tx(struct uart_port *port)
val = bcm_uart_readl(port, UART_IR_REG);
val &= ~UART_TX_INT_MASK;
bcm_uart_writel(port, val, UART_IR_REG);
+
+ if (uart_tx_stopped(port))
+ bcm_uart_stop_tx(port);
}
/*
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 2eb22594960f..f4f40c9373c2 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -1952,8 +1952,10 @@ static int imx_uart_rs485_config(struct uart_port *port, struct ktermios *termio
/* Make sure Rx is enabled in case Tx is active with Rx disabled */
if (!(rs485conf->flags & SER_RS485_ENABLED) ||
- rs485conf->flags & SER_RS485_RX_DURING_TX)
+ rs485conf->flags & SER_RS485_RX_DURING_TX) {
+ imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT);
imx_uart_start_rx(port);
+ }
return 0;
}
diff --git a/drivers/tty/serial/mcf.c b/drivers/tty/serial/mcf.c
index b0604d6da025..58858dd352c5 100644
--- a/drivers/tty/serial/mcf.c
+++ b/drivers/tty/serial/mcf.c
@@ -462,7 +462,7 @@ static const struct uart_ops mcf_uart_ops = {
.verify_port = mcf_verify_port,
};
-static struct mcf_uart mcf_ports[4];
+static struct mcf_uart mcf_ports[10];
#define MCF_MAXPORTS ARRAY_SIZE(mcf_ports)
diff --git a/drivers/tty/serial/serial_base.h b/drivers/tty/serial/serial_base.h
index 743a72ac34f3..b6c38d2edfd4 100644
--- a/drivers/tty/serial/serial_base.h
+++ b/drivers/tty/serial/serial_base.h
@@ -49,33 +49,3 @@ void serial_ctrl_unregister_port(struct uart_driver *drv, struct uart_port *port
int serial_core_register_port(struct uart_driver *drv, struct uart_port *port);
void serial_core_unregister_port(struct uart_driver *drv, struct uart_port *port);
-
-#ifdef CONFIG_SERIAL_CORE_CONSOLE
-
-int serial_base_add_preferred_console(struct uart_driver *drv,
- struct uart_port *port);
-
-#else
-
-static inline
-int serial_base_add_preferred_console(struct uart_driver *drv,
- struct uart_port *port)
-{
- return 0;
-}
-
-#endif
-
-#ifdef CONFIG_SERIAL_8250_CONSOLE
-
-int serial_base_add_isa_preferred_console(const char *name, int idx);
-
-#else
-
-static inline
-int serial_base_add_isa_preferred_console(const char *name, int idx)
-{
- return 0;
-}
-
-#endif
diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c
index 73c6ee540c83..4df2a4b10445 100644
--- a/drivers/tty/serial/serial_base_bus.c
+++ b/drivers/tty/serial/serial_base_bus.c
@@ -8,7 +8,6 @@
* The serial core bus manages the serial core controller instances.
*/
-#include <linux/cleanup.h>
#include <linux/container_of.h>
#include <linux/device.h>
#include <linux/idr.h>
@@ -205,134 +204,6 @@ void serial_base_port_device_remove(struct serial_port_device *port_dev)
put_device(&port_dev->dev);
}
-#ifdef CONFIG_SERIAL_CORE_CONSOLE
-
-static int serial_base_add_one_prefcon(const char *match, const char *dev_name,
- int port_id)
-{
- int ret;
-
- ret = add_preferred_console_match(match, dev_name, port_id);
- if (ret == -ENOENT)
- return 0;
-
- return ret;
-}
-
-#ifdef __sparc__
-
-/* Handle Sparc ttya and ttyb options as done in console_setup() */
-static int serial_base_add_sparc_console(const char *dev_name, int idx)
-{
- const char *name;
-
- switch (idx) {
- case 0:
- name = "ttya";
- break;
- case 1:
- name = "ttyb";
- break;
- default:
- return 0;
- }
-
- return serial_base_add_one_prefcon(name, dev_name, idx);
-}
-
-#else
-
-static inline int serial_base_add_sparc_console(const char *dev_name, int idx)
-{
- return 0;
-}
-
-#endif
-
-static int serial_base_add_prefcon(const char *name, int idx)
-{
- const char *char_match __free(kfree) = NULL;
- const char *nmbr_match __free(kfree) = NULL;
- int ret;
-
- /* Handle ttyS specific options */
- if (strstarts(name, "ttyS")) {
- /* No name, just a number */
- nmbr_match = kasprintf(GFP_KERNEL, "%i", idx);
- if (!nmbr_match)
- return -ENODEV;
-
- ret = serial_base_add_one_prefcon(nmbr_match, name, idx);
- if (ret)
- return ret;
-
- /* Sparc ttya and ttyb */
- ret = serial_base_add_sparc_console(name, idx);
- if (ret)
- return ret;
- }
-
- /* Handle the traditional character device name style console=ttyS0 */
- char_match = kasprintf(GFP_KERNEL, "%s%i", name, idx);
- if (!char_match)
- return -ENOMEM;
-
- return serial_base_add_one_prefcon(char_match, name, idx);
-}
-
-/**
- * serial_base_add_preferred_console - Adds a preferred console
- * @drv: Serial port device driver
- * @port: Serial port instance
- *
- * Tries to add a preferred console for a serial port if specified in the
- * kernel command line. Supports both the traditional character device such
- * as console=ttyS0, and a hardware addressing based console=DEVNAME:0.0
- * style name.
- *
- * Translates the kernel command line option using a hardware based addressing
- * console=DEVNAME:0.0 to the serial port character device such as ttyS0.
- * Cannot be called early for ISA ports, depends on struct device.
- *
- * Note that duplicates are ignored by add_preferred_console().
- *
- * Return: 0 on success, negative error code on failure.
- */
-int serial_base_add_preferred_console(struct uart_driver *drv,
- struct uart_port *port)
-{
- const char *port_match __free(kfree) = NULL;
- int ret;
-
- ret = serial_base_add_prefcon(drv->dev_name, port->line);
- if (ret)
- return ret;
-
- port_match = kasprintf(GFP_KERNEL, "%s:%i.%i", dev_name(port->dev),
- port->ctrl_id, port->port_id);
- if (!port_match)
- return -ENOMEM;
-
- /* Translate a hardware addressing style console=DEVNAME:0.0 */
- return serial_base_add_one_prefcon(port_match, drv->dev_name, port->line);
-}
-
-#endif
-
-#ifdef CONFIG_SERIAL_8250_CONSOLE
-
-/*
- * Early ISA ports initialize the console before there is no struct device.
- * This should be only called from serial8250_isa_init_preferred_console(),
- * other callers are likely wrong and should rely on earlycon instead.
- */
-int serial_base_add_isa_preferred_console(const char *name, int idx)
-{
- return serial_base_add_prefcon(name, idx);
-}
-
-#endif
-
static int serial_base_init(void)
{
int ret;
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 0c4d60976663..2a8006e3d687 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -3422,10 +3422,6 @@ int serial_core_register_port(struct uart_driver *drv, struct uart_port *port)
if (ret)
goto err_unregister_ctrl_dev;
- ret = serial_base_add_preferred_console(drv, port);
- if (ret)
- goto err_unregister_port_dev;
-
ret = serial_core_add_one_port(drv, port);
if (ret)
goto err_unregister_port_dev;
diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index 8944548c30fa..c532416aec22 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -105,16 +105,15 @@ EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac);
* @hba: per adapter instance
* @req: pointer to the request to be issued
*
- * Return: the hardware queue instance on which the request would
- * be queued.
+ * Return: the hardware queue instance on which the request will be or has
+ * been queued. %NULL if the request has already been freed.
*/
struct ufs_hw_queue *ufshcd_mcq_req_to_hwq(struct ufs_hba *hba,
struct request *req)
{
- u32 utag = blk_mq_unique_tag(req);
- u32 hwq = blk_mq_unique_tag_to_hwq(utag);
+ struct blk_mq_hw_ctx *hctx = READ_ONCE(req->mq_hctx);
- return &hba->uhq[hwq];
+ return hctx ? &hba->uhq[hctx->queue_num] : NULL;
}
/**
@@ -515,6 +514,8 @@ int ufshcd_mcq_sq_cleanup(struct ufs_hba *hba, int task_tag)
if (!cmd)
return -EINVAL;
hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
+ if (!hwq)
+ return 0;
} else {
hwq = hba->dev_cmd_queue;
}
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 1b65e6ae4137..46433ecf0c4d 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -6456,6 +6456,8 @@ static bool ufshcd_abort_one(struct request *rq, void *priv)
/* Release cmd in MCQ mode if abort succeeds */
if (is_mcq_enabled(hba) && (*ret == 0)) {
hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
+ if (!hwq)
+ return 0;
spin_lock_irqsave(&hwq->cq_lock, flags);
if (ufshcd_cmd_inflight(lrbp->cmd))
ufshcd_release_scsi_cmd(hba, lrbp);
diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c
index 4ce7cba2b48a..8f3b9a0a38e1 100644
--- a/drivers/usb/atm/cxacru.c
+++ b/drivers/usb/atm/cxacru.c
@@ -1131,6 +1131,7 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance,
struct cxacru_data *instance;
struct usb_device *usb_dev = interface_to_usbdev(intf);
struct usb_host_endpoint *cmd_ep = usb_dev->ep_in[CXACRU_EP_CMD];
+ struct usb_endpoint_descriptor *in, *out;
int ret;
/* instance init */
@@ -1177,6 +1178,19 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance,
goto fail;
}
+ if (usb_endpoint_xfer_int(&cmd_ep->desc))
+ ret = usb_find_common_endpoints(intf->cur_altsetting,
+ NULL, NULL, &in, &out);
+ else
+ ret = usb_find_common_endpoints(intf->cur_altsetting,
+ &in, &out, NULL, NULL);
+
+ if (ret) {
+ usb_err(usbatm_instance, "cxacru_bind: interface has incorrect endpoints\n");
+ ret = -ENODEV;
+ goto fail;
+ }
+
if ((cmd_ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
== USB_ENDPOINT_XFER_INT) {
usb_fill_int_urb(instance->rcv_urb,
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 7ee61a89520b..cb82557678dd 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -957,12 +957,16 @@ static bool dwc3_core_is_valid(struct dwc3 *dwc)
static void dwc3_core_setup_global_control(struct dwc3 *dwc)
{
+ unsigned int power_opt;
+ unsigned int hw_mode;
u32 reg;
reg = dwc3_readl(dwc->regs, DWC3_GCTL);
reg &= ~DWC3_GCTL_SCALEDOWN_MASK;
+ hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0);
+ power_opt = DWC3_GHWPARAMS1_EN_PWROPT(dwc->hwparams.hwparams1);
- switch (DWC3_GHWPARAMS1_EN_PWROPT(dwc->hwparams.hwparams1)) {
+ switch (power_opt) {
case DWC3_GHWPARAMS1_EN_PWROPT_CLK:
/**
* WORKAROUND: DWC3 revisions between 2.10a and 2.50a have an
@@ -995,6 +999,20 @@ static void dwc3_core_setup_global_control(struct dwc3 *dwc)
break;
}
+ /*
+ * This is a workaround for STAR#4846132, which only affects
+ * DWC_usb31 version2.00a operating in host mode.
+ *
+ * There is a problem in DWC_usb31 version 2.00a operating
+ * in host mode that would cause a CSR read timeout When CSR
+ * read coincides with RAM Clock Gating Entry. By disable
+ * Clock Gating, sacrificing power consumption for normal
+ * operation.
+ */
+ if (power_opt != DWC3_GHWPARAMS1_EN_PWROPT_NO &&
+ hw_mode != DWC3_GHWPARAMS0_MODE_GADGET && DWC3_VER_IS(DWC31, 200A))
+ reg |= DWC3_GCTL_DSBLCLKGTNG;
+
/* check if current dwc3 is on simulation board */
if (dwc->hwparams.hwparams6 & DWC3_GHWPARAMS6_EN_FPGA) {
dev_info(dwc->dev, "Running with FPGA optimizations\n");
@@ -2250,7 +2268,6 @@ assert_reset:
static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
{
- unsigned long flags;
u32 reg;
int i;
@@ -2293,9 +2310,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
break;
if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) {
- spin_lock_irqsave(&dwc->lock, flags);
dwc3_gadget_suspend(dwc);
- spin_unlock_irqrestore(&dwc->lock, flags);
synchronize_irq(dwc->irq_gadget);
}
@@ -2312,7 +2327,6 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg)
{
- unsigned long flags;
int ret;
u32 reg;
int i;
@@ -2366,9 +2380,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg)
if (dwc->current_otg_role == DWC3_OTG_ROLE_HOST) {
dwc3_otg_host_init(dwc);
} else if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) {
- spin_lock_irqsave(&dwc->lock, flags);
dwc3_gadget_resume(dwc);
- spin_unlock_irqrestore(&dwc->lock, flags);
}
break;
diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index ba7d180cc9e6..44e20c6c36d3 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -213,6 +213,7 @@ static inline struct usb_endpoint_descriptor *ep_desc(struct usb_gadget *gadget,
struct usb_endpoint_descriptor *ss)
{
switch (gadget->speed) {
+ case USB_SPEED_SUPER_PLUS:
case USB_SPEED_SUPER:
return ss;
case USB_SPEED_HIGH:
@@ -449,11 +450,8 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr)
mutex_lock(&dev->lock_printer_io);
spin_lock_irqsave(&dev->lock, flags);
- if (dev->interface < 0) {
- spin_unlock_irqrestore(&dev->lock, flags);
- mutex_unlock(&dev->lock_printer_io);
- return -ENODEV;
- }
+ if (dev->interface < 0)
+ goto out_disabled;
/* We will use this flag later to check if a printer reset happened
* after we turn interrupts back on.
@@ -461,6 +459,9 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr)
dev->reset_printer = 0;
setup_rx_reqs(dev);
+ /* this dropped the lock - need to retest */
+ if (dev->interface < 0)
+ goto out_disabled;
bytes_copied = 0;
current_rx_req = dev->current_rx_req;
@@ -494,6 +495,8 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr)
wait_event_interruptible(dev->rx_wait,
(likely(!list_empty(&dev->rx_buffers))));
spin_lock_irqsave(&dev->lock, flags);
+ if (dev->interface < 0)
+ goto out_disabled;
}
/* We have data to return then copy it to the caller's buffer.*/
@@ -537,6 +540,9 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr)
return -EAGAIN;
}
+ if (dev->interface < 0)
+ goto out_disabled;
+
/* If we not returning all the data left in this RX request
* buffer then adjust the amount of data left in the buffer.
* Othewise if we are done with this RX request buffer then
@@ -566,6 +572,11 @@ printer_read(struct file *fd, char __user *buf, size_t len, loff_t *ptr)
return bytes_copied;
else
return -EAGAIN;
+
+out_disabled:
+ spin_unlock_irqrestore(&dev->lock, flags);
+ mutex_unlock(&dev->lock_printer_io);
+ return -ENODEV;
}
static ssize_t
@@ -586,11 +597,8 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
mutex_lock(&dev->lock_printer_io);
spin_lock_irqsave(&dev->lock, flags);
- if (dev->interface < 0) {
- spin_unlock_irqrestore(&dev->lock, flags);
- mutex_unlock(&dev->lock_printer_io);
- return -ENODEV;
- }
+ if (dev->interface < 0)
+ goto out_disabled;
/* Check if a printer reset happens while we have interrupts on */
dev->reset_printer = 0;
@@ -613,6 +621,8 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
wait_event_interruptible(dev->tx_wait,
(likely(!list_empty(&dev->tx_reqs))));
spin_lock_irqsave(&dev->lock, flags);
+ if (dev->interface < 0)
+ goto out_disabled;
}
while (likely(!list_empty(&dev->tx_reqs)) && len) {
@@ -662,6 +672,9 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
return -EAGAIN;
}
+ if (dev->interface < 0)
+ goto out_disabled;
+
list_add(&req->list, &dev->tx_reqs_active);
/* here, we unlock, and only unlock, to avoid deadlock. */
@@ -674,6 +687,8 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
mutex_unlock(&dev->lock_printer_io);
return -EAGAIN;
}
+ if (dev->interface < 0)
+ goto out_disabled;
}
spin_unlock_irqrestore(&dev->lock, flags);
@@ -685,6 +700,11 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
return bytes_copied;
else
return -EAGAIN;
+
+out_disabled:
+ spin_unlock_irqrestore(&dev->lock, flags);
+ mutex_unlock(&dev->lock_printer_io);
+ return -ENODEV;
}
static int
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 11dd0b9e847f..95191083b455 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -1163,8 +1163,6 @@ struct net_device *gether_connect(struct gether *link)
if (netif_running(dev->net))
eth_start(dev, GFP_ATOMIC);
- netif_device_attach(dev->net);
-
/* on error, disable any endpoints */
} else {
(void) usb_ep_disable(link->out_ep);
@@ -1202,7 +1200,7 @@ void gether_disconnect(struct gether *link)
DBG(dev, "%s\n", __func__);
- netif_device_detach(dev->net);
+ netif_stop_queue(dev->net);
netif_carrier_off(dev->net);
/* disable endpoints, forcing (synchronous) completion
diff --git a/drivers/usb/gadget/udc/aspeed_udc.c b/drivers/usb/gadget/udc/aspeed_udc.c
index 3916c8e2ba01..821a6ab5da56 100644
--- a/drivers/usb/gadget/udc/aspeed_udc.c
+++ b/drivers/usb/gadget/udc/aspeed_udc.c
@@ -66,8 +66,8 @@
#define USB_UPSTREAM_EN BIT(0)
/* Main config reg */
-#define UDC_CFG_SET_ADDR(x) ((x) & 0x3f)
-#define UDC_CFG_ADDR_MASK (0x3f)
+#define UDC_CFG_SET_ADDR(x) ((x) & UDC_CFG_ADDR_MASK)
+#define UDC_CFG_ADDR_MASK GENMASK(6, 0)
/* Interrupt ctrl & status reg */
#define UDC_IRQ_EP_POOL_NAK BIT(17)
diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c
index 8abf3a567e30..108d9a593a80 100644
--- a/drivers/usb/musb/da8xx.c
+++ b/drivers/usb/musb/da8xx.c
@@ -556,7 +556,7 @@ static int da8xx_probe(struct platform_device *pdev)
ret = of_platform_populate(pdev->dev.of_node, NULL,
da8xx_auxdata_lookup, &pdev->dev);
if (ret)
- return ret;
+ goto err_unregister_phy;
pinfo = da8xx_dev_info;
pinfo.parent = &pdev->dev;
@@ -571,9 +571,13 @@ static int da8xx_probe(struct platform_device *pdev)
ret = PTR_ERR_OR_ZERO(glue->musb);
if (ret) {
dev_err(&pdev->dev, "failed to register musb device: %d\n", ret);
- usb_phy_generic_unregister(glue->usb_phy);
+ goto err_unregister_phy;
}
+ return 0;
+
+err_unregister_phy:
+ usb_phy_generic_unregister(glue->usb_phy);
return ret;
}
diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c
index 8d112c3edae5..adf32ca0f761 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -25,6 +25,7 @@ struct ucsi_acpi {
unsigned long flags;
#define UCSI_ACPI_COMMAND_PENDING 1
#define UCSI_ACPI_ACK_PENDING 2
+#define UCSI_ACPI_CHECK_BOGUS_EVENT 3
guid_t guid;
u64 cmd;
};
@@ -128,6 +129,58 @@ static const struct ucsi_operations ucsi_zenbook_ops = {
.async_write = ucsi_acpi_async_write
};
+static int ucsi_gram_read(struct ucsi *ucsi, unsigned int offset,
+ void *val, size_t val_len)
+{
+ u16 bogus_change = UCSI_CONSTAT_POWER_LEVEL_CHANGE |
+ UCSI_CONSTAT_PDOS_CHANGE;
+ struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi);
+ struct ucsi_connector_status *status;
+ int ret;
+
+ ret = ucsi_acpi_read(ucsi, offset, val, val_len);
+ if (ret < 0)
+ return ret;
+
+ if (UCSI_COMMAND(ua->cmd) == UCSI_GET_CONNECTOR_STATUS &&
+ test_bit(UCSI_ACPI_CHECK_BOGUS_EVENT, &ua->flags) &&
+ offset == UCSI_MESSAGE_IN) {
+ status = (struct ucsi_connector_status *)val;
+
+ /* Clear the bogus change */
+ if (status->change == bogus_change)
+ status->change = 0;
+
+ clear_bit(UCSI_ACPI_CHECK_BOGUS_EVENT, &ua->flags);
+ }
+
+ return ret;
+}
+
+static int ucsi_gram_sync_write(struct ucsi *ucsi, unsigned int offset,
+ const void *val, size_t val_len)
+{
+ struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi);
+ int ret;
+
+ ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len);
+ if (ret < 0)
+ return ret;
+
+ if (UCSI_COMMAND(ua->cmd) == UCSI_GET_PDOS &&
+ ua->cmd & UCSI_GET_PDOS_PARTNER_PDO(1) &&
+ ua->cmd & UCSI_GET_PDOS_SRC_PDOS)
+ set_bit(UCSI_ACPI_CHECK_BOGUS_EVENT, &ua->flags);
+
+ return ret;
+}
+
+static const struct ucsi_operations ucsi_gram_ops = {
+ .read = ucsi_gram_read,
+ .sync_write = ucsi_gram_sync_write,
+ .async_write = ucsi_acpi_async_write
+};
+
static const struct dmi_system_id ucsi_acpi_quirks[] = {
{
.matches = {
@@ -136,6 +189,14 @@ static const struct dmi_system_id ucsi_acpi_quirks[] = {
},
.driver_data = (void *)&ucsi_zenbook_ops,
},
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LG Electronics"),
+ DMI_MATCH(DMI_PRODUCT_FAMILY, "LG gram PC"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "90Q"),
+ },
+ .driver_data = (void *)&ucsi_gram_ops,
+ },
{ }
};
diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c
index 985a880e86da..2fa973afe4e6 100644
--- a/drivers/usb/typec/ucsi/ucsi_glink.c
+++ b/drivers/usb/typec/ucsi/ucsi_glink.c
@@ -372,6 +372,7 @@ static int pmic_glink_ucsi_probe(struct auxiliary_device *adev,
ret = fwnode_property_read_u32(fwnode, "reg", &port);
if (ret < 0) {
dev_err(dev, "missing reg property of %pOFn\n", fwnode);
+ fwnode_handle_put(fwnode);
return ret;
}
@@ -386,9 +387,11 @@ static int pmic_glink_ucsi_probe(struct auxiliary_device *adev,
if (!desc)
continue;
- if (IS_ERR(desc))
+ if (IS_ERR(desc)) {
+ fwnode_handle_put(fwnode);
return dev_err_probe(dev, PTR_ERR(desc),
"unable to acquire orientation gpio\n");
+ }
ucsi->port_orientation[port] = desc;
}
diff --git a/drivers/usb/typec/ucsi/ucsi_stm32g0.c b/drivers/usb/typec/ucsi/ucsi_stm32g0.c
index ac48b7763114..ac69288e8bb0 100644
--- a/drivers/usb/typec/ucsi/ucsi_stm32g0.c
+++ b/drivers/usb/typec/ucsi/ucsi_stm32g0.c
@@ -65,6 +65,7 @@ struct ucsi_stm32g0 {
struct device *dev;
unsigned long flags;
#define COMMAND_PENDING 1
+#define ACK_PENDING 2
const char *fw_name;
struct ucsi *ucsi;
bool suspended;
@@ -396,9 +397,13 @@ static int ucsi_stm32g0_sync_write(struct ucsi *ucsi, unsigned int offset, const
size_t len)
{
struct ucsi_stm32g0 *g0 = ucsi_get_drvdata(ucsi);
+ bool ack = UCSI_COMMAND(*(u64 *)val) == UCSI_ACK_CC_CI;
int ret;
- set_bit(COMMAND_PENDING, &g0->flags);
+ if (ack)
+ set_bit(ACK_PENDING, &g0->flags);
+ else
+ set_bit(COMMAND_PENDING, &g0->flags);
ret = ucsi_stm32g0_async_write(ucsi, offset, val, len);
if (ret)
@@ -406,9 +411,14 @@ static int ucsi_stm32g0_sync_write(struct ucsi *ucsi, unsigned int offset, const
if (!wait_for_completion_timeout(&g0->complete, msecs_to_jiffies(5000)))
ret = -ETIMEDOUT;
+ else
+ return 0;
out_clear_bit:
- clear_bit(COMMAND_PENDING, &g0->flags);
+ if (ack)
+ clear_bit(ACK_PENDING, &g0->flags);
+ else
+ clear_bit(COMMAND_PENDING, &g0->flags);
return ret;
}
@@ -429,8 +439,9 @@ static irqreturn_t ucsi_stm32g0_irq_handler(int irq, void *data)
if (UCSI_CCI_CONNECTOR(cci))
ucsi_connector_change(g0->ucsi, UCSI_CCI_CONNECTOR(cci));
- if (test_bit(COMMAND_PENDING, &g0->flags) &&
- cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE))
+ if (cci & UCSI_CCI_ACK_COMPLETE && test_and_clear_bit(ACK_PENDING, &g0->flags))
+ complete(&g0->complete);
+ if (cci & UCSI_CCI_COMMAND_COMPLETE && test_and_clear_bit(COMMAND_PENDING, &g0->flags))
complete(&g0->complete);
return IRQ_HANDLED;
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 987c7921affa..ba0ce0075b2f 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1260,7 +1260,7 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
struct vfio_pci_hot_reset_info hdr;
struct vfio_pci_fill_info fill = {};
bool slot = false;
- int ret, count;
+ int ret, count = 0;
if (copy_from_user(&hdr, arg, minsz))
return -EFAULT;
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 85eea38dbdf4..2882944d23cc 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -257,6 +257,7 @@ config GPIO_WATCHDOG_ARCH_INITCALL
config LENOVO_SE10_WDT
tristate "Lenovo SE10 Watchdog"
depends on (X86 && DMI) || COMPILE_TEST
+ depends on HAS_IOPORT
select WATCHDOG_CORE
help
If you say yes here you get support for the watchdog
diff --git a/drivers/watchdog/menz69_wdt.c b/drivers/watchdog/menz69_wdt.c
index c7de30270043..0508a65acfa6 100644
--- a/drivers/watchdog/menz69_wdt.c
+++ b/drivers/watchdog/menz69_wdt.c
@@ -161,6 +161,7 @@ static struct mcb_driver men_z069_driver = {
module_mcb_driver(men_z069_driver);
MODULE_AUTHOR("Johannes Thumshirn <[email protected]>");
+MODULE_DESCRIPTION("Watchdog driver for the MEN z069 IP-Core");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("mcb:16z069");
MODULE_IMPORT_NS(MCB);
diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index a7a12f2fe9de..b6e0236509bb 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -370,5 +370,6 @@ static struct platform_driver omap_wdt_driver = {
module_platform_driver(omap_wdt_driver);
MODULE_AUTHOR("George G. Davis");
+MODULE_DESCRIPTION("Driver for the TI OMAP 16xx/24xx/34xx 32KHz (non-secure) watchdog");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:omap_wdt");
diff --git a/drivers/watchdog/simatic-ipc-wdt.c b/drivers/watchdog/simatic-ipc-wdt.c
index cdc1a2e15180..1e91f0a560ff 100644
--- a/drivers/watchdog/simatic-ipc-wdt.c
+++ b/drivers/watchdog/simatic-ipc-wdt.c
@@ -227,6 +227,7 @@ static struct platform_driver simatic_ipc_wdt_driver = {
module_platform_driver(simatic_ipc_wdt_driver);
+MODULE_DESCRIPTION("Siemens SIMATIC IPC driver for Watchdogs");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("platform:" KBUILD_MODNAME);
MODULE_AUTHOR("Gerd Haeussler <[email protected]>");
diff --git a/drivers/watchdog/ts4800_wdt.c b/drivers/watchdog/ts4800_wdt.c
index 0099403f4992..24b1ad52102e 100644
--- a/drivers/watchdog/ts4800_wdt.c
+++ b/drivers/watchdog/ts4800_wdt.c
@@ -200,5 +200,6 @@ static struct platform_driver ts4800_wdt_driver = {
module_platform_driver(ts4800_wdt_driver);
MODULE_AUTHOR("Damien Riegel <[email protected]>");
+MODULE_DESCRIPTION("Watchdog driver for TS-4800 based boards");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("platform:ts4800_wdt");
diff --git a/drivers/watchdog/twl4030_wdt.c b/drivers/watchdog/twl4030_wdt.c
index 09d17e20f4a7..8c80d04811e4 100644
--- a/drivers/watchdog/twl4030_wdt.c
+++ b/drivers/watchdog/twl4030_wdt.c
@@ -118,6 +118,7 @@ static struct platform_driver twl4030_wdt_driver = {
module_platform_driver(twl4030_wdt_driver);
MODULE_AUTHOR("Nokia Corporation");
+MODULE_DESCRIPTION("TWL4030 Watchdog");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:twl4030_wdt");
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 15bb7989c387..3acf5e050072 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -512,7 +512,7 @@ static int afs_iget5_set_root(struct inode *inode, void *opaque)
struct afs_vnode *vnode = AFS_FS_I(inode);
vnode->volume = as->volume;
- vnode->fid.vid = as->volume->vid,
+ vnode->fid.vid = as->volume->vid;
vnode->fid.vnode = 1;
vnode->fid.unique = 1;
inode->i_ino = 1;
@@ -545,7 +545,7 @@ struct inode *afs_root_iget(struct super_block *sb, struct key *key)
BUG_ON(!(inode->i_state & I_NEW));
vnode = AFS_FS_I(inode);
- vnode->cb_v_check = atomic_read(&as->volume->cb_v_break),
+ vnode->cb_v_check = atomic_read(&as->volume->cb_v_break);
afs_set_netfs_context(vnode);
op = afs_alloc_operation(key, as->volume);
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 8dec2c6cbb7e..658f11aebda1 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -3,6 +3,7 @@
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
+#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_key_cache.h"
@@ -29,7 +30,7 @@
#include <linux/sched/task.h>
#include <linux/sort.h>
-static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket);
+static void bch2_discard_one_bucket_fast(struct bch_dev *, u64);
/* Persistent alloc info: */
@@ -893,12 +894,12 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if (statechange(a->data_type == BCH_DATA_need_discard) &&
!bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) &&
bucket_flushed(new_a))
- bch2_discard_one_bucket_fast(c, new.k->p);
+ bch2_discard_one_bucket_fast(ca, new.k->p.offset);
if (statechange(a->data_type == BCH_DATA_cached) &&
!bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
- bch2_do_invalidates(c);
+ bch2_dev_do_invalidates(ca);
if (statechange(a->data_type == BCH_DATA_need_gc_gens))
bch2_gc_gens_async(c);
@@ -1553,13 +1554,13 @@ err:
}
static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
- struct btree_iter *alloc_iter)
+ struct btree_iter *alloc_iter,
+ struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
- struct btree_iter lru_iter;
struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a;
- struct bkey_s_c alloc_k, lru_k;
+ struct bkey_s_c alloc_k;
struct printbuf buf = PRINTBUF;
int ret;
@@ -1573,6 +1574,14 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
a = bch2_alloc_to_v4(alloc_k, &a_convert);
+ if (a->fragmentation_lru) {
+ ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START,
+ a->fragmentation_lru,
+ alloc_k, last_flushed);
+ if (ret)
+ return ret;
+ }
+
if (a->data_type != BCH_DATA_cached)
return 0;
@@ -1597,73 +1606,66 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
a = &a_mut->v;
}
- lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
- lru_pos(alloc_k.k->p.inode,
- bucket_to_u64(alloc_k.k->p),
- a->io_time[READ]), 0);
- ret = bkey_err(lru_k);
+ ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ],
+ alloc_k, last_flushed);
if (ret)
- return ret;
-
- if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
- alloc_key_to_missing_lru_entry,
- "missing lru entry\n"
- " %s",
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
- ret = bch2_lru_set(trans,
- alloc_k.k->p.inode,
- bucket_to_u64(alloc_k.k->p),
- a->io_time[READ]);
- if (ret)
- goto err;
- }
+ goto err;
err:
fsck_err:
- bch2_trans_iter_exit(trans, &lru_iter);
printbuf_exit(&buf);
return ret;
}
int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
{
+ struct bkey_buf last_flushed;
+
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_check_alloc_to_lru_ref(trans, &iter)));
+ bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));
+
+ bch2_bkey_buf_exit(&last_flushed, c);
bch_err_fn(c, ret);
return ret;
}
-static int discard_in_flight_add(struct bch_fs *c, struct bpos bucket)
+static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress)
{
int ret;
- mutex_lock(&c->discard_buckets_in_flight_lock);
- darray_for_each(c->discard_buckets_in_flight, i)
- if (bkey_eq(*i, bucket)) {
+ mutex_lock(&ca->discard_buckets_in_flight_lock);
+ darray_for_each(ca->discard_buckets_in_flight, i)
+ if (i->bucket == bucket) {
ret = -BCH_ERR_EEXIST_discard_in_flight_add;
goto out;
}
- ret = darray_push(&c->discard_buckets_in_flight, bucket);
+ ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) {
+ .in_progress = in_progress,
+ .bucket = bucket,
+ }));
out:
- mutex_unlock(&c->discard_buckets_in_flight_lock);
+ mutex_unlock(&ca->discard_buckets_in_flight_lock);
return ret;
}
-static void discard_in_flight_remove(struct bch_fs *c, struct bpos bucket)
+static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
{
- mutex_lock(&c->discard_buckets_in_flight_lock);
- darray_for_each(c->discard_buckets_in_flight, i)
- if (bkey_eq(*i, bucket)) {
- darray_remove_item(&c->discard_buckets_in_flight, i);
+ mutex_lock(&ca->discard_buckets_in_flight_lock);
+ darray_for_each(ca->discard_buckets_in_flight, i)
+ if (i->bucket == bucket) {
+ BUG_ON(!i->in_progress);
+ darray_remove_item(&ca->discard_buckets_in_flight, i);
goto found;
}
BUG();
found:
- mutex_unlock(&c->discard_buckets_in_flight_lock);
+ mutex_unlock(&ca->discard_buckets_in_flight_lock);
}
struct discard_buckets_state {
@@ -1671,26 +1673,11 @@ struct discard_buckets_state {
u64 open;
u64 need_journal_commit;
u64 discarded;
- struct bch_dev *ca;
u64 need_journal_commit_this_dev;
};
-static void discard_buckets_next_dev(struct bch_fs *c, struct discard_buckets_state *s, struct bch_dev *ca)
-{
- if (s->ca == ca)
- return;
-
- if (s->ca && s->need_journal_commit_this_dev >
- bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets)
- bch2_journal_flush_async(&c->journal, NULL);
-
- if (s->ca)
- percpu_ref_put(&s->ca->io_ref);
- s->ca = ca;
- s->need_journal_commit_this_dev = 0;
-}
-
static int bch2_discard_one_bucket(struct btree_trans *trans,
+ struct bch_dev *ca,
struct btree_iter *need_discard_iter,
struct bpos *discard_pos_done,
struct discard_buckets_state *s)
@@ -1704,16 +1691,6 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
bool discard_locked = false;
int ret = 0;
- struct bch_dev *ca = s->ca && s->ca->dev_idx == pos.inode
- ? s->ca
- : bch2_dev_get_ioref(c, pos.inode, WRITE);
- if (!ca) {
- bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
- return 0;
- }
-
- discard_buckets_next_dev(c, s, ca);
-
if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
s->open++;
goto out;
@@ -1773,7 +1750,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
goto out;
}
- if (discard_in_flight_add(c, SPOS(iter.pos.inode, iter.pos.offset, true)))
+ if (discard_in_flight_add(ca, iter.pos.offset, true))
goto out;
discard_locked = true;
@@ -1811,7 +1788,7 @@ write:
s->discarded++;
out:
if (discard_locked)
- discard_in_flight_remove(c, iter.pos);
+ discard_in_flight_remove(ca, iter.pos.offset);
s->seen++;
bch2_trans_iter_exit(trans, &iter);
printbuf_exit(&buf);
@@ -1820,7 +1797,8 @@ out:
static void bch2_do_discards_work(struct work_struct *work)
{
- struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
+ struct bch_dev *ca = container_of(work, struct bch_dev, discard_work);
+ struct bch_fs *c = ca->fs;
struct discard_buckets_state s = {};
struct bpos discard_pos_done = POS_MAX;
int ret;
@@ -1831,23 +1809,41 @@ static void bch2_do_discards_work(struct work_struct *work)
* successful commit:
*/
ret = bch2_trans_run(c,
- for_each_btree_key(trans, iter,
- BTREE_ID_need_discard, POS_MIN, 0, k,
- bch2_discard_one_bucket(trans, &iter, &discard_pos_done, &s)));
-
- discard_buckets_next_dev(c, &s, NULL);
+ for_each_btree_key_upto(trans, iter,
+ BTREE_ID_need_discard,
+ POS(ca->dev_idx, 0),
+ POS(ca->dev_idx, U64_MAX), 0, k,
+ bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s)));
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
bch2_err_str(ret));
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+ percpu_ref_put(&ca->io_ref);
+}
+
+void bch2_dev_do_discards(struct bch_dev *ca)
+{
+ struct bch_fs *c = ca->fs;
+
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ return;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
+ goto put_ioref;
+
+ if (queue_work(c->write_ref_wq, &ca->discard_work))
+ return;
+
+ bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+put_ioref:
+ percpu_ref_put(&ca->io_ref);
}
void bch2_do_discards(struct bch_fs *c)
{
- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) &&
- !queue_work(c->write_ref_wq, &c->discard_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+ for_each_member_device(c, ca)
+ bch2_dev_do_discards(ca);
}
static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket)
@@ -1876,68 +1872,69 @@ err:
static void bch2_do_discards_fast_work(struct work_struct *work)
{
- struct bch_fs *c = container_of(work, struct bch_fs, discard_fast_work);
+ struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work);
+ struct bch_fs *c = ca->fs;
while (1) {
bool got_bucket = false;
- struct bpos bucket;
- struct bch_dev *ca;
+ u64 bucket;
- mutex_lock(&c->discard_buckets_in_flight_lock);
- darray_for_each(c->discard_buckets_in_flight, i) {
- if (i->snapshot)
+ mutex_lock(&ca->discard_buckets_in_flight_lock);
+ darray_for_each(ca->discard_buckets_in_flight, i) {
+ if (i->in_progress)
continue;
- ca = bch2_dev_get_ioref(c, i->inode, WRITE);
- if (!ca) {
- darray_remove_item(&c->discard_buckets_in_flight, i);
- continue;
- }
-
got_bucket = true;
- bucket = *i;
- i->snapshot = true;
+ bucket = i->bucket;
+ i->in_progress = true;
break;
}
- mutex_unlock(&c->discard_buckets_in_flight_lock);
+ mutex_unlock(&ca->discard_buckets_in_flight_lock);
if (!got_bucket)
break;
if (ca->mi.discard && !c->opts.nochanges)
blkdev_issue_discard(ca->disk_sb.bdev,
- bucket.offset * ca->mi.bucket_size,
+ bucket_to_sector(ca, bucket),
ca->mi.bucket_size,
GFP_KERNEL);
int ret = bch2_trans_do(c, NULL, NULL,
- BCH_WATERMARK_btree|
- BCH_TRANS_COMMIT_no_enospc,
- bch2_clear_bucket_needs_discard(trans, bucket));
+ BCH_WATERMARK_btree|
+ BCH_TRANS_COMMIT_no_enospc,
+ bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket)));
bch_err_fn(c, ret);
- percpu_ref_put(&ca->io_ref);
- discard_in_flight_remove(c, bucket);
+ discard_in_flight_remove(ca, bucket);
if (ret)
break;
}
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+ percpu_ref_put(&ca->io_ref);
}
-static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket)
+static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
{
- rcu_read_lock();
- struct bch_dev *ca = bch2_dev_rcu(c, bucket.inode);
- bool dead = !ca || percpu_ref_is_dying(&ca->io_ref);
- rcu_read_unlock();
+ struct bch_fs *c = ca->fs;
+
+ if (discard_in_flight_add(ca, bucket, false))
+ return;
+
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ return;
- if (!dead &&
- !discard_in_flight_add(c, bucket) &&
- bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast) &&
- !queue_work(c->write_ref_wq, &c->discard_fast_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
+ goto put_ioref;
+
+ if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
+ return;
+
+ bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+put_ioref:
+ percpu_ref_put(&ca->io_ref);
}
static int invalidate_one_bucket(struct btree_trans *trans,
@@ -2038,7 +2035,8 @@ again:
static void bch2_do_invalidates_work(struct work_struct *work)
{
- struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
+ struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work);
+ struct bch_fs *c = ca->fs;
struct btree_trans *trans = bch2_trans_get(c);
int ret = 0;
@@ -2046,52 +2044,63 @@ static void bch2_do_invalidates_work(struct work_struct *work)
if (ret)
goto err;
- for_each_member_device(c, ca) {
- s64 nr_to_invalidate =
- should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
- struct btree_iter iter;
- bool wrapped = false;
-
- bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
- lru_pos(ca->dev_idx, 0,
- ((bch2_current_io_time(c, READ) + U32_MAX) &
- LRU_TIME_MAX)), 0);
-
- while (true) {
- bch2_trans_begin(trans);
-
- struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
- ret = bkey_err(k);
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
- continue;
- if (ret)
- break;
- if (!k.k)
- break;
+ s64 nr_to_invalidate =
+ should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
+ struct btree_iter iter;
+ bool wrapped = false;
- ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
- if (ret)
- break;
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
+ lru_pos(ca->dev_idx, 0,
+ ((bch2_current_io_time(c, READ) + U32_MAX) &
+ LRU_TIME_MAX)), 0);
- bch2_btree_iter_advance(&iter);
- }
- bch2_trans_iter_exit(trans, &iter);
+ while (true) {
+ bch2_trans_begin(trans);
- if (ret < 0) {
- bch2_dev_put(ca);
+ struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
+ ret = bkey_err(k);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ continue;
+ if (ret)
break;
- }
+ if (!k.k)
+ break;
+
+ ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
+ if (ret)
+ break;
+
+ bch2_btree_iter_advance(&iter);
}
+ bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_put(trans);
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+ percpu_ref_put(&ca->io_ref);
+}
+
+void bch2_dev_do_invalidates(struct bch_dev *ca)
+{
+ struct bch_fs *c = ca->fs;
+
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ return;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
+ goto put_ioref;
+
+ if (queue_work(c->write_ref_wq, &ca->invalidate_work))
+ return;
+
+ bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+put_ioref:
+ percpu_ref_put(&ca->io_ref);
}
void bch2_do_invalidates(struct bch_fs *c)
{
- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) &&
- !queue_work(c->write_ref_wq, &c->invalidate_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+ for_each_member_device(c, ca)
+ bch2_dev_do_invalidates(ca);
}
int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
@@ -2407,16 +2416,20 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
set_bit(ca->dev_idx, c->rw_devs[i].d);
}
-void bch2_fs_allocator_background_exit(struct bch_fs *c)
+void bch2_dev_allocator_background_exit(struct bch_dev *ca)
+{
+ darray_exit(&ca->discard_buckets_in_flight);
+}
+
+void bch2_dev_allocator_background_init(struct bch_dev *ca)
{
- darray_exit(&c->discard_buckets_in_flight);
+ mutex_init(&ca->discard_buckets_in_flight_lock);
+ INIT_WORK(&ca->discard_work, bch2_do_discards_work);
+ INIT_WORK(&ca->discard_fast_work, bch2_do_discards_fast_work);
+ INIT_WORK(&ca->invalidate_work, bch2_do_invalidates_work);
}
void bch2_fs_allocator_background_init(struct bch_fs *c)
{
spin_lock_init(&c->freelist_lock);
- mutex_init(&c->discard_buckets_in_flight_lock);
- INIT_WORK(&c->discard_work, bch2_do_discards_work);
- INIT_WORK(&c->discard_fast_work, bch2_do_discards_fast_work);
- INIT_WORK(&c->invalidate_work, bch2_do_invalidates_work);
}
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index c3cc3c5ba5b6..ba2c5557a3f0 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -275,6 +275,7 @@ int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
enum btree_iter_update_trigger_flags);
int bch2_check_alloc_info(struct bch_fs *);
int bch2_check_alloc_to_lru_refs(struct bch_fs *);
+void bch2_dev_do_discards(struct bch_dev *);
void bch2_do_discards(struct bch_fs *);
static inline u64 should_invalidate_buckets(struct bch_dev *ca,
@@ -289,6 +290,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
}
+void bch2_dev_do_invalidates(struct bch_dev *);
void bch2_do_invalidates(struct bch_fs *);
static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a)
@@ -312,7 +314,9 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *);
void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
-void bch2_fs_allocator_background_exit(struct bch_fs *);
+void bch2_dev_allocator_background_exit(struct bch_dev *);
+void bch2_dev_allocator_background_init(struct bch_dev *);
+
void bch2_fs_allocator_background_init(struct bch_fs *);
#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 927a5f300b30..27d97c22ae27 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -621,13 +621,13 @@ again:
avail = dev_buckets_free(ca, *usage, watermark);
if (usage->d[BCH_DATA_need_discard].buckets > avail)
- bch2_do_discards(c);
+ bch2_dev_do_discards(ca);
if (usage->d[BCH_DATA_need_gc_gens].buckets > avail)
bch2_gc_gens_async(c);
if (should_invalidate_buckets(ca, *usage))
- bch2_do_invalidates(c);
+ bch2_dev_do_invalidates(ca);
if (!avail) {
if (cl && !waiting) {
@@ -1703,6 +1703,7 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
+ printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 24);
percpu_down_read(&c->mark_lock);
@@ -1736,6 +1737,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
+ printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 12);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 4321f9fb73bd..6d8b1bc90be0 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -434,13 +434,6 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
return ret;
}
-static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
-{
- return bpos_eq(l.k->p, r.k->p) &&
- bkey_bytes(l.k) == bkey_bytes(r.k) &&
- !memcmp(l.v, r.v, bkey_val_bytes(l.k));
-}
-
struct extents_to_bp_state {
struct bpos bucket_start;
struct bpos bucket_end;
@@ -536,11 +529,8 @@ static int check_bp_exists(struct btree_trans *trans,
struct btree_iter other_extent_iter = {};
struct printbuf buf = PRINTBUF;
struct bkey_s_c bp_k;
- struct bkey_buf tmp;
int ret = 0;
- bch2_bkey_buf_init(&tmp);
-
struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket);
if (!ca) {
prt_str(&buf, "extent for nonexistent device:bucket ");
@@ -565,22 +555,9 @@ static int check_bp_exists(struct btree_trans *trans,
if (bp_k.k->type != KEY_TYPE_backpointer ||
memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
- bch2_bkey_buf_reassemble(&tmp, c, orig_k);
-
- if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
- if (bp.level) {
- bch2_trans_unlock(trans);
- bch2_btree_interior_updates_flush(c);
- }
-
- ret = bch2_btree_write_buffer_flush_sync(trans);
- if (ret)
- goto err;
-
- bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
- ret = -BCH_ERR_transaction_restart_write_buffer_flush;
- goto out;
- }
+ ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed);
+ if (ret)
+ goto err;
goto check_existing_bp;
}
@@ -589,7 +566,6 @@ err:
fsck_err:
bch2_trans_iter_exit(trans, &other_extent_iter);
bch2_trans_iter_exit(trans, &bp_iter);
- bch2_bkey_buf_exit(&tmp, c);
bch2_dev_put(ca);
printbuf_exit(&buf);
return ret;
@@ -794,6 +770,8 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
!((1U << btree) & btree_interior_mask))
continue;
+ bch2_trans_begin(trans);
+
__for_each_btree_node(trans, iter, btree,
btree == start.btree ? start.pos : POS_MIN,
0, depth, BTREE_ITER_prefetch, b, ret) {
@@ -905,7 +883,7 @@ static int check_one_backpointer(struct btree_trans *trans,
struct bbpos start,
struct bbpos end,
struct bkey_s_c_backpointer bp,
- struct bpos *last_flushed_pos)
+ struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@@ -925,20 +903,18 @@ static int check_one_backpointer(struct btree_trans *trans,
if (ret)
return ret;
- if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
- *last_flushed_pos = bp.k->p;
- ret = bch2_btree_write_buffer_flush_sync(trans) ?:
- -BCH_ERR_transaction_restart_write_buffer_flush;
- goto out;
- }
+ if (!k.k) {
+ ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed);
+ if (ret)
+ goto out;
- if (fsck_err_on(!k.k, c,
- backpointer_to_missing_ptr,
- "backpointer for missing %s\n %s",
- bp.v->level ? "btree node" : "extent",
- (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
- ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
- goto out;
+ if (fsck_err(c, backpointer_to_missing_ptr,
+ "backpointer for missing %s\n %s",
+ bp.v->level ? "btree node" : "extent",
+ (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
+ ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
+ goto out;
+ }
}
out:
fsck_err:
@@ -951,14 +927,20 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
struct bbpos start,
struct bbpos end)
{
- struct bpos last_flushed_pos = SPOS_MAX;
+ struct bkey_buf last_flushed;
- return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
+ int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_one_backpointer(trans, start, end,
bkey_s_c_to_backpointer(k),
- &last_flushed_pos));
+ &last_flushed));
+
+ bch2_bkey_buf_exit(&last_flushed, trans->c);
+ return ret;
}
int bch2_check_backpointers_to_extents(struct bch_fs *c)
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index a6b83ecab7ce..1106fec6e155 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -493,6 +493,11 @@ struct io_count {
u64 sectors[2][BCH_DATA_NR];
};
+struct discard_in_flight {
+ bool in_progress:1;
+ u64 bucket:63;
+};
+
struct bch_dev {
struct kobject kobj;
#ifdef CONFIG_BCACHEFS_DEBUG
@@ -554,6 +559,12 @@ struct bch_dev {
size_t inc_gen_really_needs_gc;
size_t buckets_waiting_on_journal;
+ struct work_struct invalidate_work;
+ struct work_struct discard_work;
+ struct mutex discard_buckets_in_flight_lock;
+ DARRAY(struct discard_in_flight) discard_buckets_in_flight;
+ struct work_struct discard_fast_work;
+
atomic64_t rebalance_work;
struct journal_device journal;
@@ -915,11 +926,6 @@ struct bch_fs {
unsigned write_points_nr;
struct buckets_waiting_for_journal buckets_waiting_for_journal;
- struct work_struct invalidate_work;
- struct work_struct discard_work;
- struct mutex discard_buckets_in_flight_lock;
- DARRAY(struct bpos) discard_buckets_in_flight;
- struct work_struct discard_fast_work;
/* GARBAGE COLLECTION */
struct work_struct gc_gens_work;
diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c
index 94a1d1982fa8..587d7318a2e8 100644
--- a/fs/bcachefs/bkey.c
+++ b/fs/bcachefs/bkey.c
@@ -660,8 +660,9 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
bch2_bkey_format_field_overflows(f, i)) {
unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
- u64 packed_max = f->bits_per_field[i]
- ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1))
+ unsigned packed_bits = min(64, f->bits_per_field[i]);
+ u64 packed_max = packed_bits
+ ? ~((~0ULL << 1) << (packed_bits - 1))
: 0;
prt_printf(err, "field %u too large: %llu + %llu > %llu",
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index fcd43915df07..936357149cf0 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -194,6 +194,13 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r)
return bkey_gt(l, r) ? l : r;
}
+static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
+{
+ return bpos_eq(l.k->p, r.k->p) &&
+ bkey_bytes(l.k) == bkey_bytes(r.k) &&
+ !memcmp(l.v, r.v, bkey_val_bytes(l.k));
+}
+
void bch2_bpos_swab(struct bpos *);
void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 0e477a926579..7c72a9e6f511 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -903,6 +903,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
bch2_dev_usage_update(c, ca, &old_gc, &gc, 0, true);
percpu_up_read(&c->mark_lock);
+ gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca);
+
if (fsck_err_on(new.data_type != gc.data_type, c,
alloc_key_data_type_wrong,
"bucket %llu:%llu gen %u has wrong data_type"
@@ -916,23 +918,19 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
#define copy_bucket_field(_errtype, _f) \
if (fsck_err_on(new._f != gc._f, c, _errtype, \
"bucket %llu:%llu gen %u data type %s has wrong " #_f \
- ": got %u, should be %u", \
+ ": got %llu, should be %llu", \
iter->pos.inode, iter->pos.offset, \
gc.gen, \
bch2_data_type_str(gc.data_type), \
- new._f, gc._f)) \
+ (u64) new._f, (u64) gc._f)) \
new._f = gc._f; \
- copy_bucket_field(alloc_key_gen_wrong,
- gen);
- copy_bucket_field(alloc_key_dirty_sectors_wrong,
- dirty_sectors);
- copy_bucket_field(alloc_key_cached_sectors_wrong,
- cached_sectors);
- copy_bucket_field(alloc_key_stripe_wrong,
- stripe);
- copy_bucket_field(alloc_key_stripe_redundancy_wrong,
- stripe_redundancy);
+ copy_bucket_field(alloc_key_gen_wrong, gen);
+ copy_bucket_field(alloc_key_dirty_sectors_wrong, dirty_sectors);
+ copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors);
+ copy_bucket_field(alloc_key_stripe_wrong, stripe);
+ copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy);
+ copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru);
#undef copy_bucket_field
if (!bch2_alloc_v4_cmp(*old, new))
@@ -946,7 +944,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
a->v = new;
/*
- * The trigger normally makes sure this is set, but we're not running
+ * The trigger normally makes sure these are set, but we're not running
* triggers:
*/
if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 3a1419d17888..0ed9e6574fcd 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -3130,7 +3130,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
memset(trans, 0, sizeof(*trans));
- closure_init_stack(&trans->ref);
seqmutex_lock(&c->btree_trans_lock);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
@@ -3150,18 +3149,12 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
BUG_ON(pos_task &&
pid == pos_task->pid &&
pos->locked);
-
- if (pos_task && pid < pos_task->pid) {
- list_add_tail(&trans->list, &pos->list);
- goto list_add_done;
- }
}
}
- list_add_tail(&trans->list, &c->btree_trans_list);
-list_add_done:
+
+ list_add(&trans->list, &c->btree_trans_list);
seqmutex_unlock(&c->btree_trans_lock);
got_trans:
- trans->ref.closure_get_happened = false;
trans->c = c;
trans->last_begin_time = local_clock();
trans->fn_idx = fn_idx;
@@ -3200,6 +3193,8 @@ got_trans:
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_lock_time = jiffies;
trans->srcu_held = true;
+
+ closure_init_stack_release(&trans->ref);
return trans;
}
@@ -3257,10 +3252,10 @@ void bch2_trans_put(struct btree_trans *trans)
bch2_journal_keys_put(c);
/*
- * trans->ref protects trans->locking_wait.task, btree_paths arary; used
+ * trans->ref protects trans->locking_wait.task, btree_paths array; used
* by cycle detector
*/
- closure_sync(&trans->ref);
+ closure_return_sync(&trans->ref);
trans->locking_wait.task = NULL;
unsigned long *paths_allocated = trans->paths_allocated;
@@ -3385,8 +3380,6 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
per_cpu_ptr(c->btree_trans_bufs, cpu)->trans;
if (trans) {
- closure_sync(&trans->ref);
-
seqmutex_lock(&c->btree_trans_lock);
list_del(&trans->list);
seqmutex_unlock(&c->btree_trans_lock);
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 75c8a196b3f6..d0e92d948002 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -1,11 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bkey_buf.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "error.h"
+#include "extents.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
@@ -492,6 +494,41 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
return ret;
}
+/**
+ * In check and repair code, when checking references to write buffer btrees we
+ * need to issue a flush before we have a definitive error: this issues a flush
+ * if this is a key we haven't yet checked.
+ */
+int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
+ struct bkey_s_c referring_k,
+ struct bkey_buf *last_flushed)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_buf tmp;
+ int ret = 0;
+
+ bch2_bkey_buf_init(&tmp);
+
+ if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) {
+ bch2_bkey_buf_reassemble(&tmp, c, referring_k);
+
+ if (bkey_is_btree_ptr(referring_k.k)) {
+ bch2_trans_unlock(trans);
+ bch2_btree_interior_updates_flush(c);
+ }
+
+ ret = bch2_btree_write_buffer_flush_sync(trans);
+ if (ret)
+ goto err;
+
+ bch2_bkey_buf_copy(last_flushed, c, tmp.k);
+ ret = -BCH_ERR_transaction_restart_write_buffer_flush;
+ }
+err:
+ bch2_bkey_buf_exit(&tmp, c);
+ return ret;
+}
+
static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work);
diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h
index eebcd2b15249..dd5e64218b50 100644
--- a/fs/bcachefs/btree_write_buffer.h
+++ b/fs/bcachefs/btree_write_buffer.h
@@ -23,6 +23,9 @@ int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
int bch2_btree_write_buffer_tryflush(struct btree_trans *);
+struct bkey_buf;
+int bch2_btree_write_buffer_maybe_flush(struct btree_trans *, struct bkey_s_c, struct bkey_buf *);
+
struct journal_keys_to_wb {
struct btree_write_buffer_keys *wb;
size_t room;
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index 9e54323f0f5f..6d82e1165adc 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -216,7 +216,8 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
ret = PTR_ERR_OR_ZERO(optstr) ?:
bch2_parse_mount_opts(NULL, &thr->opts, optstr);
- kfree(optstr);
+ if (!IS_ERR(optstr))
+ kfree(optstr);
if (ret)
goto err;
@@ -319,7 +320,8 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
return ret;
ret = bch2_dev_add(c, path);
- kfree(path);
+ if (!IS_ERR(path))
+ kfree(path);
return ret;
}
@@ -850,7 +852,8 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c,
ret = PTR_ERR_OR_ZERO(optstr) ?:
bch2_parse_mount_opts(c, &thr->opts, optstr);
- kfree(optstr);
+ if (!IS_ERR(optstr))
+ kfree(optstr);
if (ret)
goto err;
diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c
index 363644451106..0f40b585ce2b 100644
--- a/fs/bcachefs/clock.c
+++ b/fs/bcachefs/clock.c
@@ -132,14 +132,9 @@ static struct io_timer *get_expired_timer(struct io_clock *clock,
{
struct io_timer *ret = NULL;
- spin_lock(&clock->timer_lock);
-
if (clock->timers.used &&
time_after_eq(now, clock->timers.data[0]->expire))
heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
-
- spin_unlock(&clock->timer_lock);
-
return ret;
}
@@ -148,8 +143,10 @@ void __bch2_increment_clock(struct io_clock *clock, unsigned sectors)
struct io_timer *timer;
unsigned long now = atomic64_add_return(sectors, &clock->now);
+ spin_lock(&clock->timer_lock);
while ((timer = get_expired_timer(clock, now)))
timer->fn(timer);
+ spin_unlock(&clock->timer_lock);
}
void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 1a0072eef109..0087b8555ead 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -5,7 +5,9 @@
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
+#include "compress.h"
#include "data_update.h"
+#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "extents.h"
@@ -454,6 +456,38 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans,
}
}
+void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
+{
+ printbuf_tabstop_push(out, 20);
+ prt_str(out, "rewrite ptrs:\t");
+ bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
+ prt_newline(out);
+
+ prt_str(out, "kill ptrs:\t");
+ bch2_prt_u64_base2(out, data_opts->kill_ptrs);
+ prt_newline(out);
+
+ prt_str(out, "target:\t");
+ bch2_target_to_text(out, c, data_opts->target);
+ prt_newline(out);
+
+ prt_str(out, "compression:\t");
+ bch2_compression_opt_to_text(out, background_compression(*io_opts));
+ prt_newline(out);
+
+ prt_str(out, "extra replicas:\t");
+ prt_u64(out, data_opts->extra_replicas);
+}
+
+void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
+{
+ bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
+ prt_newline(out);
+ bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
+}
+
int bch2_extent_drop_ptrs(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
@@ -643,6 +677,16 @@ int bch2_data_update_init(struct btree_trans *trans,
if (!(durability_have + durability_removing))
m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
+ if (!m->op.nr_replicas) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_data_update_to_text(&buf, m);
+ WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf);
+ printbuf_exit(&buf);
+ ret = -BCH_ERR_data_update_done;
+ goto done;
+ }
+
m->op.nr_replicas_required = m->op.nr_replicas;
if (reserve_sectors) {
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 991095bbd469..8d36365bdea8 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -17,6 +17,9 @@ struct data_update_opts {
unsigned write_flags;
};
+void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *,
+ struct bch_io_opts *, struct data_update_opts *);
+
struct data_update {
/* extent being updated: */
enum btree_id btree_id;
@@ -27,6 +30,8 @@ struct data_update {
struct bch_write_op op;
};
+void bch2_data_update_to_text(struct printbuf *, struct data_update *);
+
int bch2_data_update_index_update(struct bch_write_op *);
void bch2_data_update_read_done(struct data_update *,
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 51cbf3928361..ebabab171fe5 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -568,6 +568,32 @@ static const struct file_operations cached_btree_nodes_ops = {
.read = bch2_cached_btree_nodes_read,
};
+typedef int (*list_cmp_fn)(const struct list_head *l, const struct list_head *r);
+
+static void list_sort(struct list_head *head, list_cmp_fn cmp)
+{
+ struct list_head *pos;
+
+ list_for_each(pos, head)
+ while (!list_is_last(pos, head) &&
+ cmp(pos, pos->next) > 0) {
+ struct list_head *pos2, *next = pos->next;
+
+ list_del(next);
+ list_for_each(pos2, head)
+ if (cmp(next, pos2) < 0)
+ goto pos_found;
+ BUG();
+pos_found:
+ list_add_tail(next, pos2);
+ }
+}
+
+static int list_ptr_order_cmp(const struct list_head *l, const struct list_head *r)
+{
+ return cmp_int(l, r);
+}
+
static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
@@ -575,41 +601,39 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
struct bch_fs *c = i->c;
struct btree_trans *trans;
ssize_t ret = 0;
- u32 seq;
i->ubuf = buf;
i->size = size;
i->ret = 0;
restart:
seqmutex_lock(&c->btree_trans_lock);
- list_for_each_entry(trans, &c->btree_trans_list, list) {
- struct task_struct *task = READ_ONCE(trans->locking_wait.task);
+ list_sort(&c->btree_trans_list, list_ptr_order_cmp);
- if (!task || task->pid <= i->iter)
+ list_for_each_entry(trans, &c->btree_trans_list, list) {
+ if ((ulong) trans <= i->iter)
continue;
- closure_get(&trans->ref);
- seq = seqmutex_seq(&c->btree_trans_lock);
- seqmutex_unlock(&c->btree_trans_lock);
+ i->iter = (ulong) trans;
- ret = flush_buf(i);
- if (ret) {
- closure_put(&trans->ref);
- goto unlocked;
- }
+ if (!closure_get_not_zero(&trans->ref))
+ continue;
+
+ u32 seq = seqmutex_unlock(&c->btree_trans_lock);
bch2_btree_trans_to_text(&i->buf, trans);
prt_printf(&i->buf, "backtrace:\n");
printbuf_indent_add(&i->buf, 2);
- bch2_prt_task_backtrace(&i->buf, task, 0, GFP_KERNEL);
+ bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL);
printbuf_indent_sub(&i->buf, 2);
prt_newline(&i->buf);
- i->iter = task->pid;
-
closure_put(&trans->ref);
+ ret = flush_buf(i);
+ if (ret)
+ goto unlocked;
+
if (!seqmutex_relock(&c->btree_trans_lock, seq))
goto restart;
}
@@ -804,50 +828,55 @@ static const struct file_operations btree_transaction_stats_op = {
.read = btree_transaction_stats_read,
};
-static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
- size_t size, loff_t *ppos)
+/* walk btree transactions until we find a deadlock and print it */
+static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
{
- struct dump_iter *i = file->private_data;
- struct bch_fs *c = i->c;
struct btree_trans *trans;
- ssize_t ret = 0;
- u32 seq;
-
- i->ubuf = buf;
- i->size = size;
- i->ret = 0;
-
- if (i->iter)
- goto out;
+ ulong iter = 0;
restart:
seqmutex_lock(&c->btree_trans_lock);
- list_for_each_entry(trans, &c->btree_trans_list, list) {
- struct task_struct *task = READ_ONCE(trans->locking_wait.task);
+ list_sort(&c->btree_trans_list, list_ptr_order_cmp);
- if (!task || task->pid <= i->iter)
+ list_for_each_entry(trans, &c->btree_trans_list, list) {
+ if ((ulong) trans <= iter)
continue;
- closure_get(&trans->ref);
- seq = seqmutex_seq(&c->btree_trans_lock);
- seqmutex_unlock(&c->btree_trans_lock);
+ iter = (ulong) trans;
- ret = flush_buf(i);
- if (ret) {
- closure_put(&trans->ref);
- goto out;
- }
+ if (!closure_get_not_zero(&trans->ref))
+ continue;
- bch2_check_for_deadlock(trans, &i->buf);
+ u32 seq = seqmutex_unlock(&c->btree_trans_lock);
- i->iter = task->pid;
+ bool found = bch2_check_for_deadlock(trans, out) != 0;
closure_put(&trans->ref);
+ if (found)
+ return;
+
if (!seqmutex_relock(&c->btree_trans_lock, seq))
goto restart;
}
seqmutex_unlock(&c->btree_trans_lock);
-out:
+}
+
+static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct bch_fs *c = i->c;
+ ssize_t ret = 0;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ if (!i->iter) {
+ btree_deadlock_to_text(&i->buf, c);
+ i->iter++;
+ }
+
if (i->buf.allocation_failure)
ret = -ENOMEM;
diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h
index 24840aee335c..795f4fc0bab1 100644
--- a/fs/bcachefs/eytzinger.h
+++ b/fs/bcachefs/eytzinger.h
@@ -48,7 +48,7 @@ static inline unsigned eytzinger1_right_child(unsigned i)
static inline unsigned eytzinger1_first(unsigned size)
{
- return rounddown_pow_of_two(size);
+ return size ? rounddown_pow_of_two(size) : 0;
}
static inline unsigned eytzinger1_last(unsigned size)
@@ -101,7 +101,9 @@ static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
static inline unsigned eytzinger1_extra(unsigned size)
{
- return (size + 1 - rounddown_pow_of_two(size)) << 1;
+ return size
+ ? (size + 1 - rounddown_pow_of_two(size)) << 1
+ : 0;
}
static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index f9c9a95d7d4c..74a1e12a7a14 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -194,6 +194,12 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
* discard_new_inode() expects it to be set...
*/
inode->v.i_flags |= I_NEW;
+ /*
+ * We don't want bch2_evict_inode() to delete the inode on disk,
+ * we just raced and had another inode in cache. Normally new
+ * inodes don't have nlink == 0 - except tmpfiles do...
+ */
+ set_nlink(&inode->v, 1);
discard_new_inode(&inode->v);
inode = old;
} else {
@@ -2026,6 +2032,8 @@ err_put_super:
__bch2_fs_stop(c);
deactivate_locked_super(sb);
err:
+ if (ret)
+ pr_err("error: %s", bch2_err_str(ret));
/*
* On an inconsistency error in recovery we might see an -EROFS derived
* errorcode (from the journal), but we don't want to return that to
@@ -2065,7 +2073,8 @@ int __init bch2_vfs_init(void)
{
int ret = -ENOMEM;
- bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT);
+ bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT |
+ SLAB_ACCOUNT);
if (!bch2_inode_cache)
goto err;
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index c97fa7002b06..ebf39ef72fb2 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -389,7 +389,6 @@ retry:
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- bch2_trans_unlock(trans);
if (!bch2_bkey_matches_ptr(c, k,
rbio->pick.ptr,
@@ -1004,6 +1003,9 @@ get_bio:
rbio->promote = promote;
INIT_WORK(&rbio->work, NULL);
+ if (flags & BCH_READ_NODECODE)
+ orig->pick = pick;
+
rbio->bio.bi_opf = orig->bio.bi_opf;
rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
rbio->bio.bi_end_io = bch2_read_endio;
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index dac2f498ae8b..10b19791ec98 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1095,7 +1095,7 @@ unlock:
return ret;
}
-int bch2_dev_journal_alloc(struct bch_dev *ca)
+int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
{
unsigned nr;
int ret;
@@ -1117,7 +1117,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca)
min(1 << 13,
(1 << 24) / ca->mi.bucket_size));
- ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
+ ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL);
err:
bch_err_fn(ca, ret);
return ret;
@@ -1129,7 +1129,7 @@ int bch2_fs_journal_alloc(struct bch_fs *c)
if (ca->journal.nr)
continue;
- int ret = bch2_dev_journal_alloc(ca);
+ int ret = bch2_dev_journal_alloc(ca, true);
if (ret) {
percpu_ref_put(&ca->io_ref);
return ret;
@@ -1184,9 +1184,11 @@ void bch2_fs_journal_stop(struct journal *j)
journal_quiesce(j);
cancel_delayed_work_sync(&j->write_work);
- BUG_ON(!bch2_journal_error(j) &&
- test_bit(JOURNAL_replay_done, &j->flags) &&
- j->last_empty_seq != journal_cur_seq(j));
+ WARN(!bch2_journal_error(j) &&
+ test_bit(JOURNAL_replay_done, &j->flags) &&
+ j->last_empty_seq != journal_cur_seq(j),
+ "journal shutdown error: cur seq %llu but last empty seq %llu",
+ journal_cur_seq(j), j->last_empty_seq);
if (!bch2_journal_error(j))
clear_bit(JOURNAL_running, &j->flags);
@@ -1418,8 +1420,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
unsigned long now = jiffies;
u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes;
- if (!out->nr_tabstops)
- printbuf_tabstop_push(out, 28);
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 28);
out->atomic++;
rcu_read_lock();
@@ -1521,6 +1523,11 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
struct journal_entry_pin *pin;
spin_lock(&j->lock);
+ if (!test_bit(JOURNAL_running, &j->flags)) {
+ spin_unlock(&j->lock);
+ return true;
+ }
+
*seq = max(*seq, j->pin.front);
if (*seq >= j->pin.back) {
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index fd1f7cdaa8bc..bc6b9c39dcb4 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -433,7 +433,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
unsigned nr);
-int bch2_dev_journal_alloc(struct bch_dev *);
+int bch2_dev_journal_alloc(struct bch_dev *, bool);
int bch2_fs_journal_alloc(struct bch_fs *);
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 492426c8d869..2326e2cb9cd2 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -415,6 +415,8 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c,
flags|BCH_VALIDATE_journal);
if (ret == FSCK_DELETED_KEY)
continue;
+ else if (ret)
+ return ret;
k = bkey_next(k);
}
@@ -1677,6 +1679,13 @@ static CLOSURE_CALLBACK(journal_write_done)
mod_delayed_work(j->wq, &j->write_work, max(0L, delta));
}
+ /*
+ * We don't typically trigger journal writes from her - the next journal
+ * write will be triggered immediately after the previous one is
+ * allocated, in bch2_journal_write() - but the journal write error path
+ * is special:
+ */
+ bch2_journal_do_writes(j);
spin_unlock(&j->lock);
}
@@ -1755,11 +1764,13 @@ static CLOSURE_CALLBACK(journal_write_preflush)
if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
spin_lock(&j->lock);
- closure_wait(&j->async_wait, cl);
+ if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
+ closure_wait(&j->async_wait, cl);
+ spin_unlock(&j->lock);
+ continue_at(cl, journal_write_preflush, j->wq);
+ return;
+ }
spin_unlock(&j->lock);
-
- continue_at(cl, journal_write_preflush, j->wq);
- return;
}
if (w->separate_flush) {
diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
index ed4846709611..1f25c111c54c 100644
--- a/fs/bcachefs/journal_seq_blacklist.c
+++ b/fs/bcachefs/journal_seq_blacklist.c
@@ -232,7 +232,7 @@ bool bch2_blacklist_entries_gc(struct bch_fs *c)
BUG_ON(nr != t->nr);
unsigned i;
- for (src = bl->start, i = eytzinger0_first(t->nr);
+ for (src = bl->start, i = t->nr == 0 ? 0 : eytzinger0_first(t->nr);
src < bl->start + nr;
src++, i = eytzinger0_next(i, nr)) {
BUG_ON(t->entries[i].start != le64_to_cpu(src->start));
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index a40d116224ed..b12894ef44f3 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -77,6 +77,45 @@ static const char * const bch2_lru_types[] = {
NULL
};
+int bch2_lru_check_set(struct btree_trans *trans,
+ u16 lru_id, u64 time,
+ struct bkey_s_c referring_k,
+ struct bkey_buf *last_flushed)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+ struct btree_iter lru_iter;
+ struct bkey_s_c lru_k =
+ bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
+ lru_pos(lru_id,
+ bucket_to_u64(referring_k.k->p),
+ time), 0);
+ int ret = bkey_err(lru_k);
+ if (ret)
+ return ret;
+
+ if (lru_k.k->type != KEY_TYPE_set) {
+ ret = bch2_btree_write_buffer_maybe_flush(trans, referring_k, last_flushed);
+ if (ret)
+ goto err;
+
+ if (fsck_err(c, alloc_key_to_missing_lru_entry,
+ "missing %s lru entry\n"
+ " %s",
+ bch2_lru_types[lru_type(lru_k)],
+ (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) {
+ ret = bch2_lru_set(trans, lru_id, bucket_to_u64(referring_k.k->p), time);
+ if (ret)
+ goto err;
+ }
+ }
+err:
+fsck_err:
+ bch2_trans_iter_exit(trans, &lru_iter);
+ printbuf_exit(&buf);
+ return ret;
+}
+
static int bch2_check_lru_key(struct btree_trans *trans,
struct btree_iter *lru_iter,
struct bkey_s_c lru_k,
diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
index bd71ba77de07..ed75bcf59d47 100644
--- a/fs/bcachefs/lru.h
+++ b/fs/bcachefs/lru.h
@@ -61,6 +61,9 @@ int bch2_lru_del(struct btree_trans *, u16, u64, u64);
int bch2_lru_set(struct btree_trans *, u16, u64, u64);
int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
+struct bkey_buf;
+int bch2_lru_check_set(struct btree_trans *, u16, u64, struct bkey_s_c, struct bkey_buf *);
+
int bch2_check_lrus(struct bch_fs *);
#endif /* _BCACHEFS_LRU_H */
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 6e477fadaa2a..e714e3bd5bbb 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -36,31 +36,6 @@ const char * const bch2_data_ops_strs[] = {
NULL
};
-static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
- struct bch_io_opts *io_opts,
- struct data_update_opts *data_opts)
-{
- printbuf_tabstop_push(out, 20);
- prt_str(out, "rewrite ptrs:\t");
- bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
- prt_newline(out);
-
- prt_str(out, "kill ptrs:\t");
- bch2_prt_u64_base2(out, data_opts->kill_ptrs);
- prt_newline(out);
-
- prt_str(out, "target:\t");
- bch2_target_to_text(out, c, data_opts->target);
- prt_newline(out);
-
- prt_str(out, "compression:\t");
- bch2_compression_opt_to_text(out, background_compression(*io_opts));
- prt_newline(out);
-
- prt_str(out, "extra replicas:\t");
- prt_u64(out, data_opts->extra_replicas);
-}
-
static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c
index bda33e59e226..c1270d790e43 100644
--- a/fs/bcachefs/sb-errors.c
+++ b/fs/bcachefs/sb-errors.c
@@ -110,19 +110,25 @@ out:
void bch2_sb_errors_from_cpu(struct bch_fs *c)
{
bch_sb_errors_cpu *src = &c->fsck_error_counts;
- struct bch_sb_field_errors *dst =
- bch2_sb_field_resize(&c->disk_sb, errors,
- bch2_sb_field_errors_u64s(src->nr));
+ struct bch_sb_field_errors *dst;
unsigned i;
+ mutex_lock(&c->fsck_error_counts_lock);
+
+ dst = bch2_sb_field_resize(&c->disk_sb, errors,
+ bch2_sb_field_errors_u64s(src->nr));
+
if (!dst)
- return;
+ goto err;
for (i = 0; i < src->nr; i++) {
SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id);
SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr);
dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time);
}
+
+err:
+ mutex_unlock(&c->fsck_error_counts_lock);
}
static int bch2_sb_errors_to_cpu(struct bch_fs *c)
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index d6f35a99c429..d54121ec093f 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -286,7 +286,8 @@ enum bch_fsck_flags {
x(accounting_mismatch, 272, 0) \
x(accounting_replicas_not_marked, 273, 0) \
x(invalid_btree_id, 274, 0) \
- x(alloc_key_io_time_bad, 275, 0)
+ x(alloc_key_io_time_bad, 275, 0) \
+ x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/seqmutex.h b/fs/bcachefs/seqmutex.h
index c1860d8163fb..c4b3d8d3f414 100644
--- a/fs/bcachefs/seqmutex.h
+++ b/fs/bcachefs/seqmutex.h
@@ -19,17 +19,14 @@ static inline bool seqmutex_trylock(struct seqmutex *lock)
static inline void seqmutex_lock(struct seqmutex *lock)
{
mutex_lock(&lock->lock);
-}
-
-static inline void seqmutex_unlock(struct seqmutex *lock)
-{
lock->seq++;
- mutex_unlock(&lock->lock);
}
-static inline u32 seqmutex_seq(struct seqmutex *lock)
+static inline u32 seqmutex_unlock(struct seqmutex *lock)
{
- return lock->seq;
+ u32 seq = lock->seq;
+ mutex_unlock(&lock->lock);
+ return seq;
}
static inline bool seqmutex_relock(struct seqmutex *lock, u32 seq)
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 4ef98e696673..24023d6a9698 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -168,6 +168,9 @@ static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);
+ if (unlikely(new_bytes > INT_MAX))
+ return NULL;
+
new = kvzalloc(new_bytes, GFP_KERNEL);
if (!new)
return NULL;
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 9083df82073a..da735608d47c 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -536,7 +536,6 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
- bch2_fs_allocator_background_exit(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_counters_exit(c);
bch2_fs_snapshots_exit(c);
@@ -564,8 +563,11 @@ static void __bch2_fs_free(struct bch_fs *c)
BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
- EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved));
- free_percpu(c->online_reserved);
+ if (c->online_reserved) {
+ u64 v = percpu_u64_get(c->online_reserved);
+ WARN(v, "online_reserved not 0 at shutdown: %lli", v);
+ free_percpu(c->online_reserved);
+ }
darray_exit(&c->btree_roots_extra);
free_percpu(c->pcpu);
@@ -1195,6 +1197,7 @@ static void bch2_dev_free(struct bch_dev *ca)
kfree(ca->buckets_nouse);
bch2_free_super(&ca->disk_sb);
+ bch2_dev_allocator_background_exit(ca);
bch2_dev_journal_exit(ca);
free_percpu(ca->io_done);
@@ -1317,6 +1320,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
atomic_long_set(&ca->ref, 1);
#endif
+ bch2_dev_allocator_background_init(ca);
+
if (percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
!(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) ||
@@ -1529,6 +1534,7 @@ static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
* The allocator thread itself allocates btree nodes, so stop it first:
*/
bch2_dev_allocator_remove(c, ca);
+ bch2_recalc_capacity(c);
bch2_dev_journal_stop(&c->journal, ca);
}
@@ -1540,6 +1546,7 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
+ bch2_dev_do_discards(ca);
}
int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
@@ -1765,7 +1772,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
if (ret)
goto err;
- ret = bch2_dev_journal_alloc(ca);
+ ret = bch2_dev_journal_alloc(ca, true);
bch_err_msg(c, ret, "allocating journal");
if (ret)
goto err;
@@ -1925,7 +1932,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
}
if (!ca->journal.nr) {
- ret = bch2_dev_journal_alloc(ca);
+ ret = bch2_dev_journal_alloc(ca, false);
bch_err_msg(ca, ret, "allocating journal");
if (ret)
goto err;
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 1a66be33bb04..60066822b532 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1924,8 +1924,17 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
next:
if (ret) {
/* Refcount held by the reclaim_bgs list after splice. */
- btrfs_get_block_group(bg);
- list_add_tail(&bg->bg_list, &retry_list);
+ spin_lock(&fs_info->unused_bgs_lock);
+ /*
+ * This block group might be added to the unused list
+ * during the above process. Move it back to the
+ * reclaim list otherwise.
+ */
+ if (list_empty(&bg->bg_list)) {
+ btrfs_get_block_group(bg);
+ list_add_tail(&bg->bg_list, &retry_list);
+ }
+ spin_unlock(&fs_info->unused_bgs_lock);
}
btrfs_put_block_group(bg);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f688fab55251..958155cc43a8 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3553,7 +3553,7 @@ err:
for (int i = 0; i < num_folios; i++) {
if (eb->folios[i]) {
detach_extent_buffer_folio(eb, eb->folios[i]);
- __folio_put(eb->folios[i]);
+ folio_put(eb->folios[i]);
}
}
__free_extent_buffer(eb);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3ab8dea5036b..dabc3d0793cf 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2697,7 +2697,7 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
u64 offset = bytenr - block_group->start;
u64 to_free, to_unusable;
int bg_reclaim_threshold = 0;
- bool initial = (size == block_group->length);
+ bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0));
u64 reclaimable_unusable;
WARN_ON(!initial && offset + size > block_group->zone_capacity);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 753db965f7c0..3a2b902b2d1f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -10385,7 +10385,7 @@ out_unlock:
out_folios:
for (i = 0; i < nr_folios; i++) {
if (folios[i])
- __folio_put(folios[i]);
+ folio_put(folios[i]);
}
kvfree(folios);
out:
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index fc2a7ea26354..39a15cca58ca 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1351,7 +1351,7 @@ static int flush_reservations(struct btrfs_fs_info *fs_info)
int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
{
- struct btrfs_root *quota_root;
+ struct btrfs_root *quota_root = NULL;
struct btrfs_trans_handle *trans = NULL;
int ret = 0;
@@ -1449,9 +1449,9 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
btrfs_free_tree_block(trans, btrfs_root_id(quota_root),
quota_root->node, 0, 1);
- btrfs_put_root(quota_root);
out:
+ btrfs_put_root(quota_root);
mutex_unlock(&fs_info->qgroup_ioctl_lock);
if (ret && trans)
btrfs_end_transaction(trans);
@@ -3062,8 +3062,6 @@ int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_inherit *inherit,
size_t size)
{
- if (!btrfs_qgroup_enabled(fs_info))
- return 0;
if (inherit->flags & ~BTRFS_QGROUP_INHERIT_FLAGS_SUPP)
return -EOPNOTSUPP;
if (size < sizeof(*inherit) || size > PAGE_SIZE)
@@ -3085,6 +3083,14 @@ int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info,
return -EINVAL;
/*
+ * Skip the inherit source qgroups check if qgroup is not enabled.
+ * Qgroup can still be later enabled causing problems, but in that case
+ * btrfs_qgroup_inherit() would just ignore those invalid ones.
+ */
+ if (!btrfs_qgroup_enabled(fs_info))
+ return 0;
+
+ /*
* Now check all the remaining qgroups, they should all:
*
* - Exist
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index cf531255ab76..9522a8b79d22 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -441,7 +441,8 @@ static int process_extent_item(struct btrfs_fs_info *fs_info,
u32 item_size = btrfs_item_size(leaf, slot);
unsigned long end, ptr;
u64 offset, flags, count;
- int type, ret;
+ int type;
+ int ret = 0;
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
flags = btrfs_extent_flags(leaf, ei);
@@ -486,7 +487,11 @@ static int process_extent_item(struct btrfs_fs_info *fs_info,
key->objectid, key->offset);
break;
case BTRFS_EXTENT_OWNER_REF_KEY:
- WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
+ if (!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)) {
+ btrfs_err(fs_info,
+ "found extent owner ref without simple quotas enabled");
+ ret = -EINVAL;
+ }
break;
default:
btrfs_err(fs_info, "invalid key type in iref");
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index afd6932f5e89..d7caa3732f07 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1688,20 +1688,24 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
(i << fs_info->sectorsize_bits);
int err;
- bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
- fs_info, scrub_read_endio, stripe);
- bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT;
-
io_stripe.is_scrub = true;
+ stripe_len = (nr_sectors - i) << fs_info->sectorsize_bits;
+ /*
+ * For RST cases, we need to manually split the bbio to
+ * follow the RST boundary.
+ */
err = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
- &stripe_len, &bioc, &io_stripe,
- &mirror);
+ &stripe_len, &bioc, &io_stripe, &mirror);
btrfs_put_bioc(bioc);
- if (err) {
- btrfs_bio_end_io(bbio,
- errno_to_blk_status(err));
- return;
+ if (err < 0) {
+ set_bit(i, &stripe->io_error_bitmap);
+ set_bit(i, &stripe->error_bitmap);
+ continue;
}
+
+ bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ,
+ fs_info, scrub_read_endio, stripe);
+ bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT;
}
__bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index d620323d08ea..ae8c56442549 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -373,11 +373,18 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
* "optimal" chunk size based on the fs size. However when we actually
* allocate the chunk we will strip this down further, making it no more
* than 10% of the disk or 1G, whichever is smaller.
+ *
+ * On the zoned mode, we need to use zone_size (=
+ * data_sinfo->chunk_size) as it is.
*/
data_sinfo = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
- data_chunk_size = min(data_sinfo->chunk_size,
- mult_perc(fs_info->fs_devices->total_rw_bytes, 10));
- data_chunk_size = min_t(u64, data_chunk_size, SZ_1G);
+ if (!btrfs_is_zoned(fs_info)) {
+ data_chunk_size = min(data_sinfo->chunk_size,
+ mult_perc(fs_info->fs_devices->total_rw_bytes, 10));
+ data_chunk_size = min_t(u64, data_chunk_size, SZ_1G);
+ } else {
+ data_chunk_size = data_sinfo->chunk_size;
+ }
/*
* Since data allocations immediately use block groups as part of the
@@ -405,6 +412,17 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
avail >>= 3;
else
avail >>= 1;
+
+ /*
+ * On the zoned mode, we always allocate one zone as one chunk.
+ * Returning non-zone size alingned bytes here will result in
+ * less pressure for the async metadata reclaim process, and it
+ * will over-commit too much leading to ENOSPC. Align down to the
+ * zone size to avoid that.
+ */
+ if (btrfs_is_zoned(fs_info))
+ avail = ALIGN_DOWN(avail, fs_info->zone_size);
+
return avail;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 26a2e5aa08e9..0bce1d45e252 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -138,6 +138,25 @@ static void wait_log_commit(struct btrfs_root *root, int transid);
* and once to do all the other items.
*/
+static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root)
+{
+ unsigned int nofs_flag;
+ struct inode *inode;
+
+ /*
+ * We're holding a transaction handle whether we are logging or
+ * replaying a log tree, so we must make sure NOFS semantics apply
+ * because btrfs_alloc_inode() may be triggered and it uses GFP_KERNEL
+ * to allocate an inode, which can recurse back into the filesystem and
+ * attempt a transaction commit, resulting in a deadlock.
+ */
+ nofs_flag = memalloc_nofs_save();
+ inode = btrfs_iget(root->fs_info->sb, objectid, root);
+ memalloc_nofs_restore(nofs_flag);
+
+ return inode;
+}
+
/*
* start a sub transaction and setup the log tree
* this increments the log tree writer count to make the people
@@ -600,7 +619,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root,
{
struct inode *inode;
- inode = btrfs_iget(root->fs_info->sb, objectid, root);
+ inode = btrfs_iget_logging(objectid, root);
if (IS_ERR(inode))
inode = NULL;
return inode;
@@ -5438,7 +5457,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
struct btrfs_log_ctx *ctx)
{
struct btrfs_root *root = start_inode->root;
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
LIST_HEAD(dir_list);
struct btrfs_dir_list *dir_elem;
@@ -5499,7 +5517,7 @@ again:
continue;
btrfs_release_path(path);
- di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root);
+ di_inode = btrfs_iget_logging(di_key.objectid, root);
if (IS_ERR(di_inode)) {
ret = PTR_ERR(di_inode);
goto out;
@@ -5559,7 +5577,7 @@ again:
btrfs_add_delayed_iput(curr_inode);
curr_inode = NULL;
- vfs_inode = btrfs_iget(fs_info->sb, ino, root);
+ vfs_inode = btrfs_iget_logging(ino, root);
if (IS_ERR(vfs_inode)) {
ret = PTR_ERR(vfs_inode);
break;
@@ -5654,7 +5672,7 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans,
if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES)
return BTRFS_LOG_FORCE_COMMIT;
- inode = btrfs_iget(root->fs_info->sb, ino, root);
+ inode = btrfs_iget_logging(ino, root);
/*
* If the other inode that had a conflicting dir entry was deleted in
* the current transaction then we either:
@@ -5755,7 +5773,6 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_log_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
/*
@@ -5786,7 +5803,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
list_del(&curr->list);
kfree(curr);
- inode = btrfs_iget(fs_info->sb, ino, root);
+ inode = btrfs_iget_logging(ino, root);
/*
* If the other inode that had a conflicting dir entry was
* deleted in the current transaction, we need to log its parent
@@ -5797,7 +5814,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
if (ret != -ENOENT)
break;
- inode = btrfs_iget(fs_info->sb, parent, root);
+ inode = btrfs_iget_logging(parent, root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
break;
@@ -6319,7 +6336,6 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans,
struct btrfs_log_ctx *ctx)
{
const bool orig_log_new_dentries = ctx->log_new_dentries;
- struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_item *item;
int ret = 0;
@@ -6345,7 +6361,7 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans,
if (key.type == BTRFS_ROOT_ITEM_KEY)
continue;
- di_inode = btrfs_iget(fs_info->sb, key.objectid, inode->root);
+ di_inode = btrfs_iget_logging(key.objectid, inode->root);
if (IS_ERR(di_inode)) {
ret = PTR_ERR(di_inode);
break;
@@ -6729,7 +6745,6 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_log_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_path *path;
struct btrfs_key key;
@@ -6794,8 +6809,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
cur_offset = item_size;
}
- dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid,
- root);
+ dir_inode = btrfs_iget_logging(inode_key.objectid, root);
/*
* If the parent inode was deleted, return an error to
* fallback to a transaction commit. This is to prevent
@@ -6857,7 +6871,6 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
while (true) {
- struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *leaf;
int slot;
struct btrfs_key search_key;
@@ -6872,7 +6885,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
search_key.objectid = found_key.offset;
search_key.type = BTRFS_INODE_ITEM_KEY;
search_key.offset = 0;
- inode = btrfs_iget(fs_info->sb, ino, root);
+ inode = btrfs_iget_logging(ino, root);
if (IS_ERR(inode))
return PTR_ERR(inode);
diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c
index f449f7340aad..9fb06dc16520 100644
--- a/fs/cachefiles/cache.c
+++ b/fs/cachefiles/cache.c
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/statfs.h>
#include <linux/namei.h>
+#include <trace/events/fscache.h>
#include "internal.h"
/*
@@ -312,19 +313,59 @@ static void cachefiles_withdraw_objects(struct cachefiles_cache *cache)
}
/*
- * Withdraw volumes.
+ * Withdraw fscache volumes.
+ */
+static void cachefiles_withdraw_fscache_volumes(struct cachefiles_cache *cache)
+{
+ struct list_head *cur;
+ struct cachefiles_volume *volume;
+ struct fscache_volume *vcookie;
+
+ _enter("");
+retry:
+ spin_lock(&cache->object_list_lock);
+ list_for_each(cur, &cache->volumes) {
+ volume = list_entry(cur, struct cachefiles_volume, cache_link);
+
+ if (atomic_read(&volume->vcookie->n_accesses) == 0)
+ continue;
+
+ vcookie = fscache_try_get_volume(volume->vcookie,
+ fscache_volume_get_withdraw);
+ if (vcookie) {
+ spin_unlock(&cache->object_list_lock);
+ fscache_withdraw_volume(vcookie);
+ fscache_put_volume(vcookie, fscache_volume_put_withdraw);
+ goto retry;
+ }
+ }
+ spin_unlock(&cache->object_list_lock);
+
+ _leave("");
+}
+
+/*
+ * Withdraw cachefiles volumes.
*/
static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache)
{
_enter("");
for (;;) {
+ struct fscache_volume *vcookie = NULL;
struct cachefiles_volume *volume = NULL;
spin_lock(&cache->object_list_lock);
if (!list_empty(&cache->volumes)) {
volume = list_first_entry(&cache->volumes,
struct cachefiles_volume, cache_link);
+ vcookie = fscache_try_get_volume(volume->vcookie,
+ fscache_volume_get_withdraw);
+ if (!vcookie) {
+ spin_unlock(&cache->object_list_lock);
+ cpu_relax();
+ continue;
+ }
list_del_init(&volume->cache_link);
}
spin_unlock(&cache->object_list_lock);
@@ -332,6 +373,7 @@ static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache)
break;
cachefiles_withdraw_volume(volume);
+ fscache_put_volume(vcookie, fscache_volume_put_withdraw);
}
_leave("");
@@ -371,6 +413,7 @@ void cachefiles_withdraw_cache(struct cachefiles_cache *cache)
pr_info("File cache on %s unregistering\n", fscache->name);
fscache_withdraw_cache(fscache);
+ cachefiles_withdraw_fscache_volumes(cache);
/* we now have to destroy all the active objects pertaining to this
* cache - which we do by passing them off to thread pool to be
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 06cdf1a8a16f..89b11336a836 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -366,14 +366,14 @@ static __poll_t cachefiles_daemon_poll(struct file *file,
if (cachefiles_in_ondemand_mode(cache)) {
if (!xa_empty(&cache->reqs)) {
- rcu_read_lock();
+ xas_lock(&xas);
xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) {
if (!cachefiles_ondemand_is_reopening_read(req)) {
mask |= EPOLLIN;
break;
}
}
- rcu_read_unlock();
+ xas_unlock(&xas);
}
} else {
if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags))
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 6845a90cdfcc..7b99bd98de75 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -48,6 +48,7 @@ enum cachefiles_object_state {
CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */
CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */
CACHEFILES_ONDEMAND_OBJSTATE_REOPENING, /* Object that was closed and is being reopened. */
+ CACHEFILES_ONDEMAND_OBJSTATE_DROPPING, /* Object is being dropped. */
};
struct cachefiles_ondemand_info {
@@ -128,6 +129,7 @@ struct cachefiles_cache {
unsigned long req_id_next;
struct xarray ondemand_ids; /* xarray for ondemand_id allocation */
u32 ondemand_id_next;
+ u32 msg_id_next;
};
static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache)
@@ -335,6 +337,7 @@ cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \
CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN);
CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE);
CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING);
+CACHEFILES_OBJECT_STATE_FUNCS(dropping, DROPPING);
static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req)
{
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
index bce005f2b456..470c96658385 100644
--- a/fs/cachefiles/ondemand.c
+++ b/fs/cachefiles/ondemand.c
@@ -517,7 +517,8 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
*/
xas_lock(&xas);
- if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
+ if (test_bit(CACHEFILES_DEAD, &cache->flags) ||
+ cachefiles_ondemand_object_is_dropping(object)) {
xas_unlock(&xas);
ret = -EIO;
goto out;
@@ -527,20 +528,32 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
smp_mb();
if (opcode == CACHEFILES_OP_CLOSE &&
- !cachefiles_ondemand_object_is_open(object)) {
+ !cachefiles_ondemand_object_is_open(object)) {
WARN_ON_ONCE(object->ondemand->ondemand_id == 0);
xas_unlock(&xas);
ret = -EIO;
goto out;
}
- xas.xa_index = 0;
+ /*
+ * Cyclically find a free xas to avoid msg_id reuse that would
+ * cause the daemon to successfully copen a stale msg_id.
+ */
+ xas.xa_index = cache->msg_id_next;
xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK);
+ if (xas.xa_node == XAS_RESTART) {
+ xas.xa_index = 0;
+ xas_find_marked(&xas, cache->msg_id_next - 1, XA_FREE_MARK);
+ }
if (xas.xa_node == XAS_RESTART)
xas_set_err(&xas, -EBUSY);
+
xas_store(&xas, req);
- xas_clear_mark(&xas, XA_FREE_MARK);
- xas_set_mark(&xas, CACHEFILES_REQ_NEW);
+ if (xas_valid(&xas)) {
+ cache->msg_id_next = xas.xa_index + 1;
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ xas_set_mark(&xas, CACHEFILES_REQ_NEW);
+ }
xas_unlock(&xas);
} while (xas_nomem(&xas, GFP_KERNEL));
@@ -568,7 +581,8 @@ out:
* If error occurs after creating the anonymous fd,
* cachefiles_ondemand_fd_release() will set object to close.
*/
- if (opcode == CACHEFILES_OP_OPEN)
+ if (opcode == CACHEFILES_OP_OPEN &&
+ !cachefiles_ondemand_object_is_dropping(object))
cachefiles_ondemand_set_object_close(object);
kfree(req);
return ret;
@@ -667,8 +681,34 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object)
void cachefiles_ondemand_clean_object(struct cachefiles_object *object)
{
+ unsigned long index;
+ struct cachefiles_req *req;
+ struct cachefiles_cache *cache;
+
+ if (!object->ondemand)
+ return;
+
cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0,
cachefiles_ondemand_init_close_req, NULL);
+
+ if (!object->ondemand->ondemand_id)
+ return;
+
+ /* Cancel all requests for the object that is being dropped. */
+ cache = object->volume->cache;
+ xa_lock(&cache->reqs);
+ cachefiles_ondemand_set_object_dropping(object);
+ xa_for_each(&cache->reqs, index, req) {
+ if (req->object == object) {
+ req->error = -EIO;
+ complete(&req->done);
+ __xa_erase(&cache->reqs, index);
+ }
+ }
+ xa_unlock(&cache->reqs);
+
+ /* Wait for ondemand_object_worker() to finish to avoid UAF. */
+ cancel_work_sync(&object->ondemand->ondemand_work);
}
int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object,
diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c
index 89df0ba8ba5e..781aac4ef274 100644
--- a/fs/cachefiles/volume.c
+++ b/fs/cachefiles/volume.c
@@ -133,7 +133,6 @@ void cachefiles_free_volume(struct fscache_volume *vcookie)
void cachefiles_withdraw_volume(struct cachefiles_volume *volume)
{
- fscache_withdraw_volume(volume->vcookie);
cachefiles_set_volume_xattr(volume);
__cachefiles_free_volume(volume);
}
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index bcb6173943ee..4dd8a993c60a 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -110,9 +110,11 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file
if (xlen == 0)
xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, tlen);
if (xlen != tlen) {
- if (xlen < 0)
+ if (xlen < 0) {
+ ret = xlen;
trace_cachefiles_vfs_error(object, file_inode(file), xlen,
cachefiles_trace_getxattr_error);
+ }
if (xlen == -EIO)
cachefiles_io_error_obj(
object,
@@ -252,6 +254,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume)
xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, len);
if (xlen != len) {
if (xlen < 0) {
+ ret = xlen;
trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen,
cachefiles_trace_getxattr_error);
if (xlen == -EIO)
diff --git a/fs/dcache.c b/fs/dcache.c
index 407095188f83..4c144519aa70 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -355,7 +355,11 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
flags &= ~DCACHE_ENTRY_TYPE;
WRITE_ONCE(dentry->d_flags, flags);
dentry->d_inode = NULL;
- if (flags & DCACHE_LRU_LIST)
+ /*
+ * The negative counter only tracks dentries on the LRU. Don't inc if
+ * d_lru is on another list.
+ */
+ if ((flags & (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
this_cpu_inc(nr_dentry_negative);
}
@@ -1844,9 +1848,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
spin_lock(&dentry->d_lock);
/*
- * Decrement negative dentry count if it was in the LRU list.
+ * The negative counter only tracks dentries on the LRU. Don't dec if
+ * d_lru is on another list.
*/
- if (dentry->d_flags & DCACHE_LRU_LIST)
+ if ((dentry->d_flags &
+ (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST)
this_cpu_dec(nr_dentry_negative);
hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
raw_write_seqcount_begin(&dentry->d_seq);
@@ -3029,28 +3035,25 @@ EXPORT_SYMBOL(d_splice_alias);
bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
{
- bool result;
+ bool subdir;
unsigned seq;
if (new_dentry == old_dentry)
return true;
- do {
- /* for restarting inner loop in case of seq retry */
- seq = read_seqbegin(&rename_lock);
- /*
- * Need rcu_readlock to protect against the d_parent trashing
- * due to d_move
- */
- rcu_read_lock();
- if (d_ancestor(old_dentry, new_dentry))
- result = true;
- else
- result = false;
- rcu_read_unlock();
- } while (read_seqretry(&rename_lock, seq));
-
- return result;
+ /* Access d_parent under rcu as d_move() may change it. */
+ rcu_read_lock();
+ seq = read_seqbegin(&rename_lock);
+ subdir = d_ancestor(old_dentry, new_dentry);
+ /* Try lockless once... */
+ if (read_seqretry(&rename_lock, seq)) {
+ /* ...else acquire lock for progress even on deep chains. */
+ read_seqlock_excl(&rename_lock);
+ subdir = d_ancestor(old_dentry, new_dentry);
+ read_sequnlock_excl(&rename_lock);
+ }
+ rcu_read_unlock();
+ return subdir;
}
EXPORT_SYMBOL(is_subdir);
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index c93bd24d2771..1b91d9513013 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -343,7 +343,7 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->build_time = le64_to_cpu(dsb->build_time);
sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
- memcpy(&sb->s_uuid, dsb->uuid, sizeof(dsb->uuid));
+ super_set_uuid(sb, (void *)dsb->uuid, sizeof(dsb->uuid));
ret = strscpy(sbi->volume_name, dsb->volume_name,
sizeof(dsb->volume_name));
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 9b248ee5fef2..74d3d7bffcf3 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -711,6 +711,8 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
err = z_erofs_do_map_blocks(inode, map, flags);
out:
+ if (err)
+ map->m_llen = 0;
trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
return err;
}
diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c
index 036024bce9f7..b80f612867c2 100644
--- a/fs/erofs/zutil.c
+++ b/fs/erofs/zutil.c
@@ -148,7 +148,7 @@ int __init z_erofs_gbuf_init(void)
void z_erofs_gbuf_exit(void)
{
- int i;
+ int i, j;
for (i = 0; i < z_erofs_gbuf_count + (!!z_erofs_rsvbuf); ++i) {
struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];
@@ -161,9 +161,9 @@ void z_erofs_gbuf_exit(void)
if (!gbuf->pages)
continue;
- for (i = 0; i < gbuf->nrpages; ++i)
- if (gbuf->pages[i])
- put_page(gbuf->pages[i]);
+ for (j = 0; j < gbuf->nrpages; ++j)
+ if (gbuf->pages[j])
+ put_page(gbuf->pages[j]);
kfree(gbuf->pages);
gbuf->pages = NULL;
}
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index 5a400259ae74..9a1a93e3888b 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -696,7 +696,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
return err;
}
- strbuf = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN +
+ strbuf = kzalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN +
XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL);
if (!strbuf) {
res = -ENOMEM;
diff --git a/fs/locks.c b/fs/locks.c
index 90c8746874de..bdd94c32256f 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1367,9 +1367,9 @@ retry:
locks_wake_up_blocks(&left->c);
}
out:
+ trace_posix_lock_inode(inode, request, error);
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem);
- trace_posix_lock_inode(inode, request, error);
/*
* Free any unused locks.
*/
@@ -2448,8 +2448,9 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
error = do_lock_file_wait(filp, cmd, file_lock);
/*
- * Attempt to detect a close/fcntl race and recover by releasing the
- * lock that was just acquired. There is no need to do that when we're
+ * Detect close/fcntl races and recover by zapping all POSIX locks
+ * associated with this file and our files_struct, just like on
+ * filp_flush(). There is no need to do that when we're
* unlocking though, or for OFD locks.
*/
if (!error && file_lock->c.flc_type != F_UNLCK &&
@@ -2464,9 +2465,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
f = files_lookup_fd_locked(files, fd);
spin_unlock(&files->file_lock);
if (f != filp) {
- file_lock->c.flc_type = F_UNLCK;
- error = do_lock_file_wait(filp, cmd, file_lock);
- WARN_ON_ONCE(error);
+ locks_remove_posix(filp, files);
error = -EBADF;
}
}
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index d6031acc34f0..a944a0f17b53 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,8 +213,7 @@ static int minix_rename(struct mnt_idmap *idmap,
if (!new_de)
goto out_dir;
err = minix_set_link(new_de, new_page, old_inode);
- kunmap(new_page);
- put_page(new_page);
+ unmap_and_put_page(new_page, new_de);
if (err)
goto out_dir;
inode_set_ctime_current(new_inode);
diff --git a/fs/namei.c b/fs/namei.c
index 37fb0a8aa09a..1e05a0f3f04d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3572,8 +3572,12 @@ static const char *open_last_lookups(struct nameidata *nd,
else
inode_lock_shared(dir->d_inode);
dentry = lookup_open(nd, file, op, got_write);
- if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
- fsnotify_create(dir->d_inode, dentry);
+ if (!IS_ERR(dentry)) {
+ if (file->f_mode & FMODE_CREATED)
+ fsnotify_create(dir->d_inode, dentry);
+ if (file->f_mode & FMODE_OPENED)
+ fsnotify_open(file);
+ }
if (open_flag & O_CREAT)
inode_unlock(dir->d_inode);
else
@@ -3700,6 +3704,8 @@ int vfs_tmpfile(struct mnt_idmap *idmap,
mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
error = dir->i_op->tmpfile(idmap, dir, file, mode);
dput(child);
+ if (file->f_mode & FMODE_OPENED)
+ fsnotify_open(file);
if (error)
return error;
/* Don't check for other permissions, the inode was just created */
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index a6bb03bea920..4c0401dbbfcf 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -117,7 +117,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
if (folio->index == rreq->no_unlock_folio &&
test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
- _debug("no unlock");
+ kdebug("no unlock");
else
folio_unlock(folio);
}
@@ -204,7 +204,7 @@ void netfs_readahead(struct readahead_control *ractl)
struct netfs_inode *ctx = netfs_inode(ractl->mapping->host);
int ret;
- _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
+ kenter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
if (readahead_count(ractl) == 0)
return;
@@ -268,7 +268,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
struct folio *sink = NULL;
int ret;
- _enter("%lx", folio->index);
+ kenter("%lx", folio->index);
rreq = netfs_alloc_request(mapping, file,
folio_file_pos(folio), folio_size(folio),
@@ -508,7 +508,7 @@ retry:
have_folio:
*_folio = folio;
- _leave(" = 0");
+ kleave(" = 0");
return 0;
error_put:
@@ -518,7 +518,7 @@ error:
folio_unlock(folio);
folio_put(folio);
}
- _leave(" = %d", ret);
+ kleave(" = %d", ret);
return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
@@ -536,7 +536,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t flen = folio_size(folio);
int ret;
- _enter("%zx @%llx", flen, start);
+ kenter("%zx @%llx", flen, start);
ret = -ENOMEM;
@@ -567,7 +567,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
error_put:
netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
error:
- _leave(" = %d", ret);
+ kleave(" = %d", ret);
return ret;
}
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 07bc1fd43530..ecbc99ec7d36 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -56,7 +56,7 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
struct netfs_group *group = netfs_folio_group(folio);
loff_t pos = folio_file_pos(folio);
- _enter("");
+ kenter("");
if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE)
return NETFS_FLUSH_CONTENT;
@@ -272,12 +272,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
*/
howto = netfs_how_to_modify(ctx, file, folio, netfs_group,
flen, offset, part, maybe_trouble);
- _debug("howto %u", howto);
+ kdebug("howto %u", howto);
switch (howto) {
case NETFS_JUST_PREFETCH:
ret = netfs_prefetch_for_write(file, folio, offset, part);
if (ret < 0) {
- _debug("prefetch = %zd", ret);
+ kdebug("prefetch = %zd", ret);
goto error_folio_unlock;
}
break;
@@ -418,7 +418,7 @@ out:
}
iocb->ki_pos += written;
- _leave(" = %zd [%zd]", written, ret);
+ kleave(" = %zd [%zd]", written, ret);
return written ? written : ret;
error_folio_unlock:
@@ -491,7 +491,7 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct netfs_inode *ictx = netfs_inode(inode);
ssize_t ret;
- _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+ kenter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
if (!iov_iter_count(from))
return 0;
@@ -523,17 +523,23 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
struct netfs_group *group;
struct folio *folio = page_folio(vmf->page);
struct file *file = vmf->vma->vm_file;
+ struct address_space *mapping = file->f_mapping;
struct inode *inode = file_inode(file);
struct netfs_inode *ictx = netfs_inode(inode);
vm_fault_t ret = VM_FAULT_RETRY;
int err;
- _enter("%lx", folio->index);
+ kenter("%lx", folio->index);
sb_start_pagefault(inode->i_sb);
if (folio_lock_killable(folio) < 0)
goto out;
+ if (folio->mapping != mapping) {
+ folio_unlock(folio);
+ ret = VM_FAULT_NOPAGE;
+ goto out;
+ }
if (folio_wait_writeback_killable(folio)) {
ret = VM_FAULT_LOCKED;
@@ -549,9 +555,9 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
group = netfs_folio_group(folio);
if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) {
folio_unlock(folio);
- err = filemap_fdatawait_range(inode->i_mapping,
- folio_pos(folio),
- folio_pos(folio) + folio_size(folio));
+ err = filemap_fdatawrite_range(mapping,
+ folio_pos(folio),
+ folio_pos(folio) + folio_size(folio));
switch (err) {
case 0:
ret = VM_FAULT_RETRY;
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index 10a1e4da6bda..b6debac6205f 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -33,7 +33,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
size_t orig_count = iov_iter_count(iter);
bool async = !is_sync_kiocb(iocb);
- _enter("");
+ kenter("");
if (!orig_count)
return 0; /* Don't update atime */
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index e14cd53ac9fd..792ef17bae21 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -37,7 +37,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
size_t len = iov_iter_count(iter);
bool async = !is_sync_kiocb(iocb);
- _enter("");
+ kenter("");
/* We're going to need a bounce buffer if what we transmit is going to
* be different in some way to the source buffer, e.g. because it gets
@@ -45,7 +45,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
*/
// TODO
- _debug("uw %llx-%llx", start, end);
+ kdebug("uw %llx-%llx", start, end);
wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start,
iocb->ki_flags & IOCB_DIRECT ?
@@ -92,10 +92,11 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
if (async)
wreq->iocb = iocb;
+ wreq->len = iov_iter_count(&wreq->io_iter);
wreq->cleanup = netfs_cleanup_dio_write;
- ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), iov_iter_count(&wreq->io_iter));
+ ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
if (ret < 0) {
- _debug("begin = %zd", ret);
+ kdebug("begin = %zd", ret);
goto out;
}
@@ -142,7 +143,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t pos = iocb->ki_pos;
unsigned long long end = pos + iov_iter_count(from) - 1;
- _enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode));
+ kenter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode));
if (!iov_iter_count(from))
return 0;
diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c
index 9397ed39b0b4..288a73c3072d 100644
--- a/fs/netfs/fscache_cache.c
+++ b/fs/netfs/fscache_cache.c
@@ -237,7 +237,7 @@ int fscache_add_cache(struct fscache_cache *cache,
{
int n_accesses;
- _enter("{%s,%s}", ops->name, cache->name);
+ kenter("{%s,%s}", ops->name, cache->name);
BUG_ON(fscache_cache_state(cache) != FSCACHE_CACHE_IS_PREPARING);
@@ -257,7 +257,7 @@ int fscache_add_cache(struct fscache_cache *cache,
up_write(&fscache_addremove_sem);
pr_notice("Cache \"%s\" added (type %s)\n", cache->name, ops->name);
- _leave(" = 0 [%s]", cache->name);
+ kleave(" = 0 [%s]", cache->name);
return 0;
}
EXPORT_SYMBOL(fscache_add_cache);
diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c
index bce2492186d0..4d1e8bf4c615 100644
--- a/fs/netfs/fscache_cookie.c
+++ b/fs/netfs/fscache_cookie.c
@@ -456,7 +456,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
{
struct fscache_cookie *cookie;
- _enter("V=%x", volume->debug_id);
+ kenter("V=%x", volume->debug_id);
if (!index_key || !index_key_len || index_key_len > 255 || aux_data_len > 255)
return NULL;
@@ -484,7 +484,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
trace_fscache_acquire(cookie);
fscache_stat(&fscache_n_acquires_ok);
- _leave(" = c=%08x", cookie->debug_id);
+ kleave(" = c=%08x", cookie->debug_id);
return cookie;
}
EXPORT_SYMBOL(__fscache_acquire_cookie);
@@ -505,7 +505,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie)
enum fscache_access_trace trace = fscache_access_lookup_cookie_end_failed;
bool need_withdraw = false;
- _enter("");
+ kenter("");
if (!cookie->volume->cache_priv) {
fscache_create_volume(cookie->volume, true);
@@ -519,7 +519,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie)
if (cookie->state != FSCACHE_COOKIE_STATE_FAILED)
fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT);
need_withdraw = true;
- _leave(" [fail]");
+ kleave(" [fail]");
goto out;
}
@@ -572,7 +572,7 @@ void __fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify)
bool queue = false;
int n_active;
- _enter("c=%08x", cookie->debug_id);
+ kenter("c=%08x", cookie->debug_id);
if (WARN(test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags),
"Trying to use relinquished cookie\n"))
@@ -636,7 +636,7 @@ again:
spin_unlock(&cookie->lock);
if (queue)
fscache_queue_cookie(cookie, fscache_cookie_get_use_work);
- _leave("");
+ kleave("");
}
EXPORT_SYMBOL(__fscache_use_cookie);
@@ -702,7 +702,7 @@ static void fscache_cookie_state_machine(struct fscache_cookie *cookie)
enum fscache_cookie_state state;
bool wake = false;
- _enter("c=%x", cookie->debug_id);
+ kenter("c=%x", cookie->debug_id);
again:
spin_lock(&cookie->lock);
@@ -820,7 +820,7 @@ out:
spin_unlock(&cookie->lock);
if (wake)
wake_up_cookie_state(cookie);
- _leave("");
+ kleave("");
}
static void fscache_cookie_worker(struct work_struct *work)
@@ -867,7 +867,7 @@ static void fscache_cookie_lru_do_one(struct fscache_cookie *cookie)
set_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags);
spin_unlock(&cookie->lock);
fscache_stat(&fscache_n_cookies_lru_expired);
- _debug("lru c=%x", cookie->debug_id);
+ kdebug("lru c=%x", cookie->debug_id);
__fscache_withdraw_cookie(cookie);
}
@@ -971,7 +971,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
if (retire)
fscache_stat(&fscache_n_relinquishes_retire);
- _enter("c=%08x{%d},%d",
+ kenter("c=%08x{%d},%d",
cookie->debug_id, atomic_read(&cookie->n_active), retire);
if (WARN(test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags),
@@ -1050,7 +1050,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
{
bool is_caching;
- _enter("c=%x", cookie->debug_id);
+ kenter("c=%x", cookie->debug_id);
fscache_stat(&fscache_n_invalidates);
@@ -1072,7 +1072,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
case FSCACHE_COOKIE_STATE_INVALIDATING: /* is_still_valid will catch it */
default:
spin_unlock(&cookie->lock);
- _leave(" [no %u]", cookie->state);
+ kleave(" [no %u]", cookie->state);
return;
case FSCACHE_COOKIE_STATE_LOOKING_UP:
@@ -1081,7 +1081,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
fallthrough;
case FSCACHE_COOKIE_STATE_CREATING:
spin_unlock(&cookie->lock);
- _leave(" [look %x]", cookie->inval_counter);
+ kleave(" [look %x]", cookie->inval_counter);
return;
case FSCACHE_COOKIE_STATE_ACTIVE:
@@ -1094,7 +1094,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
if (is_caching)
fscache_queue_cookie(cookie, fscache_cookie_get_inval_work);
- _leave(" [inv]");
+ kleave(" [inv]");
return;
}
}
diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index 38637e5c9b57..bf4eaeec44fb 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -28,12 +28,12 @@ bool fscache_wait_for_operation(struct netfs_cache_resources *cres,
again:
if (!fscache_cache_is_live(cookie->volume->cache)) {
- _leave(" [broken]");
+ kleave(" [broken]");
return false;
}
state = fscache_cookie_state(cookie);
- _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
+ kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
switch (state) {
case FSCACHE_COOKIE_STATE_CREATING:
@@ -52,7 +52,7 @@ again:
case FSCACHE_COOKIE_STATE_DROPPED:
case FSCACHE_COOKIE_STATE_RELINQUISHING:
default:
- _leave(" [not live]");
+ kleave(" [not live]");
return false;
}
@@ -92,7 +92,7 @@ again:
spin_lock(&cookie->lock);
state = fscache_cookie_state(cookie);
- _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
+ kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
switch (state) {
case FSCACHE_COOKIE_STATE_LOOKING_UP:
@@ -140,7 +140,7 @@ failed:
cres->cache_priv = NULL;
cres->ops = NULL;
fscache_end_cookie_access(cookie, fscache_access_io_not_live);
- _leave(" = -ENOBUFS");
+ kleave(" = -ENOBUFS");
return -ENOBUFS;
}
@@ -224,7 +224,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
if (len == 0)
goto abandon;
- _enter("%llx,%zx", start, len);
+ kenter("%llx,%zx", start, len);
wreq = kzalloc(sizeof(struct fscache_write_request), GFP_NOFS);
if (!wreq)
diff --git a/fs/netfs/fscache_main.c b/fs/netfs/fscache_main.c
index 42e98bb523e3..bf9b33d26e31 100644
--- a/fs/netfs/fscache_main.c
+++ b/fs/netfs/fscache_main.c
@@ -99,7 +99,7 @@ error_wq:
*/
void __exit fscache_exit(void)
{
- _enter("");
+ kenter("");
kmem_cache_destroy(fscache_cookie_jar);
fscache_proc_cleanup();
diff --git a/fs/netfs/fscache_volume.c b/fs/netfs/fscache_volume.c
index cdf991bdd9de..2e2a405ca9b0 100644
--- a/fs/netfs/fscache_volume.c
+++ b/fs/netfs/fscache_volume.c
@@ -27,6 +27,19 @@ struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
return volume;
}
+struct fscache_volume *fscache_try_get_volume(struct fscache_volume *volume,
+ enum fscache_volume_trace where)
+{
+ int ref;
+
+ if (!__refcount_inc_not_zero(&volume->ref, &ref))
+ return NULL;
+
+ trace_fscache_volume(volume->debug_id, ref + 1, where);
+ return volume;
+}
+EXPORT_SYMBOL(fscache_try_get_volume);
+
static void fscache_see_volume(struct fscache_volume *volume,
enum fscache_volume_trace where)
{
@@ -251,7 +264,7 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
fscache_see_volume(volume, fscache_volume_new_acquire);
fscache_stat(&fscache_n_volumes);
up_write(&fscache_addremove_sem);
- _leave(" = v=%x", volume->debug_id);
+ kleave(" = v=%x", volume->debug_id);
return volume;
err_vol:
@@ -420,6 +433,7 @@ void fscache_put_volume(struct fscache_volume *volume,
fscache_free_volume(volume);
}
}
+EXPORT_SYMBOL(fscache_put_volume);
/*
* Relinquish a volume representation cookie.
@@ -452,7 +466,7 @@ void fscache_withdraw_volume(struct fscache_volume *volume)
{
int n_accesses;
- _debug("withdraw V=%x", volume->debug_id);
+ kdebug("withdraw V=%x", volume->debug_id);
/* Allow wakeups on dec-to-0 */
n_accesses = atomic_dec_return(&volume->n_accesses);
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index 95e281a8af78..21e46bc9aa49 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -34,7 +34,6 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync);
/*
* main.c
*/
-extern unsigned int netfs_debug;
extern struct list_head netfs_io_requests;
extern spinlock_t netfs_proc_lock;
extern mempool_t netfs_request_pool;
@@ -63,15 +62,6 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {}
/*
* misc.c
*/
-#define NETFS_FLAG_PUT_MARK BIT(0)
-#define NETFS_FLAG_PAGECACHE_MARK BIT(1)
-int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index,
- struct folio *folio, unsigned int flags,
- gfp_t gfp_mask);
-int netfs_add_folios_to_buffer(struct xarray *buffer,
- struct address_space *mapping,
- pgoff_t index, pgoff_t to, gfp_t gfp_mask);
-void netfs_clear_buffer(struct xarray *buffer);
/*
* objects.c
@@ -353,8 +343,6 @@ extern const struct seq_operations fscache_volumes_seq_ops;
struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
enum fscache_volume_trace where);
-void fscache_put_volume(struct fscache_volume *volume,
- enum fscache_volume_trace where);
bool fscache_begin_volume_access(struct fscache_volume *volume,
struct fscache_cookie *cookie,
enum fscache_access_trace why);
@@ -365,42 +353,12 @@ void fscache_create_volume(struct fscache_volume *volume, bool wait);
* debug tracing
*/
#define dbgprintk(FMT, ...) \
- printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
+ pr_debug("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
-#ifdef __KDEBUG
-#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
-#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
-#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)
-
-#elif defined(CONFIG_NETFS_DEBUG)
-#define _enter(FMT, ...) \
-do { \
- if (netfs_debug) \
- kenter(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#define _leave(FMT, ...) \
-do { \
- if (netfs_debug) \
- kleave(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#define _debug(FMT, ...) \
-do { \
- if (netfs_debug) \
- kdebug(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#else
-#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__)
-#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
-#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
-#endif
-
/*
* assertions
*/
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index c93851b98368..c7576481c321 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -130,7 +130,7 @@ static void netfs_reset_subreq_iter(struct netfs_io_request *rreq,
if (count == remaining)
return;
- _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
+ kdebug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
rreq->debug_id, subreq->debug_index,
iov_iter_count(&subreq->io_iter), subreq->transferred,
subreq->len, rreq->i_size,
@@ -326,7 +326,7 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
struct netfs_io_request *rreq = subreq->rreq;
int u;
- _enter("R=%x[%x]{%llx,%lx},%zd",
+ kenter("R=%x[%x]{%llx,%lx},%zd",
rreq->debug_id, subreq->debug_index,
subreq->start, subreq->flags, transferred_or_error);
@@ -435,7 +435,7 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
struct netfs_inode *ictx = netfs_inode(rreq->inode);
size_t lsize;
- _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
+ kenter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
if (rreq->origin != NETFS_DIO_READ) {
source = netfs_cache_prepare_read(subreq, rreq->i_size);
@@ -518,7 +518,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
subreq->start = rreq->start + rreq->submitted;
subreq->len = io_iter->count;
- _debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted);
+ kdebug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted);
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
/* Call out to the cache to find out what it can do with the remaining
@@ -570,7 +570,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
struct iov_iter io_iter;
int ret;
- _enter("R=%x %llx-%llx",
+ kenter("R=%x %llx-%llx",
rreq->debug_id, rreq->start, rreq->start + rreq->len - 1);
if (rreq->len == 0) {
@@ -593,7 +593,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
atomic_set(&rreq->nr_outstanding, 1);
io_iter = rreq->io_iter;
do {
- _debug("submit %llx + %llx >= %llx",
+ kdebug("submit %llx + %llx >= %llx",
rreq->start, rreq->submitted, rreq->i_size);
if (rreq->origin == NETFS_DIO_READ &&
rreq->start + rreq->submitted >= rreq->i_size)
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 5f0f438e5d21..db824c372842 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -20,10 +20,6 @@ MODULE_LICENSE("GPL");
EXPORT_TRACEPOINT_SYMBOL(netfs_sreq);
-unsigned netfs_debug;
-module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");
-
static struct kmem_cache *netfs_request_slab;
static struct kmem_cache *netfs_subrequest_slab;
mempool_t netfs_request_pool;
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index bc1fc54fb724..172808e83ca8 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -8,87 +8,6 @@
#include <linux/swap.h>
#include "internal.h"
-/*
- * Attach a folio to the buffer and maybe set marks on it to say that we need
- * to put the folio later and twiddle the pagecache flags.
- */
-int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index,
- struct folio *folio, unsigned int flags,
- gfp_t gfp_mask)
-{
- XA_STATE_ORDER(xas, xa, index, folio_order(folio));
-
-retry:
- xas_lock(&xas);
- for (;;) {
- xas_store(&xas, folio);
- if (!xas_error(&xas))
- break;
- xas_unlock(&xas);
- if (!xas_nomem(&xas, gfp_mask))
- return xas_error(&xas);
- goto retry;
- }
-
- if (flags & NETFS_FLAG_PUT_MARK)
- xas_set_mark(&xas, NETFS_BUF_PUT_MARK);
- if (flags & NETFS_FLAG_PAGECACHE_MARK)
- xas_set_mark(&xas, NETFS_BUF_PAGECACHE_MARK);
- xas_unlock(&xas);
- return xas_error(&xas);
-}
-
-/*
- * Create the specified range of folios in the buffer attached to the read
- * request. The folios are marked with NETFS_BUF_PUT_MARK so that we know that
- * these need freeing later.
- */
-int netfs_add_folios_to_buffer(struct xarray *buffer,
- struct address_space *mapping,
- pgoff_t index, pgoff_t to, gfp_t gfp_mask)
-{
- struct folio *folio;
- int ret;
-
- if (to + 1 == index) /* Page range is inclusive */
- return 0;
-
- do {
- /* TODO: Figure out what order folio can be allocated here */
- folio = filemap_alloc_folio(readahead_gfp_mask(mapping), 0);
- if (!folio)
- return -ENOMEM;
- folio->index = index;
- ret = netfs_xa_store_and_mark(buffer, index, folio,
- NETFS_FLAG_PUT_MARK, gfp_mask);
- if (ret < 0) {
- folio_put(folio);
- return ret;
- }
-
- index += folio_nr_pages(folio);
- } while (index <= to && index != 0);
-
- return 0;
-}
-
-/*
- * Clear an xarray buffer, putting a ref on the folios that have
- * NETFS_BUF_PUT_MARK set.
- */
-void netfs_clear_buffer(struct xarray *buffer)
-{
- struct folio *folio;
- XA_STATE(xas, buffer, 0);
-
- rcu_read_lock();
- xas_for_each_marked(&xas, folio, ULONG_MAX, NETFS_BUF_PUT_MARK) {
- folio_put(folio);
- }
- rcu_read_unlock();
- xa_destroy(buffer);
-}
-
/**
* netfs_dirty_folio - Mark folio dirty and pin a cache object for writeback
* @mapping: The mapping the folio belongs to.
@@ -107,7 +26,7 @@ bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio)
struct fscache_cookie *cookie = netfs_i_cookie(ictx);
bool need_use = false;
- _enter("");
+ kenter("");
if (!filemap_dirty_folio(mapping, folio))
return false;
@@ -180,7 +99,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
struct netfs_folio *finfo;
size_t flen = folio_size(folio);
- _enter("{%lx},%zx,%zx", folio->index, offset, length);
+ kenter("{%lx},%zx,%zx", folio->index, offset, length);
if (!folio_test_private(folio))
return;
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 426cf87aaf2e..488147439fe0 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -161,7 +161,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
{
struct list_head *next;
- _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
+ kenter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
if (list_empty(&stream->subrequests))
return;
@@ -374,7 +374,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
unsigned int notes;
int s;
- _enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
+ kenter("%llx-%llx", wreq->start, wreq->start + wreq->len);
trace_netfs_collect(wreq);
trace_netfs_rreq(wreq, netfs_rreq_trace_collect);
@@ -409,7 +409,7 @@ reassess_streams:
front = stream->front;
while (front) {
trace_netfs_collect_sreq(wreq, front);
- //_debug("sreq [%x] %llx %zx/%zx",
+ //kdebug("sreq [%x] %llx %zx/%zx",
// front->debug_index, front->start, front->transferred, front->len);
/* Stall if there may be a discontinuity. */
@@ -598,7 +598,7 @@ reassess_streams:
out:
netfs_put_group_many(wreq->group, wreq->nr_group_rel);
wreq->nr_group_rel = 0;
- _leave(" = %x", notes);
+ kleave(" = %x", notes);
return;
need_retry:
@@ -606,7 +606,7 @@ need_retry:
* that any partially completed op will have had any wholly transferred
* folios removed from it.
*/
- _debug("retry");
+ kdebug("retry");
netfs_retry_writes(wreq);
goto out;
}
@@ -621,7 +621,7 @@ void netfs_write_collection_worker(struct work_struct *work)
size_t transferred;
int s;
- _enter("R=%x", wreq->debug_id);
+ kenter("R=%x", wreq->debug_id);
netfs_see_request(wreq, netfs_rreq_trace_see_work);
if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) {
@@ -684,7 +684,7 @@ void netfs_write_collection_worker(struct work_struct *work)
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_end(wreq->inode);
- _debug("finished");
+ kdebug("finished");
trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
@@ -744,7 +744,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
struct netfs_io_request *wreq = subreq->rreq;
struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
- _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
+ kenter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
switch (subreq->source) {
case NETFS_UPLOAD_TO_SERVER:
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 3aa86e268f40..d7c971df8866 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -99,7 +99,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
if (IS_ERR(wreq))
return wreq;
- _enter("R=%x", wreq->debug_id);
+ kenter("R=%x", wreq->debug_id);
ictx = netfs_inode(wreq->inode);
if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
@@ -159,7 +159,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
subreq->max_nr_segs = INT_MAX;
subreq->stream_nr = stream->stream_nr;
- _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
+ kenter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
@@ -215,7 +215,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream,
{
struct netfs_io_request *wreq = subreq->rreq;
- _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
+ kenter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
return netfs_write_subrequest_terminated(subreq, subreq->error, false);
@@ -272,11 +272,11 @@ int netfs_advance_write(struct netfs_io_request *wreq,
size_t part;
if (!stream->avail) {
- _leave("no write");
+ kleave("no write");
return len;
}
- _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
+ kenter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
if (subreq && start != subreq->start + subreq->len) {
netfs_issue_write(wreq, stream);
@@ -288,7 +288,7 @@ int netfs_advance_write(struct netfs_io_request *wreq,
subreq = stream->construct;
part = min(subreq->max_len - subreq->len, len);
- _debug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len);
+ kdebug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len);
subreq->len += part;
subreq->nr_segs++;
@@ -319,7 +319,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
bool to_eof = false, streamw = false;
bool debug = false;
- _enter("");
+ kenter("");
/* netfs_perform_write() may shift i_size around the page or from out
* of the page to beyond it, but cannot move i_size into or through the
@@ -329,7 +329,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
if (fpos >= i_size) {
/* mmap beyond eof. */
- _debug("beyond eof");
+ kdebug("beyond eof");
folio_start_writeback(folio);
folio_unlock(folio);
wreq->nr_group_rel += netfs_folio_written_back(folio);
@@ -363,7 +363,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
}
flen -= foff;
- _debug("folio %zx %zx %zx", foff, flen, fsize);
+ kdebug("folio %zx %zx %zx", foff, flen, fsize);
/* Deal with discontinuities in the stream of dirty pages. These can
* arise from a number of sources:
@@ -483,11 +483,11 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
if (!debug)
kdebug("R=%x: No submit", wreq->debug_id);
- if (flen < fsize)
+ if (foff + flen < fsize)
for (int s = 0; s < NR_IO_STREAMS; s++)
netfs_issue_write(wreq, &wreq->io_streams[s]);
- _leave(" = 0");
+ kleave(" = 0");
return 0;
}
@@ -522,7 +522,7 @@ int netfs_writepages(struct address_space *mapping,
netfs_stat(&netfs_n_wh_writepages);
do {
- _debug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted);
+ kdebug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted);
/* It appears we don't have to handle cyclic writeback wrapping. */
WARN_ON_ONCE(wreq && folio_pos(folio) < wreq->start + wreq->submitted);
@@ -546,14 +546,14 @@ int netfs_writepages(struct address_space *mapping,
mutex_unlock(&ictx->wb_lock);
netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
- _leave(" = %d", error);
+ kleave(" = %d", error);
return error;
couldnt_start:
netfs_kill_dirty_pages(mapping, wbc, folio);
out:
mutex_unlock(&ictx->wb_lock);
- _leave(" = %d", error);
+ kleave(" = %d", error);
return error;
}
EXPORT_SYMBOL(netfs_writepages);
@@ -590,7 +590,7 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
struct folio *folio, size_t copied, bool to_page_end,
struct folio **writethrough_cache)
{
- _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
+ kenter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end);
if (!*writethrough_cache) {
@@ -624,7 +624,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
struct netfs_inode *ictx = netfs_inode(wreq->inode);
int ret;
- _enter("R=%x", wreq->debug_id);
+ kenter("R=%x", wreq->debug_id);
if (writethrough_cache)
netfs_write_folio(wreq, wbc, writethrough_cache);
@@ -657,7 +657,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
loff_t start = wreq->start;
int error = 0;
- _enter("%zx", len);
+ kenter("%zx", len);
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_begin(wreq->inode);
@@ -665,7 +665,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
while (len) {
// TODO: Prepare content encryption
- _debug("unbuffered %zx", len);
+ kdebug("unbuffered %zx", len);
part = netfs_advance_write(wreq, upload, start, len, false);
start += part;
len -= part;
@@ -684,6 +684,6 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
if (list_empty(&upload->subrequests))
netfs_wake_write_collector(wreq, false);
- _leave(" = %d", error);
+ kleave(" = %d", error);
return error;
}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 533b65057e18..c848ebe5d08f 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -2169,6 +2169,8 @@ static __net_init int nfsd_net_init(struct net *net)
nn->nfsd_svcstats.program = &nfsd_program;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
+ nn->nfsd_info.mutex = &nfsd_mutex;
+ nn->nfsd_serv = NULL;
nfsd4_init_leases_net(nn);
get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
seqlock_init(&nn->writeverf_lock);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index cd9a6a1a9fc8..89d7918de7b1 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -672,7 +672,6 @@ int nfsd_create_serv(struct net *net)
return error;
}
spin_lock(&nfsd_notifier_lock);
- nn->nfsd_info.mutex = &nfsd_mutex;
nn->nfsd_serv = serv;
spin_unlock(&nfsd_notifier_lock);
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 89caef7513db..ba50388ee4bf 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -377,11 +377,12 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
* @target: offset number of an entry in the group (start point)
* @bsize: size in bits
* @lock: spin lock protecting @bitmap
+ * @wrap: whether to wrap around
*/
static int nilfs_palloc_find_available_slot(unsigned char *bitmap,
unsigned long target,
unsigned int bsize,
- spinlock_t *lock)
+ spinlock_t *lock, bool wrap)
{
int pos, end = bsize;
@@ -397,6 +398,8 @@ static int nilfs_palloc_find_available_slot(unsigned char *bitmap,
end = target;
}
+ if (!wrap)
+ return -ENOSPC;
/* wrap around */
for (pos = 0; pos < end; pos++) {
@@ -495,9 +498,10 @@ int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp)
* nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object
* @inode: inode of metadata file using this allocator
* @req: nilfs_palloc_req structure exchanged for the allocation
+ * @wrap: whether to wrap around
*/
int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
- struct nilfs_palloc_req *req)
+ struct nilfs_palloc_req *req, bool wrap)
{
struct buffer_head *desc_bh, *bitmap_bh;
struct nilfs_palloc_group_desc *desc;
@@ -516,7 +520,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
entries_per_group = nilfs_palloc_entries_per_group(inode);
for (i = 0; i < ngroups; i += n) {
- if (group >= ngroups) {
+ if (group >= ngroups && wrap) {
/* wrap around */
group = 0;
maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr,
@@ -550,7 +554,14 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
bitmap_kaddr = kmap_local_page(bitmap_bh->b_page);
bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
pos = nilfs_palloc_find_available_slot(
- bitmap, group_offset, entries_per_group, lock);
+ bitmap, group_offset, entries_per_group, lock,
+ wrap);
+ /*
+ * Since the search for a free slot in the second and
+ * subsequent bitmap blocks always starts from the
+ * beginning, the wrap flag only has an effect on the
+ * first search.
+ */
kunmap_local(bitmap_kaddr);
if (pos >= 0)
goto found;
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index b667e869ac07..d825a9faca6d 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -50,8 +50,8 @@ struct nilfs_palloc_req {
struct buffer_head *pr_entry_bh;
};
-int nilfs_palloc_prepare_alloc_entry(struct inode *,
- struct nilfs_palloc_req *);
+int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
+ struct nilfs_palloc_req *req, bool wrap);
void nilfs_palloc_commit_alloc_entry(struct inode *,
struct nilfs_palloc_req *);
void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 180fc8d36213..fc1caf63a42a 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -75,7 +75,7 @@ int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
int ret;
- ret = nilfs_palloc_prepare_alloc_entry(dat, req);
+ ret = nilfs_palloc_prepare_alloc_entry(dat, req, true);
if (ret < 0)
return ret;
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 52e50b1b7f22..4a29b0138d75 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -135,6 +135,9 @@ static bool nilfs_check_folio(struct folio *folio, char *kaddr)
goto Enamelen;
if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
goto Espan;
+ if (unlikely(p->inode &&
+ NILFS_PRIVATE_INODE(le64_to_cpu(p->inode))))
+ goto Einumber;
}
if (offs != limit)
goto Eend;
@@ -160,6 +163,9 @@ Enamelen:
goto bad_entry;
Espan:
error = "directory entry across blocks";
+ goto bad_entry;
+Einumber:
+ error = "disallowed inode number";
bad_entry:
nilfs_error(sb,
"bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%zd, name_len=%d",
@@ -377,11 +383,39 @@ found:
struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop)
{
- struct nilfs_dir_entry *de = nilfs_get_folio(dir, 0, foliop);
+ struct folio *folio;
+ struct nilfs_dir_entry *de, *next_de;
+ size_t limit;
+ char *msg;
+ de = nilfs_get_folio(dir, 0, &folio);
if (IS_ERR(de))
return NULL;
- return nilfs_next_entry(de);
+
+ limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */
+ if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino ||
+ !nilfs_match(1, ".", de))) {
+ msg = "missing '.'";
+ goto fail;
+ }
+
+ next_de = nilfs_next_entry(de);
+ /*
+ * If "next_de" has not reached the end of the chunk, there is
+ * at least one more record. Check whether it matches "..".
+ */
+ if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) ||
+ !nilfs_match(2, "..", next_de))) {
+ msg = "missing '..'";
+ goto fail;
+ }
+ *foliop = folio;
+ return next_de;
+
+fail:
+ nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg);
+ folio_release_kmap(folio, de);
+ return NULL;
}
ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index 612e609158b5..1e86b9303b7c 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -56,13 +56,10 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
struct nilfs_palloc_req req;
int ret;
- req.pr_entry_nr = 0; /*
- * 0 says find free inode from beginning
- * of a group. dull code!!
- */
+ req.pr_entry_nr = NILFS_FIRST_INO(ifile->i_sb);
req.pr_entry_bh = NULL;
- ret = nilfs_palloc_prepare_alloc_entry(ifile, &req);
+ ret = nilfs_palloc_prepare_alloc_entry(ifile, &req, false);
if (!ret) {
ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1,
&req.pr_entry_bh);
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 728e90be3570..4017f7856440 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -116,9 +116,15 @@ enum {
#define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino)
#define NILFS_MDT_INODE(sb, ino) \
- ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & BIT(ino)))
+ ((ino) < NILFS_USER_INO && (NILFS_MDT_INO_BITS & BIT(ino)))
#define NILFS_VALID_INODE(sb, ino) \
- ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & BIT(ino)))
+ ((ino) >= NILFS_FIRST_INO(sb) || \
+ ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino))))
+
+#define NILFS_PRIVATE_INODE(ino) ({ \
+ ino_t __ino = (ino); \
+ ((__ino) < NILFS_USER_INO && (__ino) != NILFS_ROOT_INO && \
+ (__ino) != NILFS_SKETCH_INO); })
/**
* struct nilfs_transaction_info: context information for synchronization
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index f41d7b6d432c..e44dde57ab65 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -452,6 +452,12 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
}
nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino);
+ if (nilfs->ns_first_ino < NILFS_USER_INO) {
+ nilfs_err(nilfs->ns_sb,
+ "too small lower limit for non-reserved inode numbers: %u",
+ nilfs->ns_first_ino);
+ return -EINVAL;
+ }
nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) {
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 85da0629415d..1e829ed7b0ef 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -182,7 +182,7 @@ struct the_nilfs {
unsigned long ns_nrsvsegs;
unsigned long ns_first_data_block;
int ns_inode_size;
- int ns_first_ino;
+ unsigned int ns_first_ino;
u32 ns_crc_seed;
/* /sys/fs/<nilfs>/<device> */
diff --git a/fs/open.c b/fs/open.c
index 89cafb572061..278b3edcda44 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -202,13 +202,13 @@ long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
return error;
}
-SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
+SYSCALL_DEFINE2(ftruncate, unsigned int, fd, off_t, length)
{
return do_sys_ftruncate(fd, length, 1);
}
#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length)
+COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_off_t, length)
{
return do_sys_ftruncate(fd, length, 1);
}
@@ -1004,11 +1004,6 @@ static int do_dentry_open(struct file *f,
}
}
- /*
- * Once we return a file with FMODE_OPENED, __fput() will call
- * fsnotify_close(), so we need fsnotify_open() here for symmetry.
- */
- fsnotify_open(f);
return 0;
cleanup_all:
@@ -1085,8 +1080,19 @@ EXPORT_SYMBOL(file_path);
*/
int vfs_open(const struct path *path, struct file *file)
{
+ int ret;
+
file->f_path = *path;
- return do_dentry_open(file, NULL);
+ ret = do_dentry_open(file, NULL);
+ if (!ret) {
+ /*
+ * Once we return a file with FMODE_OPENED, __fput() will call
+ * fsnotify_close(), so we need fsnotify_open() here for
+ * symmetry.
+ */
+ fsnotify_open(file);
+ }
+ return ret;
}
struct file *dentry_open(const struct path *path, int flags,
@@ -1177,8 +1183,10 @@ struct file *kernel_file_open(const struct path *path, int flags,
error = do_dentry_open(f, NULL);
if (error) {
fput(f);
- f = ERR_PTR(error);
+ return ERR_PTR(error);
}
+
+ fsnotify_open(f);
return f;
}
EXPORT_SYMBOL_GPL(kernel_file_open);
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index f1f2573bb18d..1374635e89fa 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -246,35 +246,6 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file)
}
/*
- * Expand the size of a readahead to the size of the rsize, if at least as
- * large as a page, allowing for the possibility that rsize is not pow-2
- * aligned.
- */
-static void cifs_expand_readahead(struct netfs_io_request *rreq)
-{
- unsigned int rsize = rreq->rsize;
- loff_t misalignment, i_size = i_size_read(rreq->inode);
-
- if (rsize < PAGE_SIZE)
- return;
-
- if (rsize < INT_MAX)
- rsize = roundup_pow_of_two(rsize);
- else
- rsize = ((unsigned int)INT_MAX + 1) / 2;
-
- misalignment = rreq->start & (rsize - 1);
- if (misalignment) {
- rreq->start -= misalignment;
- rreq->len += misalignment;
- }
-
- rreq->len = round_up(rreq->len, rsize);
- if (rreq->start < i_size && rreq->len > i_size - rreq->start)
- rreq->len = i_size - rreq->start;
-}
-
-/*
* Completion of a request operation.
*/
static void cifs_rreq_done(struct netfs_io_request *rreq)
@@ -329,7 +300,6 @@ const struct netfs_request_ops cifs_req_ops = {
.init_request = cifs_init_request,
.free_request = cifs_free_request,
.free_subrequest = cifs_free_subrequest,
- .expand_readahead = cifs_expand_readahead,
.clamp_length = cifs_clamp_length,
.issue_read = cifs_req_issue_read,
.done = cifs_rreq_done,
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 8d10be1fe18a..c3ee42188d25 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -917,6 +917,40 @@ struct smb2_query_directory_rsp {
__u8 Buffer[];
} __packed;
+/* DeviceType Flags */
+#define FILE_DEVICE_CD_ROM 0x00000002
+#define FILE_DEVICE_CD_ROM_FILE_SYSTEM 0x00000003
+#define FILE_DEVICE_DFS 0x00000006
+#define FILE_DEVICE_DISK 0x00000007
+#define FILE_DEVICE_DISK_FILE_SYSTEM 0x00000008
+#define FILE_DEVICE_FILE_SYSTEM 0x00000009
+#define FILE_DEVICE_NAMED_PIPE 0x00000011
+#define FILE_DEVICE_NETWORK 0x00000012
+#define FILE_DEVICE_NETWORK_FILE_SYSTEM 0x00000014
+#define FILE_DEVICE_NULL 0x00000015
+#define FILE_DEVICE_PARALLEL_PORT 0x00000016
+#define FILE_DEVICE_PRINTER 0x00000018
+#define FILE_DEVICE_SERIAL_PORT 0x0000001b
+#define FILE_DEVICE_STREAMS 0x0000001e
+#define FILE_DEVICE_TAPE 0x0000001f
+#define FILE_DEVICE_TAPE_FILE_SYSTEM 0x00000020
+#define FILE_DEVICE_VIRTUAL_DISK 0x00000024
+#define FILE_DEVICE_NETWORK_REDIRECTOR 0x00000028
+
+/* Device Characteristics */
+#define FILE_REMOVABLE_MEDIA 0x00000001
+#define FILE_READ_ONLY_DEVICE 0x00000002
+#define FILE_FLOPPY_DISKETTE 0x00000004
+#define FILE_WRITE_ONCE_MEDIA 0x00000008
+#define FILE_REMOTE_DEVICE 0x00000010
+#define FILE_DEVICE_IS_MOUNTED 0x00000020
+#define FILE_VIRTUAL_VOLUME 0x00000040
+#define FILE_DEVICE_SECURE_OPEN 0x00000100
+#define FILE_CHARACTERISTIC_TS_DEVICE 0x00001000
+#define FILE_CHARACTERISTIC_WEBDAV_DEVICE 0x00002000
+#define FILE_PORTABLE_DEVICE 0x00004000
+#define FILE_DEVICE_ALLOW_APPCONTAINER_TRAVERSAL 0x00020000
+
/*
* Maximum number of iovs we need for a set-info request.
* The largest one is rename/hardlink
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index e7e07891781b..840c71c66b30 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -2051,15 +2051,22 @@ out_err1:
* @access: file access flags
* @disposition: file disposition flags
* @may_flags: set with MAY_ flags
+ * @is_dir: is creating open flags for directory
*
* Return: file open flags
*/
static int smb2_create_open_flags(bool file_present, __le32 access,
__le32 disposition,
- int *may_flags)
+ int *may_flags,
+ bool is_dir)
{
int oflags = O_NONBLOCK | O_LARGEFILE;
+ if (is_dir) {
+ access &= ~FILE_WRITE_DESIRE_ACCESS_LE;
+ ksmbd_debug(SMB, "Discard write access to a directory\n");
+ }
+
if (access & FILE_READ_DESIRED_ACCESS_LE &&
access & FILE_WRITE_DESIRE_ACCESS_LE) {
oflags |= O_RDWR;
@@ -3167,7 +3174,9 @@ int smb2_open(struct ksmbd_work *work)
open_flags = smb2_create_open_flags(file_present, daccess,
req->CreateDisposition,
- &may_flags);
+ &may_flags,
+ req->CreateOptions & FILE_DIRECTORY_FILE_LE ||
+ (file_present && S_ISDIR(d_inode(path.dentry)->i_mode)));
if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
if (open_flags & (O_CREAT | O_TRUNC)) {
@@ -5314,8 +5323,13 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
info = (struct filesystem_device_info *)rsp->Buffer;
- info->DeviceType = cpu_to_le32(stfs.f_type);
- info->DeviceCharacteristics = cpu_to_le32(0x00000020);
+ info->DeviceType = cpu_to_le32(FILE_DEVICE_DISK);
+ info->DeviceCharacteristics =
+ cpu_to_le32(FILE_DEVICE_IS_MOUNTED);
+ if (!test_tree_conn_flag(work->tcon,
+ KSMBD_TREE_CONN_FLAG_WRITABLE))
+ info->DeviceCharacteristics |=
+ cpu_to_le32(FILE_READ_ONLY_DEVICE);
rsp->OutputBufferLength = cpu_to_le32(8);
break;
}
diff --git a/fs/super.c b/fs/super.c
index b72f1d288e95..095ba793e10c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1502,8 +1502,17 @@ static int fs_bdev_thaw(struct block_device *bdev)
lockdep_assert_held(&bdev->bd_fsfreeze_mutex);
+ /*
+ * The block device may have been frozen before it was claimed by a
+ * filesystem. Concurrently another process might try to mount that
+ * frozen block device and has temporarily claimed the block device for
+ * that purpose causing a concurrent fs_bdev_thaw() to end up here. The
+ * mounter is already about to abort mounting because they still saw an
+ * elevanted bdev->bd_fsfreeze_count so get_bdev_super() will return
+ * NULL in that case.
+ */
sb = get_bdev_super(bdev);
- if (WARN_ON_ONCE(!sb))
+ if (!sb)
return -EINVAL;
if (sb->s_op->thaw_super)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index eee7320ab0b0..17e409ceaa33 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -2057,7 +2057,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
goto out;
features = uffdio_api.features;
ret = -EINVAL;
- if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES))
+ if (uffdio_api.api != UFFD_API)
goto err_out;
ret = -EPERM;
if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE))
@@ -2081,6 +2081,11 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
#endif
+
+ ret = -EINVAL;
+ if (features & ~uffdio_api.features)
+ goto err_out;
+
uffdio_api.ioctls = UFFD_API_IOCTLS;
ret = -EFAULT;
if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
diff --git a/fs/verity/measure.c b/fs/verity/measure.c
index 3969d54158d1..175d2f1bc089 100644
--- a/fs/verity/measure.c
+++ b/fs/verity/measure.c
@@ -111,14 +111,15 @@ __bpf_kfunc_start_defs();
/**
* bpf_get_fsverity_digest: read fsverity digest of file
* @file: file to get digest from
- * @digest_ptr: (out) dynptr for struct fsverity_digest
+ * @digest_p: (out) dynptr for struct fsverity_digest
*
* Read fsverity_digest of *file* into *digest_ptr*.
*
* Return: 0 on success, a negative value on error.
*/
-__bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr_kern *digest_ptr)
+__bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_p)
{
+ struct bpf_dynptr_kern *digest_ptr = (struct bpf_dynptr_kern *)digest_p;
const struct inode *inode = file_inode(file);
u32 dynptr_sz = __bpf_dynptr_size(digest_ptr);
struct fsverity_digest *arg;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index c101cf266bc4..6af6f744fdd6 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4058,20 +4058,32 @@ xfs_bmapi_reserve_delalloc(
xfs_extlen_t indlen;
uint64_t fdblocks;
int error;
- xfs_fileoff_t aoff = off;
+ xfs_fileoff_t aoff;
+ bool use_cowextszhint =
+ whichfork == XFS_COW_FORK && !prealloc;
+retry:
/*
* Cap the alloc length. Keep track of prealloc so we know whether to
* tag the inode before we return.
*/
+ aoff = off;
alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
if (!eof)
alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
if (prealloc && alen >= len)
prealloc = alen - len;
- /* Figure out the extent size, adjust alen */
- if (whichfork == XFS_COW_FORK) {
+ /*
+ * If we're targetting the COW fork but aren't creating a speculative
+ * posteof preallocation, try to expand the reservation to align with
+ * the COW extent size hint if there's sufficient free space.
+ *
+ * Unlike the data fork, the CoW cancellation functions will free all
+ * the reservations at inactivation, so we don't require that every
+ * delalloc reservation have a dirty pagecache.
+ */
+ if (use_cowextszhint) {
struct xfs_bmbt_irec prev;
xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip);
@@ -4090,7 +4102,7 @@ xfs_bmapi_reserve_delalloc(
*/
error = xfs_quota_reserve_blkres(ip, alen);
if (error)
- return error;
+ goto out;
/*
* Split changing sb for alen and indlen since they could be coming
@@ -4140,6 +4152,17 @@ out_unreserve_frextents:
out_unreserve_quota:
if (XFS_IS_QUOTA_ON(mp))
xfs_quota_unreserve_blkres(ip, alen);
+out:
+ if (error == -ENOSPC || error == -EDQUOT) {
+ trace_xfs_delalloc_enospc(ip, off, len);
+
+ if (prealloc || use_cowextszhint) {
+ /* retry without any preallocation */
+ use_cowextszhint = false;
+ prealloc = 0;
+ goto retry;
+ }
+ }
return error;
}
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 97996cb79aaa..454b63ef7201 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -996,7 +996,7 @@ struct xfs_getparents_by_handle {
#define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom)
#define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req)
#define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req)
-#define XFS_IOC_EXCHANGE_RANGE _IOWR('X', 129, struct xfs_exchange_range)
+#define XFS_IOC_EXCHANGE_RANGE _IOW ('X', 129, struct xfs_exchange_range)
/* XFS_IOC_GETFSUUID ---------- deprecated 140 */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index e7a7bfbe75b4..513b50da6215 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -379,10 +379,13 @@ xfs_dinode_verify_fork(
/*
* A directory small enough to fit in the inode must be stored
* in local format. The directory sf <-> extents conversion
- * code updates the directory size accordingly.
+ * code updates the directory size accordingly. Directories
+ * being truncated have zero size and are not subject to this
+ * check.
*/
if (S_ISDIR(mode)) {
- if (be64_to_cpu(dip->di_size) <= fork_size &&
+ if (dip->di_size &&
+ be64_to_cpu(dip->di_size) <= fork_size &&
fork_format != XFS_DINODE_FMT_LOCAL)
return __this_address;
}
@@ -528,9 +531,19 @@ xfs_dinode_verify(
if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
return __this_address;
- /* No zero-length symlinks/dirs. */
- if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
- return __this_address;
+ /*
+ * No zero-length symlinks/dirs unless they're unlinked and hence being
+ * inactivated.
+ */
+ if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) {
+ if (dip->di_version > 1) {
+ if (dip->di_nlink)
+ return __this_address;
+ } else {
+ if (dip->di_onlink)
+ return __this_address;
+ }
+ }
fa = xfs_dinode_verify_nrext64(mp, dip);
if (fa)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index ac2e77ebb54c..a4d9fbc21b83 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -486,13 +486,11 @@ out_unlock:
/*
* Test whether it is appropriate to check an inode for and free post EOF
- * blocks. The 'force' parameter determines whether we should also consider
- * regular files that are marked preallocated or append-only.
+ * blocks.
*/
bool
xfs_can_free_eofblocks(
- struct xfs_inode *ip,
- bool force)
+ struct xfs_inode *ip)
{
struct xfs_bmbt_irec imap;
struct xfs_mount *mp = ip->i_mount;
@@ -526,11 +524,11 @@ xfs_can_free_eofblocks(
return false;
/*
- * Do not free real preallocated or append-only files unless the file
- * has delalloc blocks and we are forced to remove them.
+ * Only free real extents for inodes with persistent preallocations or
+ * the append-only flag.
*/
if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
- if (!force || ip->i_delayed_blks == 0)
+ if (ip->i_delayed_blks == 0)
return false;
/*
@@ -584,6 +582,22 @@ xfs_free_eofblocks(
/* Wait on dio to ensure i_size has settled. */
inode_dio_wait(VFS_I(ip));
+ /*
+ * For preallocated files only free delayed allocations.
+ *
+ * Note that this means we also leave speculative preallocations in
+ * place for preallocated files.
+ */
+ if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
+ if (ip->i_delayed_blks) {
+ xfs_bmap_punch_delalloc_range(ip,
+ round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
+ LLONG_MAX);
+ }
+ xfs_inode_clear_eofblocks_tag(ip);
+ return 0;
+ }
+
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
if (error) {
ASSERT(xfs_is_shutdown(mp));
@@ -891,7 +905,7 @@ xfs_prepare_shift(
* Trim eofblocks to avoid shifting uninitialized post-eof preallocation
* into the accessible region of the file.
*/
- if (xfs_can_free_eofblocks(ip, true)) {
+ if (xfs_can_free_eofblocks(ip)) {
error = xfs_free_eofblocks(ip);
if (error)
return error;
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 51f84d8ff372..eb0895bfb9da 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -63,7 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
xfs_off_t len);
/* EOF block manipulation functions */
-bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
+bool xfs_can_free_eofblocks(struct xfs_inode *ip);
int xfs_free_eofblocks(struct xfs_inode *ip);
int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 0953163a2d84..9967334ea99f 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1155,7 +1155,7 @@ xfs_inode_free_eofblocks(
}
*lockflags |= XFS_IOLOCK_EXCL;
- if (xfs_can_free_eofblocks(ip, false))
+ if (xfs_can_free_eofblocks(ip))
return xfs_free_eofblocks(ip);
/* inode could be preallocated or append-only */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index f36091e1e7f5..a4e3cd8971fc 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -42,6 +42,7 @@
#include "xfs_pnfs.h"
#include "xfs_parent.h"
#include "xfs_xattr.h"
+#include "xfs_sb.h"
struct kmem_cache *xfs_inode_cache;
@@ -870,9 +871,16 @@ xfs_init_new_inode(
* this saves us from needing to run a separate transaction to set the
* fork offset in the immediate future.
*/
- if (init_xattrs && xfs_has_attr(mp)) {
+ if (init_xattrs) {
ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
+
+ if (!xfs_has_attr(mp)) {
+ spin_lock(&mp->m_sb_lock);
+ xfs_add_attr(mp);
+ spin_unlock(&mp->m_sb_lock);
+ xfs_log_sb(tp);
+ }
}
/*
@@ -1595,7 +1603,7 @@ xfs_release(
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
return 0;
- if (xfs_can_free_eofblocks(ip, false)) {
+ if (xfs_can_free_eofblocks(ip)) {
/*
* Check if the inode is being opened, written and closed
* frequently and we have delayed allocation blocks outstanding
@@ -1856,15 +1864,13 @@ xfs_inode_needs_inactive(
/*
* This file isn't being freed, so check if there are post-eof blocks
- * to free. @force is true because we are evicting an inode from the
- * cache. Post-eof blocks must be freed, lest we end up with broken
- * free space accounting.
+ * to free.
*
* Note: don't bother with iolock here since lockdep complains about
* acquiring it in reclaim context. We have the only reference to the
* inode at this point anyways.
*/
- return xfs_can_free_eofblocks(ip, true);
+ return xfs_can_free_eofblocks(ip);
}
/*
@@ -1947,15 +1953,11 @@ xfs_inactive(
if (VFS_I(ip)->i_nlink != 0) {
/*
- * force is true because we are evicting an inode from the
- * cache. Post-eof blocks must be freed, lest we end up with
- * broken free space accounting.
- *
* Note: don't bother with iolock here since lockdep complains
* about acquiring it in reclaim context. We have the only
* reference to the inode at this point anyways.
*/
- if (xfs_can_free_eofblocks(ip, true))
+ if (xfs_can_free_eofblocks(ip))
error = xfs_free_eofblocks(ip);
goto out;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 378342673925..414903885ab9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1148,33 +1148,23 @@ xfs_buffered_write_iomap_begin(
}
}
-retry:
- error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
- end_fsb - offset_fsb, prealloc_blocks,
- allocfork == XFS_DATA_FORK ? &imap : &cmap,
- allocfork == XFS_DATA_FORK ? &icur : &ccur,
- allocfork == XFS_DATA_FORK ? eof : cow_eof);
- switch (error) {
- case 0:
- break;
- case -ENOSPC:
- case -EDQUOT:
- /* retry without any preallocation */
- trace_xfs_delalloc_enospc(ip, offset, count);
- if (prealloc_blocks) {
- prealloc_blocks = 0;
- goto retry;
- }
- fallthrough;
- default:
- goto out_unlock;
- }
-
if (allocfork == XFS_COW_FORK) {
+ error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
+ end_fsb - offset_fsb, prealloc_blocks, &cmap,
+ &ccur, cow_eof);
+ if (error)
+ goto out_unlock;
+
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap);
goto found_cow;
}
+ error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
+ end_fsb - offset_fsb, prealloc_blocks, &imap, &icur,
+ eof);
+ if (error)
+ goto out_unlock;
+
/*
* Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
* them out if the write happens to fail.
diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h
index 933ca6581aba..fabcefe8a80a 100644
--- a/include/asm-generic/syscalls.h
+++ b/include/asm-generic/syscalls.h
@@ -19,7 +19,7 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
#ifndef sys_mmap
asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
- unsigned long fd, off_t pgoff);
+ unsigned long fd, unsigned long off);
#endif
#ifndef sys_rt_sigreturn
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a834f4b761bc..4f1d4a97b9d1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2494,7 +2494,7 @@ struct sk_buff;
struct bpf_dtab_netdev;
struct bpf_cpu_map_entry;
-void __dev_flush(void);
+void __dev_flush(struct list_head *flush_list);
int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
struct net_device *dev_rx);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
@@ -2507,7 +2507,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
struct bpf_prog *xdp_prog, struct bpf_map *map,
bool exclude_ingress);
-void __cpu_map_flush(void);
+void __cpu_map_flush(struct list_head *flush_list);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
struct net_device *dev_rx);
int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
@@ -2644,8 +2644,6 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
-bool dev_check_flush(void);
-bool cpu_map_check_flush(void);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -2738,7 +2736,7 @@ static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd)
return ERR_PTR(-EOPNOTSUPP);
}
-static inline void __dev_flush(void)
+static inline void __dev_flush(struct list_head *flush_list)
{
}
@@ -2784,7 +2782,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
return 0;
}
-static inline void __cpu_map_flush(void)
+static inline void __cpu_map_flush(struct list_head *flush_list)
{
}
@@ -2933,8 +2931,7 @@ bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
return ret;
}
-void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
- struct btf_mod_pair *used_btfs, u32 len);
+void __bpf_free_used_btfs(struct btf_mod_pair *used_btfs, u32 len);
static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
enum bpf_prog_type type)
@@ -3265,8 +3262,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size);
-int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
- struct bpf_dynptr_kern *ptr);
+int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
+ struct bpf_dynptr *ptr);
#else
static inline bool bpf_sock_common_is_valid_access(int off, int size,
enum bpf_access_type type,
@@ -3288,8 +3285,8 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
{
return 0;
}
-static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
- struct bpf_dynptr_kern *ptr)
+static inline int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
+ struct bpf_dynptr *ptr)
{
return -EOPNOTSUPP;
}
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index e4070fb02b11..e98ba5a5cf79 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -73,7 +73,10 @@ enum bpf_iter_state {
struct bpf_reg_state {
/* Ordering of fields matters. See states_equal() */
enum bpf_reg_type type;
- /* Fixed part of pointer offset, pointer types only */
+ /*
+ * Fixed part of pointer offset, pointer types only.
+ * Or constant delta between "linked" scalars with the same ID.
+ */
s32 off;
union {
/* valid when type == PTR_TO_PACKET */
@@ -167,6 +170,13 @@ struct bpf_reg_state {
* Similarly to dynptrs, we use ID to track "belonging" of a reference
* to a specific instance of bpf_iter.
*/
+ /*
+ * Upper bit of ID is used to remember relationship between "linked"
+ * registers. Example:
+ * r1 = r2; both will have r1->id == r2->id == N
+ * r1 += 10; r1->id == N | BPF_ADD_CONST and r1->off == 10
+ */
+#define BPF_ADD_CONST (1U << 31)
u32 id;
/* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
* from a pointer-cast helper, bpf_sk_fullsock() and
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 7c3e40c3295e..cffb43133c68 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -140,6 +140,7 @@ extern const struct file_operations btf_fops;
const char *btf_get_name(const struct btf *btf);
void btf_get(struct btf *btf);
void btf_put(struct btf *btf);
+const struct btf_header *btf_header(const struct btf *btf);
int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz);
struct btf *btf_get_by_fd(int fd);
int btf_get_info_by_fd(const struct btf *btf,
@@ -212,8 +213,10 @@ int btf_get_fd_by_id(u32 id);
u32 btf_obj_id(const struct btf *btf);
bool btf_is_kernel(const struct btf *btf);
bool btf_is_module(const struct btf *btf);
+bool btf_is_vmlinux(const struct btf *btf);
struct module *btf_try_get_module(const struct btf *btf);
u32 btf_nr_types(const struct btf *btf);
+struct btf *btf_base_btf(const struct btf *btf);
bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
const struct btf_member *m,
u32 expected_offset, u32 expected_size);
@@ -339,6 +342,11 @@ static inline u8 btf_int_offset(const struct btf_type *t)
return BTF_INT_OFFSET(*(u32 *)(t + 1));
}
+static inline __u8 btf_int_bits(const struct btf_type *t)
+{
+ return BTF_INT_BITS(*(__u32 *)(t + 1));
+}
+
static inline bool btf_type_is_scalar(const struct btf_type *t)
{
return btf_type_is_int(t) || btf_type_is_enum(t);
@@ -478,6 +486,11 @@ static inline struct btf_param *btf_params(const struct btf_type *t)
return (struct btf_param *)(t + 1);
}
+static inline struct btf_decl_tag *btf_decl_tag(const struct btf_type *t)
+{
+ return (struct btf_decl_tag *)(t + 1);
+}
+
static inline int btf_id_cmp_func(const void *a, const void *b)
{
const int *pa = a, *pb = b;
@@ -515,9 +528,38 @@ static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *
}
#endif
+enum btf_field_iter_kind {
+ BTF_FIELD_ITER_IDS,
+ BTF_FIELD_ITER_STRS,
+};
+
+struct btf_field_desc {
+ /* once-per-type offsets */
+ int t_off_cnt, t_offs[2];
+ /* member struct size, or zero, if no members */
+ int m_sz;
+ /* repeated per-member offsets */
+ int m_off_cnt, m_offs[1];
+};
+
+struct btf_field_iter {
+ struct btf_field_desc desc;
+ void *p;
+ int m_idx;
+ int off_idx;
+ int vlen;
+};
+
#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
+void btf_set_base_btf(struct btf *btf, const struct btf *base_btf);
+int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **map_ids);
+int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t,
+ enum btf_field_iter_kind iter_kind);
+__u32 *btf_field_iter_next(struct btf_field_iter *it);
+
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
+const char *btf_str_by_offset(const struct btf *btf, u32 offset);
struct btf *btf_parse_vmlinux(void);
struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id,
@@ -531,6 +573,7 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id);
int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
struct module *owner);
struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id);
+bool btf_is_projection_of(const char *pname, const char *tname);
bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, enum bpf_prog_type prog_type,
int arg);
@@ -543,6 +586,28 @@ static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
{
return NULL;
}
+
+static inline void btf_set_base_btf(struct btf *btf, const struct btf *base_btf)
+{
+}
+
+static inline int btf_relocate(void *log, struct btf *btf, const struct btf *base_btf,
+ __u32 **map_ids)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t,
+ enum btf_field_iter_kind iter_kind)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline __u32 *btf_field_iter_next(struct btf_field_iter *it)
+{
+ return NULL;
+}
+
static inline const char *btf_name_by_offset(const struct btf *btf,
u32 offset)
{
diff --git a/include/linux/closure.h b/include/linux/closure.h
index 99155df162d0..2af44427107d 100644
--- a/include/linux/closure.h
+++ b/include/linux/closure.h
@@ -159,6 +159,7 @@ struct closure {
#ifdef CONFIG_DEBUG_CLOSURES
#define CLOSURE_MAGIC_DEAD 0xc054dead
#define CLOSURE_MAGIC_ALIVE 0xc054a11e
+#define CLOSURE_MAGIC_STACK 0xc05451cc
unsigned int magic;
struct list_head all;
@@ -285,6 +286,21 @@ static inline void closure_get(struct closure *cl)
}
/**
+ * closure_get_not_zero
+ */
+static inline bool closure_get_not_zero(struct closure *cl)
+{
+ unsigned old = atomic_read(&cl->remaining);
+ do {
+ if (!(old & CLOSURE_REMAINING_MASK))
+ return false;
+
+ } while (!atomic_try_cmpxchg_acquire(&cl->remaining, &old, old + 1));
+
+ return true;
+}
+
+/**
* closure_init - Initialize a closure, setting the refcount to 1
* @cl: closure to initialize
* @parent: parent of the new closure. cl will take a refcount on it for its
@@ -308,6 +324,18 @@ static inline void closure_init_stack(struct closure *cl)
{
memset(cl, 0, sizeof(struct closure));
atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->magic = CLOSURE_MAGIC_STACK;
+#endif
+}
+
+static inline void closure_init_stack_release(struct closure *cl)
+{
+ memset(cl, 0, sizeof(struct closure));
+ atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+#ifdef CONFIG_DEBUG_CLOSURES
+ cl->magic = CLOSURE_MAGIC_STACK;
+#endif
}
/**
@@ -355,6 +383,8 @@ do { \
*/
#define closure_return(_cl) continue_at((_cl), NULL, NULL)
+void closure_return_sync(struct closure *cl);
+
/**
* continue_at_nobarrier - jump to another function without barrier
*
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 233f61ec8afc..56cebaff0c91 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -608,7 +608,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd,
asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz,
struct compat_statfs64 __user *buf);
asmlinkage long compat_sys_truncate(const char __user *, compat_off_t);
-asmlinkage long compat_sys_ftruncate(unsigned int, compat_ulong_t);
+asmlinkage long compat_sys_ftruncate(unsigned int, compat_off_t);
/* No generic prototype for truncate64, ftruncate64, fallocate */
asmlinkage long compat_sys_openat(int dfd, const char __user *filename,
int flags, umode_t mode);
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 959196af7f5a..e213b5508da6 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -159,6 +159,57 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
return index % n_rx_rings;
}
+/**
+ * struct ethtool_rxfh_context - a custom RSS context configuration
+ * @indir_size: Number of u32 entries in indirection table
+ * @key_size: Size of hash key, in bytes
+ * @priv_size: Size of driver private data, in bytes
+ * @hfunc: RSS hash function identifier. One of the %ETH_RSS_HASH_*
+ * @input_xfrm: Defines how the input data is transformed. Valid values are one
+ * of %RXH_XFRM_*.
+ * @indir_configured: indir has been specified (at create time or subsequently)
+ * @key_configured: hkey has been specified (at create time or subsequently)
+ */
+struct ethtool_rxfh_context {
+ u32 indir_size;
+ u32 key_size;
+ u16 priv_size;
+ u8 hfunc;
+ u8 input_xfrm;
+ u8 indir_configured:1;
+ u8 key_configured:1;
+ /* private: driver private data, indirection table, and hash key are
+ * stored sequentially in @data area. Use below helpers to access.
+ */
+ u8 data[] __aligned(sizeof(void *));
+};
+
+static inline void *ethtool_rxfh_context_priv(struct ethtool_rxfh_context *ctx)
+{
+ return ctx->data;
+}
+
+static inline u32 *ethtool_rxfh_context_indir(struct ethtool_rxfh_context *ctx)
+{
+ return (u32 *)(ctx->data + ALIGN(ctx->priv_size, sizeof(u32)));
+}
+
+static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx)
+{
+ return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size);
+}
+
+static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size,
+ u16 priv_size)
+{
+ size_t indir_bytes = array_size(indir_size, sizeof(u32));
+ size_t flex_len;
+
+ flex_len = size_add(size_add(indir_bytes, key_size),
+ ALIGN(priv_size, sizeof(u32)));
+ return struct_size_t(struct ethtool_rxfh_context, data, flex_len);
+}
+
/* declare a link mode bitmap */
#define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \
DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS)
@@ -506,17 +557,16 @@ struct ethtool_ts_stats {
#define ETH_MODULE_MAX_I2C_ADDRESS 0x7f
/**
- * struct ethtool_module_eeprom - EEPROM dump from specified page
- * @offset: Offset within the specified EEPROM page to begin read, in bytes.
- * @length: Number of bytes to read.
- * @page: Page number to read from.
- * @bank: Page bank number to read from, if applicable by EEPROM spec.
+ * struct ethtool_module_eeprom - plug-in module EEPROM read / write parameters
+ * @offset: When @offset is 0-127, it is used as an address to the Lower Memory
+ * (@page must be 0). Otherwise, it is used as an address to the
+ * Upper Memory.
+ * @length: Number of bytes to read / write.
+ * @page: Page number.
+ * @bank: Bank number, if supported by EEPROM spec.
* @i2c_address: I2C address of a page. Value less than 0x7f expected. Most
* EEPROMs use 0x50 or 0x51.
* @data: Pointer to buffer with EEPROM data of @length size.
- *
- * This can be used to manage pages during EEPROM dump in ethtool and pass
- * required information to the driver.
*/
struct ethtool_module_eeprom {
u32 offset;
@@ -671,6 +721,12 @@ struct ethtool_rxfh_param {
* contexts.
* @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor
* RSS.
+ * @rxfh_priv_size: size of the driver private data area the core should
+ * allocate for an RSS context (in &struct ethtool_rxfh_context).
+ * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this
+ * is zero then the core may choose any (nonzero) ID, otherwise the core
+ * will only use IDs strictly less than this value, as the @rss_context
+ * argument to @create_rxfh_context and friends.
* @supported_coalesce_params: supported types of interrupt coalescing.
* @supported_ring_params: supported ring params.
* @get_drvinfo: Report driver/device information. Modern drivers no
@@ -767,6 +823,32 @@ struct ethtool_rxfh_param {
* will remain unchanged.
* Returns a negative error code or zero. An error code must be returned
* if at least one unsupported change was requested.
+ * @create_rxfh_context: Create a new RSS context with the specified RX flow
+ * hash indirection table, hash key, and hash function.
+ * The &struct ethtool_rxfh_context for this context is passed in @ctx;
+ * note that the indir table, hkey and hfunc are not yet populated as
+ * of this call. The driver does not need to update these; the core
+ * will do so if this op succeeds.
+ * However, if @rxfh.indir is set to %NULL, the driver must update the
+ * indir table in @ctx with the (default or inherited) table actually in
+ * use; similarly, if @rxfh.key is %NULL, @rxfh.hfunc is
+ * %ETH_RSS_HASH_NO_CHANGE, or @rxfh.input_xfrm is %RXH_XFRM_NO_CHANGE,
+ * the driver should update the corresponding information in @ctx.
+ * If the driver provides this method, it must also provide
+ * @modify_rxfh_context and @remove_rxfh_context.
+ * Returns a negative error code or zero.
+ * @modify_rxfh_context: Reconfigure the specified RSS context. Allows setting
+ * the contents of the RX flow hash indirection table, hash key, and/or
+ * hash function associated with the given context.
+ * Parameters which are set to %NULL or zero will remain unchanged.
+ * The &struct ethtool_rxfh_context for this context is passed in @ctx;
+ * note that it will still contain the *old* settings. The driver does
+ * not need to update these; the core will do so if this op succeeds.
+ * Returns a negative error code or zero. An error code must be returned
+ * if at least one unsupported change was requested.
+ * @remove_rxfh_context: Remove the specified RSS context.
+ * The &struct ethtool_rxfh_context for this context is passed in @ctx.
+ * Returns a negative error code or zero.
* @get_channels: Get number of channels.
* @set_channels: Set number of channels. Returns a negative error code or
* zero.
@@ -824,6 +906,8 @@ struct ethtool_rxfh_param {
* @get_module_eeprom_by_page: Get a region of plug-in module EEPROM data from
* specified page. Returns a negative error code or the amount of bytes
* read.
+ * @set_module_eeprom_by_page: Write to a region of plug-in module EEPROM,
+ * from kernel space only. Returns a negative error code or zero.
* @get_eth_phy_stats: Query some of the IEEE 802.3 PHY statistics.
* @get_eth_mac_stats: Query some of the IEEE 802.3 MAC statistics.
* @get_eth_ctrl_stats: Query some of the IEEE 802.3 MAC Ctrl statistics.
@@ -854,6 +938,8 @@ struct ethtool_ops {
u32 cap_link_lanes_supported:1;
u32 cap_rss_ctx_supported:1;
u32 cap_rss_sym_xor_supported:1;
+ u16 rxfh_priv_size;
+ u32 rxfh_max_context_id;
u32 supported_coalesce_params;
u32 supported_ring_params;
void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *);
@@ -916,6 +1002,18 @@ struct ethtool_ops {
int (*get_rxfh)(struct net_device *, struct ethtool_rxfh_param *);
int (*set_rxfh)(struct net_device *, struct ethtool_rxfh_param *,
struct netlink_ext_ack *extack);
+ int (*create_rxfh_context)(struct net_device *,
+ struct ethtool_rxfh_context *ctx,
+ const struct ethtool_rxfh_param *rxfh,
+ struct netlink_ext_ack *extack);
+ int (*modify_rxfh_context)(struct net_device *,
+ struct ethtool_rxfh_context *ctx,
+ const struct ethtool_rxfh_param *rxfh,
+ struct netlink_ext_ack *extack);
+ int (*remove_rxfh_context)(struct net_device *,
+ struct ethtool_rxfh_context *ctx,
+ u32 rss_context,
+ struct netlink_ext_ack *extack);
void (*get_channels)(struct net_device *, struct ethtool_channels *);
int (*set_channels)(struct net_device *, struct ethtool_channels *);
int (*get_dump_flag)(struct net_device *, struct ethtool_dump *);
@@ -958,6 +1056,9 @@ struct ethtool_ops {
int (*get_module_eeprom_by_page)(struct net_device *dev,
const struct ethtool_module_eeprom *page,
struct netlink_ext_ack *extack);
+ int (*set_module_eeprom_by_page)(struct net_device *dev,
+ const struct ethtool_module_eeprom *page,
+ struct netlink_ext_ack *extack);
void (*get_eth_phy_stats)(struct net_device *dev,
struct ethtool_eth_phy_stats *phy_stats);
void (*get_eth_mac_stats)(struct net_device *dev,
@@ -1000,6 +1101,21 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
const struct ethtool_link_ksettings *cmd,
u32 *dev_speed, u8 *dev_duplex);
+/**
+ * struct ethtool_netdev_state - per-netdevice state for ethtool features
+ * @rss_ctx: XArray of custom RSS contexts
+ * @rss_lock: Protects entries in @rss_ctx. May be taken from
+ * within RTNL.
+ * @wol_enabled: Wake-on-LAN is enabled
+ * @module_fw_flash_in_progress: Module firmware flashing is in progress.
+ */
+struct ethtool_netdev_state {
+ struct xarray rss_ctx;
+ struct mutex rss_lock;
+ unsigned wol_enabled:1;
+ unsigned module_fw_flash_in_progress:1;
+};
+
struct phy_device;
struct phy_tdr_config;
struct phy_plca_cfg;
@@ -1157,4 +1273,24 @@ struct ethtool_forced_speed_map {
void
ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size);
+
+/* C33 PSE extended state and substate. */
+struct ethtool_c33_pse_ext_state_info {
+ enum ethtool_c33_pse_ext_state c33_pse_ext_state;
+ union {
+ enum ethtool_c33_pse_ext_substate_error_condition error_condition;
+ enum ethtool_c33_pse_ext_substate_mr_pse_enable mr_pse_enable;
+ enum ethtool_c33_pse_ext_substate_option_detect_ted option_detect_ted;
+ enum ethtool_c33_pse_ext_substate_option_vport_lim option_vport_lim;
+ enum ethtool_c33_pse_ext_substate_ovld_detected ovld_detected;
+ enum ethtool_c33_pse_ext_substate_power_not_available power_not_available;
+ enum ethtool_c33_pse_ext_substate_short_detected short_detected;
+ u32 __c33_pse_ext_substate;
+ };
+};
+
+struct ethtool_c33_pse_pw_limit_range {
+ u32 min;
+ u32 max;
+};
#endif /* _LINUX_ETHTOOL_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c0349522de8f..b6672ff61407 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -829,6 +829,33 @@ static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void)
return &bpf_net_ctx->xskmap_map_flush_list;
}
+static inline void bpf_net_ctx_get_all_used_flush_lists(struct list_head **lh_map,
+ struct list_head **lh_dev,
+ struct list_head **lh_xsk)
+{
+ struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+ u32 kern_flags = bpf_net_ctx->ri.kern_flags;
+ struct list_head *lh;
+
+ *lh_map = *lh_dev = *lh_xsk = NULL;
+
+ if (!IS_ENABLED(CONFIG_BPF_SYSCALL))
+ return;
+
+ lh = &bpf_net_ctx->dev_map_flush_list;
+ if (kern_flags & BPF_RI_F_DEV_MAP_INIT && !list_empty(lh))
+ *lh_dev = lh;
+
+ lh = &bpf_net_ctx->cpu_map_flush_list;
+ if (kern_flags & BPF_RI_F_CPU_MAP_INIT && !list_empty(lh))
+ *lh_map = lh;
+
+ lh = &bpf_net_ctx->xskmap_map_flush_list;
+ if (IS_ENABLED(CONFIG_XDP_SOCKETS) &&
+ kern_flags & BPF_RI_F_XSK_MAP_INIT && !list_empty(lh))
+ *lh_xsk = lh;
+}
+
/* Compute the linear packet data range [data, data_end) which
* will be accessed by various program types (cls_bpf, act_bpf,
* lwt, ...). Subsystems allowing direct data access must (!)
@@ -1207,8 +1234,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image,
struct bpf_binary_header **rw_hdr,
u8 **rw_image,
bpf_jit_fill_hole_t bpf_fill_ill_insns);
-int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
- struct bpf_binary_header *ro_header,
+int bpf_jit_binary_pack_finalize(struct bpf_binary_header *ro_header,
struct bpf_binary_header *rw_header);
void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
struct bpf_binary_header *rw_header);
@@ -1286,18 +1312,18 @@ static inline bool bpf_jit_kallsyms_enabled(void)
return false;
}
-const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
+int __bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char *sym);
bool is_bpf_text_address(unsigned long addr);
int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
char *sym);
struct bpf_prog *bpf_prog_ksym_find(unsigned long addr);
-static inline const char *
+static inline int
bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym)
{
- const char *ret = __bpf_address_lookup(addr, size, off, sym);
+ int ret = __bpf_address_lookup(addr, size, off, sym);
if (ret && modname)
*modname = NULL;
@@ -1341,11 +1367,11 @@ static inline bool bpf_jit_kallsyms_enabled(void)
return false;
}
-static inline const char *
+static inline int
__bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char *sym)
{
- return NULL;
+ return 0;
}
static inline bool is_bpf_text_address(unsigned long addr)
@@ -1364,11 +1390,11 @@ static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
return NULL;
}
-static inline const char *
+static inline int
bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym)
{
- return NULL;
+ return 0;
}
static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp)
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index bdf7f3eddf0a..4c91a019972b 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -19,6 +19,7 @@
enum fscache_cache_trace;
enum fscache_cookie_trace;
enum fscache_access_trace;
+enum fscache_volume_trace;
enum fscache_cache_state {
FSCACHE_CACHE_IS_NOT_PRESENT, /* No cache is present for this name */
@@ -97,6 +98,11 @@ extern void fscache_withdraw_cookie(struct fscache_cookie *cookie);
extern void fscache_io_error(struct fscache_cache *cache);
+extern struct fscache_volume *
+fscache_try_get_volume(struct fscache_volume *volume,
+ enum fscache_volume_trace where);
+extern void fscache_put_volume(struct fscache_volume *volume,
+ enum fscache_volume_trace where);
extern void fscache_end_volume_access(struct fscache_volume *volume,
struct fscache_cookie *cookie,
enum fscache_access_trace why);
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 4da80e92f804..278620e063ab 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -112,7 +112,13 @@ static inline int fsnotify_file(struct file *file, __u32 mask)
{
const struct path *path;
- if (file->f_mode & FMODE_NONOTIFY)
+ /*
+ * FMODE_NONOTIFY are fds generated by fanotify itself which should not
+ * generate new events. We also don't want to generate events for
+ * FMODE_PATH fds (involves open & close events) as they are just
+ * handle creation / destruction events and not "real" file events.
+ */
+ if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH))
return 0;
path = &file->f_path;
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 800995c425e0..b792274189a3 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -86,15 +86,15 @@ struct ftrace_hash;
#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \
defined(CONFIG_DYNAMIC_FTRACE)
-const char *
+int
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym);
#else
-static inline const char *
+static inline int
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym)
{
- return NULL;
+ return 0;
}
#endif
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index b48570eaa449..7abdc0927124 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -207,7 +207,6 @@ struct io_submit_state {
bool need_plug;
bool cq_flush;
unsigned short submit_nr;
- unsigned int cqes_count;
struct blk_plug plug;
};
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 13fb41d25da6..7d3bd7c9664a 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1249,6 +1249,7 @@ extern int ata_slave_link_init(struct ata_port *ap);
extern struct ata_port *ata_sas_port_alloc(struct ata_host *,
struct ata_port_info *, struct Scsi_Host *);
extern void ata_port_probe(struct ata_port *ap);
+extern void ata_port_free(struct ata_port *ap);
extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap);
extern void ata_sas_tport_delete(struct ata_port *ap);
int ata_sas_device_configure(struct scsi_device *sdev, struct queue_limits *lim,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 66b921c81c0f..fdad0071d599 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2036,7 +2036,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
u8 min_mkey_log_entity_size_fixed_buffer_valid[0x1];
u8 reserved_at_402[0x1e];
- u8 reserved_at_420[0x3e0];
+ u8 reserved_at_420[0x20];
+
+ u8 reserved_at_440[0x8];
+ u8 max_num_eqs_24b[0x18];
+ u8 reserved_at_460[0x3a0];
};
enum mlx5_ifc_flow_destination_type {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 586a8f0104d7..1dc6248feb83 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1979,8 +1979,9 @@ static inline int subsection_map_index(unsigned long pfn)
static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
{
int idx = subsection_map_index(pfn);
+ struct mem_section_usage *usage = READ_ONCE(ms->usage);
- return test_bit(idx, READ_ONCE(ms->usage)->subsection_map);
+ return usage ? test_bit(idx, usage->subsection_map) : 0;
}
#else
static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
diff --git a/include/linux/module.h b/include/linux/module.h
index ffa1c603163c..4213d8993cd8 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -509,7 +509,9 @@ struct module {
#endif
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
unsigned int btf_data_size;
+ unsigned int btf_base_data_size;
void *btf_data;
+ void *btf_base_data;
#endif
#ifdef CONFIG_JUMP_LABEL
struct jump_entry *jump_entries;
@@ -931,11 +933,11 @@ int module_kallsyms_on_each_symbol(const char *modname,
* least KSYM_NAME_LEN long: a pointer to namebuf is returned if
* found, otherwise NULL.
*/
-const char *module_address_lookup(unsigned long addr,
- unsigned long *symbolsize,
- unsigned long *offset,
- char **modname, const unsigned char **modbuildid,
- char *namebuf);
+int module_address_lookup(unsigned long addr,
+ unsigned long *symbolsize,
+ unsigned long *offset,
+ char **modname, const unsigned char **modbuildid,
+ char *namebuf);
int lookup_module_symbol_name(unsigned long addr, char *symname);
int lookup_module_symbol_attrs(unsigned long addr,
unsigned long *size,
@@ -964,14 +966,14 @@ static inline int module_kallsyms_on_each_symbol(const char *modname,
}
/* For kallsyms to ask for address resolution. NULL means not found. */
-static inline const char *module_address_lookup(unsigned long addr,
+static inline int module_address_lookup(unsigned long addr,
unsigned long *symbolsize,
unsigned long *offset,
char **modname,
const unsigned char **modbuildid,
char *namebuf)
{
- return NULL;
+ return 0;
}
static inline int lookup_module_symbol_name(unsigned long addr, char *symname)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cc18acd3c58b..93558645c6d0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -80,6 +80,7 @@ struct xdp_buff;
struct xdp_frame;
struct xdp_metadata_ops;
struct xdp_md;
+struct ethtool_netdev_state;
typedef u32 xdp_features_t;
@@ -1986,8 +1987,6 @@ enum netdev_reg_state {
* switch driver and used to set the phys state of the
* switch port.
*
- * @wol_enabled: Wake-on-LAN is enabled
- *
* @threaded: napi threaded mode is enabled
*
* @net_notifier_list: List of per-net netdev notifier block
@@ -1999,6 +1998,7 @@ enum netdev_reg_state {
* @udp_tunnel_nic_info: static structure describing the UDP tunnel
* offload capabilities of the device
* @udp_tunnel_nic: UDP tunnel offload state
+ * @ethtool: ethtool related state
* @xdp_state: stores info on attached XDP BPF programs
*
* @nested_level: Used as a parameter of spin_lock_nested() of
@@ -2373,7 +2373,6 @@ struct net_device {
struct lock_class_key *qdisc_tx_busylock;
bool proto_down;
bool threaded;
- unsigned wol_enabled:1;
struct list_head net_notifier_list;
@@ -2384,6 +2383,8 @@ struct net_device {
const struct udp_tunnel_nic_info *udp_tunnel_nic_info;
struct udp_tunnel_nic *udp_tunnel_nic;
+ struct ethtool_netdev_state *ethtool;
+
/* protected by rtnl_lock */
struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE];
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 425573202295..c693ac344ec0 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -85,10 +85,11 @@ enum {
enum {
NVMF_RDMA_QPTYPE_CONNECTED = 1, /* Reliable Connected */
NVMF_RDMA_QPTYPE_DATAGRAM = 2, /* Reliable Datagram */
+ NVMF_RDMA_QPTYPE_INVALID = 0xff,
};
-/* RDMA QP Service Type codes for Discovery Log Page entry TSAS
- * RDMA_QPTYPE field
+/* RDMA Provider Type codes for Discovery Log Page entry TSAS
+ * RDMA_PRTYPE field
*/
enum {
NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 1, /* No Provider Specified */
@@ -110,6 +111,7 @@ enum {
NVMF_TCP_SECTYPE_NONE = 0, /* No Security */
NVMF_TCP_SECTYPE_TLS12 = 1, /* TLSv1.2, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */
NVMF_TCP_SECTYPE_TLS13 = 2, /* TLSv1.3, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */
+ NVMF_TCP_SECTYPE_INVALID = 0xff,
};
#define NVME_AQ_DEPTH 32
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 1acf5bac7f50..8c236c651d1d 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -230,7 +230,13 @@ static inline int folio_ref_dec_return(struct folio *folio)
static inline bool page_ref_add_unless(struct page *page, int nr, int u)
{
- bool ret = atomic_add_unless(&page->_refcount, nr, u);
+ bool ret = false;
+
+ rcu_read_lock();
+ /* avoid writing to the vmemmap area being remapped */
+ if (!page_is_fake_head(page) && page_ref_count(page) != u)
+ ret = atomic_add_unless(&page->_refcount, nr, u);
+ rcu_read_unlock();
if (page_ref_tracepoint_active(page_ref_mod_unless))
__page_ref_mod_unless(page, nr, ret);
@@ -258,54 +264,9 @@ static inline bool folio_try_get(struct folio *folio)
return folio_ref_add_unless(folio, 1, 0);
}
-static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)
-{
-#ifdef CONFIG_TINY_RCU
- /*
- * The caller guarantees the folio will not be freed from interrupt
- * context, so (on !SMP) we only need preemption to be disabled
- * and TINY_RCU does that for us.
- */
-# ifdef CONFIG_PREEMPT_COUNT
- VM_BUG_ON(!in_atomic() && !irqs_disabled());
-# endif
- VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio);
- folio_ref_add(folio, count);
-#else
- if (unlikely(!folio_ref_add_unless(folio, count, 0))) {
- /* Either the folio has been freed, or will be freed. */
- return false;
- }
-#endif
- return true;
-}
-
-/**
- * folio_try_get_rcu - Attempt to increase the refcount on a folio.
- * @folio: The folio.
- *
- * This is a version of folio_try_get() optimised for non-SMP kernels.
- * If you are still holding the rcu_read_lock() after looking up the
- * page and know that the page cannot have its refcount decreased to
- * zero in interrupt context, you can use this instead of folio_try_get().
- *
- * Example users include get_user_pages_fast() (as pages are not unmapped
- * from interrupt context) and the page cache lookups (as pages are not
- * truncated from interrupt context). We also know that pages are not
- * frozen in interrupt context for the purposes of splitting or migration.
- *
- * You can also use this function if you're holding a lock that prevents
- * pages being frozen & removed; eg the i_pages lock for the page cache
- * or the mmap_lock or page table lock for page tables. In this case,
- * it will always succeed, and you could have used a plain folio_get(),
- * but it's sometimes more convenient to have a common function called
- * from both locked and RCU-protected contexts.
- *
- * Return: True if the reference count was successfully incremented.
- */
-static inline bool folio_try_get_rcu(struct folio *folio)
+static inline bool folio_ref_try_add(struct folio *folio, int count)
{
- return folio_ref_try_add_rcu(folio, 1);
+ return folio_ref_add_unless(folio, count, 0);
}
static inline int page_ref_freeze(struct page *page, int count)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 59f1df0cde5a..a0a026d2d244 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -354,11 +354,18 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
* a good order (that's 1MB if you're using 4kB pages)
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
+#define PREFERRED_MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
#else
-#define MAX_PAGECACHE_ORDER 8
+#define PREFERRED_MAX_PAGECACHE_ORDER 8
#endif
+/*
+ * xas_split_alloc() does not support arbitrary orders. This implies no
+ * 512MB THP on ARM64 with 64KB base page size.
+ */
+#define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1)
+#define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER)
+
/**
* mapping_set_large_folios() - Indicate the file supports large folios.
* @mapping: The file.
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index da3a6c30f6d2..b4a4eb6c8866 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -7,11 +7,12 @@
#ifndef __LINUX_PCS_XPCS_H
#define __LINUX_PCS_XPCS_H
+#include <linux/clk.h>
+#include <linux/fwnode.h>
+#include <linux/mdio.h>
#include <linux/phy.h>
#include <linux/phylink.h>
-
-#define NXP_SJA1105_XPCS_ID 0x00000010
-#define NXP_SJA1110_XPCS_ID 0x00000020
+#include <linux/types.h>
/* AN mode */
#define DW_AN_C73 1
@@ -20,20 +21,46 @@
#define DW_AN_C37_1000BASEX 4
#define DW_10GBASER 5
-/* device vendor OUI */
-#define DW_OUI_WX 0x0018fc80
+struct dw_xpcs_desc;
+
+enum dw_xpcs_pcs_id {
+ DW_XPCS_ID_NATIVE = 0,
+ NXP_SJA1105_XPCS_ID = 0x00000010,
+ NXP_SJA1110_XPCS_ID = 0x00000020,
+ DW_XPCS_ID = 0x7996ced0,
+ DW_XPCS_ID_MASK = 0xffffffff,
+};
-/* dev_flag */
-#define DW_DEV_TXGBE BIT(0)
+enum dw_xpcs_pma_id {
+ DW_XPCS_PMA_ID_NATIVE = 0,
+ DW_XPCS_PMA_GEN1_3G_ID,
+ DW_XPCS_PMA_GEN2_3G_ID,
+ DW_XPCS_PMA_GEN2_6G_ID,
+ DW_XPCS_PMA_GEN4_3G_ID,
+ DW_XPCS_PMA_GEN4_6G_ID,
+ DW_XPCS_PMA_GEN5_10G_ID,
+ DW_XPCS_PMA_GEN5_12G_ID,
+ WX_TXGBE_XPCS_PMA_10G_ID = 0x0018fc80,
+};
-struct xpcs_id;
+struct dw_xpcs_info {
+ u32 pcs;
+ u32 pma;
+};
+
+enum dw_xpcs_clock {
+ DW_XPCS_CORE_CLK,
+ DW_XPCS_PAD_CLK,
+ DW_XPCS_NUM_CLKS,
+};
struct dw_xpcs {
+ struct dw_xpcs_info info;
+ const struct dw_xpcs_desc *desc;
struct mdio_device *mdiodev;
- const struct xpcs_id *id;
+ struct clk_bulk_data clks[DW_XPCS_NUM_CLKS];
struct phylink_pcs pcs;
phy_interface_t interface;
- int dev_flag;
};
int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface);
@@ -46,6 +73,8 @@ int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
int enable);
struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr,
phy_interface_t interface);
+struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode,
+ phy_interface_t interface);
void xpcs_destroy(struct dw_xpcs *xpcs);
#endif /* __LINUX_PCS_XPCS_H */
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 205fccfc0f60..bd68f9d8e74f 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1126,7 +1126,7 @@ struct phy_driver {
u8 index, enum led_brightness value);
/**
- * @led_blink_set: Set a PHY LED brightness. Index indicates
+ * @led_blink_set: Set a PHY LED blinking. Index indicates
* which of the PHYs led should be configured to blink. Delays
* are in milliseconds and if both are zero then a sensible
* default should be chosen. The call should adjust the
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 40afab23881a..65c5184470f1 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -60,9 +60,6 @@ static inline const char *printk_skip_headers(const char *buffer)
#define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT
#define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET
-int add_preferred_console_match(const char *match, const char *name,
- const short idx);
-
extern int console_printk[];
#define console_loglevel (console_printk[0])
diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h
index 6eec24ffa866..591a53e082e6 100644
--- a/include/linux/pse-pd/pse.h
+++ b/include/linux/pse-pd/pse.h
@@ -9,6 +9,9 @@
#include <linux/list.h>
#include <uapi/linux/ethtool.h>
+/* Maximum current in uA according to IEEE 802.3-2022 Table 145-1 */
+#define MAX_PI_CURRENT 1920000
+
struct phy_device;
struct pse_controller_dev;
@@ -36,12 +39,29 @@ struct pse_control_config {
* functions. IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState
* @c33_pw_status: power detection status of the PSE.
* IEEE 802.3-2022 30.9.1.1.5 aPSEPowerDetectionStatus:
+ * @c33_pw_class: detected class of a powered PD
+ * IEEE 802.3-2022 30.9.1.1.8 aPSEPowerClassification
+ * @c33_actual_pw: power currently delivered by the PSE in mW
+ * IEEE 802.3-2022 30.9.1.1.23 aPSEActualPower
+ * @c33_ext_state_info: extended state information of the PSE
+ * @c33_avail_pw_limit: available power limit of the PSE in mW
+ * IEEE 802.3-2022 145.2.5.4 pse_avail_pwr
+ * @c33_pw_limit_ranges: supported power limit configuration range. The driver
+ * is in charge of the memory allocation.
+ * @c33_pw_limit_nb_ranges: number of supported power limit configuration
+ * ranges
*/
struct pse_control_status {
enum ethtool_podl_pse_admin_state podl_admin_state;
enum ethtool_podl_pse_pw_d_status podl_pw_status;
enum ethtool_c33_pse_admin_state c33_admin_state;
enum ethtool_c33_pse_pw_d_status c33_pw_status;
+ u32 c33_pw_class;
+ u32 c33_actual_pw;
+ struct ethtool_c33_pse_ext_state_info c33_ext_state_info;
+ u32 c33_avail_pw_limit;
+ struct ethtool_c33_pse_pw_limit_range *c33_pw_limit_ranges;
+ u32 c33_pw_limit_nb_ranges;
};
/**
@@ -53,6 +73,14 @@ struct pse_control_status {
* May also return negative errno.
* @pi_enable: Configure the PSE PI as enabled.
* @pi_disable: Configure the PSE PI as disabled.
+ * @pi_get_voltage: Return voltage similarly to get_voltage regulator
+ * callback.
+ * @pi_get_current_limit: Get the configured current limit similarly to
+ * get_current_limit regulator callback.
+ * @pi_set_current_limit: Configure the current limit similarly to
+ * set_current_limit regulator callback.
+ * Should not return an error in case of MAX_PI_CURRENT
+ * current value set.
*/
struct pse_controller_ops {
int (*ethtool_get_status)(struct pse_controller_dev *pcdev,
@@ -62,6 +90,11 @@ struct pse_controller_ops {
int (*pi_is_enabled)(struct pse_controller_dev *pcdev, int id);
int (*pi_enable)(struct pse_controller_dev *pcdev, int id);
int (*pi_disable)(struct pse_controller_dev *pcdev, int id);
+ int (*pi_get_voltage)(struct pse_controller_dev *pcdev, int id);
+ int (*pi_get_current_limit)(struct pse_controller_dev *pcdev,
+ int id);
+ int (*pi_set_current_limit)(struct pse_controller_dev *pcdev,
+ int id, int max_uA);
};
struct module;
@@ -148,6 +181,11 @@ int pse_ethtool_get_status(struct pse_control *psec,
int pse_ethtool_set_config(struct pse_control *psec,
struct netlink_ext_ack *extack,
const struct pse_control_config *config);
+int pse_ethtool_set_pw_limit(struct pse_control *psec,
+ struct netlink_ext_ack *extack,
+ const unsigned int pw_limit);
+int pse_ethtool_get_pw_limit(struct pse_control *psec,
+ struct netlink_ext_ack *extack);
bool pse_has_podl(struct pse_control *psec);
bool pse_has_c33(struct pse_control *psec);
@@ -177,6 +215,19 @@ static inline int pse_ethtool_set_config(struct pse_control *psec,
return -EOPNOTSUPP;
}
+static inline int pse_ethtool_set_pw_limit(struct pse_control *psec,
+ struct netlink_ext_ack *extack,
+ const unsigned int pw_limit)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int pse_ethtool_get_pw_limit(struct pse_control *psec,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
static inline bool pse_has_podl(struct pse_control *psec)
{
return false;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5ff5e65a4627..d07d3645d2d5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2198,13 +2198,13 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
extern void sched_set_stop_task(int cpu, struct task_struct *stop);
#ifdef CONFIG_MEM_ALLOC_PROFILING
-static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
+static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
{
swap(current->alloc_tag, tag);
return tag;
}
-static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
+static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
{
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n");
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 8cb65f50e830..aea25eef9a1a 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -811,8 +811,7 @@ enum UART_TX_FLAGS {
if (pending < WAKEUP_CHARS) { \
uart_write_wakeup(__port); \
\
- if (!((flags) & UART_TX_NOSTOP) && pending == 0 && \
- __port->ops->tx_empty(__port)) \
+ if (!((flags) & UART_TX_NOSTOP) && pending == 0) \
__port->ops->stop_tx(__port); \
} \
\
@@ -852,6 +851,24 @@ enum UART_TX_FLAGS {
})
/**
+ * uart_port_tx_limited_flags -- transmit helper for uart_port with count limiting with flags
+ * @port: uart port
+ * @ch: variable to store a character to be written to the HW
+ * @flags: %UART_TX_NOSTOP or similar
+ * @count: a limit of characters to send
+ * @tx_ready: can HW accept more data function
+ * @put_char: function to write a character
+ * @tx_done: function to call after the loop is done
+ *
+ * See uart_port_tx_limited() for more details.
+ */
+#define uart_port_tx_limited_flags(port, ch, flags, count, tx_ready, put_char, tx_done) ({ \
+ unsigned int __count = (count); \
+ __uart_port_tx(port, ch, flags, tx_ready, put_char, tx_done, __count, \
+ __count--); \
+})
+
+/**
* uart_port_tx -- transmit helper for uart_port
* @port: uart port
* @ch: variable to store a character to be written to the HW
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index a45da7eef9a2..b14be59550e3 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -284,6 +284,12 @@ enum {
SFF8024_ID_QSFP_8438 = 0x0c,
SFF8024_ID_QSFP_8436_8636 = 0x0d,
SFF8024_ID_QSFP28_8636 = 0x11,
+ SFF8024_ID_QSFP_DD = 0x18,
+ SFF8024_ID_OSFP = 0x19,
+ SFF8024_ID_DSFP = 0x1B,
+ SFF8024_ID_QSFP_PLUS_CMIS = 0x1E,
+ SFF8024_ID_SFP_DD_CMIS = 0x1F,
+ SFF8024_ID_SFP_PLUS_CMIS = 0x20,
SFF8024_ENCODING_UNSPEC = 0x00,
SFF8024_ENCODING_8B10B = 0x01,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f4cda3fbdb75..9c29bdd5596d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1703,6 +1703,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, struct iov_iter *from,
size_t length);
+int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
+ struct iov_iter *from, size_t length);
+
static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb,
struct msghdr *msg, int len)
{
diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h
index 11f0a4063403..16c241a23472 100644
--- a/include/linux/skbuff_ref.h
+++ b/include/linux/skbuff_ref.h
@@ -32,13 +32,13 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
}
-bool napi_pp_put_page(struct page *page);
+bool napi_pp_put_page(netmem_ref netmem);
static inline void
skb_page_unref(struct page *page, bool recycle)
{
#ifdef CONFIG_PAGE_POOL
- if (recycle && napi_pp_put_page(page))
+ if (recycle && napi_pp_put_page(page_to_netmem(page)))
return;
#endif
put_page(page);
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 89d16b90370b..2a1ff91d1914 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -76,7 +76,7 @@ struct msghdr {
__kernel_size_t msg_controllen; /* ancillary data buffer length */
struct kiocb *msg_iocb; /* ptr to iocb for async requests */
struct ubuf_info *msg_ubuf;
- int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb,
+ int (*sg_from_iter)(struct sk_buff *skb,
struct iov_iter *from, size_t length);
};
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 9c54f82901a1..84e13bd5df28 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -82,7 +82,7 @@ struct stmmac_priv;
struct stmmac_mdio_bus_data {
unsigned int phy_mask;
- unsigned int has_xpcs;
+ unsigned int pcs_mask;
unsigned int default_an_inband;
int *irqs;
int probed_phy_irq;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index bd450023b9a4..e685e93ba354 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -354,7 +354,8 @@ static inline swp_entry_t page_swap_entry(struct page *page)
}
/* linux/mm/workingset.c */
-bool workingset_test_recent(void *shadow, bool file, bool *workingset);
+bool workingset_test_recent(void *shadow, bool file, bool *workingset,
+ bool flush);
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
void workingset_refault(struct folio *folio, void *shadow);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 9104952d323d..fff820c3e93e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -322,13 +322,13 @@ asmlinkage long sys_io_pgetevents(aio_context_t ctx_id,
long nr,
struct io_event __user *events,
struct __kernel_timespec __user *timeout,
- const struct __aio_sigset *sig);
+ const struct __aio_sigset __user *sig);
asmlinkage long sys_io_pgetevents_time32(aio_context_t ctx_id,
long min_nr,
long nr,
struct io_event __user *events,
struct old_timespec32 __user *timeout,
- const struct __aio_sigset *sig);
+ const struct __aio_sigset __user *sig);
asmlinkage long sys_io_uring_setup(u32 entries,
struct io_uring_params __user *p);
asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit,
@@ -418,7 +418,7 @@ asmlinkage long sys_listmount(const struct mnt_id_req __user *req,
u64 __user *mnt_ids, size_t nr_mnt_ids,
unsigned int flags);
asmlinkage long sys_truncate(const char __user *path, long length);
-asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
+asmlinkage long sys_ftruncate(unsigned int fd, off_t length);
#if BITS_PER_LONG == 32
asmlinkage long sys_truncate64(const char __user *path, loff_t length);
asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length);
@@ -441,7 +441,7 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);
asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
umode_t mode);
asmlinkage long sys_openat2(int dfd, const char __user *filename,
- struct open_how *how, size_t size);
+ struct open_how __user *how, size_t size);
asmlinkage long sys_close(unsigned int fd);
asmlinkage long sys_close_range(unsigned int fd, unsigned int max_fd,
unsigned int flags);
@@ -555,7 +555,7 @@ asmlinkage long sys_get_robust_list(int pid,
asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
size_t len);
-asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
+asmlinkage long sys_futex_waitv(struct futex_waitv __user *waiters,
unsigned int nr_futexes, unsigned int flags,
struct __kernel_timespec __user *timeout, clockid_t clockid);
@@ -859,9 +859,15 @@ asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource,
const struct rlimit64 __user *new_rlim,
struct rlimit64 __user *old_rlim);
asmlinkage long sys_fanotify_init(unsigned int flags, unsigned int event_f_flags);
+#if defined(CONFIG_ARCH_SPLIT_ARG64)
+asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
+ unsigned int mask_1, unsigned int mask_2,
+ int dfd, const char __user * pathname);
+#else
asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
u64 mask, int fd,
const char __user *pathname);
+#endif
asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
struct file_handle __user *handle,
int __user *mnt_id, int flag);
@@ -907,7 +913,7 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
asmlinkage long sys_getrandom(char __user *buf, size_t count,
unsigned int flags);
asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
-asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
+asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size);
asmlinkage long sys_execveat(int dfd, const char __user *filename,
const char __user *const __user *argv,
const char __user *const __user *envp, int flags);
@@ -960,11 +966,11 @@ asmlinkage long sys_cachestat(unsigned int fd,
struct cachestat_range __user *cstat_range,
struct cachestat __user *cstat, unsigned int flags);
asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags);
-asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx,
- u32 *size, u32 flags);
-asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx,
+asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx __user *ctx,
+ u32 __user *size, u32 flags);
+asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx __user *ctx,
u32 size, u32 flags);
-asmlinkage long sys_lsm_list_modules(u64 *ids, u32 *size, u32 flags);
+asmlinkage long sys_lsm_list_modules(u64 __user *ids, u32 __user *size, u32 flags);
/*
* Architecture-specific system calls
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 21a67dc9efe8..e93ee8d936a9 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -490,9 +490,16 @@ static inline void tpm_buf_append_empty_auth(struct tpm_buf *buf, u32 handle)
{
}
#endif
+
+static inline struct tpm2_auth *tpm2_chip_auth(struct tpm_chip *chip)
+{
#ifdef CONFIG_TCG_TPM2_HMAC
+ return chip->auth;
+#else
+ return NULL;
+#endif
+}
-int tpm2_start_auth_session(struct tpm_chip *chip);
void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
u32 handle, u8 *name);
void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
@@ -504,9 +511,27 @@ static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
u8 *passphrase,
int passphraselen)
{
- tpm_buf_append_hmac_session(chip, buf, attributes, passphrase,
- passphraselen);
+ struct tpm_header *head;
+ int offset;
+
+ if (tpm2_chip_auth(chip)) {
+ tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, passphraselen);
+ } else {
+ offset = buf->handles * 4 + TPM_HEADER_SIZE;
+ head = (struct tpm_header *)buf->data;
+
+ /*
+ * If the only sessions are optional, the command tag must change to
+ * TPM2_ST_NO_SESSIONS.
+ */
+ if (tpm_buf_length(buf) == offset)
+ head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS);
+ }
}
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+
+int tpm2_start_auth_session(struct tpm_chip *chip);
void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf);
int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
int rc);
@@ -521,56 +546,6 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip)
static inline void tpm2_end_auth_session(struct tpm_chip *chip)
{
}
-static inline void tpm_buf_append_name(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u32 handle, u8 *name)
-{
- tpm_buf_append_u32(buf, handle);
- /* count the number of handles in the upper bits of flags */
- buf->handles++;
-}
-static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u8 attributes, u8 *passphrase,
- int passphraselen)
-{
- /* offset tells us where the sessions area begins */
- int offset = buf->handles * 4 + TPM_HEADER_SIZE;
- u32 len = 9 + passphraselen;
-
- if (tpm_buf_length(buf) != offset) {
- /* not the first session so update the existing length */
- len += get_unaligned_be32(&buf->data[offset]);
- put_unaligned_be32(len, &buf->data[offset]);
- } else {
- tpm_buf_append_u32(buf, len);
- }
- /* auth handle */
- tpm_buf_append_u32(buf, TPM2_RS_PW);
- /* nonce */
- tpm_buf_append_u16(buf, 0);
- /* attributes */
- tpm_buf_append_u8(buf, 0);
- /* passphrase */
- tpm_buf_append_u16(buf, passphraselen);
- tpm_buf_append(buf, passphrase, passphraselen);
-}
-static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u8 attributes,
- u8 *passphrase,
- int passphraselen)
-{
- int offset = buf->handles * 4 + TPM_HEADER_SIZE;
- struct tpm_header *head = (struct tpm_header *) buf->data;
-
- /*
- * if the only sessions are optional, the command tag
- * must change to TPM2_ST_NO_SESSIONS
- */
- if (tpm_buf_length(buf) == offset)
- head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS);
-}
static inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip,
struct tpm_buf *buf)
{
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index fe932ca3bc8c..e372a88e8c3f 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -324,6 +324,17 @@ enum {
* claim to support it.
*/
HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE,
+
+ /*
+ * When this quirk is set, the reserved bits of Primary/Secondary_PHY
+ * inside the LE Extended Advertising Report events are discarded.
+ * This is required for some Apple/Broadcom controllers which
+ * abuse these reserved bits for unrelated flags.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY,
};
/* HCI device flags */
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 6a9d063e9f47..534c3386e714 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -38,6 +38,8 @@ int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
const void *param, u8 event, u32 timeout,
struct sock *sk);
+int hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u32 timeout);
void hci_cmd_sync_init(struct hci_dev *hdev);
void hci_cmd_sync_clear(struct hci_dev *hdev);
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e78ccbe38d6d..0a04eaf5343c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -398,7 +398,7 @@ enum ieee80211_bss_change {
BSS_CHANGED_HE_OBSS_PD = 1<<28,
BSS_CHANGED_HE_BSS_COLOR = 1<<29,
BSS_CHANGED_FILS_DISCOVERY = 1<<30,
- BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31,
+ BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = BIT_ULL(31),
BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33),
BSS_CHANGED_MLD_TTLM = BIT_ULL(34),
BSS_CHANGED_TPE = BIT_ULL(35),
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 9abb7ee40d72..b63d53bb9dd6 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -305,11 +305,26 @@ struct flow_ports {
__be16 source, dest;
};
+struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev);
+int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd);
+
unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
+#if (IS_BUILTIN(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+ (IS_MODULE(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+extern int nf_flow_register_bpf(void);
+#else
+static inline int nf_flow_register_bpf(void)
+{
+ return 0;
+}
+#endif
+
#define MODULE_ALIAS_NF_FLOWTABLE(family) \
MODULE_ALIAS("nf-flowtable-" __stringify(family))
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 188d41da1a40..1bfdd16890fa 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1176,7 +1176,7 @@ static inline bool nft_chain_is_bound(struct nft_chain *chain)
int nft_chain_add(struct nft_table *table, struct nft_chain *chain);
void nft_chain_del(struct nft_chain *chain);
-void nf_tables_chain_destroy(struct nft_ctx *ctx);
+void nf_tables_chain_destroy(struct nft_chain *chain);
struct nft_stats {
u64 bytes;
@@ -1613,41 +1613,67 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
}
/**
- * struct nft_trans - nf_tables object update in transaction
+ * struct nft_trans - nf_tables object update in transaction
*
- * @list: used internally
- * @binding_list: list of objects with possible bindings
- * @msg_type: message type
- * @put_net: ctx->net needs to be put
- * @ctx: transaction context
- * @data: internal information related to the transaction
+ * @list: used internally
+ * @net: struct net
+ * @table: struct nft_table the object resides in
+ * @msg_type: message type
+ * @seq: netlink sequence number
+ * @flags: modifiers to new request
+ * @report: notify via unicast netlink message
+ * @put_net: net needs to be put
+ *
+ * This is the information common to all objects in the transaction,
+ * this must always be the first member of derived sub-types.
*/
struct nft_trans {
struct list_head list;
- struct list_head binding_list;
+ struct net *net;
+ struct nft_table *table;
int msg_type;
- bool put_net;
- struct nft_ctx ctx;
- char data[];
+ u32 seq;
+ u16 flags;
+ u8 report:1;
+ u8 put_net:1;
+};
+
+/**
+ * struct nft_trans_binding - nf_tables object with binding support in transaction
+ * @nft_trans: base structure, MUST be first member
+ * @binding_list: list of objects with possible bindings
+ *
+ * This is the base type used by objects that can be bound to a chain.
+ */
+struct nft_trans_binding {
+ struct nft_trans nft_trans;
+ struct list_head binding_list;
};
struct nft_trans_rule {
+ struct nft_trans nft_trans;
struct nft_rule *rule;
+ struct nft_chain *chain;
struct nft_flow_rule *flow;
u32 rule_id;
bool bound;
};
-#define nft_trans_rule(trans) \
- (((struct nft_trans_rule *)trans->data)->rule)
-#define nft_trans_flow_rule(trans) \
- (((struct nft_trans_rule *)trans->data)->flow)
-#define nft_trans_rule_id(trans) \
- (((struct nft_trans_rule *)trans->data)->rule_id)
-#define nft_trans_rule_bound(trans) \
- (((struct nft_trans_rule *)trans->data)->bound)
+#define nft_trans_container_rule(trans) \
+ container_of(trans, struct nft_trans_rule, nft_trans)
+#define nft_trans_rule(trans) \
+ nft_trans_container_rule(trans)->rule
+#define nft_trans_flow_rule(trans) \
+ nft_trans_container_rule(trans)->flow
+#define nft_trans_rule_id(trans) \
+ nft_trans_container_rule(trans)->rule_id
+#define nft_trans_rule_bound(trans) \
+ nft_trans_container_rule(trans)->bound
+#define nft_trans_rule_chain(trans) \
+ nft_trans_container_rule(trans)->chain
struct nft_trans_set {
+ struct nft_trans_binding nft_trans_binding;
struct nft_set *set;
u32 set_id;
u32 gc_int;
@@ -1657,100 +1683,117 @@ struct nft_trans_set {
u32 size;
};
-#define nft_trans_set(trans) \
- (((struct nft_trans_set *)trans->data)->set)
-#define nft_trans_set_id(trans) \
- (((struct nft_trans_set *)trans->data)->set_id)
-#define nft_trans_set_bound(trans) \
- (((struct nft_trans_set *)trans->data)->bound)
-#define nft_trans_set_update(trans) \
- (((struct nft_trans_set *)trans->data)->update)
-#define nft_trans_set_timeout(trans) \
- (((struct nft_trans_set *)trans->data)->timeout)
-#define nft_trans_set_gc_int(trans) \
- (((struct nft_trans_set *)trans->data)->gc_int)
-#define nft_trans_set_size(trans) \
- (((struct nft_trans_set *)trans->data)->size)
+#define nft_trans_container_set(t) \
+ container_of(t, struct nft_trans_set, nft_trans_binding.nft_trans)
+#define nft_trans_set(trans) \
+ nft_trans_container_set(trans)->set
+#define nft_trans_set_id(trans) \
+ nft_trans_container_set(trans)->set_id
+#define nft_trans_set_bound(trans) \
+ nft_trans_container_set(trans)->bound
+#define nft_trans_set_update(trans) \
+ nft_trans_container_set(trans)->update
+#define nft_trans_set_timeout(trans) \
+ nft_trans_container_set(trans)->timeout
+#define nft_trans_set_gc_int(trans) \
+ nft_trans_container_set(trans)->gc_int
+#define nft_trans_set_size(trans) \
+ nft_trans_container_set(trans)->size
struct nft_trans_chain {
+ struct nft_trans_binding nft_trans_binding;
struct nft_chain *chain;
- bool update;
char *name;
struct nft_stats __percpu *stats;
u8 policy;
+ bool update;
bool bound;
u32 chain_id;
struct nft_base_chain *basechain;
struct list_head hook_list;
};
-#define nft_trans_chain(trans) \
- (((struct nft_trans_chain *)trans->data)->chain)
-#define nft_trans_chain_update(trans) \
- (((struct nft_trans_chain *)trans->data)->update)
-#define nft_trans_chain_name(trans) \
- (((struct nft_trans_chain *)trans->data)->name)
-#define nft_trans_chain_stats(trans) \
- (((struct nft_trans_chain *)trans->data)->stats)
-#define nft_trans_chain_policy(trans) \
- (((struct nft_trans_chain *)trans->data)->policy)
-#define nft_trans_chain_bound(trans) \
- (((struct nft_trans_chain *)trans->data)->bound)
-#define nft_trans_chain_id(trans) \
- (((struct nft_trans_chain *)trans->data)->chain_id)
-#define nft_trans_basechain(trans) \
- (((struct nft_trans_chain *)trans->data)->basechain)
-#define nft_trans_chain_hooks(trans) \
- (((struct nft_trans_chain *)trans->data)->hook_list)
+#define nft_trans_container_chain(t) \
+ container_of(t, struct nft_trans_chain, nft_trans_binding.nft_trans)
+#define nft_trans_chain(trans) \
+ nft_trans_container_chain(trans)->chain
+#define nft_trans_chain_update(trans) \
+ nft_trans_container_chain(trans)->update
+#define nft_trans_chain_name(trans) \
+ nft_trans_container_chain(trans)->name
+#define nft_trans_chain_stats(trans) \
+ nft_trans_container_chain(trans)->stats
+#define nft_trans_chain_policy(trans) \
+ nft_trans_container_chain(trans)->policy
+#define nft_trans_chain_bound(trans) \
+ nft_trans_container_chain(trans)->bound
+#define nft_trans_chain_id(trans) \
+ nft_trans_container_chain(trans)->chain_id
+#define nft_trans_basechain(trans) \
+ nft_trans_container_chain(trans)->basechain
+#define nft_trans_chain_hooks(trans) \
+ nft_trans_container_chain(trans)->hook_list
struct nft_trans_table {
+ struct nft_trans nft_trans;
bool update;
};
-#define nft_trans_table_update(trans) \
- (((struct nft_trans_table *)trans->data)->update)
+#define nft_trans_container_table(trans) \
+ container_of(trans, struct nft_trans_table, nft_trans)
+#define nft_trans_table_update(trans) \
+ nft_trans_container_table(trans)->update
struct nft_trans_elem {
+ struct nft_trans nft_trans;
struct nft_set *set;
struct nft_elem_priv *elem_priv;
bool bound;
};
-#define nft_trans_elem_set(trans) \
- (((struct nft_trans_elem *)trans->data)->set)
-#define nft_trans_elem_priv(trans) \
- (((struct nft_trans_elem *)trans->data)->elem_priv)
-#define nft_trans_elem_set_bound(trans) \
- (((struct nft_trans_elem *)trans->data)->bound)
+#define nft_trans_container_elem(t) \
+ container_of(t, struct nft_trans_elem, nft_trans)
+#define nft_trans_elem_set(trans) \
+ nft_trans_container_elem(trans)->set
+#define nft_trans_elem_priv(trans) \
+ nft_trans_container_elem(trans)->elem_priv
+#define nft_trans_elem_set_bound(trans) \
+ nft_trans_container_elem(trans)->bound
struct nft_trans_obj {
+ struct nft_trans nft_trans;
struct nft_object *obj;
struct nft_object *newobj;
bool update;
};
-#define nft_trans_obj(trans) \
- (((struct nft_trans_obj *)trans->data)->obj)
-#define nft_trans_obj_newobj(trans) \
- (((struct nft_trans_obj *)trans->data)->newobj)
-#define nft_trans_obj_update(trans) \
- (((struct nft_trans_obj *)trans->data)->update)
+#define nft_trans_container_obj(t) \
+ container_of(t, struct nft_trans_obj, nft_trans)
+#define nft_trans_obj(trans) \
+ nft_trans_container_obj(trans)->obj
+#define nft_trans_obj_newobj(trans) \
+ nft_trans_container_obj(trans)->newobj
+#define nft_trans_obj_update(trans) \
+ nft_trans_container_obj(trans)->update
struct nft_trans_flowtable {
+ struct nft_trans nft_trans;
struct nft_flowtable *flowtable;
- bool update;
struct list_head hook_list;
u32 flags;
+ bool update;
};
-#define nft_trans_flowtable(trans) \
- (((struct nft_trans_flowtable *)trans->data)->flowtable)
-#define nft_trans_flowtable_update(trans) \
- (((struct nft_trans_flowtable *)trans->data)->update)
-#define nft_trans_flowtable_hooks(trans) \
- (((struct nft_trans_flowtable *)trans->data)->hook_list)
-#define nft_trans_flowtable_flags(trans) \
- (((struct nft_trans_flowtable *)trans->data)->flags)
+#define nft_trans_container_flowtable(t) \
+ container_of(t, struct nft_trans_flowtable, nft_trans)
+#define nft_trans_flowtable(trans) \
+ nft_trans_container_flowtable(trans)->flowtable
+#define nft_trans_flowtable_update(trans) \
+ nft_trans_container_flowtable(trans)->update
+#define nft_trans_flowtable_hooks(trans) \
+ nft_trans_container_flowtable(trans)->hook_list
+#define nft_trans_flowtable_flags(trans) \
+ nft_trans_container_flowtable(trans)->flags
#define NFT_TRANS_GC_BATCHCOUNT 256
@@ -1764,6 +1807,33 @@ struct nft_trans_gc {
struct rcu_head rcu;
};
+static inline void nft_ctx_update(struct nft_ctx *ctx,
+ const struct nft_trans *trans)
+{
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWRULE:
+ case NFT_MSG_DELRULE:
+ case NFT_MSG_DESTROYRULE:
+ ctx->chain = nft_trans_rule_chain(trans);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ case NFT_MSG_DELCHAIN:
+ case NFT_MSG_DESTROYCHAIN:
+ ctx->chain = nft_trans_chain(trans);
+ break;
+ default:
+ ctx->chain = NULL;
+ break;
+ }
+
+ ctx->net = trans->net;
+ ctx->table = trans->table;
+ ctx->family = trans->table->family;
+ ctx->report = trans->report;
+ ctx->flags = trans->flags;
+ ctx->seq = trans->seq;
+}
+
struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
unsigned int gc_seq, gfp_t gfp);
void nft_trans_gc_destroy(struct nft_trans_gc *trans);
diff --git a/include/net/netmem.h b/include/net/netmem.h
index d8b810245c1d..46cc9b89ac79 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -38,4 +38,19 @@ static inline netmem_ref page_to_netmem(struct page *page)
return (__force netmem_ref)page;
}
+static inline int netmem_ref_count(netmem_ref netmem)
+{
+ return page_ref_count(netmem_to_page(netmem));
+}
+
+static inline unsigned long netmem_to_pfn(netmem_ref netmem)
+{
+ return page_to_pfn(netmem_to_page(netmem));
+}
+
+static inline netmem_ref netmem_compound_head(netmem_ref netmem)
+{
+ return page_to_netmem(compound_head(netmem_to_page(netmem)));
+}
+
#endif /* _NET_NETMEM_H */
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 873631c79ab1..2b43a893c619 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -55,6 +55,8 @@
#include <linux/dma-mapping.h>
#include <net/page_pool/types.h>
+#include <net/net_debug.h>
+#include <net/netmem.h>
#ifdef CONFIG_PAGE_POOL_STATS
/* Deprecated driver-facing API, use netlink instead */
@@ -212,6 +214,11 @@ page_pool_get_dma_dir(const struct page_pool *pool)
return pool->p.dma_dir;
}
+static inline void page_pool_fragment_netmem(netmem_ref netmem, long nr)
+{
+ atomic_long_set(&netmem_to_page(netmem)->pp_ref_count, nr);
+}
+
/**
* page_pool_fragment_page() - split a fresh page into fragments
* @page: page to split
@@ -232,11 +239,12 @@ page_pool_get_dma_dir(const struct page_pool *pool)
*/
static inline void page_pool_fragment_page(struct page *page, long nr)
{
- atomic_long_set(&page->pp_ref_count, nr);
+ page_pool_fragment_netmem(page_to_netmem(page), nr);
}
-static inline long page_pool_unref_page(struct page *page, long nr)
+static inline long page_pool_unref_netmem(netmem_ref netmem, long nr)
{
+ struct page *page = netmem_to_page(netmem);
long ret;
/* If nr == pp_ref_count then we have cleared all remaining
@@ -279,15 +287,41 @@ static inline long page_pool_unref_page(struct page *page, long nr)
return ret;
}
+static inline long page_pool_unref_page(struct page *page, long nr)
+{
+ return page_pool_unref_netmem(page_to_netmem(page), nr);
+}
+
+static inline void page_pool_ref_netmem(netmem_ref netmem)
+{
+ atomic_long_inc(&netmem_to_page(netmem)->pp_ref_count);
+}
+
static inline void page_pool_ref_page(struct page *page)
{
- atomic_long_inc(&page->pp_ref_count);
+ page_pool_ref_netmem(page_to_netmem(page));
}
-static inline bool page_pool_is_last_ref(struct page *page)
+static inline bool page_pool_is_last_ref(netmem_ref netmem)
{
/* If page_pool_unref_page() returns 0, we were the last user */
- return page_pool_unref_page(page, 1) == 0;
+ return page_pool_unref_netmem(netmem, 1) == 0;
+}
+
+static inline void page_pool_put_netmem(struct page_pool *pool,
+ netmem_ref netmem,
+ unsigned int dma_sync_size,
+ bool allow_direct)
+{
+ /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
+ * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
+ */
+#ifdef CONFIG_PAGE_POOL
+ if (!page_pool_is_last_ref(netmem))
+ return;
+
+ page_pool_put_unrefed_netmem(pool, netmem, dma_sync_size, allow_direct);
+#endif
}
/**
@@ -308,15 +342,15 @@ static inline void page_pool_put_page(struct page_pool *pool,
unsigned int dma_sync_size,
bool allow_direct)
{
- /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
- * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
- */
-#ifdef CONFIG_PAGE_POOL
- if (!page_pool_is_last_ref(page))
- return;
+ page_pool_put_netmem(pool, page_to_netmem(page), dma_sync_size,
+ allow_direct);
+}
- page_pool_put_unrefed_page(pool, page, dma_sync_size, allow_direct);
-#endif
+static inline void page_pool_put_full_netmem(struct page_pool *pool,
+ netmem_ref netmem,
+ bool allow_direct)
+{
+ page_pool_put_netmem(pool, netmem, -1, allow_direct);
}
/**
@@ -331,7 +365,7 @@ static inline void page_pool_put_page(struct page_pool *pool,
static inline void page_pool_put_full_page(struct page_pool *pool,
struct page *page, bool allow_direct)
{
- page_pool_put_page(pool, page, -1, allow_direct);
+ page_pool_put_netmem(pool, page_to_netmem(page), -1, allow_direct);
}
/**
@@ -365,6 +399,18 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va,
page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct);
}
+static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
+{
+ struct page *page = netmem_to_page(netmem);
+
+ dma_addr_t ret = page->dma_addr;
+
+ if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
+ ret <<= PAGE_SHIFT;
+
+ return ret;
+}
+
/**
* page_pool_get_dma_addr() - Retrieve the stored DMA address.
* @page: page allocated from a page pool
@@ -374,16 +420,14 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va,
*/
static inline dma_addr_t page_pool_get_dma_addr(const struct page *page)
{
- dma_addr_t ret = page->dma_addr;
-
- if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
- ret <<= PAGE_SHIFT;
-
- return ret;
+ return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page));
}
-static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+static inline bool page_pool_set_dma_addr_netmem(netmem_ref netmem,
+ dma_addr_t addr)
{
+ struct page *page = netmem_to_page(netmem);
+
if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
page->dma_addr = addr >> PAGE_SHIFT;
@@ -419,6 +463,11 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool,
page_pool_get_dma_dir(pool));
}
+static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+ return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr);
+}
+
static inline bool page_pool_put(struct page_pool *pool)
{
return refcount_dec_and_test(&pool->user_cnt);
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 7e8477057f3d..b70bcc14ceda 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -6,6 +6,7 @@
#include <linux/dma-direction.h>
#include <linux/ptr_ring.h>
#include <linux/types.h>
+#include <net/netmem.h>
#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
* map/unmap
@@ -40,7 +41,7 @@
#define PP_ALLOC_CACHE_REFILL 64
struct pp_alloc_cache {
u32 count;
- struct page *cache[PP_ALLOC_CACHE_SIZE];
+ netmem_ref cache[PP_ALLOC_CACHE_SIZE];
};
/**
@@ -73,7 +74,7 @@ struct page_pool_params {
struct net_device *netdev;
unsigned int flags;
/* private: used by test code only */
- void (*init_callback)(struct page *page, void *arg);
+ void (*init_callback)(netmem_ref netmem, void *arg);
void *init_arg;
);
};
@@ -151,7 +152,7 @@ struct page_pool {
*/
__cacheline_group_begin(frag) __aligned(4 * sizeof(long));
long frag_users;
- struct page *frag_page;
+ netmem_ref frag_page;
unsigned int frag_offset;
__cacheline_group_end(frag);
@@ -220,8 +221,12 @@ struct page_pool {
};
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
+netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp);
struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
unsigned int size, gfp_t gfp);
+netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
+ unsigned int *offset, unsigned int size,
+ gfp_t gfp);
struct page_pool *page_pool_create(const struct page_pool_params *params);
struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
int cpuid);
@@ -229,6 +234,7 @@ struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
struct xdp_mem_info;
#ifdef CONFIG_PAGE_POOL
+void page_pool_disable_direct_recycling(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
const struct xdp_mem_info *mem);
@@ -251,6 +257,9 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
}
#endif
+void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
+ unsigned int dma_sync_size,
+ bool allow_direct);
void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
unsigned int dma_sync_size,
bool allow_direct);
diff --git a/include/net/psample.h b/include/net/psample.h
index 0509d2d6be67..c52e9ebd88dd 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -24,7 +24,10 @@ struct psample_metadata {
u8 out_tc_valid:1,
out_tc_occ_valid:1,
latency_valid:1,
- unused:5;
+ rate_as_probability:1,
+ unused:4;
+ const u8 *user_cookie;
+ u32 user_cookie_len;
};
struct psample_group *psample_group_get(struct net *net, u32 group_num);
diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h
index 572d73fdcd5e..8034bf5febbe 100644
--- a/include/net/sctp/stream_sched.h
+++ b/include/net/sctp/stream_sched.h
@@ -35,10 +35,10 @@ struct sctp_sched_ops {
struct sctp_chunk *(*dequeue)(struct sctp_outq *q);
/* Called only if the chunk fit the packet */
void (*dequeue_done)(struct sctp_outq *q, struct sctp_chunk *chunk);
- /* Sched all chunks already enqueued */
- void (*sched_all)(struct sctp_stream *steam);
- /* Unched all chunks already enqueued */
- void (*unsched_all)(struct sctp_stream *steam);
+ /* Schedule all chunks already enqueued */
+ void (*sched_all)(struct sctp_stream *stream);
+ /* Unschedule all chunks already enqueued */
+ void (*unsched_all)(struct sctp_stream *stream);
};
int sctp_sched_set_sched(struct sctp_association *asoc,
diff --git a/include/net/tcx.h b/include/net/tcx.h
index 72a3e75e539f..5ce0ce9e0c02 100644
--- a/include/net/tcx.h
+++ b/include/net/tcx.h
@@ -13,7 +13,7 @@ struct mini_Qdisc;
struct tcx_entry {
struct mini_Qdisc __rcu *miniq;
struct bpf_mprog_bundle bundle;
- bool miniq_active;
+ u32 miniq_active;
struct rcu_head rcu;
};
@@ -125,11 +125,16 @@ static inline void tcx_skeys_dec(bool ingress)
tcx_dec();
}
-static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry,
- const bool active)
+static inline void tcx_miniq_inc(struct bpf_mprog_entry *entry)
{
ASSERT_RTNL();
- tcx_entry(entry)->miniq_active = active;
+ tcx_entry(entry)->miniq_active++;
+}
+
+static inline void tcx_miniq_dec(struct bpf_mprog_entry *entry)
+{
+ ASSERT_RTNL();
+ tcx_entry(entry)->miniq_active--;
}
static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry)
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 3d54de168a6d..bfe625b55d55 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -121,7 +121,7 @@ struct xsk_tx_metadata_ops {
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
-void __xsk_map_flush(void);
+void __xsk_map_flush(struct list_head *flush_list);
/**
* xsk_tx_metadata_to_compl - Save enough relevant metadata information
@@ -206,7 +206,7 @@ static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
return -EOPNOTSUPP;
}
-static inline void __xsk_map_flush(void)
+static inline void __xsk_map_flush(struct list_head *flush_list)
{
}
@@ -228,14 +228,4 @@ static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
}
#endif /* CONFIG_XDP_SOCKETS */
-
-#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET)
-bool xsk_map_check_flush(void);
-#else
-static inline bool xsk_map_check_flush(void)
-{
- return false;
-}
-#endif
-
#endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index a6190aa1b406..f1a73aa83fbb 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -35,12 +35,14 @@ enum fscache_volume_trace {
fscache_volume_get_cookie,
fscache_volume_get_create_work,
fscache_volume_get_hash_collision,
+ fscache_volume_get_withdraw,
fscache_volume_free,
fscache_volume_new_acquire,
fscache_volume_put_cookie,
fscache_volume_put_create_work,
fscache_volume_put_hash_collision,
fscache_volume_put_relinquish,
+ fscache_volume_put_withdraw,
fscache_volume_see_create_work,
fscache_volume_see_hash_wake,
fscache_volume_wait_create_work,
@@ -120,12 +122,14 @@ enum fscache_access_trace {
EM(fscache_volume_get_cookie, "GET cook ") \
EM(fscache_volume_get_create_work, "GET creat") \
EM(fscache_volume_get_hash_collision, "GET hcoll") \
+ EM(fscache_volume_get_withdraw, "GET withd") \
EM(fscache_volume_free, "FREE ") \
EM(fscache_volume_new_acquire, "NEW acq ") \
EM(fscache_volume_put_cookie, "PUT cook ") \
EM(fscache_volume_put_create_work, "PUT creat") \
EM(fscache_volume_put_hash_collision, "PUT hcoll") \
EM(fscache_volume_put_relinquish, "PUT relnq") \
+ EM(fscache_volume_put_withdraw, "PUT withd") \
EM(fscache_volume_see_create_work, "SEE creat") \
EM(fscache_volume_see_hash_wake, "SEE hwake") \
E_(fscache_volume_wait_create_work, "WAIT crea")
diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h
index 6834356b2d2a..543e54e432a1 100644
--- a/include/trace/events/page_pool.h
+++ b/include/trace/events/page_pool.h
@@ -42,51 +42,53 @@ TRACE_EVENT(page_pool_release,
TRACE_EVENT(page_pool_state_release,
TP_PROTO(const struct page_pool *pool,
- const struct page *page, u32 release),
+ netmem_ref netmem, u32 release),
- TP_ARGS(pool, page, release),
+ TP_ARGS(pool, netmem, release),
TP_STRUCT__entry(
__field(const struct page_pool *, pool)
- __field(const struct page *, page)
+ __field(unsigned long, netmem)
__field(u32, release)
__field(unsigned long, pfn)
),
TP_fast_assign(
__entry->pool = pool;
- __entry->page = page;
+ __entry->netmem = (__force unsigned long)netmem;
__entry->release = release;
- __entry->pfn = page_to_pfn(page);
+ __entry->pfn = netmem_to_pfn(netmem);
),
- TP_printk("page_pool=%p page=%p pfn=0x%lx release=%u",
- __entry->pool, __entry->page, __entry->pfn, __entry->release)
+ TP_printk("page_pool=%p netmem=%p pfn=0x%lx release=%u",
+ __entry->pool, (void *)__entry->netmem,
+ __entry->pfn, __entry->release)
);
TRACE_EVENT(page_pool_state_hold,
TP_PROTO(const struct page_pool *pool,
- const struct page *page, u32 hold),
+ netmem_ref netmem, u32 hold),
- TP_ARGS(pool, page, hold),
+ TP_ARGS(pool, netmem, hold),
TP_STRUCT__entry(
__field(const struct page_pool *, pool)
- __field(const struct page *, page)
+ __field(unsigned long, netmem)
__field(u32, hold)
__field(unsigned long, pfn)
),
TP_fast_assign(
__entry->pool = pool;
- __entry->page = page;
+ __entry->netmem = (__force unsigned long)netmem;
__entry->hold = hold;
- __entry->pfn = page_to_pfn(page);
+ __entry->pfn = netmem_to_pfn(netmem);
),
- TP_printk("page_pool=%p page=%p pfn=0x%lx hold=%u",
- __entry->pool, __entry->page, __entry->pfn, __entry->hold)
+ TP_printk("page_pool=%p netmem=%p pfn=0x%lx hold=%u",
+ __entry->pool, (void *)__entry->netmem,
+ __entry->pfn, __entry->hold)
);
TRACE_EVENT(page_pool_update_nid,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index d983c48a3b6a..d4cc26932ff4 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64)
#define __NR_ppoll_time64 414
__SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64)
#define __NR_io_pgetevents_time64 416
-__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents)
+__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64)
#define __NR_recvmmsg_time64 417
__SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64)
#define __NR_mq_timedsend_time64 418
diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index aaed8e12ad0b..926b1deb1116 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -802,6 +802,9 @@ struct drm_panthor_queue_submit {
* Must be 64-bit/8-byte aligned (the size of a CS instruction)
*
* Can be zero if stream_addr is zero too.
+ *
+ * When the stream size is zero, the queue submit serves as a
+ * synchronization point.
*/
__u32 stream_size;
@@ -822,6 +825,8 @@ struct drm_panthor_queue_submit {
* ensure the GPU doesn't get garbage when reading the indirect command
* stream buffers. If you want the cache flush to happen
* unconditionally, pass a zero here.
+ *
+ * Ignored when stream_size is zero.
*/
__u32 latest_flush;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 25ea393cf084..35bcf52dbc65 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1425,6 +1425,8 @@ enum {
#define BPF_F_TEST_RUN_ON_CPU (1U << 0)
/* If set, XDP frames will be transmitted after processing */
#define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1)
+/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */
+#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2)
/* type for BPF_ENABLE_STATS */
enum bpf_stats_type {
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 8733a3117902..230110b97029 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -753,6 +753,197 @@ enum ethtool_module_power_mode {
};
/**
+ * enum ethtool_c33_pse_ext_state - groups of PSE extended states
+ * functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION: Group of error_condition states
+ * @ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID: Group of mr_mps_valid states
+ * @ETHTOOL_C33_PSE_EXT_STATE_MR_PSE_ENABLE: Group of mr_pse_enable states
+ * @ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED: Group of option_detect_ted
+ * states
+ * @ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM: Group of option_vport_lim states
+ * @ETHTOOL_C33_PSE_EXT_STATE_OVLD_DETECTED: Group of ovld_detected states
+ * @ETHTOOL_C33_PSE_EXT_STATE_PD_DLL_POWER_TYPE: Group of pd_dll_power_type
+ * states
+ * @ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE: Group of power_not_available
+ * states
+ * @ETHTOOL_C33_PSE_EXT_STATE_SHORT_DETECTED: Group of short_detected states
+ */
+enum ethtool_c33_pse_ext_state {
+ ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION = 1,
+ ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID,
+ ETHTOOL_C33_PSE_EXT_STATE_MR_PSE_ENABLE,
+ ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED,
+ ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM,
+ ETHTOOL_C33_PSE_EXT_STATE_OVLD_DETECTED,
+ ETHTOOL_C33_PSE_EXT_STATE_PD_DLL_POWER_TYPE,
+ ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE,
+ ETHTOOL_C33_PSE_EXT_STATE_SHORT_DETECTED,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_mr_mps_valid - mr_mps_valid states
+ * functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_DETECTED_UNDERLOAD: Underload
+ * state
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_CONNECTION_OPEN: Port is not
+ * connected
+ *
+ * The PSE monitors either the DC or AC Maintain Power Signature
+ * (MPS, see 33.2.9.1). This variable indicates the presence or absence of
+ * a valid MPS.
+ */
+enum ethtool_c33_pse_ext_substate_mr_mps_valid {
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_DETECTED_UNDERLOAD = 1,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_CONNECTION_OPEN,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_error_condition - error_condition states
+ * functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_NON_EXISTING_PORT: Non-existing
+ * port number
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNDEFINED_PORT: Undefined port
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT: Internal
+ * hardware fault
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_COMM_ERROR_AFTER_FORCE_ON:
+ * Communication error after force on
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS: Unknown
+ * port status
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_TURN_OFF: Host
+ * crash turn off
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_FORCE_SHUTDOWN:
+ * Host crash force shutdown
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_CONFIG_CHANGE: Configuration
+ * change
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP: Over
+ * temperature detected
+ *
+ * error_condition is a variable indicating the status of
+ * implementation-specific fault conditions or optionally other system faults
+ * that prevent the PSE from meeting the specifications in Table 33–11 and that
+ * require the PSE not to source power. These error conditions are different
+ * from those monitored by the state diagrams in Figure 33–10.
+ */
+enum ethtool_c33_pse_ext_substate_error_condition {
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_NON_EXISTING_PORT = 1,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNDEFINED_PORT,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_COMM_ERROR_AFTER_FORCE_ON,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_TURN_OFF,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_FORCE_SHUTDOWN,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_CONFIG_CHANGE,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_mr_pse_enable - mr_pse_enable states
+ * functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_PSE_ENABLE_DISABLE_PIN_ACTIVE: Disable
+ * pin active
+ *
+ * mr_pse_enable is control variable that selects PSE operation and test
+ * functions.
+ */
+enum ethtool_c33_pse_ext_substate_mr_pse_enable {
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_PSE_ENABLE_DISABLE_PIN_ACTIVE = 1,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_option_detect_ted - option_detect_ted
+ * states functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_DET_IN_PROCESS: Detection
+ * in process
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_CONNECTION_CHECK_ERROR:
+ * Connection check error
+ *
+ * option_detect_ted is a variable indicating if detection can be performed
+ * by the PSE during the ted_timer interval.
+ */
+enum ethtool_c33_pse_ext_substate_option_detect_ted {
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_DET_IN_PROCESS = 1,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_CONNECTION_CHECK_ERROR,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_option_vport_lim - option_vport_lim states
+ * functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_HIGH_VOLTAGE: Main supply
+ * voltage is high
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_LOW_VOLTAGE: Main supply
+ * voltage is low
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_VOLTAGE_INJECTION: Voltage
+ * injection into the port
+ *
+ * option_vport_lim is an optional variable indicates if VPSE is out of the
+ * operating range during normal operating state.
+ */
+enum ethtool_c33_pse_ext_substate_option_vport_lim {
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_HIGH_VOLTAGE = 1,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_LOW_VOLTAGE,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_VOLTAGE_INJECTION,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_ovld_detected - ovld_detected states
+ * functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OVLD_DETECTED_OVERLOAD: Overload state
+ *
+ * ovld_detected is a variable indicating if the PSE output current has been
+ * in an overload condition (see 33.2.7.6) for at least TCUT of a one-second
+ * sliding time.
+ */
+enum ethtool_c33_pse_ext_substate_ovld_detected {
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_OVLD_DETECTED_OVERLOAD = 1,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_power_not_available - power_not_available
+ * states functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_BUDGET_EXCEEDED: Power
+ * budget exceeded for the controller
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PORT_PW_LIMIT_EXCEEDS_CONTROLLER_BUDGET:
+ * Configured port power limit exceeded controller power budget
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PD_REQUEST_EXCEEDS_PORT_LIMIT:
+ * Power request from PD exceeds port limit
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_HW_PW_LIMIT: Power
+ * denied due to Hardware power limit
+ *
+ * power_not_available is a variable that is asserted in an
+ * implementation-dependent manner when the PSE is no longer capable of
+ * sourcing sufficient power to support the attached PD. Sufficient power
+ * is defined by classification; see 33.2.6.
+ */
+enum ethtool_c33_pse_ext_substate_power_not_available {
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_BUDGET_EXCEEDED = 1,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PORT_PW_LIMIT_EXCEEDS_CONTROLLER_BUDGET,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PD_REQUEST_EXCEEDS_PORT_LIMIT,
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_HW_PW_LIMIT,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_short_detected - short_detected states
+ * functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_SHORT_DETECTED_SHORT_CONDITION: Short
+ * condition was detected
+ *
+ * short_detected is a variable indicating if the PSE output current has been
+ * in a short circuit condition for TLIM within a sliding window (see 33.2.7.7).
+ */
+enum ethtool_c33_pse_ext_substate_short_detected {
+ ETHTOOL_C33_PSE_EXT_SUBSTATE_SHORT_DETECTED_SHORT_CONDITION = 1,
+};
+
+/**
* enum ethtool_pse_types - Types of PSE controller.
* @ETHTOOL_PSE_UNKNOWN: Type of PSE controller is unknown
* @ETHTOOL_PSE_PODL: PSE controller which support PoDL
@@ -878,6 +1069,24 @@ enum ethtool_mm_verify_status {
};
/**
+ * enum ethtool_module_fw_flash_status - plug-in module firmware flashing status
+ * @ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED: The firmware flashing process has
+ * started.
+ * @ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS: The firmware flashing process
+ * is in progress.
+ * @ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED: The firmware flashing process was
+ * completed successfully.
+ * @ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR: The firmware flashing process was
+ * stopped due to an error.
+ */
+enum ethtool_module_fw_flash_status {
+ ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED = 1,
+ ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS,
+ ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED,
+ ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR,
+};
+
+/**
* struct ethtool_gstrings - string set for data tagging
* @cmd: Command number = %ETHTOOL_GSTRINGS
* @string_set: String set ID; one of &enum ethtool_stringset
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index d15856c7e001..6d5bdcc67631 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -57,6 +57,7 @@ enum {
ETHTOOL_MSG_PLCA_GET_STATUS,
ETHTOOL_MSG_MM_GET,
ETHTOOL_MSG_MM_SET,
+ ETHTOOL_MSG_MODULE_FW_FLASH_ACT,
/* add new constants above here */
__ETHTOOL_MSG_USER_CNT,
@@ -109,6 +110,7 @@ enum {
ETHTOOL_MSG_PLCA_NTF,
ETHTOOL_MSG_MM_GET_REPLY,
ETHTOOL_MSG_MM_NTF,
+ ETHTOOL_MSG_MODULE_FW_FLASH_NTF,
/* add new constants above here */
__ETHTOOL_MSG_KERNEL_CNT,
@@ -929,6 +931,12 @@ enum {
/* Power Sourcing Equipment */
enum {
+ ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC,
+ ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */
+ ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */
+};
+
+enum {
ETHTOOL_A_PSE_UNSPEC,
ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */
ETHTOOL_A_PODL_PSE_ADMIN_STATE, /* u32 */
@@ -937,6 +945,12 @@ enum {
ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */
ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */
ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */
+ ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */
+ ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */
+ ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */
+ ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */
+ ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */
+ ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */
/* add new constants above here */
__ETHTOOL_A_PSE_CNT,
@@ -1018,6 +1032,23 @@ enum {
ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1)
};
+/* MODULE_FW_FLASH */
+
+enum {
+ ETHTOOL_A_MODULE_FW_FLASH_UNSPEC,
+ ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */
+ ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */
+ ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */
+ ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */
+ ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */
+ ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */
+ ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */
+
+ /* add new constants above here */
+ __ETHTOOL_A_MODULE_FW_FLASH_CNT,
+ ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1)
+};
+
/* generic netlink info */
#define ETHTOOL_GENL_NAME "ethtool"
#define ETHTOOL_GENL_VERSION 1
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index aa4094ca2444..639894ed1b97 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1376,7 +1376,7 @@ enum nft_secmark_attributes {
#define NFTA_SECMARK_MAX (__NFTA_SECMARK_MAX - 1)
/* Max security context length */
-#define NFT_SECMARK_CTX_MAXLEN 256
+#define NFT_SECMARK_CTX_MAXLEN 4096
/**
* enum nft_reject_types - nf_tables reject expression reject types
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index efc82c318fa2..3a701bd1f31b 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -649,7 +649,8 @@ enum ovs_flow_attr {
* Actions are passed as nested attributes.
*
* Executes the specified actions with the given probability on a per-packet
- * basis.
+ * basis. Nested actions will be able to access the probability value of the
+ * parent @OVS_ACTION_ATTR_SAMPLE.
*/
enum ovs_sample_attr {
OVS_SAMPLE_ATTR_UNSPEC,
@@ -914,6 +915,31 @@ struct check_pkt_len_arg {
};
#endif
+#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
+/**
+ * enum ovs_psample_attr - Attributes for %OVS_ACTION_ATTR_PSAMPLE
+ * action.
+ *
+ * @OVS_PSAMPLE_ATTR_GROUP: 32-bit number to identify the source of the
+ * sample.
+ * @OVS_PSAMPLE_ATTR_COOKIE: An optional variable-length binary cookie that
+ * contains user-defined metadata. The maximum length is
+ * OVS_PSAMPLE_COOKIE_MAX_SIZE bytes.
+ *
+ * Sends the packet to the psample multicast group with the specified group and
+ * cookie. It is possible to combine this action with the
+ * %OVS_ACTION_ATTR_TRUNC action to limit the size of the sample.
+ */
+enum ovs_psample_attr {
+ OVS_PSAMPLE_ATTR_GROUP = 1, /* u32 number. */
+ OVS_PSAMPLE_ATTR_COOKIE, /* Optional, user specified cookie. */
+
+ /* private: */
+ __OVS_PSAMPLE_ATTR_MAX
+};
+
+#define OVS_PSAMPLE_ATTR_MAX (__OVS_PSAMPLE_ATTR_MAX - 1)
+
/**
* enum ovs_action_attr - Action types.
*
@@ -966,6 +992,8 @@ struct check_pkt_len_arg {
* of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS
* argument.
* @OVS_ACTION_ATTR_DROP: Explicit drop action.
+ * @OVS_ACTION_ATTR_PSAMPLE: Send a sample of the packet to external observers
+ * via psample.
*
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -1004,6 +1032,7 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */
OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */
OVS_ACTION_ATTR_DROP, /* u32 error code. */
+ OVS_ACTION_ATTR_PSAMPLE, /* Nested OVS_PSAMPLE_ATTR_*. */
__OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
* from userspace. */
diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h
index e585db5bf2d2..b765f0e81f20 100644
--- a/include/uapi/linux/psample.h
+++ b/include/uapi/linux/psample.h
@@ -8,7 +8,11 @@ enum {
PSAMPLE_ATTR_ORIGSIZE,
PSAMPLE_ATTR_SAMPLE_GROUP,
PSAMPLE_ATTR_GROUP_SEQ,
- PSAMPLE_ATTR_SAMPLE_RATE,
+ PSAMPLE_ATTR_SAMPLE_RATE, /* u32, ratio between observed and
+ * sampled packets or scaled probability
+ * if PSAMPLE_ATTR_SAMPLE_PROBABILITY
+ * is set.
+ */
PSAMPLE_ATTR_DATA,
PSAMPLE_ATTR_GROUP_REFCOUNT,
PSAMPLE_ATTR_TUNNEL,
@@ -19,6 +23,11 @@ enum {
PSAMPLE_ATTR_LATENCY, /* u64, nanoseconds */
PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */
PSAMPLE_ATTR_PROTO, /* u16 */
+ PSAMPLE_ATTR_USER_COOKIE, /* binary, user provided data */
+ PSAMPLE_ATTR_SAMPLE_PROBABILITY,/* no argument, interpret rate in
+ * PSAMPLE_ATTR_SAMPLE_RATE as a
+ * probability scaled 0 - U32_MAX.
+ */
__PSAMPLE_ATTR_MAX
};
diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h
index 7cb4a172feed..927c735a5b0e 100644
--- a/include/uapi/linux/tcp_metrics.h
+++ b/include/uapi/linux/tcp_metrics.h
@@ -1,8 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* tcp_metrics.h - TCP Metrics Interface */
-#ifndef _LINUX_TCP_METRICS_H
-#define _LINUX_TCP_METRICS_H
+#ifndef _UAPI_LINUX_TCP_METRICS_H
+#define _UAPI_LINUX_TCP_METRICS_H
#include <linux/types.h>
@@ -27,6 +27,22 @@ enum tcp_metric_index {
#define TCP_METRIC_MAX (__TCP_METRIC_MAX - 1)
+/* Re-define enum tcp_metric_index, again, using the values carried
+ * as netlink attribute types.
+ */
+enum {
+ TCP_METRICS_A_METRICS_RTT = 1,
+ TCP_METRICS_A_METRICS_RTTVAR,
+ TCP_METRICS_A_METRICS_SSTHRESH,
+ TCP_METRICS_A_METRICS_CWND,
+ TCP_METRICS_A_METRICS_REODERING,
+ TCP_METRICS_A_METRICS_RTT_US,
+ TCP_METRICS_A_METRICS_RTTVAR_US,
+
+ __TCP_METRICS_A_METRICS_MAX
+};
+#define TCP_METRICS_A_METRICS_MAX (__TCP_METRICS_A_METRICS_MAX - 1)
+
enum {
TCP_METRICS_ATTR_UNSPEC,
TCP_METRICS_ATTR_ADDR_IPV4, /* u32 */
@@ -58,4 +74,4 @@ enum {
#define TCP_METRICS_CMD_MAX (__TCP_METRICS_CMD_MAX - 1)
-#endif /* _LINUX_TCP_METRICS_H */
+#endif /* _UAPI_LINUX_TCP_METRICS_H */
diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h
index bd1066754220..c102ef35d11e 100644
--- a/include/uapi/linux/trace_mmap.h
+++ b/include/uapi/linux/trace_mmap.h
@@ -43,6 +43,6 @@ struct trace_buffer_meta {
__u64 Reserved2;
};
-#define TRACE_MMAP_IOCTL_GET_READER _IO('T', 0x1)
+#define TRACE_MMAP_IOCTL_GET_READER _IO('R', 0x20)
#endif /* _TRACE_MMAP_H_ */
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 154b25b8a613..c326e2127dd4 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1259,8 +1259,8 @@ static void io_req_normal_work_add(struct io_kiocb *req)
if (ctx->flags & IORING_SETUP_SQPOLL) {
struct io_sq_data *sqd = ctx->sq_data;
- if (wq_has_sleeper(&sqd->wait))
- wake_up(&sqd->wait);
+ if (sqd->thread)
+ __set_notify_signal(sqd->thread);
return;
}
diff --git a/io_uring/net.c b/io_uring/net.c
index 7c98c4d50946..20aa36143933 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1127,16 +1127,18 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
flags |= MSG_DONTWAIT;
retry_multishot:
- kmsg->msg.msg_inq = -1;
- kmsg->msg.msg_flags = 0;
-
if (io_do_buffer_select(req)) {
ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ kmsg->msg.msg_inq = -1;
goto out_free;
+ }
sr->buf = NULL;
}
+ kmsg->msg.msg_flags = 0;
+ kmsg->msg.msg_inq = -1;
+
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
@@ -1265,14 +1267,14 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG_ZC);
}
-static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
+static int io_sg_from_iter_iovec(struct sk_buff *skb,
struct iov_iter *from, size_t length)
{
skb_zcopy_downgrade_managed(skb);
- return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
+ return zerocopy_fill_skb_from_iter(skb, from, length);
}
-static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+static int io_sg_from_iter(struct sk_buff *skb,
struct iov_iter *from, size_t length)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -1285,7 +1287,7 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
if (!frag)
shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
else if (unlikely(!skb_zcopy_managed(skb)))
- return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
+ return zerocopy_fill_skb_from_iter(skb, from, length);
bi.bi_size = min(from->count, length);
bi.bi_bvec_done = from->iov_offset;
@@ -1312,14 +1314,6 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
skb->data_len += copied;
skb->len += copied;
skb->truesize += truesize;
-
- if (sk && sk->sk_type == SOCK_STREAM) {
- sk_wmem_queued_add(sk, truesize);
- if (!skb_zcopy_pure(skb))
- sk_mem_charge(sk, truesize);
- } else {
- refcount_add(truesize, &skb->sk->sk_wmem_alloc);
- }
return ret;
}
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 7eb9ad3a3ae6..0291eef9ce92 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -50,5 +50,11 @@ endif
obj-$(CONFIG_BPF_PRELOAD) += preload/
obj-$(CONFIG_BPF_SYSCALL) += relo_core.o
-$(obj)/relo_core.o: $(srctree)/tools/lib/bpf/relo_core.c FORCE
+obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o
+obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o
+
+# Some source files are common to libbpf.
+vpath %.c $(srctree)/kernel/bpf:$(srctree)/tools/lib/bpf
+
+$(obj)/%.o: %.c FORCE
$(call if_changed_rule,cc_o_c)
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 68240c3c6e7d..08a338e1f231 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -280,6 +280,7 @@ BTF_ID(func, bpf_lsm_cred_prepare)
BTF_ID(func, bpf_lsm_file_ioctl)
BTF_ID(func, bpf_lsm_file_lock)
BTF_ID(func, bpf_lsm_file_open)
+BTF_ID(func, bpf_lsm_file_post_open)
BTF_ID(func, bpf_lsm_file_receive)
BTF_ID(func, bpf_lsm_inode_create)
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index a2cf31b14be4..0d515ec57aa5 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -573,7 +573,7 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
}
size = arch_prepare_bpf_trampoline(NULL, image + image_off,
- image + PAGE_SIZE,
+ image + image_off + size,
model, flags, tlinks, stub_func);
if (size <= 0) {
if (image != *_image)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 7928d920056f..4ff11779699e 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -274,6 +274,7 @@ struct btf {
u32 start_str_off; /* first string offset (0 for base BTF) */
char name[MODULE_NAME_LEN];
bool kernel_btf;
+ __u32 *base_id_map; /* map from distilled base BTF -> vmlinux BTF ids */
};
enum verifier_phase {
@@ -530,6 +531,11 @@ static bool btf_type_is_decl_tag_target(const struct btf_type *t)
btf_type_is_var(t) || btf_type_is_typedef(t);
}
+bool btf_is_vmlinux(const struct btf *btf)
+{
+ return btf->kernel_btf && !btf->base_btf;
+}
+
u32 btf_nr_types(const struct btf *btf)
{
u32 total = 0;
@@ -772,7 +778,7 @@ static bool __btf_name_char_ok(char c, bool first)
return true;
}
-static const char *btf_str_by_offset(const struct btf *btf, u32 offset)
+const char *btf_str_by_offset(const struct btf *btf, u32 offset)
{
while (offset < btf->start_str_off)
btf = btf->base_btf;
@@ -1670,14 +1676,8 @@ static void btf_free_kfunc_set_tab(struct btf *btf)
if (!tab)
return;
- /* For module BTF, we directly assign the sets being registered, so
- * there is nothing to free except kfunc_set_tab.
- */
- if (btf_is_module(btf))
- goto free_tab;
for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++)
kfree(tab->sets[hook]);
-free_tab:
kfree(tab);
btf->kfunc_set_tab = NULL;
}
@@ -1735,7 +1735,12 @@ static void btf_free(struct btf *btf)
kvfree(btf->types);
kvfree(btf->resolved_sizes);
kvfree(btf->resolved_ids);
- kvfree(btf->data);
+ /* vmlinux does not allocate btf->data, it simply points it at
+ * __start_BTF.
+ */
+ if (!btf_is_vmlinux(btf))
+ kvfree(btf->data);
+ kvfree(btf->base_id_map);
kfree(btf);
}
@@ -1764,6 +1769,23 @@ void btf_put(struct btf *btf)
}
}
+struct btf *btf_base_btf(const struct btf *btf)
+{
+ return btf->base_btf;
+}
+
+const struct btf_header *btf_header(const struct btf *btf)
+{
+ return &btf->hdr;
+}
+
+void btf_set_base_btf(struct btf *btf, const struct btf *base_btf)
+{
+ btf->base_btf = (struct btf *)base_btf;
+ btf->start_id = btf_nr_types(base_btf);
+ btf->start_str_off = base_btf->hdr.str_len;
+}
+
static int env_resolve_init(struct btf_verifier_env *env)
{
struct btf *btf = env->btf;
@@ -5820,6 +5842,15 @@ static int find_kern_ctx_type_id(enum bpf_prog_type prog_type)
return ctx_type->type;
}
+bool btf_is_projection_of(const char *pname, const char *tname)
+{
+ if (strcmp(pname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0)
+ return true;
+ if (strcmp(pname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0)
+ return true;
+ return false;
+}
+
bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, enum bpf_prog_type prog_type,
int arg)
@@ -5882,9 +5913,7 @@ again:
* int socket_filter_bpf_prog(struct __sk_buff *skb)
* { // no fields of skb are ever used }
*/
- if (strcmp(ctx_tname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0)
- return true;
- if (strcmp(ctx_tname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0)
+ if (btf_is_projection_of(ctx_tname, tname))
return true;
if (strcmp(ctx_tname, tname)) {
/* bpf_user_pt_regs_t is a typedef, so resolve it to
@@ -6076,23 +6105,15 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty
BTF_ID_LIST(bpf_ctx_convert_btf_id)
BTF_ID(struct, bpf_ctx_convert)
-struct btf *btf_parse_vmlinux(void)
+static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name,
+ void *data, unsigned int data_size)
{
- struct btf_verifier_env *env = NULL;
- struct bpf_verifier_log *log;
struct btf *btf = NULL;
int err;
if (!IS_ENABLED(CONFIG_DEBUG_INFO_BTF))
return ERR_PTR(-ENOENT);
- env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
- if (!env)
- return ERR_PTR(-ENOMEM);
-
- log = &env->log;
- log->level = BPF_LOG_KERNEL;
-
btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
if (!btf) {
err = -ENOMEM;
@@ -6100,10 +6121,10 @@ struct btf *btf_parse_vmlinux(void)
}
env->btf = btf;
- btf->data = __start_BTF;
- btf->data_size = __stop_BTF - __start_BTF;
+ btf->data = data;
+ btf->data_size = data_size;
btf->kernel_btf = true;
- snprintf(btf->name, sizeof(btf->name), "vmlinux");
+ snprintf(btf->name, sizeof(btf->name), "%s", name);
err = btf_parse_hdr(env);
if (err)
@@ -6123,20 +6144,11 @@ struct btf *btf_parse_vmlinux(void)
if (err)
goto errout;
- /* btf_parse_vmlinux() runs under bpf_verifier_lock */
- bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]);
-
refcount_set(&btf->refcnt, 1);
- err = btf_alloc_id(btf);
- if (err)
- goto errout;
-
- btf_verifier_env_free(env);
return btf;
errout:
- btf_verifier_env_free(env);
if (btf) {
kvfree(btf->types);
kfree(btf);
@@ -6144,19 +6156,61 @@ errout:
return ERR_PTR(err);
}
+struct btf *btf_parse_vmlinux(void)
+{
+ struct btf_verifier_env *env = NULL;
+ struct bpf_verifier_log *log;
+ struct btf *btf;
+ int err;
+
+ env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
+ if (!env)
+ return ERR_PTR(-ENOMEM);
+
+ log = &env->log;
+ log->level = BPF_LOG_KERNEL;
+ btf = btf_parse_base(env, "vmlinux", __start_BTF, __stop_BTF - __start_BTF);
+ if (IS_ERR(btf))
+ goto err_out;
+
+ /* btf_parse_vmlinux() runs under bpf_verifier_lock */
+ bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]);
+ err = btf_alloc_id(btf);
+ if (err) {
+ btf_free(btf);
+ btf = ERR_PTR(err);
+ }
+err_out:
+ btf_verifier_env_free(env);
+ return btf;
+}
+
+/* If .BTF_ids section was created with distilled base BTF, both base and
+ * split BTF ids will need to be mapped to actual base/split ids for
+ * BTF now that it has been relocated.
+ */
+static __u32 btf_relocate_id(const struct btf *btf, __u32 id)
+{
+ if (!btf->base_btf || !btf->base_id_map)
+ return id;
+ return btf->base_id_map[id];
+}
+
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
-static struct btf *btf_parse_module(const char *module_name, const void *data, unsigned int data_size)
+static struct btf *btf_parse_module(const char *module_name, const void *data,
+ unsigned int data_size, void *base_data,
+ unsigned int base_data_size)
{
+ struct btf *btf = NULL, *vmlinux_btf, *base_btf = NULL;
struct btf_verifier_env *env = NULL;
struct bpf_verifier_log *log;
- struct btf *btf = NULL, *base_btf;
- int err;
+ int err = 0;
- base_btf = bpf_get_btf_vmlinux();
- if (IS_ERR(base_btf))
- return base_btf;
- if (!base_btf)
+ vmlinux_btf = bpf_get_btf_vmlinux();
+ if (IS_ERR(vmlinux_btf))
+ return vmlinux_btf;
+ if (!vmlinux_btf)
return ERR_PTR(-EINVAL);
env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
@@ -6166,6 +6220,16 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, u
log = &env->log;
log->level = BPF_LOG_KERNEL;
+ if (base_data) {
+ base_btf = btf_parse_base(env, ".BTF.base", base_data, base_data_size);
+ if (IS_ERR(base_btf)) {
+ err = PTR_ERR(base_btf);
+ goto errout;
+ }
+ } else {
+ base_btf = vmlinux_btf;
+ }
+
btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
if (!btf) {
err = -ENOMEM;
@@ -6205,12 +6269,22 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, u
if (err)
goto errout;
+ if (base_btf != vmlinux_btf) {
+ err = btf_relocate(btf, vmlinux_btf, &btf->base_id_map);
+ if (err)
+ goto errout;
+ btf_free(base_btf);
+ base_btf = vmlinux_btf;
+ }
+
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
return btf;
errout:
btf_verifier_env_free(env);
+ if (base_btf != vmlinux_btf)
+ btf_free(base_btf);
if (btf) {
kvfree(btf->data);
kvfree(btf->types);
@@ -7763,7 +7837,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
err = -ENOMEM;
goto out;
}
- btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size);
+ btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size,
+ mod->btf_base_data, mod->btf_base_data_size);
if (IS_ERR(btf)) {
kfree(btf_mod);
if (!IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) {
@@ -8087,7 +8162,7 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
bool add_filter = !!kset->filter;
struct btf_kfunc_set_tab *tab;
struct btf_id_set8 *set;
- u32 set_cnt;
+ u32 set_cnt, i;
int ret;
if (hook >= BTF_KFUNC_HOOK_MAX) {
@@ -8133,21 +8208,15 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
goto end;
}
- /* We don't need to allocate, concatenate, and sort module sets, because
- * only one is allowed per hook. Hence, we can directly assign the
- * pointer and return.
- */
- if (!vmlinux_set) {
- tab->sets[hook] = add_set;
- goto do_add_filter;
- }
-
/* In case of vmlinux sets, there may be more than one set being
* registered per hook. To create a unified set, we allocate a new set
* and concatenate all individual sets being registered. While each set
* is individually sorted, they may become unsorted when concatenated,
* hence re-sorting the final set again is required to make binary
* searching the set using btf_id_set8_contains function work.
+ *
+ * For module sets, we need to allocate as we may need to relocate
+ * BTF ids.
*/
set_cnt = set ? set->cnt : 0;
@@ -8177,11 +8246,14 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
/* Concatenate the two sets */
memcpy(set->pairs + set->cnt, add_set->pairs, add_set->cnt * sizeof(set->pairs[0]));
+ /* Now that the set is copied, update with relocated BTF ids */
+ for (i = set->cnt; i < set->cnt + add_set->cnt; i++)
+ set->pairs[i].id = btf_relocate_id(btf, set->pairs[i].id);
+
set->cnt += add_set->cnt;
sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL);
-do_add_filter:
if (add_filter) {
hook_filter = &tab->hook_filters[hook];
hook_filter->filters[hook_filter->nr_filters++] = kset->filter;
@@ -8301,7 +8373,7 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
return PTR_ERR(btf);
for (i = 0; i < kset->set->cnt; i++) {
- ret = btf_check_kfunc_protos(btf, kset->set->pairs[i].id,
+ ret = btf_check_kfunc_protos(btf, btf_relocate_id(btf, kset->set->pairs[i].id),
kset->set->pairs[i].flags);
if (ret)
goto err_out;
@@ -8365,7 +8437,7 @@ static int btf_check_dtor_kfuncs(struct btf *btf, const struct btf_id_dtor_kfunc
u32 nr_args, i;
for (i = 0; i < cnt; i++) {
- dtor_btf_id = dtors[i].kfunc_btf_id;
+ dtor_btf_id = btf_relocate_id(btf, dtors[i].kfunc_btf_id);
dtor_func = btf_type_by_id(btf, dtor_btf_id);
if (!dtor_func || !btf_type_is_func(dtor_func))
@@ -8400,7 +8472,7 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c
{
struct btf_id_dtor_kfunc_tab *tab;
struct btf *btf;
- u32 tab_cnt;
+ u32 tab_cnt, i;
int ret;
btf = btf_get_module_btf(owner);
@@ -8451,6 +8523,13 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c
btf->dtor_kfunc_tab = tab;
memcpy(tab->dtors + tab->cnt, dtors, add_cnt * sizeof(tab->dtors[0]));
+
+ /* remap BTF ids based on BTF relocation (if any) */
+ for (i = tab_cnt; i < tab_cnt + add_cnt; i++) {
+ tab->dtors[i].btf_id = btf_relocate_id(btf, tab->dtors[i].btf_id);
+ tab->dtors[i].kfunc_btf_id = btf_relocate_id(btf, tab->dtors[i].kfunc_btf_id);
+ }
+
tab->cnt += add_cnt;
sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1a6c3faa6e4a..7ee62e38faf0 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -736,11 +736,11 @@ static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
}
-const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
+int __bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char *sym)
{
struct bpf_ksym *ksym;
- char *ret = NULL;
+ int ret = 0;
rcu_read_lock();
ksym = bpf_ksym_find(addr);
@@ -748,9 +748,8 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long symbol_start = ksym->start;
unsigned long symbol_end = ksym->end;
- strscpy(sym, ksym->name, KSYM_NAME_LEN);
+ ret = strscpy(sym, ksym->name, KSYM_NAME_LEN);
- ret = sym;
if (size)
*size = symbol_end - symbol_start;
if (off)
@@ -1174,8 +1173,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
}
/* Copy JITed text from rw_header to its final location, the ro_header. */
-int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
- struct bpf_binary_header *ro_header,
+int bpf_jit_binary_pack_finalize(struct bpf_binary_header *ro_header,
struct bpf_binary_header *rw_header)
{
void *ptr;
@@ -2743,8 +2741,7 @@ static void bpf_free_used_maps(struct bpf_prog_aux *aux)
kfree(aux->used_maps);
}
-void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
- struct btf_mod_pair *used_btfs, u32 len)
+void __bpf_free_used_btfs(struct btf_mod_pair *used_btfs, u32 len)
{
#ifdef CONFIG_BPF_SYSCALL
struct btf_mod_pair *btf_mod;
@@ -2761,7 +2758,7 @@ void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
static void bpf_free_used_btfs(struct bpf_prog_aux *aux)
{
- __bpf_free_used_btfs(aux, aux->used_btfs, aux->used_btf_cnt);
+ __bpf_free_used_btfs(aux->used_btfs, aux->used_btf_cnt);
kfree(aux->used_btfs);
}
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 068e994ed781..fbdf5a1aabfe 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -707,7 +707,6 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
*/
static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
{
- struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
@@ -724,8 +723,11 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
*/
bq->q[bq->count++] = xdpf;
- if (!bq->flush_node.prev)
+ if (!bq->flush_node.prev) {
+ struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
+
list_add(&bq->flush_node, flush_list);
+ }
}
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
@@ -757,9 +759,8 @@ trace:
return ret;
}
-void __cpu_map_flush(void)
+void __cpu_map_flush(struct list_head *flush_list)
{
- struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
struct xdp_bulk_queue *bq, *tmp;
list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
@@ -769,13 +770,3 @@ void __cpu_map_flush(void)
wake_up_process(bq->obj->kthread);
}
}
-
-#ifdef CONFIG_DEBUG_NET
-bool cpu_map_check_flush(void)
-{
- if (list_empty(bpf_net_ctx_get_cpu_map_flush_list()))
- return false;
- __cpu_map_flush();
- return true;
-}
-#endif
diff --git a/kernel/bpf/crypto.c b/kernel/bpf/crypto.c
index 2bee4af91e38..94854cd9c4cc 100644
--- a/kernel/bpf/crypto.c
+++ b/kernel/bpf/crypto.c
@@ -275,7 +275,7 @@ static int bpf_crypto_crypt(const struct bpf_crypto_ctx *ctx,
if (__bpf_dynptr_is_rdonly(dst))
return -EINVAL;
- siv_len = __bpf_dynptr_size(siv);
+ siv_len = siv ? __bpf_dynptr_size(siv) : 0;
src_len = __bpf_dynptr_size(src);
dst_len = __bpf_dynptr_size(dst);
if (!src_len || !dst_len)
@@ -303,36 +303,44 @@ static int bpf_crypto_crypt(const struct bpf_crypto_ctx *ctx,
/**
* bpf_crypto_decrypt() - Decrypt buffer using configured context and IV provided.
- * @ctx: The crypto context being used. The ctx must be a trusted pointer.
- * @src: bpf_dynptr to the encrypted data. Must be a trusted pointer.
- * @dst: bpf_dynptr to the buffer where to store the result. Must be a trusted pointer.
- * @siv: bpf_dynptr to IV data and state data to be used by decryptor.
+ * @ctx: The crypto context being used. The ctx must be a trusted pointer.
+ * @src: bpf_dynptr to the encrypted data. Must be a trusted pointer.
+ * @dst: bpf_dynptr to the buffer where to store the result. Must be a trusted pointer.
+ * @siv__nullable: bpf_dynptr to IV data and state data to be used by decryptor. May be NULL.
*
* Decrypts provided buffer using IV data and the crypto context. Crypto context must be configured.
*/
__bpf_kfunc int bpf_crypto_decrypt(struct bpf_crypto_ctx *ctx,
- const struct bpf_dynptr_kern *src,
- const struct bpf_dynptr_kern *dst,
- const struct bpf_dynptr_kern *siv)
+ const struct bpf_dynptr *src,
+ const struct bpf_dynptr *dst,
+ const struct bpf_dynptr *siv__nullable)
{
- return bpf_crypto_crypt(ctx, src, dst, siv, true);
+ const struct bpf_dynptr_kern *src_kern = (struct bpf_dynptr_kern *)src;
+ const struct bpf_dynptr_kern *dst_kern = (struct bpf_dynptr_kern *)dst;
+ const struct bpf_dynptr_kern *siv_kern = (struct bpf_dynptr_kern *)siv__nullable;
+
+ return bpf_crypto_crypt(ctx, src_kern, dst_kern, siv_kern, true);
}
/**
* bpf_crypto_encrypt() - Encrypt buffer using configured context and IV provided.
- * @ctx: The crypto context being used. The ctx must be a trusted pointer.
- * @src: bpf_dynptr to the plain data. Must be a trusted pointer.
- * @dst: bpf_dynptr to buffer where to store the result. Must be a trusted pointer.
- * @siv: bpf_dynptr to IV data and state data to be used by decryptor.
+ * @ctx: The crypto context being used. The ctx must be a trusted pointer.
+ * @src: bpf_dynptr to the plain data. Must be a trusted pointer.
+ * @dst: bpf_dynptr to the buffer where to store the result. Must be a trusted pointer.
+ * @siv__nullable: bpf_dynptr to IV data and state data to be used by decryptor. May be NULL.
*
* Encrypts provided buffer using IV data and the crypto context. Crypto context must be configured.
*/
__bpf_kfunc int bpf_crypto_encrypt(struct bpf_crypto_ctx *ctx,
- const struct bpf_dynptr_kern *src,
- const struct bpf_dynptr_kern *dst,
- const struct bpf_dynptr_kern *siv)
+ const struct bpf_dynptr *src,
+ const struct bpf_dynptr *dst,
+ const struct bpf_dynptr *siv__nullable)
{
- return bpf_crypto_crypt(ctx, src, dst, siv, false);
+ const struct bpf_dynptr_kern *src_kern = (struct bpf_dynptr_kern *)src;
+ const struct bpf_dynptr_kern *dst_kern = (struct bpf_dynptr_kern *)dst;
+ const struct bpf_dynptr_kern *siv_kern = (struct bpf_dynptr_kern *)siv__nullable;
+
+ return bpf_crypto_crypt(ctx, src_kern, dst_kern, siv_kern, false);
}
__bpf_kfunc_end_defs();
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 317ac2d66ebd..9e0e3b0a18e4 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -106,7 +106,7 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)];
}
-static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
+static int dev_map_alloc_check(union bpf_attr *attr)
{
u32 valsize = attr->value_size;
@@ -120,23 +120,28 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
attr->map_flags & ~DEV_CREATE_FLAG_MASK)
return -EINVAL;
+ if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+ /* Hash table size must be power of 2; roundup_pow_of_two()
+ * can overflow into UB on 32-bit arches
+ */
+ if (attr->max_entries > 1UL << 31)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
+{
/* Lookup returns a pointer straight to dev->ifindex, so make sure the
* verifier prevents writes from the BPF side
*/
attr->map_flags |= BPF_F_RDONLY_PROG;
-
-
bpf_map_init_from_attr(&dtab->map, attr);
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
- /* hash table size must be power of 2; roundup_pow_of_two() can
- * overflow into UB on 32-bit arches, so check that first
- */
- if (dtab->map.max_entries > 1UL << 31)
- return -EINVAL;
-
+ /* Hash table size must be power of 2 */
dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
-
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
dtab->map.numa_node);
if (!dtab->dev_index_head)
@@ -412,9 +417,8 @@ out:
* driver before returning from its napi->poll() routine. See the comment above
* xdp_do_flush() in filter.c.
*/
-void __dev_flush(void)
+void __dev_flush(struct list_head *flush_list)
{
- struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
struct xdp_dev_bulk_queue *bq, *tmp;
list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
@@ -425,16 +429,6 @@ void __dev_flush(void)
}
}
-#ifdef CONFIG_DEBUG_NET
-bool dev_check_flush(void)
-{
- if (list_empty(bpf_net_ctx_get_dev_flush_list()))
- return false;
- __dev_flush();
- return true;
-}
-#endif
-
/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
* by local_bh_disable() (from XDP calls inside NAPI). The
* rcu_read_lock_bh_held() below makes lockdep accept both.
@@ -459,7 +453,6 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
struct net_device *dev_rx, struct bpf_prog *xdp_prog)
{
- struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
@@ -473,6 +466,8 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
* are only ever modified together.
*/
if (!bq->dev_rx) {
+ struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
+
bq->dev_rx = dev_rx;
bq->xdp_prog = xdp_prog;
list_add(&bq->flush_node, flush_list);
@@ -1046,6 +1041,7 @@ static u64 dev_map_mem_usage(const struct bpf_map *map)
BTF_ID_LIST_SINGLE(dev_map_btf_ids, struct, bpf_dtab)
const struct bpf_map_ops dev_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
+ .map_alloc_check = dev_map_alloc_check,
.map_alloc = dev_map_alloc,
.map_free = dev_map_free,
.map_get_next_key = dev_map_get_next_key,
@@ -1060,6 +1056,7 @@ const struct bpf_map_ops dev_map_ops = {
const struct bpf_map_ops dev_map_hash_ops = {
.map_meta_equal = bpf_map_meta_equal,
+ .map_alloc_check = dev_map_alloc_check,
.map_alloc = dev_map_alloc,
.map_free = dev_map_free,
.map_get_next_key = dev_map_hash_get_next_key,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 6f1abcb4b084..b5f0adae8293 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1084,7 +1084,10 @@ struct bpf_async_cb {
struct bpf_prog *prog;
void __rcu *callback_fn;
void *value;
- struct rcu_head rcu;
+ union {
+ struct rcu_head rcu;
+ struct work_struct delete_work;
+ };
u64 flags;
};
@@ -1107,6 +1110,7 @@ struct bpf_async_cb {
struct bpf_hrtimer {
struct bpf_async_cb cb;
struct hrtimer timer;
+ atomic_t cancelling;
};
struct bpf_work {
@@ -1219,6 +1223,21 @@ static void bpf_wq_delete_work(struct work_struct *work)
kfree_rcu(w, cb.rcu);
}
+static void bpf_timer_delete_work(struct work_struct *work)
+{
+ struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, cb.delete_work);
+
+ /* Cancel the timer and wait for callback to complete if it was running.
+ * If hrtimer_cancel() can be safely called it's safe to call
+ * kfree_rcu(t) right after for both preallocated and non-preallocated
+ * maps. The async->cb = NULL was already done and no code path can see
+ * address 't' anymore. Timer if armed for existing bpf_hrtimer before
+ * bpf_timer_cancel_and_free will have been cancelled.
+ */
+ hrtimer_cancel(&t->timer);
+ kfree_rcu(t, cb.rcu);
+}
+
static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
enum bpf_async_type type)
{
@@ -1262,6 +1281,8 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
clockid = flags & (MAX_CLOCKS - 1);
t = (struct bpf_hrtimer *)cb;
+ atomic_set(&t->cancelling, 0);
+ INIT_WORK(&t->cb.delete_work, bpf_timer_delete_work);
hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
t->timer.function = bpf_timer_cb;
cb->value = (void *)async - map->record->timer_off;
@@ -1440,7 +1461,8 @@ static void drop_prog_refcnt(struct bpf_async_cb *async)
BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
{
- struct bpf_hrtimer *t;
+ struct bpf_hrtimer *t, *cur_t;
+ bool inc = false;
int ret = 0;
if (in_nmi())
@@ -1452,14 +1474,41 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
ret = -EINVAL;
goto out;
}
- if (this_cpu_read(hrtimer_running) == t) {
+
+ cur_t = this_cpu_read(hrtimer_running);
+ if (cur_t == t) {
/* If bpf callback_fn is trying to bpf_timer_cancel()
* its own timer the hrtimer_cancel() will deadlock
- * since it waits for callback_fn to finish
+ * since it waits for callback_fn to finish.
*/
ret = -EDEADLK;
goto out;
}
+
+ /* Only account in-flight cancellations when invoked from a timer
+ * callback, since we want to avoid waiting only if other _callbacks_
+ * are waiting on us, to avoid introducing lockups. Non-callback paths
+ * are ok, since nobody would synchronously wait for their completion.
+ */
+ if (!cur_t)
+ goto drop;
+ atomic_inc(&t->cancelling);
+ /* Need full barrier after relaxed atomic_inc */
+ smp_mb__after_atomic();
+ inc = true;
+ if (atomic_read(&cur_t->cancelling)) {
+ /* We're cancelling timer t, while some other timer callback is
+ * attempting to cancel us. In such a case, it might be possible
+ * that timer t belongs to the other callback, or some other
+ * callback waiting upon it (creating transitive dependencies
+ * upon us), and we will enter a deadlock if we continue
+ * cancelling and waiting for it synchronously, since it might
+ * do the same. Bail!
+ */
+ ret = -EDEADLK;
+ goto out;
+ }
+drop:
drop_prog_refcnt(&t->cb);
out:
__bpf_spin_unlock_irqrestore(&timer->lock);
@@ -1467,6 +1516,8 @@ out:
* if it was running.
*/
ret = ret ?: hrtimer_cancel(&t->timer);
+ if (inc)
+ atomic_dec(&t->cancelling);
rcu_read_unlock();
return ret;
}
@@ -1512,25 +1563,39 @@ void bpf_timer_cancel_and_free(void *val)
if (!t)
return;
- /* Cancel the timer and wait for callback to complete if it was running.
- * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
- * right after for both preallocated and non-preallocated maps.
- * The async->cb = NULL was already done and no code path can
- * see address 't' anymore.
- *
- * Check that bpf_map_delete/update_elem() wasn't called from timer
- * callback_fn. In such case don't call hrtimer_cancel() (since it will
- * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
- * return -1). Though callback_fn is still running on this cpu it's
+ /* We check that bpf_map_delete/update_elem() was called from timer
+ * callback_fn. In such case we don't call hrtimer_cancel() (since it
+ * will deadlock) and don't call hrtimer_try_to_cancel() (since it will
+ * just return -1). Though callback_fn is still running on this cpu it's
* safe to do kfree(t) because bpf_timer_cb() read everything it needed
* from 't'. The bpf subprog callback_fn won't be able to access 't',
* since async->cb = NULL was already done. The timer will be
* effectively cancelled because bpf_timer_cb() will return
* HRTIMER_NORESTART.
+ *
+ * However, it is possible the timer callback_fn calling us armed the
+ * timer _before_ calling us, such that failing to cancel it here will
+ * cause it to possibly use struct hrtimer after freeing bpf_hrtimer.
+ * Therefore, we _need_ to cancel any outstanding timers before we do
+ * kfree_rcu, even though no more timers can be armed.
+ *
+ * Moreover, we need to schedule work even if timer does not belong to
+ * the calling callback_fn, as on two different CPUs, we can end up in a
+ * situation where both sides run in parallel, try to cancel one
+ * another, and we end up waiting on both sides in hrtimer_cancel
+ * without making forward progress, since timer1 depends on time2
+ * callback to finish, and vice versa.
+ *
+ * CPU 1 (timer1_cb) CPU 2 (timer2_cb)
+ * bpf_timer_cancel_and_free(timer2) bpf_timer_cancel_and_free(timer1)
+ *
+ * To avoid these issues, punt to workqueue context when we are in a
+ * timer callback.
*/
- if (this_cpu_read(hrtimer_running) != t)
- hrtimer_cancel(&t->timer);
- kfree_rcu(t, cb.rcu);
+ if (this_cpu_read(hrtimer_running))
+ queue_work(system_unbound_wq, &t->cb.delete_work);
+ else
+ bpf_timer_delete_work(&t->cb.delete_work);
}
/* This function is called by map_delete/update_elem for individual element and
@@ -2433,7 +2498,7 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
/**
* bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
- * @ptr: The dynptr whose data slice to retrieve
+ * @p: The dynptr whose data slice to retrieve
* @offset: Offset into the dynptr
* @buffer__opt: User-provided buffer to copy contents into. May be NULL
* @buffer__szk: Size (in bytes) of the buffer if present. This is the
@@ -2459,9 +2524,10 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
* provided buffer, with its contents containing the data, if unable to obtain
* direct pointer)
*/
-__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset,
+__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset,
void *buffer__opt, u32 buffer__szk)
{
+ const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
enum bpf_dynptr_type type;
u32 len = buffer__szk;
int err;
@@ -2503,7 +2569,7 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset
/**
* bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data.
- * @ptr: The dynptr whose data slice to retrieve
+ * @p: The dynptr whose data slice to retrieve
* @offset: Offset into the dynptr
* @buffer__opt: User-provided buffer to copy contents into. May be NULL
* @buffer__szk: Size (in bytes) of the buffer if present. This is the
@@ -2543,9 +2609,11 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset
* provided buffer, with its contents containing the data, if unable to obtain
* direct pointer)
*/
-__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset,
+__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
void *buffer__opt, u32 buffer__szk)
{
+ const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
+
if (!ptr->data || __bpf_dynptr_is_rdonly(ptr))
return NULL;
@@ -2571,11 +2639,12 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o
* will be copied out into the buffer and the user will need to call
* bpf_dynptr_write() to commit changes.
*/
- return bpf_dynptr_slice(ptr, offset, buffer__opt, buffer__szk);
+ return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk);
}
-__bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 end)
+__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end)
{
+ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
u32 size;
if (!ptr->data || start > end)
@@ -2592,36 +2661,45 @@ __bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 en
return 0;
}
-__bpf_kfunc bool bpf_dynptr_is_null(struct bpf_dynptr_kern *ptr)
+__bpf_kfunc bool bpf_dynptr_is_null(const struct bpf_dynptr *p)
{
+ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
+
return !ptr->data;
}
-__bpf_kfunc bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
+__bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p)
{
+ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
+
if (!ptr->data)
return false;
return __bpf_dynptr_is_rdonly(ptr);
}
-__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr_kern *ptr)
+__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr *p)
{
+ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
+
if (!ptr->data)
return -EINVAL;
return __bpf_dynptr_size(ptr);
}
-__bpf_kfunc int bpf_dynptr_clone(struct bpf_dynptr_kern *ptr,
- struct bpf_dynptr_kern *clone__uninit)
+__bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p,
+ struct bpf_dynptr *clone__uninit)
{
+ struct bpf_dynptr_kern *clone = (struct bpf_dynptr_kern *)clone__uninit;
+ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
+
if (!ptr->data) {
- bpf_dynptr_set_null(clone__uninit);
+ bpf_dynptr_set_null(clone);
return -EINVAL;
}
- *clone__uninit = *ptr;
+ *clone = *ptr;
return 0;
}
@@ -2721,7 +2799,7 @@ __bpf_kfunc int bpf_wq_start(struct bpf_wq *wq, unsigned int flags)
}
__bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
- int (callback_fn)(void *map, int *key, struct bpf_wq *wq),
+ int (callback_fn)(void *map, int *key, void *value),
unsigned int flags,
void *aux__ign)
{
@@ -2986,7 +3064,9 @@ late_initcall(kfunc_init);
*/
const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len)
{
- return bpf_dynptr_slice(ptr, 0, NULL, len);
+ const struct bpf_dynptr *p = (struct bpf_dynptr *)ptr;
+
+ return bpf_dynptr_slice(p, 0, NULL, len);
}
/* Get a pointer to dynptr data up to len bytes for read write access. If
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 4bd8f17a9f24..5aebfc3051e3 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -91,7 +91,7 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
goto fail;
} else {
u64 new_end, new_start;
- u32 buf_start, buf_end, new_n;
+ u32 buf_start, buf_end;
new_end = log->end_pos + n;
if (new_end - log->start_pos >= log->len_total)
@@ -708,7 +708,9 @@ static void print_reg_state(struct bpf_verifier_env *env,
verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id));
verbose(env, "(");
if (reg->id)
- verbose_a("id=%d", reg->id);
+ verbose_a("id=%d", reg->id & ~BPF_ADD_CONST);
+ if (reg->id & BPF_ADD_CONST)
+ verbose(env, "%+d", reg->off);
if (reg->ref_obj_id)
verbose_a("ref_obj_id=%d", reg->ref_obj_id);
if (type_is_non_owning_ref(reg->type))
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index ec4e97c61eef..02aa9db8d796 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -261,6 +261,7 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
u32 saved_tid = info->tid;
struct task_struct *curr_task;
unsigned int curr_fd = info->fd;
+ struct file *f;
/* If this function returns a non-NULL file object,
* it held a reference to the task/file.
@@ -286,12 +287,8 @@ again:
}
rcu_read_lock();
- for (;; curr_fd++) {
- struct file *f;
- f = task_lookup_next_fdget_rcu(curr_task, &curr_fd);
- if (!f)
- break;
-
+ f = task_lookup_next_fdget_rcu(curr_task, &curr_fd);
+ if (f) {
/* set info->fd */
info->fd = curr_fd;
rcu_read_unlock();
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fcecaba8668d..3d6306c363b7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2982,8 +2982,10 @@ static int check_subprogs(struct bpf_verifier_env *env)
if (code == (BPF_JMP | BPF_CALL) &&
insn[i].src_reg == 0 &&
- insn[i].imm == BPF_FUNC_tail_call)
+ insn[i].imm == BPF_FUNC_tail_call) {
subprog[cur_subprog].has_tail_call = true;
+ subprog[cur_subprog].tail_call_reachable = true;
+ }
if (BPF_CLASS(code) == BPF_LD &&
(BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
subprog[cur_subprog].has_ld_abs = true;
@@ -3215,7 +3217,8 @@ static int insn_def_regno(const struct bpf_insn *insn)
case BPF_ST:
return -1;
case BPF_STX:
- if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+ if ((BPF_MODE(insn->code) == BPF_ATOMIC ||
+ BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) &&
(insn->imm & BPF_FETCH)) {
if (insn->imm == BPF_CMPXCHG)
return BPF_REG_0;
@@ -3991,7 +3994,7 @@ static bool idset_contains(struct bpf_idset *s, u32 id)
u32 i;
for (i = 0; i < s->count; ++i)
- if (s->ids[i] == id)
+ if (s->ids[i] == (id & ~BPF_ADD_CONST))
return true;
return false;
@@ -4001,7 +4004,7 @@ static int idset_push(struct bpf_idset *s, u32 id)
{
if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids)))
return -EFAULT;
- s->ids[s->count++] = id;
+ s->ids[s->count++] = id & ~BPF_ADD_CONST;
return 0;
}
@@ -4438,8 +4441,20 @@ static bool __is_pointer_value(bool allow_ptr_leaks,
static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
struct bpf_reg_state *src_reg)
{
- if (src_reg->type == SCALAR_VALUE && !src_reg->id &&
- !tnum_is_const(src_reg->var_off))
+ if (src_reg->type != SCALAR_VALUE)
+ return;
+
+ if (src_reg->id & BPF_ADD_CONST) {
+ /*
+ * The verifier is processing rX = rY insn and
+ * rY->id has special linked register already.
+ * Cleared it, since multiple rX += const are not supported.
+ */
+ src_reg->id = 0;
+ src_reg->off = 0;
+ }
+
+ if (!src_reg->id && !tnum_is_const(src_reg->var_off))
/* Ensure that src_reg has a valid ID that will be copied to
* dst_reg and then will be used by find_equal_scalars() to
* propagate min/max range.
@@ -7715,6 +7730,13 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
int err;
+ if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) {
+ verbose(env,
+ "arg#%d expected pointer to stack or const struct bpf_dynptr\n",
+ regno);
+ return -EINVAL;
+ }
+
/* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
* ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
*/
@@ -9464,6 +9486,10 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
return -EINVAL;
}
} else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
+ ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_DYNPTR);
+ if (ret)
+ return ret;
+
ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0);
if (ret)
return ret;
@@ -10917,7 +10943,7 @@ enum {
};
BTF_ID_LIST(kf_arg_btf_ids)
-BTF_ID(struct, bpf_dynptr_kern)
+BTF_ID(struct, bpf_dynptr)
BTF_ID(struct, bpf_list_head)
BTF_ID(struct, bpf_list_node)
BTF_ID(struct, bpf_rb_root)
@@ -11190,6 +11216,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
return KF_ARG_PTR_TO_CTX;
+ if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg))
+ return KF_ARG_PTR_TO_NULL;
+
if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_ALLOC_BTF_ID;
@@ -11235,9 +11264,6 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
return KF_ARG_PTR_TO_CALLBACK;
- if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg))
- return KF_ARG_PTR_TO_NULL;
-
if (argno + 1 < nargs &&
(is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
@@ -11268,6 +11294,8 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
bool strict_type_match = false;
const struct btf *reg_btf;
const char *reg_ref_tname;
+ bool taking_projection;
+ bool struct_same;
u32 reg_ref_id;
if (base_type(reg->type) == PTR_TO_BTF_ID) {
@@ -11311,7 +11339,13 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
- if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) {
+ struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match);
+ /* If kfunc is accepting a projection type (ie. __sk_buff), it cannot
+ * actually use it -- it must cast to the underlying type. So we allow
+ * caller to pass in the underlying type.
+ */
+ taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname);
+ if (!taking_projection && !struct_same) {
verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
btf_type_str(reg_ref_t), reg_ref_tname);
@@ -11957,12 +11991,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
int clone_ref_obj_id = 0;
- if (reg->type != PTR_TO_STACK &&
- reg->type != CONST_PTR_TO_DYNPTR) {
- verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i);
- return -EINVAL;
- }
-
if (reg->type == CONST_PTR_TO_DYNPTR)
dynptr_arg_type |= MEM_RDONLY;
@@ -14047,6 +14075,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
struct bpf_func_state *state = vstate->frame[vstate->curframe];
struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
+ bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
u8 opcode = BPF_OP(insn->code);
int err;
@@ -14069,11 +14098,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
if (dst_reg->type != SCALAR_VALUE)
ptr_reg = dst_reg;
- else
- /* Make sure ID is cleared otherwise dst_reg min/max could be
- * incorrectly propagated into other registers by find_equal_scalars()
- */
- dst_reg->id = 0;
+
if (BPF_SRC(insn->code) == BPF_X) {
src_reg = &regs[insn->src_reg];
if (src_reg->type != SCALAR_VALUE) {
@@ -14137,7 +14162,43 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
verbose(env, "verifier internal error: no src_reg\n");
return -EINVAL;
}
- return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
+ err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
+ if (err)
+ return err;
+ /*
+ * Compilers can generate the code
+ * r1 = r2
+ * r1 += 0x1
+ * if r2 < 1000 goto ...
+ * use r1 in memory access
+ * So remember constant delta between r2 and r1 and update r1 after
+ * 'if' condition.
+ */
+ if (env->bpf_capable && BPF_OP(insn->code) == BPF_ADD &&
+ dst_reg->id && is_reg_const(src_reg, alu32)) {
+ u64 val = reg_const_value(src_reg, alu32);
+
+ if ((dst_reg->id & BPF_ADD_CONST) ||
+ /* prevent overflow in find_equal_scalars() later */
+ val > (u32)S32_MAX) {
+ /*
+ * If the register already went through rX += val
+ * we cannot accumulate another val into rx->off.
+ */
+ dst_reg->off = 0;
+ dst_reg->id = 0;
+ } else {
+ dst_reg->id |= BPF_ADD_CONST;
+ dst_reg->off = val;
+ }
+ } else {
+ /*
+ * Make sure ID is cleared otherwise dst_reg min/max could be
+ * incorrectly propagated into other registers by find_equal_scalars()
+ */
+ dst_reg->id = 0;
+ }
+ return 0;
}
/* check validity of 32-bit and 64-bit arithmetic operations */
@@ -15109,12 +15170,36 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
static void find_equal_scalars(struct bpf_verifier_state *vstate,
struct bpf_reg_state *known_reg)
{
+ struct bpf_reg_state fake_reg;
struct bpf_func_state *state;
struct bpf_reg_state *reg;
bpf_for_each_reg_in_vstate(vstate, state, reg, ({
- if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
+ if (reg->type != SCALAR_VALUE || reg == known_reg)
+ continue;
+ if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
+ continue;
+ if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
+ reg->off == known_reg->off) {
+ copy_register_state(reg, known_reg);
+ } else {
+ s32 saved_off = reg->off;
+
+ fake_reg.type = SCALAR_VALUE;
+ __mark_reg_known(&fake_reg, (s32)reg->off - (s32)known_reg->off);
+
+ /* reg = known_reg; reg += delta */
copy_register_state(reg, known_reg);
+ /*
+ * Must preserve off, id and add_const flag,
+ * otherwise another find_equal_scalars() will be incorrect.
+ */
+ reg->off = saved_off;
+
+ scalar32_min_max_add(reg, &fake_reg);
+ scalar_min_max_add(reg, &fake_reg);
+ reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
+ }
}));
}
@@ -16749,6 +16834,10 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
}
if (!rold->precise && exact == NOT_EXACT)
return true;
+ if ((rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST))
+ return false;
+ if ((rold->id & BPF_ADD_CONST) && (rold->off != rcur->off))
+ return false;
/* Why check_ids() for scalar registers?
*
* Consider the following BPF code:
@@ -18630,8 +18719,7 @@ static void release_maps(struct bpf_verifier_env *env)
/* drop refcnt of maps used by the rejected program */
static void release_btfs(struct bpf_verifier_env *env)
{
- __bpf_free_used_btfs(env->prog->aux, env->used_btfs,
- env->used_btf_cnt);
+ __bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt);
}
/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 563877d6c28b..3d2bf1d50a0c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1859,6 +1859,9 @@ static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return fals
void __init bringup_nonboot_cpus(unsigned int max_cpus)
{
+ if (!max_cpus)
+ return;
+
/* Try parallel bringup optimization if enabled */
if (cpuhp_bringup_cpus_parallel(max_cpus))
return;
@@ -2446,7 +2449,7 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
* The caller needs to hold cpus read locked while calling this function.
* Return:
* On success:
- * Positive state number if @state is CPUHP_AP_ONLINE_DYN;
+ * Positive state number if @state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN;
* 0 for all other states
* On failure: proper (negative) error code
*/
@@ -2469,7 +2472,7 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
ret = cpuhp_store_callbacks(state, name, startup, teardown,
multi_instance);
- dynstate = state == CPUHP_AP_ONLINE_DYN;
+ dynstate = state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN;
if (ret > 0 && dynstate) {
state = ret;
ret = 0;
@@ -2500,8 +2503,8 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
out:
mutex_unlock(&cpuhp_state_mutex);
/*
- * If the requested state is CPUHP_AP_ONLINE_DYN, return the
- * dynamically allocated state in case of success.
+ * If the requested state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN,
+ * return the dynamically allocated state in case of success.
*/
if (!ret && dynstate)
return state;
diff --git a/kernel/exit.c b/kernel/exit.c
index f95a2c1338a8..81fcee45d630 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -484,6 +484,8 @@ retry:
* Search through everything else, we should not get here often.
*/
for_each_process(g) {
+ if (atomic_read(&mm->mm_users) <= 1)
+ break;
if (g->flags & PF_KTHREAD)
continue;
for_each_thread(g, c) {
diff --git a/kernel/fork.c b/kernel/fork.c
index f314bdd7e610..99076dbe27d8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2355,7 +2355,6 @@ __latent_entropy struct task_struct *copy_process(
RCU_INIT_POINTER(p->bpf_storage, NULL);
p->bpf_ctx = NULL;
#endif
- p->bpf_net_context = NULL;
/* Perform scheduler related setup. Assign this task to a CPU. */
retval = sched_fork(clone_flags, p);
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 22ea19a36e6e..98b9622d372e 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -388,12 +388,12 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
!!__bpf_address_lookup(addr, symbolsize, offset, namebuf);
}
-static const char *kallsyms_lookup_buildid(unsigned long addr,
+static int kallsyms_lookup_buildid(unsigned long addr,
unsigned long *symbolsize,
unsigned long *offset, char **modname,
const unsigned char **modbuildid, char *namebuf)
{
- const char *ret;
+ int ret;
namebuf[KSYM_NAME_LEN - 1] = 0;
namebuf[0] = 0;
@@ -410,7 +410,7 @@ static const char *kallsyms_lookup_buildid(unsigned long addr,
if (modbuildid)
*modbuildid = NULL;
- ret = namebuf;
+ ret = strlen(namebuf);
goto found;
}
@@ -442,8 +442,13 @@ const char *kallsyms_lookup(unsigned long addr,
unsigned long *offset,
char **modname, char *namebuf)
{
- return kallsyms_lookup_buildid(addr, symbolsize, offset, modname,
- NULL, namebuf);
+ int ret = kallsyms_lookup_buildid(addr, symbolsize, offset, modname,
+ NULL, namebuf);
+
+ if (!ret)
+ return NULL;
+
+ return namebuf;
}
int lookup_symbol_name(unsigned long addr, char *symname)
@@ -478,19 +483,15 @@ static int __sprint_symbol(char *buffer, unsigned long address,
{
char *modname;
const unsigned char *buildid;
- const char *name;
unsigned long offset, size;
int len;
address += symbol_offset;
- name = kallsyms_lookup_buildid(address, &size, &offset, &modname, &buildid,
+ len = kallsyms_lookup_buildid(address, &size, &offset, &modname, &buildid,
buffer);
- if (!name)
+ if (!len)
return sprintf(buffer, "0x%lx", address - symbol_offset);
- if (name != buffer)
- strcpy(buffer, name);
- len = strlen(buffer);
offset -= symbol_offset;
if (add_offset)
diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c
index 62fb57bb9f16..bf65e0c3c86f 100644
--- a/kernel/module/kallsyms.c
+++ b/kernel/module/kallsyms.c
@@ -321,14 +321,15 @@ void * __weak dereference_module_function_descriptor(struct module *mod,
* For kallsyms to ask for address resolution. NULL means not found. Careful
* not to lock to avoid deadlock on oopses, simply disable preemption.
*/
-const char *module_address_lookup(unsigned long addr,
- unsigned long *size,
- unsigned long *offset,
- char **modname,
- const unsigned char **modbuildid,
- char *namebuf)
+int module_address_lookup(unsigned long addr,
+ unsigned long *size,
+ unsigned long *offset,
+ char **modname,
+ const unsigned char **modbuildid,
+ char *namebuf)
{
- const char *ret = NULL;
+ const char *sym;
+ int ret = 0;
struct module *mod;
preempt_disable();
@@ -344,12 +345,10 @@ const char *module_address_lookup(unsigned long addr,
#endif
}
- ret = find_kallsyms_symbol(mod, addr, size, offset);
- }
- /* Make a copy in here where it's safe */
- if (ret) {
- strscpy(namebuf, ret, KSYM_NAME_LEN);
- ret = namebuf;
+ sym = find_kallsyms_symbol(mod, addr, size, offset);
+
+ if (sym)
+ ret = strscpy(namebuf, sym, KSYM_NAME_LEN);
}
preempt_enable();
diff --git a/kernel/module/main.c b/kernel/module/main.c
index d18a94b973e1..d9592195c5bb 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -2166,6 +2166,8 @@ static int find_module_sections(struct module *mod, struct load_info *info)
#endif
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
mod->btf_data = any_section_objs(info, ".BTF", 1, &mod->btf_data_size);
+ mod->btf_base_data = any_section_objs(info, ".BTF.base", 1,
+ &mod->btf_base_data_size);
#endif
#ifdef CONFIG_JUMP_LABEL
mod->jump_entries = section_objs(info, "__jump_table",
@@ -2590,8 +2592,9 @@ static noinline int do_init_module(struct module *mod)
}
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
- /* .BTF is not SHF_ALLOC and will get removed, so sanitize pointer */
+ /* .BTF is not SHF_ALLOC and will get removed, so sanitize pointers */
mod->btf_data = NULL;
+ mod->btf_base_data = NULL;
#endif
/*
* We want to free module_init, but be aware that kallsyms may be
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
index 040fe7d1eda2..39a2b61c7232 100644
--- a/kernel/printk/Makefile
+++ b/kernel/printk/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y = printk.o conopt.o
+obj-y = printk.o
obj-$(CONFIG_PRINTK) += printk_safe.o nbcon.o
obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
obj-$(CONFIG_PRINTK_INDEX) += index.o
diff --git a/kernel/printk/conopt.c b/kernel/printk/conopt.c
deleted file mode 100644
index 9d507bac3657..000000000000
--- a/kernel/printk/conopt.c
+++ /dev/null
@@ -1,146 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Kernel command line console options for hardware based addressing
- *
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
- * Author: Tony Lindgren <[email protected]>
- */
-
-#include <linux/console.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
-#include <asm/errno.h>
-
-#include "console_cmdline.h"
-
-/*
- * Allow longer DEVNAME:0.0 style console naming such as abcd0000.serial:0.0
- * in addition to the legacy ttyS0 style naming.
- */
-#define CONSOLE_NAME_MAX 32
-
-#define CONSOLE_OPT_MAX 16
-#define CONSOLE_BRL_OPT_MAX 16
-
-struct console_option {
- char name[CONSOLE_NAME_MAX];
- char opt[CONSOLE_OPT_MAX];
- char brl_opt[CONSOLE_BRL_OPT_MAX];
- u8 has_brl_opt:1;
-};
-
-/* Updated only at console_setup() time, no locking needed */
-static struct console_option conopt[MAX_CMDLINECONSOLES];
-
-/**
- * console_opt_save - Saves kernel command line console option for driver use
- * @str: Kernel command line console name and option
- * @brl_opt: Braille console options
- *
- * Saves a kernel command line console option for driver subsystems to use for
- * adding a preferred console during init. Called from console_setup() only.
- *
- * Return: 0 on success, negative error code on failure.
- */
-int __init console_opt_save(const char *str, const char *brl_opt)
-{
- struct console_option *con;
- size_t namelen, optlen;
- const char *opt;
- int i;
-
- namelen = strcspn(str, ",");
- if (namelen == 0 || namelen >= CONSOLE_NAME_MAX)
- return -EINVAL;
-
- opt = str + namelen;
- if (*opt == ',')
- opt++;
-
- optlen = strlen(opt);
- if (optlen >= CONSOLE_OPT_MAX)
- return -EINVAL;
-
- for (i = 0; i < MAX_CMDLINECONSOLES; i++) {
- con = &conopt[i];
-
- if (con->name[0]) {
- if (!strncmp(str, con->name, namelen))
- return 0;
- continue;
- }
-
- /*
- * The name isn't terminated, only opt is. Empty opt is fine,
- * but brl_opt can be either empty or NULL. For more info, see
- * _braille_console_setup().
- */
- strscpy(con->name, str, namelen + 1);
- strscpy(con->opt, opt, CONSOLE_OPT_MAX);
- if (brl_opt) {
- strscpy(con->brl_opt, brl_opt, CONSOLE_BRL_OPT_MAX);
- con->has_brl_opt = 1;
- }
-
- return 0;
- }
-
- return -ENOMEM;
-}
-
-static struct console_option *console_opt_find(const char *name)
-{
- struct console_option *con;
- int i;
-
- for (i = 0; i < MAX_CMDLINECONSOLES; i++) {
- con = &conopt[i];
- if (!strcmp(name, con->name))
- return con;
- }
-
- return NULL;
-}
-
-/**
- * add_preferred_console_match - Adds a preferred console if a match is found
- * @match: Expected console on kernel command line, such as console=DEVNAME:0.0
- * @name: Name of the console character device to add such as ttyS
- * @idx: Index for the console
- *
- * Allows driver subsystems to add a console after translating the command
- * line name to the character device name used for the console. Options are
- * added automatically based on the kernel command line. Duplicate preferred
- * consoles are ignored by __add_preferred_console().
- *
- * Return: 0 on success, negative error code on failure.
- */
-int add_preferred_console_match(const char *match, const char *name,
- const short idx)
-{
- struct console_option *con;
- char *brl_opt = NULL;
-
- if (!match || !strlen(match) || !name || !strlen(name) ||
- idx < 0)
- return -EINVAL;
-
- con = console_opt_find(match);
- if (!con)
- return -ENOENT;
-
- /*
- * See __add_preferred_console(). It checks for NULL brl_options to set
- * the preferred_console flag. Empty brl_opt instead of NULL leads into
- * the preferred_console flag not set, and CON_CONSDEV not being set,
- * and the boot console won't get disabled at the end of console_setup().
- */
- if (con->has_brl_opt)
- brl_opt = con->brl_opt;
-
- console_opt_add_preferred_console(name, idx, con->opt, brl_opt);
-
- return 0;
-}
diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h
index a125e0235589..3ca74ad391d6 100644
--- a/kernel/printk/console_cmdline.h
+++ b/kernel/printk/console_cmdline.h
@@ -2,12 +2,6 @@
#ifndef _CONSOLE_CMDLINE_H
#define _CONSOLE_CMDLINE_H
-#define MAX_CMDLINECONSOLES 8
-
-int console_opt_save(const char *str, const char *brl_opt);
-int console_opt_add_preferred_console(const char *name, const short idx,
- char *options, char *brl_options);
-
struct console_cmdline
{
char name[16]; /* Name of the driver */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 420fd310129d..dddb15f48d59 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -383,6 +383,9 @@ static int console_locked;
/*
* Array of consoles built from command line options (console=)
*/
+
+#define MAX_CMDLINECONSOLES 8
+
static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
static int preferred_console = -1;
@@ -2500,17 +2503,6 @@ static int __init console_setup(char *str)
if (_braille_console_setup(&str, &brl_options))
return 1;
- /* Save the console for driver subsystem use */
- if (console_opt_save(str, brl_options))
- return 1;
-
- /* Flag register_console() to not call try_enable_default_console() */
- console_set_on_cmdline = 1;
-
- /* Don't attempt to parse a DEVNAME:0.0 style console */
- if (strchr(str, ':'))
- return 1;
-
/*
* Decode str into name, index, options.
*/
@@ -2541,13 +2533,6 @@ static int __init console_setup(char *str)
}
__setup("console=", console_setup);
-/* Only called from add_preferred_console_match() */
-int console_opt_add_preferred_console(const char *name, const short idx,
- char *options, char *brl_options)
-{
- return __add_preferred_console(name, idx, options, brl_options, true);
-}
-
/**
* add_preferred_console - add a device to the list of preferred consoles.
* @name: device name
@@ -3522,7 +3507,7 @@ void register_console(struct console *newcon)
* Note that a console with tty binding will have CON_CONSDEV
* flag set and will be first in the list.
*/
- if (preferred_console < 0 && !console_set_on_cmdline) {
+ if (preferred_console < 0) {
if (hlist_empty(&console_list) || !console_first()->device ||
console_first()->flags & CON_BOOT) {
try_enable_default_console(newcon);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index d7eee421d4bc..b696b85ac63e 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -46,8 +46,8 @@ COND_SYSCALL(io_getevents_time32);
COND_SYSCALL(io_getevents);
COND_SYSCALL(io_pgetevents_time32);
COND_SYSCALL(io_pgetevents);
-COND_SYSCALL_COMPAT(io_pgetevents_time32);
COND_SYSCALL_COMPAT(io_pgetevents);
+COND_SYSCALL_COMPAT(io_pgetevents_time64);
COND_SYSCALL(io_uring_setup);
COND_SYSCALL(io_uring_enter);
COND_SYSCALL(io_uring_register);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 492c14aac642..b8ee320208d4 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1285,6 +1285,8 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
struct hrtimer_clock_base *base;
unsigned long flags;
+ if (WARN_ON_ONCE(!timer->function))
+ return;
/*
* Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
* match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d1daeab1bbc1..cd098846e251 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1369,8 +1369,8 @@ __bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
/**
* bpf_verify_pkcs7_signature - verify a PKCS#7 signature
- * @data_ptr: data to verify
- * @sig_ptr: signature of the data
+ * @data_p: data to verify
+ * @sig_p: signature of the data
* @trusted_keyring: keyring with keys trusted for signature verification
*
* Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr*
@@ -1378,10 +1378,12 @@ __bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
*
* Return: 0 on success, a negative value on error.
*/
-__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr,
- struct bpf_dynptr_kern *sig_ptr,
+__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
+ struct bpf_dynptr *sig_p,
struct bpf_key *trusted_keyring)
{
+ struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p;
+ struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p;
const void *data, *sig;
u32 data_len, sig_len;
int ret;
@@ -1444,7 +1446,7 @@ __bpf_kfunc_start_defs();
* bpf_get_file_xattr - get xattr of a file
* @file: file to get xattr from
* @name__str: name of the xattr
- * @value_ptr: output buffer of the xattr value
+ * @value_p: output buffer of the xattr value
*
* Get xattr *name__str* of *file* and store the output in *value_ptr*.
*
@@ -1453,8 +1455,9 @@ __bpf_kfunc_start_defs();
* Return: 0 on success, a negative value on error.
*/
__bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str,
- struct bpf_dynptr_kern *value_ptr)
+ struct bpf_dynptr *value_p)
{
+ struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p;
struct dentry *dentry;
u32 value_len;
void *value;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 65208d3b5ed9..eacab4020508 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6969,7 +6969,7 @@ allocate_ftrace_mod_map(struct module *mod,
return mod_map;
}
-static const char *
+static int
ftrace_func_address_lookup(struct ftrace_mod_map *mod_map,
unsigned long addr, unsigned long *size,
unsigned long *off, char *sym)
@@ -6990,21 +6990,18 @@ ftrace_func_address_lookup(struct ftrace_mod_map *mod_map,
*size = found_func->size;
if (off)
*off = addr - found_func->ip;
- if (sym)
- strscpy(sym, found_func->name, KSYM_NAME_LEN);
-
- return found_func->name;
+ return strscpy(sym, found_func->name, KSYM_NAME_LEN);
}
- return NULL;
+ return 0;
}
-const char *
+int
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym)
{
struct ftrace_mod_map *mod_map;
- const char *ret = NULL;
+ int ret = 0;
/* mod_map is freed via call_rcu() */
preempt_disable();
diff --git a/lib/build_OID_registry b/lib/build_OID_registry
index 56d8bafeb848..8267e8d71338 100755
--- a/lib/build_OID_registry
+++ b/lib/build_OID_registry
@@ -38,7 +38,9 @@ close IN_FILE || die;
#
open C_FILE, ">$ARGV[1]" or die;
print C_FILE "/*\n";
-print C_FILE " * Automatically generated by ", $0 =~ s#^\Q$abs_srctree/\E##r, ". Do not edit\n";
+my $scriptname = $0;
+$scriptname =~ s#^\Q$abs_srctree/\E##;
+print C_FILE " * Automatically generated by ", $scriptname, ". Do not edit\n";
print C_FILE " */\n";
#
diff --git a/lib/closure.c b/lib/closure.c
index 2e1ee9fdec08..116afae2eed9 100644
--- a/lib/closure.c
+++ b/lib/closure.c
@@ -13,7 +13,7 @@
#include <linux/seq_file.h>
#include <linux/sched/debug.h>
-static inline void closure_put_after_sub(struct closure *cl, int flags)
+static inline void closure_put_after_sub_checks(int flags)
{
int r = flags & CLOSURE_REMAINING_MASK;
@@ -22,12 +22,17 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
flags & CLOSURE_GUARD_MASK, (unsigned) __fls(r)))
r &= ~CLOSURE_GUARD_MASK;
- if (!r) {
- smp_acquire__after_ctrl_dep();
+ WARN(!r && (flags & ~CLOSURE_DESTRUCTOR),
+ "closure ref hit 0 with incorrect flags set: %x (%u)",
+ flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags));
+}
+
+static inline void closure_put_after_sub(struct closure *cl, int flags)
+{
+ closure_put_after_sub_checks(flags);
- WARN(flags & ~CLOSURE_DESTRUCTOR,
- "closure ref hit 0 with incorrect flags set: %x (%u)",
- flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags));
+ if (!(flags & CLOSURE_REMAINING_MASK)) {
+ smp_acquire__after_ctrl_dep();
cl->closure_get_happened = false;
@@ -145,6 +150,41 @@ void __sched __closure_sync(struct closure *cl)
}
EXPORT_SYMBOL(__closure_sync);
+/*
+ * closure_return_sync - finish running a closure, synchronously (i.e. waiting
+ * for outstanding get()s to finish) and returning once closure refcount is 0.
+ *
+ * Unlike closure_sync() this doesn't reinit the ref to 1; subsequent
+ * closure_get_not_zero() calls waill fail.
+ */
+void __sched closure_return_sync(struct closure *cl)
+{
+ struct closure_syncer s = { .task = current };
+
+ cl->s = &s;
+ set_closure_fn(cl, closure_sync_fn, NULL);
+
+ unsigned flags = atomic_sub_return_release(1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR,
+ &cl->remaining);
+
+ closure_put_after_sub_checks(flags);
+
+ if (unlikely(flags & CLOSURE_REMAINING_MASK)) {
+ while (1) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (s.done)
+ break;
+ schedule();
+ }
+
+ __set_current_state(TASK_RUNNING);
+ }
+
+ if (cl->parent)
+ closure_put(cl->parent);
+}
+EXPORT_SYMBOL(closure_return_sync);
+
int __sched __closure_sync_timeout(struct closure *cl, unsigned long timeout)
{
struct closure_syncer s = { .task = current };
@@ -204,6 +244,9 @@ void closure_debug_destroy(struct closure *cl)
{
unsigned long flags;
+ if (cl->magic == CLOSURE_MAGIC_STACK)
+ return;
+
BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE);
cl->magic = CLOSURE_MAGIC_DEAD;
diff --git a/lib/string_helpers_kunit.c b/lib/string_helpers_kunit.c
index f88e39fd68d6..c853046183d2 100644
--- a/lib/string_helpers_kunit.c
+++ b/lib/string_helpers_kunit.c
@@ -625,4 +625,5 @@ static struct kunit_suite string_helpers_test_suite = {
kunit_test_suites(&string_helpers_test_suite);
+MODULE_DESCRIPTION("Test cases for string helpers module");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/lib/string_kunit.c b/lib/string_kunit.c
index 2a812decf14b..c919e3293da6 100644
--- a/lib/string_kunit.c
+++ b/lib/string_kunit.c
@@ -633,4 +633,5 @@ static struct kunit_suite string_test_suite = {
kunit_test_suites(&string_test_suite);
+MODULE_DESCRIPTION("Test cases for string functions");
MODULE_LICENSE("GPL v2");
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index ce5716c3999a..b7acc29bcc3b 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -15198,6 +15198,7 @@ struct tail_call_test {
int flags;
int result;
int stack_depth;
+ bool has_tail_call;
};
/* Flags that can be passed to tail call test cases */
@@ -15273,6 +15274,7 @@ static struct tail_call_test tail_call_tests[] = {
BPF_EXIT_INSN(),
},
.result = 3,
+ .has_tail_call = true,
},
{
"Tail call 3",
@@ -15283,6 +15285,7 @@ static struct tail_call_test tail_call_tests[] = {
BPF_EXIT_INSN(),
},
.result = 6,
+ .has_tail_call = true,
},
{
"Tail call 4",
@@ -15293,6 +15296,7 @@ static struct tail_call_test tail_call_tests[] = {
BPF_EXIT_INSN(),
},
.result = 10,
+ .has_tail_call = true,
},
{
"Tail call load/store leaf",
@@ -15323,6 +15327,7 @@ static struct tail_call_test tail_call_tests[] = {
},
.result = 0,
.stack_depth = 16,
+ .has_tail_call = true,
},
{
"Tail call error path, max count reached",
@@ -15335,6 +15340,7 @@ static struct tail_call_test tail_call_tests[] = {
},
.flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE,
.result = (MAX_TAIL_CALL_CNT + 1) * MAX_TESTRUNS,
+ .has_tail_call = true,
},
{
"Tail call count preserved across function calls",
@@ -15357,6 +15363,7 @@ static struct tail_call_test tail_call_tests[] = {
.stack_depth = 8,
.flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE,
.result = (MAX_TAIL_CALL_CNT + 1) * MAX_TESTRUNS,
+ .has_tail_call = true,
},
{
"Tail call error path, NULL target",
@@ -15369,6 +15376,7 @@ static struct tail_call_test tail_call_tests[] = {
},
.flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE,
.result = MAX_TESTRUNS,
+ .has_tail_call = true,
},
{
"Tail call error path, index out of range",
@@ -15381,6 +15389,7 @@ static struct tail_call_test tail_call_tests[] = {
},
.flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE,
.result = MAX_TESTRUNS,
+ .has_tail_call = true,
},
};
@@ -15430,6 +15439,7 @@ static __init int prepare_tail_call_tests(struct bpf_array **pprogs)
fp->len = len;
fp->type = BPF_PROG_TYPE_SOCKET_FILTER;
fp->aux->stack_depth = test->stack_depth;
+ fp->aux->tail_call_reachable = test->has_tail_call;
memcpy(fp->insnsi, test->insns, len * sizeof(struct bpf_insn));
/* Relocate runtime tail call offsets and addresses */
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 6392f1cc97a3..e66823d6b10b 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1358,14 +1358,31 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
* access frequencies are similar. This is for minimizing the monitoring
* overhead under the dynamically changeable access pattern. If a merge was
* unnecessarily made, later 'kdamond_split_regions()' will revert it.
+ *
+ * The total number of regions could be higher than the user-defined limit,
+ * max_nr_regions for some cases. For example, the user can update
+ * max_nr_regions to a number that lower than the current number of regions
+ * while DAMON is running. For such a case, repeat merging until the limit is
+ * met while increasing @threshold up to possible maximum level.
*/
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
unsigned long sz_limit)
{
struct damon_target *t;
-
- damon_for_each_target(t, c)
- damon_merge_regions_of(t, threshold, sz_limit);
+ unsigned int nr_regions;
+ unsigned int max_thres;
+
+ max_thres = c->attrs.aggr_interval /
+ (c->attrs.sample_interval ? c->attrs.sample_interval : 1);
+ do {
+ nr_regions = 0;
+ damon_for_each_target(t, c) {
+ damon_merge_regions_of(t, threshold, sz_limit);
+ nr_regions += damon_nr_regions(t);
+ }
+ threshold = max(1, threshold * 2);
+ } while (nr_regions > c->attrs.max_nr_regions &&
+ threshold / 2 < max_thres);
}
/*
diff --git a/mm/filemap.c b/mm/filemap.c
index 876cc64aadd7..657bcd887fdb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1847,7 +1847,7 @@ repeat:
if (!folio || xa_is_value(folio))
goto out;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
goto repeat;
if (unlikely(folio != xas_reload(&xas))) {
@@ -2001,7 +2001,7 @@ retry:
if (!folio || xa_is_value(folio))
return folio;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
goto reset;
if (unlikely(folio != xas_reload(xas))) {
@@ -2181,7 +2181,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
if (xa_is_value(folio))
goto update_start;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
goto retry;
if (unlikely(folio != xas_reload(&xas)))
@@ -2313,7 +2313,7 @@ static void filemap_get_read_batch(struct address_space *mapping,
break;
if (xa_is_sibling(folio))
break;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
goto retry;
if (unlikely(folio != xas_reload(&xas)))
@@ -3124,7 +3124,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Use the readahead code, even if readahead is disabled */
- if (vm_flags & VM_HUGEPAGE) {
+ if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);
ra->size = HPAGE_PMD_NR;
@@ -3231,7 +3231,8 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID))
return 0;
- ptep = pte_offset_map(vmf->pmd, vmf->address);
+ ptep = pte_offset_map_nolock(vma->vm_mm, vmf->pmd, vmf->address,
+ &vmf->ptl);
if (unlikely(!ptep))
return VM_FAULT_NOPAGE;
@@ -3472,7 +3473,7 @@ static struct folio *next_uptodate_folio(struct xa_state *xas,
continue;
if (folio_test_locked(folio))
continue;
- if (!folio_try_get_rcu(folio))
+ if (!folio_try_get(folio))
continue;
/* Has the page moved or been split? */
if (unlikely(folio != xas_reload(xas)))
@@ -4248,6 +4249,9 @@ static void filemap_cachestat(struct address_space *mapping,
XA_STATE(xas, &mapping->i_pages, first_index);
struct folio *folio;
+ /* Flush stats (and potentially sleep) outside the RCU read section. */
+ mem_cgroup_flush_stats_ratelimited(NULL);
+
rcu_read_lock();
xas_for_each(&xas, folio, last_index) {
int order;
@@ -4311,7 +4315,7 @@ static void filemap_cachestat(struct address_space *mapping,
goto resched;
}
#endif
- if (workingset_test_recent(shadow, true, &workingset))
+ if (workingset_test_recent(shadow, true, &workingset, false))
cs->nr_recently_evicted += nr_pages;
goto resched;
diff --git a/mm/gup.c b/mm/gup.c
index ca0f5cedce9b..f1d6bc06eb52 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -76,7 +76,7 @@ retry:
folio = page_folio(page);
if (WARN_ON_ONCE(folio_ref_count(folio) < 0))
return NULL;
- if (unlikely(!folio_ref_try_add_rcu(folio, refs)))
+ if (unlikely(!folio_ref_try_add(folio, refs)))
return NULL;
/*
@@ -97,95 +97,6 @@ retry:
return folio;
}
-/**
- * try_grab_folio() - Attempt to get or pin a folio.
- * @page: pointer to page to be grabbed
- * @refs: the value to (effectively) add to the folio's refcount
- * @flags: gup flags: these are the FOLL_* flag values.
- *
- * "grab" names in this file mean, "look at flags to decide whether to use
- * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
- *
- * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
- * same time. (That's true throughout the get_user_pages*() and
- * pin_user_pages*() APIs.) Cases:
- *
- * FOLL_GET: folio's refcount will be incremented by @refs.
- *
- * FOLL_PIN on large folios: folio's refcount will be incremented by
- * @refs, and its pincount will be incremented by @refs.
- *
- * FOLL_PIN on single-page folios: folio's refcount will be incremented by
- * @refs * GUP_PIN_COUNTING_BIAS.
- *
- * Return: The folio containing @page (with refcount appropriately
- * incremented) for success, or NULL upon failure. If neither FOLL_GET
- * nor FOLL_PIN was set, that's considered failure, and furthermore,
- * a likely bug in the caller, so a warning is also emitted.
- */
-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
-{
- struct folio *folio;
-
- if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0))
- return NULL;
-
- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
- return NULL;
-
- if (flags & FOLL_GET)
- return try_get_folio(page, refs);
-
- /* FOLL_PIN is set */
-
- /*
- * Don't take a pin on the zero page - it's not going anywhere
- * and it is used in a *lot* of places.
- */
- if (is_zero_page(page))
- return page_folio(page);
-
- folio = try_get_folio(page, refs);
- if (!folio)
- return NULL;
-
- /*
- * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
- * right zone, so fail and let the caller fall back to the slow
- * path.
- */
- if (unlikely((flags & FOLL_LONGTERM) &&
- !folio_is_longterm_pinnable(folio))) {
- if (!put_devmap_managed_folio_refs(folio, refs))
- folio_put_refs(folio, refs);
- return NULL;
- }
-
- /*
- * When pinning a large folio, use an exact count to track it.
- *
- * However, be sure to *also* increment the normal folio
- * refcount field at least once, so that the folio really
- * is pinned. That's why the refcount from the earlier
- * try_get_folio() is left intact.
- */
- if (folio_test_large(folio))
- atomic_add(refs, &folio->_pincount);
- else
- folio_ref_add(folio,
- refs * (GUP_PIN_COUNTING_BIAS - 1));
- /*
- * Adjust the pincount before re-checking the PTE for changes.
- * This is essentially a smp_mb() and is paired with a memory
- * barrier in folio_try_share_anon_rmap_*().
- */
- smp_mb__after_atomic();
-
- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
-
- return folio;
-}
-
static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
{
if (flags & FOLL_PIN) {
@@ -203,58 +114,59 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
}
/**
- * try_grab_page() - elevate a page's refcount by a flag-dependent amount
- * @page: pointer to page to be grabbed
- * @flags: gup flags: these are the FOLL_* flag values.
+ * try_grab_folio() - add a folio's refcount by a flag-dependent amount
+ * @folio: pointer to folio to be grabbed
+ * @refs: the value to (effectively) add to the folio's refcount
+ * @flags: gup flags: these are the FOLL_* flag values
*
* This might not do anything at all, depending on the flags argument.
*
* "grab" names in this file mean, "look at flags to decide whether to use
- * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount.
+ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
*
* Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same
- * time. Cases: please see the try_grab_folio() documentation, with
- * "refs=1".
+ * time.
*
* Return: 0 for success, or if no action was required (if neither FOLL_PIN
* nor FOLL_GET was set, nothing is done). A negative error code for failure:
*
- * -ENOMEM FOLL_GET or FOLL_PIN was set, but the page could not
+ * -ENOMEM FOLL_GET or FOLL_PIN was set, but the folio could not
* be grabbed.
+ *
+ * It is called when we have a stable reference for the folio, typically in
+ * GUP slow path.
*/
-int __must_check try_grab_page(struct page *page, unsigned int flags)
+int __must_check try_grab_folio(struct folio *folio, int refs,
+ unsigned int flags)
{
- struct folio *folio = page_folio(page);
-
if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
return -ENOMEM;
- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
+ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(&folio->page)))
return -EREMOTEIO;
if (flags & FOLL_GET)
- folio_ref_inc(folio);
+ folio_ref_add(folio, refs);
else if (flags & FOLL_PIN) {
/*
* Don't take a pin on the zero page - it's not going anywhere
* and it is used in a *lot* of places.
*/
- if (is_zero_page(page))
+ if (is_zero_folio(folio))
return 0;
/*
- * Similar to try_grab_folio(): be sure to *also*
- * increment the normal page refcount field at least once,
+ * Increment the normal page refcount field at least once,
* so that the page really is pinned.
*/
if (folio_test_large(folio)) {
- folio_ref_add(folio, 1);
- atomic_add(1, &folio->_pincount);
+ folio_ref_add(folio, refs);
+ atomic_add(refs, &folio->_pincount);
} else {
- folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
+ folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS);
}
- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1);
+ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
}
return 0;
@@ -515,6 +427,102 @@ static int record_subpages(struct page *page, unsigned long sz,
return nr;
}
+
+/**
+ * try_grab_folio_fast() - Attempt to get or pin a folio in fast path.
+ * @page: pointer to page to be grabbed
+ * @refs: the value to (effectively) add to the folio's refcount
+ * @flags: gup flags: these are the FOLL_* flag values.
+ *
+ * "grab" names in this file mean, "look at flags to decide whether to use
+ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
+ *
+ * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
+ * same time. (That's true throughout the get_user_pages*() and
+ * pin_user_pages*() APIs.) Cases:
+ *
+ * FOLL_GET: folio's refcount will be incremented by @refs.
+ *
+ * FOLL_PIN on large folios: folio's refcount will be incremented by
+ * @refs, and its pincount will be incremented by @refs.
+ *
+ * FOLL_PIN on single-page folios: folio's refcount will be incremented by
+ * @refs * GUP_PIN_COUNTING_BIAS.
+ *
+ * Return: The folio containing @page (with refcount appropriately
+ * incremented) for success, or NULL upon failure. If neither FOLL_GET
+ * nor FOLL_PIN was set, that's considered failure, and furthermore,
+ * a likely bug in the caller, so a warning is also emitted.
+ *
+ * It uses add ref unless zero to elevate the folio refcount and must be called
+ * in fast path only.
+ */
+static struct folio *try_grab_folio_fast(struct page *page, int refs,
+ unsigned int flags)
+{
+ struct folio *folio;
+
+ /* Raise warn if it is not called in fast GUP */
+ VM_WARN_ON_ONCE(!irqs_disabled());
+
+ if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0))
+ return NULL;
+
+ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
+ return NULL;
+
+ if (flags & FOLL_GET)
+ return try_get_folio(page, refs);
+
+ /* FOLL_PIN is set */
+
+ /*
+ * Don't take a pin on the zero page - it's not going anywhere
+ * and it is used in a *lot* of places.
+ */
+ if (is_zero_page(page))
+ return page_folio(page);
+
+ folio = try_get_folio(page, refs);
+ if (!folio)
+ return NULL;
+
+ /*
+ * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
+ * right zone, so fail and let the caller fall back to the slow
+ * path.
+ */
+ if (unlikely((flags & FOLL_LONGTERM) &&
+ !folio_is_longterm_pinnable(folio))) {
+ if (!put_devmap_managed_folio_refs(folio, refs))
+ folio_put_refs(folio, refs);
+ return NULL;
+ }
+
+ /*
+ * When pinning a large folio, use an exact count to track it.
+ *
+ * However, be sure to *also* increment the normal folio
+ * refcount field at least once, so that the folio really
+ * is pinned. That's why the refcount from the earlier
+ * try_get_folio() is left intact.
+ */
+ if (folio_test_large(folio))
+ atomic_add(refs, &folio->_pincount);
+ else
+ folio_ref_add(folio,
+ refs * (GUP_PIN_COUNTING_BIAS - 1));
+ /*
+ * Adjust the pincount before re-checking the PTE for changes.
+ * This is essentially a smp_mb() and is paired with a memory
+ * barrier in folio_try_share_anon_rmap_*().
+ */
+ smp_mb__after_atomic();
+
+ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
+
+ return folio;
+}
#endif /* CONFIG_ARCH_HAS_HUGEPD || CONFIG_HAVE_GUP_FAST */
#ifdef CONFIG_ARCH_HAS_HUGEPD
@@ -535,7 +543,7 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
*/
static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz,
unsigned long addr, unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+ struct page **pages, int *nr, bool fast)
{
unsigned long pte_end;
struct page *page;
@@ -558,9 +566,15 @@ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz
page = pte_page(pte);
refs = record_subpages(page, sz, addr, end, pages + *nr);
- folio = try_grab_folio(page, refs, flags);
- if (!folio)
- return 0;
+ if (fast) {
+ folio = try_grab_folio_fast(page, refs, flags);
+ if (!folio)
+ return 0;
+ } else {
+ folio = page_folio(page);
+ if (try_grab_folio(folio, refs, flags))
+ return 0;
+ }
if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) {
gup_put_folio(folio, refs, flags);
@@ -588,7 +602,7 @@ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz
static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
unsigned long addr, unsigned int pdshift,
unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+ struct page **pages, int *nr, bool fast)
{
pte_t *ptep;
unsigned long sz = 1UL << hugepd_shift(hugepd);
@@ -598,7 +612,8 @@ static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
ptep = hugepte_offset(hugepd, addr, pdshift);
do {
next = hugepte_addr_end(addr, end, sz);
- ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr);
+ ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr,
+ fast);
if (ret != 1)
return ret;
} while (ptep++, addr = next, addr != end);
@@ -625,7 +640,7 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
ptep = hugepte_offset(hugepd, addr, pdshift);
ptl = huge_pte_lock(h, vma->vm_mm, ptep);
ret = gup_hugepd(vma, hugepd, addr, pdshift, addr + PAGE_SIZE,
- flags, &page, &nr);
+ flags, &page, &nr, false);
spin_unlock(ptl);
if (ret == 1) {
@@ -642,7 +657,7 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
static inline int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
unsigned long addr, unsigned int pdshift,
unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+ struct page **pages, int *nr, bool fast)
{
return 0;
}
@@ -729,7 +744,7 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma,
gup_must_unshare(vma, flags, page))
return ERR_PTR(-EMLINK);
- ret = try_grab_page(page, flags);
+ ret = try_grab_folio(page_folio(page), 1, flags);
if (ret)
page = ERR_PTR(ret);
else
@@ -806,7 +821,7 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
!PageAnonExclusive(page), page);
- ret = try_grab_page(page, flags);
+ ret = try_grab_folio(page_folio(page), 1, flags);
if (ret)
return ERR_PTR(ret);
@@ -968,8 +983,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
!PageAnonExclusive(page), page);
- /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
- ret = try_grab_page(page, flags);
+ /* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */
+ ret = try_grab_folio(page_folio(page), 1, flags);
if (unlikely(ret)) {
page = ERR_PTR(ret);
goto out;
@@ -1233,7 +1248,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
goto unmap;
*page = pte_page(entry);
}
- ret = try_grab_page(*page, gup_flags);
+ ret = try_grab_folio(page_folio(*page), 1, gup_flags);
if (unlikely(ret))
goto unmap;
out:
@@ -1636,20 +1651,19 @@ next_page:
* pages.
*/
if (page_increm > 1) {
- struct folio *folio;
+ struct folio *folio = page_folio(page);
/*
* Since we already hold refcount on the
* large folio, this should never fail.
*/
- folio = try_grab_folio(page, page_increm - 1,
- foll_flags);
- if (WARN_ON_ONCE(!folio)) {
+ if (try_grab_folio(folio, page_increm - 1,
+ foll_flags)) {
/*
* Release the 1st page ref if the
* folio is problematic, fail hard.
*/
- gup_put_folio(page_folio(page), 1,
+ gup_put_folio(folio, 1,
foll_flags);
ret = -EFAULT;
goto out;
@@ -2797,7 +2811,6 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
* This code is based heavily on the PowerPC implementation by Nick Piggin.
*/
#ifdef CONFIG_HAVE_GUP_FAST
-
/*
* Used in the GUP-fast path to determine whether GUP is permitted to work on
* a specific folio.
@@ -2962,7 +2975,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
- folio = try_grab_folio(page, 1, flags);
+ folio = try_grab_folio_fast(page, 1, flags);
if (!folio)
goto pte_unmap;
@@ -3049,7 +3062,7 @@ static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr,
break;
}
- folio = try_grab_folio(page, 1, flags);
+ folio = try_grab_folio_fast(page, 1, flags);
if (!folio) {
gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
break;
@@ -3138,7 +3151,7 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
page = pmd_page(orig);
refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr);
- folio = try_grab_folio(page, refs, flags);
+ folio = try_grab_folio_fast(page, refs, flags);
if (!folio)
return 0;
@@ -3182,7 +3195,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
page = pud_page(orig);
refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr);
- folio = try_grab_folio(page, refs, flags);
+ folio = try_grab_folio_fast(page, refs, flags);
if (!folio)
return 0;
@@ -3222,7 +3235,7 @@ static int gup_fast_pgd_leaf(pgd_t orig, pgd_t *pgdp, unsigned long addr,
page = pgd_page(orig);
refs = record_subpages(page, PGDIR_SIZE, addr, end, pages + *nr);
- folio = try_grab_folio(page, refs, flags);
+ folio = try_grab_folio_fast(page, refs, flags);
if (!folio)
return 0;
@@ -3276,7 +3289,8 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
* pmd format and THP pmd format
*/
if (gup_hugepd(NULL, __hugepd(pmd_val(pmd)), addr,
- PMD_SHIFT, next, flags, pages, nr) != 1)
+ PMD_SHIFT, next, flags, pages, nr,
+ true) != 1)
return 0;
} else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags,
pages, nr))
@@ -3306,7 +3320,8 @@ static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
return 0;
} else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
if (gup_hugepd(NULL, __hugepd(pud_val(pud)), addr,
- PUD_SHIFT, next, flags, pages, nr) != 1)
+ PUD_SHIFT, next, flags, pages, nr,
+ true) != 1)
return 0;
} else if (!gup_fast_pmd_range(pudp, pud, addr, next, flags,
pages, nr))
@@ -3333,7 +3348,8 @@ static int gup_fast_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
BUILD_BUG_ON(p4d_leaf(p4d));
if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
if (gup_hugepd(NULL, __hugepd(p4d_val(p4d)), addr,
- P4D_SHIFT, next, flags, pages, nr) != 1)
+ P4D_SHIFT, next, flags, pages, nr,
+ true) != 1)
return 0;
} else if (!gup_fast_pud_range(p4dp, p4d, addr, next, flags,
pages, nr))
@@ -3362,7 +3378,8 @@ static void gup_fast_pgd_range(unsigned long addr, unsigned long end,
return;
} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
if (gup_hugepd(NULL, __hugepd(pgd_val(pgd)), addr,
- PGDIR_SHIFT, next, flags, pages, nr) != 1)
+ PGDIR_SHIFT, next, flags, pages, nr,
+ true) != 1)
return;
} else if (!gup_fast_p4d_range(pgdp, pgd, addr, next, flags,
pages, nr))
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index db7946a0a28c..2120f7478e55 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1331,7 +1331,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
if (!*pgmap)
return ERR_PTR(-EFAULT);
page = pfn_to_page(pfn);
- ret = try_grab_page(page, flags);
+ ret = try_grab_folio(page_folio(page), 1, flags);
if (ret)
page = ERR_PTR(ret);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f35abff8be60..43e1af868cfd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1625,13 +1625,10 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio,
* folio appears as just a compound page. Otherwise, wait until after
* allocating vmemmap to clear the flag.
*
- * A reference is held on the folio, except in the case of demote.
- *
* Must be called with hugetlb lock held.
*/
-static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
- bool adjust_surplus,
- bool demote)
+static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
+ bool adjust_surplus)
{
int nid = folio_nid(folio);
@@ -1645,6 +1642,7 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
list_del(&folio->lru);
if (folio_test_hugetlb_freed(folio)) {
+ folio_clear_hugetlb_freed(folio);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
}
@@ -1661,33 +1659,13 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
if (!folio_test_hugetlb_vmemmap_optimized(folio))
__folio_clear_hugetlb(folio);
- /*
- * In the case of demote we do not ref count the page as it will soon
- * be turned into a page of smaller size.
- */
- if (!demote)
- folio_ref_unfreeze(folio, 1);
-
h->nr_huge_pages--;
h->nr_huge_pages_node[nid]--;
}
-static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
- bool adjust_surplus)
-{
- __remove_hugetlb_folio(h, folio, adjust_surplus, false);
-}
-
-static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio,
- bool adjust_surplus)
-{
- __remove_hugetlb_folio(h, folio, adjust_surplus, true);
-}
-
static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
bool adjust_surplus)
{
- int zeroed;
int nid = folio_nid(folio);
VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio);
@@ -1711,21 +1689,6 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
*/
folio_set_hugetlb_vmemmap_optimized(folio);
- /*
- * This folio is about to be managed by the hugetlb allocator and
- * should have no users. Drop our reference, and check for others
- * just in case.
- */
- zeroed = folio_put_testzero(folio);
- if (unlikely(!zeroed))
- /*
- * It is VERY unlikely soneone else has taken a ref
- * on the folio. In this case, we simply return as
- * free_huge_folio() will be called when this other ref
- * is dropped.
- */
- return;
-
arch_clear_hugetlb_flags(folio);
enqueue_hugetlb_folio(h, folio);
}
@@ -1763,13 +1726,6 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
}
/*
- * Move PageHWPoison flag from head page to the raw error pages,
- * which makes any healthy subpages reusable.
- */
- if (unlikely(folio_test_hwpoison(folio)))
- folio_clear_hugetlb_hwpoison(folio);
-
- /*
* If vmemmap pages were allocated above, then we need to clear the
* hugetlb flag under the hugetlb lock.
*/
@@ -1780,6 +1736,15 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
}
/*
+ * Move PageHWPoison flag from head page to the raw error pages,
+ * which makes any healthy subpages reusable.
+ */
+ if (unlikely(folio_test_hwpoison(folio)))
+ folio_clear_hugetlb_hwpoison(folio);
+
+ folio_ref_unfreeze(folio, 1);
+
+ /*
* Non-gigantic pages demoted from CMA allocated gigantic pages
* need to be given back to CMA in free_gigantic_folio.
*/
@@ -2197,6 +2162,9 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
nid = numa_mem_id();
retry:
folio = __folio_alloc(gfp_mask, order, nid, nmask);
+ /* Ensure hugetlb folio won't have large_rmappable flag set. */
+ if (folio)
+ folio_clear_large_rmappable(folio);
if (folio && !folio_ref_freeze(folio, 1)) {
folio_put(folio);
@@ -3079,11 +3047,8 @@ retry:
free_new:
spin_unlock_irq(&hugetlb_lock);
- if (new_folio) {
- /* Folio has a zero ref count, but needs a ref to be freed */
- folio_ref_unfreeze(new_folio, 1);
+ if (new_folio)
update_and_free_hugetlb_folio(h, new_folio, false);
- }
return ret;
}
@@ -3938,7 +3903,7 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio)
target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
- remove_hugetlb_folio_for_demote(h, folio, false);
+ remove_hugetlb_folio(h, folio, false);
spin_unlock_irq(&hugetlb_lock);
/*
@@ -3952,7 +3917,6 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio)
if (rc) {
/* Allocation of vmemmmap failed, we can not demote folio */
spin_lock_irq(&hugetlb_lock);
- folio_ref_unfreeze(folio, 1);
add_hugetlb_folio(h, folio, false);
return rc;
}
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index b9a55322e52c..8193906515c6 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -446,6 +446,8 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
unsigned long vmemmap_reuse;
VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio);
+
if (!folio_test_hugetlb_vmemmap_optimized(folio))
return 0;
@@ -481,6 +483,9 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
*/
int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio)
{
+ /* avoid writes from page_ref_add_unless() while unfolding vmemmap */
+ synchronize_rcu();
+
return __hugetlb_vmemmap_restore_folio(h, folio, 0);
}
@@ -505,6 +510,9 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
long restored = 0;
long ret = 0;
+ /* avoid writes from page_ref_add_unless() while unfolding vmemmap */
+ synchronize_rcu();
+
list_for_each_entry_safe(folio, t_folio, folio_list, lru) {
if (folio_test_hugetlb_vmemmap_optimized(folio)) {
ret = __hugetlb_vmemmap_restore_folio(h, folio,
@@ -550,6 +558,8 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
unsigned long vmemmap_reuse;
VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio);
+
if (!vmemmap_should_optimize_folio(h, folio))
return ret;
@@ -601,6 +611,9 @@ void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio)
{
LIST_HEAD(vmemmap_pages);
+ /* avoid writes from page_ref_add_unless() while folding vmemmap */
+ synchronize_rcu();
+
__hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, 0);
free_vmemmap_page_list(&vmemmap_pages);
}
@@ -644,6 +657,9 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
flush_tlb_all();
+ /* avoid writes from page_ref_add_unless() while folding vmemmap */
+ synchronize_rcu();
+
list_for_each_entry(folio, folio_list, lru) {
int ret;
diff --git a/mm/internal.h b/mm/internal.h
index 6902b7dd8509..cc2c5e07fad3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1182,8 +1182,8 @@ int migrate_device_coherent_page(struct page *page);
/*
* mm/gup.c
*/
-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
-int __must_check try_grab_page(struct page *page, unsigned int flags);
+int __must_check try_grab_folio(struct folio *folio, int refs,
+ unsigned int flags);
/*
* mm/huge_memory.c
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 71fe2a95b8bd..8f2f1bb18c9c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -7823,17 +7823,6 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
/* Transfer the charge and the css ref */
commit_charge(new, memcg);
- /*
- * If the old folio is a large folio and is in the split queue, it needs
- * to be removed from the split queue now, in case getting an incorrect
- * split queue in destroy_large_folio() after the memcg of the old folio
- * is cleared.
- *
- * In addition, the old folio is about to be freed after migration, so
- * removing from the split queue a bit earlier seems reasonable.
- */
- if (folio_test_large(old) && folio_test_large_rmappable(old))
- folio_undo_large_rmappable(old);
old->memcg_data = 0;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 20cb9f5f7446..a8c6f466e33a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -415,6 +415,15 @@ int folio_migrate_mapping(struct address_space *mapping,
if (folio_ref_count(folio) != expected_count)
return -EAGAIN;
+ /* Take off deferred split queue while frozen and memcg set */
+ if (folio_test_large(folio) &&
+ folio_test_large_rmappable(folio)) {
+ if (!folio_ref_freeze(folio, expected_count))
+ return -EAGAIN;
+ folio_undo_large_rmappable(folio);
+ folio_ref_unfreeze(folio, expected_count);
+ }
+
/* No turning back from here */
newfolio->index = folio->index;
newfolio->mapping = folio->mapping;
@@ -433,6 +442,10 @@ int folio_migrate_mapping(struct address_space *mapping,
return -EAGAIN;
}
+ /* Take off deferred split queue while frozen and memcg set */
+ if (folio_test_large(folio) && folio_test_large_rmappable(folio))
+ folio_undo_large_rmappable(folio);
+
/*
* Now we know that no one else is looking at the folio:
* no turning back from here.
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 12c9297ed4a7..8a1c92090129 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -415,13 +415,20 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
else
bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;
- if (bg_thresh >= thresh)
- bg_thresh = thresh / 2;
tsk = current;
if (rt_task(tsk)) {
bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
}
+ /*
+ * Dirty throttling logic assumes the limits in page units fit into
+ * 32-bits. This gives 16TB dirty limits max which is hopefully enough.
+ */
+ if (thresh > UINT_MAX)
+ thresh = UINT_MAX;
+ /* This makes sure bg_thresh is within 32-bits as well */
+ if (bg_thresh >= thresh)
+ bg_thresh = thresh / 2;
dtc->thresh = thresh;
dtc->bg_thresh = bg_thresh;
@@ -471,7 +478,11 @@ static unsigned long node_dirty_limit(struct pglist_data *pgdat)
if (rt_task(tsk))
dirty += dirty / 4;
- return dirty;
+ /*
+ * Dirty throttling logic assumes the limits in page units fit into
+ * 32-bits. This gives 16TB dirty limits max which is hopefully enough.
+ */
+ return min_t(unsigned long, dirty, UINT_MAX);
}
/**
@@ -508,10 +519,17 @@ static int dirty_background_bytes_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
+ unsigned long old_bytes = dirty_background_bytes;
ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
- if (ret == 0 && write)
+ if (ret == 0 && write) {
+ if (DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE) >
+ UINT_MAX) {
+ dirty_background_bytes = old_bytes;
+ return -ERANGE;
+ }
dirty_background_ratio = 0;
+ }
return ret;
}
@@ -537,6 +555,10 @@ static int dirty_bytes_handler(struct ctl_table *table, int write,
ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
+ if (DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) > UINT_MAX) {
+ vm_dirty_bytes = old_bytes;
+ return -ERANGE;
+ }
writeback_set_ratelimit();
vm_dirty_ratio = 0;
}
@@ -1660,7 +1682,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
*/
dtc->wb_thresh = __wb_calc_thresh(dtc, dtc->thresh);
dtc->wb_bg_thresh = dtc->thresh ?
- div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
+ div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
/*
* In order to avoid the stacked BDI deadlock we need
diff --git a/mm/readahead.c b/mm/readahead.c
index c1b23989d9ca..817b2a352d78 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -503,11 +503,11 @@ void page_cache_ra_order(struct readahead_control *ractl,
limit = min(limit, index + ra->size - 1);
- if (new_order < MAX_PAGECACHE_ORDER) {
+ if (new_order < MAX_PAGECACHE_ORDER)
new_order += 2;
- new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
- new_order = min_t(unsigned int, new_order, ilog2(ra->size));
- }
+
+ new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
+ new_order = min_t(unsigned int, new_order, ilog2(ra->size));
/* See comment in page_cache_ra_unbounded() */
nofs = memalloc_nofs_save();
diff --git a/mm/shmem.c b/mm/shmem.c
index a8b181a63402..c1befe046c7e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -541,8 +541,9 @@ static bool shmem_confirm_swap(struct address_space *mapping,
static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
-bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
- struct mm_struct *mm, unsigned long vm_flags)
+static bool __shmem_is_huge(struct inode *inode, pgoff_t index,
+ bool shmem_huge_force, struct mm_struct *mm,
+ unsigned long vm_flags)
{
loff_t i_size;
@@ -573,6 +574,16 @@ bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
}
}
+bool shmem_is_huge(struct inode *inode, pgoff_t index,
+ bool shmem_huge_force, struct mm_struct *mm,
+ unsigned long vm_flags)
+{
+ if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER)
+ return false;
+
+ return __shmem_is_huge(inode, index, shmem_huge_force, mm, vm_flags);
+}
+
#if defined(CONFIG_SYSFS)
static int shmem_parse_huge(const char *str)
{
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d0cbdd7c1e5b..e34ea860153f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2543,7 +2543,15 @@ static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
static struct xarray *
addr_to_vb_xa(unsigned long addr)
{
- int index = (addr / VMAP_BLOCK_SIZE) % num_possible_cpus();
+ int index = (addr / VMAP_BLOCK_SIZE) % nr_cpu_ids;
+
+ /*
+ * Please note, nr_cpu_ids points on a highest set
+ * possible bit, i.e. we never invoke cpumask_next()
+ * if an index points on it which is nr_cpu_ids - 1.
+ */
+ if (!cpu_possible(index))
+ index = cpumask_next(index, cpu_possible_mask);
return &per_cpu(vmap_block_queue, index).vmap_blocks;
}
diff --git a/mm/workingset.c b/mm/workingset.c
index c22adb93622a..a2b28e356e68 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -412,10 +412,12 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
* @file: whether the corresponding folio is from the file lru.
* @workingset: where the workingset value unpacked from shadow should
* be stored.
+ * @flush: whether to flush cgroup rstat.
*
* Return: true if the shadow is for a recently evicted folio; false otherwise.
*/
-bool workingset_test_recent(void *shadow, bool file, bool *workingset)
+bool workingset_test_recent(void *shadow, bool file, bool *workingset,
+ bool flush)
{
struct mem_cgroup *eviction_memcg;
struct lruvec *eviction_lruvec;
@@ -467,10 +469,16 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset)
/*
* Flush stats (and potentially sleep) outside the RCU read section.
+ *
+ * Note that workingset_test_recent() itself might be called in RCU read
+ * section (for e.g, in cachestat) - these callers need to skip flushing
+ * stats (via the flush argument).
+ *
* XXX: With per-memcg flushing and thresholding, is ratelimiting
* still needed here?
*/
- mem_cgroup_flush_stats_ratelimited(eviction_memcg);
+ if (flush)
+ mem_cgroup_flush_stats_ratelimited(eviction_memcg);
eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
refault = atomic_long_read(&eviction_lruvec->nonresident_age);
@@ -558,7 +566,7 @@ void workingset_refault(struct folio *folio, void *shadow)
mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
- if (!workingset_test_recent(shadow, file, &workingset))
+ if (!workingset_test_recent(shadow, file, &workingset, true))
return;
folio_set_active(folio);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 0c76dcde5361..080053a85b4d 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -899,8 +899,8 @@ static int hci_conn_hash_alloc_unset(struct hci_dev *hdev)
U16_MAX, GFP_ATOMIC);
}
-struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
- u8 role, u16 handle)
+static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
+ u8 role, u16 handle)
{
struct hci_conn *conn;
@@ -1041,7 +1041,16 @@ struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type,
if (unlikely(handle < 0))
return ERR_PTR(-ECONNREFUSED);
- return hci_conn_add(hdev, type, dst, role, handle);
+ return __hci_conn_add(hdev, type, dst, role, handle);
+}
+
+struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
+ u8 role, u16 handle)
+{
+ if (handle > HCI_CONN_HANDLE_MAX)
+ return ERR_PTR(-EINVAL);
+
+ return __hci_conn_add(hdev, type, dst, role, handle);
}
static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index dd3b0f501018..c644b30977bd 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -63,50 +63,6 @@ DEFINE_MUTEX(hci_cb_list_lock);
/* HCI ID Numbering */
static DEFINE_IDA(hci_index_ida);
-static int hci_scan_req(struct hci_request *req, unsigned long opt)
-{
- __u8 scan = opt;
-
- BT_DBG("%s %x", req->hdev->name, scan);
-
- /* Inquiry and Page scans */
- hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
- return 0;
-}
-
-static int hci_auth_req(struct hci_request *req, unsigned long opt)
-{
- __u8 auth = opt;
-
- BT_DBG("%s %x", req->hdev->name, auth);
-
- /* Authentication */
- hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);
- return 0;
-}
-
-static int hci_encrypt_req(struct hci_request *req, unsigned long opt)
-{
- __u8 encrypt = opt;
-
- BT_DBG("%s %x", req->hdev->name, encrypt);
-
- /* Encryption */
- hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt);
- return 0;
-}
-
-static int hci_linkpol_req(struct hci_request *req, unsigned long opt)
-{
- __le16 policy = cpu_to_le16(opt);
-
- BT_DBG("%s %x", req->hdev->name, policy);
-
- /* Default link policy */
- hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy);
- return 0;
-}
-
/* Get HCI device by index.
* Device is held on return. */
struct hci_dev *hci_dev_get(int index)
@@ -735,6 +691,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
{
struct hci_dev *hdev;
struct hci_dev_req dr;
+ __le16 policy;
int err = 0;
if (copy_from_user(&dr, arg, sizeof(dr)))
@@ -761,8 +718,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
switch (cmd) {
case HCISETAUTH:
- err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_AUTH_ENABLE,
+ 1, &dr.dev_opt, HCI_CMD_TIMEOUT);
break;
case HCISETENCRYPT:
@@ -773,19 +730,23 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
if (!test_bit(HCI_AUTH, &hdev->flags)) {
/* Auth must be enabled first */
- err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = __hci_cmd_sync_status(hdev,
+ HCI_OP_WRITE_AUTH_ENABLE,
+ 1, &dr.dev_opt,
+ HCI_CMD_TIMEOUT);
if (err)
break;
}
- err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_ENCRYPT_MODE,
+ 1, &dr.dev_opt,
+ HCI_CMD_TIMEOUT);
break;
case HCISETSCAN:
- err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SCAN_ENABLE,
+ 1, &dr.dev_opt,
+ HCI_CMD_TIMEOUT);
/* Ensure that the connectable and discoverable states
* get correctly modified as this was a non-mgmt change.
@@ -795,8 +756,11 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
break;
case HCISETLINKPOL:
- err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt,
- HCI_INIT_TIMEOUT, NULL);
+ policy = cpu_to_le16(dr.dev_opt);
+
+ err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_DEF_LINK_POLICY,
+ 2, &policy,
+ HCI_CMD_TIMEOUT);
break;
case HCISETLINKMODE:
@@ -2751,7 +2715,11 @@ void hci_unregister_dev(struct hci_dev *hdev)
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
+ cancel_work_sync(&hdev->rx_work);
+ cancel_work_sync(&hdev->cmd_work);
+ cancel_work_sync(&hdev->tx_work);
cancel_work_sync(&hdev->power_on);
+ cancel_work_sync(&hdev->error_reset);
hci_cmd_sync_clear(hdev);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index a487f9df8145..93f7ac905cec 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -6311,6 +6311,13 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data,
evt_type = __le16_to_cpu(info->type) & LE_EXT_ADV_EVT_TYPE_MASK;
legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type);
+
+ if (test_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY,
+ &hdev->quirks)) {
+ info->primary_phy &= 0x1f;
+ info->secondary_phy &= 0x1f;
+ }
+
if (legacy_evt_type != LE_ADV_INVALID) {
process_adv_report(hdev, legacy_evt_type, &info->bdaddr,
info->bdaddr_type, NULL, 0,
@@ -6660,6 +6667,7 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
struct bt_iso_qos *qos;
bool pending = false;
u16 handle = __le16_to_cpu(ev->handle);
+ u32 c_sdu_interval, p_sdu_interval;
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6684,12 +6692,25 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
pending = test_and_clear_bit(HCI_CONN_CREATE_CIS, &conn->flags);
- /* Convert ISO Interval (1.25 ms slots) to SDU Interval (us) */
- qos->ucast.in.interval = le16_to_cpu(ev->interval) * 1250;
- qos->ucast.out.interval = qos->ucast.in.interval;
+ /* BLUETOOTH CORE SPECIFICATION Version 5.4 | Vol 6, Part G
+ * page 3075:
+ * Transport_Latency_C_To_P = CIG_Sync_Delay + (FT_C_To_P) ×
+ * ISO_Interval + SDU_Interval_C_To_P
+ * ...
+ * SDU_Interval = (CIG_Sync_Delay + (FT) x ISO_Interval) -
+ * Transport_Latency
+ */
+ c_sdu_interval = (get_unaligned_le24(ev->cig_sync_delay) +
+ (ev->c_ft * le16_to_cpu(ev->interval) * 1250)) -
+ get_unaligned_le24(ev->c_latency);
+ p_sdu_interval = (get_unaligned_le24(ev->cig_sync_delay) +
+ (ev->p_ft * le16_to_cpu(ev->interval) * 1250)) -
+ get_unaligned_le24(ev->p_latency);
switch (conn->role) {
case HCI_ROLE_SLAVE:
+ qos->ucast.in.interval = c_sdu_interval;
+ qos->ucast.out.interval = p_sdu_interval;
/* Convert Transport Latency (us) to Latency (msec) */
qos->ucast.in.latency =
DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency),
@@ -6703,6 +6724,8 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
qos->ucast.out.phy = ev->p_phy;
break;
case HCI_ROLE_MASTER:
+ qos->ucast.in.interval = p_sdu_interval;
+ qos->ucast.out.interval = c_sdu_interval;
/* Convert Transport Latency (us) to Latency (msec) */
qos->ucast.out.latency =
DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency),
@@ -6893,6 +6916,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bis = hci_conn_hash_lookup_handle(hdev, handle);
if (!bis) {
+ if (handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_dbg(hdev, "ignore too large handle %u", handle);
+ continue;
+ }
bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY,
HCI_ROLE_SLAVE, handle);
if (IS_ERR(bis))
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index a8a7d2b36870..eea34e6a236f 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -280,6 +280,19 @@ int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
}
EXPORT_SYMBOL(__hci_cmd_sync_status);
+int hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u32 timeout)
+{
+ int err;
+
+ hci_req_sync_lock(hdev);
+ err = __hci_cmd_sync_status(hdev, opcode, plen, param, timeout);
+ hci_req_sync_unlock(hdev);
+
+ return err;
+}
+EXPORT_SYMBOL(hci_cmd_sync_status);
+
static void hci_cmd_sync_work(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_work);
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index cc055b952ce6..398fb81f7a13 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -1356,8 +1356,7 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
lock_sock(sk);
switch (sk->sk_state) {
case BT_CONNECT2:
- if (pi->conn->hcon &&
- test_bit(HCI_CONN_PA_SYNC, &pi->conn->hcon->flags)) {
+ if (test_bit(BT_SK_PA_SYNC, &pi->flags)) {
iso_conn_big_sync(sk);
sk->sk_state = BT_LISTEN;
} else {
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index aed025734d04..c3c26bbb5dda 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -6761,6 +6761,8 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
BT_DBG("chan %p, len %d", chan, skb->len);
+ l2cap_chan_lock(chan);
+
if (chan->state != BT_BOUND && chan->state != BT_CONNECTED)
goto drop;
@@ -6777,6 +6779,7 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
}
drop:
+ l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
free_skb:
kfree_skb(skb);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 6db60946c627..ba437c6f6ee5 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1239,6 +1239,10 @@ static void l2cap_sock_kill(struct sock *sk)
BT_DBG("sk %p state %s", sk, state_to_string(sk->sk_state));
+ /* Sock is dead, so set chan data to NULL, avoid other task use invalid
+ * sock pointer.
+ */
+ l2cap_pi(sk)->chan->data = NULL;
/* Kill poor orphan */
l2cap_chan_put(l2cap_pi(sk)->chan);
@@ -1481,12 +1485,16 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
{
- struct sock *sk = chan->data;
- struct l2cap_pinfo *pi = l2cap_pi(sk);
+ struct sock *sk;
+ struct l2cap_pinfo *pi;
int err;
- lock_sock(sk);
+ sk = chan->data;
+ if (!sk)
+ return -ENXIO;
+ pi = l2cap_pi(sk);
+ lock_sock(sk);
if (chan->mode == L2CAP_MODE_ERTM && !list_empty(&pi->rx_busy)) {
err = -ENOMEM;
goto done;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index a6d7f790cdda..6d7a442ceb89 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -127,9 +127,10 @@ struct xdp_test_data {
#define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head))
#define TEST_XDP_MAX_BATCH 256
-static void xdp_test_run_init_page(struct page *page, void *arg)
+static void xdp_test_run_init_page(netmem_ref netmem, void *arg)
{
- struct xdp_page_head *head = phys_to_virt(page_to_phys(page));
+ struct xdp_page_head *head =
+ phys_to_virt(page_to_phys(netmem_to_page(netmem)));
struct xdp_buff *new_ctx, *orig_ctx;
u32 headroom = XDP_PACKET_HEADROOM;
struct xdp_test_data *xdp = arg;
@@ -992,7 +993,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
void *data;
int ret;
- if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size)
+ if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) ||
+ kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
data = bpf_test_init(kattr, kattr->test.data_size_in,
@@ -1040,6 +1042,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
__skb_put(skb, size);
+
if (ctx && ctx->ifindex > 1) {
dev = dev_get_by_index(net, ctx->ifindex);
if (!dev) {
@@ -1075,9 +1078,19 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
__skb_push(skb, hh_len);
if (is_direct_pkt_access)
bpf_compute_data_pointers(skb);
+
ret = convert___skb_to_skb(skb, ctx);
if (ret)
goto out;
+
+ if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) {
+ const int off = skb_network_offset(skb);
+ int len = skb->len - off;
+
+ skb->csum = skb_checksum(skb, off, len, 0);
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ }
+
ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
if (ret)
goto out;
@@ -1092,6 +1105,20 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
}
memset(__skb_push(skb, hh_len), 0, hh_len);
}
+
+ if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) {
+ const int off = skb_network_offset(skb);
+ int len = skb->len - off;
+ __wsum csum;
+
+ csum = skb_checksum(skb, off, len, 0);
+
+ if (csum_fold(skb->csum) != csum_fold(csum)) {
+ ret = -EBADMSG;
+ goto out;
+ }
+ }
+
convert_skb_to___skb(skb, ctx);
size = skb->len;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index e614cfd8e14a..a40f733b37d7 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -416,15 +416,23 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
- struct page *page = skb_frag_page(frag);
- u8 *vaddr = kmap(page);
+ u32 p_off, p_len, copied;
+ struct page *p;
+ u8 *vaddr;
if (copy > len)
copy = len;
- n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
- vaddr + skb_frag_off(frag) + offset - start,
- copy, data, to);
- kunmap(page);
+
+ n = 0;
+ skb_frag_foreach_page(frag,
+ skb_frag_off(frag) + offset - start,
+ copy, p, p_off, p_len, copied) {
+ vaddr = kmap_local_page(p);
+ n += INDIRECT_CALL_1(cb, simple_copy_to_iter,
+ vaddr + p_off, p_len, data, to);
+ kunmap_local(vaddr);
+ }
+
offset += n;
if (n != copy)
goto short_copy;
@@ -610,16 +618,10 @@ fault:
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
-int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
- struct sk_buff *skb, struct iov_iter *from,
- size_t length)
+int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
+ struct iov_iter *from, size_t length)
{
- int frag;
-
- if (msg && msg->msg_ubuf && msg->sg_from_iter)
- return msg->sg_from_iter(sk, skb, from, length);
-
- frag = skb_shinfo(skb)->nr_frags;
+ int frag = skb_shinfo(skb)->nr_frags;
while (length && iov_iter_count(from)) {
struct page *head, *last_head = NULL;
@@ -627,7 +629,6 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
int refs, order, n = 0;
size_t start;
ssize_t copied;
- unsigned long truesize;
if (frag == MAX_SKB_FRAGS)
return -EMSGSIZE;
@@ -639,17 +640,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
length -= copied;
- truesize = PAGE_ALIGN(copied + start);
skb->data_len += copied;
skb->len += copied;
- skb->truesize += truesize;
- if (sk && sk->sk_type == SOCK_STREAM) {
- sk_wmem_queued_add(sk, truesize);
- if (!skb_zcopy_pure(skb))
- sk_mem_charge(sk, truesize);
- } else {
- refcount_add(truesize, &skb->sk->sk_wmem_alloc);
- }
+ skb->truesize += PAGE_ALIGN(copied + start);
head = compound_head(pages[n]);
order = compound_order(head);
@@ -692,6 +685,30 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
}
return 0;
}
+
+int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb, struct iov_iter *from,
+ size_t length)
+{
+ unsigned long orig_size = skb->truesize;
+ unsigned long truesize;
+ int ret;
+
+ if (msg && msg->msg_ubuf && msg->sg_from_iter)
+ ret = msg->sg_from_iter(skb, from, length);
+ else
+ ret = zerocopy_fill_skb_from_iter(skb, from, length);
+
+ truesize = skb->truesize - orig_size;
+ if (sk && sk->sk_type == SOCK_STREAM) {
+ sk_wmem_queued_add(sk, truesize);
+ if (!skb_zcopy_pure(skb))
+ sk_mem_charge(sk, truesize);
+ } else {
+ refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+ }
+ return ret;
+}
EXPORT_SYMBOL(__zerocopy_sg_from_iter);
/**
diff --git a/net/core/dev.c b/net/core/dev.c
index 0a23d7da7fbc..73e5af6943c3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5126,11 +5126,14 @@ static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
{
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+
if (xdp_prog) {
struct xdp_buff xdp;
u32 act;
int err;
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog);
if (act != XDP_PASS) {
switch (act) {
@@ -5144,11 +5147,13 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
generic_xdp_tx(*pskb, xdp_prog);
break;
}
+ bpf_net_ctx_clear(bpf_net_ctx);
return XDP_DROP;
}
}
return XDP_PASS;
out_redir:
+ bpf_net_ctx_clear(bpf_net_ctx);
kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP);
return XDP_DROP;
}
@@ -10336,6 +10341,10 @@ int register_netdevice(struct net_device *dev)
if (ret)
return ret;
+ /* rss ctx ID 0 is reserved for the default context, start from 1 */
+ xa_init_flags(&dev->ethtool->rss_ctx, XA_FLAGS_ALLOC1);
+ mutex_init(&dev->ethtool->rss_lock);
+
spin_lock_init(&dev->addr_list_lock);
netdev_set_addr_lockdep_class(dev);
@@ -11116,6 +11125,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->real_num_rx_queues = rxqs;
if (netif_alloc_rx_queues(dev))
goto free_all;
+ dev->ethtool = kzalloc(sizeof(*dev->ethtool), GFP_KERNEL_ACCOUNT);
+ if (!dev->ethtool)
+ goto free_all;
strcpy(dev->name, name);
dev->name_assign_type = name_assign_type;
@@ -11166,6 +11178,7 @@ void free_netdev(struct net_device *dev)
return;
}
+ kfree(dev->ethtool);
netif_free_tx_queues(dev);
netif_free_rx_queues(dev);
@@ -11231,6 +11244,34 @@ void synchronize_net(void)
}
EXPORT_SYMBOL(synchronize_net);
+static void netdev_rss_contexts_free(struct net_device *dev)
+{
+ struct ethtool_rxfh_context *ctx;
+ unsigned long context;
+
+ mutex_lock(&dev->ethtool->rss_lock);
+ xa_for_each(&dev->ethtool->rss_ctx, context, ctx) {
+ struct ethtool_rxfh_param rxfh;
+
+ rxfh.indir = ethtool_rxfh_context_indir(ctx);
+ rxfh.key = ethtool_rxfh_context_key(ctx);
+ rxfh.hfunc = ctx->hfunc;
+ rxfh.input_xfrm = ctx->input_xfrm;
+ rxfh.rss_context = context;
+ rxfh.rss_delete = true;
+
+ xa_erase(&dev->ethtool->rss_ctx, context);
+ if (dev->ethtool_ops->create_rxfh_context)
+ dev->ethtool_ops->remove_rxfh_context(dev, ctx,
+ context, NULL);
+ else
+ dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL);
+ kfree(ctx);
+ }
+ xa_destroy(&dev->ethtool->rss_ctx);
+ mutex_unlock(&dev->ethtool->rss_lock);
+}
+
/**
* unregister_netdevice_queue - remove device from the kernel
* @dev: device
@@ -11334,11 +11375,15 @@ void unregister_netdevice_many_notify(struct list_head *head,
netdev_name_node_alt_flush(dev);
netdev_name_node_free(dev->name_node);
+ netdev_rss_contexts_free(dev);
+
call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
+ mutex_destroy(&dev->ethtool->rss_lock);
+
if (skb)
rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh);
diff --git a/net/core/filter.c b/net/core/filter.c
index eb1c4425c06f..d767880c276d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4277,22 +4277,39 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
*/
void xdp_do_flush(void)
{
- __dev_flush();
- __cpu_map_flush();
- __xsk_map_flush();
+ struct list_head *lh_map, *lh_dev, *lh_xsk;
+
+ bpf_net_ctx_get_all_used_flush_lists(&lh_map, &lh_dev, &lh_xsk);
+ if (lh_dev)
+ __dev_flush(lh_dev);
+ if (lh_map)
+ __cpu_map_flush(lh_map);
+ if (lh_xsk)
+ __xsk_map_flush(lh_xsk);
}
EXPORT_SYMBOL_GPL(xdp_do_flush);
#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
void xdp_do_check_flushed(struct napi_struct *napi)
{
- bool ret;
+ struct list_head *lh_map, *lh_dev, *lh_xsk;
+ bool missed = false;
- ret = dev_check_flush();
- ret |= cpu_map_check_flush();
- ret |= xsk_map_check_flush();
+ bpf_net_ctx_get_all_used_flush_lists(&lh_map, &lh_dev, &lh_xsk);
+ if (lh_dev) {
+ __dev_flush(lh_dev);
+ missed = true;
+ }
+ if (lh_map) {
+ __cpu_map_flush(lh_map);
+ missed = true;
+ }
+ if (lh_xsk) {
+ __xsk_map_flush(lh_xsk);
+ missed = true;
+ }
- WARN_ONCE(ret, "Missing xdp_do_flush() invocation after NAPI by %ps\n",
+ WARN_ONCE(missed, "Missing xdp_do_flush() invocation after NAPI by %ps\n",
napi->poll);
}
#endif
@@ -6810,7 +6827,7 @@ static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6829,7 +6846,7 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6848,7 +6865,7 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6872,7 +6889,7 @@ static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6896,7 +6913,7 @@ static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6920,7 +6937,7 @@ static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6958,7 +6975,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -6982,7 +6999,7 @@ static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -7006,7 +7023,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -7026,7 +7043,7 @@ static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -7045,7 +7062,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -7064,7 +7081,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
@@ -11854,28 +11871,34 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
__bpf_kfunc_start_defs();
-__bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags,
- struct bpf_dynptr_kern *ptr__uninit)
+__bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
+ struct bpf_dynptr *ptr__uninit)
{
+ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit;
+ struct sk_buff *skb = (struct sk_buff *)s;
+
if (flags) {
- bpf_dynptr_set_null(ptr__uninit);
+ bpf_dynptr_set_null(ptr);
return -EINVAL;
}
- bpf_dynptr_init(ptr__uninit, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len);
+ bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len);
return 0;
}
-__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags,
- struct bpf_dynptr_kern *ptr__uninit)
+__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_md *x, u64 flags,
+ struct bpf_dynptr *ptr__uninit)
{
+ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit;
+ struct xdp_buff *xdp = (struct xdp_buff *)x;
+
if (flags) {
- bpf_dynptr_set_null(ptr__uninit);
+ bpf_dynptr_set_null(ptr);
return -EINVAL;
}
- bpf_dynptr_init(ptr__uninit, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp));
+ bpf_dynptr_init(ptr, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp));
return 0;
}
@@ -11901,10 +11924,11 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
return 0;
}
-__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk,
+__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct __sk_buff *s, struct sock *sk,
struct bpf_tcp_req_attrs *attrs, int attrs__sz)
{
#if IS_ENABLED(CONFIG_SYN_COOKIES)
+ struct sk_buff *skb = (struct sk_buff *)s;
const struct request_sock_ops *ops;
struct inet_request_sock *ireq;
struct tcp_request_sock *treq;
@@ -11999,16 +12023,17 @@ __bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk,
__bpf_kfunc_end_defs();
-int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
- struct bpf_dynptr_kern *ptr__uninit)
+int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
+ struct bpf_dynptr *ptr__uninit)
{
+ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit;
int err;
err = bpf_dynptr_from_skb(skb, flags, ptr__uninit);
if (err)
return err;
- bpf_dynptr_set_rdonly(ptr__uninit);
+ bpf_dynptr_set_rdonly(ptr);
return 0;
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 3927a0a7fa9a..855271a6cad2 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -327,19 +327,18 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
}
EXPORT_SYMBOL(page_pool_create);
-static void page_pool_return_page(struct page_pool *pool, struct page *page);
+static void page_pool_return_page(struct page_pool *pool, netmem_ref netmem);
-noinline
-static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
+static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool)
{
struct ptr_ring *r = &pool->ring;
- struct page *page;
+ netmem_ref netmem;
int pref_nid; /* preferred NUMA node */
/* Quicker fallback, avoid locks when ring is empty */
if (__ptr_ring_empty(r)) {
alloc_stat_inc(pool, empty);
- return NULL;
+ return 0;
}
/* Softirq guarantee CPU and thus NUMA node is stable. This,
@@ -354,57 +353,57 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
/* Refill alloc array, but only if NUMA match */
do {
- page = __ptr_ring_consume(r);
- if (unlikely(!page))
+ netmem = (__force netmem_ref)__ptr_ring_consume(r);
+ if (unlikely(!netmem))
break;
- if (likely(page_to_nid(page) == pref_nid)) {
- pool->alloc.cache[pool->alloc.count++] = page;
+ if (likely(page_to_nid(netmem_to_page(netmem)) == pref_nid)) {
+ pool->alloc.cache[pool->alloc.count++] = netmem;
} else {
/* NUMA mismatch;
* (1) release 1 page to page-allocator and
* (2) break out to fallthrough to alloc_pages_node.
* This limit stress on page buddy alloactor.
*/
- page_pool_return_page(pool, page);
+ page_pool_return_page(pool, netmem);
alloc_stat_inc(pool, waive);
- page = NULL;
+ netmem = 0;
break;
}
} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
/* Return last page */
if (likely(pool->alloc.count > 0)) {
- page = pool->alloc.cache[--pool->alloc.count];
+ netmem = pool->alloc.cache[--pool->alloc.count];
alloc_stat_inc(pool, refill);
}
- return page;
+ return netmem;
}
/* fast path */
-static struct page *__page_pool_get_cached(struct page_pool *pool)
+static netmem_ref __page_pool_get_cached(struct page_pool *pool)
{
- struct page *page;
+ netmem_ref netmem;
/* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
if (likely(pool->alloc.count)) {
/* Fast-path */
- page = pool->alloc.cache[--pool->alloc.count];
+ netmem = pool->alloc.cache[--pool->alloc.count];
alloc_stat_inc(pool, fast);
} else {
- page = page_pool_refill_alloc_cache(pool);
+ netmem = page_pool_refill_alloc_cache(pool);
}
- return page;
+ return netmem;
}
static void __page_pool_dma_sync_for_device(const struct page_pool *pool,
- const struct page *page,
+ netmem_ref netmem,
u32 dma_sync_size)
{
#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
- dma_addr_t dma_addr = page_pool_get_dma_addr(page);
+ dma_addr_t dma_addr = page_pool_get_dma_addr_netmem(netmem);
dma_sync_size = min(dma_sync_size, pool->p.max_len);
__dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset,
@@ -414,14 +413,14 @@ static void __page_pool_dma_sync_for_device(const struct page_pool *pool,
static __always_inline void
page_pool_dma_sync_for_device(const struct page_pool *pool,
- const struct page *page,
+ netmem_ref netmem,
u32 dma_sync_size)
{
if (pool->dma_sync && dma_dev_need_sync(pool->p.dev))
- __page_pool_dma_sync_for_device(pool, page, dma_sync_size);
+ __page_pool_dma_sync_for_device(pool, netmem, dma_sync_size);
}
-static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
+static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
{
dma_addr_t dma;
@@ -430,31 +429,32 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
* into page private data (i.e 32bit cpu with 64bit DMA caps)
* This mapping is kept for lifetime of page, until leaving pool.
*/
- dma = dma_map_page_attrs(pool->p.dev, page, 0,
- (PAGE_SIZE << pool->p.order),
- pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC |
- DMA_ATTR_WEAK_ORDERING);
+ dma = dma_map_page_attrs(pool->p.dev, netmem_to_page(netmem), 0,
+ (PAGE_SIZE << pool->p.order), pool->p.dma_dir,
+ DMA_ATTR_SKIP_CPU_SYNC |
+ DMA_ATTR_WEAK_ORDERING);
if (dma_mapping_error(pool->p.dev, dma))
return false;
- if (page_pool_set_dma_addr(page, dma))
+ if (page_pool_set_dma_addr_netmem(netmem, dma))
goto unmap_failed;
- page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
+ page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len);
return true;
unmap_failed:
- WARN_ON_ONCE("unexpected DMA address, please report to netdev@");
+ WARN_ONCE(1, "unexpected DMA address, please report to netdev@");
dma_unmap_page_attrs(pool->p.dev, dma,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
return false;
}
-static void page_pool_set_pp_info(struct page_pool *pool,
- struct page *page)
+static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
{
+ struct page *page = netmem_to_page(netmem);
+
page->pp = pool;
page->pp_magic |= PP_SIGNATURE;
@@ -464,13 +464,15 @@ static void page_pool_set_pp_info(struct page_pool *pool,
* is dirtying the same cache line as the page->pp_magic above, so
* the overhead is negligible.
*/
- page_pool_fragment_page(page, 1);
+ page_pool_fragment_netmem(netmem, 1);
if (pool->has_init_callback)
- pool->slow.init_callback(page, pool->slow.init_arg);
+ pool->slow.init_callback(netmem, pool->slow.init_arg);
}
-static void page_pool_clear_pp_info(struct page *page)
+static void page_pool_clear_pp_info(netmem_ref netmem)
{
+ struct page *page = netmem_to_page(netmem);
+
page->pp_magic = 0;
page->pp = NULL;
}
@@ -485,34 +487,34 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
if (unlikely(!page))
return NULL;
- if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page))) {
+ if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) {
put_page(page);
return NULL;
}
alloc_stat_inc(pool, slow_high_order);
- page_pool_set_pp_info(pool, page);
+ page_pool_set_pp_info(pool, page_to_netmem(page));
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
- trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
+ trace_page_pool_state_hold(pool, page_to_netmem(page),
+ pool->pages_state_hold_cnt);
return page;
}
/* slow path */
-noinline
-static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
- gfp_t gfp)
+static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool,
+ gfp_t gfp)
{
const int bulk = PP_ALLOC_CACHE_REFILL;
unsigned int pp_order = pool->p.order;
bool dma_map = pool->dma_map;
- struct page *page;
+ netmem_ref netmem;
int i, nr_pages;
/* Don't support bulk alloc for high-order pages */
if (unlikely(pp_order))
- return __page_pool_alloc_page_order(pool, gfp);
+ return page_to_netmem(__page_pool_alloc_page_order(pool, gfp));
/* Unnecessary as alloc cache is empty, but guarantees zero count */
if (unlikely(pool->alloc.count > 0))
@@ -521,56 +523,63 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
/* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */
memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);
- nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk,
- pool->alloc.cache);
+ nr_pages = alloc_pages_bulk_array_node(gfp,
+ pool->p.nid, bulk,
+ (struct page **)pool->alloc.cache);
if (unlikely(!nr_pages))
- return NULL;
+ return 0;
/* Pages have been filled into alloc.cache array, but count is zero and
* page element have not been (possibly) DMA mapped.
*/
for (i = 0; i < nr_pages; i++) {
- page = pool->alloc.cache[i];
- if (dma_map && unlikely(!page_pool_dma_map(pool, page))) {
- put_page(page);
+ netmem = pool->alloc.cache[i];
+ if (dma_map && unlikely(!page_pool_dma_map(pool, netmem))) {
+ put_page(netmem_to_page(netmem));
continue;
}
- page_pool_set_pp_info(pool, page);
- pool->alloc.cache[pool->alloc.count++] = page;
+ page_pool_set_pp_info(pool, netmem);
+ pool->alloc.cache[pool->alloc.count++] = netmem;
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
- trace_page_pool_state_hold(pool, page,
+ trace_page_pool_state_hold(pool, netmem,
pool->pages_state_hold_cnt);
}
/* Return last page */
if (likely(pool->alloc.count > 0)) {
- page = pool->alloc.cache[--pool->alloc.count];
+ netmem = pool->alloc.cache[--pool->alloc.count];
alloc_stat_inc(pool, slow);
} else {
- page = NULL;
+ netmem = 0;
}
/* When page just alloc'ed is should/must have refcnt 1. */
- return page;
+ return netmem;
}
/* For using page_pool replace: alloc_pages() API calls, but provide
* synchronization guarantee for allocation side.
*/
-struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
+netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp)
{
- struct page *page;
+ netmem_ref netmem;
/* Fast-path: Get a page from cache */
- page = __page_pool_get_cached(pool);
- if (page)
- return page;
+ netmem = __page_pool_get_cached(pool);
+ if (netmem)
+ return netmem;
/* Slow-path: cache empty, do real allocation */
- page = __page_pool_alloc_pages_slow(pool, gfp);
- return page;
+ netmem = __page_pool_alloc_pages_slow(pool, gfp);
+ return netmem;
+}
+EXPORT_SYMBOL(page_pool_alloc_netmem);
+
+struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
+{
+ return netmem_to_page(page_pool_alloc_netmem(pool, gfp));
}
EXPORT_SYMBOL(page_pool_alloc_pages);
ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL);
@@ -599,8 +608,8 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict)
return inflight;
}
-static __always_inline
-void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
+static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
+ netmem_ref netmem)
{
dma_addr_t dma;
@@ -610,13 +619,13 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
*/
return;
- dma = page_pool_get_dma_addr(page);
+ dma = page_pool_get_dma_addr_netmem(netmem);
/* When page is unmapped, it cannot be returned to our pool */
dma_unmap_page_attrs(pool->p.dev, dma,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
- page_pool_set_dma_addr(page, 0);
+ page_pool_set_dma_addr_netmem(netmem, 0);
}
/* Disconnects a page (from a page_pool). API users can have a need
@@ -624,35 +633,34 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
* a regular page (that will eventually be returned to the normal
* page-allocator via put_page).
*/
-void page_pool_return_page(struct page_pool *pool, struct page *page)
+void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
{
int count;
- __page_pool_release_page_dma(pool, page);
-
- page_pool_clear_pp_info(page);
+ __page_pool_release_page_dma(pool, netmem);
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
*/
count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
- trace_page_pool_state_release(pool, page, count);
+ trace_page_pool_state_release(pool, netmem, count);
- put_page(page);
+ page_pool_clear_pp_info(netmem);
+ put_page(netmem_to_page(netmem));
/* An optimization would be to call __free_pages(page, pool->p.order)
* knowing page is not part of page-cache (thus avoiding a
* __page_cache_release() call).
*/
}
-static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
+static bool page_pool_recycle_in_ring(struct page_pool *pool, netmem_ref netmem)
{
int ret;
/* BH protection not needed if current is softirq */
if (in_softirq())
- ret = ptr_ring_produce(&pool->ring, page);
+ ret = ptr_ring_produce(&pool->ring, (__force void *)netmem);
else
- ret = ptr_ring_produce_bh(&pool->ring, page);
+ ret = ptr_ring_produce_bh(&pool->ring, (__force void *)netmem);
if (!ret) {
recycle_stat_inc(pool, ring);
@@ -667,7 +675,7 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
*
* Caller must provide appropriate safe context.
*/
-static bool page_pool_recycle_in_cache(struct page *page,
+static bool page_pool_recycle_in_cache(netmem_ref netmem,
struct page_pool *pool)
{
if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) {
@@ -676,14 +684,15 @@ static bool page_pool_recycle_in_cache(struct page *page,
}
/* Caller MUST have verified/know (page_ref_count(page) == 1) */
- pool->alloc.cache[pool->alloc.count++] = page;
+ pool->alloc.cache[pool->alloc.count++] = netmem;
recycle_stat_inc(pool, cached);
return true;
}
-static bool __page_pool_page_can_be_recycled(const struct page *page)
+static bool __page_pool_page_can_be_recycled(netmem_ref netmem)
{
- return page_ref_count(page) == 1 && !page_is_pfmemalloc(page);
+ return page_ref_count(netmem_to_page(netmem)) == 1 &&
+ !page_is_pfmemalloc(netmem_to_page(netmem));
}
/* If the page refcnt == 1, this will try to recycle the page.
@@ -692,8 +701,8 @@ static bool __page_pool_page_can_be_recycled(const struct page *page)
* If the page refcnt != 1, then the page will be returned to memory
* subsystem.
*/
-static __always_inline struct page *
-__page_pool_put_page(struct page_pool *pool, struct page *page,
+static __always_inline netmem_ref
+__page_pool_put_page(struct page_pool *pool, netmem_ref netmem,
unsigned int dma_sync_size, bool allow_direct)
{
lockdep_assert_no_hardirq();
@@ -707,16 +716,16 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
* page is NOT reusable when allocated when system is under
* some pressure. (page_is_pfmemalloc)
*/
- if (likely(__page_pool_page_can_be_recycled(page))) {
+ if (likely(__page_pool_page_can_be_recycled(netmem))) {
/* Read barrier done in page_ref_count / READ_ONCE */
- page_pool_dma_sync_for_device(pool, page, dma_sync_size);
+ page_pool_dma_sync_for_device(pool, netmem, dma_sync_size);
- if (allow_direct && page_pool_recycle_in_cache(page, pool))
- return NULL;
+ if (allow_direct && page_pool_recycle_in_cache(netmem, pool))
+ return 0;
/* Page found as candidate for recycling */
- return page;
+ return netmem;
}
/* Fallback/non-XDP mode: API user have elevated refcnt.
*
@@ -732,9 +741,9 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
* will be invoking put_page.
*/
recycle_stat_inc(pool, released_refcnt);
- page_pool_return_page(pool, page);
+ page_pool_return_page(pool, netmem);
- return NULL;
+ return 0;
}
static bool page_pool_napi_local(const struct page_pool *pool)
@@ -760,19 +769,28 @@ static bool page_pool_napi_local(const struct page_pool *pool)
return napi && READ_ONCE(napi->list_owner) == cpuid;
}
-void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
- unsigned int dma_sync_size, bool allow_direct)
+void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
+ unsigned int dma_sync_size, bool allow_direct)
{
if (!allow_direct)
allow_direct = page_pool_napi_local(pool);
- page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
- if (page && !page_pool_recycle_in_ring(pool, page)) {
+ netmem =
+ __page_pool_put_page(pool, netmem, dma_sync_size, allow_direct);
+ if (netmem && !page_pool_recycle_in_ring(pool, netmem)) {
/* Cache full, fallback to free pages */
recycle_stat_inc(pool, ring_full);
- page_pool_return_page(pool, page);
+ page_pool_return_page(pool, netmem);
}
}
+EXPORT_SYMBOL(page_pool_put_unrefed_netmem);
+
+void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
+ unsigned int dma_sync_size, bool allow_direct)
+{
+ page_pool_put_unrefed_netmem(pool, page_to_netmem(page), dma_sync_size,
+ allow_direct);
+}
EXPORT_SYMBOL(page_pool_put_unrefed_page);
/**
@@ -800,16 +818,16 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
allow_direct = page_pool_napi_local(pool);
for (i = 0; i < count; i++) {
- struct page *page = virt_to_head_page(data[i]);
+ netmem_ref netmem = page_to_netmem(virt_to_head_page(data[i]));
/* It is not the last user for the page frag case */
- if (!page_pool_is_last_ref(page))
+ if (!page_pool_is_last_ref(netmem))
continue;
- page = __page_pool_put_page(pool, page, -1, allow_direct);
+ netmem = __page_pool_put_page(pool, netmem, -1, allow_direct);
/* Approved for bulk recycling in ptr_ring cache */
- if (page)
- data[bulk_len++] = page;
+ if (netmem)
+ data[bulk_len++] = (__force void *)netmem;
}
if (!bulk_len)
@@ -835,98 +853,106 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
* since put_page() with refcnt == 1 can be an expensive operation
*/
for (; i < bulk_len; i++)
- page_pool_return_page(pool, data[i]);
+ page_pool_return_page(pool, (__force netmem_ref)data[i]);
}
EXPORT_SYMBOL(page_pool_put_page_bulk);
-static struct page *page_pool_drain_frag(struct page_pool *pool,
- struct page *page)
+static netmem_ref page_pool_drain_frag(struct page_pool *pool,
+ netmem_ref netmem)
{
long drain_count = BIAS_MAX - pool->frag_users;
/* Some user is still using the page frag */
- if (likely(page_pool_unref_page(page, drain_count)))
- return NULL;
+ if (likely(page_pool_unref_netmem(netmem, drain_count)))
+ return 0;
- if (__page_pool_page_can_be_recycled(page)) {
- page_pool_dma_sync_for_device(pool, page, -1);
- return page;
+ if (__page_pool_page_can_be_recycled(netmem)) {
+ page_pool_dma_sync_for_device(pool, netmem, -1);
+ return netmem;
}
- page_pool_return_page(pool, page);
- return NULL;
+ page_pool_return_page(pool, netmem);
+ return 0;
}
static void page_pool_free_frag(struct page_pool *pool)
{
long drain_count = BIAS_MAX - pool->frag_users;
- struct page *page = pool->frag_page;
+ netmem_ref netmem = pool->frag_page;
- pool->frag_page = NULL;
+ pool->frag_page = 0;
- if (!page || page_pool_unref_page(page, drain_count))
+ if (!netmem || page_pool_unref_netmem(netmem, drain_count))
return;
- page_pool_return_page(pool, page);
+ page_pool_return_page(pool, netmem);
}
-struct page *page_pool_alloc_frag(struct page_pool *pool,
- unsigned int *offset,
- unsigned int size, gfp_t gfp)
+netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
+ unsigned int *offset, unsigned int size,
+ gfp_t gfp)
{
unsigned int max_size = PAGE_SIZE << pool->p.order;
- struct page *page = pool->frag_page;
+ netmem_ref netmem = pool->frag_page;
if (WARN_ON(size > max_size))
- return NULL;
+ return 0;
size = ALIGN(size, dma_get_cache_alignment());
*offset = pool->frag_offset;
- if (page && *offset + size > max_size) {
- page = page_pool_drain_frag(pool, page);
- if (page) {
+ if (netmem && *offset + size > max_size) {
+ netmem = page_pool_drain_frag(pool, netmem);
+ if (netmem) {
alloc_stat_inc(pool, fast);
goto frag_reset;
}
}
- if (!page) {
- page = page_pool_alloc_pages(pool, gfp);
- if (unlikely(!page)) {
- pool->frag_page = NULL;
- return NULL;
+ if (!netmem) {
+ netmem = page_pool_alloc_netmem(pool, gfp);
+ if (unlikely(!netmem)) {
+ pool->frag_page = 0;
+ return 0;
}
- pool->frag_page = page;
+ pool->frag_page = netmem;
frag_reset:
pool->frag_users = 1;
*offset = 0;
pool->frag_offset = size;
- page_pool_fragment_page(page, BIAS_MAX);
- return page;
+ page_pool_fragment_netmem(netmem, BIAS_MAX);
+ return netmem;
}
pool->frag_users++;
pool->frag_offset = *offset + size;
alloc_stat_inc(pool, fast);
- return page;
+ return netmem;
+}
+EXPORT_SYMBOL(page_pool_alloc_frag_netmem);
+
+struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
+ unsigned int size, gfp_t gfp)
+{
+ return netmem_to_page(page_pool_alloc_frag_netmem(pool, offset, size,
+ gfp));
}
EXPORT_SYMBOL(page_pool_alloc_frag);
static void page_pool_empty_ring(struct page_pool *pool)
{
- struct page *page;
+ netmem_ref netmem;
/* Empty recycle ring */
- while ((page = ptr_ring_consume_bh(&pool->ring))) {
+ while ((netmem = (__force netmem_ref)ptr_ring_consume_bh(&pool->ring))) {
/* Verify the refcnt invariant of cached pages */
- if (!(page_ref_count(page) == 1))
+ if (!(page_ref_count(netmem_to_page(netmem)) == 1))
pr_crit("%s() page_pool refcnt %d violation\n",
- __func__, page_ref_count(page));
+ __func__, netmem_ref_count(netmem));
- page_pool_return_page(pool, page);
+ page_pool_return_page(pool, netmem);
}
}
@@ -942,7 +968,7 @@ static void __page_pool_destroy(struct page_pool *pool)
static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
{
- struct page *page;
+ netmem_ref netmem;
if (pool->destroy_cnt)
return;
@@ -952,8 +978,8 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
* call concurrently.
*/
while (pool->alloc.count) {
- page = pool->alloc.cache[--pool->alloc.count];
- page_pool_return_page(pool, page);
+ netmem = pool->alloc.cache[--pool->alloc.count];
+ page_pool_return_page(pool, netmem);
}
}
@@ -1014,7 +1040,7 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
pool->xdp_mem_id = mem->id;
}
-static void page_pool_disable_direct_recycling(struct page_pool *pool)
+void page_pool_disable_direct_recycling(struct page_pool *pool)
{
/* Disable direct recycling based on pool->cpuid.
* Paired with READ_ONCE() in page_pool_napi_local().
@@ -1032,6 +1058,7 @@ static void page_pool_disable_direct_recycling(struct page_pool *pool)
WRITE_ONCE(pool->p.napi, NULL);
}
+EXPORT_SYMBOL(page_pool_disable_direct_recycling);
void page_pool_destroy(struct page_pool *pool)
{
@@ -1059,15 +1086,15 @@ EXPORT_SYMBOL(page_pool_destroy);
/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid)
{
- struct page *page;
+ netmem_ref netmem;
trace_page_pool_update_nid(pool, new_nid);
pool->p.nid = new_nid;
/* Flush pool alloc cache, as refill will check NUMA node */
while (pool->alloc.count) {
- page = pool->alloc.cache[--pool->alloc.count];
- page_pool_return_page(pool, page);
+ netmem = pool->alloc.cache[--pool->alloc.count];
+ page_pool_return_page(pool, netmem);
}
}
EXPORT_SYMBOL(page_pool_update_nid);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index eabfc8290f5e..87e67194f240 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3969,22 +3969,28 @@ static int rtnl_dellinkprop(struct sk_buff *skb, struct nlmsghdr *nlh,
return rtnl_linkprop(RTM_DELLINKPROP, skb, nlh, extack);
}
-static u32 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
+static noinline_for_stack u32 rtnl_calcit(struct sk_buff *skb,
+ struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
size_t min_ifinfo_dump_size = 0;
- struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
struct net_device *dev;
- int hdrlen;
+ struct nlattr *nla;
+ int hdrlen, rem;
/* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */
hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
- if (nlmsg_parse_deprecated(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) {
- if (tb[IFLA_EXT_MASK])
- ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+ if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
+ return NLMSG_GOODSIZE;
+
+ nla_for_each_attr_type(nla, IFLA_EXT_MASK,
+ nlmsg_attrdata(nlh, hdrlen),
+ nlmsg_attrlen(nlh, hdrlen), rem) {
+ if (nla_len(nla) == sizeof(u32))
+ ext_filter_mask = nla_get_u32(nla);
}
if (!ext_filter_mask)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index eb9a7e65b5c8..83f8cd8aa2d1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1015,8 +1015,10 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
EXPORT_SYMBOL(skb_cow_data_for_xdp);
#if IS_ENABLED(CONFIG_PAGE_POOL)
-bool napi_pp_put_page(struct page *page)
+bool napi_pp_put_page(netmem_ref netmem)
{
+ struct page *page = netmem_to_page(netmem);
+
page = compound_head(page);
/* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
@@ -1029,7 +1031,7 @@ bool napi_pp_put_page(struct page *page)
if (unlikely(!is_pp_page(page)))
return false;
- page_pool_put_full_page(page->pp, page, false);
+ page_pool_put_full_netmem(page->pp, page_to_netmem(page), false);
return true;
}
@@ -1040,7 +1042,7 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
{
if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
return false;
- return napi_pp_put_page(virt_to_page(data));
+ return napi_pp_put_page(page_to_netmem(virt_to_page(data)));
}
/**
@@ -1871,7 +1873,6 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct msghdr *msg, int len,
struct ubuf_info *uarg)
{
- struct ubuf_info *orig_uarg = skb_zcopy(skb);
int err, orig_len = skb->len;
if (uarg->ops->link_skb) {
@@ -1879,6 +1880,8 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
if (err)
return err;
} else {
+ struct ubuf_info *orig_uarg = skb_zcopy(skb);
+
/* An skb can only point to one uarg. This edge case happens
* when TCP appends to an skb, but zerocopy_realloc triggered
* a new alloc.
@@ -1899,8 +1902,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
return err;
}
- if (!uarg->ops->link_skb)
- skb_zcopy_set(skb, uarg, NULL);
+ skb_zcopy_set(skb, uarg, NULL);
return skb->len - orig_len;
}
EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index fd20aae30be2..bbf40b999713 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -434,7 +434,8 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
page = sg_page(sge);
if (copied + copy > len)
copy = len - copied;
- copy = copy_page_to_iter(page, sge->offset, copy, iter);
+ if (copy)
+ copy = copy_page_to_iter(page, sge->offset, copy, iter);
if (!copy) {
copied = copied ? copied : -EFAULT;
goto out;
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 504f954a1b28..9a190635fe95 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -8,4 +8,4 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \
linkstate.o debug.o wol.o features.o privflags.o rings.o \
channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \
- module.o pse-pd.o plca.o mm.o
+ module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o mm.o
diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index 7b4bbd674bae..cee188da54f8 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -171,11 +171,9 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info)
*/
if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use))
max_rxnfc_in_use = 0;
- if (!netif_is_rxfh_configured(dev) ||
- ethtool_get_max_rxfh_channel(dev, &max_rxfh_in_use))
- max_rxfh_in_use = 0;
+ max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev);
if (channels.combined_count + channels.rx_count <= max_rxfh_in_use) {
- GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing indirection table settings");
+ GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing indirection table (%d)", max_rxfh_in_use);
return -EINVAL;
}
if (channels.combined_count + channels.rx_count <= max_rxnfc_in_use) {
diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h
new file mode 100644
index 000000000000..e71cc3e1b7eb
--- /dev/null
+++ b/net/ethtool/cmis.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#define ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH 120
+#define ETHTOOL_CMIS_CDB_CMD_PAGE 0x9F
+#define ETHTOOL_CMIS_CDB_PAGE_I2C_ADDR 0x50
+
+/**
+ * struct ethtool_cmis_cdb - CDB commands parameters
+ * @cmis_rev: CMIS revision major.
+ * @read_write_len_ext: Allowable additional number of byte octets to the LPL
+ * in a READ or a WRITE CDB commands.
+ * @max_completion_time: Maximum CDB command completion time in msec.
+ */
+struct ethtool_cmis_cdb {
+ u8 cmis_rev;
+ u8 read_write_len_ext;
+ u16 max_completion_time;
+};
+
+enum ethtool_cmis_cdb_cmd_id {
+ ETHTOOL_CMIS_CDB_CMD_QUERY_STATUS = 0x0000,
+ ETHTOOL_CMIS_CDB_CMD_MODULE_FEATURES = 0x0040,
+ ETHTOOL_CMIS_CDB_CMD_FW_MANAGMENT_FEATURES = 0x0041,
+ ETHTOOL_CMIS_CDB_CMD_START_FW_DOWNLOAD = 0x0101,
+ ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_LPL = 0x0103,
+ ETHTOOL_CMIS_CDB_CMD_COMPLETE_FW_DOWNLOAD = 0x0107,
+ ETHTOOL_CMIS_CDB_CMD_RUN_FW_IMAGE = 0x0109,
+ ETHTOOL_CMIS_CDB_CMD_COMMIT_FW_IMAGE = 0x010A,
+};
+
+/**
+ * struct ethtool_cmis_cdb_request - CDB commands request fields as decribed in
+ * the CMIS standard
+ * @id: Command ID.
+ * @epl_len: EPL memory length.
+ * @lpl_len: LPL memory length.
+ * @chk_code: Check code for the previous field and the payload.
+ * @resv1: Added to match the CMIS standard request continuity.
+ * @resv2: Added to match the CMIS standard request continuity.
+ * @payload: Payload for the CDB commands.
+ */
+struct ethtool_cmis_cdb_request {
+ __be16 id;
+ struct_group(body,
+ __be16 epl_len;
+ u8 lpl_len;
+ u8 chk_code;
+ u8 resv1;
+ u8 resv2;
+ u8 payload[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH];
+ );
+};
+
+#define CDB_F_COMPLETION_VALID BIT(0)
+#define CDB_F_STATUS_VALID BIT(1)
+#define CDB_F_MODULE_STATE_VALID BIT(2)
+
+/**
+ * struct ethtool_cmis_cdb_cmd_args - CDB commands execution arguments
+ * @req: CDB command fields as described in the CMIS standard.
+ * @max_duration: Maximum duration time for command completion in msec.
+ * @read_write_len_ext: Allowable additional number of byte octets to the LPL
+ * in a READ or a WRITE commands.
+ * @msleep_pre_rpl: Waiting time before checking reply in msec.
+ * @rpl_exp_len: Expected reply length in bytes.
+ * @flags: Validation flags for CDB commands.
+ * @err_msg: Error message to be sent to user space.
+ */
+struct ethtool_cmis_cdb_cmd_args {
+ struct ethtool_cmis_cdb_request req;
+ u16 max_duration;
+ u8 read_write_len_ext;
+ u8 msleep_pre_rpl;
+ u8 rpl_exp_len;
+ u8 flags;
+ char *err_msg;
+};
+
+/**
+ * struct ethtool_cmis_cdb_rpl_hdr - CDB commands reply header arguments
+ * @rpl_len: Reply length.
+ * @rpl_chk_code: Reply check code.
+ */
+struct ethtool_cmis_cdb_rpl_hdr {
+ u8 rpl_len;
+ u8 rpl_chk_code;
+};
+
+/**
+ * struct ethtool_cmis_cdb_rpl - CDB commands reply arguments
+ * @hdr: CDB commands reply header arguments.
+ * @payload: Payload for the CDB commands reply.
+ */
+struct ethtool_cmis_cdb_rpl {
+ struct ethtool_cmis_cdb_rpl_hdr hdr;
+ u8 payload[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH];
+};
+
+u32 ethtool_cmis_get_max_payload_size(u8 num_of_byte_octs);
+
+void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args,
+ enum ethtool_cmis_cdb_cmd_id cmd, u8 *pl,
+ u8 lpl_len, u16 max_duration,
+ u8 read_write_len_ext, u16 msleep_pre_rpl,
+ u8 rpl_exp_len, u8 flags);
+
+void ethtool_cmis_cdb_check_completion_flag(u8 cmis_rev, u8 *flags);
+
+void ethtool_cmis_page_init(struct ethtool_module_eeprom *page_data,
+ u8 page, u32 offset, u32 length);
+void ethtool_cmis_page_fini(struct ethtool_module_eeprom *page_data);
+
+struct ethtool_cmis_cdb *
+ethtool_cmis_cdb_init(struct net_device *dev,
+ const struct ethtool_module_fw_flash_params *params,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params);
+void ethtool_cmis_cdb_fini(struct ethtool_cmis_cdb *cdb);
+
+int ethtool_cmis_wait_for_cond(struct net_device *dev, u8 flags, u8 flag,
+ u16 max_duration, u32 offset,
+ bool (*cond_success)(u8), bool (*cond_fail)(u8), u8 *state);
+
+int ethtool_cmis_cdb_execute_cmd(struct net_device *dev,
+ struct ethtool_cmis_cdb_cmd_args *args);
diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c
new file mode 100644
index 000000000000..1bb08783b60d
--- /dev/null
+++ b/net/ethtool/cmis_cdb.c
@@ -0,0 +1,602 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/ethtool.h>
+#include <linux/jiffies.h>
+
+#include "common.h"
+#include "module_fw.h"
+#include "cmis.h"
+
+/* For accessing the LPL field on page 9Fh, the allowable length extension is
+ * min(i, 15) byte octets where i specifies the allowable additional number of
+ * byte octets in a READ or a WRITE.
+ */
+u32 ethtool_cmis_get_max_payload_size(u8 num_of_byte_octs)
+{
+ return 8 * (1 + min_t(u8, num_of_byte_octs, 15));
+}
+
+void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args,
+ enum ethtool_cmis_cdb_cmd_id cmd, u8 *pl,
+ u8 lpl_len, u16 max_duration,
+ u8 read_write_len_ext, u16 msleep_pre_rpl,
+ u8 rpl_exp_len, u8 flags)
+{
+ args->req.id = cpu_to_be16(cmd);
+ args->req.lpl_len = lpl_len;
+ if (pl)
+ memcpy(args->req.payload, pl, args->req.lpl_len);
+
+ args->max_duration = max_duration;
+ args->read_write_len_ext =
+ ethtool_cmis_get_max_payload_size(read_write_len_ext);
+ args->msleep_pre_rpl = msleep_pre_rpl;
+ args->rpl_exp_len = rpl_exp_len;
+ args->flags = flags;
+ args->err_msg = NULL;
+}
+
+void ethtool_cmis_page_init(struct ethtool_module_eeprom *page_data,
+ u8 page, u32 offset, u32 length)
+{
+ page_data->page = page;
+ page_data->offset = offset;
+ page_data->length = length;
+ page_data->i2c_address = ETHTOOL_CMIS_CDB_PAGE_I2C_ADDR;
+}
+
+#define CMIS_REVISION_PAGE 0x00
+#define CMIS_REVISION_OFFSET 0x01
+
+struct cmis_rev_rpl {
+ u8 rev;
+};
+
+static u8 cmis_rev_rpl_major(struct cmis_rev_rpl *rpl)
+{
+ return rpl->rev >> 4;
+}
+
+static int cmis_rev_major_get(struct net_device *dev, u8 *rev_major)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_module_eeprom page_data = {0};
+ struct netlink_ext_ack extack = {};
+ struct cmis_rev_rpl rpl = {};
+ int err;
+
+ ethtool_cmis_page_init(&page_data, CMIS_REVISION_PAGE,
+ CMIS_REVISION_OFFSET, sizeof(rpl));
+ page_data.data = (u8 *)&rpl;
+
+ err = ops->get_module_eeprom_by_page(dev, &page_data, &extack);
+ if (err < 0) {
+ if (extack._msg)
+ netdev_err(dev, "%s\n", extack._msg);
+ return err;
+ }
+
+ *rev_major = cmis_rev_rpl_major(&rpl);
+
+ return 0;
+}
+
+#define CMIS_CDB_ADVERTISEMENT_PAGE 0x01
+#define CMIS_CDB_ADVERTISEMENT_OFFSET 0xA3
+
+/* Based on section 8.4.11 "CDB Messaging Support Advertisement" in CMIS
+ * standard revision 5.2.
+ */
+struct cmis_cdb_advert_rpl {
+ u8 inst_supported;
+ u8 read_write_len_ext;
+ u8 resv1;
+ u8 resv2;
+};
+
+static u8 cmis_cdb_advert_rpl_inst_supported(struct cmis_cdb_advert_rpl *rpl)
+{
+ return rpl->inst_supported >> 6;
+}
+
+static int cmis_cdb_advertisement_get(struct ethtool_cmis_cdb *cdb,
+ struct net_device *dev)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_module_eeprom page_data = {};
+ struct cmis_cdb_advert_rpl rpl = {};
+ struct netlink_ext_ack extack = {};
+ int err;
+
+ ethtool_cmis_page_init(&page_data, CMIS_CDB_ADVERTISEMENT_PAGE,
+ CMIS_CDB_ADVERTISEMENT_OFFSET, sizeof(rpl));
+ page_data.data = (u8 *)&rpl;
+
+ err = ops->get_module_eeprom_by_page(dev, &page_data, &extack);
+ if (err < 0) {
+ if (extack._msg)
+ netdev_err(dev, "%s\n", extack._msg);
+ return err;
+ }
+
+ if (!cmis_cdb_advert_rpl_inst_supported(&rpl))
+ return -EOPNOTSUPP;
+
+ cdb->read_write_len_ext = rpl.read_write_len_ext;
+
+ return 0;
+}
+
+#define CMIS_PASSWORD_ENTRY_PAGE 0x00
+#define CMIS_PASSWORD_ENTRY_OFFSET 0x7A
+
+struct cmis_password_entry_pl {
+ __be32 password;
+};
+
+/* See section 9.3.1 "CMD 0000h: Query Status" in CMIS standard revision 5.2.
+ * struct cmis_cdb_query_status_pl and struct cmis_cdb_query_status_rpl are
+ * structured layouts of the flat arrays,
+ * struct ethtool_cmis_cdb_request::payload and
+ * struct ethtool_cmis_cdb_rpl::payload respectively.
+ */
+struct cmis_cdb_query_status_pl {
+ u16 response_delay;
+};
+
+struct cmis_cdb_query_status_rpl {
+ u8 length;
+ u8 status;
+};
+
+static int
+cmis_cdb_validate_password(struct ethtool_cmis_cdb *cdb,
+ struct net_device *dev,
+ const struct ethtool_module_fw_flash_params *params,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct cmis_cdb_query_status_pl qs_pl = {0};
+ struct ethtool_module_eeprom page_data = {};
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ struct cmis_password_entry_pl pe_pl = {};
+ struct cmis_cdb_query_status_rpl *rpl;
+ struct netlink_ext_ack extack = {};
+ int err;
+
+ ethtool_cmis_page_init(&page_data, CMIS_PASSWORD_ENTRY_PAGE,
+ CMIS_PASSWORD_ENTRY_OFFSET, sizeof(pe_pl));
+ page_data.data = (u8 *)&pe_pl;
+
+ pe_pl = *((struct cmis_password_entry_pl *)page_data.data);
+ pe_pl.password = params->password;
+ err = ops->set_module_eeprom_by_page(dev, &page_data, &extack);
+ if (err < 0) {
+ if (extack._msg)
+ netdev_err(dev, "%s\n", extack._msg);
+ return err;
+ }
+
+ ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_QUERY_STATUS,
+ (u8 *)&qs_pl, sizeof(qs_pl), 0,
+ cdb->read_write_len_ext, 1000,
+ sizeof(*rpl),
+ CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
+
+ err = ethtool_cmis_cdb_execute_cmd(dev, &args);
+ if (err < 0) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Query Status command failed",
+ args.err_msg);
+ return err;
+ }
+
+ rpl = (struct cmis_cdb_query_status_rpl *)args.req.payload;
+ if (!rpl->length || !rpl->status) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Password was not accepted",
+ NULL);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Some CDB commands asserts the CDB completion flag only from CMIS
+ * revision 5. Therefore, check the relevant validity flag only when
+ * the revision supports it.
+ */
+void ethtool_cmis_cdb_check_completion_flag(u8 cmis_rev, u8 *flags)
+{
+ *flags |= cmis_rev >= 5 ? CDB_F_COMPLETION_VALID : 0;
+}
+
+#define CMIS_CDB_MODULE_FEATURES_RESV_DATA 34
+
+/* See section 9.4.1 "CMD 0040h: Module Features" in CMIS standard revision 5.2.
+ * struct cmis_cdb_module_features_rpl is structured layout of the flat
+ * array, ethtool_cmis_cdb_rpl::payload.
+ */
+struct cmis_cdb_module_features_rpl {
+ u8 resv1[CMIS_CDB_MODULE_FEATURES_RESV_DATA];
+ __be16 max_completion_time;
+};
+
+static u16
+cmis_cdb_module_features_completion_time(struct cmis_cdb_module_features_rpl *rpl)
+{
+ return be16_to_cpu(rpl->max_completion_time);
+}
+
+static int cmis_cdb_module_features_get(struct ethtool_cmis_cdb *cdb,
+ struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
+{
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ struct cmis_cdb_module_features_rpl *rpl;
+ u8 flags = CDB_F_STATUS_VALID;
+ int err;
+
+ ethtool_cmis_cdb_check_completion_flag(cdb->cmis_rev, &flags);
+ ethtool_cmis_cdb_compose_args(&args,
+ ETHTOOL_CMIS_CDB_CMD_MODULE_FEATURES,
+ NULL, 0, 0, cdb->read_write_len_ext,
+ 1000, sizeof(*rpl), flags);
+
+ err = ethtool_cmis_cdb_execute_cmd(dev, &args);
+ if (err < 0) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Module Features command failed",
+ args.err_msg);
+ return err;
+ }
+
+ rpl = (struct cmis_cdb_module_features_rpl *)args.req.payload;
+ cdb->max_completion_time =
+ cmis_cdb_module_features_completion_time(rpl);
+
+ return 0;
+}
+
+struct ethtool_cmis_cdb *
+ethtool_cmis_cdb_init(struct net_device *dev,
+ const struct ethtool_module_fw_flash_params *params,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
+{
+ struct ethtool_cmis_cdb *cdb;
+ int err;
+
+ cdb = kzalloc(sizeof(*cdb), GFP_KERNEL);
+ if (!cdb)
+ return ERR_PTR(-ENOMEM);
+
+ err = cmis_rev_major_get(dev, &cdb->cmis_rev);
+ if (err < 0)
+ goto err;
+
+ if (cdb->cmis_rev < 4) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "CMIS revision doesn't support module firmware flashing",
+ NULL);
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ err = cmis_cdb_advertisement_get(cdb, dev);
+ if (err < 0)
+ goto err;
+
+ if (params->password_valid) {
+ err = cmis_cdb_validate_password(cdb, dev, params, ntf_params);
+ if (err < 0)
+ goto err;
+ }
+
+ err = cmis_cdb_module_features_get(cdb, dev, ntf_params);
+ if (err < 0)
+ goto err;
+
+ return cdb;
+
+err:
+ ethtool_cmis_cdb_fini(cdb);
+ return ERR_PTR(err);
+}
+
+void ethtool_cmis_cdb_fini(struct ethtool_cmis_cdb *cdb)
+{
+ kfree(cdb);
+}
+
+static bool is_completed(u8 data)
+{
+ return !!(data & 0x40);
+}
+
+#define CMIS_CDB_STATUS_SUCCESS 0x01
+
+static bool status_success(u8 data)
+{
+ return data == CMIS_CDB_STATUS_SUCCESS;
+}
+
+#define CMIS_CDB_STATUS_FAIL 0x40
+
+static bool status_fail(u8 data)
+{
+ return data & CMIS_CDB_STATUS_FAIL;
+}
+
+struct cmis_wait_for_cond_rpl {
+ u8 state;
+};
+
+static int
+ethtool_cmis_module_poll(struct net_device *dev,
+ struct cmis_wait_for_cond_rpl *rpl, u32 offset,
+ bool (*cond_success)(u8), bool (*cond_fail)(u8))
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_module_eeprom page_data = {0};
+ struct netlink_ext_ack extack = {};
+ int err;
+
+ ethtool_cmis_page_init(&page_data, 0, offset, sizeof(rpl));
+ page_data.data = (u8 *)rpl;
+
+ err = ops->get_module_eeprom_by_page(dev, &page_data, &extack);
+ if (err < 0) {
+ if (extack._msg)
+ netdev_err_once(dev, "%s\n", extack._msg);
+ return -EBUSY;
+ }
+
+ if ((*cond_success)(rpl->state))
+ return 0;
+
+ if (*cond_fail && (*cond_fail)(rpl->state))
+ return -EIO;
+
+ return -EBUSY;
+}
+
+int ethtool_cmis_wait_for_cond(struct net_device *dev, u8 flags, u8 flag,
+ u16 max_duration, u32 offset,
+ bool (*cond_success)(u8), bool (*cond_fail)(u8),
+ u8 *state)
+{
+ struct cmis_wait_for_cond_rpl rpl = {};
+ unsigned long end;
+ int err;
+
+ if (!(flags & flag))
+ return 0;
+
+ if (max_duration == 0)
+ max_duration = U16_MAX;
+
+ end = jiffies + msecs_to_jiffies(max_duration);
+ do {
+ err = ethtool_cmis_module_poll(dev, &rpl, offset, cond_success,
+ cond_fail);
+ if (err != -EBUSY)
+ goto out;
+
+ msleep(20);
+ } while (time_before(jiffies, end));
+
+ err = ethtool_cmis_module_poll(dev, &rpl, offset, cond_success,
+ cond_fail);
+ if (err == -EBUSY)
+ err = -ETIMEDOUT;
+
+out:
+ *state = rpl.state;
+ return err;
+}
+
+#define CMIS_CDB_COMPLETION_FLAG_OFFSET 0x08
+
+static int cmis_cdb_wait_for_completion(struct net_device *dev,
+ struct ethtool_cmis_cdb_cmd_args *args)
+{
+ u8 flag;
+ int err;
+
+ /* Some vendors demand waiting time before checking completion flag
+ * in some CDB commands.
+ */
+ msleep(args->msleep_pre_rpl);
+
+ err = ethtool_cmis_wait_for_cond(dev, args->flags,
+ CDB_F_COMPLETION_VALID,
+ args->max_duration,
+ CMIS_CDB_COMPLETION_FLAG_OFFSET,
+ is_completed, NULL, &flag);
+ if (err < 0)
+ args->err_msg = "Completion Flag did not set on time";
+
+ return err;
+}
+
+#define CMIS_CDB_STATUS_OFFSET 0x25
+
+static void cmis_cdb_status_fail_msg_get(u8 status, char **err_msg)
+{
+ switch (status) {
+ case 0b10000001:
+ *err_msg = "CDB Status is in progress: Busy capturing command";
+ break;
+ case 0b10000010:
+ *err_msg =
+ "CDB Status is in progress: Busy checking/validating command";
+ break;
+ case 0b10000011:
+ *err_msg = "CDB Status is in progress: Busy executing";
+ break;
+ case 0b01000000:
+ *err_msg = "CDB status failed: no specific failure";
+ break;
+ case 0b01000010:
+ *err_msg =
+ "CDB status failed: Parameter range error or parameter not supported";
+ break;
+ case 0b01000101:
+ *err_msg = "CDB status failed: CdbChkCode error";
+ break;
+ default:
+ *err_msg = "Unknown failure reason";
+ }
+};
+
+static int cmis_cdb_wait_for_status(struct net_device *dev,
+ struct ethtool_cmis_cdb_cmd_args *args)
+{
+ u8 status;
+ int err;
+
+ /* Some vendors demand waiting time before checking status in some
+ * CDB commands.
+ */
+ msleep(args->msleep_pre_rpl);
+
+ err = ethtool_cmis_wait_for_cond(dev, args->flags, CDB_F_STATUS_VALID,
+ args->max_duration,
+ CMIS_CDB_STATUS_OFFSET,
+ status_success, status_fail, &status);
+ if (err < 0 && !args->err_msg)
+ cmis_cdb_status_fail_msg_get(status, &args->err_msg);
+
+ return err;
+}
+
+#define CMIS_CDB_REPLY_OFFSET 0x86
+
+static int cmis_cdb_process_reply(struct net_device *dev,
+ struct ethtool_module_eeprom *page_data,
+ struct ethtool_cmis_cdb_cmd_args *args)
+{
+ u8 rpl_hdr_len = sizeof(struct ethtool_cmis_cdb_rpl_hdr);
+ u8 rpl_exp_len = args->rpl_exp_len + rpl_hdr_len;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct netlink_ext_ack extack = {};
+ struct ethtool_cmis_cdb_rpl *rpl;
+ int err;
+
+ if (!args->rpl_exp_len)
+ return 0;
+
+ ethtool_cmis_page_init(page_data, ETHTOOL_CMIS_CDB_CMD_PAGE,
+ CMIS_CDB_REPLY_OFFSET, rpl_exp_len);
+ page_data->data = kmalloc(page_data->length, GFP_KERNEL);
+ if (!page_data->data)
+ return -ENOMEM;
+
+ err = ops->get_module_eeprom_by_page(dev, page_data, &extack);
+ if (err < 0) {
+ if (extack._msg)
+ netdev_err(dev, "%s\n", extack._msg);
+ goto out;
+ }
+
+ rpl = (struct ethtool_cmis_cdb_rpl *)page_data->data;
+ if ((args->rpl_exp_len > rpl->hdr.rpl_len + rpl_hdr_len) ||
+ !rpl->hdr.rpl_chk_code) {
+ err = -EIO;
+ goto out;
+ }
+
+ args->req.lpl_len = rpl->hdr.rpl_len;
+ memcpy(args->req.payload, rpl->payload, args->req.lpl_len);
+
+out:
+ kfree(page_data->data);
+ return err;
+}
+
+static int
+__ethtool_cmis_cdb_execute_cmd(struct net_device *dev,
+ struct ethtool_module_eeprom *page_data,
+ u8 page, u32 offset, u32 length, void *data)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct netlink_ext_ack extack = {};
+ int err;
+
+ ethtool_cmis_page_init(page_data, page, offset, length);
+ page_data->data = kmemdup(data, page_data->length, GFP_KERNEL);
+ if (!page_data->data)
+ return -ENOMEM;
+
+ err = ops->set_module_eeprom_by_page(dev, page_data, &extack);
+ if (err < 0) {
+ if (extack._msg)
+ netdev_err(dev, "%s\n", extack._msg);
+ }
+
+ kfree(page_data->data);
+ return err;
+}
+
+static u8 cmis_cdb_calc_checksum(const void *data, size_t size)
+{
+ const u8 *bytes = (const u8 *)data;
+ u8 checksum = 0;
+
+ for (size_t i = 0; i < size; i++)
+ checksum += bytes[i];
+
+ return ~checksum;
+}
+
+#define CMIS_CDB_CMD_ID_OFFSET 0x80
+
+int ethtool_cmis_cdb_execute_cmd(struct net_device *dev,
+ struct ethtool_cmis_cdb_cmd_args *args)
+{
+ struct ethtool_module_eeprom page_data = {};
+ u32 offset;
+ int err;
+
+ args->req.chk_code =
+ cmis_cdb_calc_checksum(&args->req, sizeof(args->req));
+
+ if (args->req.lpl_len > args->read_write_len_ext) {
+ args->err_msg = "LPL length is longer than CDB read write length extension allows";
+ return -EINVAL;
+ }
+
+ /* According to the CMIS standard, there are two options to trigger the
+ * CDB commands. The default option is triggering the command by writing
+ * the CMDID bytes. Therefore, the command will be split to 2 calls:
+ * First, with everything except the CMDID field and then the CMDID
+ * field.
+ */
+ offset = CMIS_CDB_CMD_ID_OFFSET +
+ offsetof(struct ethtool_cmis_cdb_request, body);
+ err = __ethtool_cmis_cdb_execute_cmd(dev, &page_data,
+ ETHTOOL_CMIS_CDB_CMD_PAGE, offset,
+ sizeof(args->req.body),
+ &args->req.body);
+ if (err < 0)
+ return err;
+
+ offset = CMIS_CDB_CMD_ID_OFFSET +
+ offsetof(struct ethtool_cmis_cdb_request, id);
+ err = __ethtool_cmis_cdb_execute_cmd(dev, &page_data,
+ ETHTOOL_CMIS_CDB_CMD_PAGE, offset,
+ sizeof(args->req.id),
+ &args->req.id);
+ if (err < 0)
+ return err;
+
+ err = cmis_cdb_wait_for_completion(dev, args);
+ if (err < 0)
+ return err;
+
+ err = cmis_cdb_wait_for_status(dev, args);
+ if (err < 0)
+ return err;
+
+ return cmis_cdb_process_reply(dev, &page_data, args);
+}
diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c
new file mode 100644
index 000000000000..ae4b4b28a601
--- /dev/null
+++ b/net/ethtool/cmis_fw_update.c
@@ -0,0 +1,399 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/ethtool.h>
+#include <linux/firmware.h>
+
+#include "common.h"
+#include "module_fw.h"
+#include "cmis.h"
+
+struct cmis_fw_update_fw_mng_features {
+ u8 start_cmd_payload_size;
+ u16 max_duration_start;
+ u16 max_duration_write;
+ u16 max_duration_complete;
+};
+
+/* See section 9.4.2 "CMD 0041h: Firmware Management Features" in CMIS standard
+ * revision 5.2.
+ * struct cmis_cdb_fw_mng_features_rpl is a structured layout of the flat
+ * array, ethtool_cmis_cdb_rpl::payload.
+ */
+struct cmis_cdb_fw_mng_features_rpl {
+ u8 resv1;
+ u8 resv2;
+ u8 start_cmd_payload_size;
+ u8 resv3;
+ u8 read_write_len_ext;
+ u8 write_mechanism;
+ u8 resv4;
+ u8 resv5;
+ __be16 max_duration_start;
+ __be16 resv6;
+ __be16 max_duration_write;
+ __be16 max_duration_complete;
+ __be16 resv7;
+};
+
+#define CMIS_CDB_FW_WRITE_MECHANISM_LPL 0x01
+
+static int
+cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb,
+ struct net_device *dev,
+ struct cmis_fw_update_fw_mng_features *fw_mng,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
+{
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ struct cmis_cdb_fw_mng_features_rpl *rpl;
+ u8 flags = CDB_F_STATUS_VALID;
+ int err;
+
+ ethtool_cmis_cdb_check_completion_flag(cdb->cmis_rev, &flags);
+ ethtool_cmis_cdb_compose_args(&args,
+ ETHTOOL_CMIS_CDB_CMD_FW_MANAGMENT_FEATURES,
+ NULL, 0, cdb->max_completion_time,
+ cdb->read_write_len_ext, 1000,
+ sizeof(*rpl), flags);
+
+ err = ethtool_cmis_cdb_execute_cmd(dev, &args);
+ if (err < 0) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "FW Management Features command failed",
+ args.err_msg);
+ return err;
+ }
+
+ rpl = (struct cmis_cdb_fw_mng_features_rpl *)args.req.payload;
+ if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL)) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Write LPL is not supported",
+ NULL);
+ return -EOPNOTSUPP;
+ }
+
+ /* Above, we used read_write_len_ext that we got from CDB
+ * advertisement. Update it with the value that we got from module
+ * features query, which is specific for Firmware Management Commands
+ * (IDs 0100h-01FFh).
+ */
+ cdb->read_write_len_ext = rpl->read_write_len_ext;
+ fw_mng->start_cmd_payload_size = rpl->start_cmd_payload_size;
+ fw_mng->max_duration_start = be16_to_cpu(rpl->max_duration_start);
+ fw_mng->max_duration_write = be16_to_cpu(rpl->max_duration_write);
+ fw_mng->max_duration_complete = be16_to_cpu(rpl->max_duration_complete);
+
+ return 0;
+}
+
+/* See section 9.7.2 "CMD 0101h: Start Firmware Download" in CMIS standard
+ * revision 5.2.
+ * struct cmis_cdb_start_fw_download_pl is a structured layout of the
+ * flat array, ethtool_cmis_cdb_request::payload.
+ */
+struct cmis_cdb_start_fw_download_pl {
+ __struct_group(cmis_cdb_start_fw_download_pl_h, head, /* no attrs */,
+ __be32 image_size;
+ __be32 resv1;
+ );
+ u8 vendor_data[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH -
+ sizeof(struct cmis_cdb_start_fw_download_pl_h)];
+};
+
+static int
+cmis_fw_update_start_download(struct ethtool_cmis_cdb *cdb,
+ struct ethtool_cmis_fw_update_params *fw_update,
+ struct cmis_fw_update_fw_mng_features *fw_mng)
+{
+ u8 vendor_data_size = fw_mng->start_cmd_payload_size;
+ struct cmis_cdb_start_fw_download_pl pl = {};
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ u8 lpl_len;
+ int err;
+
+ pl.image_size = cpu_to_be32(fw_update->fw->size);
+ memcpy(pl.vendor_data, fw_update->fw->data, vendor_data_size);
+
+ lpl_len = offsetof(struct cmis_cdb_start_fw_download_pl,
+ vendor_data[vendor_data_size]);
+
+ ethtool_cmis_cdb_compose_args(&args,
+ ETHTOOL_CMIS_CDB_CMD_START_FW_DOWNLOAD,
+ (u8 *)&pl, lpl_len,
+ fw_mng->max_duration_start,
+ cdb->read_write_len_ext, 1000, 0,
+ CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
+
+ err = ethtool_cmis_cdb_execute_cmd(fw_update->dev, &args);
+ if (err < 0)
+ ethnl_module_fw_flash_ntf_err(fw_update->dev,
+ &fw_update->ntf_params,
+ "Start FW download command failed",
+ args.err_msg);
+
+ return err;
+}
+
+/* See section 9.7.4 "CMD 0103h: Write Firmware Block LPL" in CMIS standard
+ * revision 5.2.
+ * struct cmis_cdb_write_fw_block_lpl_pl is a structured layout of the
+ * flat array, ethtool_cmis_cdb_request::payload.
+ */
+struct cmis_cdb_write_fw_block_lpl_pl {
+ __be32 block_address;
+ u8 fw_block[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH - sizeof(__be32)];
+};
+
+static int
+cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb,
+ struct ethtool_cmis_fw_update_params *fw_update,
+ struct cmis_fw_update_fw_mng_features *fw_mng)
+{
+ u8 start = fw_mng->start_cmd_payload_size;
+ u32 offset, max_block_size, max_lpl_len;
+ u32 image_size = fw_update->fw->size;
+ int err;
+
+ max_lpl_len = min_t(u32,
+ ethtool_cmis_get_max_payload_size(cdb->read_write_len_ext),
+ ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH);
+ max_block_size =
+ max_lpl_len - sizeof_field(struct cmis_cdb_write_fw_block_lpl_pl,
+ block_address);
+
+ for (offset = start; offset < image_size; offset += max_block_size) {
+ struct cmis_cdb_write_fw_block_lpl_pl pl = {
+ .block_address = cpu_to_be32(offset - start),
+ };
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ u32 block_size, lpl_len;
+
+ ethnl_module_fw_flash_ntf_in_progress(fw_update->dev,
+ &fw_update->ntf_params,
+ offset - start,
+ image_size);
+ block_size = min_t(u32, max_block_size, image_size - offset);
+ memcpy(pl.fw_block, &fw_update->fw->data[offset], block_size);
+ lpl_len = block_size +
+ sizeof_field(struct cmis_cdb_write_fw_block_lpl_pl,
+ block_address);
+
+ ethtool_cmis_cdb_compose_args(&args,
+ ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_LPL,
+ (u8 *)&pl, lpl_len,
+ fw_mng->max_duration_write,
+ cdb->read_write_len_ext, 1, 0,
+ CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
+
+ err = ethtool_cmis_cdb_execute_cmd(fw_update->dev, &args);
+ if (err < 0) {
+ ethnl_module_fw_flash_ntf_err(fw_update->dev,
+ &fw_update->ntf_params,
+ "Write FW block LPL command failed",
+ args.err_msg);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int
+cmis_fw_update_complete_download(struct ethtool_cmis_cdb *cdb,
+ struct net_device *dev,
+ struct cmis_fw_update_fw_mng_features *fw_mng,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
+{
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ int err;
+
+ ethtool_cmis_cdb_compose_args(&args,
+ ETHTOOL_CMIS_CDB_CMD_COMPLETE_FW_DOWNLOAD,
+ NULL, 0, fw_mng->max_duration_complete,
+ cdb->read_write_len_ext, 1000, 0,
+ CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
+
+ err = ethtool_cmis_cdb_execute_cmd(dev, &args);
+ if (err < 0)
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Complete FW download command failed",
+ args.err_msg);
+
+ return err;
+}
+
+static int
+cmis_fw_update_download_image(struct ethtool_cmis_cdb *cdb,
+ struct ethtool_cmis_fw_update_params *fw_update,
+ struct cmis_fw_update_fw_mng_features *fw_mng)
+{
+ int err;
+
+ err = cmis_fw_update_start_download(cdb, fw_update, fw_mng);
+ if (err < 0)
+ return err;
+
+ err = cmis_fw_update_write_image(cdb, fw_update, fw_mng);
+ if (err < 0)
+ return err;
+
+ err = cmis_fw_update_complete_download(cdb, fw_update->dev, fw_mng,
+ &fw_update->ntf_params);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+enum {
+ CMIS_MODULE_LOW_PWR = 1,
+ CMIS_MODULE_READY = 3,
+};
+
+static bool module_is_ready(u8 data)
+{
+ u8 state = (data >> 1) & 7;
+
+ return state == CMIS_MODULE_READY || state == CMIS_MODULE_LOW_PWR;
+}
+
+#define CMIS_MODULE_READY_MAX_DURATION_MSEC 1000
+#define CMIS_MODULE_STATE_OFFSET 3
+
+static int
+cmis_fw_update_wait_for_module_state(struct net_device *dev, u8 flags)
+{
+ u8 state;
+
+ return ethtool_cmis_wait_for_cond(dev, flags, CDB_F_MODULE_STATE_VALID,
+ CMIS_MODULE_READY_MAX_DURATION_MSEC,
+ CMIS_MODULE_STATE_OFFSET,
+ module_is_ready, NULL, &state);
+}
+
+/* See section 9.7.10 "CMD 0109h: Run Firmware Image" in CMIS standard
+ * revision 5.2.
+ * struct cmis_cdb_run_fw_image_pl is a structured layout of the flat
+ * array, ethtool_cmis_cdb_request::payload.
+ */
+struct cmis_cdb_run_fw_image_pl {
+ u8 resv1;
+ u8 image_to_run;
+ u16 delay_to_reset;
+};
+
+static int
+cmis_fw_update_run_image(struct ethtool_cmis_cdb *cdb, struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
+{
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ struct cmis_cdb_run_fw_image_pl pl = {0};
+ int err;
+
+ ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_RUN_FW_IMAGE,
+ (u8 *)&pl, sizeof(pl),
+ cdb->max_completion_time,
+ cdb->read_write_len_ext, 1000, 0,
+ CDB_F_MODULE_STATE_VALID);
+
+ err = ethtool_cmis_cdb_execute_cmd(dev, &args);
+ if (err < 0) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Run image command failed",
+ args.err_msg);
+ return err;
+ }
+
+ err = cmis_fw_update_wait_for_module_state(dev, args.flags);
+ if (err < 0)
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Module is not ready on time after reset",
+ NULL);
+
+ return err;
+}
+
+static int
+cmis_fw_update_commit_image(struct ethtool_cmis_cdb *cdb,
+ struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
+{
+ struct ethtool_cmis_cdb_cmd_args args = {};
+ int err;
+
+ ethtool_cmis_cdb_compose_args(&args,
+ ETHTOOL_CMIS_CDB_CMD_COMMIT_FW_IMAGE,
+ NULL, 0, cdb->max_completion_time,
+ cdb->read_write_len_ext, 1000, 0,
+ CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID);
+
+ err = ethtool_cmis_cdb_execute_cmd(dev, &args);
+ if (err < 0)
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "Commit image command failed",
+ args.err_msg);
+
+ return err;
+}
+
+static int cmis_fw_update_reset(struct net_device *dev)
+{
+ __u32 reset_data = ETH_RESET_PHY;
+
+ return dev->ethtool_ops->reset(dev, &reset_data);
+}
+
+void
+ethtool_cmis_fw_update(struct ethtool_cmis_fw_update_params *fw_update)
+{
+ struct ethnl_module_fw_flash_ntf_params *ntf_params =
+ &fw_update->ntf_params;
+ struct cmis_fw_update_fw_mng_features fw_mng = {0};
+ struct net_device *dev = fw_update->dev;
+ struct ethtool_cmis_cdb *cdb;
+ int err;
+
+ cdb = ethtool_cmis_cdb_init(dev, &fw_update->params, ntf_params);
+ if (IS_ERR(cdb))
+ goto err_send_ntf;
+
+ ethnl_module_fw_flash_ntf_start(dev, ntf_params);
+
+ err = cmis_fw_update_fw_mng_features_get(cdb, dev, &fw_mng, ntf_params);
+ if (err < 0)
+ goto err_cdb_fini;
+
+ err = cmis_fw_update_download_image(cdb, fw_update, &fw_mng);
+ if (err < 0)
+ goto err_cdb_fini;
+
+ err = cmis_fw_update_run_image(cdb, dev, ntf_params);
+ if (err < 0)
+ goto err_cdb_fini;
+
+ /* The CDB command "Run Firmware Image" resets the firmware, so the new
+ * one might have different settings.
+ * Free the old CDB instance, and init a new one.
+ */
+ ethtool_cmis_cdb_fini(cdb);
+
+ cdb = ethtool_cmis_cdb_init(dev, &fw_update->params, ntf_params);
+ if (IS_ERR(cdb))
+ goto err_send_ntf;
+
+ err = cmis_fw_update_commit_image(cdb, dev, ntf_params);
+ if (err < 0)
+ goto err_cdb_fini;
+
+ err = cmis_fw_update_reset(dev);
+ if (err < 0)
+ goto err_cdb_fini;
+
+ ethnl_module_fw_flash_ntf_complete(dev, ntf_params);
+ ethtool_cmis_cdb_fini(cdb);
+ return;
+
+err_cdb_fini:
+ ethtool_cmis_cdb_fini(cdb);
+err_send_ntf:
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params, NULL, NULL);
+}
diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c
index 759b16e3d134..3e18ca1ccc5e 100644
--- a/net/ethtool/coalesce.c
+++ b/net/ethtool/coalesce.c
@@ -211,9 +211,9 @@ static int coalesce_fill_reply(struct sk_buff *skb,
{
const struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base);
const struct kernel_ethtool_coalesce *kcoal = &data->kernel_coalesce;
- struct dim_irq_moder *moder = req_base->dev->irq_moder;
const struct ethtool_coalesce *coal = &data->coalesce;
u32 supported = data->supported_params;
+ struct dim_irq_moder *moder;
int ret = 0;
if (coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_USECS,
@@ -272,9 +272,10 @@ static int coalesce_fill_reply(struct sk_buff *skb,
kcoal->tx_aggr_time_usecs, supported))
return -EMSGSIZE;
- if (!moder)
+ if (!req_base->dev || !req_base->dev->irq_moder)
return 0;
+ moder = req_base->dev->irq_moder;
rcu_read_lock();
if (moder->profile_flags & DIM_PROFILE_RX) {
ret = coalesce_put_profile(skb, ETHTOOL_A_COALESCE_RX_PROFILE,
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 6b2a360dcdf0..7bda9600efcf 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -587,35 +587,64 @@ err_free_info:
return err;
}
-int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max)
+static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev)
+{
+ struct ethtool_rxfh_context *ctx;
+ unsigned long context;
+ u32 max_ring = 0;
+
+ mutex_lock(&dev->ethtool->rss_lock);
+ xa_for_each(&dev->ethtool->rss_ctx, context, ctx) {
+ u32 i, *tbl;
+
+ tbl = ethtool_rxfh_context_indir(ctx);
+ for (i = 0; i < ctx->indir_size; i++)
+ max_ring = max(max_ring, tbl[i]);
+ }
+ mutex_unlock(&dev->ethtool->rss_lock);
+
+ return max_ring;
+}
+
+u32 ethtool_get_max_rxfh_channel(struct net_device *dev)
{
struct ethtool_rxfh_param rxfh = {};
- u32 dev_size, current_max = 0;
+ u32 dev_size, current_max;
int ret;
+ /* While we do track whether RSS context has an indirection
+ * table explicitly set by the user, no driver looks at that bit.
+ * Assume drivers won't auto-regenerate the additional tables,
+ * to be safe.
+ */
+ current_max = ethtool_get_max_rss_ctx_channel(dev);
+
+ if (!netif_is_rxfh_configured(dev))
+ return current_max;
+
if (!dev->ethtool_ops->get_rxfh_indir_size ||
!dev->ethtool_ops->get_rxfh)
- return -EOPNOTSUPP;
+ return current_max;
dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
if (dev_size == 0)
- return -EOPNOTSUPP;
+ return current_max;
rxfh.indir = kcalloc(dev_size, sizeof(rxfh.indir[0]), GFP_USER);
if (!rxfh.indir)
- return -ENOMEM;
+ return U32_MAX;
ret = dev->ethtool_ops->get_rxfh(dev, &rxfh);
- if (ret)
- goto out;
+ if (ret) {
+ current_max = U32_MAX;
+ goto out_free;
+ }
while (dev_size--)
current_max = max(current_max, rxfh.indir[dev_size]);
- *max = current_max;
-
-out:
+out_free:
kfree(rxfh.indir);
- return ret;
+ return current_max;
}
int ethtool_check_ops(const struct ethtool_ops *ops)
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index 28b8aaaf9bcb..b55705a9ad5a 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -42,7 +42,7 @@ int __ethtool_get_link(struct net_device *dev);
bool convert_legacy_settings_to_link_ksettings(
struct ethtool_link_ksettings *link_ksettings,
const struct ethtool_cmd *legacy_settings);
-int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max);
+u32 ethtool_get_max_rxfh_channel(struct net_device *dev);
int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max);
int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info);
diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 6209c3a9c8f7..3b8209e930fd 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -91,6 +91,12 @@ static int get_module_eeprom_by_page(struct net_device *dev,
{
const struct ethtool_ops *ops = dev->ethtool_ops;
+ if (dev->ethtool->module_fw_flash_in_progress) {
+ NL_SET_ERR_MSG(extack,
+ "Module firmware flashing is in progress");
+ return -EBUSY;
+ }
+
if (dev->sfp_bus)
return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack);
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index e645d751a5e8..615812ff8974 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -658,6 +658,9 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
if (!dev->ethtool_ops->get_link_ksettings)
return -EOPNOTSUPP;
+ if (dev->ethtool->module_fw_flash_in_progress)
+ return -EBUSY;
+
memset(&link_ksettings, 0, sizeof(link_ksettings));
err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings);
if (err < 0)
@@ -1199,6 +1202,7 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_rxfh_param rxfh_dev = {};
u32 user_indir_size, user_key_size;
+ struct ethtool_rxfh_context *ctx;
struct ethtool_rxfh rxfh;
u32 indir_bytes;
u8 *rss_config;
@@ -1246,11 +1250,26 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (user_key_size)
rxfh_dev.key = rss_config + indir_bytes;
- rxfh_dev.rss_context = rxfh.rss_context;
-
- ret = dev->ethtool_ops->get_rxfh(dev, &rxfh_dev);
- if (ret)
- goto out;
+ if (rxfh.rss_context) {
+ ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context);
+ if (!ctx) {
+ ret = -ENOENT;
+ goto out;
+ }
+ if (rxfh_dev.indir)
+ memcpy(rxfh_dev.indir, ethtool_rxfh_context_indir(ctx),
+ indir_bytes);
+ if (rxfh_dev.key)
+ memcpy(rxfh_dev.key, ethtool_rxfh_context_key(ctx),
+ user_key_size);
+ rxfh_dev.hfunc = ctx->hfunc;
+ rxfh_dev.input_xfrm = ctx->input_xfrm;
+ ret = 0;
+ } else {
+ ret = dev->ethtool_ops->get_rxfh(dev, &rxfh_dev);
+ if (ret)
+ goto out;
+ }
if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, hfunc),
&rxfh_dev.hfunc, sizeof(rxfh.hfunc))) {
@@ -1278,10 +1297,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
u32 dev_indir_size = 0, dev_key_size = 0, i;
struct ethtool_rxfh_param rxfh_dev = {};
+ struct ethtool_rxfh_context *ctx = NULL;
struct netlink_ext_ack *extack = NULL;
struct ethtool_rxnfc rx_rings;
struct ethtool_rxfh rxfh;
+ bool locked = false; /* dev->ethtool->rss_lock taken */
u32 indir_bytes = 0;
+ bool create = false;
u8 *rss_config;
int ret;
@@ -1309,6 +1331,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) &&
!ops->cap_rss_sym_xor_supported)
return -EOPNOTSUPP;
+ create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC;
/* If either indir, hash key or function is valid, proceed further.
* Must request at least one change: indir size, hash key, function
@@ -1374,13 +1397,77 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
}
}
+ if (rxfh.rss_context) {
+ mutex_lock(&dev->ethtool->rss_lock);
+ locked = true;
+ }
+ if (create) {
+ if (rxfh_dev.rss_delete) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ctx = kzalloc(ethtool_rxfh_context_size(dev_indir_size,
+ dev_key_size,
+ ops->rxfh_priv_size),
+ GFP_KERNEL_ACCOUNT);
+ if (!ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ctx->indir_size = dev_indir_size;
+ ctx->key_size = dev_key_size;
+ ctx->priv_size = ops->rxfh_priv_size;
+ /* Initialise to an empty context */
+ ctx->hfunc = ETH_RSS_HASH_NO_CHANGE;
+ ctx->input_xfrm = RXH_XFRM_NO_CHANGE;
+ if (ops->create_rxfh_context) {
+ u32 limit = ops->rxfh_max_context_id ?: U32_MAX;
+ u32 ctx_id;
+
+ /* driver uses new API, core allocates ID */
+ ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx,
+ XA_LIMIT(1, limit), GFP_KERNEL_ACCOUNT);
+ if (ret < 0) {
+ kfree(ctx);
+ goto out;
+ }
+ WARN_ON(!ctx_id); /* can't happen */
+ rxfh.rss_context = ctx_id;
+ }
+ } else if (rxfh.rss_context) {
+ ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context);
+ if (!ctx) {
+ ret = -ENOENT;
+ goto out;
+ }
+ }
rxfh_dev.hfunc = rxfh.hfunc;
rxfh_dev.rss_context = rxfh.rss_context;
rxfh_dev.input_xfrm = rxfh.input_xfrm;
- ret = ops->set_rxfh(dev, &rxfh_dev, extack);
- if (ret)
+ if (rxfh.rss_context && ops->create_rxfh_context) {
+ if (create)
+ ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev,
+ extack);
+ else if (rxfh_dev.rss_delete)
+ ret = ops->remove_rxfh_context(dev, ctx,
+ rxfh.rss_context,
+ extack);
+ else
+ ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev,
+ extack);
+ } else {
+ ret = ops->set_rxfh(dev, &rxfh_dev, extack);
+ }
+ if (ret) {
+ if (create) {
+ /* failed to create, free our new tracking entry */
+ if (ops->create_rxfh_context)
+ xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context);
+ kfree(ctx);
+ }
goto out;
+ }
if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context),
&rxfh_dev.rss_context, sizeof(rxfh_dev.rss_context)))
@@ -1393,8 +1480,44 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
dev->priv_flags |= IFF_RXFH_CONFIGURED;
}
+ /* Update rss_ctx tracking */
+ if (create && !ops->create_rxfh_context) {
+ /* driver uses old API, it chose context ID */
+ if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh_dev.rss_context))) {
+ /* context ID reused, our tracking is screwed */
+ kfree(ctx);
+ goto out;
+ }
+ /* Allocate the exact ID the driver gave us */
+ if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh_dev.rss_context,
+ ctx, GFP_KERNEL))) {
+ kfree(ctx);
+ goto out;
+ }
+ }
+ if (rxfh_dev.rss_delete) {
+ WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx);
+ kfree(ctx);
+ } else if (ctx) {
+ if (rxfh_dev.indir) {
+ for (i = 0; i < dev_indir_size; i++)
+ ethtool_rxfh_context_indir(ctx)[i] = rxfh_dev.indir[i];
+ ctx->indir_configured = 1;
+ }
+ if (rxfh_dev.key) {
+ memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key,
+ dev_key_size);
+ ctx->key_configured = 1;
+ }
+ if (rxfh_dev.hfunc != ETH_RSS_HASH_NO_CHANGE)
+ ctx->hfunc = rxfh_dev.hfunc;
+ if (rxfh_dev.input_xfrm != RXH_XFRM_NO_CHANGE)
+ ctx->input_xfrm = rxfh_dev.input_xfrm;
+ }
out:
+ if (locked)
+ mutex_unlock(&dev->ethtool->rss_lock);
kfree(rss_config);
return ret;
}
@@ -1449,6 +1572,9 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr)
if (!dev->ethtool_ops->reset)
return -EOPNOTSUPP;
+ if (dev->ethtool->module_fw_flash_in_progress)
+ return -EBUSY;
+
if (copy_from_user(&reset, useraddr, sizeof(reset)))
return -EFAULT;
@@ -1503,7 +1629,7 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
if (ret)
return ret;
- dev->wol_enabled = !!wol.wolopts;
+ dev->ethtool->wol_enabled = !!wol.wolopts;
ethtool_notify(dev, ETHTOOL_MSG_WOL_NTF, NULL);
return 0;
@@ -1923,9 +2049,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
* indirection table/rxnfc settings */
if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use))
max_rxnfc_in_use = 0;
- if (!netif_is_rxfh_configured(dev) ||
- ethtool_get_max_rxfh_channel(dev, &max_rxfh_in_use))
- max_rxfh_in_use = 0;
+ max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev);
if (channels.combined_count + channels.rx_count <=
max_t(u64, max_rxnfc_in_use, max_rxfh_in_use))
return -EINVAL;
@@ -2462,6 +2586,9 @@ int ethtool_get_module_info_call(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
+ if (dev->ethtool->module_fw_flash_in_progress)
+ return -EBUSY;
+
if (dev->sfp_bus)
return sfp_get_module_info(dev->sfp_bus, modinfo);
@@ -2499,6 +2626,9 @@ int ethtool_get_module_eeprom_call(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
+ if (dev->ethtool->module_fw_flash_in_progress)
+ return -EBUSY;
+
if (dev->sfp_bus)
return sfp_get_module_eeprom(dev->sfp_bus, ee, data);
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index b2de2108b356..34d76e87847d 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -37,6 +37,8 @@ static int linkstate_get_sqi(struct net_device *dev)
mutex_lock(&phydev->lock);
if (!phydev->drv || !phydev->drv->get_sqi)
ret = -EOPNOTSUPP;
+ else if (!phydev->link)
+ ret = -ENETDOWN;
else
ret = phydev->drv->get_sqi(phydev);
mutex_unlock(&phydev->lock);
@@ -55,6 +57,8 @@ static int linkstate_get_sqi_max(struct net_device *dev)
mutex_lock(&phydev->lock);
if (!phydev->drv || !phydev->drv->get_sqi_max)
ret = -EOPNOTSUPP;
+ else if (!phydev->link)
+ ret = -ENETDOWN;
else
ret = phydev->drv->get_sqi_max(phydev);
mutex_unlock(&phydev->lock);
@@ -62,6 +66,17 @@ static int linkstate_get_sqi_max(struct net_device *dev)
return ret;
};
+static bool linkstate_sqi_critical_error(int sqi)
+{
+ return sqi < 0 && sqi != -EOPNOTSUPP && sqi != -ENETDOWN;
+}
+
+static bool linkstate_sqi_valid(struct linkstate_reply_data *data)
+{
+ return data->sqi >= 0 && data->sqi_max >= 0 &&
+ data->sqi <= data->sqi_max;
+}
+
static int linkstate_get_link_ext_state(struct net_device *dev,
struct linkstate_reply_data *data)
{
@@ -93,12 +108,12 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
data->link = __ethtool_get_link(dev);
ret = linkstate_get_sqi(dev);
- if (ret < 0 && ret != -EOPNOTSUPP)
+ if (linkstate_sqi_critical_error(ret))
goto out;
data->sqi = ret;
ret = linkstate_get_sqi_max(dev);
- if (ret < 0 && ret != -EOPNOTSUPP)
+ if (linkstate_sqi_critical_error(ret))
goto out;
data->sqi_max = ret;
@@ -136,11 +151,10 @@ static int linkstate_reply_size(const struct ethnl_req_info *req_base,
len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */
+ 0;
- if (data->sqi != -EOPNOTSUPP)
- len += nla_total_size(sizeof(u32));
-
- if (data->sqi_max != -EOPNOTSUPP)
- len += nla_total_size(sizeof(u32));
+ if (linkstate_sqi_valid(data)) {
+ len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI */
+ len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI_MAX */
+ }
if (data->link_ext_state_provided)
len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */
@@ -164,13 +178,14 @@ static int linkstate_fill_reply(struct sk_buff *skb,
nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link))
return -EMSGSIZE;
- if (data->sqi != -EOPNOTSUPP &&
- nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
- return -EMSGSIZE;
+ if (linkstate_sqi_valid(data)) {
+ if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
+ return -EMSGSIZE;
- if (data->sqi_max != -EOPNOTSUPP &&
- nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max))
- return -EMSGSIZE;
+ if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX,
+ data->sqi_max))
+ return -EMSGSIZE;
+ }
if (data->link_ext_state_provided) {
if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE,
diff --git a/net/ethtool/module.c b/net/ethtool/module.c
index ceb575efc290..aba78436d350 100644
--- a/net/ethtool/module.c
+++ b/net/ethtool/module.c
@@ -1,10 +1,14 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/ethtool.h>
+#include <linux/firmware.h>
+#include <linux/sfp.h>
+#include <net/devlink.h>
#include "netlink.h"
#include "common.h"
#include "bitset.h"
+#include "module_fw.h"
struct module_req_info {
struct ethnl_req_info base;
@@ -33,6 +37,12 @@ static int module_get_power_mode(struct net_device *dev,
if (!ops->get_module_power_mode)
return 0;
+ if (dev->ethtool->module_fw_flash_in_progress) {
+ NL_SET_ERR_MSG(extack,
+ "Module firmware flashing is in progress");
+ return -EBUSY;
+ }
+
return ops->get_module_power_mode(dev, &data->power, extack);
}
@@ -109,6 +119,12 @@ ethnl_set_module_validate(struct ethnl_req_info *req_info,
if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY])
return 0;
+ if (req_info->dev->ethtool->module_fw_flash_in_progress) {
+ NL_SET_ERR_MSG(info->extack,
+ "Module firmware flashing is in progress");
+ return -EBUSY;
+ }
+
if (!ops->get_module_power_mode || !ops->set_module_power_mode) {
NL_SET_ERR_MSG_ATTR(info->extack,
tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY],
@@ -158,3 +174,381 @@ const struct ethnl_request_ops ethnl_module_request_ops = {
.set = ethnl_set_module,
.set_ntf_cmd = ETHTOOL_MSG_MODULE_NTF,
};
+
+/* MODULE_FW_FLASH_ACT */
+
+const struct nla_policy
+ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1] = {
+ [ETHTOOL_A_MODULE_FW_FLASH_HEADER] =
+ NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME] = { .type = NLA_NUL_STRING },
+ [ETHTOOL_A_MODULE_FW_FLASH_PASSWORD] = { .type = NLA_U32 },
+};
+
+static LIST_HEAD(module_fw_flash_work_list);
+static DEFINE_SPINLOCK(module_fw_flash_work_list_lock);
+
+static int
+module_flash_fw_work_list_add(struct ethtool_module_fw_flash *module_fw,
+ struct genl_info *info)
+{
+ struct ethtool_module_fw_flash *work;
+
+ /* First, check if already registered. */
+ spin_lock(&module_fw_flash_work_list_lock);
+ list_for_each_entry(work, &module_fw_flash_work_list, list) {
+ if (work->fw_update.ntf_params.portid == info->snd_portid &&
+ work->fw_update.dev == module_fw->fw_update.dev) {
+ spin_unlock(&module_fw_flash_work_list_lock);
+ return -EALREADY;
+ }
+ }
+
+ list_add_tail(&module_fw->list, &module_fw_flash_work_list);
+ spin_unlock(&module_fw_flash_work_list_lock);
+
+ return 0;
+}
+
+static void module_flash_fw_work_list_del(struct list_head *list)
+{
+ spin_lock(&module_fw_flash_work_list_lock);
+ list_del(list);
+ spin_unlock(&module_fw_flash_work_list_lock);
+}
+
+static void module_flash_fw_work(struct work_struct *work)
+{
+ struct ethtool_module_fw_flash *module_fw;
+
+ module_fw = container_of(work, struct ethtool_module_fw_flash, work);
+
+ ethtool_cmis_fw_update(&module_fw->fw_update);
+
+ module_flash_fw_work_list_del(&module_fw->list);
+ module_fw->fw_update.dev->ethtool->module_fw_flash_in_progress = false;
+ netdev_put(module_fw->fw_update.dev, &module_fw->dev_tracker);
+ release_firmware(module_fw->fw_update.fw);
+ kfree(module_fw);
+}
+
+#define MODULE_EEPROM_PHYS_ID_PAGE 0
+#define MODULE_EEPROM_PHYS_ID_I2C_ADDR 0x50
+
+static int module_flash_fw_work_init(struct ethtool_module_fw_flash *module_fw,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_module_eeprom page_data = {};
+ u8 phys_id;
+ int err;
+
+ /* Fetch the SFF-8024 Identifier Value. For all supported standards, it
+ * is located at I2C address 0x50, byte 0. See section 4.1 in SFF-8024,
+ * revision 4.9.
+ */
+ page_data.page = MODULE_EEPROM_PHYS_ID_PAGE;
+ page_data.offset = SFP_PHYS_ID;
+ page_data.length = sizeof(phys_id);
+ page_data.i2c_address = MODULE_EEPROM_PHYS_ID_I2C_ADDR;
+ page_data.data = &phys_id;
+
+ err = ops->get_module_eeprom_by_page(dev, &page_data, extack);
+ if (err < 0)
+ return err;
+
+ switch (phys_id) {
+ case SFF8024_ID_QSFP_DD:
+ case SFF8024_ID_OSFP:
+ case SFF8024_ID_DSFP:
+ case SFF8024_ID_QSFP_PLUS_CMIS:
+ case SFF8024_ID_SFP_DD_CMIS:
+ case SFF8024_ID_SFP_PLUS_CMIS:
+ INIT_WORK(&module_fw->work, module_flash_fw_work);
+ break;
+ default:
+ NL_SET_ERR_MSG(extack,
+ "Module type does not support firmware flashing");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+void ethnl_module_fw_flash_sock_destroy(struct ethnl_sock_priv *sk_priv)
+{
+ struct ethtool_module_fw_flash *work;
+
+ spin_lock(&module_fw_flash_work_list_lock);
+ list_for_each_entry(work, &module_fw_flash_work_list, list) {
+ if (work->fw_update.dev == sk_priv->dev &&
+ work->fw_update.ntf_params.portid == sk_priv->portid) {
+ work->fw_update.ntf_params.closed_sock = true;
+ break;
+ }
+ }
+ spin_unlock(&module_fw_flash_work_list_lock);
+}
+
+static int
+module_flash_fw_schedule(struct net_device *dev, const char *file_name,
+ struct ethtool_module_fw_flash_params *params,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ struct ethtool_cmis_fw_update_params *fw_update;
+ struct ethtool_module_fw_flash *module_fw;
+ int err;
+
+ module_fw = kzalloc(sizeof(*module_fw), GFP_KERNEL);
+ if (!module_fw)
+ return -ENOMEM;
+
+ fw_update = &module_fw->fw_update;
+ fw_update->params = *params;
+ err = request_firmware_direct(&fw_update->fw,
+ file_name, &dev->dev);
+ if (err) {
+ NL_SET_ERR_MSG(info->extack,
+ "Failed to request module firmware image");
+ goto err_free;
+ }
+
+ err = module_flash_fw_work_init(module_fw, dev, info->extack);
+ if (err < 0)
+ goto err_release_firmware;
+
+ dev->ethtool->module_fw_flash_in_progress = true;
+ netdev_hold(dev, &module_fw->dev_tracker, GFP_KERNEL);
+ fw_update->dev = dev;
+ fw_update->ntf_params.portid = info->snd_portid;
+ fw_update->ntf_params.seq = info->snd_seq;
+ fw_update->ntf_params.closed_sock = false;
+
+ err = ethnl_sock_priv_set(skb, dev, fw_update->ntf_params.portid,
+ ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH);
+ if (err < 0)
+ goto err_release_firmware;
+
+ err = module_flash_fw_work_list_add(module_fw, info);
+ if (err < 0)
+ goto err_release_firmware;
+
+ schedule_work(&module_fw->work);
+
+ return 0;
+
+err_release_firmware:
+ release_firmware(fw_update->fw);
+err_free:
+ kfree(module_fw);
+ return err;
+}
+
+static int module_flash_fw(struct net_device *dev, struct nlattr **tb,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ struct ethtool_module_fw_flash_params params = {};
+ const char *file_name;
+ struct nlattr *attr;
+
+ if (GENL_REQ_ATTR_CHECK(info, ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME))
+ return -EINVAL;
+
+ file_name = nla_data(tb[ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME]);
+
+ attr = tb[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD];
+ if (attr) {
+ params.password = cpu_to_be32(nla_get_u32(attr));
+ params.password_valid = true;
+ }
+
+ return module_flash_fw_schedule(dev, file_name, &params, skb, info);
+}
+
+static int ethnl_module_fw_flash_validate(struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_port *devlink_port = dev->devlink_port;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+
+ if (!ops->set_module_eeprom_by_page ||
+ !ops->get_module_eeprom_by_page) {
+ NL_SET_ERR_MSG(extack,
+ "Flashing module firmware is not supported by this device");
+ return -EOPNOTSUPP;
+ }
+
+ if (!ops->reset) {
+ NL_SET_ERR_MSG(extack,
+ "Reset module is not supported by this device, so flashing is not permitted");
+ return -EOPNOTSUPP;
+ }
+
+ if (dev->ethtool->module_fw_flash_in_progress) {
+ NL_SET_ERR_MSG(extack, "Module firmware flashing already in progress");
+ return -EBUSY;
+ }
+
+ if (dev->flags & IFF_UP) {
+ NL_SET_ERR_MSG(extack, "Netdevice is up, so flashing is not permitted");
+ return -EBUSY;
+ }
+
+ if (devlink_port && devlink_port->attrs.split) {
+ NL_SET_ERR_MSG(extack, "Can't perform firmware flashing on a split port");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ethnl_req_info req_info = {};
+ struct nlattr **tb = info->attrs;
+ struct net_device *dev;
+ int ret;
+
+ ret = ethnl_parse_header_dev_get(&req_info,
+ tb[ETHTOOL_A_MODULE_FW_FLASH_HEADER],
+ genl_info_net(info), info->extack,
+ true);
+ if (ret < 0)
+ return ret;
+ dev = req_info.dev;
+
+ rtnl_lock();
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto out_rtnl;
+
+ ret = ethnl_module_fw_flash_validate(dev, info->extack);
+ if (ret < 0)
+ goto out_rtnl;
+
+ ret = module_flash_fw(dev, tb, skb, info);
+
+ ethnl_ops_complete(dev);
+
+out_rtnl:
+ rtnl_unlock();
+ ethnl_parse_header_dev_put(&req_info);
+ return ret;
+}
+
+/* MODULE_FW_FLASH_NTF */
+
+static int
+ethnl_module_fw_flash_ntf_put_err(struct sk_buff *skb, char *err_msg,
+ char *sub_err_msg)
+{
+ int err_msg_len, sub_err_msg_len, total_len;
+ struct nlattr *attr;
+
+ if (!err_msg)
+ return 0;
+
+ err_msg_len = strlen(err_msg);
+ total_len = err_msg_len + 2; /* For period and NUL. */
+
+ if (sub_err_msg) {
+ sub_err_msg_len = strlen(sub_err_msg);
+ total_len += sub_err_msg_len + 2; /* For ", ". */
+ }
+
+ attr = nla_reserve(skb, ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG,
+ total_len);
+ if (!attr)
+ return -ENOMEM;
+
+ if (sub_err_msg)
+ sprintf(nla_data(attr), "%s, %s.", err_msg, sub_err_msg);
+ else
+ sprintf(nla_data(attr), "%s.", err_msg);
+
+ return 0;
+}
+
+static void
+ethnl_module_fw_flash_ntf(struct net_device *dev,
+ enum ethtool_module_fw_flash_status status,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params,
+ char *err_msg, char *sub_err_msg,
+ u64 done, u64 total)
+{
+ struct sk_buff *skb;
+ void *hdr;
+ int ret;
+
+ if (ntf_params->closed_sock)
+ return;
+
+ skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return;
+
+ hdr = ethnl_unicast_put(skb, ntf_params->portid, ntf_params->seq,
+ ETHTOOL_MSG_MODULE_FW_FLASH_NTF);
+ if (!hdr)
+ goto err_skb;
+
+ ret = ethnl_fill_reply_header(skb, dev,
+ ETHTOOL_A_MODULE_FW_FLASH_HEADER);
+ if (ret < 0)
+ goto err_skb;
+
+ if (nla_put_u32(skb, ETHTOOL_A_MODULE_FW_FLASH_STATUS, status))
+ goto err_skb;
+
+ ret = ethnl_module_fw_flash_ntf_put_err(skb, err_msg, sub_err_msg);
+ if (ret < 0)
+ goto err_skb;
+
+ if (nla_put_uint(skb, ETHTOOL_A_MODULE_FW_FLASH_DONE, done))
+ goto err_skb;
+
+ if (nla_put_uint(skb, ETHTOOL_A_MODULE_FW_FLASH_TOTAL, total))
+ goto err_skb;
+
+ genlmsg_end(skb, hdr);
+ genlmsg_unicast(dev_net(dev), skb, ntf_params->portid);
+ return;
+
+err_skb:
+ nlmsg_free(skb);
+}
+
+void ethnl_module_fw_flash_ntf_err(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params,
+ char *err_msg, char *sub_err_msg)
+{
+ ethnl_module_fw_flash_ntf(dev, ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR,
+ params, err_msg, sub_err_msg, 0, 0);
+}
+
+void
+ethnl_module_fw_flash_ntf_start(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params)
+{
+ ethnl_module_fw_flash_ntf(dev, ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED,
+ params, NULL, NULL, 0, 0);
+}
+
+void
+ethnl_module_fw_flash_ntf_complete(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params)
+{
+ ethnl_module_fw_flash_ntf(dev, ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED,
+ params, NULL, NULL, 0, 0);
+}
+
+void
+ethnl_module_fw_flash_ntf_in_progress(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params,
+ u64 done, u64 total)
+{
+ ethnl_module_fw_flash_ntf(dev,
+ ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS,
+ params, NULL, NULL, done, total);
+}
diff --git a/net/ethtool/module_fw.h b/net/ethtool/module_fw.h
new file mode 100644
index 000000000000..634543a12d0c
--- /dev/null
+++ b/net/ethtool/module_fw.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <uapi/linux/ethtool.h>
+#include "netlink.h"
+
+/**
+ * struct ethnl_module_fw_flash_ntf_params - module firmware flashing
+ * notifications parameters
+ * @portid: Netlink portid of sender.
+ * @seq: Sequence number of sender.
+ * @closed_sock: Indicates whether the socket was closed from user space.
+ */
+struct ethnl_module_fw_flash_ntf_params {
+ u32 portid;
+ u32 seq;
+ bool closed_sock;
+};
+
+/**
+ * struct ethtool_module_fw_flash_params - module firmware flashing parameters
+ * @password: Module password. Only valid when @pass_valid is set.
+ * @password_valid: Whether the module password is valid or not.
+ */
+struct ethtool_module_fw_flash_params {
+ __be32 password;
+ u8 password_valid:1;
+};
+
+/**
+ * struct ethtool_cmis_fw_update_params - CMIS firmware update specific
+ * parameters
+ * @dev: Pointer to the net_device to be flashed.
+ * @params: Module firmware flashing parameters.
+ * @ntf_params: Module firmware flashing notification parameters.
+ * @fw: Firmware to flash.
+ */
+struct ethtool_cmis_fw_update_params {
+ struct net_device *dev;
+ struct ethtool_module_fw_flash_params params;
+ struct ethnl_module_fw_flash_ntf_params ntf_params;
+ const struct firmware *fw;
+};
+
+/**
+ * struct ethtool_module_fw_flash - module firmware flashing
+ * @list: List node for &module_fw_flash_work_list.
+ * @dev_tracker: Refcount tracker for @dev.
+ * @work: The flashing firmware work.
+ * @fw_update: CMIS firmware update specific parameters.
+ */
+struct ethtool_module_fw_flash {
+ struct list_head list;
+ netdevice_tracker dev_tracker;
+ struct work_struct work;
+ struct ethtool_cmis_fw_update_params fw_update;
+};
+
+void ethnl_module_fw_flash_sock_destroy(struct ethnl_sock_priv *sk_priv);
+
+void
+ethnl_module_fw_flash_ntf_err(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params,
+ char *err_msg, char *sub_err_msg);
+void
+ethnl_module_fw_flash_ntf_start(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params);
+void
+ethnl_module_fw_flash_ntf_complete(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params);
+void
+ethnl_module_fw_flash_ntf_in_progress(struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *params,
+ u64 done, u64 total);
+
+void ethtool_cmis_fw_update(struct ethtool_cmis_fw_update_params *params);
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index bd04f28d5cf4..cb1eea00e349 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -4,6 +4,7 @@
#include <linux/ethtool_netlink.h>
#include <linux/pm_runtime.h>
#include "netlink.h"
+#include "module_fw.h"
static struct genl_family ethtool_genl_family;
@@ -30,6 +31,35 @@ const struct nla_policy ethnl_header_policy_stats[] = {
ETHTOOL_FLAGS_STATS),
};
+int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid,
+ enum ethnl_sock_type type)
+{
+ struct ethnl_sock_priv *sk_priv;
+
+ sk_priv = genl_sk_priv_get(&ethtool_genl_family, NETLINK_CB(skb).sk);
+ if (IS_ERR(sk_priv))
+ return PTR_ERR(sk_priv);
+
+ sk_priv->dev = dev;
+ sk_priv->portid = portid;
+ sk_priv->type = type;
+
+ return 0;
+}
+
+static void ethnl_sock_priv_destroy(void *priv)
+{
+ struct ethnl_sock_priv *sk_priv = priv;
+
+ switch (sk_priv->type) {
+ case ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH:
+ ethnl_module_fw_flash_sock_destroy(sk_priv);
+ break;
+ default:
+ break;
+ }
+}
+
int ethnl_ops_begin(struct net_device *dev)
{
int ret;
@@ -239,6 +269,11 @@ void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd)
cmd);
}
+void *ethnl_unicast_put(struct sk_buff *skb, u32 portid, u32 seq, u8 cmd)
+{
+ return genlmsg_put(skb, portid, seq, &ethtool_genl_family, 0, cmd);
+}
+
int ethnl_multicast(struct sk_buff *skb, struct net_device *dev)
{
return genlmsg_multicast_netns(&ethtool_genl_family, dev_net(dev), skb,
@@ -760,10 +795,22 @@ static void ethnl_notify_features(struct netdev_notifier_info *info)
static int ethnl_netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
+ struct netdev_notifier_info *info = ptr;
+ struct netlink_ext_ack *extack;
+ struct net_device *dev;
+
+ dev = netdev_notifier_info_to_dev(info);
+ extack = netdev_notifier_info_to_extack(info);
+
switch (event) {
case NETDEV_FEAT_CHANGE:
ethnl_notify_features(ptr);
break;
+ case NETDEV_PRE_UP:
+ if (dev->ethtool->module_fw_flash_in_progress) {
+ NL_SET_ERR_MSG(extack, "Can't set port up while flashing module firmware");
+ return NOTIFY_BAD;
+ }
}
return NOTIFY_DONE;
@@ -1125,6 +1172,13 @@ static const struct genl_ops ethtool_genl_ops[] = {
.policy = ethnl_mm_set_policy,
.maxattr = ARRAY_SIZE(ethnl_mm_set_policy) - 1,
},
+ {
+ .cmd = ETHTOOL_MSG_MODULE_FW_FLASH_ACT,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_act_module_fw_flash,
+ .policy = ethnl_module_fw_flash_act_policy,
+ .maxattr = ARRAY_SIZE(ethnl_module_fw_flash_act_policy) - 1,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
@@ -1141,6 +1195,8 @@ static struct genl_family ethtool_genl_family __ro_after_init = {
.resv_start_op = ETHTOOL_MSG_MODULE_GET + 1,
.mcgrps = ethtool_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(ethtool_nl_mcgrps),
+ .sock_priv_size = sizeof(struct ethnl_sock_priv),
+ .sock_priv_destroy = ethnl_sock_priv_destroy,
};
/* module setup */
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 9a333a8d04c1..46ec273a87c5 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -21,6 +21,7 @@ struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd,
void **ehdrp);
void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd);
void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd);
+void *ethnl_unicast_put(struct sk_buff *skb, u32 portid, u32 seq, u8 cmd);
int ethnl_multicast(struct sk_buff *skb, struct net_device *dev);
/**
@@ -283,6 +284,19 @@ struct ethnl_reply_data {
int ethnl_ops_begin(struct net_device *dev);
void ethnl_ops_complete(struct net_device *dev);
+enum ethnl_sock_type {
+ ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH,
+};
+
+struct ethnl_sock_priv {
+ struct net_device *dev;
+ u32 portid;
+ enum ethnl_sock_type type;
+};
+
+int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid,
+ enum ethnl_sock_type type);
+
/**
* struct ethnl_request_ops - unified handling of GET and SET requests
* @request_cmd: command id for request (GET)
@@ -441,6 +455,7 @@ extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1]
extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1];
extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1];
extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1];
+extern const struct nla_policy ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1];
int ethnl_set_features(struct sk_buff *skb, struct genl_info *info);
int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info);
@@ -448,6 +463,7 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info);
int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info);
int ethnl_tunnel_info_start(struct netlink_callback *cb);
int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info);
extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index 2c981d443f27..ba46c9c8b12d 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -86,10 +86,56 @@ static int pse_reply_size(const struct ethnl_req_info *req_base,
len += nla_total_size(sizeof(u32)); /* _C33_PSE_ADMIN_STATE */
if (st->c33_pw_status > 0)
len += nla_total_size(sizeof(u32)); /* _C33_PSE_PW_D_STATUS */
+ if (st->c33_pw_class > 0)
+ len += nla_total_size(sizeof(u32)); /* _C33_PSE_PW_CLASS */
+ if (st->c33_actual_pw > 0)
+ len += nla_total_size(sizeof(u32)); /* _C33_PSE_ACTUAL_PW */
+ if (st->c33_ext_state_info.c33_pse_ext_state > 0) {
+ len += nla_total_size(sizeof(u32)); /* _C33_PSE_EXT_STATE */
+ if (st->c33_ext_state_info.__c33_pse_ext_substate > 0)
+ /* _C33_PSE_EXT_SUBSTATE */
+ len += nla_total_size(sizeof(u32));
+ }
+ if (st->c33_avail_pw_limit > 0)
+ /* _C33_AVAIL_PSE_PW_LIMIT */
+ len += nla_total_size(sizeof(u32));
+ if (st->c33_pw_limit_nb_ranges > 0)
+ /* _C33_PSE_PW_LIMIT_RANGES */
+ len += st->c33_pw_limit_nb_ranges *
+ (nla_total_size(0) +
+ nla_total_size(sizeof(u32)) * 2);
return len;
}
+static int pse_put_pw_limit_ranges(struct sk_buff *skb,
+ const struct pse_control_status *st)
+{
+ const struct ethtool_c33_pse_pw_limit_range *pw_limit_ranges;
+ int i;
+
+ pw_limit_ranges = st->c33_pw_limit_ranges;
+ for (i = 0; i < st->c33_pw_limit_nb_ranges; i++) {
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_LIMIT_MIN,
+ pw_limit_ranges->min) ||
+ nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_LIMIT_MAX,
+ pw_limit_ranges->max)) {
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+ }
+ nla_nest_end(skb, nest);
+ pw_limit_ranges++;
+ }
+
+ return 0;
+}
+
static int pse_fill_reply(struct sk_buff *skb,
const struct ethnl_req_info *req_base,
const struct ethnl_reply_data *reply_base)
@@ -117,9 +163,46 @@ static int pse_fill_reply(struct sk_buff *skb,
st->c33_pw_status))
return -EMSGSIZE;
+ if (st->c33_pw_class > 0 &&
+ nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_CLASS,
+ st->c33_pw_class))
+ return -EMSGSIZE;
+
+ if (st->c33_actual_pw > 0 &&
+ nla_put_u32(skb, ETHTOOL_A_C33_PSE_ACTUAL_PW,
+ st->c33_actual_pw))
+ return -EMSGSIZE;
+
+ if (st->c33_ext_state_info.c33_pse_ext_state > 0) {
+ if (nla_put_u32(skb, ETHTOOL_A_C33_PSE_EXT_STATE,
+ st->c33_ext_state_info.c33_pse_ext_state))
+ return -EMSGSIZE;
+
+ if (st->c33_ext_state_info.__c33_pse_ext_substate > 0 &&
+ nla_put_u32(skb, ETHTOOL_A_C33_PSE_EXT_SUBSTATE,
+ st->c33_ext_state_info.__c33_pse_ext_substate))
+ return -EMSGSIZE;
+ }
+
+ if (st->c33_avail_pw_limit > 0 &&
+ nla_put_u32(skb, ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT,
+ st->c33_avail_pw_limit))
+ return -EMSGSIZE;
+
+ if (st->c33_pw_limit_nb_ranges > 0 &&
+ pse_put_pw_limit_ranges(skb, st))
+ return -EMSGSIZE;
+
return 0;
}
+static void pse_cleanup_data(struct ethnl_reply_data *reply_base)
+{
+ const struct pse_reply_data *data = PSE_REPDATA(reply_base);
+
+ kfree(data->status.c33_pw_limit_ranges);
+}
+
/* PSE_SET */
const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
@@ -130,6 +213,7 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
[ETHTOOL_A_C33_PSE_ADMIN_CONTROL] =
NLA_POLICY_RANGE(NLA_U32, ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED,
ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED),
+ [ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT] = { .type = NLA_U32 },
};
static int
@@ -172,19 +256,39 @@ static int
ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
{
struct net_device *dev = req_info->dev;
- struct pse_control_config config = {};
struct nlattr **tb = info->attrs;
struct phy_device *phydev;
+ int ret = 0;
phydev = dev->phydev;
+
+ if (tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]) {
+ unsigned int pw_limit;
+
+ pw_limit = nla_get_u32(tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]);
+ ret = pse_ethtool_set_pw_limit(phydev->psec, info->extack,
+ pw_limit);
+ if (ret)
+ return ret;
+ }
+
/* These values are already validated by the ethnl_pse_set_policy */
- if (pse_has_podl(phydev->psec))
- config.podl_admin_control = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]);
- if (pse_has_c33(phydev->psec))
- config.c33_admin_control = nla_get_u32(tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]);
+ if (tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] ||
+ tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]) {
+ struct pse_control_config config = {};
+
+ if (pse_has_podl(phydev->psec))
+ config.podl_admin_control = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]);
+ if (pse_has_c33(phydev->psec))
+ config.c33_admin_control = nla_get_u32(tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]);
+
+ ret = pse_ethtool_set_config(phydev->psec, info->extack,
+ &config);
+ if (ret)
+ return ret;
+ }
- /* Return errno directly - PSE has no notification */
- return pse_ethtool_set_config(phydev->psec, info->extack, &config);
+ return ret;
}
const struct ethnl_request_ops ethnl_pse_request_ops = {
@@ -197,6 +301,7 @@ const struct ethnl_request_ops ethnl_pse_request_ops = {
.prepare_data = pse_prepare_data,
.reply_size = pse_reply_size,
.fill_reply = pse_fill_reply,
+ .cleanup_data = pse_cleanup_data,
.set_validate = ethnl_set_pse_validate,
.set = ethnl_set_pse,
diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c
index 0ed56c9ac1bc..a39d8000d808 100644
--- a/net/ethtool/wol.c
+++ b/net/ethtool/wol.c
@@ -137,7 +137,7 @@ ethnl_set_wol(struct ethnl_req_info *req_info, struct genl_info *info)
ret = dev->ethtool_ops->set_wol(dev, &wol);
if (ret)
return ret;
- dev->wol_enabled = !!wol.wolopts;
+ dev->ethtool->wol_enabled = !!wol.wolopts;
return 1;
}
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7adace541fe2..9712cdb8087c 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1383,6 +1383,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb,
req.sdiag_family = AF_UNSPEC; /* compatibility */
req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
req.idiag_ext = rc->idiag_ext;
+ req.pad = 0;
req.idiag_states = rc->idiag_states;
req.id = rc->id;
@@ -1398,6 +1399,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
req.sdiag_family = rc->idiag_family;
req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
req.idiag_ext = rc->idiag_ext;
+ req.pad = 0;
req.idiag_states = rc->idiag_states;
req.id = rc->id;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ec2ed92dcad5..e0f54b9be850 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2129,8 +2129,16 @@ void tcp_clear_retrans(struct tcp_sock *tp)
static inline void tcp_init_undo(struct tcp_sock *tp)
{
tp->undo_marker = tp->snd_una;
+
/* Retransmission still in flight may cause DSACKs later. */
- tp->undo_retrans = tp->retrans_out ? : -1;
+ /* First, account for regular retransmits in flight: */
+ tp->undo_retrans = tp->retrans_out;
+ /* Next, account for TLP retransmits in flight: */
+ if (tp->tlp_high_seq && tp->tlp_retrans)
+ tp->undo_retrans++;
+ /* Finally, avoid 0, because undo_retrans==0 means "can undo now": */
+ if (!tp->undo_retrans)
+ tp->undo_retrans = -1;
}
static bool tcp_is_rack(const struct sock *sk)
@@ -2209,6 +2217,7 @@ void tcp_enter_loss(struct sock *sk)
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
+ tp->tlp_high_seq = 0;
tcp_ecn_queue_cwr(tp);
/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
@@ -3077,7 +3086,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
return;
if (tcp_try_undo_dsack(sk))
- tcp_try_keep_open(sk);
+ tcp_try_to_open(sk, flag);
tcp_identify_packet_loss(sk, ack_flag);
if (icsk->icsk_ca_state != TCP_CA_Recovery) {
@@ -4228,6 +4237,13 @@ void tcp_parse_options(const struct net *net,
*/
break;
#endif
+#ifdef CONFIG_TCP_AO
+ case TCPOPT_AO:
+ /* TCP AO has already been checked
+ * (see tcp_inbound_ao_hash()).
+ */
+ break;
+#endif
case TCPOPT_FASTOPEN:
tcp_parse_fastopen_option(
opsize - TCPOLEN_FASTOPEN_BASE,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index e93df98de3f4..b01eb6d94413 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -619,6 +619,7 @@ static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] =
[TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, },
[TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY,
.len = sizeof(struct in6_addr), },
+ [TCP_METRICS_ATTR_SADDR_IPV4] = { .type = NLA_U32, },
/* Following attributes are not received for GET/DEL,
* we keep them for reference
*/
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index ab4b6de0e069..4d40615dc8fc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -479,15 +479,26 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
const struct sk_buff *skb,
u32 rtx_delta)
{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
const struct tcp_sock *tp = tcp_sk(sk);
- const int timeout = TCP_RTO_MAX * 2;
+ int timeout = TCP_RTO_MAX * 2;
s32 rcv_delta;
+ if (user_timeout) {
+ /* If user application specified a TCP_USER_TIMEOUT,
+ * it does not want win 0 packets to 'reset the timer'
+ * while retransmits are not making progress.
+ */
+ if (rtx_delta > user_timeout)
+ return true;
+ timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout));
+ }
/* Note: timer interrupt might have been delayed by at least one jiffy,
* and tp->rcv_tstamp might very well have been written recently.
* rcv_delta can thus be negative.
*/
- rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
+ rcv_delta = icsk->icsk_timeout - tp->rcv_tstamp;
if (rcv_delta <= timeout)
return false;
@@ -532,8 +543,6 @@ void tcp_retransmit_timer(struct sock *sk)
if (WARN_ON_ONCE(!skb))
return;
- tp->tlp_high_seq = 0;
-
if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
!((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
/* Receiver dastardly shrinks window. Our retransmits
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d08bf16d476d..49c622e743e8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -326,6 +326,8 @@ found:
goto fail_unlock;
}
+ sock_set_flag(sk, SOCK_RCU_FREE);
+
sk_add_node_rcu(sk, &hslot->head);
hslot->count++;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -342,7 +344,7 @@ found:
hslot2->count++;
spin_unlock(&hslot2->lock);
}
- sock_set_flag(sk, SOCK_RCU_FREE);
+
error = 0;
fail_unlock:
spin_unlock_bh(&hslot->lock);
@@ -938,8 +940,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
kfree_skb(skb);
return -EINVAL;
}
- if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
- dst_xfrm(skb_dst(skb))) {
+ if (is_udplite || dst_xfrm(skb_dst(skb))) {
kfree_skb(skb);
return -EIO;
}
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 59448a2dbf2c..aa2e0a28ca61 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -357,6 +357,14 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
else
uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0;
+ /* On the TX path, CHECKSUM_NONE and CHECKSUM_UNNECESSARY have the same
+ * meaning. However, check for bad offloads in the GSO stack expects the
+ * latter, if the checksum was calculated in software. To vouch for the
+ * segment skbs we actually need to set it on the gso_skb.
+ */
+ if (gso_skb->ip_summed == CHECKSUM_NONE)
+ gso_skb->ip_summed = CHECKSUM_UNNECESSARY;
+
/* update refcount for the packet */
if (copy_dtor) {
int delta = sum_truesize - gso_skb->truesize;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b56f0b9f4307..6602a2e9cdb5 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -46,7 +46,6 @@
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/ip6_tunnel.h>
-#include <trace/events/udp.h>
#include <net/xfrm.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
@@ -1257,8 +1256,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
kfree_skb(skb);
return -EINVAL;
}
- if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
- dst_xfrm(skb_dst(skb))) {
+ if (is_udplite || dst_xfrm(skb_dst(skb))) {
kfree_skb(skb);
return -EIO;
}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 64f446f0930b..29dfbd70c79c 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -60,7 +60,6 @@
#include <linux/atomic.h>
#include "l2tp_core.h"
-#include "trace.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -1290,17 +1289,20 @@ static void l2tp_session_unhash(struct l2tp_session *session)
static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
{
struct l2tp_session *session;
- struct list_head *pos;
- struct list_head *tmp;
spin_lock_bh(&tunnel->list_lock);
tunnel->acpt_newsess = false;
- list_for_each_safe(pos, tmp, &tunnel->session_list) {
- session = list_entry(pos, struct l2tp_session, list);
+ for (;;) {
+ session = list_first_entry_or_null(&tunnel->session_list,
+ struct l2tp_session, list);
+ if (!session)
+ break;
+ l2tp_session_inc_refcount(session);
list_del_init(&session->list);
spin_unlock_bh(&tunnel->list_lock);
l2tp_session_delete(session);
spin_lock_bh(&tunnel->list_lock);
+ l2tp_session_dec_refcount(session);
}
spin_unlock_bh(&tunnel->list_lock);
}
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 9ab7396668d2..21b7c3b280b4 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -161,8 +161,10 @@ void ieee802154_configure_durations(struct wpan_phy *phy,
}
phy->symbol_duration = duration;
- phy->lifs_period = (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC;
- phy->sifs_period = (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC;
+ phy->lifs_period =
+ (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC;
+ phy->sifs_period =
+ (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC;
}
EXPORT_SYMBOL(ieee802154_configure_durations);
@@ -184,10 +186,10 @@ static void ieee802154_setup_wpan_phy_pib(struct wpan_phy *wpan_phy)
* Should be done when all drivers sets this value.
*/
- wpan_phy->lifs_period =
- (IEEE802154_LIFS_PERIOD * wpan_phy->symbol_duration) / 1000;
- wpan_phy->sifs_period =
- (IEEE802154_SIFS_PERIOD * wpan_phy->symbol_duration) / 1000;
+ wpan_phy->lifs_period = (IEEE802154_LIFS_PERIOD *
+ wpan_phy->symbol_duration) / NSEC_PER_USEC;
+ wpan_phy->sifs_period = (IEEE802154_SIFS_PERIOD *
+ wpan_phy->symbol_duration) / NSEC_PER_USEC;
}
int ieee802154_register_hw(struct ieee802154_hw *hw)
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 2a6f1ed763c9..6fbed5bb5c3e 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -34,8 +34,8 @@ void ieee802154_xmit_sync_worker(struct work_struct *work)
if (res)
goto err_tx;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
+ DEV_STATS_INC(dev, tx_packets);
+ DEV_STATS_ADD(dev, tx_bytes, skb->len);
ieee802154_xmit_complete(&local->hw, skb, false);
@@ -90,8 +90,8 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
if (ret)
goto err_wake_netif_queue;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += len;
+ DEV_STATS_INC(dev, tx_packets);
+ DEV_STATS_ADD(dev, tx_bytes, len);
} else {
local->tx_skb = skb;
queue_work(local->workqueue, &local->sync_tx_work);
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 614815a3ed73..f0aa4d7ef499 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -142,8 +142,13 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
# flow table infrastructure
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
- nf_flow_table_offload.o
+ nf_flow_table_offload.o nf_flow_table_xdp.o
nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o
+ifeq ($(CONFIG_NF_FLOW_TABLE),m)
+nf_flow_table-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_flow_table_bpf.o
+else ifeq ($(CONFIG_NF_FLOW_TABLE),y)
+nf_flow_table-$(CONFIG_DEBUG_INFO_BTF) += nf_flow_table_bpf.o
+endif
obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 1e689c714127..83e452916403 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -126,7 +126,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
if (sctph->source != cp->vport || payload_csum ||
skb->ip_summed == CHECKSUM_PARTIAL) {
sctph->source = cp->vport;
- if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
+ if (!skb_is_gso(skb))
sctp_nat_csum(skb, sctph, sctphoff);
} else {
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -175,7 +175,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
(skb->ip_summed == CHECKSUM_PARTIAL &&
!(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
sctph->dest = cp->dport;
- if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
+ if (!skb_is_gso(skb))
sctp_nat_csum(skb, sctph, sctphoff);
} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 8715617b02fe..34ba14e59e95 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -321,7 +321,6 @@ insert_tree(struct net *net,
struct nf_conncount_rb *rbconn;
struct nf_conncount_tuple *conn;
unsigned int count = 0, gc_count = 0;
- u8 keylen = data->keylen;
bool do_gc = true;
spin_lock_bh(&nf_conncount_locks[hash]);
@@ -333,7 +332,7 @@ restart:
rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
parent = *rbnode;
- diff = key_diff(key, rbconn->key, keylen);
+ diff = key_diff(key, rbconn->key, data->keylen);
if (diff < 0) {
rbnode = &((*rbnode)->rb_left);
} else if (diff > 0) {
@@ -378,7 +377,7 @@ restart:
conn->tuple = *tuple;
conn->zone = *zone;
- memcpy(rbconn->key, key, sizeof(u32) * keylen);
+ memcpy(rbconn->key, key, sizeof(u32) * data->keylen);
nf_conncount_list_init(&rbconn->list);
list_add(&conn->node, &rbconn->list.head);
@@ -403,7 +402,6 @@ count_tree(struct net *net,
struct rb_node *parent;
struct nf_conncount_rb *rbconn;
unsigned int hash;
- u8 keylen = data->keylen;
hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
root = &data->root[hash];
@@ -414,7 +412,7 @@ count_tree(struct net *net,
rbconn = rb_entry(parent, struct nf_conncount_rb, node);
- diff = key_diff(key, rbconn->key, keylen);
+ diff = key_diff(key, rbconn->key, data->keylen);
if (diff < 0) {
parent = rcu_dereference_raw(parent->rb_left);
} else if (diff > 0) {
diff --git a/net/netfilter/nf_flow_table_bpf.c b/net/netfilter/nf_flow_table_bpf.c
new file mode 100644
index 000000000000..4a5f5195f2d2
--- /dev/null
+++ b/net/netfilter/nf_flow_table_bpf.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable Flow Table Helpers for XDP hook
+ *
+ * These are called from the XDP programs.
+ * Note that it is allowed to break compatibility for these functions since
+ * the interface they are exposed through to BPF programs is explicitly
+ * unstable.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <net/xdp.h>
+
+/* bpf_flowtable_opts - options for bpf flowtable helpers
+ * @error: out parameter, set for any encountered error
+ */
+struct bpf_flowtable_opts {
+ s32 error;
+};
+
+enum {
+ NF_BPF_FLOWTABLE_OPTS_SZ = 4,
+};
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in nf_flow_table BTF");
+
+__bpf_kfunc_start_defs();
+
+static struct flow_offload_tuple_rhash *
+bpf_xdp_flow_tuple_lookup(struct net_device *dev,
+ struct flow_offload_tuple *tuple, __be16 proto)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *nf_flow_table;
+ struct flow_offload *nf_flow;
+
+ nf_flow_table = nf_flowtable_by_dev(dev);
+ if (!nf_flow_table)
+ return ERR_PTR(-ENOENT);
+
+ tuplehash = flow_offload_lookup(nf_flow_table, tuple);
+ if (!tuplehash)
+ return ERR_PTR(-ENOENT);
+
+ nf_flow = container_of(tuplehash, struct flow_offload,
+ tuplehash[tuplehash->tuple.dir]);
+ flow_offload_refresh(nf_flow_table, nf_flow, false);
+
+ return tuplehash;
+}
+
+__bpf_kfunc struct flow_offload_tuple_rhash *
+bpf_xdp_flow_lookup(struct xdp_md *ctx, struct bpf_fib_lookup *fib_tuple,
+ struct bpf_flowtable_opts *opts, u32 opts_len)
+{
+ struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+ struct flow_offload_tuple tuple = {
+ .iifidx = fib_tuple->ifindex,
+ .l3proto = fib_tuple->family,
+ .l4proto = fib_tuple->l4_protocol,
+ .src_port = fib_tuple->sport,
+ .dst_port = fib_tuple->dport,
+ };
+ struct flow_offload_tuple_rhash *tuplehash;
+ __be16 proto;
+
+ if (opts_len != NF_BPF_FLOWTABLE_OPTS_SZ) {
+ opts->error = -EINVAL;
+ return NULL;
+ }
+
+ switch (fib_tuple->family) {
+ case AF_INET:
+ tuple.src_v4.s_addr = fib_tuple->ipv4_src;
+ tuple.dst_v4.s_addr = fib_tuple->ipv4_dst;
+ proto = htons(ETH_P_IP);
+ break;
+ case AF_INET6:
+ tuple.src_v6 = *(struct in6_addr *)&fib_tuple->ipv6_src;
+ tuple.dst_v6 = *(struct in6_addr *)&fib_tuple->ipv6_dst;
+ proto = htons(ETH_P_IPV6);
+ break;
+ default:
+ opts->error = -EAFNOSUPPORT;
+ return NULL;
+ }
+
+ tuplehash = bpf_xdp_flow_tuple_lookup(xdp->rxq->dev, &tuple, proto);
+ if (IS_ERR(tuplehash)) {
+ opts->error = PTR_ERR(tuplehash);
+ return NULL;
+ }
+
+ return tuplehash;
+}
+
+__diag_pop()
+
+__bpf_kfunc_end_defs();
+
+BTF_KFUNCS_START(nf_ft_kfunc_set)
+BTF_ID_FLAGS(func, bpf_xdp_flow_lookup, KF_TRUSTED_ARGS | KF_RET_NULL)
+BTF_KFUNCS_END(nf_ft_kfunc_set)
+
+static const struct btf_kfunc_id_set nf_flow_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &nf_ft_kfunc_set,
+};
+
+int nf_flow_register_bpf(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
+ &nf_flow_kfunc_set);
+}
+EXPORT_SYMBOL_GPL(nf_flow_register_bpf);
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 6eef15648b7b..88787b45e30d 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -98,7 +98,7 @@ static int __init nf_flow_inet_module_init(void)
nft_register_flowtable_type(&flowtable_ipv6);
nft_register_flowtable_type(&flowtable_inet);
- return 0;
+ return nf_flow_register_bpf();
}
static void __exit nf_flow_inet_module_exit(void)
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index a010b25076ca..ff1a4e36c2b5 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -1192,7 +1192,7 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
int err;
if (!nf_flowtable_hw_offload(flowtable))
- return 0;
+ return nf_flow_offload_xdp_setup(flowtable, dev, cmd);
if (dev->netdev_ops->ndo_setup_tc)
err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
diff --git a/net/netfilter/nf_flow_table_xdp.c b/net/netfilter/nf_flow_table_xdp.c
new file mode 100644
index 000000000000..e1252d042699
--- /dev/null
+++ b/net/netfilter/nf_flow_table_xdp.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_flow_table.h>
+
+struct flow_offload_xdp_ft {
+ struct list_head head;
+ struct nf_flowtable *ft;
+ struct rcu_head rcuhead;
+};
+
+struct flow_offload_xdp {
+ struct hlist_node hnode;
+ unsigned long net_device_addr;
+ struct list_head head;
+};
+
+#define NF_XDP_HT_BITS 4
+static DEFINE_HASHTABLE(nf_xdp_hashtable, NF_XDP_HT_BITS);
+static DEFINE_MUTEX(nf_xdp_hashtable_lock);
+
+/* caller must hold rcu read lock */
+struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev)
+{
+ unsigned long key = (unsigned long)dev;
+ struct flow_offload_xdp *iter;
+
+ hash_for_each_possible_rcu(nf_xdp_hashtable, iter, hnode, key) {
+ if (key == iter->net_device_addr) {
+ struct flow_offload_xdp_ft *ft_elem;
+
+ /* The user is supposed to insert a given net_device
+ * just into a single nf_flowtable so we always return
+ * the first element here.
+ */
+ ft_elem = list_first_or_null_rcu(&iter->head,
+ struct flow_offload_xdp_ft,
+ head);
+ return ft_elem ? ft_elem->ft : NULL;
+ }
+ }
+
+ return NULL;
+}
+
+static int nf_flowtable_by_dev_insert(struct nf_flowtable *ft,
+ const struct net_device *dev)
+{
+ struct flow_offload_xdp *iter, *elem = NULL;
+ unsigned long key = (unsigned long)dev;
+ struct flow_offload_xdp_ft *ft_elem;
+
+ ft_elem = kzalloc(sizeof(*ft_elem), GFP_KERNEL_ACCOUNT);
+ if (!ft_elem)
+ return -ENOMEM;
+
+ ft_elem->ft = ft;
+
+ mutex_lock(&nf_xdp_hashtable_lock);
+
+ hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) {
+ if (key == iter->net_device_addr) {
+ elem = iter;
+ break;
+ }
+ }
+
+ if (!elem) {
+ elem = kzalloc(sizeof(*elem), GFP_KERNEL_ACCOUNT);
+ if (!elem)
+ goto err_unlock;
+
+ elem->net_device_addr = key;
+ INIT_LIST_HEAD(&elem->head);
+ hash_add_rcu(nf_xdp_hashtable, &elem->hnode, key);
+ }
+ list_add_tail_rcu(&ft_elem->head, &elem->head);
+
+ mutex_unlock(&nf_xdp_hashtable_lock);
+
+ return 0;
+
+err_unlock:
+ mutex_unlock(&nf_xdp_hashtable_lock);
+ kfree(ft_elem);
+
+ return -ENOMEM;
+}
+
+static void nf_flowtable_by_dev_remove(struct nf_flowtable *ft,
+ const struct net_device *dev)
+{
+ struct flow_offload_xdp *iter, *elem = NULL;
+ unsigned long key = (unsigned long)dev;
+
+ mutex_lock(&nf_xdp_hashtable_lock);
+
+ hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) {
+ if (key == iter->net_device_addr) {
+ elem = iter;
+ break;
+ }
+ }
+
+ if (elem) {
+ struct flow_offload_xdp_ft *ft_elem, *ft_next;
+
+ list_for_each_entry_safe(ft_elem, ft_next, &elem->head, head) {
+ if (ft_elem->ft == ft) {
+ list_del_rcu(&ft_elem->head);
+ kfree_rcu(ft_elem, rcuhead);
+ }
+ }
+
+ if (list_empty(&elem->head))
+ hash_del_rcu(&elem->hnode);
+ else
+ elem = NULL;
+ }
+
+ mutex_unlock(&nf_xdp_hashtable_lock);
+
+ if (elem) {
+ synchronize_rcu();
+ kfree(elem);
+ }
+}
+
+int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ switch (cmd) {
+ case FLOW_BLOCK_BIND:
+ return nf_flowtable_by_dev_insert(flowtable, dev);
+ case FLOW_BLOCK_UNBIND:
+ nf_flowtable_by_dev_remove(flowtable, dev);
+ return 0;
+ }
+
+ WARN_ON_ONCE(1);
+ return 0;
+}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e8dcf41d360d..481ee78e77bc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -153,14 +153,18 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
{
struct nft_trans *trans;
- trans = kzalloc(sizeof(struct nft_trans) + size, gfp);
+ trans = kzalloc(size, gfp);
if (trans == NULL)
return NULL;
INIT_LIST_HEAD(&trans->list);
- INIT_LIST_HEAD(&trans->binding_list);
trans->msg_type = msg_type;
- trans->ctx = *ctx;
+
+ trans->net = ctx->net;
+ trans->table = ctx->table;
+ trans->seq = ctx->seq;
+ trans->flags = ctx->flags;
+ trans->report = ctx->report;
return trans;
}
@@ -171,10 +175,26 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
}
+static struct nft_trans_binding *nft_trans_get_binding(struct nft_trans *trans)
+{
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWCHAIN:
+ case NFT_MSG_NEWSET:
+ return container_of(trans, struct nft_trans_binding, nft_trans);
+ }
+
+ return NULL;
+}
+
static void nft_trans_list_del(struct nft_trans *trans)
{
+ struct nft_trans_binding *trans_binding;
+
list_del(&trans->list);
- list_del(&trans->binding_list);
+
+ trans_binding = nft_trans_get_binding(trans);
+ if (trans_binding)
+ list_del(&trans_binding->binding_list);
}
static void nft_trans_destroy(struct nft_trans *trans)
@@ -236,7 +256,7 @@ static void __nft_chain_trans_bind(const struct nft_ctx *ctx,
nft_trans_chain_bound(trans) = bind;
break;
case NFT_MSG_NEWRULE:
- if (trans->ctx.chain == chain)
+ if (nft_trans_rule_chain(trans) == chain)
nft_trans_rule_bound(trans) = bind;
break;
}
@@ -372,21 +392,26 @@ static void nf_tables_unregister_hook(struct net *net,
static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
{
struct nftables_pernet *nft_net = nft_pernet(net);
+ struct nft_trans_binding *binding;
+
+ list_add_tail(&trans->list, &nft_net->commit_list);
+
+ binding = nft_trans_get_binding(trans);
+ if (!binding)
+ return;
switch (trans->msg_type) {
case NFT_MSG_NEWSET:
if (!nft_trans_set_update(trans) &&
nft_set_is_anonymous(nft_trans_set(trans)))
- list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ list_add_tail(&binding->binding_list, &nft_net->binding_list);
break;
case NFT_MSG_NEWCHAIN:
if (!nft_trans_chain_update(trans) &&
nft_chain_binding(nft_trans_chain(trans)))
- list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ list_add_tail(&binding->binding_list, &nft_net->binding_list);
break;
}
-
- list_add_tail(&trans->list, &nft_net->commit_list);
}
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
@@ -416,11 +441,28 @@ static int nft_deltable(struct nft_ctx *ctx)
return err;
}
-static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+static struct nft_trans *
+nft_trans_alloc_chain(const struct nft_ctx *ctx, int msg_type)
{
+ struct nft_trans_chain *trans_chain;
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
+ if (!trans)
+ return NULL;
+
+ trans_chain = nft_trans_container_chain(trans);
+ INIT_LIST_HEAD(&trans_chain->nft_trans_binding.binding_list);
+ trans_chain->chain = ctx->chain;
+
+ return trans;
+}
+
+static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc_chain(ctx, msg_type);
if (trans == NULL)
return ERR_PTR(-ENOMEM);
@@ -432,7 +474,6 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID]));
}
}
- nft_trans_chain(trans) = ctx->chain;
nft_trans_commit_list_add_tail(ctx->net, trans);
return trans;
@@ -505,6 +546,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID]));
}
nft_trans_rule(trans) = rule;
+ nft_trans_rule_chain(trans) = ctx->chain;
nft_trans_commit_list_add_tail(ctx->net, trans);
return trans;
@@ -560,12 +602,16 @@ static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
struct nft_set *set,
const struct nft_set_desc *desc)
{
+ struct nft_trans_set *trans_set;
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
if (trans == NULL)
return -ENOMEM;
+ trans_set = nft_trans_container_set(trans);
+ INIT_LIST_HEAD(&trans_set->nft_trans_binding.binding_list);
+
if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) {
nft_trans_set_id(trans) =
ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
@@ -1217,11 +1263,11 @@ static bool nft_table_pending_update(const struct nft_ctx *ctx)
return true;
list_for_each_entry(trans, &nft_net->commit_list, list) {
- if (trans->ctx.table == ctx->table &&
+ if (trans->table == ctx->table &&
((trans->msg_type == NFT_MSG_NEWCHAIN &&
nft_trans_chain_update(trans)) ||
(trans->msg_type == NFT_MSG_DELCHAIN &&
- nft_is_base_chain(trans->ctx.chain))))
+ nft_is_base_chain(nft_trans_chain(trans)))))
return true;
}
@@ -1615,15 +1661,15 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info,
return nft_flush_table(&ctx);
}
-static void nf_tables_table_destroy(struct nft_ctx *ctx)
+static void nf_tables_table_destroy(struct nft_table *table)
{
- if (WARN_ON(ctx->table->use > 0))
+ if (WARN_ON(table->use > 0))
return;
- rhltable_destroy(&ctx->table->chains_ht);
- kfree(ctx->table->name);
- kfree(ctx->table->udata);
- kfree(ctx->table);
+ rhltable_destroy(&table->chains_ht);
+ kfree(table->name);
+ kfree(table->udata);
+ kfree(table);
}
void nft_register_chain_type(const struct nft_chain_type *ctype)
@@ -2049,18 +2095,19 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
return newstats;
}
-static void nft_chain_stats_replace(struct nft_trans *trans)
+static void nft_chain_stats_replace(struct nft_trans_chain *trans)
{
- struct nft_base_chain *chain = nft_base_chain(trans->ctx.chain);
+ const struct nft_trans *t = &trans->nft_trans_binding.nft_trans;
+ struct nft_base_chain *chain = nft_base_chain(trans->chain);
- if (!nft_trans_chain_stats(trans))
+ if (!trans->stats)
return;
- nft_trans_chain_stats(trans) =
- rcu_replace_pointer(chain->stats, nft_trans_chain_stats(trans),
- lockdep_commit_lock_is_held(trans->ctx.net));
+ trans->stats =
+ rcu_replace_pointer(chain->stats, trans->stats,
+ lockdep_commit_lock_is_held(t->net));
- if (!nft_trans_chain_stats(trans))
+ if (!trans->stats)
static_branch_inc(&nft_counters_enabled);
}
@@ -2078,9 +2125,9 @@ static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
kvfree(chain->blob_next);
}
-void nf_tables_chain_destroy(struct nft_ctx *ctx)
+void nf_tables_chain_destroy(struct nft_chain *chain)
{
- struct nft_chain *chain = ctx->chain;
+ const struct nft_table *table = chain->table;
struct nft_hook *hook, *next;
if (WARN_ON(chain->use > 0))
@@ -2092,7 +2139,7 @@ void nf_tables_chain_destroy(struct nft_ctx *ctx)
if (nft_is_base_chain(chain)) {
struct nft_base_chain *basechain = nft_base_chain(chain);
- if (nft_base_chain_netdev(ctx->family, basechain->ops.hooknum)) {
+ if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) {
list_for_each_entry_safe(hook, next,
&basechain->hook_list, list) {
list_del_rcu(&hook->list);
@@ -2581,7 +2628,7 @@ err_chain_add:
err_trans:
nft_use_dec_restore(&table->use);
err_destroy_chain:
- nf_tables_chain_destroy(ctx);
+ nf_tables_chain_destroy(chain);
return err;
}
@@ -2698,8 +2745,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
}
err = -ENOMEM;
- trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN,
- sizeof(struct nft_trans_chain));
+ trans = nft_trans_alloc_chain(ctx, NFT_MSG_NEWCHAIN);
if (trans == NULL)
goto err_trans;
@@ -2725,7 +2771,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
err = -EEXIST;
list_for_each_entry(tmp, &nft_net->commit_list, list) {
if (tmp->msg_type == NFT_MSG_NEWCHAIN &&
- tmp->ctx.table == table &&
+ tmp->table == table &&
nft_trans_chain_update(tmp) &&
nft_trans_chain_name(tmp) &&
strcmp(name, nft_trans_chain_name(tmp)) == 0) {
@@ -2774,13 +2820,11 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
struct nft_trans *trans;
list_for_each_entry(trans, &nft_net->commit_list, list) {
- struct nft_chain *chain = trans->ctx.chain;
-
if (trans->msg_type == NFT_MSG_NEWCHAIN &&
- chain->table == table &&
+ nft_trans_chain(trans)->table == table &&
id == nft_trans_chain_id(trans) &&
- nft_active_genmask(chain, genmask))
- return chain;
+ nft_active_genmask(nft_trans_chain(trans), genmask))
+ return nft_trans_chain(trans);
}
return ERR_PTR(-ENOENT);
}
@@ -2915,8 +2959,7 @@ static int nft_delchain_hook(struct nft_ctx *ctx,
list_move(&hook->list, &chain_del_list);
}
- trans = nft_trans_alloc(ctx, NFT_MSG_DELCHAIN,
- sizeof(struct nft_trans_chain));
+ trans = nft_trans_alloc_chain(ctx, NFT_MSG_DELCHAIN);
if (!trans) {
err = -ENOMEM;
goto err_chain_del_hook;
@@ -3823,6 +3866,15 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
nf_tables_rule_destroy(ctx, rule);
}
+/** nft_chain_validate - loop detection and hook validation
+ *
+ * @ctx: context containing call depth and base chain
+ * @chain: chain to validate
+ *
+ * Walk through the rules of the given chain and chase all jumps/gotos
+ * and set lookups until either the jump limit is hit or all reachable
+ * chains have been validated.
+ */
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
{
struct nft_expr *expr, *last;
@@ -3844,6 +3896,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (!expr->ops->validate)
continue;
+ /* This may call nft_chain_validate() recursively,
+ * callers that do so must increment ctx->level.
+ */
err = expr->ops->validate(ctx, expr, &data);
if (err < 0)
return err;
@@ -4188,7 +4243,7 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
list_for_each_entry(trans, &nft_net->commit_list, list) {
if (trans->msg_type == NFT_MSG_NEWRULE &&
- trans->ctx.chain == chain &&
+ nft_trans_rule_chain(trans) == chain &&
id == nft_trans_rule_id(trans))
return nft_trans_rule(trans);
}
@@ -9417,51 +9472,53 @@ static int nf_tables_validate(struct net *net)
*
* We defer the drop policy until the transaction has been finalized.
*/
-static void nft_chain_commit_drop_policy(struct nft_trans *trans)
+static void nft_chain_commit_drop_policy(struct nft_trans_chain *trans)
{
struct nft_base_chain *basechain;
- if (nft_trans_chain_policy(trans) != NF_DROP)
+ if (trans->policy != NF_DROP)
return;
- if (!nft_is_base_chain(trans->ctx.chain))
+ if (!nft_is_base_chain(trans->chain))
return;
- basechain = nft_base_chain(trans->ctx.chain);
+ basechain = nft_base_chain(trans->chain);
basechain->policy = NF_DROP;
}
-static void nft_chain_commit_update(struct nft_trans *trans)
+static void nft_chain_commit_update(struct nft_trans_chain *trans)
{
+ struct nft_table *table = trans->nft_trans_binding.nft_trans.table;
struct nft_base_chain *basechain;
- if (nft_trans_chain_name(trans)) {
- rhltable_remove(&trans->ctx.table->chains_ht,
- &trans->ctx.chain->rhlhead,
+ if (trans->name) {
+ rhltable_remove(&table->chains_ht,
+ &trans->chain->rhlhead,
nft_chain_ht_params);
- swap(trans->ctx.chain->name, nft_trans_chain_name(trans));
- rhltable_insert_key(&trans->ctx.table->chains_ht,
- trans->ctx.chain->name,
- &trans->ctx.chain->rhlhead,
+ swap(trans->chain->name, trans->name);
+ rhltable_insert_key(&table->chains_ht,
+ trans->chain->name,
+ &trans->chain->rhlhead,
nft_chain_ht_params);
}
- if (!nft_is_base_chain(trans->ctx.chain))
+ if (!nft_is_base_chain(trans->chain))
return;
nft_chain_stats_replace(trans);
- basechain = nft_base_chain(trans->ctx.chain);
+ basechain = nft_base_chain(trans->chain);
- switch (nft_trans_chain_policy(trans)) {
+ switch (trans->policy) {
case NF_DROP:
case NF_ACCEPT:
- basechain->policy = nft_trans_chain_policy(trans);
+ basechain->policy = trans->policy;
break;
}
}
-static void nft_obj_commit_update(struct nft_trans *trans)
+static void nft_obj_commit_update(const struct nft_ctx *ctx,
+ struct nft_trans *trans)
{
struct nft_object *newobj;
struct nft_object *obj;
@@ -9473,15 +9530,21 @@ static void nft_obj_commit_update(struct nft_trans *trans)
return;
obj->ops->update(obj, newobj);
- nft_obj_destroy(&trans->ctx, newobj);
+ nft_obj_destroy(ctx, newobj);
}
static void nft_commit_release(struct nft_trans *trans)
{
+ struct nft_ctx ctx = {
+ .net = trans->net,
+ };
+
+ nft_ctx_update(&ctx, trans);
+
switch (trans->msg_type) {
case NFT_MSG_DELTABLE:
case NFT_MSG_DESTROYTABLE:
- nf_tables_table_destroy(&trans->ctx);
+ nf_tables_table_destroy(trans->table);
break;
case NFT_MSG_NEWCHAIN:
free_percpu(nft_trans_chain_stats(trans));
@@ -9492,25 +9555,25 @@ static void nft_commit_release(struct nft_trans *trans)
if (nft_trans_chain_update(trans))
nft_hooks_destroy(&nft_trans_chain_hooks(trans));
else
- nf_tables_chain_destroy(&trans->ctx);
+ nf_tables_chain_destroy(nft_trans_chain(trans));
break;
case NFT_MSG_DELRULE:
case NFT_MSG_DESTROYRULE:
- nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+ nf_tables_rule_destroy(&ctx, nft_trans_rule(trans));
break;
case NFT_MSG_DELSET:
case NFT_MSG_DESTROYSET:
- nft_set_destroy(&trans->ctx, nft_trans_set(trans));
+ nft_set_destroy(&ctx, nft_trans_set(trans));
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
- nf_tables_set_elem_destroy(&trans->ctx,
+ nf_tables_set_elem_destroy(&ctx,
nft_trans_elem_set(trans),
nft_trans_elem_priv(trans));
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
- nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
+ nft_obj_destroy(&ctx, nft_trans_obj(trans));
break;
case NFT_MSG_DELFLOWTABLE:
case NFT_MSG_DESTROYFLOWTABLE:
@@ -9522,7 +9585,7 @@ static void nft_commit_release(struct nft_trans *trans)
}
if (trans->put_net)
- put_net(trans->ctx.net);
+ put_net(trans->net);
kfree(trans);
}
@@ -9641,10 +9704,10 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net)
struct nft_trans *trans, *next;
list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
- struct nft_chain *chain = trans->ctx.chain;
-
if (trans->msg_type == NFT_MSG_NEWRULE ||
trans->msg_type == NFT_MSG_DELRULE) {
+ struct nft_chain *chain = nft_trans_rule_chain(trans);
+
kvfree(chain->blob_next);
chain->blob_next = NULL;
}
@@ -10002,7 +10065,7 @@ static void nf_tables_commit_release(struct net *net)
trans = list_last_entry(&nft_net->commit_list,
struct nft_trans, list);
- get_net(trans->ctx.net);
+ get_net(trans->net);
WARN_ON_ONCE(trans->put_net);
trans->put_net = true;
@@ -10146,12 +10209,15 @@ static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = nft_pernet(net);
+ const struct nlmsghdr *nlh = nlmsg_hdr(skb);
+ struct nft_trans_binding *trans_binding;
struct nft_trans *trans, *next;
unsigned int base_seq, gc_seq;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
+ struct nft_ctx ctx;
LIST_HEAD(adl);
int err;
@@ -10160,7 +10226,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
return 0;
}
- list_for_each_entry(trans, &nft_net->binding_list, binding_list) {
+ nft_ctx_init(&ctx, net, skb, nlh, NFPROTO_UNSPEC, NULL, NULL, NULL);
+
+ list_for_each_entry(trans_binding, &nft_net->binding_list, binding_list) {
+ trans = &trans_binding->nft_trans;
switch (trans->msg_type) {
case NFT_MSG_NEWSET:
if (!nft_trans_set_update(trans) &&
@@ -10178,6 +10247,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
return -EINVAL;
}
break;
+ default:
+ WARN_ONCE(1, "Unhandled bind type %d", trans->msg_type);
+ break;
}
}
@@ -10193,9 +10265,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
/* 1. Allocate space for next generation rules_gen_X[] */
list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
+ struct nft_table *table = trans->table;
int ret;
- ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table);
+ ret = nf_tables_commit_audit_alloc(&adl, table);
if (ret) {
nf_tables_commit_chain_prepare_cancel(net);
nf_tables_commit_audit_free(&adl);
@@ -10203,7 +10276,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
if (trans->msg_type == NFT_MSG_NEWRULE ||
trans->msg_type == NFT_MSG_DELRULE) {
- chain = trans->ctx.chain;
+ chain = nft_trans_rule_chain(trans);
ret = nf_tables_commit_chain_prepare(net, chain);
if (ret < 0) {
@@ -10236,70 +10309,71 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
net->nft.gencursor = nft_gencursor_next(net);
list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
- nf_tables_commit_audit_collect(&adl, trans->ctx.table,
- trans->msg_type);
+ struct nft_table *table = trans->table;
+
+ nft_ctx_update(&ctx, trans);
+
+ nf_tables_commit_audit_collect(&adl, table, trans->msg_type);
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
- if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
+ if (!(table->flags & __NFT_TABLE_F_UPDATE)) {
nft_trans_destroy(trans);
break;
}
- if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT)
- nf_tables_table_disable(net, trans->ctx.table);
+ if (table->flags & NFT_TABLE_F_DORMANT)
+ nf_tables_table_disable(net, table);
- trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
+ table->flags &= ~__NFT_TABLE_F_UPDATE;
} else {
- nft_clear(net, trans->ctx.table);
+ nft_clear(net, table);
}
- nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE);
+ nf_tables_table_notify(&ctx, NFT_MSG_NEWTABLE);
nft_trans_destroy(trans);
break;
case NFT_MSG_DELTABLE:
case NFT_MSG_DESTROYTABLE:
- list_del_rcu(&trans->ctx.table->list);
- nf_tables_table_notify(&trans->ctx, trans->msg_type);
+ list_del_rcu(&table->list);
+ nf_tables_table_notify(&ctx, trans->msg_type);
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans)) {
- nft_chain_commit_update(trans);
- nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN,
+ nft_chain_commit_update(nft_trans_container_chain(trans));
+ nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN,
&nft_trans_chain_hooks(trans));
list_splice(&nft_trans_chain_hooks(trans),
&nft_trans_basechain(trans)->hook_list);
/* trans destroyed after rcu grace period */
} else {
- nft_chain_commit_drop_policy(trans);
- nft_clear(net, trans->ctx.chain);
- nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, NULL);
+ nft_chain_commit_drop_policy(nft_trans_container_chain(trans));
+ nft_clear(net, nft_trans_chain(trans));
+ nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, NULL);
nft_trans_destroy(trans);
}
break;
case NFT_MSG_DELCHAIN:
case NFT_MSG_DESTROYCHAIN:
if (nft_trans_chain_update(trans)) {
- nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN,
+ nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN,
&nft_trans_chain_hooks(trans));
- if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) {
+ if (!(table->flags & NFT_TABLE_F_DORMANT)) {
nft_netdev_unregister_hooks(net,
&nft_trans_chain_hooks(trans),
true);
}
} else {
- nft_chain_del(trans->ctx.chain);
- nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN,
+ nft_chain_del(nft_trans_chain(trans));
+ nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN,
NULL);
- nf_tables_unregister_hook(trans->ctx.net,
- trans->ctx.table,
- trans->ctx.chain);
+ nf_tables_unregister_hook(ctx.net, ctx.table,
+ nft_trans_chain(trans));
}
break;
case NFT_MSG_NEWRULE:
- nft_clear(trans->ctx.net, nft_trans_rule(trans));
- nf_tables_rule_notify(&trans->ctx,
- nft_trans_rule(trans),
+ nft_clear(net, nft_trans_rule(trans));
+ nf_tables_rule_notify(&ctx, nft_trans_rule(trans),
NFT_MSG_NEWRULE);
- if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
nft_trans_destroy(trans);
@@ -10307,14 +10381,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DELRULE:
case NFT_MSG_DESTROYRULE:
list_del_rcu(&nft_trans_rule(trans)->list);
- nf_tables_rule_notify(&trans->ctx,
- nft_trans_rule(trans),
+ nf_tables_rule_notify(&ctx, nft_trans_rule(trans),
trans->msg_type);
- nft_rule_expr_deactivate(&trans->ctx,
- nft_trans_rule(trans),
+ nft_rule_expr_deactivate(&ctx, nft_trans_rule(trans),
NFT_TRANS_COMMIT);
- if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_NEWSET:
@@ -10333,9 +10405,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
*/
if (nft_set_is_anonymous(nft_trans_set(trans)) &&
!list_empty(&nft_trans_set(trans)->bindings))
- nft_use_dec(&trans->ctx.table->use);
+ nft_use_dec(&table->use);
}
- nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ nf_tables_set_notify(&ctx, nft_trans_set(trans),
NFT_MSG_NEWSET, GFP_KERNEL);
nft_trans_destroy(trans);
break;
@@ -10343,14 +10415,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DESTROYSET:
nft_trans_set(trans)->dead = 1;
list_del_rcu(&nft_trans_set(trans)->list);
- nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ nf_tables_set_notify(&ctx, nft_trans_set(trans),
trans->msg_type, GFP_KERNEL);
break;
case NFT_MSG_NEWSETELEM:
- te = (struct nft_trans_elem *)trans->data;
+ te = nft_trans_container_elem(trans);
nft_setelem_activate(net, te->set, te->elem_priv);
- nf_tables_setelem_notify(&trans->ctx, te->set,
+ nf_tables_setelem_notify(&ctx, te->set,
te->elem_priv,
NFT_MSG_NEWSETELEM);
if (te->set->ops->commit &&
@@ -10362,9 +10434,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
- te = (struct nft_trans_elem *)trans->data;
+ te = nft_trans_container_elem(trans);
- nf_tables_setelem_notify(&trans->ctx, te->set,
+ nf_tables_setelem_notify(&ctx, te->set,
te->elem_priv,
trans->msg_type);
nft_setelem_remove(net, te->set, te->elem_priv);
@@ -10380,13 +10452,13 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_NEWOBJ:
if (nft_trans_obj_update(trans)) {
- nft_obj_commit_update(trans);
- nf_tables_obj_notify(&trans->ctx,
+ nft_obj_commit_update(&ctx, trans);
+ nf_tables_obj_notify(&ctx,
nft_trans_obj(trans),
NFT_MSG_NEWOBJ);
} else {
nft_clear(net, nft_trans_obj(trans));
- nf_tables_obj_notify(&trans->ctx,
+ nf_tables_obj_notify(&ctx,
nft_trans_obj(trans),
NFT_MSG_NEWOBJ);
nft_trans_destroy(trans);
@@ -10395,14 +10467,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
nft_obj_del(nft_trans_obj(trans));
- nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
+ nf_tables_obj_notify(&ctx, nft_trans_obj(trans),
trans->msg_type);
break;
case NFT_MSG_NEWFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
nft_trans_flowtable(trans)->data.flags =
nft_trans_flowtable_flags(trans);
- nf_tables_flowtable_notify(&trans->ctx,
+ nf_tables_flowtable_notify(&ctx,
nft_trans_flowtable(trans),
&nft_trans_flowtable_hooks(trans),
NFT_MSG_NEWFLOWTABLE);
@@ -10410,7 +10482,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
&nft_trans_flowtable(trans)->hook_list);
} else {
nft_clear(net, nft_trans_flowtable(trans));
- nf_tables_flowtable_notify(&trans->ctx,
+ nf_tables_flowtable_notify(&ctx,
nft_trans_flowtable(trans),
NULL,
NFT_MSG_NEWFLOWTABLE);
@@ -10420,7 +10492,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DELFLOWTABLE:
case NFT_MSG_DESTROYFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
- nf_tables_flowtable_notify(&trans->ctx,
+ nf_tables_flowtable_notify(&ctx,
nft_trans_flowtable(trans),
&nft_trans_flowtable_hooks(trans),
trans->msg_type);
@@ -10428,7 +10500,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
&nft_trans_flowtable_hooks(trans));
} else {
list_del_rcu(&nft_trans_flowtable(trans)->list);
- nf_tables_flowtable_notify(&trans->ctx,
+ nf_tables_flowtable_notify(&ctx,
nft_trans_flowtable(trans),
NULL,
trans->msg_type);
@@ -10470,28 +10542,32 @@ static void nf_tables_module_autoload(struct net *net)
static void nf_tables_abort_release(struct nft_trans *trans)
{
+ struct nft_ctx ctx = { };
+
+ nft_ctx_update(&ctx, trans);
+
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
- nf_tables_table_destroy(&trans->ctx);
+ nf_tables_table_destroy(trans->table);
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans))
nft_hooks_destroy(&nft_trans_chain_hooks(trans));
else
- nf_tables_chain_destroy(&trans->ctx);
+ nf_tables_chain_destroy(nft_trans_chain(trans));
break;
case NFT_MSG_NEWRULE:
- nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+ nf_tables_rule_destroy(&ctx, nft_trans_rule(trans));
break;
case NFT_MSG_NEWSET:
- nft_set_destroy(&trans->ctx, nft_trans_set(trans));
+ nft_set_destroy(&ctx, nft_trans_set(trans));
break;
case NFT_MSG_NEWSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
nft_trans_elem_priv(trans), true);
break;
case NFT_MSG_NEWOBJ:
- nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
+ nft_obj_destroy(&ctx, nft_trans_obj(trans));
break;
case NFT_MSG_NEWFLOWTABLE:
if (nft_trans_flowtable_update(trans))
@@ -10523,6 +10599,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
struct nft_trans *trans, *next;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
+ struct nft_ctx ctx = {
+ .net = net,
+ };
int err = 0;
if (action == NFNL_ABORT_VALIDATE &&
@@ -10531,37 +10610,41 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
list) {
+ struct nft_table *table = trans->table;
+
+ nft_ctx_update(&ctx, trans);
+
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
- if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
+ if (!(table->flags & __NFT_TABLE_F_UPDATE)) {
nft_trans_destroy(trans);
break;
}
- if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) {
- nf_tables_table_disable(net, trans->ctx.table);
- trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
- } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) {
- trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT;
+ if (table->flags & __NFT_TABLE_F_WAS_DORMANT) {
+ nf_tables_table_disable(net, table);
+ table->flags |= NFT_TABLE_F_DORMANT;
+ } else if (table->flags & __NFT_TABLE_F_WAS_AWAKEN) {
+ table->flags &= ~NFT_TABLE_F_DORMANT;
}
- if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_ORPHAN) {
- trans->ctx.table->flags &= ~NFT_TABLE_F_OWNER;
- trans->ctx.table->nlpid = 0;
+ if (table->flags & __NFT_TABLE_F_WAS_ORPHAN) {
+ table->flags &= ~NFT_TABLE_F_OWNER;
+ table->nlpid = 0;
}
- trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
+ table->flags &= ~__NFT_TABLE_F_UPDATE;
nft_trans_destroy(trans);
} else {
- list_del_rcu(&trans->ctx.table->list);
+ list_del_rcu(&table->list);
}
break;
case NFT_MSG_DELTABLE:
case NFT_MSG_DESTROYTABLE:
- nft_clear(trans->ctx.net, trans->ctx.table);
+ nft_clear(trans->net, table);
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans)) {
- if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) {
+ if (!(table->flags & NFT_TABLE_F_DORMANT)) {
nft_netdev_unregister_hooks(net,
&nft_trans_chain_hooks(trans),
true);
@@ -10574,11 +10657,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- nft_use_dec_restore(&trans->ctx.table->use);
- nft_chain_del(trans->ctx.chain);
- nf_tables_unregister_hook(trans->ctx.net,
- trans->ctx.table,
- trans->ctx.chain);
+ nft_use_dec_restore(&table->use);
+ nft_chain_del(nft_trans_chain(trans));
+ nf_tables_unregister_hook(trans->net, table,
+ nft_trans_chain(trans));
}
break;
case NFT_MSG_DELCHAIN:
@@ -10587,8 +10669,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_splice(&nft_trans_chain_hooks(trans),
&nft_trans_basechain(trans)->hook_list);
} else {
- nft_use_inc_restore(&trans->ctx.table->use);
- nft_clear(trans->ctx.net, trans->ctx.chain);
+ nft_use_inc_restore(&table->use);
+ nft_clear(trans->net, nft_trans_chain(trans));
}
nft_trans_destroy(trans);
break;
@@ -10597,20 +10679,20 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- nft_use_dec_restore(&trans->ctx.chain->use);
+ nft_use_dec_restore(&nft_trans_rule_chain(trans)->use);
list_del_rcu(&nft_trans_rule(trans)->list);
- nft_rule_expr_deactivate(&trans->ctx,
+ nft_rule_expr_deactivate(&ctx,
nft_trans_rule(trans),
NFT_TRANS_ABORT);
- if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_DELRULE:
case NFT_MSG_DESTROYRULE:
- nft_use_inc_restore(&trans->ctx.chain->use);
- nft_clear(trans->ctx.net, nft_trans_rule(trans));
- nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
- if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_use_inc_restore(&nft_trans_rule_chain(trans)->use);
+ nft_clear(trans->net, nft_trans_rule(trans));
+ nft_rule_expr_activate(&ctx, nft_trans_rule(trans));
+ if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
nft_trans_destroy(trans);
@@ -10620,7 +10702,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- nft_use_dec_restore(&trans->ctx.table->use);
+ nft_use_dec_restore(&table->use);
if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans);
break;
@@ -10630,10 +10712,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELSET:
case NFT_MSG_DESTROYSET:
- nft_use_inc_restore(&trans->ctx.table->use);
- nft_clear(trans->ctx.net, nft_trans_set(trans));
+ nft_use_inc_restore(&table->use);
+ nft_clear(trans->net, nft_trans_set(trans));
if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
- nft_map_activate(&trans->ctx, nft_trans_set(trans));
+ nft_map_activate(&ctx, nft_trans_set(trans));
nft_trans_destroy(trans);
break;
@@ -10642,7 +10724,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- te = (struct nft_trans_elem *)trans->data;
+ te = nft_trans_container_elem(trans);
nft_setelem_remove(net, te->set, te->elem_priv);
if (!nft_setelem_is_catchall(te->set, te->elem_priv))
atomic_dec(&te->set->nelems);
@@ -10655,7 +10737,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
- te = (struct nft_trans_elem *)trans->data;
+ te = nft_trans_container_elem(trans);
if (!nft_setelem_active_next(net, te->set, te->elem_priv)) {
nft_setelem_data_activate(net, te->set, te->elem_priv);
@@ -10673,17 +10755,17 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_NEWOBJ:
if (nft_trans_obj_update(trans)) {
- nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans));
+ nft_obj_destroy(&ctx, nft_trans_obj_newobj(trans));
nft_trans_destroy(trans);
} else {
- nft_use_dec_restore(&trans->ctx.table->use);
+ nft_use_dec_restore(&table->use);
nft_obj_del(nft_trans_obj(trans));
}
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
- nft_use_inc_restore(&trans->ctx.table->use);
- nft_clear(trans->ctx.net, nft_trans_obj(trans));
+ nft_use_inc_restore(&table->use);
+ nft_clear(trans->net, nft_trans_obj(trans));
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWFLOWTABLE:
@@ -10691,7 +10773,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable_hooks(trans));
} else {
- nft_use_dec_restore(&trans->ctx.table->use);
+ nft_use_dec_restore(&table->use);
list_del_rcu(&nft_trans_flowtable(trans)->list);
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable(trans)->hook_list);
@@ -10703,8 +10785,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_splice(&nft_trans_flowtable_hooks(trans),
&nft_trans_flowtable(trans)->hook_list);
} else {
- nft_use_inc_restore(&trans->ctx.table->use);
- nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
+ nft_use_inc_restore(&table->use);
+ nft_clear(trans->net, nft_trans_flowtable(trans));
}
nft_trans_destroy(trans);
break;
@@ -10809,150 +10891,6 @@ int nft_chain_validate_hooks(const struct nft_chain *chain,
}
EXPORT_SYMBOL_GPL(nft_chain_validate_hooks);
-/*
- * Loop detection - walk through the ruleset beginning at the destination chain
- * of a new jump until either the source chain is reached (loop) or all
- * reachable chains have been traversed.
- *
- * The loop check is performed whenever a new jump verdict is added to an
- * expression or verdict map or a verdict map is bound to a new chain.
- */
-
-static int nf_tables_check_loops(const struct nft_ctx *ctx,
- const struct nft_chain *chain);
-
-static int nft_check_loops(const struct nft_ctx *ctx,
- const struct nft_set_ext *ext)
-{
- const struct nft_data *data;
- int ret;
-
- data = nft_set_ext_data(ext);
- switch (data->verdict.code) {
- case NFT_JUMP:
- case NFT_GOTO:
- ret = nf_tables_check_loops(ctx, data->verdict.chain);
- break;
- default:
- ret = 0;
- break;
- }
-
- return ret;
-}
-
-static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
- struct nft_set *set,
- const struct nft_set_iter *iter,
- struct nft_elem_priv *elem_priv)
-{
- const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
-
- if (!nft_set_elem_active(ext, iter->genmask))
- return 0;
-
- if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
- *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
- return 0;
-
- return nft_check_loops(ctx, ext);
-}
-
-static int nft_set_catchall_loops(const struct nft_ctx *ctx,
- struct nft_set *set)
-{
- u8 genmask = nft_genmask_next(ctx->net);
- struct nft_set_elem_catchall *catchall;
- struct nft_set_ext *ext;
- int ret = 0;
-
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
- ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_active(ext, genmask))
- continue;
-
- ret = nft_check_loops(ctx, ext);
- if (ret < 0)
- return ret;
- }
-
- return ret;
-}
-
-static int nf_tables_check_loops(const struct nft_ctx *ctx,
- const struct nft_chain *chain)
-{
- const struct nft_rule *rule;
- const struct nft_expr *expr, *last;
- struct nft_set *set;
- struct nft_set_binding *binding;
- struct nft_set_iter iter;
-
- if (ctx->chain == chain)
- return -ELOOP;
-
- if (fatal_signal_pending(current))
- return -EINTR;
-
- list_for_each_entry(rule, &chain->rules, list) {
- nft_rule_for_each_expr(expr, last, rule) {
- struct nft_immediate_expr *priv;
- const struct nft_data *data;
- int err;
-
- if (strcmp(expr->ops->type->name, "immediate"))
- continue;
-
- priv = nft_expr_priv(expr);
- if (priv->dreg != NFT_REG_VERDICT)
- continue;
-
- data = &priv->data;
- switch (data->verdict.code) {
- case NFT_JUMP:
- case NFT_GOTO:
- err = nf_tables_check_loops(ctx,
- data->verdict.chain);
- if (err < 0)
- return err;
- break;
- default:
- break;
- }
- }
- }
-
- list_for_each_entry(set, &ctx->table->sets, list) {
- if (!nft_is_active_next(ctx->net, set))
- continue;
- if (!(set->flags & NFT_SET_MAP) ||
- set->dtype != NFT_DATA_VERDICT)
- continue;
-
- list_for_each_entry(binding, &set->bindings, list) {
- if (!(binding->flags & NFT_SET_MAP) ||
- binding->chain != chain)
- continue;
-
- iter.genmask = nft_genmask_next(ctx->net);
- iter.type = NFT_ITER_UPDATE;
- iter.skip = 0;
- iter.count = 0;
- iter.err = 0;
- iter.fn = nf_tables_loop_check_setelem;
-
- set->ops->walk(ctx, set, &iter);
- if (!iter.err)
- iter.err = nft_set_catchall_loops(ctx, set);
-
- if (iter.err < 0)
- return iter.err;
- }
- }
-
- return 0;
-}
-
/**
* nft_parse_u32_check - fetch u32 attribute and check for maximum value
*
@@ -11065,7 +11003,7 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
if (data != NULL &&
(data->verdict.code == NFT_GOTO ||
data->verdict.code == NFT_JUMP)) {
- err = nf_tables_check_loops(ctx, data->verdict.chain);
+ err = nft_chain_validate(ctx, data->verdict.chain);
if (err < 0)
return err;
}
@@ -11365,7 +11303,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
}
nft_chain_del(ctx->chain);
nft_use_dec(&ctx->table->use);
- nf_tables_chain_destroy(ctx);
+ nf_tables_chain_destroy(ctx->chain);
return 0;
}
@@ -11440,12 +11378,11 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
nft_obj_destroy(&ctx, obj);
}
list_for_each_entry_safe(chain, nc, &table->chains, list) {
- ctx.chain = chain;
nft_chain_del(chain);
nft_use_dec(&table->use);
- nf_tables_chain_destroy(&ctx);
+ nf_tables_chain_destroy(chain);
}
- nf_tables_table_destroy(&ctx);
+ nf_tables_table_destroy(table);
}
static void __nft_release_tables(struct net *net)
@@ -11483,8 +11420,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
gc_seq = nft_gc_seq_begin(nft_net);
- if (!list_empty(&nf_tables_destroy_list))
- nf_tables_trans_destroy_flush_work();
+ nf_tables_trans_destroy_flush_work();
again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
@@ -11588,6 +11524,14 @@ static int __init nf_tables_module_init(void)
{
int err;
+ BUILD_BUG_ON(offsetof(struct nft_trans_table, nft_trans) != 0);
+ BUILD_BUG_ON(offsetof(struct nft_trans_chain, nft_trans_binding.nft_trans) != 0);
+ BUILD_BUG_ON(offsetof(struct nft_trans_rule, nft_trans) != 0);
+ BUILD_BUG_ON(offsetof(struct nft_trans_set, nft_trans_binding.nft_trans) != 0);
+ BUILD_BUG_ON(offsetof(struct nft_trans_elem, nft_trans) != 0);
+ BUILD_BUG_ON(offsetof(struct nft_trans_obj, nft_trans) != 0);
+ BUILD_BUG_ON(offsetof(struct nft_trans_flowtable, nft_trans) != 0);
+
err = register_pernet_subsys(&nf_tables_net_ops);
if (err < 0)
return err;
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 12ab78fa5d84..64675f1c7f29 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -513,38 +513,38 @@ static void nft_flow_rule_offload_abort(struct net *net,
int err = 0;
list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) {
- if (trans->ctx.family != NFPROTO_NETDEV)
+ if (trans->table->family != NFPROTO_NETDEV)
continue;
switch (trans->msg_type) {
case NFT_MSG_NEWCHAIN:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) ||
+ if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) ||
nft_trans_chain_update(trans))
continue;
- err = nft_flow_offload_chain(trans->ctx.chain, NULL,
+ err = nft_flow_offload_chain(nft_trans_chain(trans), NULL,
FLOW_BLOCK_UNBIND);
break;
case NFT_MSG_DELCHAIN:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
- err = nft_flow_offload_chain(trans->ctx.chain, NULL,
+ err = nft_flow_offload_chain(nft_trans_chain(trans), NULL,
FLOW_BLOCK_BIND);
break;
case NFT_MSG_NEWRULE:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
- err = nft_flow_offload_rule(trans->ctx.chain,
+ err = nft_flow_offload_rule(nft_trans_rule_chain(trans),
nft_trans_rule(trans),
NULL, FLOW_CLS_DESTROY);
break;
case NFT_MSG_DELRULE:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
- err = nft_flow_offload_rule(trans->ctx.chain,
+ err = nft_flow_offload_rule(nft_trans_rule_chain(trans),
nft_trans_rule(trans),
nft_trans_flow_rule(trans),
FLOW_CLS_REPLACE);
@@ -564,46 +564,46 @@ int nft_flow_rule_offload_commit(struct net *net)
u8 policy;
list_for_each_entry(trans, &nft_net->commit_list, list) {
- if (trans->ctx.family != NFPROTO_NETDEV)
+ if (trans->table->family != NFPROTO_NETDEV)
continue;
switch (trans->msg_type) {
case NFT_MSG_NEWCHAIN:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) ||
+ if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) ||
nft_trans_chain_update(trans))
continue;
policy = nft_trans_chain_policy(trans);
- err = nft_flow_offload_chain(trans->ctx.chain, &policy,
+ err = nft_flow_offload_chain(nft_trans_chain(trans), &policy,
FLOW_BLOCK_BIND);
break;
case NFT_MSG_DELCHAIN:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
policy = nft_trans_chain_policy(trans);
- err = nft_flow_offload_chain(trans->ctx.chain, &policy,
+ err = nft_flow_offload_chain(nft_trans_chain(trans), &policy,
FLOW_BLOCK_UNBIND);
break;
case NFT_MSG_NEWRULE:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
- if (trans->ctx.flags & NLM_F_REPLACE ||
- !(trans->ctx.flags & NLM_F_APPEND)) {
+ if (trans->flags & NLM_F_REPLACE ||
+ !(trans->flags & NLM_F_APPEND)) {
err = -EOPNOTSUPP;
break;
}
- err = nft_flow_offload_rule(trans->ctx.chain,
+ err = nft_flow_offload_rule(nft_trans_rule_chain(trans),
nft_trans_rule(trans),
nft_trans_flow_rule(trans),
FLOW_CLS_REPLACE);
break;
case NFT_MSG_DELRULE:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
- err = nft_flow_offload_rule(trans->ctx.chain,
+ err = nft_flow_offload_rule(nft_trans_rule_chain(trans),
nft_trans_rule(trans),
NULL, FLOW_CLS_DESTROY);
break;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index f466af4f8531..eab4f476b47f 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -366,8 +366,7 @@ static int cttimeout_default_set(struct sk_buff *skb,
__u8 l4num;
int ret;
- if (!cda[CTA_TIMEOUT_L3PROTO] ||
- !cda[CTA_TIMEOUT_L4PROTO] ||
+ if (!cda[CTA_TIMEOUT_L4PROTO] ||
!cda[CTA_TIMEOUT_DATA])
return -EINVAL;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index f1c31757e496..55e28e1da66e 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -325,7 +325,7 @@ static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
hooks = nf_hook_entries_head(net, pf, entry->state.hook);
i = entry->hook_index;
- if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
+ if (!hooks || i >= hooks->num_hook_entries) {
kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP);
nf_queue_entry_free(entry);
return;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 6475c7abc1fe..ac2422c215e5 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -221,7 +221,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
list_del(&rule->list);
nf_tables_rule_destroy(&chain_ctx, rule);
}
- nf_tables_chain_destroy(&chain_ctx);
+ nf_tables_chain_destroy(chain);
break;
default:
break;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index ef93e0d3bee0..588a5e6ad899 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -59,9 +59,9 @@ MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* fil
/* retained for backwards compatibility */
static unsigned int ip_pkt_list_tot __read_mostly;
module_param(ip_pkt_list_tot, uint, 0400);
-MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)");
+MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 65535)");
-#define XT_RECENT_MAX_NSTAMPS 256
+#define XT_RECENT_MAX_NSTAMPS 65536
struct recent_entry {
struct list_head list;
@@ -69,7 +69,7 @@ struct recent_entry {
union nf_inet_addr addr;
u_int16_t family;
u_int8_t ttl;
- u_int8_t index;
+ u_int16_t index;
u_int16_t nstamps;
unsigned long stamps[];
};
@@ -80,7 +80,7 @@ struct recent_table {
union nf_inet_addr mask;
unsigned int refcnt;
unsigned int entries;
- u8 nstamps_max_mask;
+ u_int16_t nstamps_max_mask;
struct list_head lru_list;
struct list_head iphash[];
};
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 29a7081858cd..2535f3f9f462 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -10,6 +10,7 @@ config OPENVSWITCH
(NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \
(!NF_NAT || NF_NAT) && \
(!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT)))
+ depends on PSAMPLE || !PSAMPLE
select LIBCRC32C
select MPLS
select NET_MPLS_GSO
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 964225580824..101f9a23792c 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -24,6 +24,11 @@
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>
+
+#if IS_ENABLED(CONFIG_PSAMPLE)
+#include <net/psample.h>
+#endif
+
#include <net/sctp/checksum.h>
#include "datapath.h"
@@ -1043,12 +1048,15 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
struct nlattr *sample_arg;
int rem = nla_len(attr);
const struct sample_arg *arg;
+ u32 init_probability;
bool clone_flow_key;
+ int err;
/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
sample_arg = nla_data(attr);
arg = nla_data(sample_arg);
actions = nla_next(sample_arg, &rem);
+ init_probability = OVS_CB(skb)->probability;
if ((arg->probability != U32_MAX) &&
(!arg->probability || get_random_u32() > arg->probability)) {
@@ -1057,9 +1065,16 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
return 0;
}
+ OVS_CB(skb)->probability = arg->probability;
+
clone_flow_key = !arg->exec;
- return clone_execute(dp, skb, key, 0, actions, rem, last,
- clone_flow_key);
+ err = clone_execute(dp, skb, key, 0, actions, rem, last,
+ clone_flow_key);
+
+ if (!last)
+ OVS_CB(skb)->probability = init_probability;
+
+ return err;
}
/* When 'last' is true, clone() should always consume the 'skb'.
@@ -1299,6 +1314,44 @@ static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
return 0;
}
+#if IS_ENABLED(CONFIG_PSAMPLE)
+static void execute_psample(struct datapath *dp, struct sk_buff *skb,
+ const struct nlattr *attr)
+{
+ struct psample_group psample_group = {};
+ struct psample_metadata md = {};
+ const struct nlattr *a;
+ u32 rate;
+ int rem;
+
+ nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
+ switch (nla_type(a)) {
+ case OVS_PSAMPLE_ATTR_GROUP:
+ psample_group.group_num = nla_get_u32(a);
+ break;
+
+ case OVS_PSAMPLE_ATTR_COOKIE:
+ md.user_cookie = nla_data(a);
+ md.user_cookie_len = nla_len(a);
+ break;
+ }
+ }
+
+ psample_group.net = ovs_dp_get_net(dp);
+ md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex;
+ md.trunc_size = skb->len - OVS_CB(skb)->cutlen;
+ md.rate_as_probability = 1;
+
+ rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX;
+
+ psample_sample_packet(&psample_group, skb, rate, &md);
+}
+#else
+static void execute_psample(struct datapath *dp, struct sk_buff *skb,
+ const struct nlattr *attr)
+{}
+#endif
+
/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
@@ -1502,6 +1555,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
ovs_kfree_skb_reason(skb, reason);
return 0;
}
+
+ case OVS_ACTION_ATTR_PSAMPLE:
+ execute_psample(dp, skb, a);
+ OVS_CB(skb)->cutlen = 0;
+ if (nla_is_last(a, rem)) {
+ consume_skb(skb);
+ return 0;
+ }
+ break;
}
if (unlikely(err)) {
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 3b980bf2770b..8eb1d644b741 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -679,6 +679,8 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
action |= BIT(NF_NAT_MANIP_DST);
err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit);
+ if (err != NF_ACCEPT)
+ return err;
if (action & BIT(NF_NAT_MANIP_SRC))
ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC);
@@ -697,6 +699,22 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
}
#endif
+static int verdict_to_errno(unsigned int verdict)
+{
+ switch (verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ return 0;
+ case NF_DROP:
+ return -EINVAL;
+ case NF_STOLEN:
+ return -EINPROGRESS;
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
* not done already. Update key with new CT state after passing the packet
* through conntrack.
@@ -735,7 +753,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
err = nf_conntrack_in(skb, &state);
if (err != NF_ACCEPT)
- return -ENOENT;
+ return verdict_to_errno(err);
/* Clear CT state NAT flags to mark that we have not yet done
* NAT after the nf_conntrack_in() call. We can actually clear
@@ -762,9 +780,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
* the key->ct_state.
*/
if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) &&
- (nf_ct_is_confirmed(ct) || info->commit) &&
- ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
- return -EINVAL;
+ (nf_ct_is_confirmed(ct) || info->commit)) {
+ int err = ovs_ct_nat(net, key, info, skb, ct, ctinfo);
+
+ err = verdict_to_errno(err);
+ if (err)
+ return err;
}
/* Userspace may decide to perform a ct lookup without a helper
@@ -795,9 +816,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
* - When committing an unconfirmed connection.
*/
if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
- info->commit) &&
- nf_ct_helper(skb, ct, ctinfo, info->family) != NF_ACCEPT) {
- return -EINVAL;
+ info->commit)) {
+ int err = nf_ct_helper(skb, ct, ctinfo, info->family);
+
+ err = verdict_to_errno(err);
+ if (err)
+ return err;
}
if (nf_ct_protonum(ct) == IPPROTO_TCP &&
@@ -1001,10 +1025,9 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
/* This will take care of sending queued events even if the connection
* is already confirmed.
*/
- if (nf_conntrack_confirm(skb) != NF_ACCEPT)
- return -EINVAL;
+ err = nf_conntrack_confirm(skb);
- return 0;
+ return verdict_to_errno(err);
}
/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
@@ -1039,6 +1062,10 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
else
err = ovs_ct_lookup(net, key, info, skb);
+ /* conntrack core returned NF_STOLEN */
+ if (err == -EINPROGRESS)
+ return err;
+
skb_push_rcsum(skb, nh_ofs);
if (err)
ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 0cd29971a907..9ca6231ea647 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -115,12 +115,15 @@ struct datapath {
* fragmented.
* @acts_origlen: The netlink size of the flow actions applied to this skb.
* @cutlen: The number of bytes from the packet end to be removed.
+ * @probability: The sampling probability that was applied to this skb; 0 means
+ * no sampling has occurred; U32_MAX means 100% probability.
*/
struct ovs_skb_cb {
struct vport *input_vport;
u16 mru;
u16 acts_origlen;
u32 cutlen;
+ u32 probability;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index f224d9bcea5e..c92bdc4dfe19 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -64,6 +64,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
case OVS_ACTION_ATTR_TRUNC:
case OVS_ACTION_ATTR_USERSPACE:
case OVS_ACTION_ATTR_DROP:
+ case OVS_ACTION_ATTR_PSAMPLE:
break;
case OVS_ACTION_ATTR_CT:
@@ -2409,7 +2410,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
/* Whenever new actions are added, the need to update this
* function should be considered.
*/
- BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24);
+ BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 25);
if (!actions)
return;
@@ -3157,6 +3158,28 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return 0;
}
+static int validate_psample(const struct nlattr *attr)
+{
+ static const struct nla_policy policy[OVS_PSAMPLE_ATTR_MAX + 1] = {
+ [OVS_PSAMPLE_ATTR_GROUP] = { .type = NLA_U32 },
+ [OVS_PSAMPLE_ATTR_COOKIE] = {
+ .type = NLA_BINARY,
+ .len = OVS_PSAMPLE_COOKIE_MAX_SIZE,
+ },
+ };
+ struct nlattr *a[OVS_PSAMPLE_ATTR_MAX + 1];
+ int err;
+
+ if (!IS_ENABLED(CONFIG_PSAMPLE))
+ return -EOPNOTSUPP;
+
+ err = nla_parse_nested(a, OVS_PSAMPLE_ATTR_MAX, attr, policy, NULL);
+ if (err)
+ return err;
+
+ return a[OVS_PSAMPLE_ATTR_GROUP] ? 0 : -EINVAL;
+}
+
static int copy_action(const struct nlattr *from,
struct sw_flow_actions **sfa, bool log)
{
@@ -3212,6 +3235,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
[OVS_ACTION_ATTR_DROP] = sizeof(u32),
+ [OVS_ACTION_ATTR_PSAMPLE] = (u32)-1,
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -3490,6 +3514,12 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return -EINVAL;
break;
+ case OVS_ACTION_ATTR_PSAMPLE:
+ err = validate_psample(a);
+ if (err)
+ return err;
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 972ae01a70f7..8732f6e51ae5 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -500,6 +500,7 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
OVS_CB(skb)->input_vport = vport;
OVS_CB(skb)->mru = 0;
OVS_CB(skb)->cutlen = 0;
+ OVS_CB(skb)->probability = 0;
if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
u32 mark;
diff --git a/net/psample/psample.c b/net/psample/psample.c
index a5d9b8446f77..f48b5b9cd409 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -376,6 +376,10 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
void *data;
int ret;
+ if (!genl_has_listeners(&psample_nl_family, group->net,
+ PSAMPLE_NL_MCGRP_SAMPLE))
+ return;
+
meta_len = (in_ifindex ? nla_total_size(sizeof(u16)) : 0) +
(out_ifindex ? nla_total_size(sizeof(u16)) : 0) +
(md->out_tc_valid ? nla_total_size(sizeof(u16)) : 0) +
@@ -386,7 +390,9 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
nla_total_size(sizeof(u32)) + /* group_num */
nla_total_size(sizeof(u32)) + /* seq */
nla_total_size_64bit(sizeof(u64)) + /* timestamp */
- nla_total_size(sizeof(u16)); /* protocol */
+ nla_total_size(sizeof(u16)) + /* protocol */
+ (md->user_cookie_len ?
+ nla_total_size(md->user_cookie_len) : 0); /* user cookie */
#ifdef CONFIG_INET
tun_info = skb_tunnel_info(skb);
@@ -486,6 +492,14 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
}
#endif
+ if (md->user_cookie && md->user_cookie_len &&
+ nla_put(nl_skb, PSAMPLE_ATTR_USER_COOKIE, md->user_cookie_len,
+ md->user_cookie))
+ goto error;
+
+ if (md->rate_as_probability)
+ nla_put_flag(skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY);
+
genlmsg_end(nl_skb, data);
genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC);
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 2a96d9c1db65..113b907da0f7 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -944,6 +944,8 @@ static int tcf_ct_act_nat(struct sk_buff *skb,
action |= BIT(NF_NAT_MANIP_DST);
err = nf_ct_nat(skb, ct, ctinfo, &action, range, commit);
+ if (err != NF_ACCEPT)
+ return err & NF_VERDICT_MASK;
if (action & BIT(NF_NAT_MANIP_SRC))
tc_skb_cb(skb)->post_ct_snat = 1;
@@ -1035,7 +1037,7 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
state.pf = family;
err = nf_conntrack_in(skb, &state);
if (err != NF_ACCEPT)
- goto out_push;
+ goto nf_error;
}
do_nat:
@@ -1047,7 +1049,7 @@ do_nat:
err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit);
if (err != NF_ACCEPT)
- goto drop;
+ goto nf_error;
if (!nf_ct_is_confirmed(ct) && commit && p->helper && !nfct_help(ct)) {
err = __nf_ct_try_assign_helper(ct, p->tmpl, GFP_ATOMIC);
@@ -1061,8 +1063,9 @@ do_nat:
}
if (nf_ct_is_confirmed(ct) ? ((!cached && !skip_add) || add_helper) : commit) {
- if (nf_ct_helper(skb, ct, ctinfo, family) != NF_ACCEPT)
- goto drop;
+ err = nf_ct_helper(skb, ct, ctinfo, family);
+ if (err != NF_ACCEPT)
+ goto nf_error;
}
if (commit) {
@@ -1075,8 +1078,17 @@ do_nat:
/* This will take care of sending queued events
* even if the connection is already confirmed.
*/
- if (nf_conntrack_confirm(skb) != NF_ACCEPT)
- goto drop;
+ err = nf_conntrack_confirm(skb);
+ if (err != NF_ACCEPT)
+ goto nf_error;
+
+ /* The ct may be dropped if a clash has been resolved,
+ * so it's necessary to retrieve it from skb again to
+ * prevent UAF.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ skip_add = true;
}
if (!skip_add)
@@ -1100,6 +1112,21 @@ out_frag:
drop:
tcf_action_inc_drop_qstats(&c->common);
return TC_ACT_SHOT;
+
+nf_error:
+ /* some verdicts store extra data in upper bits, such
+ * as errno or queue number.
+ */
+ switch (err & NF_VERDICT_MASK) {
+ case NF_DROP:
+ goto drop;
+ case NF_STOLEN:
+ tcf_action_inc_drop_qstats(&c->common);
+ return TC_ACT_CONSUMED;
+ default:
+ DEBUG_NET_WARN_ON_ONCE(1);
+ goto drop;
+ }
}
static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index a69b53d54039..2ceb4d141b71 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -167,7 +167,9 @@ TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb,
{
struct tcf_sample *s = to_sample(a);
struct psample_group *psample_group;
+ u8 cookie_data[TC_COOKIE_MAX_SIZE];
struct psample_metadata md = {};
+ struct tc_cookie *user_cookie;
int retval;
tcf_lastuse_update(&s->tcf_tm);
@@ -189,6 +191,16 @@ TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb,
if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
skb_push(skb, skb->mac_len);
+ rcu_read_lock();
+ user_cookie = rcu_dereference(a->user_cookie);
+ if (user_cookie) {
+ memcpy(cookie_data, user_cookie->data,
+ user_cookie->len);
+ md.user_cookie = cookie_data;
+ md.user_cookie_len = user_cookie->len;
+ }
+ rcu_read_unlock();
+
md.trunc_size = s->truncate ? s->trunc_size : skb->len;
psample_sample_packet(psample_group, skb, s->rate, &md);
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index cd0accaf844a..dc0229693461 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -246,7 +246,7 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
memset(&opt, 0, sizeof(opt));
opt.index = d->tcf_index;
- opt.refcnt = refcount_read(&d->tcf_refcnt) - ref,
+ opt.refcnt = refcount_read(&d->tcf_refcnt) - ref;
opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind;
spin_lock_bh(&d->tcf_lock);
opt.action = d->tcf_action;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index c2ef9dcf91d2..cc6051d4f2ef 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -91,7 +91,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, true, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, true);
@@ -121,7 +121,7 @@ static void ingress_destroy(struct Qdisc *sch)
tcf_block_put_ext(q->block, sch, &q->block_info);
if (entry) {
- tcx_miniq_set_active(entry, false);
+ tcx_miniq_dec(entry);
if (!tcx_entry_is_active(entry)) {
tcx_entry_update(dev, NULL, true);
tcx_entry_free(entry);
@@ -257,7 +257,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, true, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, true);
@@ -276,7 +276,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, false, &created);
if (!entry)
return -ENOMEM;
- tcx_miniq_set_active(entry, true);
+ tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, false);
@@ -302,7 +302,7 @@ static void clsact_destroy(struct Qdisc *sch)
tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
if (ingress_entry) {
- tcx_miniq_set_active(ingress_entry, false);
+ tcx_miniq_dec(ingress_entry);
if (!tcx_entry_is_active(ingress_entry)) {
tcx_entry_update(dev, NULL, true);
tcx_entry_free(ingress_entry);
@@ -310,7 +310,7 @@ static void clsact_destroy(struct Qdisc *sch)
}
if (egress_entry) {
- tcx_miniq_set_active(egress_entry, false);
+ tcx_miniq_dec(egress_entry);
if (!tcx_entry_is_active(egress_entry)) {
tcx_entry_update(dev, NULL, false);
tcx_entry_free(egress_entry);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index c009383369b2..32f76f1298da 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4834,10 +4834,14 @@ int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr,
return sctp_connect(sock->sk, uaddr, addr_len, flags);
}
-/* FIXME: Write comments. */
+/* Only called when shutdown a listening SCTP socket. */
static int sctp_disconnect(struct sock *sk, int flags)
{
- return -EOPNOTSUPP; /* STUB */
+ if (!sctp_style(sk, TCP))
+ return -EOPNOTSUPP;
+
+ sk->sk_shutdown |= RCV_SHUTDOWN;
+ return 0;
}
/* 4.1.4 accept() - TCP Style Syntax
@@ -4866,7 +4870,8 @@ static struct sock *sctp_accept(struct sock *sk, struct proto_accept_arg *arg)
goto out;
}
- if (!sctp_sstate(sk, LISTENING)) {
+ if (!sctp_sstate(sk, LISTENING) ||
+ (sk->sk_shutdown & RCV_SHUTDOWN)) {
error = -EINVAL;
goto out;
}
@@ -9393,7 +9398,8 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo)
}
err = -EINVAL;
- if (!sctp_sstate(sk, LISTENING))
+ if (!sctp_sstate(sk, LISTENING) ||
+ (sk->sk_shutdown & RCV_SHUTDOWN))
break;
err = 0;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 2b4b1276d4e8..d9cda1e53a01 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1557,9 +1557,11 @@ out_drop:
*/
void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
{
+ struct rpc_timeout timeout = {
+ .to_increment = 0,
+ };
struct rpc_task *task;
int proc_error;
- struct rpc_timeout timeout;
/* Build the svc_rqst used by the common processing routine */
rqstp->rq_xid = req->rq_xid;
@@ -1612,6 +1614,7 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
timeout.to_initval = req->rq_xprt->timeout->to_initval;
timeout.to_retries = req->rq_xprt->timeout->to_retries;
}
+ timeout.to_maxval = timeout.to_initval;
memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
task = rpc_run_bc_task(req, &timeout);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 49a3bea33f9d..dd86d7f1e97e 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1421,13 +1421,12 @@ static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
- if (!si->serv)
- return NULL;
-
mutex_lock(si->mutex);
if (!pidx)
return SEQ_START_TOKEN;
+ if (!si->serv)
+ return NULL;
return pidx > si->serv->sv_nrpools ? NULL
: &si->serv->sv_pools[pidx - 1];
}
@@ -1459,8 +1458,7 @@ static void svc_pool_stats_stop(struct seq_file *m, void *p)
{
struct svc_info *si = m->private;
- if (si->serv)
- mutex_unlock(si->mutex);
+ mutex_unlock(si->mutex);
}
static int svc_pool_stats_show(struct seq_file *m, void *p)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index dfc353eea8ed..0e1691316f42 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2441,6 +2441,13 @@ static void xs_tcp_setup_socket(struct work_struct *work)
transport->srcport = 0;
status = -EAGAIN;
break;
+ case -EPERM:
+ /* Happens, for instance, if a BPF program is preventing
+ * the connect. Remap the error so upper layers can better
+ * deal with it.
+ */
+ status = -ECONNREFUSED;
+ fallthrough;
case -EINVAL:
/* Happens, for instance, if the user specified a link
* local IPv6 address without a scope-id.
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 90b7f253d363..6b4b9f2749a6 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -616,6 +616,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
struct tls_crypto_info *alt_crypto_info;
struct tls_context *ctx = tls_get_ctx(sk);
const struct tls_cipher_desc *cipher_desc;
+ union tls_crypto_context *crypto_ctx;
int rc = 0;
int conf;
@@ -623,13 +624,15 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
return -EINVAL;
if (tx) {
- crypto_info = &ctx->crypto_send.info;
+ crypto_ctx = &ctx->crypto_send;
alt_crypto_info = &ctx->crypto_recv.info;
} else {
- crypto_info = &ctx->crypto_recv.info;
+ crypto_ctx = &ctx->crypto_recv;
alt_crypto_info = &ctx->crypto_send.info;
}
+ crypto_info = &crypto_ctx->info;
+
/* Currently we don't support set crypto info more than one time */
if (TLS_CRYPTO_INFO_READY(crypto_info))
return -EBUSY;
@@ -710,7 +713,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
return 0;
err_crypto_info:
- memzero_explicit(crypto_info, sizeof(union tls_crypto_context));
+ memzero_explicit(crypto_ctx, sizeof(*crypto_ctx));
return rc;
}
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index eb8aa5171a68..06d94ad999e9 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -470,6 +470,7 @@ prev_vertex:
}
if (vertex->index == vertex->scc_index) {
+ struct unix_vertex *v;
struct list_head scc;
bool scc_dead = true;
@@ -480,15 +481,15 @@ prev_vertex:
*/
__list_cut_position(&scc, &vertex_stack, &vertex->scc_entry);
- list_for_each_entry_reverse(vertex, &scc, scc_entry) {
+ list_for_each_entry_reverse(v, &scc, scc_entry) {
/* Don't restart DFS from this vertex in unix_walk_scc(). */
- list_move_tail(&vertex->entry, &unix_visited_vertices);
+ list_move_tail(&v->entry, &unix_visited_vertices);
/* Mark vertex as off-stack. */
- vertex->index = unix_vertex_grouped_index;
+ v->index = unix_vertex_grouped_index;
if (scc_dead)
- scc_dead = unix_vertex_dead(vertex);
+ scc_dead = unix_vertex_dead(v);
}
if (scc_dead)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ed062e038389..7e16336044b2 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -370,22 +370,23 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
- struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
int err;
err = xsk_rcv(xs, xdp);
if (err)
return err;
- if (!xs->flush_node.prev)
+ if (!xs->flush_node.prev) {
+ struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
+
list_add(&xs->flush_node, flush_list);
+ }
return 0;
}
-void __xsk_map_flush(void)
+void __xsk_map_flush(struct list_head *flush_list)
{
- struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
struct xdp_sock *xs, *tmp;
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
@@ -394,16 +395,6 @@ void __xsk_map_flush(void)
}
}
-#ifdef CONFIG_DEBUG_NET
-bool xsk_map_check_flush(void)
-{
- if (list_empty(bpf_net_ctx_get_xskmap_flush_list()))
- return false;
- __xsk_map_flush();
- return true;
-}
-#endif
-
void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
{
xskq_prod_submit_n(pool->cq, nb_entries);
diff --git a/scripts/Makefile.btf b/scripts/Makefile.btf
index bca8a8f26ea4..b75f09f3f424 100644
--- a/scripts/Makefile.btf
+++ b/scripts/Makefile.btf
@@ -19,10 +19,15 @@ pahole-flags-$(call test-ge, $(pahole-ver), 125) += --skip_encoding_btf_inconsis
else
# Switch to using --btf_features for v1.26 and later.
-pahole-flags-$(call test-ge, $(pahole-ver), 126) = -j --btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func
+pahole-flags-$(call test-ge, $(pahole-ver), 126) = -j --btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func,decl_tag_kfuncs
+
+ifneq ($(KBUILD_EXTMOD),)
+module-pahole-flags-$(call test-ge, $(pahole-ver), 126) += --btf_features=distilled_base
+endif
endif
pahole-flags-$(CONFIG_PAHOLE_HAS_LANG_EXCLUDE) += --lang_exclude=rust
export PAHOLE_FLAGS := $(pahole-flags-y)
+export MODULE_PAHOLE_FLAGS := $(module-pahole-flags-y)
diff --git a/scripts/Makefile.dtbinst b/scripts/Makefile.dtbinst
index 67956f6496a5..9d920419a62c 100644
--- a/scripts/Makefile.dtbinst
+++ b/scripts/Makefile.dtbinst
@@ -17,7 +17,7 @@ include $(srctree)/scripts/Kbuild.include
dst := $(INSTALL_DTBS_PATH)
quiet_cmd_dtb_install = INSTALL $@
- cmd_dtb_install = install -D $< $@
+ cmd_dtb_install = install -D -m 0644 $< $@
$(dst)/%: $(obj)/%
$(call cmd,dtb_install)
diff --git a/scripts/Makefile.host b/scripts/Makefile.host
index d35f55e0d141..e85be7721a48 100644
--- a/scripts/Makefile.host
+++ b/scripts/Makefile.host
@@ -146,7 +146,7 @@ $(call multi_depend, $(host-cxxmulti), , -objs -cxxobjs)
# Create .o file from a single .cc (C++) file
quiet_cmd_host-cxxobjs = HOSTCXX $@
cmd_host-cxxobjs = $(HOSTCXX) $(hostcxx_flags) -c -o $@ $<
-$(host-cxxobjs): $(obj)/%.o: $(src)/%.cc FORCE
+$(host-cxxobjs): $(obj)/%.o: $(obj)/%.cc FORCE
$(call if_changed_dep,host-cxxobjs)
# Create executable from a single Rust crate (which may consist of
diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal
index 3bec9043e4f3..1fa98b5e952b 100644
--- a/scripts/Makefile.modfinal
+++ b/scripts/Makefile.modfinal
@@ -41,7 +41,7 @@ quiet_cmd_btf_ko = BTF [M] $@
if [ ! -f vmlinux ]; then \
printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \
else \
- LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J $(PAHOLE_FLAGS) --btf_base vmlinux $@; \
+ LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J $(PAHOLE_FLAGS) $(MODULE_PAHOLE_FLAGS) --btf_base vmlinux $@; \
$(RESOLVE_BTFIDS) -b vmlinux $@; \
fi;
diff --git a/scripts/Makefile.package b/scripts/Makefile.package
index 38653f3e8108..bf016af8bf8a 100644
--- a/scripts/Makefile.package
+++ b/scripts/Makefile.package
@@ -103,7 +103,7 @@ debian-orig: private version = $(shell dpkg-parsechangelog -S Version | sed 's/-
debian-orig: private orig-name = $(source)_$(version).orig.tar$(debian-orig-suffix)
debian-orig: mkdebian-opts = --need-source
debian-orig: linux.tar$(debian-orig-suffix) debian
- $(Q)if [ "$(df --output=target .. 2>/dev/null)" = "$(df --output=target $< 2>/dev/null)" ]; then \
+ $(Q)if [ "$$(df --output=target .. 2>/dev/null)" = "$$(df --output=target $< 2>/dev/null)" ]; then \
ln -f $< ../$(orig-name); \
else \
cp $< ../$(orig-name); \
diff --git a/scripts/gdb/linux/Makefile b/scripts/gdb/linux/Makefile
index fd1402c0a1a1..fcd32fcf3ae0 100644
--- a/scripts/gdb/linux/Makefile
+++ b/scripts/gdb/linux/Makefile
@@ -5,7 +5,7 @@ ifdef building_out_of_srctree
symlinks := $(patsubst $(src)/%,%,$(wildcard $(src)/*.py))
quiet_cmd_symlink = SYMLINK $@
- cmd_symlink = ln -fsn $(patsubst $(obj)/%,$(src)/%,$@) $@
+ cmd_symlink = ln -fsn $(patsubst $(obj)/%,$(abspath $(src))/%,$@) $@
always-y += $(symlinks)
$(addprefix $(obj)/, $(symlinks)): FORCE
diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec
index e095eb1e290e..fffc8af8deb1 100644
--- a/scripts/package/kernel.spec
+++ b/scripts/package/kernel.spec
@@ -57,7 +57,8 @@ patch -p1 < %{SOURCE2}
%install
mkdir -p %{buildroot}/lib/modules/%{KERNELRELEASE}
cp $(%{make} %{makeflags} -s image_name) %{buildroot}/lib/modules/%{KERNELRELEASE}/vmlinuz
-%{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install
+# DEPMOD=true makes depmod no-op. We do not package depmod-generated files.
+%{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} DEPMOD=true modules_install
%{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install
cp System.map %{buildroot}/lib/modules/%{KERNELRELEASE}
cp .config %{buildroot}/lib/modules/%{KERNELRELEASE}/config
@@ -70,10 +71,7 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA
%endif
{
- for x in System.map config kernel modules.builtin \
- modules.builtin.modinfo modules.order vmlinuz; do
- echo "/lib/modules/%{KERNELRELEASE}/${x}"
- done
+ echo "/lib/modules/%{KERNELRELEASE}"
for x in alias alias.bin builtin.alias.bin builtin.bin dep dep.bin \
devname softdep symbols symbols.bin; do
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index abdd22007ed8..e4a79a9b2d58 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -427,8 +427,6 @@ static void __init remove_securityfs_measurement_lists(struct dentry **lists)
kfree(lists);
}
-
- securityfs_measurement_list_count = 0;
}
static int __init create_securityfs_measurement_lists(void)
@@ -625,6 +623,7 @@ out:
securityfs_remove(binary_runtime_measurements);
remove_securityfs_measurement_lists(ascii_securityfs_measurement_lists);
remove_securityfs_measurement_lists(binary_securityfs_measurement_lists);
+ securityfs_measurement_list_count = 0;
securityfs_remove(ima_symlink);
securityfs_remove(ima_dir);
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index af047dedde38..6789c7a4d5ca 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -20,6 +20,8 @@
#include "json_writer.h"
#include "main.h"
+#define KFUNC_DECL_TAG "bpf_kfunc"
+
static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_UNKN] = "UNKNOWN",
[BTF_KIND_INT] = "INT",
@@ -461,6 +463,49 @@ static int dump_btf_raw(const struct btf *btf,
return 0;
}
+static int dump_btf_kfuncs(struct btf_dump *d, const struct btf *btf)
+{
+ LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts);
+ int cnt = btf__type_cnt(btf);
+ int i;
+
+ printf("\n/* BPF kfuncs */\n");
+ printf("#ifndef BPF_NO_KFUNC_PROTOTYPES\n");
+
+ for (i = 1; i < cnt; i++) {
+ const struct btf_type *t = btf__type_by_id(btf, i);
+ const char *name;
+ int err;
+
+ if (!btf_is_decl_tag(t))
+ continue;
+
+ if (btf_decl_tag(t)->component_idx != -1)
+ continue;
+
+ name = btf__name_by_offset(btf, t->name_off);
+ if (strncmp(name, KFUNC_DECL_TAG, sizeof(KFUNC_DECL_TAG)))
+ continue;
+
+ t = btf__type_by_id(btf, t->type);
+ if (!btf_is_func(t))
+ continue;
+
+ printf("extern ");
+
+ opts.field_name = btf__name_by_offset(btf, t->name_off);
+ err = btf_dump__emit_type_decl(d, t->type, &opts);
+ if (err)
+ return err;
+
+ printf(" __weak __ksym;\n");
+ }
+
+ printf("#endif\n\n");
+
+ return 0;
+}
+
static void __printf(2, 0) btf_dump_printf(void *ctx,
const char *fmt, va_list args)
{
@@ -596,6 +641,12 @@ static int dump_btf_c(const struct btf *btf,
printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n");
printf("#endif\n\n");
+ printf("#ifndef __ksym\n");
+ printf("#define __ksym __attribute__((section(\".ksyms\")))\n");
+ printf("#endif\n\n");
+ printf("#ifndef __weak\n");
+ printf("#define __weak __attribute__((weak))\n");
+ printf("#endif\n\n");
if (root_type_cnt) {
for (i = 0; i < root_type_cnt; i++) {
@@ -615,6 +666,10 @@ static int dump_btf_c(const struct btf *btf,
if (err)
goto done;
}
+
+ err = dump_btf_kfuncs(d, btf);
+ if (err)
+ goto done;
}
printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index af6898c0f388..9af426d43299 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -19,6 +19,38 @@
#include "main.h"
+static const int cgroup_attach_types[] = {
+ BPF_CGROUP_INET_INGRESS,
+ BPF_CGROUP_INET_EGRESS,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_INET_SOCK_RELEASE,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_UNIX_CONNECT,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ BPF_CGROUP_UNIX_GETPEERNAME,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ BPF_CGROUP_UNIX_GETSOCKNAME,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UNIX_SENDMSG,
+ BPF_CGROUP_UDP4_RECVMSG,
+ BPF_CGROUP_UDP6_RECVMSG,
+ BPF_CGROUP_UNIX_RECVMSG,
+ BPF_CGROUP_SOCK_OPS,
+ BPF_CGROUP_DEVICE,
+ BPF_CGROUP_SYSCTL,
+ BPF_CGROUP_GETSOCKOPT,
+ BPF_CGROUP_SETSOCKOPT,
+ BPF_LSM_CGROUP
+};
+
#define HELP_SPEC_ATTACH_FLAGS \
"ATTACH_FLAGS := { multi | override }"
@@ -183,13 +215,13 @@ static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
static int cgroup_has_attached_progs(int cgroup_fd)
{
- enum bpf_attach_type type;
+ unsigned int i = 0;
bool no_prog = true;
- for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
- int count = count_attached_bpf_progs(cgroup_fd, type);
+ for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) {
+ int count = count_attached_bpf_progs(cgroup_fd, cgroup_attach_types[i]);
- if (count < 0 && errno != EINVAL)
+ if (count < 0)
return -1;
if (count > 0) {
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 4a4eedfcd479..51eaed76db97 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -1272,6 +1272,8 @@ static int do_skeleton(int argc, char **argv)
#include <stdlib.h> \n\
#include <bpf/libbpf.h> \n\
\n\
+ #define BPF_SKEL_SUPPORTS_MAP_AUTO_ATTACH 1 \n\
+ \n\
struct %1$s { \n\
struct bpf_object_skeleton *skeleton; \n\
struct bpf_object *obj; \n\
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 1a501cf09e78..40ea743d139f 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1813,6 +1813,10 @@ offload_dev:
}
if (pinmaps) {
+ err = create_and_mount_bpffs_dir(pinmaps);
+ if (err)
+ goto err_unpin;
+
err = bpf_object__pin_maps(obj, pinmaps);
if (err) {
p_err("failed to pin all maps");
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index af393c7dee1f..936ef95c3d32 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -409,6 +409,14 @@ static int elf_collect(struct object *obj)
obj->efile.idlist = data;
obj->efile.idlist_shndx = idx;
obj->efile.idlist_addr = sh.sh_addr;
+ } else if (!strcmp(name, BTF_BASE_ELF_SEC)) {
+ /* If a .BTF.base section is found, do not resolve
+ * BTF ids relative to vmlinux; resolve relative
+ * to the .BTF.base section instead. btf__parse_split()
+ * will take care of this once the base BTF it is
+ * passed is NULL.
+ */
+ obj->base_btf_path = NULL;
}
if (compressed_section_fix(elf, scn, &sh))
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 25ea393cf084..35bcf52dbc65 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1425,6 +1425,8 @@ enum {
#define BPF_F_TEST_RUN_ON_CPU (1U << 0)
/* If set, XDP frames will be transmitted after processing */
#define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1)
+/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */
+#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2)
/* type for BPF_ENABLE_STATS */
enum bpf_stats_type {
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index b6619199a706..e2cd558ca0b4 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,4 +1,4 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
- usdt.o zip.o elf.o features.o
+ usdt.o zip.o elf.o features.o btf_iter.o btf_relocate.o
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 775ca55a541c..32c00db3b91b 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -116,6 +116,9 @@ struct btf {
/* whether strings are already deduplicated */
bool strs_deduped;
+ /* whether base_btf should be freed in btf_free for this instance */
+ bool owns_base;
+
/* BTF object FD, if loaded into kernel */
int fd;
@@ -598,7 +601,7 @@ static int btf_sanity_check(const struct btf *btf)
__u32 i, n = btf__type_cnt(btf);
int err;
- for (i = 1; i < n; i++) {
+ for (i = btf->start_id; i < n; i++) {
t = btf_type_by_id(btf, i);
err = btf_validate_type(btf, t, i);
if (err)
@@ -969,6 +972,8 @@ void btf__free(struct btf *btf)
free(btf->raw_data);
free(btf->raw_data_swapped);
free(btf->type_offs);
+ if (btf->owns_base)
+ btf__free(btf->base_btf);
free(btf);
}
@@ -1084,53 +1089,38 @@ struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf)
return libbpf_ptr(btf_new(data, size, base_btf));
}
-static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
- struct btf_ext **btf_ext)
+struct btf_elf_secs {
+ Elf_Data *btf_data;
+ Elf_Data *btf_ext_data;
+ Elf_Data *btf_base_data;
+};
+
+static int btf_find_elf_sections(Elf *elf, const char *path, struct btf_elf_secs *secs)
{
- Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
- int err = 0, fd = -1, idx = 0;
- struct btf *btf = NULL;
Elf_Scn *scn = NULL;
- Elf *elf = NULL;
+ Elf_Data *data;
GElf_Ehdr ehdr;
size_t shstrndx;
+ int idx = 0;
- if (elf_version(EV_CURRENT) == EV_NONE) {
- pr_warn("failed to init libelf for %s\n", path);
- return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
- }
-
- fd = open(path, O_RDONLY | O_CLOEXEC);
- if (fd < 0) {
- err = -errno;
- pr_warn("failed to open %s: %s\n", path, strerror(errno));
- return ERR_PTR(err);
- }
-
- err = -LIBBPF_ERRNO__FORMAT;
-
- elf = elf_begin(fd, ELF_C_READ, NULL);
- if (!elf) {
- pr_warn("failed to open %s as ELF file\n", path);
- goto done;
- }
if (!gelf_getehdr(elf, &ehdr)) {
pr_warn("failed to get EHDR from %s\n", path);
- goto done;
+ goto err;
}
if (elf_getshdrstrndx(elf, &shstrndx)) {
pr_warn("failed to get section names section index for %s\n",
path);
- goto done;
+ goto err;
}
if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) {
pr_warn("failed to get e_shstrndx from %s\n", path);
- goto done;
+ goto err;
}
while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ Elf_Data **field;
GElf_Shdr sh;
char *name;
@@ -1138,42 +1128,102 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
if (gelf_getshdr(scn, &sh) != &sh) {
pr_warn("failed to get section(%d) header from %s\n",
idx, path);
- goto done;
+ goto err;
}
name = elf_strptr(elf, shstrndx, sh.sh_name);
if (!name) {
pr_warn("failed to get section(%d) name from %s\n",
idx, path);
- goto done;
+ goto err;
}
- if (strcmp(name, BTF_ELF_SEC) == 0) {
- btf_data = elf_getdata(scn, 0);
- if (!btf_data) {
- pr_warn("failed to get section(%d, %s) data from %s\n",
- idx, name, path);
- goto done;
- }
- continue;
- } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) {
- btf_ext_data = elf_getdata(scn, 0);
- if (!btf_ext_data) {
- pr_warn("failed to get section(%d, %s) data from %s\n",
- idx, name, path);
- goto done;
- }
+
+ if (strcmp(name, BTF_ELF_SEC) == 0)
+ field = &secs->btf_data;
+ else if (strcmp(name, BTF_EXT_ELF_SEC) == 0)
+ field = &secs->btf_ext_data;
+ else if (strcmp(name, BTF_BASE_ELF_SEC) == 0)
+ field = &secs->btf_base_data;
+ else
continue;
+
+ data = elf_getdata(scn, 0);
+ if (!data) {
+ pr_warn("failed to get section(%d, %s) data from %s\n",
+ idx, name, path);
+ goto err;
}
+ *field = data;
+ }
+
+ return 0;
+
+err:
+ return -LIBBPF_ERRNO__FORMAT;
+}
+
+static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
+ struct btf_ext **btf_ext)
+{
+ struct btf_elf_secs secs = {};
+ struct btf *dist_base_btf = NULL;
+ struct btf *btf = NULL;
+ int err = 0, fd = -1;
+ Elf *elf = NULL;
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ pr_warn("failed to init libelf for %s\n", path);
+ return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
+ }
+
+ fd = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ err = -errno;
+ pr_warn("failed to open %s: %s\n", path, strerror(errno));
+ return ERR_PTR(err);
+ }
+
+ elf = elf_begin(fd, ELF_C_READ, NULL);
+ if (!elf) {
+ pr_warn("failed to open %s as ELF file\n", path);
+ goto done;
}
- if (!btf_data) {
+ err = btf_find_elf_sections(elf, path, &secs);
+ if (err)
+ goto done;
+
+ if (!secs.btf_data) {
pr_warn("failed to find '%s' ELF section in %s\n", BTF_ELF_SEC, path);
err = -ENODATA;
goto done;
}
- btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf);
- err = libbpf_get_error(btf);
- if (err)
+
+ if (secs.btf_base_data) {
+ dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size,
+ NULL);
+ if (IS_ERR(dist_base_btf)) {
+ err = PTR_ERR(dist_base_btf);
+ dist_base_btf = NULL;
+ goto done;
+ }
+ }
+
+ btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size,
+ dist_base_btf ?: base_btf);
+ if (IS_ERR(btf)) {
+ err = PTR_ERR(btf);
goto done;
+ }
+ if (dist_base_btf && base_btf) {
+ err = btf__relocate(btf, base_btf);
+ if (err)
+ goto done;
+ btf__free(dist_base_btf);
+ dist_base_btf = NULL;
+ }
+
+ if (dist_base_btf)
+ btf->owns_base = true;
switch (gelf_getclass(elf)) {
case ELFCLASS32:
@@ -1187,11 +1237,12 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
break;
}
- if (btf_ext && btf_ext_data) {
- *btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
- err = libbpf_get_error(*btf_ext);
- if (err)
+ if (btf_ext && secs.btf_ext_data) {
+ *btf_ext = btf_ext__new(secs.btf_ext_data->d_buf, secs.btf_ext_data->d_size);
+ if (IS_ERR(*btf_ext)) {
+ err = PTR_ERR(*btf_ext);
goto done;
+ }
} else if (btf_ext) {
*btf_ext = NULL;
}
@@ -1205,6 +1256,7 @@ done:
if (btf_ext)
btf_ext__free(*btf_ext);
+ btf__free(dist_base_btf);
btf__free(btf);
return ERR_PTR(err);
@@ -1770,9 +1822,8 @@ static int btf_rewrite_str(struct btf_pipe *p, __u32 *str_off)
return 0;
}
-int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
+static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type)
{
- struct btf_pipe p = { .src = src_btf, .dst = btf };
struct btf_field_iter it;
struct btf_type *t;
__u32 *str_off;
@@ -1783,10 +1834,10 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t
return libbpf_err(sz);
/* deconstruct BTF, if necessary, and invalidate raw_data */
- if (btf_ensure_modifiable(btf))
+ if (btf_ensure_modifiable(p->dst))
return libbpf_err(-ENOMEM);
- t = btf_add_type_mem(btf, sz);
+ t = btf_add_type_mem(p->dst, sz);
if (!t)
return libbpf_err(-ENOMEM);
@@ -1797,12 +1848,19 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t
return libbpf_err(err);
while ((str_off = btf_field_iter_next(&it))) {
- err = btf_rewrite_str(&p, str_off);
+ err = btf_rewrite_str(p, str_off);
if (err)
return libbpf_err(err);
}
- return btf_commit_type(btf, sz);
+ return btf_commit_type(p->dst, sz);
+}
+
+int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
+{
+ struct btf_pipe p = { .src = src_btf, .dst = btf };
+
+ return btf_add_type(&p, src_type);
}
static size_t btf_dedup_identity_hash_fn(long key, void *ctx);
@@ -5035,168 +5093,6 @@ struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_bt
return btf__parse_split(path, vmlinux_btf);
}
-int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, enum btf_field_iter_kind iter_kind)
-{
- it->p = NULL;
- it->m_idx = -1;
- it->off_idx = 0;
- it->vlen = 0;
-
- switch (iter_kind) {
- case BTF_FIELD_ITER_IDS:
- switch (btf_kind(t)) {
- case BTF_KIND_UNKN:
- case BTF_KIND_INT:
- case BTF_KIND_FLOAT:
- case BTF_KIND_ENUM:
- case BTF_KIND_ENUM64:
- it->desc = (struct btf_field_desc) {};
- break;
- case BTF_KIND_FWD:
- case BTF_KIND_CONST:
- case BTF_KIND_VOLATILE:
- case BTF_KIND_RESTRICT:
- case BTF_KIND_PTR:
- case BTF_KIND_TYPEDEF:
- case BTF_KIND_FUNC:
- case BTF_KIND_VAR:
- case BTF_KIND_DECL_TAG:
- case BTF_KIND_TYPE_TAG:
- it->desc = (struct btf_field_desc) { 1, {offsetof(struct btf_type, type)} };
- break;
- case BTF_KIND_ARRAY:
- it->desc = (struct btf_field_desc) {
- 2, {sizeof(struct btf_type) + offsetof(struct btf_array, type),
- sizeof(struct btf_type) + offsetof(struct btf_array, index_type)}
- };
- break;
- case BTF_KIND_STRUCT:
- case BTF_KIND_UNION:
- it->desc = (struct btf_field_desc) {
- 0, {},
- sizeof(struct btf_member),
- 1, {offsetof(struct btf_member, type)}
- };
- break;
- case BTF_KIND_FUNC_PROTO:
- it->desc = (struct btf_field_desc) {
- 1, {offsetof(struct btf_type, type)},
- sizeof(struct btf_param),
- 1, {offsetof(struct btf_param, type)}
- };
- break;
- case BTF_KIND_DATASEC:
- it->desc = (struct btf_field_desc) {
- 0, {},
- sizeof(struct btf_var_secinfo),
- 1, {offsetof(struct btf_var_secinfo, type)}
- };
- break;
- default:
- return -EINVAL;
- }
- break;
- case BTF_FIELD_ITER_STRS:
- switch (btf_kind(t)) {
- case BTF_KIND_UNKN:
- it->desc = (struct btf_field_desc) {};
- break;
- case BTF_KIND_INT:
- case BTF_KIND_FLOAT:
- case BTF_KIND_FWD:
- case BTF_KIND_ARRAY:
- case BTF_KIND_CONST:
- case BTF_KIND_VOLATILE:
- case BTF_KIND_RESTRICT:
- case BTF_KIND_PTR:
- case BTF_KIND_TYPEDEF:
- case BTF_KIND_FUNC:
- case BTF_KIND_VAR:
- case BTF_KIND_DECL_TAG:
- case BTF_KIND_TYPE_TAG:
- case BTF_KIND_DATASEC:
- it->desc = (struct btf_field_desc) {
- 1, {offsetof(struct btf_type, name_off)}
- };
- break;
- case BTF_KIND_ENUM:
- it->desc = (struct btf_field_desc) {
- 1, {offsetof(struct btf_type, name_off)},
- sizeof(struct btf_enum),
- 1, {offsetof(struct btf_enum, name_off)}
- };
- break;
- case BTF_KIND_ENUM64:
- it->desc = (struct btf_field_desc) {
- 1, {offsetof(struct btf_type, name_off)},
- sizeof(struct btf_enum64),
- 1, {offsetof(struct btf_enum64, name_off)}
- };
- break;
- case BTF_KIND_STRUCT:
- case BTF_KIND_UNION:
- it->desc = (struct btf_field_desc) {
- 1, {offsetof(struct btf_type, name_off)},
- sizeof(struct btf_member),
- 1, {offsetof(struct btf_member, name_off)}
- };
- break;
- case BTF_KIND_FUNC_PROTO:
- it->desc = (struct btf_field_desc) {
- 1, {offsetof(struct btf_type, name_off)},
- sizeof(struct btf_param),
- 1, {offsetof(struct btf_param, name_off)}
- };
- break;
- default:
- return -EINVAL;
- }
- break;
- default:
- return -EINVAL;
- }
-
- if (it->desc.m_sz)
- it->vlen = btf_vlen(t);
-
- it->p = t;
- return 0;
-}
-
-__u32 *btf_field_iter_next(struct btf_field_iter *it)
-{
- if (!it->p)
- return NULL;
-
- if (it->m_idx < 0) {
- if (it->off_idx < it->desc.t_off_cnt)
- return it->p + it->desc.t_offs[it->off_idx++];
- /* move to per-member iteration */
- it->m_idx = 0;
- it->p += sizeof(struct btf_type);
- it->off_idx = 0;
- }
-
- /* if type doesn't have members, stop */
- if (it->desc.m_sz == 0) {
- it->p = NULL;
- return NULL;
- }
-
- if (it->off_idx >= it->desc.m_off_cnt) {
- /* exhausted this member's fields, go to the next member */
- it->m_idx++;
- it->p += it->desc.m_sz;
- it->off_idx = 0;
- }
-
- if (it->m_idx < it->vlen)
- return it->p + it->desc.m_offs[it->off_idx++];
-
- it->p = NULL;
- return NULL;
-}
-
int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx)
{
const struct btf_ext_info *seg;
@@ -5276,3 +5172,325 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void
return 0;
}
+
+struct btf_distill {
+ struct btf_pipe pipe;
+ int *id_map;
+ unsigned int split_start_id;
+ unsigned int split_start_str;
+ int diff_id;
+};
+
+static int btf_add_distilled_type_ids(struct btf_distill *dist, __u32 i)
+{
+ struct btf_type *split_t = btf_type_by_id(dist->pipe.src, i);
+ struct btf_field_iter it;
+ __u32 *id;
+ int err;
+
+ err = btf_field_iter_init(&it, split_t, BTF_FIELD_ITER_IDS);
+ if (err)
+ return err;
+ while ((id = btf_field_iter_next(&it))) {
+ struct btf_type *base_t;
+
+ if (!*id)
+ continue;
+ /* split BTF id, not needed */
+ if (*id >= dist->split_start_id)
+ continue;
+ /* already added ? */
+ if (dist->id_map[*id] > 0)
+ continue;
+
+ /* only a subset of base BTF types should be referenced from
+ * split BTF; ensure nothing unexpected is referenced.
+ */
+ base_t = btf_type_by_id(dist->pipe.src, *id);
+ switch (btf_kind(base_t)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_FWD:
+ case BTF_KIND_ARRAY:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ case BTF_KIND_PTR:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_FUNC_PROTO:
+ case BTF_KIND_TYPE_TAG:
+ dist->id_map[*id] = *id;
+ break;
+ default:
+ pr_warn("unexpected reference to base type[%u] of kind [%u] when creating distilled base BTF.\n",
+ *id, btf_kind(base_t));
+ return -EINVAL;
+ }
+ /* If a base type is used, ensure types it refers to are
+ * marked as used also; so for example if we find a PTR to INT
+ * we need both the PTR and INT.
+ *
+ * The only exception is named struct/unions, since distilled
+ * base BTF composite types have no members.
+ */
+ if (btf_is_composite(base_t) && base_t->name_off)
+ continue;
+ err = btf_add_distilled_type_ids(dist, *id);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int btf_add_distilled_types(struct btf_distill *dist)
+{
+ bool adding_to_base = dist->pipe.dst->start_id == 1;
+ int id = btf__type_cnt(dist->pipe.dst);
+ struct btf_type *t;
+ int i, err = 0;
+
+
+ /* Add types for each of the required references to either distilled
+ * base or split BTF, depending on type characteristics.
+ */
+ for (i = 1; i < dist->split_start_id; i++) {
+ const char *name;
+ int kind;
+
+ if (!dist->id_map[i])
+ continue;
+ t = btf_type_by_id(dist->pipe.src, i);
+ kind = btf_kind(t);
+ name = btf__name_by_offset(dist->pipe.src, t->name_off);
+
+ switch (kind) {
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_FWD:
+ /* Named int, float, fwd are added to base. */
+ if (!adding_to_base)
+ continue;
+ err = btf_add_type(&dist->pipe, t);
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ /* Named struct/union are added to base as 0-vlen
+ * struct/union of same size. Anonymous struct/unions
+ * are added to split BTF as-is.
+ */
+ if (adding_to_base) {
+ if (!t->name_off)
+ continue;
+ err = btf_add_composite(dist->pipe.dst, kind, name, t->size);
+ } else {
+ if (t->name_off)
+ continue;
+ err = btf_add_type(&dist->pipe, t);
+ }
+ break;
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ /* Named enum[64]s are added to base as a sized
+ * enum; relocation will match with appropriately-named
+ * and sized enum or enum64.
+ *
+ * Anonymous enums are added to split BTF as-is.
+ */
+ if (adding_to_base) {
+ if (!t->name_off)
+ continue;
+ err = btf__add_enum(dist->pipe.dst, name, t->size);
+ } else {
+ if (t->name_off)
+ continue;
+ err = btf_add_type(&dist->pipe, t);
+ }
+ break;
+ case BTF_KIND_ARRAY:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_PTR:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_FUNC_PROTO:
+ case BTF_KIND_TYPE_TAG:
+ /* All other types are added to split BTF. */
+ if (adding_to_base)
+ continue;
+ err = btf_add_type(&dist->pipe, t);
+ break;
+ default:
+ pr_warn("unexpected kind when adding base type '%s'[%u] of kind [%u] to distilled base BTF.\n",
+ name, i, kind);
+ return -EINVAL;
+
+ }
+ if (err < 0)
+ break;
+ dist->id_map[i] = id++;
+ }
+ return err;
+}
+
+/* Split BTF ids without a mapping will be shifted downwards since distilled
+ * base BTF is smaller than the original base BTF. For those that have a
+ * mapping (either to base or updated split BTF), update the id based on
+ * that mapping.
+ */
+static int btf_update_distilled_type_ids(struct btf_distill *dist, __u32 i)
+{
+ struct btf_type *t = btf_type_by_id(dist->pipe.dst, i);
+ struct btf_field_iter it;
+ __u32 *id;
+ int err;
+
+ err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS);
+ if (err)
+ return err;
+ while ((id = btf_field_iter_next(&it))) {
+ if (dist->id_map[*id])
+ *id = dist->id_map[*id];
+ else if (*id >= dist->split_start_id)
+ *id -= dist->diff_id;
+ }
+ return 0;
+}
+
+/* Create updated split BTF with distilled base BTF; distilled base BTF
+ * consists of BTF information required to clarify the types that split
+ * BTF refers to, omitting unneeded details. Specifically it will contain
+ * base types and memberless definitions of named structs, unions and enumerated
+ * types. Associated reference types like pointers, arrays and anonymous
+ * structs, unions and enumerated types will be added to split BTF.
+ * Size is recorded for named struct/unions to help guide matching to the
+ * target base BTF during later relocation.
+ *
+ * The only case where structs, unions or enumerated types are fully represented
+ * is when they are anonymous; in such cases, the anonymous type is added to
+ * split BTF in full.
+ *
+ * We return newly-created split BTF where the split BTF refers to a newly-created
+ * distilled base BTF. Both must be freed separately by the caller.
+ */
+int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf,
+ struct btf **new_split_btf)
+{
+ struct btf *new_base = NULL, *new_split = NULL;
+ const struct btf *old_base;
+ unsigned int n = btf__type_cnt(src_btf);
+ struct btf_distill dist = {};
+ struct btf_type *t;
+ int i, err = 0;
+
+ /* src BTF must be split BTF. */
+ old_base = btf__base_btf(src_btf);
+ if (!new_base_btf || !new_split_btf || !old_base)
+ return libbpf_err(-EINVAL);
+
+ new_base = btf__new_empty();
+ if (!new_base)
+ return libbpf_err(-ENOMEM);
+ dist.id_map = calloc(n, sizeof(*dist.id_map));
+ if (!dist.id_map) {
+ err = -ENOMEM;
+ goto done;
+ }
+ dist.pipe.src = src_btf;
+ dist.pipe.dst = new_base;
+ dist.pipe.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
+ if (IS_ERR(dist.pipe.str_off_map)) {
+ err = -ENOMEM;
+ goto done;
+ }
+ dist.split_start_id = btf__type_cnt(old_base);
+ dist.split_start_str = old_base->hdr->str_len;
+
+ /* Pass over src split BTF; generate the list of base BTF type ids it
+ * references; these will constitute our distilled BTF set to be
+ * distributed over base and split BTF as appropriate.
+ */
+ for (i = src_btf->start_id; i < n; i++) {
+ err = btf_add_distilled_type_ids(&dist, i);
+ if (err < 0)
+ goto done;
+ }
+ /* Next add types for each of the required references to base BTF and split BTF
+ * in turn.
+ */
+ err = btf_add_distilled_types(&dist);
+ if (err < 0)
+ goto done;
+
+ /* Create new split BTF with distilled base BTF as its base; the final
+ * state is split BTF with distilled base BTF that represents enough
+ * about its base references to allow it to be relocated with the base
+ * BTF available.
+ */
+ new_split = btf__new_empty_split(new_base);
+ if (!new_split) {
+ err = -errno;
+ goto done;
+ }
+ dist.pipe.dst = new_split;
+ /* First add all split types */
+ for (i = src_btf->start_id; i < n; i++) {
+ t = btf_type_by_id(src_btf, i);
+ err = btf_add_type(&dist.pipe, t);
+ if (err < 0)
+ goto done;
+ }
+ /* Now add distilled types to split BTF that are not added to base. */
+ err = btf_add_distilled_types(&dist);
+ if (err < 0)
+ goto done;
+
+ /* All split BTF ids will be shifted downwards since there are less base
+ * BTF ids in distilled base BTF.
+ */
+ dist.diff_id = dist.split_start_id - btf__type_cnt(new_base);
+
+ n = btf__type_cnt(new_split);
+ /* Now update base/split BTF ids. */
+ for (i = 1; i < n; i++) {
+ err = btf_update_distilled_type_ids(&dist, i);
+ if (err < 0)
+ break;
+ }
+done:
+ free(dist.id_map);
+ hashmap__free(dist.pipe.str_off_map);
+ if (err) {
+ btf__free(new_split);
+ btf__free(new_base);
+ return libbpf_err(err);
+ }
+ *new_base_btf = new_base;
+ *new_split_btf = new_split;
+
+ return 0;
+}
+
+const struct btf_header *btf_header(const struct btf *btf)
+{
+ return btf->hdr;
+}
+
+void btf_set_base_btf(struct btf *btf, const struct btf *base_btf)
+{
+ btf->base_btf = (struct btf *)base_btf;
+ btf->start_id = btf__type_cnt(base_btf);
+ btf->start_str_off = base_btf->hdr->str_len;
+}
+
+int btf__relocate(struct btf *btf, const struct btf *base_btf)
+{
+ int err = btf_relocate(btf, base_btf, NULL);
+
+ if (!err)
+ btf->owns_base = false;
+ return libbpf_err(err);
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 8e6880d91c84..b68d216837a9 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -18,6 +18,7 @@ extern "C" {
#define BTF_ELF_SEC ".BTF"
#define BTF_EXT_ELF_SEC ".BTF.ext"
+#define BTF_BASE_ELF_SEC ".BTF.base"
#define MAPS_ELF_SEC ".maps"
struct btf;
@@ -107,6 +108,27 @@ LIBBPF_API struct btf *btf__new_empty(void);
*/
LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
+/**
+ * @brief **btf__distill_base()** creates new versions of the split BTF
+ * *src_btf* and its base BTF. The new base BTF will only contain the types
+ * needed to improve robustness of the split BTF to small changes in base BTF.
+ * When that split BTF is loaded against a (possibly changed) base, this
+ * distilled base BTF will help update references to that (possibly changed)
+ * base BTF.
+ *
+ * Both the new split and its associated new base BTF must be freed by
+ * the caller.
+ *
+ * If successful, 0 is returned and **new_base_btf** and **new_split_btf**
+ * will point at new base/split BTF. Both the new split and its associated
+ * new base BTF must be freed by the caller.
+ *
+ * A negative value is returned on error and the thread-local `errno` variable
+ * is set to the error code as well.
+ */
+LIBBPF_API int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf,
+ struct btf **new_split_btf);
+
LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
LIBBPF_API struct btf *btf__parse_split(const char *path, struct btf *base_btf);
LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);
@@ -231,6 +253,20 @@ struct btf_dedup_opts {
LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
+/**
+ * @brief **btf__relocate()** will check the split BTF *btf* for references
+ * to base BTF kinds, and verify those references are compatible with
+ * *base_btf*; if they are, *btf* is adjusted such that is re-parented to
+ * *base_btf* and type ids and strings are adjusted to accommodate this.
+ *
+ * If successful, 0 is returned and **btf** now has **base_btf** as its
+ * base.
+ *
+ * A negative value is returned on error and the thread-local `errno` variable
+ * is set to the error code as well.
+ */
+LIBBPF_API int btf__relocate(struct btf *btf, const struct btf *base_btf);
+
struct btf_dump;
struct btf_dump_opts {
diff --git a/tools/lib/bpf/btf_iter.c b/tools/lib/bpf/btf_iter.c
new file mode 100644
index 000000000000..9a6c822c2294
--- /dev/null
+++ b/tools/lib/bpf/btf_iter.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Facebook */
+/* Copyright (c) 2024, Oracle and/or its affiliates. */
+
+#ifdef __KERNEL__
+#include <linux/bpf.h>
+#include <linux/btf.h>
+
+#define btf_var_secinfos(t) (struct btf_var_secinfo *)btf_type_var_secinfo(t)
+
+#else
+#include "btf.h"
+#include "libbpf_internal.h"
+#endif
+
+int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t,
+ enum btf_field_iter_kind iter_kind)
+{
+ it->p = NULL;
+ it->m_idx = -1;
+ it->off_idx = 0;
+ it->vlen = 0;
+
+ switch (iter_kind) {
+ case BTF_FIELD_ITER_IDS:
+ switch (btf_kind(t)) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ it->desc = (struct btf_field_desc) {};
+ break;
+ case BTF_KIND_FWD:
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_VAR:
+ case BTF_KIND_DECL_TAG:
+ case BTF_KIND_TYPE_TAG:
+ it->desc = (struct btf_field_desc) { 1, {offsetof(struct btf_type, type)} };
+ break;
+ case BTF_KIND_ARRAY:
+ it->desc = (struct btf_field_desc) {
+ 2, {sizeof(struct btf_type) + offsetof(struct btf_array, type),
+ sizeof(struct btf_type) + offsetof(struct btf_array, index_type)}
+ };
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ it->desc = (struct btf_field_desc) {
+ 0, {},
+ sizeof(struct btf_member),
+ 1, {offsetof(struct btf_member, type)}
+ };
+ break;
+ case BTF_KIND_FUNC_PROTO:
+ it->desc = (struct btf_field_desc) {
+ 1, {offsetof(struct btf_type, type)},
+ sizeof(struct btf_param),
+ 1, {offsetof(struct btf_param, type)}
+ };
+ break;
+ case BTF_KIND_DATASEC:
+ it->desc = (struct btf_field_desc) {
+ 0, {},
+ sizeof(struct btf_var_secinfo),
+ 1, {offsetof(struct btf_var_secinfo, type)}
+ };
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case BTF_FIELD_ITER_STRS:
+ switch (btf_kind(t)) {
+ case BTF_KIND_UNKN:
+ it->desc = (struct btf_field_desc) {};
+ break;
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_FWD:
+ case BTF_KIND_ARRAY:
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_VAR:
+ case BTF_KIND_DECL_TAG:
+ case BTF_KIND_TYPE_TAG:
+ case BTF_KIND_DATASEC:
+ it->desc = (struct btf_field_desc) {
+ 1, {offsetof(struct btf_type, name_off)}
+ };
+ break;
+ case BTF_KIND_ENUM:
+ it->desc = (struct btf_field_desc) {
+ 1, {offsetof(struct btf_type, name_off)},
+ sizeof(struct btf_enum),
+ 1, {offsetof(struct btf_enum, name_off)}
+ };
+ break;
+ case BTF_KIND_ENUM64:
+ it->desc = (struct btf_field_desc) {
+ 1, {offsetof(struct btf_type, name_off)},
+ sizeof(struct btf_enum64),
+ 1, {offsetof(struct btf_enum64, name_off)}
+ };
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ it->desc = (struct btf_field_desc) {
+ 1, {offsetof(struct btf_type, name_off)},
+ sizeof(struct btf_member),
+ 1, {offsetof(struct btf_member, name_off)}
+ };
+ break;
+ case BTF_KIND_FUNC_PROTO:
+ it->desc = (struct btf_field_desc) {
+ 1, {offsetof(struct btf_type, name_off)},
+ sizeof(struct btf_param),
+ 1, {offsetof(struct btf_param, name_off)}
+ };
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (it->desc.m_sz)
+ it->vlen = btf_vlen(t);
+
+ it->p = t;
+ return 0;
+}
+
+__u32 *btf_field_iter_next(struct btf_field_iter *it)
+{
+ if (!it->p)
+ return NULL;
+
+ if (it->m_idx < 0) {
+ if (it->off_idx < it->desc.t_off_cnt)
+ return it->p + it->desc.t_offs[it->off_idx++];
+ /* move to per-member iteration */
+ it->m_idx = 0;
+ it->p += sizeof(struct btf_type);
+ it->off_idx = 0;
+ }
+
+ /* if type doesn't have members, stop */
+ if (it->desc.m_sz == 0) {
+ it->p = NULL;
+ return NULL;
+ }
+
+ if (it->off_idx >= it->desc.m_off_cnt) {
+ /* exhausted this member's fields, go to the next member */
+ it->m_idx++;
+ it->p += it->desc.m_sz;
+ it->off_idx = 0;
+ }
+
+ if (it->m_idx < it->vlen)
+ return it->p + it->desc.m_offs[it->off_idx++];
+
+ it->p = NULL;
+ return NULL;
+}
diff --git a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c
new file mode 100644
index 000000000000..17f8b32f94a0
--- /dev/null
+++ b/tools/lib/bpf/btf_relocate.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024, Oracle and/or its affiliates. */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#ifdef __KERNEL__
+#include <linux/bpf.h>
+#include <linux/bsearch.h>
+#include <linux/btf.h>
+#include <linux/sort.h>
+#include <linux/string.h>
+#include <linux/bpf_verifier.h>
+
+#define btf_type_by_id (struct btf_type *)btf_type_by_id
+#define btf__type_cnt btf_nr_types
+#define btf__base_btf btf_base_btf
+#define btf__name_by_offset btf_name_by_offset
+#define btf__str_by_offset btf_str_by_offset
+#define btf_kflag btf_type_kflag
+
+#define calloc(nmemb, sz) kvcalloc(nmemb, sz, GFP_KERNEL | __GFP_NOWARN)
+#define free(ptr) kvfree(ptr)
+#define qsort(base, num, sz, cmp) sort(base, num, sz, cmp, NULL)
+
+#else
+
+#include "btf.h"
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+#endif /* __KERNEL__ */
+
+struct btf;
+
+struct btf_relocate {
+ struct btf *btf;
+ const struct btf *base_btf;
+ const struct btf *dist_base_btf;
+ unsigned int nr_base_types;
+ unsigned int nr_split_types;
+ unsigned int nr_dist_base_types;
+ int dist_str_len;
+ int base_str_len;
+ __u32 *id_map;
+ __u32 *str_map;
+};
+
+/* Set temporarily in relocation id_map if distilled base struct/union is
+ * embedded in a split BTF struct/union; in such a case, size information must
+ * match between distilled base BTF and base BTF representation of type.
+ */
+#define BTF_IS_EMBEDDED ((__u32)-1)
+
+/* <name, size, id> triple used in sorting/searching distilled base BTF. */
+struct btf_name_info {
+ const char *name;
+ /* set when search requires a size match */
+ bool needs_size: 1;
+ unsigned int size: 31;
+ __u32 id;
+};
+
+static int btf_relocate_rewrite_type_id(struct btf_relocate *r, __u32 i)
+{
+ struct btf_type *t = btf_type_by_id(r->btf, i);
+ struct btf_field_iter it;
+ __u32 *id;
+ int err;
+
+ err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS);
+ if (err)
+ return err;
+
+ while ((id = btf_field_iter_next(&it)))
+ *id = r->id_map[*id];
+ return 0;
+}
+
+/* Simple string comparison used for sorting within BTF, since all distilled
+ * types are named. If strings match, and size is non-zero for both elements
+ * fall back to using size for ordering.
+ */
+static int cmp_btf_name_size(const void *n1, const void *n2)
+{
+ const struct btf_name_info *ni1 = n1;
+ const struct btf_name_info *ni2 = n2;
+ int name_diff = strcmp(ni1->name, ni2->name);
+
+ if (!name_diff && ni1->needs_size && ni2->needs_size)
+ return ni2->size - ni1->size;
+ return name_diff;
+}
+
+/* Binary search with a small twist; find leftmost element that matches
+ * so that we can then iterate through all exact matches. So for example
+ * searching { "a", "bb", "bb", "c" } we would always match on the
+ * leftmost "bb".
+ */
+static struct btf_name_info *search_btf_name_size(struct btf_name_info *key,
+ struct btf_name_info *vals,
+ int nelems)
+{
+ struct btf_name_info *ret = NULL;
+ int high = nelems - 1;
+ int low = 0;
+
+ while (low <= high) {
+ int mid = (low + high)/2;
+ struct btf_name_info *val = &vals[mid];
+ int diff = cmp_btf_name_size(key, val);
+
+ if (diff == 0)
+ ret = val;
+ /* even if found, keep searching for leftmost match */
+ if (diff <= 0)
+ high = mid - 1;
+ else
+ low = mid + 1;
+ }
+ return ret;
+}
+
+/* If a member of a split BTF struct/union refers to a base BTF
+ * struct/union, mark that struct/union id temporarily in the id_map
+ * with BTF_IS_EMBEDDED. Members can be const/restrict/volatile/typedef
+ * reference types, but if a pointer is encountered, the type is no longer
+ * considered embedded.
+ */
+static int btf_mark_embedded_composite_type_ids(struct btf_relocate *r, __u32 i)
+{
+ struct btf_type *t = btf_type_by_id(r->btf, i);
+ struct btf_field_iter it;
+ __u32 *id;
+ int err;
+
+ if (!btf_is_composite(t))
+ return 0;
+
+ err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS);
+ if (err)
+ return err;
+
+ while ((id = btf_field_iter_next(&it))) {
+ __u32 next_id = *id;
+
+ while (next_id) {
+ t = btf_type_by_id(r->btf, next_id);
+ switch (btf_kind(t)) {
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_TYPE_TAG:
+ next_id = t->type;
+ break;
+ case BTF_KIND_ARRAY: {
+ struct btf_array *a = btf_array(t);
+
+ next_id = a->type;
+ break;
+ }
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ if (next_id < r->nr_dist_base_types)
+ r->id_map[next_id] = BTF_IS_EMBEDDED;
+ next_id = 0;
+ break;
+ default:
+ next_id = 0;
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/* Build a map from distilled base BTF ids to base BTF ids. To do so, iterate
+ * through base BTF looking up distilled type (using binary search) equivalents.
+ */
+static int btf_relocate_map_distilled_base(struct btf_relocate *r)
+{
+ struct btf_name_info *info, *info_end;
+ struct btf_type *base_t, *dist_t;
+ __u8 *base_name_cnt = NULL;
+ int err = 0;
+ __u32 id;
+
+ /* generate a sort index array of name/type ids sorted by name for
+ * distilled base BTF to speed name-based lookups.
+ */
+ info = calloc(r->nr_dist_base_types, sizeof(*info));
+ if (!info) {
+ err = -ENOMEM;
+ goto done;
+ }
+ info_end = info + r->nr_dist_base_types;
+ for (id = 0; id < r->nr_dist_base_types; id++) {
+ dist_t = btf_type_by_id(r->dist_base_btf, id);
+ info[id].name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off);
+ info[id].id = id;
+ info[id].size = dist_t->size;
+ info[id].needs_size = true;
+ }
+ qsort(info, r->nr_dist_base_types, sizeof(*info), cmp_btf_name_size);
+
+ /* Mark distilled base struct/union members of split BTF structs/unions
+ * in id_map with BTF_IS_EMBEDDED; this signals that these types
+ * need to match both name and size, otherwise embedding the base
+ * struct/union in the split type is invalid.
+ */
+ for (id = r->nr_dist_base_types; id < r->nr_split_types; id++) {
+ err = btf_mark_embedded_composite_type_ids(r, id);
+ if (err)
+ goto done;
+ }
+
+ /* Collect name counts for composite types in base BTF. If multiple
+ * instances of a struct/union of the same name exist, we need to use
+ * size to determine which to map to since name alone is ambiguous.
+ */
+ base_name_cnt = calloc(r->base_str_len, sizeof(*base_name_cnt));
+ if (!base_name_cnt) {
+ err = -ENOMEM;
+ goto done;
+ }
+ for (id = 1; id < r->nr_base_types; id++) {
+ base_t = btf_type_by_id(r->base_btf, id);
+ if (!btf_is_composite(base_t) || !base_t->name_off)
+ continue;
+ if (base_name_cnt[base_t->name_off] < 255)
+ base_name_cnt[base_t->name_off]++;
+ }
+
+ /* Now search base BTF for matching distilled base BTF types. */
+ for (id = 1; id < r->nr_base_types; id++) {
+ struct btf_name_info *dist_info, base_info = {};
+ int dist_kind, base_kind;
+
+ base_t = btf_type_by_id(r->base_btf, id);
+ /* distilled base consists of named types only. */
+ if (!base_t->name_off)
+ continue;
+ base_kind = btf_kind(base_t);
+ base_info.id = id;
+ base_info.name = btf__name_by_offset(r->base_btf, base_t->name_off);
+ switch (base_kind) {
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ /* These types should match both name and size */
+ base_info.needs_size = true;
+ base_info.size = base_t->size;
+ break;
+ case BTF_KIND_FWD:
+ /* No size considerations for fwds. */
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ /* Size only needs to be used for struct/union if there
+ * are multiple types in base BTF with the same name.
+ * If there are multiple _distilled_ types with the same
+ * name (a very unlikely scenario), that doesn't matter
+ * unless corresponding _base_ types to match them are
+ * missing.
+ */
+ base_info.needs_size = base_name_cnt[base_t->name_off] > 1;
+ base_info.size = base_t->size;
+ break;
+ default:
+ continue;
+ }
+ /* iterate over all matching distilled base types */
+ for (dist_info = search_btf_name_size(&base_info, info, r->nr_dist_base_types);
+ dist_info != NULL && dist_info < info_end &&
+ cmp_btf_name_size(&base_info, dist_info) == 0;
+ dist_info++) {
+ if (!dist_info->id || dist_info->id >= r->nr_dist_base_types) {
+ pr_warn("base BTF id [%d] maps to invalid distilled base BTF id [%d]\n",
+ id, dist_info->id);
+ err = -EINVAL;
+ goto done;
+ }
+ dist_t = btf_type_by_id(r->dist_base_btf, dist_info->id);
+ dist_kind = btf_kind(dist_t);
+
+ /* Validate that the found distilled type is compatible.
+ * Do not error out on mismatch as another match may
+ * occur for an identically-named type.
+ */
+ switch (dist_kind) {
+ case BTF_KIND_FWD:
+ switch (base_kind) {
+ case BTF_KIND_FWD:
+ if (btf_kflag(dist_t) != btf_kflag(base_t))
+ continue;
+ break;
+ case BTF_KIND_STRUCT:
+ if (btf_kflag(base_t))
+ continue;
+ break;
+ case BTF_KIND_UNION:
+ if (!btf_kflag(base_t))
+ continue;
+ break;
+ default:
+ continue;
+ }
+ break;
+ case BTF_KIND_INT:
+ if (dist_kind != base_kind ||
+ btf_int_encoding(base_t) != btf_int_encoding(dist_t))
+ continue;
+ break;
+ case BTF_KIND_FLOAT:
+ if (dist_kind != base_kind)
+ continue;
+ break;
+ case BTF_KIND_ENUM:
+ /* ENUM and ENUM64 are encoded as sized ENUM in
+ * distilled base BTF.
+ */
+ if (base_kind != dist_kind && base_kind != BTF_KIND_ENUM64)
+ continue;
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ /* size verification is required for embedded
+ * struct/unions.
+ */
+ if (r->id_map[dist_info->id] == BTF_IS_EMBEDDED &&
+ base_t->size != dist_t->size)
+ continue;
+ break;
+ default:
+ continue;
+ }
+ if (r->id_map[dist_info->id] &&
+ r->id_map[dist_info->id] != BTF_IS_EMBEDDED) {
+ /* we already have a match; this tells us that
+ * multiple base types of the same name
+ * have the same size, since for cases where
+ * multiple types have the same name we match
+ * on name and size. In this case, we have
+ * no way of determining which to relocate
+ * to in base BTF, so error out.
+ */
+ pr_warn("distilled base BTF type '%s' [%u], size %u has multiple candidates of the same size (ids [%u, %u]) in base BTF\n",
+ base_info.name, dist_info->id,
+ base_t->size, id, r->id_map[dist_info->id]);
+ err = -EINVAL;
+ goto done;
+ }
+ /* map id and name */
+ r->id_map[dist_info->id] = id;
+ r->str_map[dist_t->name_off] = base_t->name_off;
+ }
+ }
+ /* ensure all distilled BTF ids now have a mapping... */
+ for (id = 1; id < r->nr_dist_base_types; id++) {
+ const char *name;
+
+ if (r->id_map[id] && r->id_map[id] != BTF_IS_EMBEDDED)
+ continue;
+ dist_t = btf_type_by_id(r->dist_base_btf, id);
+ name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off);
+ pr_warn("distilled base BTF type '%s' [%d] is not mapped to base BTF id\n",
+ name, id);
+ err = -EINVAL;
+ break;
+ }
+done:
+ free(base_name_cnt);
+ free(info);
+ return err;
+}
+
+/* distilled base should only have named int/float/enum/fwd/struct/union types. */
+static int btf_relocate_validate_distilled_base(struct btf_relocate *r)
+{
+ unsigned int i;
+
+ for (i = 1; i < r->nr_dist_base_types; i++) {
+ struct btf_type *t = btf_type_by_id(r->dist_base_btf, i);
+ int kind = btf_kind(t);
+
+ switch (kind) {
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_FWD:
+ if (t->name_off)
+ break;
+ pr_warn("type [%d], kind [%d] is invalid for distilled base BTF; it is anonymous\n",
+ i, kind);
+ return -EINVAL;
+ default:
+ pr_warn("type [%d] in distilled based BTF has unexpected kind [%d]\n",
+ i, kind);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int btf_relocate_rewrite_strs(struct btf_relocate *r, __u32 i)
+{
+ struct btf_type *t = btf_type_by_id(r->btf, i);
+ struct btf_field_iter it;
+ __u32 *str_off;
+ int off, err;
+
+ err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS);
+ if (err)
+ return err;
+
+ while ((str_off = btf_field_iter_next(&it))) {
+ if (!*str_off)
+ continue;
+ if (*str_off >= r->dist_str_len) {
+ *str_off += r->base_str_len - r->dist_str_len;
+ } else {
+ off = r->str_map[*str_off];
+ if (!off) {
+ pr_warn("string '%s' [offset %u] is not mapped to base BTF",
+ btf__str_by_offset(r->btf, off), *str_off);
+ return -ENOENT;
+ }
+ *str_off = off;
+ }
+ }
+ return 0;
+}
+
+/* If successful, output of relocation is updated BTF with base BTF pointing
+ * at base_btf, and type ids, strings adjusted accordingly.
+ */
+int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **id_map)
+{
+ unsigned int nr_types = btf__type_cnt(btf);
+ const struct btf_header *dist_base_hdr;
+ const struct btf_header *base_hdr;
+ struct btf_relocate r = {};
+ int err = 0;
+ __u32 id, i;
+
+ r.dist_base_btf = btf__base_btf(btf);
+ if (!base_btf || r.dist_base_btf == base_btf)
+ return -EINVAL;
+
+ r.nr_dist_base_types = btf__type_cnt(r.dist_base_btf);
+ r.nr_base_types = btf__type_cnt(base_btf);
+ r.nr_split_types = nr_types - r.nr_dist_base_types;
+ r.btf = btf;
+ r.base_btf = base_btf;
+
+ r.id_map = calloc(nr_types, sizeof(*r.id_map));
+ r.str_map = calloc(btf_header(r.dist_base_btf)->str_len, sizeof(*r.str_map));
+ dist_base_hdr = btf_header(r.dist_base_btf);
+ base_hdr = btf_header(r.base_btf);
+ r.dist_str_len = dist_base_hdr->str_len;
+ r.base_str_len = base_hdr->str_len;
+ if (!r.id_map || !r.str_map) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ err = btf_relocate_validate_distilled_base(&r);
+ if (err)
+ goto err_out;
+
+ /* Split BTF ids need to be adjusted as base and distilled base
+ * have different numbers of types, changing the start id of split
+ * BTF.
+ */
+ for (id = r.nr_dist_base_types; id < nr_types; id++)
+ r.id_map[id] = id + r.nr_base_types - r.nr_dist_base_types;
+
+ /* Build a map from distilled base ids to actual base BTF ids; it is used
+ * to update split BTF id references. Also build a str_map mapping from
+ * distilled base BTF names to base BTF names.
+ */
+ err = btf_relocate_map_distilled_base(&r);
+ if (err)
+ goto err_out;
+
+ /* Next, rewrite type ids in split BTF, replacing split ids with updated
+ * ids based on number of types in base BTF, and base ids with
+ * relocated ids from base_btf.
+ */
+ for (i = 0, id = r.nr_dist_base_types; i < r.nr_split_types; i++, id++) {
+ err = btf_relocate_rewrite_type_id(&r, id);
+ if (err)
+ goto err_out;
+ }
+ /* String offsets now need to be updated using the str_map. */
+ for (i = 0; i < r.nr_split_types; i++) {
+ err = btf_relocate_rewrite_strs(&r, i + r.nr_dist_base_types);
+ if (err)
+ goto err_out;
+ }
+ /* Finally reset base BTF to be base_btf */
+ btf_set_base_btf(btf, base_btf);
+
+ if (id_map) {
+ *id_map = r.id_map;
+ r.id_map = NULL;
+ }
+err_out:
+ free(r.id_map);
+ free(r.str_map);
+ return err;
+}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4a28fac4908a..30f121754d83 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -10375,7 +10375,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
struct bpf_map *
bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
{
- if (prev == NULL)
+ if (prev == NULL && obj != NULL)
return obj->maps;
return __bpf_map__iter(prev, obj, 1);
@@ -10384,7 +10384,7 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
struct bpf_map *
bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
{
- if (next == NULL) {
+ if (next == NULL && obj != NULL) {
if (!obj->nr_maps)
return NULL;
return obj->maps + obj->nr_maps - 1;
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 40595233dc7f..8f0d9ea3b1b4 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -419,6 +419,8 @@ LIBBPF_1.4.0 {
LIBBPF_1.5.0 {
global:
+ btf__distill_base;
+ btf__relocate;
bpf_map__autoattach;
bpf_map__set_autoattach;
bpf_program__attach_sockmap;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index e2f06609c624..408df59e0771 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -234,6 +234,9 @@ struct btf_type;
struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id);
const char *btf_kind_str(const struct btf_type *t);
const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+const struct btf_header *btf_header(const struct btf *btf);
+void btf_set_base_btf(struct btf *btf, const struct btf *base_btf);
+int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **id_map);
static inline enum btf_func_linkage btf_func_linkage(const struct btf_type *t)
{
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index fa11a671da3e..9cd3d4109788 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -2227,10 +2227,17 @@ static int linker_fixup_btf(struct src_obj *obj)
vi = btf_var_secinfos(t);
for (j = 0, m = btf_vlen(t); j < m; j++, vi++) {
const struct btf_type *vt = btf__type_by_id(obj->btf, vi->type);
- const char *var_name = btf__str_by_offset(obj->btf, vt->name_off);
- int var_linkage = btf_var(vt)->linkage;
+ const char *var_name;
+ int var_linkage;
Elf64_Sym *sym;
+ /* could be a variable or function */
+ if (!btf_is_var(vt))
+ continue;
+
+ var_name = btf__str_by_offset(obj->btf, vt->name_off);
+ var_linkage = btf_var(vt)->linkage;
+
/* no need to patch up static or extern vars */
if (var_linkage != BTF_VAR_GLOBAL_ALLOCATED)
continue;
diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile
index 8e9e09d84e26..d1cdf2a8f826 100644
--- a/tools/net/ynl/Makefile
+++ b/tools/net/ynl/Makefile
@@ -2,9 +2,12 @@
SUBDIRS = lib generated samples
-all: $(SUBDIRS)
+all: $(SUBDIRS) libynl.a
samples: | lib generated
+libynl.a: | lib generated
+ @echo -e "\tAR $@"
+ @ar rcs $@ lib/ynl.o generated/*-user.o
$(SUBDIRS):
@if [ -f "$@/Makefile" ] ; then \
@@ -17,5 +20,6 @@ clean distclean:
$(MAKE) -C $$dir $@; \
fi \
done
+ rm -f libynl.a
.PHONY: all clean distclean $(SUBDIRS)
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index f4e8eb79c1b8..0712b5e82eb7 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -16,7 +16,8 @@ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2)
CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h)
CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h)
-CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h)
+CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \
+ $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h)
CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h)
CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h)
@@ -25,3 +26,4 @@ CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h)
CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
+CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h)
diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile
index dfff3ecd1cba..2887cc5de530 100644
--- a/tools/net/ynl/lib/Makefile
+++ b/tools/net/ynl/lib/Makefile
@@ -14,7 +14,9 @@ include $(wildcard *.d)
all: ynl.a
ynl.a: $(OBJS)
- ar rcs $@ $(OBJS)
+ @echo -e "\tAR $@"
+ @ar rcs $@ $(OBJS)
+
clean:
rm -f *.o *.d *~
rm -rf __pycache__
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 374ca5e86e24..51529fabd517 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -59,9 +59,9 @@ class Type(SpecAttr):
if 'nested-attributes' in attr:
self.nested_attrs = attr['nested-attributes']
if self.nested_attrs == family.name:
- self.nested_render_name = c_lower(f"{family.name}")
+ self.nested_render_name = c_lower(f"{family.ident_name}")
else:
- self.nested_render_name = c_lower(f"{family.name}_{self.nested_attrs}")
+ self.nested_render_name = c_lower(f"{family.ident_name}_{self.nested_attrs}")
if self.nested_attrs in self.family.consts:
self.nested_struct_type = 'struct ' + self.nested_render_name + '_'
@@ -693,9 +693,9 @@ class Struct:
self.nested = type_list is None
if family.name == c_lower(space_name):
- self.render_name = c_lower(family.name)
+ self.render_name = c_lower(family.ident_name)
else:
- self.render_name = c_lower(family.name + '-' + space_name)
+ self.render_name = c_lower(family.ident_name + '-' + space_name)
self.struct_name = 'struct ' + self.render_name
if self.nested and space_name in family.consts:
self.struct_name += '_'
@@ -761,7 +761,7 @@ class EnumEntry(SpecEnumEntry):
class EnumSet(SpecEnumSet):
def __init__(self, family, yaml):
- self.render_name = c_lower(family.name + '-' + yaml['name'])
+ self.render_name = c_lower(family.ident_name + '-' + yaml['name'])
if 'enum-name' in yaml:
if yaml['enum-name']:
@@ -777,7 +777,7 @@ class EnumSet(SpecEnumSet):
else:
self.user_type = 'int'
- self.value_pfx = yaml.get('name-prefix', f"{family.name}-{yaml['name']}-")
+ self.value_pfx = yaml.get('name-prefix', f"{family.ident_name}-{yaml['name']}-")
super().__init__(family, yaml)
@@ -802,9 +802,9 @@ class AttrSet(SpecAttrSet):
if 'name-prefix' in yaml:
pfx = yaml['name-prefix']
elif self.name == family.name:
- pfx = family.name + '-a-'
+ pfx = family.ident_name + '-a-'
else:
- pfx = f"{family.name}-a-{self.name}-"
+ pfx = f"{family.ident_name}-a-{self.name}-"
self.name_prefix = c_upper(pfx)
self.max_name = c_upper(self.yaml.get('attr-max-name', f"{self.name_prefix}max"))
self.cnt_name = c_upper(self.yaml.get('attr-cnt-name', f"__{self.name_prefix}max"))
@@ -861,7 +861,7 @@ class Operation(SpecOperation):
def __init__(self, family, yaml, req_value, rsp_value):
super().__init__(family, yaml, req_value, rsp_value)
- self.render_name = c_lower(family.name + '_' + self.name)
+ self.render_name = c_lower(family.ident_name + '_' + self.name)
self.dual_policy = ('do' in yaml and 'request' in yaml['do']) and \
('dump' in yaml and 'request' in yaml['dump'])
@@ -911,11 +911,11 @@ class Family(SpecFamily):
if 'uapi-header' in self.yaml:
self.uapi_header = self.yaml['uapi-header']
else:
- self.uapi_header = f"linux/{self.name}.h"
+ self.uapi_header = f"linux/{self.ident_name}.h"
if self.uapi_header.startswith("linux/") and self.uapi_header.endswith('.h'):
self.uapi_header_name = self.uapi_header[6:-2]
else:
- self.uapi_header_name = self.name
+ self.uapi_header_name = self.ident_name
def resolve(self):
self.resolve_up(super())
@@ -923,7 +923,7 @@ class Family(SpecFamily):
if self.yaml.get('protocol', 'genetlink') not in {'genetlink', 'genetlink-c', 'genetlink-legacy'}:
raise Exception("Codegen only supported for genetlink")
- self.c_name = c_lower(self.name)
+ self.c_name = c_lower(self.ident_name)
if 'name-prefix' in self.yaml['operations']:
self.op_prefix = c_upper(self.yaml['operations']['name-prefix'])
else:
@@ -2173,7 +2173,7 @@ def print_kernel_op_table_fwd(family, cw, terminate):
exported = not kernel_can_gen_family_struct(family)
if not terminate or exported:
- cw.p(f"/* Ops table for {family.name} */")
+ cw.p(f"/* Ops table for {family.ident_name} */")
pol_to_struct = {'global': 'genl_small_ops',
'per-op': 'genl_ops',
@@ -2225,12 +2225,12 @@ def print_kernel_op_table_fwd(family, cw, terminate):
continue
if 'do' in op:
- name = c_lower(f"{family.name}-nl-{op_name}-doit")
+ name = c_lower(f"{family.ident_name}-nl-{op_name}-doit")
cw.write_func_prot('int', name,
['struct sk_buff *skb', 'struct genl_info *info'], suffix=';')
if 'dump' in op:
- name = c_lower(f"{family.name}-nl-{op_name}-dumpit")
+ name = c_lower(f"{family.ident_name}-nl-{op_name}-dumpit")
cw.write_func_prot('int', name,
['struct sk_buff *skb', 'struct netlink_callback *cb'], suffix=';')
cw.nl()
@@ -2256,13 +2256,13 @@ def print_kernel_op_table(family, cw):
for x in op['dont-validate']])), )
for op_mode in ['do', 'dump']:
if op_mode in op:
- name = c_lower(f"{family.name}-nl-{op_name}-{op_mode}it")
+ name = c_lower(f"{family.ident_name}-nl-{op_name}-{op_mode}it")
members.append((op_mode + 'it', name))
if family.kernel_policy == 'per-op':
struct = Struct(family, op['attribute-set'],
type_list=op['do']['request']['attributes'])
- name = c_lower(f"{family.name}-{op_name}-nl-policy")
+ name = c_lower(f"{family.ident_name}-{op_name}-nl-policy")
members.append(('policy', name))
members.append(('maxattr', struct.attr_max_val.enum_name))
if 'flags' in op:
@@ -2294,7 +2294,7 @@ def print_kernel_op_table(family, cw):
members.append(('validate',
' | '.join([c_upper('genl-dont-validate-' + x)
for x in dont_validate])), )
- name = c_lower(f"{family.name}-nl-{op_name}-{op_mode}it")
+ name = c_lower(f"{family.ident_name}-nl-{op_name}-{op_mode}it")
if 'pre' in op[op_mode]:
members.append((cb_names[op_mode]['pre'], c_lower(op[op_mode]['pre'])))
members.append((op_mode + 'it', name))
@@ -2305,9 +2305,9 @@ def print_kernel_op_table(family, cw):
type_list=op[op_mode]['request']['attributes'])
if op.dual_policy:
- name = c_lower(f"{family.name}-{op_name}-{op_mode}-nl-policy")
+ name = c_lower(f"{family.ident_name}-{op_name}-{op_mode}-nl-policy")
else:
- name = c_lower(f"{family.name}-{op_name}-nl-policy")
+ name = c_lower(f"{family.ident_name}-{op_name}-nl-policy")
members.append(('policy', name))
members.append(('maxattr', struct.attr_max_val.enum_name))
flags = (op['flags'] if 'flags' in op else []) + ['cmd-cap-' + op_mode]
@@ -2326,7 +2326,7 @@ def print_kernel_mcgrp_hdr(family, cw):
cw.block_start('enum')
for grp in family.mcgrps['list']:
- grp_id = c_upper(f"{family.name}-nlgrp-{grp['name']},")
+ grp_id = c_upper(f"{family.ident_name}-nlgrp-{grp['name']},")
cw.p(grp_id)
cw.block_end(';')
cw.nl()
@@ -2339,7 +2339,7 @@ def print_kernel_mcgrp_src(family, cw):
cw.block_start('static const struct genl_multicast_group ' + family.c_name + '_nl_mcgrps[] =')
for grp in family.mcgrps['list']:
name = grp['name']
- grp_id = c_upper(f"{family.name}-nlgrp-{name}")
+ grp_id = c_upper(f"{family.ident_name}-nlgrp-{name}")
cw.p('[' + grp_id + '] = { "' + name + '", },')
cw.block_end(';')
cw.nl()
@@ -2361,7 +2361,7 @@ def print_kernel_family_struct_src(family, cw):
if not kernel_can_gen_family_struct(family):
return
- cw.block_start(f"struct genl_family {family.name}_nl_family __ro_after_init =")
+ cw.block_start(f"struct genl_family {family.ident_name}_nl_family __ro_after_init =")
cw.p('.name\t\t= ' + family.fam_key + ',')
cw.p('.version\t= ' + family.ver_key + ',')
cw.p('.netnsok\t= true,')
@@ -2429,7 +2429,7 @@ def render_uapi(family, cw):
cw.p(' */')
uapi_enum_start(family, cw, const, 'name')
- name_pfx = const.get('name-prefix', f"{family.name}-{const['name']}-")
+ name_pfx = const.get('name-prefix', f"{family.ident_name}-{const['name']}-")
for entry in enum.entries.values():
suffix = ','
if entry.value_change:
@@ -2451,7 +2451,7 @@ def render_uapi(family, cw):
cw.nl()
elif const['type'] == 'const':
defines.append([c_upper(family.get('c-define-name',
- f"{family.name}-{const['name']}")),
+ f"{family.ident_name}-{const['name']}")),
const['value']])
if defines:
@@ -2529,7 +2529,7 @@ def render_uapi(family, cw):
defines = []
for grp in family.mcgrps['list']:
name = grp['name']
- defines.append([c_upper(grp.get('c-define-name', f"{family.name}-mcgrp-{name}")),
+ defines.append([c_upper(grp.get('c-define-name', f"{family.ident_name}-mcgrp-{name}")),
f'{name}'])
cw.nl()
if defines:
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index 233f2b6edf52..49b79cf0c5cc 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -86,14 +86,6 @@ static struct comm_str *comm_str__new(const char *str)
return result;
}
-static int comm_str__cmp(const void *_lhs, const void *_rhs)
-{
- const struct comm_str *lhs = *(const struct comm_str * const *)_lhs;
- const struct comm_str *rhs = *(const struct comm_str * const *)_rhs;
-
- return strcmp(comm_str__str(lhs), comm_str__str(rhs));
-}
-
static int comm_str__search(const void *_key, const void *_member)
{
const char *key = _key;
@@ -169,9 +161,24 @@ static struct comm_str *comm_strs__findnew(const char *str)
}
result = comm_str__new(str);
if (result) {
- comm_strs->strs[comm_strs->num_strs++] = result;
- qsort(comm_strs->strs, comm_strs->num_strs, sizeof(struct comm_str *),
- comm_str__cmp);
+ int low = 0, high = comm_strs->num_strs - 1;
+ int insert = comm_strs->num_strs; /* Default to inserting at the end. */
+
+ while (low <= high) {
+ int mid = low + (high - low) / 2;
+ int cmp = strcmp(comm_str__str(comm_strs->strs[mid]), str);
+
+ if (cmp < 0) {
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ insert = mid;
+ }
+ }
+ memmove(&comm_strs->strs[insert + 1], &comm_strs->strs[insert],
+ (comm_strs->num_strs - insert) * sizeof(struct comm_str *));
+ comm_strs->num_strs++;
+ comm_strs->strs[insert] = result;
}
}
up_write(&comm_strs->lock);
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
index ab3d0c01dd63..a69a9c661200 100644
--- a/tools/perf/util/dsos.c
+++ b/tools/perf/util/dsos.c
@@ -203,11 +203,27 @@ int __dsos__add(struct dsos *dsos, struct dso *dso)
dsos->dsos = temp;
dsos->allocated = to_allocate;
}
- dsos->dsos[dsos->cnt++] = dso__get(dso);
- if (dsos->cnt >= 2 && dsos->sorted) {
- dsos->sorted = dsos__cmp_long_name_id_short_name(&dsos->dsos[dsos->cnt - 2],
- &dsos->dsos[dsos->cnt - 1])
- <= 0;
+ if (!dsos->sorted) {
+ dsos->dsos[dsos->cnt++] = dso__get(dso);
+ } else {
+ int low = 0, high = dsos->cnt - 1;
+ int insert = dsos->cnt; /* Default to inserting at the end. */
+
+ while (low <= high) {
+ int mid = low + (high - low) / 2;
+ int cmp = dsos__cmp_long_name_id_short_name(&dsos->dsos[mid], &dso);
+
+ if (cmp < 0) {
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ insert = mid;
+ }
+ }
+ memmove(&dsos->dsos[insert + 1], &dsos->dsos[insert],
+ (dsos->cnt - insert) * sizeof(struct dso *));
+ dsos->cnt++;
+ dsos->dsos[insert] = dso__get(dso);
}
dso__set_dsos(dso, dsos);
return 0;
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index 2d6dce2c8f77..b1e6817f1e54 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -14,6 +14,7 @@ turbostat : turbostat.c
override CFLAGS += -O2 -Wall -Wextra -I../../../include
override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
+override CFLAGS += -DBUILD_BUG_HEADER='"../../../../include/linux/build_bug.h"'
override CFLAGS += -D_FILE_OFFSET_BITS=64
override CFLAGS += -D_FORTIFY_SOURCE=2
@@ -44,10 +45,13 @@ snapshot: turbostat
@echo "#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h
@echo "#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h
+ @echo '#define BUILD_BUG_ON(cond) do { enum { compile_time_check ## __COUNTER__ = 1/(!(cond)) }; } while (0)' > $(SNAPSHOT)/build_bug.h
+
@echo PWD=. > $(SNAPSHOT)/Makefile
@echo "CFLAGS += -DMSRHEADER='\"msr-index.h\"'" >> $(SNAPSHOT)/Makefile
@echo "CFLAGS += -DINTEL_FAMILY_HEADER='\"intel-family.h\"'" >> $(SNAPSHOT)/Makefile
- @sed -e's/.*MSRHEADER.*//' -e's/.*INTEL_FAMILY_HEADER.*//' Makefile >> $(SNAPSHOT)/Makefile
+ @echo "CFLAGS += -DBUILD_BUG_HEADER='\"build_bug.h\"'" >> $(SNAPSHOT)/Makefile
+ @sed -e's/.*MSRHEADER.*//' -e's/.*INTEL_FAMILY_HEADER.*//' -e's/.*BUILD_BUG_HEADER.*//' Makefile >> $(SNAPSHOT)/Makefile
@rm -f $(SNAPSHOT).tar.gz
tar cvzf $(SNAPSHOT).tar.gz $(SNAPSHOT)
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 8cdf41906e98..9f5d053d4bc6 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -10,6 +10,7 @@
#define _GNU_SOURCE
#include MSRHEADER
#include INTEL_FAMILY_HEADER
+#include BUILD_BUG_HEADER
#include <stdarg.h>
#include <stdio.h>
#include <err.h>
@@ -38,7 +39,6 @@
#include <stdbool.h>
#include <assert.h>
#include <linux/kernel.h>
-#include <linux/build_bug.h>
#define UNUSED(x) (void)(x)
@@ -5695,9 +5695,6 @@ static void probe_intel_uncore_frequency_cluster(void)
if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
return;
- if (quiet)
- return;
-
for (uncore_max_id = 0;; ++uncore_max_id) {
sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id);
@@ -5727,6 +5724,14 @@ static void probe_intel_uncore_frequency_cluster(void)
sprintf(path, "%s/fabric_cluster_id", path_base);
cluster_id = read_sysfs_int(path);
+ sprintf(path, "%s/current_freq_khz", path_base);
+ sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id);
+
+ add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id);
+
+ if (quiet)
+ continue;
+
sprintf(path, "%s/min_freq_khz", path_base);
k = read_sysfs_int(path);
sprintf(path, "%s/max_freq_khz", path_base);
@@ -5743,11 +5748,6 @@ static void probe_intel_uncore_frequency_cluster(void)
sprintf(path, "%s/current_freq_khz", path_base);
k = read_sysfs_int(path);
fprintf(outf, " %d MHz\n", k / 1000);
-
- sprintf(path, "%s/current_freq_khz", path_base);
- sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id);
-
- add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id);
}
}
@@ -8424,7 +8424,7 @@ void cmdline(int argc, char **argv)
* Parse some options early, because they may make other options invalid,
* like adding the MSR counter with --add and at the same time using --no-msr.
*/
- while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) {
+ while ((opt = getopt_long_only(argc, argv, "MPn:", long_options, &option_index)) != -1) {
switch (opt) {
case 'M':
no_msr = 1;
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 3482248aa344..90d5afd52dd0 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -630,11 +630,15 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port,
struct cxl_endpoint_dvsec_info *info)
{
struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL);
+ struct device *dev = &port->dev;
if (!cxlhdm)
return ERR_PTR(-ENOMEM);
cxlhdm->port = port;
+ cxlhdm->interleave_mask = ~0U;
+ cxlhdm->iw_cap_mask = ~0UL;
+ dev_set_drvdata(dev, cxlhdm);
return cxlhdm;
}
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 0445ac38bc07..901349da680f 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -1,11 +1,11 @@
bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
-fexit_sleep # The test never returns. The remaining tests cannot start.
kprobe_multi_bench_attach # needs CONFIG_FPROBE
kprobe_multi_test # needs CONFIG_FPROBE
module_attach # prog 'kprobe_multi': failed to auto-attach: -95
fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524
fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524
+tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524
fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index c34adf39eeb2..3ebd77206f98 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -1,9 +1,5 @@
# TEMPORARY
# Alphabetical order
-exceptions # JIT does not support calling kfunc bpf_throw (exceptions)
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
verifier_iterating_callbacks
-verifier_arena # JIT does not support arena
-arena_htab # JIT does not support arena
-arena_atomics
diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
index 567491f3e1b5..68a51dcc0669 100644
--- a/tools/testing/selftests/bpf/bpf_arena_common.h
+++ b/tools/testing/selftests/bpf/bpf_arena_common.h
@@ -34,10 +34,12 @@
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM)
#define __arena __attribute__((address_space(1)))
+#define __arena_global __attribute__((address_space(1)))
#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
#else
#define __arena
+#define __arena_global SEC(".addr_space.1")
#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1)
#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0)
#endif
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 3d9e4b8c6b81..828556cdc2f0 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -163,7 +163,7 @@ struct bpf_iter_task_vma;
extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
struct task_struct *task,
- unsigned long addr) __ksym;
+ __u64 addr) __ksym;
extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym;
extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym;
@@ -351,6 +351,7 @@ l_true: \
l_continue:; \
})
#else
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define can_loop \
({ __label__ l_break, l_continue; \
bool ret = true; \
@@ -376,6 +377,33 @@ l_true: \
l_break: break; \
l_continue:; \
})
+#else
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
+#define cond_break \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: break; \
+ l_continue:; \
+ })
+#endif
#endif
#ifndef bpf_nop_mov
@@ -524,7 +552,7 @@ extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;
extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
extern int bpf_wq_set_callback_impl(struct bpf_wq *wq,
- int (callback_fn)(void *map, int *key, struct bpf_wq *wq),
+ int (callback_fn)(void *map, int *key, void *value),
unsigned int flags__k, void *aux__ign) __ksym;
#define bpf_wq_set_callback(timer, cb, flags) \
bpf_wq_set_callback_impl(timer, cb, flags, NULL)
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index be91a6919315..3b6675ab4086 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -77,5 +77,5 @@ extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
struct bpf_key *trusted_keyring) __ksym;
extern bool bpf_session_is_return(void) __ksym __weak;
-extern long *bpf_session_cookie(void) __ksym __weak;
+extern __u64 *bpf_session_cookie(void) __ksym __weak;
#endif
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 0a09732cde4b..f8962a1dd397 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -53,6 +53,13 @@ struct bpf_testmod_struct_arg_4 {
int b;
};
+struct bpf_testmod_struct_arg_5 {
+ char a;
+ short b;
+ int c;
+ long d;
+};
+
__bpf_hook_start();
noinline int
@@ -111,6 +118,15 @@ bpf_testmod_test_struct_arg_8(u64 a, void *b, short c, int d, void *e,
}
noinline int
+bpf_testmod_test_struct_arg_9(u64 a, void *b, short c, int d, void *e, char f,
+ short g, struct bpf_testmod_struct_arg_5 h, long i)
+{
+ bpf_testmod_test_struct_arg_result = a + (long)b + c + d + (long)e +
+ f + g + h.a + h.b + h.c + h.d + i;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) {
bpf_testmod_test_struct_arg_result = a->a;
return bpf_testmod_test_struct_arg_result;
@@ -154,6 +170,42 @@ __bpf_kfunc void bpf_kfunc_common_test(void)
{
}
+__bpf_kfunc void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr,
+ struct bpf_dynptr *ptr__nullable)
+{
+}
+
+__bpf_kfunc struct bpf_testmod_ctx *
+bpf_testmod_ctx_create(int *err)
+{
+ struct bpf_testmod_ctx *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
+ if (!ctx) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+ refcount_set(&ctx->usage, 1);
+
+ return ctx;
+}
+
+static void testmod_free_cb(struct rcu_head *head)
+{
+ struct bpf_testmod_ctx *ctx;
+
+ ctx = container_of(head, struct bpf_testmod_ctx, rcu);
+ kfree(ctx);
+}
+
+__bpf_kfunc void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx)
+{
+ if (!ctx)
+ return;
+ if (refcount_dec_and_test(&ctx->usage))
+ call_rcu(&ctx->rcu, testmod_free_cb);
+}
+
struct bpf_testmod_btf_type_tag_1 {
int a;
};
@@ -269,6 +321,7 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
struct bpf_testmod_struct_arg_2 struct_arg2 = {2, 3};
struct bpf_testmod_struct_arg_3 *struct_arg3;
struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22};
+ struct bpf_testmod_struct_arg_5 struct_arg5 = {23, 24, 25, 26};
int i = 1;
while (bpf_testmod_return_ptr(i))
@@ -283,6 +336,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
(void *)20, struct_arg4);
(void)bpf_testmod_test_struct_arg_8(16, (void *)17, 18, 19,
(void *)20, struct_arg4, 23);
+ (void)bpf_testmod_test_struct_arg_9(16, (void *)17, 18, 19, (void *)20,
+ 21, 22, struct_arg5, 27);
(void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2);
@@ -363,8 +418,15 @@ BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_kfunc_common_test)
+BTF_ID_FLAGS(func, bpf_kfunc_dynptr_test)
+BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE)
BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids)
+BTF_ID_LIST(bpf_testmod_dtor_ids)
+BTF_ID(struct, bpf_testmod_ctx)
+BTF_ID(func, bpf_testmod_ctx_release)
+
static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = {
.owner = THIS_MODULE,
.set = &bpf_testmod_common_kfunc_ids,
@@ -898,6 +960,12 @@ extern int bpf_fentry_test1(int a);
static int bpf_testmod_init(void)
{
+ const struct btf_id_dtor_kfunc bpf_testmod_dtors[] = {
+ {
+ .btf_id = bpf_testmod_dtor_ids[0],
+ .kfunc_btf_id = bpf_testmod_dtor_ids[1]
+ },
+ };
int ret;
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set);
@@ -906,6 +974,9 @@ static int bpf_testmod_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set);
ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops);
ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2);
+ ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors,
+ ARRAY_SIZE(bpf_testmod_dtors),
+ THIS_MODULE);
if (ret < 0)
return ret;
if (bpf_fentry_test1(0) < 0)
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
index b0d586a6751f..e587a79f2239 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
@@ -80,6 +80,11 @@ struct sendmsg_args {
int msglen;
};
+struct bpf_testmod_ctx {
+ struct callback_head rcu;
+ refcount_t usage;
+};
+
struct prog_test_ref_kfunc *
bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym;
void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
@@ -134,4 +139,9 @@ int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args) __ksym;
int bpf_kfunc_call_kernel_getsockname(struct addr_args *args) __ksym;
int bpf_kfunc_call_kernel_getpeername(struct addr_args *args) __ksym;
+void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr, struct bpf_dynptr *ptr__nullable) __ksym;
+
+struct bpf_testmod_ctx *bpf_testmod_ctx_create(int *err) __ksym;
+void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) __ksym;
+
#endif /* _BPF_TESTMOD_KFUNC_H */
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 2fb16da78dce..4ca84c8d9116 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -58,9 +58,12 @@ CONFIG_MPLS=y
CONFIG_MPLS_IPTUNNEL=y
CONFIG_MPLS_ROUTING=y
CONFIG_MPTCP=y
+CONFIG_NET_ACT_SKBMOD=y
+CONFIG_NET_CLS=y
CONFIG_NET_CLS_ACT=y
CONFIG_NET_CLS_BPF=y
CONFIG_NET_CLS_FLOWER=y
+CONFIG_NET_CLS_MATCHALL=y
CONFIG_NET_FOU=y
CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NET_IPGRE=y
@@ -83,6 +86,19 @@ CONFIG_NF_CONNTRACK_MARK=y
CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_DEFRAG_IPV4=y
CONFIG_NF_DEFRAG_IPV6=y
+CONFIG_NF_TABLES=y
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NETFILTER_INGRESS=y
+CONFIG_NF_FLOW_TABLE=y
+CONFIG_NF_FLOW_TABLE_INET=y
+CONFIG_NETFILTER_NETLINK=y
+CONFIG_NFT_FLOW_OFFLOAD=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_FILTER=y
CONFIG_NF_NAT=y
CONFIG_RC_CORE=y
CONFIG_SECURITY=y
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index e20caef06aae..44c2c8fa542a 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -249,6 +249,34 @@ error_close:
return -1;
}
+int client_socket(int family, int type,
+ const struct network_helper_opts *opts)
+{
+ int fd;
+
+ if (!opts)
+ opts = &default_opts;
+
+ fd = socket(family, type, opts->proto);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
+ if (settimeo(fd, opts->timeout_ms))
+ goto error_close;
+
+ if (opts->post_socket_cb &&
+ opts->post_socket_cb(fd, opts->cb_opts))
+ goto error_close;
+
+ return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
+}
+
static int connect_fd_to_addr(int fd,
const struct sockaddr_storage *addr,
socklen_t addrlen, const bool must_fail)
@@ -284,15 +312,12 @@ int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t add
if (!opts)
opts = &default_opts;
- fd = socket(addr->ss_family, type, opts->proto);
+ fd = client_socket(addr->ss_family, type, opts);
if (fd < 0) {
log_err("Failed to create client socket");
return -1;
}
- if (settimeo(fd, opts->timeout_ms))
- goto error_close;
-
if (connect_fd_to_addr(fd, addr, addrlen, opts->must_fail))
goto error_close;
@@ -303,65 +328,21 @@ error_close:
return -1;
}
-int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
+int connect_to_fd_opts(int server_fd, int type, const struct network_helper_opts *opts)
{
struct sockaddr_storage addr;
- struct sockaddr_in *addr_in;
- socklen_t addrlen, optlen;
- int fd, type, protocol;
+ socklen_t addrlen;
if (!opts)
opts = &default_opts;
- optlen = sizeof(type);
-
- if (opts->type) {
- type = opts->type;
- } else {
- if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
- log_err("getsockopt(SOL_TYPE)");
- return -1;
- }
- }
-
- if (opts->proto) {
- protocol = opts->proto;
- } else {
- if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) {
- log_err("getsockopt(SOL_PROTOCOL)");
- return -1;
- }
- }
-
addrlen = sizeof(addr);
if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
log_err("Failed to get server addr");
return -1;
}
- addr_in = (struct sockaddr_in *)&addr;
- fd = socket(addr_in->sin_family, type, protocol);
- if (fd < 0) {
- log_err("Failed to create client socket");
- return -1;
- }
-
- if (settimeo(fd, opts->timeout_ms))
- goto error_close;
-
- if (opts->post_socket_cb &&
- opts->post_socket_cb(fd, opts->cb_opts))
- goto error_close;
-
- if (!opts->noconnect)
- if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail))
- goto error_close;
-
- return fd;
-
-error_close:
- save_errno_close(fd);
- return -1;
+ return connect_to_addr(type, &addr, addrlen, opts);
}
int connect_to_fd(int server_fd, int timeout_ms)
@@ -369,8 +350,23 @@ int connect_to_fd(int server_fd, int timeout_ms)
struct network_helper_opts opts = {
.timeout_ms = timeout_ms,
};
+ int type, protocol;
+ socklen_t optlen;
+
+ optlen = sizeof(type);
+ if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
+ log_err("getsockopt(SOL_TYPE)");
+ return -1;
+ }
+
+ optlen = sizeof(protocol);
+ if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) {
+ log_err("getsockopt(SOL_PROTOCOL)");
+ return -1;
+ }
+ opts.proto = protocol;
- return connect_to_fd_opts(server_fd, &opts);
+ return connect_to_fd_opts(server_fd, type, &opts);
}
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 11eea8e2e4f1..9ea36524b9db 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -24,8 +24,6 @@ typedef __u16 __sum16;
struct network_helper_opts {
int timeout_ms;
bool must_fail;
- bool noconnect;
- int type;
int proto;
int (*post_socket_cb)(int fd, void *opts);
void *cb_opts;
@@ -58,10 +56,12 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t len,
const struct network_helper_opts *opts);
void free_fds(int *fds, unsigned int nr_close_fds);
+int client_socket(int family, int type,
+ const struct network_helper_opts *opts);
int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t len,
const struct network_helper_opts *opts);
int connect_to_fd(int server_fd, int timeout_ms);
-int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts);
+int connect_to_fd_opts(int server_fd, int type, const struct network_helper_opts *opts);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms);
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
index 0807a48a58ee..26e7c06c6cb4 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
@@ -146,6 +146,22 @@ static void test_xchg(struct arena_atomics *skel)
ASSERT_EQ(skel->arena->xchg32_result, 1, "xchg32_result");
}
+static void test_uaf(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.uaf);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->uaf_recovery_fails, 0, "uaf_recovery_fails");
+}
+
void test_arena_atomics(void)
{
struct arena_atomics *skel;
@@ -180,6 +196,8 @@ void test_arena_atomics(void)
test_cmpxchg(skel);
if (test__start_subtest("xchg"))
test_xchg(skel);
+ if (test__start_subtest("uaf"))
+ test_uaf(skel);
cleanup:
arena_atomics__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 67358adf5db3..bceff5900016 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -49,7 +49,7 @@ static bool start_test(char *addr_str,
goto err;
/* connect to server */
- *cli_fd = connect_to_fd_opts(*srv_fd, cli_opts);
+ *cli_fd = connect_to_fd_opts(*srv_fd, SOCK_STREAM, cli_opts);
if (!ASSERT_NEQ(*cli_fd, -1, "connect_to_fd_opts"))
goto err;
@@ -185,6 +185,39 @@ done:
close(fd);
}
+static void test_dctcp_autoattach_map(void)
+{
+ struct cb_opts cb_opts = {
+ .cc = "bpf_dctcp",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
+ struct bpf_dctcp *dctcp_skel;
+ struct bpf_link *link;
+
+ dctcp_skel = bpf_dctcp__open_and_load();
+ if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load"))
+ return;
+
+ bpf_map__set_autoattach(dctcp_skel->maps.dctcp, true);
+ bpf_map__set_autoattach(dctcp_skel->maps.dctcp_nouse, false);
+
+ if (!ASSERT_OK(bpf_dctcp__attach(dctcp_skel), "bpf_dctcp__attach"))
+ goto destroy;
+
+ /* struct_ops is auto-attached */
+ link = dctcp_skel->links.dctcp;
+ if (!ASSERT_OK_PTR(link, "link"))
+ goto destroy;
+
+ do_test(&opts);
+
+destroy:
+ bpf_dctcp__destroy(dctcp_skel);
+}
+
static char *err_str;
static bool found;
@@ -598,4 +631,6 @@ void test_bpf_tcp_ca(void)
test_tcp_ca_kfunc();
if (test__start_subtest("cc_cubic"))
test_cc_cubic();
+ if (test__start_subtest("dctcp_autoattach_map"))
+ test_dctcp_autoattach_map();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_distill.c b/tools/testing/selftests/bpf/prog_tests/btf_distill.c
new file mode 100644
index 000000000000..bfbe795823a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_distill.c
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "btf_helpers.h"
+
+/* Fabricate base, split BTF with references to base types needed; then create
+ * split BTF with distilled base BTF and ensure expectations are met:
+ * - only referenced base types from split BTF are present
+ * - struct/union/enum are represented as empty unless anonymous, when they
+ * are represented in full in split BTF
+ */
+static void test_distilled_base(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_ptr(btf1, 1); /* [2] ptr to int */
+ btf__add_struct(btf1, "s1", 8); /* [3] struct s1 { */
+ btf__add_field(btf1, "f1", 2, 0, 0); /* int *f1; */
+ /* } */
+ btf__add_struct(btf1, "", 12); /* [4] struct { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ btf__add_field(btf1, "f2", 3, 32, 0); /* struct s1 f2; */
+ /* } */
+ btf__add_int(btf1, "unsigned int", 4, 0); /* [5] unsigned int */
+ btf__add_union(btf1, "u1", 12); /* [6] union u1 { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ btf__add_field(btf1, "f2", 2, 0, 0); /* int *f2; */
+ /* } */
+ btf__add_union(btf1, "", 4); /* [7] union { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ btf__add_enum(btf1, "e1", 4); /* [8] enum e1 { */
+ btf__add_enum_value(btf1, "v1", 1); /* v1 = 1; */
+ /* } */
+ btf__add_enum(btf1, "", 4); /* [9] enum { */
+ btf__add_enum_value(btf1, "av1", 2); /* av1 = 2; */
+ /* } */
+ btf__add_enum64(btf1, "e641", 8, true); /* [10] enum64 { */
+ btf__add_enum64_value(btf1, "v1", 1024); /* v1 = 1024; */
+ /* } */
+ btf__add_enum64(btf1, "", 8, true); /* [11] enum64 { */
+ btf__add_enum64_value(btf1, "v1", 1025); /* v1 = 1025; */
+ /* } */
+ btf__add_struct(btf1, "unneeded", 4); /* [12] struct unneeded { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ btf__add_struct(btf1, "embedded", 4); /* [13] struct embedded { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ btf__add_func_proto(btf1, 1); /* [14] int (*)(int *p1); */
+ btf__add_func_param(btf1, "p1", 1);
+
+ btf__add_array(btf1, 1, 1, 3); /* [15] int [3]; */
+
+ btf__add_struct(btf1, "from_proto", 4); /* [16] struct from_proto { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ btf__add_union(btf1, "u1", 4); /* [17] union u1 { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=8 vlen=1\n"
+ "\t'f1' type_id=2 bits_offset=0",
+ "[4] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=32",
+ "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)",
+ "[6] UNION 'u1' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=2 bits_offset=0",
+ "[7] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'v1' val=1",
+ "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1024",
+ "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[12] STRUCT 'unneeded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[13] STRUCT 'embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3",
+ "[16] STRUCT 'from_proto' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[17] UNION 'u1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0");
+
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+
+ btf__add_ptr(btf2, 3); /* [18] ptr to struct s1 */
+ /* add ptr to struct anon */
+ btf__add_ptr(btf2, 4); /* [19] ptr to struct (anon) */
+ btf__add_const(btf2, 6); /* [20] const union u1 */
+ btf__add_restrict(btf2, 7); /* [21] restrict union (anon) */
+ btf__add_volatile(btf2, 8); /* [22] volatile enum e1 */
+ btf__add_typedef(btf2, "et", 9); /* [23] typedef enum (anon) */
+ btf__add_const(btf2, 10); /* [24] const enum64 e641 */
+ btf__add_ptr(btf2, 11); /* [25] restrict enum64 (anon) */
+ btf__add_struct(btf2, "with_embedded", 4); /* [26] struct with_embedded { */
+ btf__add_field(btf2, "f1", 13, 0, 0); /* struct embedded f1; */
+ /* } */
+ btf__add_func(btf2, "fn", BTF_FUNC_STATIC, 14); /* [27] int fn(int p1); */
+ btf__add_typedef(btf2, "arraytype", 15); /* [28] typedef int[3] foo; */
+ btf__add_func_proto(btf2, 1); /* [29] int (*)(struct from proto p1); */
+ btf__add_func_param(btf2, "p1", 16);
+
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=8 vlen=1\n"
+ "\t'f1' type_id=2 bits_offset=0",
+ "[4] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=32",
+ "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)",
+ "[6] UNION 'u1' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=2 bits_offset=0",
+ "[7] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'v1' val=1",
+ "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1024",
+ "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[12] STRUCT 'unneeded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[13] STRUCT 'embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3",
+ "[16] STRUCT 'from_proto' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[17] UNION 'u1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[18] PTR '(anon)' type_id=3",
+ "[19] PTR '(anon)' type_id=4",
+ "[20] CONST '(anon)' type_id=6",
+ "[21] RESTRICT '(anon)' type_id=7",
+ "[22] VOLATILE '(anon)' type_id=8",
+ "[23] TYPEDEF 'et' type_id=9",
+ "[24] CONST '(anon)' type_id=10",
+ "[25] PTR '(anon)' type_id=11",
+ "[26] STRUCT 'with_embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=13 bits_offset=0",
+ "[27] FUNC 'fn' type_id=14 linkage=static",
+ "[28] TYPEDEF 'arraytype' type_id=15",
+ "[29] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=16");
+
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(8, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf4,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's1' size=8 vlen=0",
+ "[3] UNION 'u1' size=12 vlen=0",
+ "[4] ENUM 'e1' encoding=UNSIGNED size=4 vlen=0",
+ "[5] ENUM 'e641' encoding=UNSIGNED size=8 vlen=0",
+ "[6] STRUCT 'embedded' size=4 vlen=0",
+ "[7] STRUCT 'from_proto' size=4 vlen=0",
+ /* split BTF; these types should match split BTF above from 17-28, with
+ * updated type id references
+ */
+ "[8] PTR '(anon)' type_id=2",
+ "[9] PTR '(anon)' type_id=20",
+ "[10] CONST '(anon)' type_id=3",
+ "[11] RESTRICT '(anon)' type_id=21",
+ "[12] VOLATILE '(anon)' type_id=4",
+ "[13] TYPEDEF 'et' type_id=22",
+ "[14] CONST '(anon)' type_id=5",
+ "[15] PTR '(anon)' type_id=23",
+ "[16] STRUCT 'with_embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=6 bits_offset=0",
+ "[17] FUNC 'fn' type_id=24 linkage=static",
+ "[18] TYPEDEF 'arraytype' type_id=25",
+ "[19] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=7",
+ /* split BTF types added from original base BTF below */
+ "[20] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=2 bits_offset=32",
+ "[21] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[22] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[23] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[24] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[25] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3");
+
+ if (!ASSERT_EQ(btf__relocate(btf4, btf1), 0, "relocate_split"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf4,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=8 vlen=1\n"
+ "\t'f1' type_id=2 bits_offset=0",
+ "[4] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=32",
+ "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)",
+ "[6] UNION 'u1' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=2 bits_offset=0",
+ "[7] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'v1' val=1",
+ "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1024",
+ "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[12] STRUCT 'unneeded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[13] STRUCT 'embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3",
+ "[16] STRUCT 'from_proto' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[17] UNION 'u1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[18] PTR '(anon)' type_id=3",
+ "[19] PTR '(anon)' type_id=30",
+ "[20] CONST '(anon)' type_id=6",
+ "[21] RESTRICT '(anon)' type_id=31",
+ "[22] VOLATILE '(anon)' type_id=8",
+ "[23] TYPEDEF 'et' type_id=32",
+ "[24] CONST '(anon)' type_id=10",
+ "[25] PTR '(anon)' type_id=33",
+ "[26] STRUCT 'with_embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=13 bits_offset=0",
+ "[27] FUNC 'fn' type_id=34 linkage=static",
+ "[28] TYPEDEF 'arraytype' type_id=35",
+ "[29] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=16",
+ /* below here are (duplicate) anon base types added by distill
+ * process to split BTF.
+ */
+ "[30] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=32",
+ "[31] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[32] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[33] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[34] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[35] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3");
+
+cleanup:
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* ensure we can cope with multiple types with the same name in
+ * distilled base BTF. In this case because sizes are different,
+ * we can still disambiguate them.
+ */
+static void test_distilled_base_multi(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_int(btf1, "int", 8, BTF_INT_SIGNED); /* [2] int */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED");
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+ btf__add_ptr(btf2, 1);
+ btf__add_const(btf2, 2);
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED",
+ "[3] PTR '(anon)' type_id=1",
+ "[4] CONST '(anon)' type_id=2");
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ btf3,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED");
+ if (!ASSERT_EQ(btf__relocate(btf4, btf1), 0, "relocate_split"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf4,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED",
+ "[3] PTR '(anon)' type_id=1",
+ "[4] CONST '(anon)' type_id=2");
+
+cleanup:
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* If a needed type is not present in the base BTF we wish to relocate
+ * with, btf__relocate() should error our.
+ */
+static void test_distilled_base_missing_err(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL, *btf5 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_int(btf1, "int", 8, BTF_INT_SIGNED); /* [2] int */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED");
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+ btf__add_ptr(btf2, 1);
+ btf__add_const(btf2, 2);
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED",
+ "[3] PTR '(anon)' type_id=1",
+ "[4] CONST '(anon)' type_id=2");
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ btf3,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED");
+ btf5 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf"))
+ return;
+ btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ VALIDATE_RAW_BTF(
+ btf5,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ ASSERT_EQ(btf__relocate(btf4, btf5), -EINVAL, "relocate_split");
+
+cleanup:
+ btf__free(btf5);
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* With 2 types of same size in distilled base BTF, relocation should
+ * fail as we have no means to choose between them.
+ */
+static void test_distilled_base_multi_err(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [2] int */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+ btf__add_ptr(btf2, 1);
+ btf__add_const(btf2, 2);
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[3] PTR '(anon)' type_id=1",
+ "[4] CONST '(anon)' type_id=2");
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ btf3,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ ASSERT_EQ(btf__relocate(btf4, btf1), -EINVAL, "relocate_split");
+cleanup:
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* With 2 types of same size in base BTF, relocation should
+ * fail as we have no means to choose between them.
+ */
+static void test_distilled_base_multi_err2(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL, *btf5 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+ btf__add_ptr(btf2, 1);
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1");
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(2, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ btf3,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ btf5 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf"))
+ return;
+ btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [2] int */
+ VALIDATE_RAW_BTF(
+ btf5,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ ASSERT_EQ(btf__relocate(btf4, btf5), -EINVAL, "relocate_split");
+cleanup:
+ btf__free(btf5);
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* create split reference BTF from vmlinux + split BTF with a few type references;
+ * ensure the resultant split reference BTF is as expected, containing only types
+ * needed to disambiguate references from split BTF.
+ */
+static void test_distilled_base_vmlinux(void)
+{
+ struct btf *split_btf = NULL, *vmlinux_btf = btf__load_vmlinux_btf();
+ struct btf *split_dist = NULL, *base_dist = NULL;
+ __s32 int_id, myint_id;
+
+ if (!ASSERT_OK_PTR(vmlinux_btf, "load_vmlinux"))
+ return;
+ int_id = btf__find_by_name_kind(vmlinux_btf, "int", BTF_KIND_INT);
+ if (!ASSERT_GT(int_id, 0, "find_int"))
+ goto cleanup;
+ split_btf = btf__new_empty_split(vmlinux_btf);
+ if (!ASSERT_OK_PTR(split_btf, "new_split"))
+ goto cleanup;
+ myint_id = btf__add_typedef(split_btf, "myint", int_id);
+ btf__add_ptr(split_btf, myint_id);
+
+ if (!ASSERT_EQ(btf__distill_base(split_btf, &base_dist, &split_dist), 0,
+ "distill_vmlinux_base"))
+ goto cleanup;
+
+ if (!ASSERT_OK_PTR(split_dist, "split_distilled") ||
+ !ASSERT_OK_PTR(base_dist, "base_dist"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ split_dist,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] TYPEDEF 'myint' type_id=1",
+ "[3] PTR '(anon)' type_id=2");
+
+cleanup:
+ btf__free(split_dist);
+ btf__free(base_dist);
+ btf__free(split_btf);
+ btf__free(vmlinux_btf);
+}
+
+void test_btf_distill(void)
+{
+ if (test__start_subtest("distilled_base"))
+ test_distilled_base();
+ if (test__start_subtest("distilled_base_multi"))
+ test_distilled_base_multi();
+ if (test__start_subtest("distilled_base_missing_err"))
+ test_distilled_base_missing_err();
+ if (test__start_subtest("distilled_base_multi_err"))
+ test_distilled_base_multi_err();
+ if (test__start_subtest("distilled_base_multi_err2"))
+ test_distilled_base_multi_err2();
+ if (test__start_subtest("distilled_base_vmlinux"))
+ test_distilled_base_vmlinux();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
index addf720428f7..9709c8db7275 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -32,7 +32,7 @@ static int run_test(int cgroup_fd, int server_fd, bool classid)
goto out;
}
- fd = connect_to_fd_opts(server_fd, &opts);
+ fd = connect_to_fd_opts(server_fd, SOCK_STREAM, &opts);
if (fd < 0)
err = -1;
else
@@ -52,7 +52,7 @@ void test_cgroup_v1v2(void)
server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
if (!ASSERT_GE(server_fd, 0, "server_fd"))
return;
- client_fd = connect_to_fd_opts(server_fd, &opts);
+ client_fd = connect_to_fd_opts(server_fd, SOCK_STREAM, &opts);
if (!ASSERT_GE(client_fd, 0, "client_fd")) {
close(server_fd);
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index 596536def43d..49b1ffc9af1f 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -50,9 +50,9 @@ void serial_test_fexit_stress(void)
out:
for (i = 0; i < bpf_max_tramp_links; i++) {
- if (link_fd[i])
+ if (link_fd[i] > 0)
close(link_fd[i]);
- if (fexit_fd[i])
+ if (fexit_fd[i] > 0)
close(fexit_fd[i]);
}
free(fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
index 284764e7179f..4ddb8a5fece8 100644
--- a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
+++ b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
@@ -158,15 +158,13 @@ static int send_frags6(int client)
void test_bpf_ip_check_defrag_ok(bool ipv6)
{
+ int family = ipv6 ? AF_INET6 : AF_INET;
struct network_helper_opts rx_opts = {
.timeout_ms = 1000,
- .noconnect = true,
};
struct network_helper_opts tx_ops = {
.timeout_ms = 1000,
- .type = SOCK_RAW,
.proto = IPPROTO_RAW,
- .noconnect = true,
};
struct sockaddr_storage caddr;
struct ip_check_defrag *skel;
@@ -192,7 +190,7 @@ void test_bpf_ip_check_defrag_ok(bool ipv6)
nstoken = open_netns(NS1);
if (!ASSERT_OK_PTR(nstoken, "setns ns1"))
goto out;
- srv_fd = start_server(ipv6 ? AF_INET6 : AF_INET, SOCK_DGRAM, NULL, SERVER_PORT, 0);
+ srv_fd = start_server(family, SOCK_DGRAM, NULL, SERVER_PORT, 0);
close_netns(nstoken);
if (!ASSERT_GE(srv_fd, 0, "start_server"))
goto out;
@@ -201,18 +199,18 @@ void test_bpf_ip_check_defrag_ok(bool ipv6)
nstoken = open_netns(NS0);
if (!ASSERT_OK_PTR(nstoken, "setns ns0"))
goto out;
- client_tx_fd = connect_to_fd_opts(srv_fd, &tx_ops);
+ client_tx_fd = client_socket(family, SOCK_RAW, &tx_ops);
close_netns(nstoken);
- if (!ASSERT_GE(client_tx_fd, 0, "connect_to_fd_opts"))
+ if (!ASSERT_GE(client_tx_fd, 0, "client_socket"))
goto out;
/* Open rx socket in ns0 */
nstoken = open_netns(NS0);
if (!ASSERT_OK_PTR(nstoken, "setns ns0"))
goto out;
- client_rx_fd = connect_to_fd_opts(srv_fd, &rx_opts);
+ client_rx_fd = client_socket(family, SOCK_DGRAM, &rx_opts);
close_netns(nstoken);
- if (!ASSERT_GE(client_rx_fd, 0, "connect_to_fd_opts"))
+ if (!ASSERT_GE(client_rx_fd, 0, "client_socket"))
goto out;
/* Bind rx socket to a premeditated port */
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 2eb71559713c..5b743212292f 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -78,6 +78,7 @@ static struct kfunc_test_params kfunc_tests[] = {
SYSCALL_TEST(kfunc_syscall_test, 0),
SYSCALL_NULL_CTX_TEST(kfunc_syscall_test_null, 0),
TC_TEST(kfunc_call_test_static_unused_arg, 0),
+ TC_TEST(kfunc_call_ctx, 0),
};
struct syscall_test_args {
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c b/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c
new file mode 100644
index 000000000000..c8f4dcaac7c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2024 Meta Platforms, Inc */
+
+#include <test_progs.h>
+#include "test_kfunc_param_nullable.skel.h"
+
+void test_kfunc_param_nullable(void)
+{
+ RUN_TESTS(test_kfunc_param_nullable);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 274d2e033e39..d2ca32fa3b21 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -89,13 +89,8 @@ static int start_mptcp_server(int family, const char *addr_str, __u16 port,
.timeout_ms = timeout_ms,
.proto = IPPROTO_MPTCP,
};
- struct sockaddr_storage addr;
- socklen_t addrlen;
- if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
- return -1;
-
- return start_server_addr(SOCK_STREAM, &addr, addrlen, &opts);
+ return start_server_str(family, SOCK_STREAM, addr_str, port, &opts);
}
static int verify_tsk(int map_fd, int client_fd)
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c
index bc9841144685..1af9ec1149aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_links.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c
@@ -9,6 +9,8 @@
#define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null"
#include "test_tc_link.skel.h"
+
+#include "netlink_helpers.h"
#include "tc_helpers.h"
void serial_test_tc_links_basic(void)
@@ -1787,6 +1789,65 @@ void serial_test_tc_links_ingress(void)
test_tc_links_ingress(BPF_TCX_INGRESS, false, false);
}
+struct qdisc_req {
+ struct nlmsghdr n;
+ struct tcmsg t;
+ char buf[1024];
+};
+
+static int qdisc_replace(int ifindex, const char *kind, bool block)
+{
+ struct rtnl_handle rth = { .fd = -1 };
+ struct qdisc_req req;
+ int err;
+
+ err = rtnl_open(&rth, 0);
+ if (!ASSERT_OK(err, "open_rtnetlink"))
+ return err;
+
+ memset(&req, 0, sizeof(req));
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST;
+ req.n.nlmsg_type = RTM_NEWQDISC;
+ req.t.tcm_family = AF_UNSPEC;
+ req.t.tcm_ifindex = ifindex;
+ req.t.tcm_parent = 0xfffffff1;
+
+ addattr_l(&req.n, sizeof(req), TCA_KIND, kind, strlen(kind) + 1);
+ if (block)
+ addattr32(&req.n, sizeof(req), TCA_INGRESS_BLOCK, 1);
+
+ err = rtnl_talk(&rth, &req.n, NULL);
+ ASSERT_OK(err, "talk_rtnetlink");
+ rtnl_close(&rth);
+ return err;
+}
+
+void serial_test_tc_links_dev_chain0(void)
+{
+ int err, ifindex;
+
+ ASSERT_OK(system("ip link add dev foo type veth peer name bar"), "add veth");
+ ifindex = if_nametoindex("foo");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+ err = qdisc_replace(ifindex, "ingress", true);
+ if (!ASSERT_OK(err, "attaching ingress"))
+ goto cleanup;
+ ASSERT_OK(system("tc filter add block 1 matchall action skbmod swap mac"), "add block");
+ err = qdisc_replace(ifindex, "clsact", false);
+ if (!ASSERT_OK(err, "attaching clsact"))
+ goto cleanup;
+ /* Heuristic: kern_sync_rcu() alone does not work; a wait-time of ~5s
+ * triggered the issue without the fix reliably 100% of the time.
+ */
+ sleep(5);
+ ASSERT_OK(system("tc filter add dev foo ingress matchall action skbmod swap mac"), "add filter");
+cleanup:
+ ASSERT_OK(system("ip link del dev foo"), "del veth");
+ ASSERT_EQ(if_nametoindex("foo"), 0, "foo removed");
+ ASSERT_EQ(if_nametoindex("bar"), 0, "bar removed");
+}
+
static void test_tc_links_dev_mixed(int target)
{
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
index ae93411fd582..09ca13bdf6ca 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
@@ -11,6 +11,7 @@ static int sanity_run(struct bpf_program *prog)
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.repeat = 1,
+ .flags = BPF_F_TEST_SKB_CHECKSUM_COMPLETE,
);
prog_fd = bpf_program__fd(prog);
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
new file mode 100644
index 000000000000..871d16cb95cf
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include <pthread.h>
+#include <network_helpers.h>
+
+#include "timer_lockup.skel.h"
+
+static long cpu;
+static int *timer1_err;
+static int *timer2_err;
+static bool skip;
+
+volatile int k = 0;
+
+static void *timer_lockup_thread(void *arg)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1000,
+ );
+ int i, prog_fd = *(int *)arg;
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset);
+ ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset),
+ &cpuset),
+ "cpu affinity");
+
+ for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) {
+ bpf_prog_test_run_opts(prog_fd, &opts);
+ /* Skip the test if we can't reproduce the race in a reasonable
+ * amount of time.
+ */
+ if (i > 50) {
+ WRITE_ONCE(skip, true);
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+void test_timer_lockup(void)
+{
+ int timer1_prog, timer2_prog;
+ struct timer_lockup *skel;
+ pthread_t thrds[2];
+ void *ret;
+
+ skel = timer_lockup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load"))
+ return;
+
+ timer1_prog = bpf_program__fd(skel->progs.timer1_prog);
+ timer2_prog = bpf_program__fd(skel->progs.timer2_prog);
+
+ timer1_err = &skel->bss->timer1_err;
+ timer2_err = &skel->bss->timer2_err;
+
+ if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread,
+ &timer1_prog),
+ "pthread_create thread1"))
+ goto out;
+ if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread,
+ &timer2_prog),
+ "pthread_create thread2")) {
+ pthread_exit(&thrds[0]);
+ goto out;
+ }
+
+ pthread_join(thrds[1], &ret);
+ pthread_join(thrds[0], &ret);
+
+ if (skip) {
+ test__skip();
+ goto out;
+ }
+
+ if (*timer1_err != -EDEADLK && *timer1_err != 0)
+ ASSERT_FAIL("timer1_err bad value");
+ if (*timer2_err != -EDEADLK && *timer2_err != 0)
+ ASSERT_FAIL("timer2_err bad value");
+out:
+ timer_lockup__destroy(skel);
+ return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
index fe0fb0c9849a..19e68d4b3532 100644
--- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
@@ -3,8 +3,9 @@
#include <test_progs.h>
#include "tracing_struct.skel.h"
+#include "tracing_struct_many_args.skel.h"
-static void test_fentry(void)
+static void test_struct_args(void)
{
struct tracing_struct *skel;
int err;
@@ -55,6 +56,25 @@ static void test_fentry(void)
ASSERT_EQ(skel->bss->t6, 1, "t6 ret");
+destroy_skel:
+ tracing_struct__destroy(skel);
+}
+
+static void test_struct_many_args(void)
+{
+ struct tracing_struct_many_args *skel;
+ int err;
+
+ skel = tracing_struct_many_args__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "tracing_struct_many_args__open_and_load"))
+ return;
+
+ err = tracing_struct_many_args__attach(skel);
+ if (!ASSERT_OK(err, "tracing_struct_many_args__attach"))
+ goto destroy_skel;
+
+ ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
ASSERT_EQ(skel->bss->t7_a, 16, "t7:a");
ASSERT_EQ(skel->bss->t7_b, 17, "t7:b");
ASSERT_EQ(skel->bss->t7_c, 18, "t7:c");
@@ -74,12 +94,28 @@ static void test_fentry(void)
ASSERT_EQ(skel->bss->t8_g, 23, "t8:g");
ASSERT_EQ(skel->bss->t8_ret, 156, "t8 ret");
- tracing_struct__detach(skel);
+ ASSERT_EQ(skel->bss->t9_a, 16, "t9:a");
+ ASSERT_EQ(skel->bss->t9_b, 17, "t9:b");
+ ASSERT_EQ(skel->bss->t9_c, 18, "t9:c");
+ ASSERT_EQ(skel->bss->t9_d, 19, "t9:d");
+ ASSERT_EQ(skel->bss->t9_e, 20, "t9:e");
+ ASSERT_EQ(skel->bss->t9_f, 21, "t9:f");
+ ASSERT_EQ(skel->bss->t9_g, 22, "t9:f");
+ ASSERT_EQ(skel->bss->t9_h_a, 23, "t9:h.a");
+ ASSERT_EQ(skel->bss->t9_h_b, 24, "t9:h.b");
+ ASSERT_EQ(skel->bss->t9_h_c, 25, "t9:h.c");
+ ASSERT_EQ(skel->bss->t9_h_d, 26, "t9:h.d");
+ ASSERT_EQ(skel->bss->t9_i, 27, "t9:i");
+ ASSERT_EQ(skel->bss->t9_ret, 258, "t9 ret");
+
destroy_skel:
- tracing_struct__destroy(skel);
+ tracing_struct_many_args__destroy(skel);
}
void test_tracing_struct(void)
{
- test_fentry();
+ if (test__start_subtest("struct_args"))
+ test_struct_args();
+ if (test__start_subtest("struct_many_args"))
+ test_struct_many_args();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
new file mode 100644
index 000000000000..e1bf141d3401
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <bpf/btf.h>
+#include <linux/if_link.h>
+#include <linux/udp.h>
+#include <net/if.h>
+#include <unistd.h>
+
+#include "xdp_flowtable.skel.h"
+
+#define TX_NETNS_NAME "ns0"
+#define RX_NETNS_NAME "ns1"
+
+#define TX_NAME "v0"
+#define FORWARD_NAME "v1"
+#define RX_NAME "d0"
+
+#define TX_MAC "00:00:00:00:00:01"
+#define FORWARD_MAC "00:00:00:00:00:02"
+#define RX_MAC "00:00:00:00:00:03"
+#define DST_MAC "00:00:00:00:00:04"
+
+#define TX_ADDR "10.0.0.1"
+#define FORWARD_ADDR "10.0.0.2"
+#define RX_ADDR "20.0.0.1"
+#define DST_ADDR "20.0.0.2"
+
+#define PREFIX_LEN "8"
+#define N_PACKETS 10
+#define UDP_PORT 12345
+#define UDP_PORT_STR "12345"
+
+static int send_udp_traffic(void)
+{
+ struct sockaddr_storage addr;
+ int i, sock;
+
+ if (make_sockaddr(AF_INET, DST_ADDR, UDP_PORT, &addr, NULL))
+ return -EINVAL;
+
+ sock = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock < 0)
+ return sock;
+
+ for (i = 0; i < N_PACKETS; i++) {
+ unsigned char buf[] = { 0xaa, 0xbb, 0xcc };
+ int n;
+
+ n = sendto(sock, buf, sizeof(buf), MSG_NOSIGNAL | MSG_CONFIRM,
+ (struct sockaddr *)&addr, sizeof(addr));
+ if (n != sizeof(buf)) {
+ close(sock);
+ return -EINVAL;
+ }
+
+ usleep(50000); /* 50ms */
+ }
+ close(sock);
+
+ return 0;
+}
+
+void test_xdp_flowtable(void)
+{
+ struct xdp_flowtable *skel = NULL;
+ struct nstoken *tok = NULL;
+ int iifindex, stats_fd;
+ __u32 value, key = 0;
+ struct bpf_link *link;
+
+ if (SYS_NOFAIL("nft -v")) {
+ fprintf(stdout, "Missing required nft tool\n");
+ test__skip();
+ return;
+ }
+
+ SYS(out, "ip netns add " TX_NETNS_NAME);
+ SYS(out, "ip netns add " RX_NETNS_NAME);
+
+ tok = open_netns(RX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ SYS(out, "sysctl -qw net.ipv4.conf.all.forwarding=1");
+
+ SYS(out, "ip link add " TX_NAME " type veth peer " FORWARD_NAME);
+ SYS(out, "ip link set " TX_NAME " netns " TX_NETNS_NAME);
+ SYS(out, "ip link set dev " FORWARD_NAME " address " FORWARD_MAC);
+ SYS(out,
+ "ip addr add " FORWARD_ADDR "/" PREFIX_LEN " dev " FORWARD_NAME);
+ SYS(out, "ip link set dev " FORWARD_NAME " up");
+
+ SYS(out, "ip link add " RX_NAME " type dummy");
+ SYS(out, "ip link set dev " RX_NAME " address " RX_MAC);
+ SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
+ SYS(out, "ip link set dev " RX_NAME " up");
+
+ /* configure the flowtable */
+ SYS(out, "nft add table ip filter");
+ SYS(out,
+ "nft add flowtable ip filter f { hook ingress priority 0\\; "
+ "devices = { " FORWARD_NAME ", " RX_NAME " }\\; }");
+ SYS(out,
+ "nft add chain ip filter forward "
+ "{ type filter hook forward priority 0\\; }");
+ SYS(out,
+ "nft add rule ip filter forward ip protocol udp th dport "
+ UDP_PORT_STR " flow add @f");
+
+ /* Avoid ARP calls */
+ SYS(out,
+ "ip -4 neigh add " DST_ADDR " lladdr " DST_MAC " dev " RX_NAME);
+
+ close_netns(tok);
+ tok = open_netns(TX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME);
+ SYS(out, "ip link set dev " TX_NAME " address " TX_MAC);
+ SYS(out, "ip link set dev " TX_NAME " up");
+ SYS(out, "ip route add default via " FORWARD_ADDR);
+
+ close_netns(tok);
+ tok = open_netns(RX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ iifindex = if_nametoindex(FORWARD_NAME);
+ if (!ASSERT_NEQ(iifindex, 0, "iifindex"))
+ goto out;
+
+ skel = xdp_flowtable__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto out;
+
+ link = bpf_program__attach_xdp(skel->progs.xdp_flowtable_do_lookup,
+ iifindex);
+ if (!ASSERT_OK_PTR(link, "prog_attach"))
+ goto out;
+
+ close_netns(tok);
+ tok = open_netns(TX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ if (!ASSERT_OK(send_udp_traffic(), "send udp"))
+ goto out;
+
+ close_netns(tok);
+ tok = open_netns(RX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ stats_fd = bpf_map__fd(skel->maps.stats);
+ if (!ASSERT_OK(bpf_map_lookup_elem(stats_fd, &key, &value),
+ "bpf_map_update_elem stats"))
+ goto out;
+
+ ASSERT_GE(value, N_PACKETS - 2, "bpf_xdp_flow_lookup failed");
+out:
+ xdp_flowtable__destroy(skel);
+ if (tok)
+ close_netns(tok);
+ SYS_NOFAIL("ip netns del " TX_NETNS_NAME);
+ SYS_NOFAIL("ip netns del " RX_NETNS_NAME);
+}
diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c
index 55f10563208d..bb0acd79d28a 100644
--- a/tools/testing/selftests/bpf/progs/arena_atomics.c
+++ b/tools/testing/selftests/bpf/progs/arena_atomics.c
@@ -25,20 +25,13 @@ bool skip_tests = true;
__u32 pid = 0;
-#undef __arena
-#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
-#define __arena __attribute__((address_space(1)))
-#else
-#define __arena SEC(".addr_space.1")
-#endif
-
-__u64 __arena add64_value = 1;
-__u64 __arena add64_result = 0;
-__u32 __arena add32_value = 1;
-__u32 __arena add32_result = 0;
-__u64 __arena add_stack_value_copy = 0;
-__u64 __arena add_stack_result = 0;
-__u64 __arena add_noreturn_value = 1;
+__u64 __arena_global add64_value = 1;
+__u64 __arena_global add64_result = 0;
+__u32 __arena_global add32_value = 1;
+__u32 __arena_global add32_result = 0;
+__u64 __arena_global add_stack_value_copy = 0;
+__u64 __arena_global add_stack_result = 0;
+__u64 __arena_global add_noreturn_value = 1;
SEC("raw_tp/sys_enter")
int add(const void *ctx)
@@ -58,13 +51,13 @@ int add(const void *ctx)
return 0;
}
-__s64 __arena sub64_value = 1;
-__s64 __arena sub64_result = 0;
-__s32 __arena sub32_value = 1;
-__s32 __arena sub32_result = 0;
-__s64 __arena sub_stack_value_copy = 0;
-__s64 __arena sub_stack_result = 0;
-__s64 __arena sub_noreturn_value = 1;
+__s64 __arena_global sub64_value = 1;
+__s64 __arena_global sub64_result = 0;
+__s32 __arena_global sub32_value = 1;
+__s32 __arena_global sub32_result = 0;
+__s64 __arena_global sub_stack_value_copy = 0;
+__s64 __arena_global sub_stack_result = 0;
+__s64 __arena_global sub_noreturn_value = 1;
SEC("raw_tp/sys_enter")
int sub(const void *ctx)
@@ -84,8 +77,8 @@ int sub(const void *ctx)
return 0;
}
-__u64 __arena and64_value = (0x110ull << 32);
-__u32 __arena and32_value = 0x110;
+__u64 __arena_global and64_value = (0x110ull << 32);
+__u32 __arena_global and32_value = 0x110;
SEC("raw_tp/sys_enter")
int and(const void *ctx)
@@ -101,8 +94,8 @@ int and(const void *ctx)
return 0;
}
-__u32 __arena or32_value = 0x110;
-__u64 __arena or64_value = (0x110ull << 32);
+__u32 __arena_global or32_value = 0x110;
+__u64 __arena_global or64_value = (0x110ull << 32);
SEC("raw_tp/sys_enter")
int or(const void *ctx)
@@ -117,8 +110,8 @@ int or(const void *ctx)
return 0;
}
-__u64 __arena xor64_value = (0x110ull << 32);
-__u32 __arena xor32_value = 0x110;
+__u64 __arena_global xor64_value = (0x110ull << 32);
+__u32 __arena_global xor32_value = 0x110;
SEC("raw_tp/sys_enter")
int xor(const void *ctx)
@@ -133,12 +126,12 @@ int xor(const void *ctx)
return 0;
}
-__u32 __arena cmpxchg32_value = 1;
-__u32 __arena cmpxchg32_result_fail = 0;
-__u32 __arena cmpxchg32_result_succeed = 0;
-__u64 __arena cmpxchg64_value = 1;
-__u64 __arena cmpxchg64_result_fail = 0;
-__u64 __arena cmpxchg64_result_succeed = 0;
+__u32 __arena_global cmpxchg32_value = 1;
+__u32 __arena_global cmpxchg32_result_fail = 0;
+__u32 __arena_global cmpxchg32_result_succeed = 0;
+__u64 __arena_global cmpxchg64_value = 1;
+__u64 __arena_global cmpxchg64_result_fail = 0;
+__u64 __arena_global cmpxchg64_result_succeed = 0;
SEC("raw_tp/sys_enter")
int cmpxchg(const void *ctx)
@@ -156,10 +149,10 @@ int cmpxchg(const void *ctx)
return 0;
}
-__u64 __arena xchg64_value = 1;
-__u64 __arena xchg64_result = 0;
-__u32 __arena xchg32_value = 1;
-__u32 __arena xchg32_result = 0;
+__u64 __arena_global xchg64_value = 1;
+__u64 __arena_global xchg64_result = 0;
+__u32 __arena_global xchg32_value = 1;
+__u32 __arena_global xchg32_result = 0;
SEC("raw_tp/sys_enter")
int xchg(const void *ctx)
@@ -176,3 +169,79 @@ int xchg(const void *ctx)
return 0;
}
+
+__u64 __arena_global uaf_sink;
+volatile __u64 __arena_global uaf_recovery_fails;
+
+SEC("syscall")
+int uaf(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#if defined(ENABLE_ATOMICS_TESTS) && !defined(__TARGET_ARCH_arm64) && \
+ !defined(__TARGET_ARCH_x86)
+ __u32 __arena *page32;
+ __u64 __arena *page64;
+ void __arena *page;
+
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ bpf_arena_free_pages(&arena, page, 1);
+ uaf_recovery_fails = 24;
+
+ page32 = (__u32 __arena *)page;
+ uaf_sink += __sync_fetch_and_add(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_add_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_sub(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_sub_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_and(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_and_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_or(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_or_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_xor(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_xor_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_val_compare_and_swap(page32, 0, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_lock_test_and_set(page32, 1);
+ uaf_recovery_fails -= 1;
+
+ page64 = (__u64 __arena *)page;
+ uaf_sink += __sync_fetch_and_add(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_add_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_sub(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_sub_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_and(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_and_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_or(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_or_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_xor(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_xor_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_val_compare_and_swap(page64, 0, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_lock_test_and_set(page64, 1);
+ uaf_recovery_fails -= 1;
+#endif
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c
index 1e6ac187a6a0..81eaa94afeb0 100644
--- a/tools/testing/selftests/bpf/progs/arena_htab.c
+++ b/tools/testing/selftests/bpf/progs/arena_htab.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -18,25 +19,35 @@ void __arena *htab_for_user;
bool skip = false;
int zero = 0;
+char __arena arr1[100000];
+char arr2[1000];
SEC("syscall")
int arena_htab_llvm(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) || defined(BPF_ARENA_FORCE_ASM)
struct htab __arena *htab;
+ char __arena *arr = arr1;
__u64 i;
htab = bpf_alloc(sizeof(*htab));
cast_kern(htab);
htab_init(htab);
+ cast_kern(arr);
+
/* first run. No old elems in the table */
- for (i = zero; i < 1000; i++)
+ for (i = zero; i < 100000 && can_loop; i++) {
htab_update_elem(htab, i, i);
+ arr[i] = i;
+ }
- /* should replace all elems with new ones */
- for (i = zero; i < 1000; i++)
+ /* should replace some elems with new ones */
+ for (i = zero; i < 1000 && can_loop; i++) {
htab_update_elem(htab, i, i);
+ /* Access mem to make the verifier use bounded loop logic */
+ arr2[i] = i;
+ }
cast_user(htab);
htab_for_user = htab;
#else
diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c
index 93bd0600eba0..3a2ddcacbea6 100644
--- a/tools/testing/selftests/bpf/progs/arena_list.c
+++ b/tools/testing/selftests/bpf/progs/arena_list.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 3c9ffe340312..02f552e7fd4d 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -65,7 +65,7 @@ static void dctcp_reset(const struct tcp_sock *tp, struct bpf_dctcp *ca)
}
SEC("struct_ops")
-void BPF_PROG(dctcp_init, struct sock *sk)
+void BPF_PROG(bpf_dctcp_init, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bpf_dctcp *ca = inet_csk_ca(sk);
@@ -77,7 +77,7 @@ void BPF_PROG(dctcp_init, struct sock *sk)
(void *)fallback, sizeof(fallback)) == -EBUSY)
ebusy_cnt++;
- /* Switch back to myself and the recurred dctcp_init()
+ /* Switch back to myself and the recurred bpf_dctcp_init()
* will get -EBUSY for all bpf_setsockopt(TCP_CONGESTION),
* except the last "cdg" one.
*/
@@ -112,7 +112,7 @@ void BPF_PROG(dctcp_init, struct sock *sk)
}
SEC("struct_ops")
-__u32 BPF_PROG(dctcp_ssthresh, struct sock *sk)
+__u32 BPF_PROG(bpf_dctcp_ssthresh, struct sock *sk)
{
struct bpf_dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -122,7 +122,7 @@ __u32 BPF_PROG(dctcp_ssthresh, struct sock *sk)
}
SEC("struct_ops")
-void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
+void BPF_PROG(bpf_dctcp_update_alpha, struct sock *sk, __u32 flags)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bpf_dctcp *ca = inet_csk_ca(sk);
@@ -161,12 +161,12 @@ static void dctcp_react_to_loss(struct sock *sk)
}
SEC("struct_ops")
-void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state)
+void BPF_PROG(bpf_dctcp_state, struct sock *sk, __u8 new_state)
{
if (new_state == TCP_CA_Recovery &&
new_state != BPF_CORE_READ_BITFIELD(inet_csk(sk), icsk_ca_state))
dctcp_react_to_loss(sk);
- /* We handle RTO in dctcp_cwnd_event to ensure that we perform only
+ /* We handle RTO in bpf_dctcp_cwnd_event to ensure that we perform only
* one loss-adjustment per RTT.
*/
}
@@ -208,7 +208,7 @@ static void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
}
SEC("struct_ops")
-void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
+void BPF_PROG(bpf_dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
{
struct bpf_dctcp *ca = inet_csk_ca(sk);
@@ -227,7 +227,7 @@ void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
}
SEC("struct_ops")
-__u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
+__u32 BPF_PROG(bpf_dctcp_cwnd_undo, struct sock *sk)
{
const struct bpf_dctcp *ca = inet_csk_ca(sk);
@@ -237,28 +237,28 @@ __u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
SEC("struct_ops")
-void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+void BPF_PROG(bpf_dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
tcp_reno_cong_avoid(sk, ack, acked);
}
SEC(".struct_ops")
struct tcp_congestion_ops dctcp_nouse = {
- .init = (void *)dctcp_init,
- .set_state = (void *)dctcp_state,
+ .init = (void *)bpf_dctcp_init,
+ .set_state = (void *)bpf_dctcp_state,
.flags = TCP_CONG_NEEDS_ECN,
.name = "bpf_dctcp_nouse",
};
SEC(".struct_ops")
struct tcp_congestion_ops dctcp = {
- .init = (void *)dctcp_init,
- .in_ack_event = (void *)dctcp_update_alpha,
- .cwnd_event = (void *)dctcp_cwnd_event,
- .ssthresh = (void *)dctcp_ssthresh,
- .cong_avoid = (void *)dctcp_cong_avoid,
- .undo_cwnd = (void *)dctcp_cwnd_undo,
- .set_state = (void *)dctcp_state,
+ .init = (void *)bpf_dctcp_init,
+ .in_ack_event = (void *)bpf_dctcp_update_alpha,
+ .cwnd_event = (void *)bpf_dctcp_cwnd_event,
+ .ssthresh = (void *)bpf_dctcp_ssthresh,
+ .cong_avoid = (void *)bpf_dctcp_cong_avoid,
+ .undo_cwnd = (void *)bpf_dctcp_cwnd_undo,
+ .set_state = (void *)bpf_dctcp_state,
.flags = TCP_CONG_NEEDS_ECN,
.name = "bpf_dctcp",
};
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index fb2f5513e29e..81097a3f15eb 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -7,9 +7,9 @@
*
* The test_loader sequentially loads each program in a skeleton.
* Programs could be loaded in privileged and unprivileged modes.
- * - __success, __failure, __msg imply privileged mode;
- * - __success_unpriv, __failure_unpriv, __msg_unpriv imply
- * unprivileged mode.
+ * - __success, __failure, __msg, __regex imply privileged mode;
+ * - __success_unpriv, __failure_unpriv, __msg_unpriv, __regex_unpriv
+ * imply unprivileged mode.
* If combination of privileged and unprivileged attributes is present
* both modes are used. If none are present privileged mode is implied.
*
@@ -24,6 +24,9 @@
* Multiple __msg attributes could be specified.
* __msg_unpriv Same as __msg but for unprivileged mode.
*
+ * __regex Same as __msg, but using a regular expression.
+ * __regex_unpriv Same as __msg_unpriv but using a regular expression.
+ *
* __success Expect program load success in privileged mode.
* __success_unpriv Expect program load success in unprivileged mode.
*
@@ -59,10 +62,12 @@
* __auxiliary_unpriv Same, but load program in unprivileged mode.
*/
#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg)))
+#define __regex(regex) __attribute__((btf_decl_tag("comment:test_expect_regex=" regex)))
#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure")))
#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
#define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc)))
#define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" msg)))
+#define __regex_unpriv(regex) __attribute__((btf_decl_tag("comment:test_expect_regex_unpriv=" regex)))
#define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv")))
#define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv")))
#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
@@ -135,4 +140,8 @@
/* make it look to compiler like value is read and written */
#define __sink(expr) asm volatile("" : "+g"(expr))
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
#endif
diff --git a/tools/testing/selftests/bpf/progs/crypto_bench.c b/tools/testing/selftests/bpf/progs/crypto_bench.c
index e61fe0882293..4ac956b26240 100644
--- a/tools/testing/selftests/bpf/progs/crypto_bench.c
+++ b/tools/testing/selftests/bpf/progs/crypto_bench.c
@@ -57,7 +57,7 @@ int crypto_encrypt(struct __sk_buff *skb)
{
struct __crypto_ctx_value *v;
struct bpf_crypto_ctx *ctx;
- struct bpf_dynptr psrc, pdst, iv;
+ struct bpf_dynptr psrc, pdst;
v = crypto_ctx_value_lookup();
if (!v) {
@@ -73,9 +73,8 @@ int crypto_encrypt(struct __sk_buff *skb)
bpf_dynptr_from_skb(skb, 0, &psrc);
bpf_dynptr_from_mem(dst, len, 0, &pdst);
- bpf_dynptr_from_mem(dst, 0, 0, &iv);
- status = bpf_crypto_encrypt(ctx, &psrc, &pdst, &iv);
+ status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
__sync_add_and_fetch(&hits, 1);
return 0;
@@ -84,7 +83,7 @@ int crypto_encrypt(struct __sk_buff *skb)
SEC("tc")
int crypto_decrypt(struct __sk_buff *skb)
{
- struct bpf_dynptr psrc, pdst, iv;
+ struct bpf_dynptr psrc, pdst;
struct __crypto_ctx_value *v;
struct bpf_crypto_ctx *ctx;
@@ -98,9 +97,8 @@ int crypto_decrypt(struct __sk_buff *skb)
bpf_dynptr_from_skb(skb, 0, &psrc);
bpf_dynptr_from_mem(dst, len, 0, &pdst);
- bpf_dynptr_from_mem(dst, 0, 0, &iv);
- status = bpf_crypto_decrypt(ctx, &psrc, &pdst, &iv);
+ status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
__sync_add_and_fetch(&hits, 1);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c
index 1be0a3fa5efd..645be6cddf36 100644
--- a/tools/testing/selftests/bpf/progs/crypto_sanity.c
+++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c
@@ -89,7 +89,7 @@ int decrypt_sanity(struct __sk_buff *skb)
{
struct __crypto_ctx_value *v;
struct bpf_crypto_ctx *ctx;
- struct bpf_dynptr psrc, pdst, iv;
+ struct bpf_dynptr psrc, pdst;
int err;
err = skb_dynptr_validate(skb, &psrc);
@@ -114,12 +114,8 @@ int decrypt_sanity(struct __sk_buff *skb)
* production code, a percpu map should be used to store the result.
*/
bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
- /* iv dynptr has to be initialized with 0 size, but proper memory region
- * has to be provided anyway
- */
- bpf_dynptr_from_mem(dst, 0, 0, &iv);
- status = bpf_crypto_decrypt(ctx, &psrc, &pdst, &iv);
+ status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
return TC_ACT_SHOT;
}
@@ -129,7 +125,7 @@ int encrypt_sanity(struct __sk_buff *skb)
{
struct __crypto_ctx_value *v;
struct bpf_crypto_ctx *ctx;
- struct bpf_dynptr psrc, pdst, iv;
+ struct bpf_dynptr psrc, pdst;
int err;
status = 0;
@@ -156,12 +152,8 @@ int encrypt_sanity(struct __sk_buff *skb)
* production code, a percpu map should be used to store the result.
*/
bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
- /* iv dynptr has to be initialized with 0 size, but proper memory region
- * has to be provided anyway
- */
- bpf_dynptr_from_mem(dst, 0, 0, &iv);
- status = bpf_crypto_encrypt(ctx, &psrc, &pdst, &iv);
+ status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
return TC_ACT_SHOT;
}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 66a60bfb5867..e35bc1eac52a 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -964,7 +964,7 @@ int dynptr_invalidate_slice_reinit(void *ctx)
* mem_or_null pointers.
*/
SEC("?raw_tp")
-__failure __msg("R1 type=scalar expected=percpu_ptr_")
+__failure __regex("R[0-9]+ type=scalar expected=percpu_ptr_")
int dynptr_invalidate_slice_or_null(void *ctx)
{
struct bpf_dynptr ptr;
@@ -982,7 +982,7 @@ int dynptr_invalidate_slice_or_null(void *ctx)
/* Destruction of dynptr should also any slices obtained from it */
SEC("?raw_tp")
-__failure __msg("R7 invalid mem access 'scalar'")
+__failure __regex("R[0-9]+ invalid mem access 'scalar'")
int dynptr_invalidate_slice_failure(void *ctx)
{
struct bpf_dynptr ptr1;
@@ -1069,7 +1069,7 @@ int dynptr_read_into_slot(void *ctx)
/* bpf_dynptr_slice()s are read-only and cannot be written to */
SEC("?tc")
-__failure __msg("R0 cannot write into rdonly_mem")
+__failure __regex("R[0-9]+ cannot write into rdonly_mem")
int skb_invalid_slice_write(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
@@ -1686,3 +1686,27 @@ int test_dynptr_skb_small_buff(struct __sk_buff *skb)
return !!data;
}
+
+__noinline long global_call_bpf_dynptr(const struct bpf_dynptr *dynptr)
+{
+ long ret = 0;
+ /* Avoid leaving this global function empty to avoid having the compiler
+ * optimize away the call to this global function.
+ */
+ __sink(ret);
+ return ret;
+}
+
+SEC("?raw_tp")
+__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr")
+int test_dynptr_reg_type(void *ctx)
+{
+ struct task_struct *current = NULL;
+ /* R1 should be holding a PTR_TO_BTF_ID, so this shouldn't be a
+ * reg->type that can be passed to a function accepting a
+ * ARG_PTR_TO_DYNPTR | MEM_RDONLY. process_dynptr_func() should catch
+ * this.
+ */
+ global_call_bpf_dynptr((const struct bpf_dynptr *)current);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
index 8956eb78a226..2011cacdeb18 100644
--- a/tools/testing/selftests/bpf/progs/get_func_ip_test.c
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
@@ -5,13 +5,12 @@
char _license[] SEC("license") = "GPL";
-extern const void bpf_fentry_test1 __ksym;
+extern int bpf_fentry_test1(int a) __ksym;
+extern int bpf_modify_return_test(int a, int *b) __ksym;
+
extern const void bpf_fentry_test2 __ksym;
extern const void bpf_fentry_test3 __ksym;
extern const void bpf_fentry_test4 __ksym;
-extern const void bpf_modify_return_test __ksym;
-extern const void bpf_fentry_test6 __ksym;
-extern const void bpf_fentry_test7 __ksym;
extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak;
diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
index 1c2b6c1616b0..645b2c9f7867 100644
--- a/tools/testing/selftests/bpf/progs/ip_check_defrag.c
+++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
@@ -12,7 +12,7 @@
#define IP_OFFSET 0x1FFF
#define NEXTHDR_FRAGMENT 44
-extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags,
+extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym;
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
void *buffer, uint32_t buffer__sz) __ksym;
@@ -42,7 +42,7 @@ static bool is_frag_v6(struct ipv6hdr *ip6h)
return ip6h->nexthdr == NEXTHDR_FRAGMENT;
}
-static int handle_v4(struct sk_buff *skb)
+static int handle_v4(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
u8 iph_buf[20] = {};
@@ -64,7 +64,7 @@ static int handle_v4(struct sk_buff *skb)
return NF_ACCEPT;
}
-static int handle_v6(struct sk_buff *skb)
+static int handle_v6(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
struct ipv6hdr *ip6h;
@@ -89,9 +89,9 @@ static int handle_v6(struct sk_buff *skb)
SEC("netfilter")
int defrag(struct bpf_nf_ctx *ctx)
{
- struct sk_buff *skb = ctx->skb;
+ struct __sk_buff *skb = (struct __sk_buff *)ctx->skb;
- switch (bpf_ntohs(skb->protocol)) {
+ switch (bpf_ntohs(ctx->skb->protocol)) {
case ETH_P_IP:
return handle_v4(skb);
case ETH_P_IPV6:
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index fe65e0952a1e..16bdc3e25591 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -7,8 +7,6 @@
#include "bpf_misc.h"
#include "bpf_compiler.h"
-#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0]))
-
static volatile int zero = 0;
int my_pid;
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
index cf68d1e48a0f..f502f755f567 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -177,4 +177,41 @@ int kfunc_call_test_static_unused_arg(struct __sk_buff *skb)
return actual != expected ? -1 : 0;
}
+struct ctx_val {
+ struct bpf_testmod_ctx __kptr *ctx;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct ctx_val);
+} ctx_map SEC(".maps");
+
+SEC("tc")
+int kfunc_call_ctx(struct __sk_buff *skb)
+{
+ struct bpf_testmod_ctx *ctx;
+ int err = 0;
+
+ ctx = bpf_testmod_ctx_create(&err);
+ if (!ctx && !err)
+ err = -1;
+ if (ctx) {
+ int key = 0;
+ struct ctx_val *ctx_val = bpf_map_lookup_elem(&ctx_map, &key);
+
+ /* Transfer ctx to map to be freed via implicit dtor call
+ * on cleanup.
+ */
+ if (ctx_val)
+ ctx = bpf_kptr_xchg(&ctx_val->ctx, ctx);
+ if (ctx) {
+ bpf_testmod_ctx_release(ctx);
+ err = -1;
+ }
+ }
+ return err;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c
index bbba9eb46551..bd8b7fb7061e 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi_session.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c
@@ -4,8 +4,7 @@
#include <bpf/bpf_tracing.h>
#include <stdbool.h>
#include "bpf_kfuncs.h"
-
-#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0]))
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
index d49070803e22..0835b5edf685 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
@@ -25,7 +25,7 @@ int BPF_PROG(trigger)
static int check_cookie(__u64 val, __u64 *result)
{
- long *cookie;
+ __u64 *cookie;
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 1;
diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c
index f69bf3e30321..421f40835acd 100644
--- a/tools/testing/selftests/bpf/progs/linked_list.c
+++ b/tools/testing/selftests/bpf/progs/linked_list.c
@@ -4,10 +4,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include "bpf_experimental.h"
-
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0]))
-#endif
+#include "bpf_misc.h"
#include "linked_list.h"
diff --git a/tools/testing/selftests/bpf/progs/map_percpu_stats.c b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
index 10b2325c1720..63245785eb69 100644
--- a/tools/testing/selftests/bpf/progs/map_percpu_stats.c
+++ b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
@@ -7,7 +7,7 @@
__u32 target_id;
-__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;
+__s64 bpf_map_sum_elem_count(const struct bpf_map *map) __ksym;
SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_common.h b/tools/testing/selftests/bpf/progs/nested_trust_common.h
index 83d33931136e..1784b496be2e 100644
--- a/tools/testing/selftests/bpf/progs/nested_trust_common.h
+++ b/tools/testing/selftests/bpf/progs/nested_trust_common.h
@@ -7,6 +7,6 @@
#include <stdbool.h>
bool bpf_cpumask_test_cpu(unsigned int cpu, const struct cpumask *cpumask) __ksym;
-bool bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
+__u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
#endif /* _NESTED_TRUST_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
index c0062645fc68..9e067dcbf607 100644
--- a/tools/testing/selftests/bpf/progs/netif_receive_skb.c
+++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
@@ -5,6 +5,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
#include <errno.h>
@@ -23,10 +24,6 @@ bool skip = false;
#define BADPTR 0
#endif
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 1);
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 6957d9f2805e..8bd1ebd7d6af 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -9,6 +9,7 @@
#include "err.h"
#include "bpf_experimental.h"
#include "bpf_compiler.h"
+#include "bpf_misc.h"
#ifndef NULL
#define NULL 0
@@ -133,10 +134,6 @@ struct {
__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(arr) (int)(sizeof(arr) / sizeof(arr[0]))
-#endif
-
static INLINE bool IS_ERR(const void* ptr)
{
return IS_ERR_VALUE((unsigned long)ptr);
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
index 3fecf1c6dfe5..b722a1e1ddef 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_fail.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -105,7 +105,7 @@ long rbtree_api_remove_unadded_node(void *ctx)
}
SEC("?tc")
-__failure __msg("Unreleased reference id=3 alloc_insn=10")
+__failure __regex("Unreleased reference id=3 alloc_insn=[0-9]+")
long rbtree_api_remove_no_drop(void *ctx)
{
struct bpf_rb_node *res;
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
index 1553b9c16aa7..f8d4b7cfcd68 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
@@ -32,7 +32,7 @@ static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
}
SEC("?tc")
-__failure __msg("Unreleased reference id=4 alloc_insn=21")
+__failure __regex("Unreleased reference id=4 alloc_insn=[0-9]+")
long rbtree_refcounted_node_ref_escapes(void *ctx)
{
struct node_acquire *n, *m;
@@ -73,7 +73,7 @@ long refcount_acquire_maybe_null(void *ctx)
}
SEC("?tc")
-__failure __msg("Unreleased reference id=3 alloc_insn=9")
+__failure __regex("Unreleased reference id=3 alloc_insn=[0-9]+")
long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
{
struct node_acquire *n, *m;
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 7a438600ae98..60518aed1ffc 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -6,10 +6,7 @@
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
+#include "bpf_misc.h"
extern unsigned long CONFIG_HZ __kconfig;
diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
index db4abd2682fc..3bb4451524a1 100644
--- a/tools/testing/selftests/bpf/progs/skb_pkt_end.c
+++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
@@ -33,6 +33,8 @@ int main_prog(struct __sk_buff *skb)
struct iphdr *ip = NULL;
struct tcphdr *tcp;
__u8 proto = 0;
+ int urg_ptr;
+ u32 offset;
if (!(ip = get_iphdr(skb)))
goto out;
@@ -48,7 +50,14 @@ int main_prog(struct __sk_buff *skb)
if (!tcp)
goto out;
- return tcp->urg_ptr;
+ urg_ptr = tcp->urg_ptr;
+
+ /* Checksum validation part */
+ proto++;
+ offset = sizeof(struct ethhdr) + offsetof(struct iphdr, protocol);
+ bpf_skb_store_bytes(skb, offset, &proto, sizeof(proto), BPF_F_RECOMPUTE_CSUM);
+
+ return urg_ptr;
out:
return -1;
}
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
index 3494ca30fa7f..4a4e0b8d9b72 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
@@ -7,10 +7,6 @@
#include "bpf_experimental.h"
#include "bpf_misc.h"
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
struct generic_map_value {
void *data;
};
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 0289d8ce2b80..f7b330ddd007 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
index 0e4759ab38ff..a586f087ffeb 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
index 2dde8e3fe4c9..e68667aec6a6 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -45,7 +45,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
}
SEC("?lsm.s/bpf")
-__failure __msg("arg#0 expected pointer to stack or dynptr_ptr")
+__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr")
int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
{
unsigned long val = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
new file mode 100644
index 000000000000..7ac7e1de34d8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+SEC("tc")
+int kfunc_dynptr_nullable_test1(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_kfunc_dynptr_test(&data, NULL);
+
+ return 0;
+}
+
+SEC("tc")
+int kfunc_dynptr_nullable_test2(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_kfunc_dynptr_test(&data, &data);
+
+ return 0;
+}
+
+SEC("tc")
+__failure __msg("expected pointer to stack or const struct bpf_dynptr")
+int kfunc_dynptr_nullable_test3(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_kfunc_dynptr_test(NULL, &data);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
index 7f74077d6622..548660e299a5 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -10,10 +10,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_compiler.h"
-
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
+#include "bpf_misc.h"
/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
#define TCP_MEM_LOOPS 28 /* because 30 doesn't fit into 512 bytes of stack */
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
index 68a75436e8af..81249d119a8b 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -10,10 +10,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_compiler.h"
-
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
+#include "bpf_misc.h"
/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
#define TCP_MEM_LOOPS 20 /* because 30 doesn't fit into 512 bytes of stack */
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index efc3c61f7852..bbdd08764789 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -10,6 +10,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_compiler.h"
+#include "bpf_misc.h"
/* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
#define MAX_ULONG_STR_LEN 0xF
@@ -17,10 +18,6 @@
/* Max supported length of sysctl value string (pow2). */
#define MAX_VALUE_STR_LEN 0x40
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
const char tcp_mem_name[] = "net/ipv4/tcp_mem";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
index c8e4553648bf..44ee0d037f95 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -9,6 +9,7 @@
#include "bpf_kfuncs.h"
#include "test_siphash.h"
#include "test_tcp_custom_syncookie.h"
+#include "bpf_misc.h"
#define MAX_PACKET_OFF 0xffff
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h
index 29a6a53cf229..f8b1b7e68d2e 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h
@@ -7,8 +7,6 @@
#define __packed __attribute__((__packed__))
#define __force
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
#define swap(a, b) \
do { \
typeof(a) __tmp = (a); \
diff --git a/tools/testing/selftests/bpf/progs/timer_lockup.c b/tools/testing/selftests/bpf/progs/timer_lockup.c
new file mode 100644
index 000000000000..3e520133281e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_lockup.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer1_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer2_map SEC(".maps");
+
+int timer1_err;
+int timer2_err;
+
+static int timer_cb1(void *map, int *k, struct elem *v)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer2_map, &key);
+ if (timer)
+ timer2_err = bpf_timer_cancel(timer);
+
+ return 0;
+}
+
+static int timer_cb2(void *map, int *k, struct elem *v)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer1_map, &key);
+ if (timer)
+ timer1_err = bpf_timer_cancel(timer);
+
+ return 0;
+}
+
+SEC("tc")
+int timer1_prog(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer1_map, &key);
+ if (timer) {
+ bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME);
+ bpf_timer_set_callback(timer, timer_cb1);
+ bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
+ }
+
+ return 0;
+}
+
+SEC("tc")
+int timer2_prog(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer2_map, &key);
+ if (timer) {
+ bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME);
+ bpf_timer_set_callback(timer, timer_cb2);
+ bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c
index 515daef3c84b..c435a3a8328a 100644
--- a/tools/testing/selftests/bpf/progs/tracing_struct.c
+++ b/tools/testing/selftests/bpf/progs/tracing_struct.c
@@ -18,11 +18,6 @@ struct bpf_testmod_struct_arg_3 {
int b[];
};
-struct bpf_testmod_struct_arg_4 {
- u64 a;
- int b;
-};
-
long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs;
__u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3;
long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret;
@@ -30,9 +25,6 @@ long t3_a, t3_b, t3_c_a, t3_c_b, t3_ret;
long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret;
long t5_ret;
int t6;
-long t7_a, t7_b, t7_c, t7_d, t7_e, t7_f_a, t7_f_b, t7_ret;
-long t8_a, t8_b, t8_c, t8_d, t8_e, t8_f_a, t8_f_b, t8_g, t8_ret;
-
SEC("fentry/bpf_testmod_test_struct_arg_1")
int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c)
@@ -138,50 +130,4 @@ int BPF_PROG2(test_struct_arg_11, struct bpf_testmod_struct_arg_3 *, a)
return 0;
}
-SEC("fentry/bpf_testmod_test_struct_arg_7")
-int BPF_PROG2(test_struct_arg_12, __u64, a, void *, b, short, c, int, d,
- void *, e, struct bpf_testmod_struct_arg_4, f)
-{
- t7_a = a;
- t7_b = (long)b;
- t7_c = c;
- t7_d = d;
- t7_e = (long)e;
- t7_f_a = f.a;
- t7_f_b = f.b;
- return 0;
-}
-
-SEC("fexit/bpf_testmod_test_struct_arg_7")
-int BPF_PROG2(test_struct_arg_13, __u64, a, void *, b, short, c, int, d,
- void *, e, struct bpf_testmod_struct_arg_4, f, int, ret)
-{
- t7_ret = ret;
- return 0;
-}
-
-SEC("fentry/bpf_testmod_test_struct_arg_8")
-int BPF_PROG2(test_struct_arg_14, __u64, a, void *, b, short, c, int, d,
- void *, e, struct bpf_testmod_struct_arg_4, f, int, g)
-{
- t8_a = a;
- t8_b = (long)b;
- t8_c = c;
- t8_d = d;
- t8_e = (long)e;
- t8_f_a = f.a;
- t8_f_b = f.b;
- t8_g = g;
- return 0;
-}
-
-SEC("fexit/bpf_testmod_test_struct_arg_8")
-int BPF_PROG2(test_struct_arg_15, __u64, a, void *, b, short, c, int, d,
- void *, e, struct bpf_testmod_struct_arg_4, f, int, g,
- int, ret)
-{
- t8_ret = ret;
- return 0;
-}
-
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c b/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c
new file mode 100644
index 000000000000..4742012ace06
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct bpf_testmod_struct_arg_4 {
+ u64 a;
+ int b;
+};
+
+struct bpf_testmod_struct_arg_5 {
+ char a;
+ short b;
+ int c;
+ long d;
+};
+
+long t7_a, t7_b, t7_c, t7_d, t7_e, t7_f_a, t7_f_b, t7_ret;
+long t8_a, t8_b, t8_c, t8_d, t8_e, t8_f_a, t8_f_b, t8_g, t8_ret;
+long t9_a, t9_b, t9_c, t9_d, t9_e, t9_f, t9_g, t9_h_a, t9_h_b, t9_h_c, t9_h_d, t9_i, t9_ret;
+
+SEC("fentry/bpf_testmod_test_struct_arg_7")
+int BPF_PROG2(test_struct_many_args_1, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f)
+{
+ t7_a = a;
+ t7_b = (long)b;
+ t7_c = c;
+ t7_d = d;
+ t7_e = (long)e;
+ t7_f_a = f.a;
+ t7_f_b = f.b;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_7")
+int BPF_PROG2(test_struct_many_args_2, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, ret)
+{
+ t7_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_8")
+int BPF_PROG2(test_struct_many_args_3, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, g)
+{
+ t8_a = a;
+ t8_b = (long)b;
+ t8_c = c;
+ t8_d = d;
+ t8_e = (long)e;
+ t8_f_a = f.a;
+ t8_f_b = f.b;
+ t8_g = g;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_8")
+int BPF_PROG2(test_struct_many_args_4, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, g,
+ int, ret)
+{
+ t8_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_9")
+int BPF_PROG2(test_struct_many_args_5, __u64, a, void *, b, short, c, int, d, void *, e,
+ char, f, short, g, struct bpf_testmod_struct_arg_5, h, long, i)
+{
+ t9_a = a;
+ t9_b = (long)b;
+ t9_c = c;
+ t9_d = d;
+ t9_e = (long)e;
+ t9_f = f;
+ t9_g = g;
+ t9_h_a = h.a;
+ t9_h_b = h.b;
+ t9_h_c = h.c;
+ t9_h_d = h.d;
+ t9_i = i;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_9")
+int BPF_PROG2(test_struct_many_args_6, __u64, a, void *, b, short, c, int, d, void *, e,
+ char, f, short, g, struct bpf_testmod_struct_arg_5, h, long, i, int, ret)
+{
+ t9_ret = ret;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
index 11ab25c42c36..54de0389f878 100644
--- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
+++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
@@ -221,3 +221,25 @@ int user_ringbuf_callback_reinit_dynptr_ringbuf(void *ctx)
bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_ringbuf, NULL, 0);
return 0;
}
+
+__noinline long global_call_bpf_dynptr_data(struct bpf_dynptr *dynptr)
+{
+ bpf_dynptr_data(dynptr, 0xA, 0xA);
+ return 0;
+}
+
+static long callback_adjust_bpf_dynptr_reg_off(struct bpf_dynptr *dynptr,
+ void *ctx)
+{
+ global_call_bpf_dynptr_data(dynptr += 1024);
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("dereference of modified dynptr_ptr ptr R1 off=16384 disallowed")
+int user_ringbuf_callback_const_ptr_to_dynptr_reg_off(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf,
+ callback_adjust_bpf_dynptr_reg_off, NULL, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
index 93144ae6df74..67509c5d3982 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
index ef66ea460264..6065f862d964 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
index 80c737b6d340..e54bb5385bc1 100644
--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -551,4 +551,240 @@ int cond_break5(const void *ctx)
return cnt1 > 1 && cnt2 > 1 ? 1 : 0;
}
+#define ARR2_SZ 1000
+SEC(".data.arr2")
+char arr2[ARR2_SZ];
+
+SEC("socket")
+__success __flag(BPF_F_TEST_STATE_FREQ)
+int loop_inside_iter(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, sum = 0;
+ __u64 i = 0;
+
+ bpf_iter_num_new(&it, 0, ARR2_SZ);
+ while ((v = bpf_iter_num_next(&it))) {
+ if (i < ARR2_SZ)
+ sum += arr2[i++];
+ }
+ bpf_iter_num_destroy(&it);
+ return sum;
+}
+
+SEC("socket")
+__success __flag(BPF_F_TEST_STATE_FREQ)
+int loop_inside_iter_signed(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, sum = 0;
+ long i = 0;
+
+ bpf_iter_num_new(&it, 0, ARR2_SZ);
+ while ((v = bpf_iter_num_next(&it))) {
+ if (i < ARR2_SZ && i >= 0)
+ sum += arr2[i++];
+ }
+ bpf_iter_num_destroy(&it);
+ return sum;
+}
+
+volatile const int limit = ARR2_SZ;
+
+SEC("socket")
+__success __flag(BPF_F_TEST_STATE_FREQ)
+int loop_inside_iter_volatile_limit(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, sum = 0;
+ __u64 i = 0;
+
+ bpf_iter_num_new(&it, 0, ARR2_SZ);
+ while ((v = bpf_iter_num_next(&it))) {
+ if (i < limit)
+ sum += arr2[i++];
+ }
+ bpf_iter_num_destroy(&it);
+ return sum;
+}
+
+#define ARR_LONG_SZ 1000
+
+SEC(".data.arr_long")
+long arr_long[ARR_LONG_SZ];
+
+SEC("socket")
+__success
+int test1(const void *ctx)
+{
+ long i;
+
+ for (i = 0; i < ARR_LONG_SZ && can_loop; i++)
+ arr_long[i] = i;
+ return 0;
+}
+
+SEC("socket")
+__success
+int test2(const void *ctx)
+{
+ __u64 i;
+
+ for (i = zero; i < ARR_LONG_SZ && can_loop; i++) {
+ barrier_var(i);
+ arr_long[i] = i;
+ }
+ return 0;
+}
+
+SEC(".data.arr_foo")
+struct {
+ int a;
+ int b;
+} arr_foo[ARR_LONG_SZ];
+
+SEC("socket")
+__success
+int test3(const void *ctx)
+{
+ __u64 i;
+
+ for (i = zero; i < ARR_LONG_SZ && can_loop; i++) {
+ barrier_var(i);
+ arr_foo[i].a = i;
+ arr_foo[i].b = i;
+ }
+ return 0;
+}
+
+SEC("socket")
+__success
+int test4(const void *ctx)
+{
+ long i;
+
+ for (i = zero + ARR_LONG_SZ - 1; i < ARR_LONG_SZ && i >= 0 && can_loop; i--) {
+ barrier_var(i);
+ arr_foo[i].a = i;
+ arr_foo[i].b = i;
+ }
+ return 0;
+}
+
+char buf[10] SEC(".data.buf");
+
+SEC("socket")
+__description("check add const")
+__success
+__naked void check_add_const(void)
+{
+ /* typical LLVM generated loop with may_goto */
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 > 9 goto l1_%=; \
+l0_%=: r1 = %[buf]; \
+ r2 = r0; \
+ r1 += r2; \
+ r3 = *(u8 *)(r1 +0); \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 4; /* off of l1_%=: */ \
+ .long 0; /* imm */ \
+ r0 = r2; \
+ r0 += 1; \
+ if r2 < 9 goto l0_%=; \
+ exit; \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm_ptr(buf)
+ : __clobber_common);
+}
+
+SEC("socket")
+__failure
+__msg("*(u8 *)(r7 +0) = r0")
+__msg("invalid access to map value, value_size=10 off=10 size=1")
+__naked void check_add_const_3regs(void)
+{
+ asm volatile (
+ "r6 = %[buf];"
+ "r7 = %[buf];"
+ "call %[bpf_ktime_get_ns];"
+ "r1 = r0;" /* link r0.id == r1.id == r2.id */
+ "r2 = r0;"
+ "r1 += 1;" /* r1 == r0+1 */
+ "r2 += 2;" /* r2 == r0+2 */
+ "if r0 > 8 goto 1f;" /* r0 range [0, 8] */
+ "r6 += r1;" /* r1 range [1, 9] */
+ "r7 += r2;" /* r2 range [2, 10] */
+ "*(u8 *)(r6 +0) = r0;" /* safe, within bounds */
+ "*(u8 *)(r7 +0) = r0;" /* unsafe, out of bounds */
+ "1: exit;"
+ :
+ : __imm(bpf_ktime_get_ns),
+ __imm_ptr(buf)
+ : __clobber_common);
+}
+
+SEC("socket")
+__failure
+__msg("*(u8 *)(r8 -1) = r0")
+__msg("invalid access to map value, value_size=10 off=10 size=1")
+__naked void check_add_const_3regs_2if(void)
+{
+ asm volatile (
+ "r6 = %[buf];"
+ "r7 = %[buf];"
+ "r8 = %[buf];"
+ "call %[bpf_ktime_get_ns];"
+ "if r0 < 2 goto 1f;"
+ "r1 = r0;" /* link r0.id == r1.id == r2.id */
+ "r2 = r0;"
+ "r1 += 1;" /* r1 == r0+1 */
+ "r2 += 2;" /* r2 == r0+2 */
+ "if r2 > 11 goto 1f;" /* r2 range [0, 11] -> r0 range [-2, 9]; r1 range [-1, 10] */
+ "if r0 s< 0 goto 1f;" /* r0 range [0, 9] -> r1 range [1, 10]; r2 range [2, 11]; */
+ "r6 += r0;" /* r0 range [0, 9] */
+ "r7 += r1;" /* r1 range [1, 10] */
+ "r8 += r2;" /* r2 range [2, 11] */
+ "*(u8 *)(r6 +0) = r0;" /* safe, within bounds */
+ "*(u8 *)(r7 -1) = r0;" /* safe */
+ "*(u8 *)(r8 -1) = r0;" /* unsafe */
+ "1: exit;"
+ :
+ : __imm(bpf_ktime_get_ns),
+ __imm_ptr(buf)
+ : __clobber_common);
+}
+
+SEC("socket")
+__failure
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void check_add_const_regsafe_off(void)
+{
+ asm volatile (
+ "r8 = %[buf];"
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r7 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r1 = r0;" /* same ids for r1 and r0 */
+ "if r6 > r7 goto 1f;" /* this jump can't be predicted */
+ "r1 += 1;" /* r1.off == +1 */
+ "goto 2f;"
+ "1: r1 += 100;" /* r1.off == +100 */
+ "goto +0;" /* verify r1.off in regsafe() after this insn */
+ "2: if r0 > 8 goto 3f;" /* r0 range [0,8], r1 range either [1,9] or [100,108]*/
+ "r8 += r1;"
+ "*(u8 *)(r8 +0) = r0;" /* potentially unsafe, buf size is 10 */
+ "3: exit;"
+ :
+ : __imm(bpf_ktime_get_ns),
+ __imm_ptr(buf)
+ : __clobber_common);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
index 65bba330e7e5..ab9f9f2620ed 100644
--- a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
@@ -79,7 +79,7 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx)
return NF_ACCEPT;
}
-extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags,
+extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym;
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
void *buffer, uint32_t buffer__sz) __ksym;
@@ -90,8 +90,8 @@ __success __failure_unpriv
__retval(0)
int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx)
{
+ struct __sk_buff *skb = (struct __sk_buff *)ctx->skb;
const struct nf_hook_state *state = ctx->state;
- struct sk_buff *skb = ctx->skb;
const struct iphdr *iph;
const struct tcphdr *th;
u8 buffer_iph[20] = {};
@@ -99,7 +99,7 @@ int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx)
struct bpf_dynptr ptr;
uint8_t ihl;
- if (skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr))
+ if (ctx->skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr))
return NF_ACCEPT;
iph = bpf_dynptr_slice(&ptr, 0, buffer_iph, sizeof(buffer_iph));
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
index 4a58e0398e72..6a6fad625f7e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -8,8 +8,6 @@
#include "bpf_misc.h"
#include <../../../tools/include/linux/filter.h>
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
-
int vals[] SEC(".data.vals") = {1, 2, 3, 4};
__naked __noinline __used
diff --git a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c
index 49e712acbf60..f8d3ae0c29ae 100644
--- a/tools/testing/selftests/bpf/progs/wq.c
+++ b/tools/testing/selftests/bpf/progs/wq.c
@@ -32,6 +32,7 @@ struct {
} hmap_malloc SEC(".maps");
struct elem {
+ int ok_offset;
struct bpf_wq w;
};
@@ -53,7 +54,7 @@ __u32 ok;
__u32 ok_sleepable;
static int test_elem_callback(void *map, int *key,
- int (callback_fn)(void *map, int *key, struct bpf_wq *wq))
+ int (callback_fn)(void *map, int *key, void *value))
{
struct elem init = {}, *val;
struct bpf_wq *wq;
@@ -70,6 +71,8 @@ static int test_elem_callback(void *map, int *key,
if (!val)
return -2;
+ val->ok_offset = *key;
+
wq = &val->w;
if (bpf_wq_init(wq, map, 0) != 0)
return -3;
@@ -84,7 +87,7 @@ static int test_elem_callback(void *map, int *key,
}
static int test_hmap_elem_callback(void *map, int *key,
- int (callback_fn)(void *map, int *key, struct bpf_wq *wq))
+ int (callback_fn)(void *map, int *key, void *value))
{
struct hmap_elem init = {}, *val;
struct bpf_wq *wq;
@@ -114,7 +117,7 @@ static int test_hmap_elem_callback(void *map, int *key,
}
/* callback for non sleepable workqueue */
-static int wq_callback(void *map, int *key, struct bpf_wq *work)
+static int wq_callback(void *map, int *key, void *value)
{
bpf_kfunc_common_test();
ok |= (1 << *key);
@@ -122,10 +125,16 @@ static int wq_callback(void *map, int *key, struct bpf_wq *work)
}
/* callback for sleepable workqueue */
-static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work)
+static int wq_cb_sleepable(void *map, int *key, void *value)
{
+ struct elem *data = (struct elem *)value;
+ int offset = data->ok_offset;
+
+ if (*key != offset)
+ return 0;
+
bpf_kfunc_call_test_sleepable();
- ok_sleepable |= (1 << *key);
+ ok_sleepable |= (1 << offset);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c
index 4cbdb425f223..25b51a72fe0f 100644
--- a/tools/testing/selftests/bpf/progs/wq_failures.c
+++ b/tools/testing/selftests/bpf/progs/wq_failures.c
@@ -28,14 +28,14 @@ struct {
} lru SEC(".maps");
/* callback for non sleepable workqueue */
-static int wq_callback(void *map, int *key, struct bpf_wq *work)
+static int wq_callback(void *map, int *key, void *value)
{
bpf_kfunc_common_test();
return 0;
}
/* callback for sleepable workqueue */
-static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work)
+static int wq_cb_sleepable(void *map, int *key, void *value)
{
bpf_kfunc_call_test_sleepable();
return 0;
diff --git a/tools/testing/selftests/bpf/progs/xdp_flowtable.c b/tools/testing/selftests/bpf/progs/xdp_flowtable.c
new file mode 100644
index 000000000000..15209650f73b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_flowtable.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_KFUNC_PROTOTYPES
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86dd
+#define IP_MF 0x2000 /* "More Fragments" */
+#define IP_OFFSET 0x1fff /* "Fragment Offset" */
+#define AF_INET 2
+#define AF_INET6 10
+
+struct bpf_flowtable_opts___local {
+ s32 error;
+};
+
+struct flow_offload_tuple_rhash *
+bpf_xdp_flow_lookup(struct xdp_md *, struct bpf_fib_lookup *,
+ struct bpf_flowtable_opts___local *, u32) __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} stats SEC(".maps");
+
+static bool xdp_flowtable_offload_check_iphdr(struct iphdr *iph)
+{
+ /* ip fragmented traffic */
+ if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET))
+ return false;
+
+ /* ip options */
+ if (iph->ihl * 4 != sizeof(*iph))
+ return false;
+
+ if (iph->ttl <= 1)
+ return false;
+
+ return true;
+}
+
+static bool xdp_flowtable_offload_check_tcp_state(void *ports, void *data_end,
+ u8 proto)
+{
+ if (proto == IPPROTO_TCP) {
+ struct tcphdr *tcph = ports;
+
+ if (tcph + 1 > data_end)
+ return false;
+
+ if (tcph->fin || tcph->rst)
+ return false;
+ }
+
+ return true;
+}
+
+SEC("xdp.frags")
+int xdp_flowtable_do_lookup(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ struct bpf_flowtable_opts___local opts = {};
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct bpf_fib_lookup tuple = {
+ .ifindex = ctx->ingress_ifindex,
+ };
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ struct flow_ports *ports;
+ __u32 *val, key = 0;
+
+ if (eth + 1 > data_end)
+ return XDP_DROP;
+
+ switch (eth->h_proto) {
+ case bpf_htons(ETH_P_IP): {
+ struct iphdr *iph = data + sizeof(*eth);
+
+ ports = (struct flow_ports *)(iph + 1);
+ if (ports + 1 > data_end)
+ return XDP_PASS;
+
+ /* sanity check on ip header */
+ if (!xdp_flowtable_offload_check_iphdr(iph))
+ return XDP_PASS;
+
+ if (!xdp_flowtable_offload_check_tcp_state(ports, data_end,
+ iph->protocol))
+ return XDP_PASS;
+
+ tuple.family = AF_INET;
+ tuple.tos = iph->tos;
+ tuple.l4_protocol = iph->protocol;
+ tuple.tot_len = bpf_ntohs(iph->tot_len);
+ tuple.ipv4_src = iph->saddr;
+ tuple.ipv4_dst = iph->daddr;
+ tuple.sport = ports->source;
+ tuple.dport = ports->dest;
+ break;
+ }
+ case bpf_htons(ETH_P_IPV6): {
+ struct in6_addr *src = (struct in6_addr *)tuple.ipv6_src;
+ struct in6_addr *dst = (struct in6_addr *)tuple.ipv6_dst;
+ struct ipv6hdr *ip6h = data + sizeof(*eth);
+
+ ports = (struct flow_ports *)(ip6h + 1);
+ if (ports + 1 > data_end)
+ return XDP_PASS;
+
+ if (ip6h->hop_limit <= 1)
+ return XDP_PASS;
+
+ if (!xdp_flowtable_offload_check_tcp_state(ports, data_end,
+ ip6h->nexthdr))
+ return XDP_PASS;
+
+ tuple.family = AF_INET6;
+ tuple.l4_protocol = ip6h->nexthdr;
+ tuple.tot_len = bpf_ntohs(ip6h->payload_len);
+ *src = ip6h->saddr;
+ *dst = ip6h->daddr;
+ tuple.sport = ports->source;
+ tuple.dport = ports->dest;
+ break;
+ }
+ default:
+ return XDP_PASS;
+ }
+
+ tuplehash = bpf_xdp_flow_lookup(ctx, &tuple, &opts, sizeof(opts));
+ if (!tuplehash)
+ return XDP_PASS;
+
+ val = bpf_map_lookup_elem(&stats, &key);
+ if (val)
+ __sync_add_and_fetch(val, 1);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
index 7ea9785738b5..f8f5dc9f72b8 100644
--- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+#define BPF_NO_KFUNC_PROTOTYPES
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
diff --git a/tools/testing/selftests/bpf/progs/xfrm_info.c b/tools/testing/selftests/bpf/progs/xfrm_info.c
index f6a501fbba2b..a1d9f106c3f0 100644
--- a/tools/testing/selftests/bpf/progs/xfrm_info.c
+++ b/tools/testing/selftests/bpf/progs/xfrm_info.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_KFUNC_PROTOTYPES
#include "vmlinux.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 524c38e9cde4..f14e10b0de96 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#include <linux/capability.h>
#include <stdlib.h>
+#include <regex.h>
#include <test_progs.h>
#include <bpf/btf.h>
@@ -17,9 +18,11 @@
#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure"
#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
+#define TEST_TAG_EXPECT_REGEX_PFX "comment:test_expect_regex="
#define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv"
#define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv"
#define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv="
+#define TEST_TAG_EXPECT_REGEX_PFX_UNPRIV "comment:test_expect_regex_unpriv="
#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags="
#define TEST_TAG_DESCRIPTION_PFX "comment:test_description="
@@ -46,10 +49,16 @@ enum mode {
UNPRIV = 2
};
+struct expect_msg {
+ const char *substr; /* substring match */
+ const char *regex_str; /* regex-based match */
+ regex_t regex;
+};
+
struct test_subspec {
char *name;
bool expect_failure;
- const char **expect_msgs;
+ struct expect_msg *expect_msgs;
size_t expect_msg_cnt;
int retval;
bool execute;
@@ -89,6 +98,16 @@ void test_loader_fini(struct test_loader *tester)
static void free_test_spec(struct test_spec *spec)
{
+ int i;
+
+ /* Deallocate expect_msgs arrays. */
+ for (i = 0; i < spec->priv.expect_msg_cnt; i++)
+ if (spec->priv.expect_msgs[i].regex_str)
+ regfree(&spec->priv.expect_msgs[i].regex);
+ for (i = 0; i < spec->unpriv.expect_msg_cnt; i++)
+ if (spec->unpriv.expect_msgs[i].regex_str)
+ regfree(&spec->unpriv.expect_msgs[i].regex);
+
free(spec->priv.name);
free(spec->unpriv.name);
free(spec->priv.expect_msgs);
@@ -100,18 +119,38 @@ static void free_test_spec(struct test_spec *spec)
spec->unpriv.expect_msgs = NULL;
}
-static int push_msg(const char *msg, struct test_subspec *subspec)
+static int push_msg(const char *substr, const char *regex_str, struct test_subspec *subspec)
{
void *tmp;
+ int regcomp_res;
+ char error_msg[100];
+ struct expect_msg *msg;
- tmp = realloc(subspec->expect_msgs, (1 + subspec->expect_msg_cnt) * sizeof(void *));
+ tmp = realloc(subspec->expect_msgs,
+ (1 + subspec->expect_msg_cnt) * sizeof(struct expect_msg));
if (!tmp) {
ASSERT_FAIL("failed to realloc memory for messages\n");
return -ENOMEM;
}
subspec->expect_msgs = tmp;
- subspec->expect_msgs[subspec->expect_msg_cnt++] = msg;
+ msg = &subspec->expect_msgs[subspec->expect_msg_cnt];
+
+ if (substr) {
+ msg->substr = substr;
+ msg->regex_str = NULL;
+ } else {
+ msg->regex_str = regex_str;
+ msg->substr = NULL;
+ regcomp_res = regcomp(&msg->regex, regex_str, REG_EXTENDED|REG_NEWLINE);
+ if (regcomp_res != 0) {
+ regerror(regcomp_res, &msg->regex, error_msg, sizeof(error_msg));
+ PRINT_FAIL("Regexp compilation error in '%s': '%s'\n",
+ regex_str, error_msg);
+ return -EINVAL;
+ }
+ }
+ subspec->expect_msg_cnt += 1;
return 0;
}
@@ -233,13 +272,25 @@ static int parse_test_spec(struct test_loader *tester,
spec->mode_mask |= UNPRIV;
} else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) {
msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1;
- err = push_msg(msg, &spec->priv);
+ err = push_msg(msg, NULL, &spec->priv);
if (err)
goto cleanup;
spec->mode_mask |= PRIV;
} else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV)) {
msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX_UNPRIV) - 1;
- err = push_msg(msg, &spec->unpriv);
+ err = push_msg(msg, NULL, &spec->unpriv);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ } else if (str_has_pfx(s, TEST_TAG_EXPECT_REGEX_PFX)) {
+ msg = s + sizeof(TEST_TAG_EXPECT_REGEX_PFX) - 1;
+ err = push_msg(NULL, msg, &spec->priv);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= PRIV;
+ } else if (str_has_pfx(s, TEST_TAG_EXPECT_REGEX_PFX_UNPRIV)) {
+ msg = s + sizeof(TEST_TAG_EXPECT_REGEX_PFX_UNPRIV) - 1;
+ err = push_msg(NULL, msg, &spec->unpriv);
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
@@ -337,16 +388,13 @@ static int parse_test_spec(struct test_loader *tester,
}
if (!spec->unpriv.expect_msgs) {
- size_t sz = spec->priv.expect_msg_cnt * sizeof(void *);
+ for (i = 0; i < spec->priv.expect_msg_cnt; i++) {
+ struct expect_msg *msg = &spec->priv.expect_msgs[i];
- spec->unpriv.expect_msgs = malloc(sz);
- if (!spec->unpriv.expect_msgs) {
- PRINT_FAIL("failed to allocate memory for unpriv.expect_msgs\n");
- err = -ENOMEM;
- goto cleanup;
+ err = push_msg(msg->substr, msg->regex_str, &spec->unpriv);
+ if (err)
+ goto cleanup;
}
- memcpy(spec->unpriv.expect_msgs, spec->priv.expect_msgs, sz);
- spec->unpriv.expect_msg_cnt = spec->priv.expect_msg_cnt;
}
}
@@ -402,27 +450,40 @@ static void validate_case(struct test_loader *tester,
struct bpf_program *prog,
int load_err)
{
- int i, j;
+ int i, j, err;
+ char *match;
+ regmatch_t reg_match[1];
for (i = 0; i < subspec->expect_msg_cnt; i++) {
- char *match;
- const char *expect_msg;
-
- expect_msg = subspec->expect_msgs[i];
+ struct expect_msg *msg = &subspec->expect_msgs[i];
+
+ if (msg->substr) {
+ match = strstr(tester->log_buf + tester->next_match_pos, msg->substr);
+ if (match)
+ tester->next_match_pos = match - tester->log_buf + strlen(msg->substr);
+ } else {
+ err = regexec(&msg->regex,
+ tester->log_buf + tester->next_match_pos, 1, reg_match, 0);
+ if (err == 0) {
+ match = tester->log_buf + tester->next_match_pos + reg_match[0].rm_so;
+ tester->next_match_pos += reg_match[0].rm_eo;
+ } else {
+ match = NULL;
+ }
+ }
- match = strstr(tester->log_buf + tester->next_match_pos, expect_msg);
if (!ASSERT_OK_PTR(match, "expect_msg")) {
- /* if we are in verbose mode, we've already emitted log */
if (env.verbosity == VERBOSE_NONE)
emit_verifier_log(tester->log_buf, true /*force*/);
- for (j = 0; j < i; j++)
- fprintf(stderr,
- "MATCHED MSG: '%s'\n", subspec->expect_msgs[j]);
- fprintf(stderr, "EXPECTED MSG: '%s'\n", expect_msg);
+ for (j = 0; j <= i; j++) {
+ msg = &subspec->expect_msgs[j];
+ fprintf(stderr, "%s %s: '%s'\n",
+ j < i ? "MATCHED " : "EXPECTED",
+ msg->substr ? "SUBSTR" : " REGEX",
+ msg->substr ?: msg->regex_str);
+ }
return;
}
-
- tester->next_match_pos = match - tester->log_buf + strlen(expect_msg);
}
}
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 9cba4ec844a5..3e02d7267de8 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1936,7 +1936,6 @@ static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt)
static int test_selftest(int cg_fd, struct sockmap_options *opt)
{
-
test_selftests_sockmap(cg_fd, opt);
test_selftests_sockhash(cg_fd, opt);
test_selftests_ktls(cg_fd, opt);
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
index aebc58c24dc5..3844f9b8232a 100644
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
@@ -156,10 +156,6 @@ static int v6only_false(int fd, void *opts)
int main(int argc, char **argv)
{
struct network_helper_opts opts = { 0 };
- struct sockaddr_in addr4;
- struct sockaddr_in6 addr6;
- struct sockaddr_in addr4dual;
- struct sockaddr_in6 addr6dual;
int server = -1;
int server_v6 = -1;
int server_dual = -1;
@@ -181,36 +177,17 @@ int main(int argc, char **argv)
goto err;
}
- memset(&addr4, 0, sizeof(addr4));
- addr4.sin_family = AF_INET;
- addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addr4.sin_port = 0;
- memcpy(&addr4dual, &addr4, sizeof(addr4dual));
-
- memset(&addr6, 0, sizeof(addr6));
- addr6.sin6_family = AF_INET6;
- addr6.sin6_addr = in6addr_loopback;
- addr6.sin6_port = 0;
-
- memset(&addr6dual, 0, sizeof(addr6dual));
- addr6dual.sin6_family = AF_INET6;
- addr6dual.sin6_addr = in6addr_any;
- addr6dual.sin6_port = 0;
-
- server = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr4,
- sizeof(addr4), NULL);
+ server = start_server_str(AF_INET, SOCK_STREAM, "127.0.0.1", 0, NULL);
if (server == -1)
goto err;
opts.post_socket_cb = v6only_true;
- server_v6 = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr6,
- sizeof(addr6), &opts);
+ server_v6 = start_server_str(AF_INET6, SOCK_STREAM, "::1", 0, &opts);
if (server_v6 == -1)
goto err;
opts.post_socket_cb = v6only_false;
- server_dual = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr6dual,
- sizeof(addr6dual), &opts);
+ server_dual = start_server_str(AF_INET6, SOCK_STREAM, "::0", 0, &opts);
if (server_dual == -1)
goto err;
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 0a9293a57211..90643ccc221d 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -39,12 +39,12 @@
.result = VERBOSE_ACCEPT,
.errstr =
"mark_precise: frame0: last_idx 26 first_idx 20\
- mark_precise: frame0: regs=r2 stack= before 25\
- mark_precise: frame0: regs=r2 stack= before 24\
- mark_precise: frame0: regs=r2 stack= before 23\
- mark_precise: frame0: regs=r2 stack= before 22\
- mark_precise: frame0: regs=r2 stack= before 20\
- mark_precise: frame0: parent state regs=r2 stack=:\
+ mark_precise: frame0: regs=r2,r9 stack= before 25\
+ mark_precise: frame0: regs=r2,r9 stack= before 24\
+ mark_precise: frame0: regs=r2,r9 stack= before 23\
+ mark_precise: frame0: regs=r2,r9 stack= before 22\
+ mark_precise: frame0: regs=r2,r9 stack= before 20\
+ mark_precise: frame0: parent state regs=r2,r9 stack=:\
mark_precise: frame0: last_idx 19 first_idx 10\
mark_precise: frame0: regs=r2,r9 stack= before 19\
mark_precise: frame0: regs=r9 stack= before 18\
@@ -100,11 +100,11 @@
.errstr =
"26: (85) call bpf_probe_read_kernel#113\
mark_precise: frame0: last_idx 26 first_idx 22\
- mark_precise: frame0: regs=r2 stack= before 25\
- mark_precise: frame0: regs=r2 stack= before 24\
- mark_precise: frame0: regs=r2 stack= before 23\
- mark_precise: frame0: regs=r2 stack= before 22\
- mark_precise: frame0: parent state regs=r2 stack=:\
+ mark_precise: frame0: regs=r2,r9 stack= before 25\
+ mark_precise: frame0: regs=r2,r9 stack= before 24\
+ mark_precise: frame0: regs=r2,r9 stack= before 23\
+ mark_precise: frame0: regs=r2,r9 stack= before 22\
+ mark_precise: frame0: parent state regs=r2,r9 stack=:\
mark_precise: frame0: last_idx 20 first_idx 20\
mark_precise: frame0: regs=r2,r9 stack= before 20\
mark_precise: frame0: parent state regs=r2,r9 stack=:\
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 2eac0895b0a1..8144fd145237 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -196,6 +196,12 @@ static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem
};
int ret;
+ if (umem->fill_size)
+ cfg.fill_size = umem->fill_size;
+
+ if (umem->comp_size)
+ cfg.comp_size = umem->comp_size;
+
if (umem->unaligned_mode)
cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
@@ -265,6 +271,10 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i
cfg.bind_flags |= XDP_SHARED_UMEM;
if (ifobject->mtu > MAX_ETH_PKT_SIZE)
cfg.bind_flags |= XDP_USE_SG;
+ if (umem->comp_size)
+ cfg.tx_size = umem->comp_size;
+ if (umem->fill_size)
+ cfg.rx_size = umem->fill_size;
txr = ifobject->tx_on ? &xsk->tx : NULL;
rxr = ifobject->rx_on ? &xsk->rx : NULL;
@@ -1616,7 +1626,7 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream
if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
buffers_to_fill = umem->num_frames;
else
- buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ buffers_to_fill = umem->fill_size;
ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
if (ret != buffers_to_fill)
@@ -1899,11 +1909,15 @@ static int testapp_validate_traffic(struct test_spec *test)
}
if (test->set_ring) {
- if (ifobj_tx->hw_ring_size_supp)
- return set_ring_size(ifobj_tx);
-
- ksft_test_result_skip("Changing HW ring size not supported.\n");
- return TEST_SKIP;
+ if (ifobj_tx->hw_ring_size_supp) {
+ if (set_ring_size(ifobj_tx)) {
+ ksft_test_result_skip("Failed to change HW ring size.\n");
+ return TEST_FAILURE;
+ }
+ } else {
+ ksft_test_result_skip("Changing HW ring size not supported.\n");
+ return TEST_SKIP;
+ }
}
xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx);
@@ -2441,7 +2455,7 @@ static int testapp_hw_sw_min_ring_size(struct test_spec *test)
static int testapp_hw_sw_max_ring_size(struct test_spec *test)
{
- u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2;
+ u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4;
int ret;
test->set_ring = true;
@@ -2449,7 +2463,8 @@ static int testapp_hw_sw_max_ring_size(struct test_spec *test)
test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending;
test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending;
test->ifobj_rx->umem->num_frames = max_descs;
- test->ifobj_rx->xsk->rxqsize = max_descs;
+ test->ifobj_rx->umem->fill_size = max_descs;
+ test->ifobj_rx->umem->comp_size = max_descs;
test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
@@ -2457,9 +2472,12 @@ static int testapp_hw_sw_max_ring_size(struct test_spec *test)
if (ret)
return ret;
- /* Set batch_size to 4095 */
- test->ifobj_tx->xsk->batch_size = max_descs - 1;
- test->ifobj_rx->xsk->batch_size = max_descs - 1;
+ /* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when
+ * updating the Rx HW tail register.
+ */
+ test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
+ test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
+ pkt_stream_replace(test, max_descs, MIN_PKT_SIZE);
return testapp_validate_traffic(test);
}
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 906de5fab7a3..885c948c5d83 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -80,6 +80,8 @@ struct xsk_umem_info {
void *buffer;
u32 frame_size;
u32 base_addr;
+ u32 fill_size;
+ u32 comp_size;
bool unaligned_mode;
};
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
index 68c7d40214eb..931dbc36ca43 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -5,10 +5,10 @@ import datetime
import random
from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ge, ksft_lt
from lib.py import NetDrvEpEnv
-from lib.py import NetdevFamily
+from lib.py import EthtoolFamily, NetdevFamily
from lib.py import KsftSkipEx
from lib.py import rand_port
-from lib.py import ethtool, ip, GenerateTraffic, CmdExitFailure
+from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure
def _rss_key_str(key):
@@ -63,10 +63,33 @@ def _get_rx_cnts(cfg, prev=None):
return queue_stats
+def _send_traffic_check(cfg, port, name, params):
+ # params is a dict with 3 possible keys:
+ # - "target": required, which queues we expect to get iperf traffic
+ # - "empty": optional, which queues should see no traffic at all
+ # - "noise": optional, which queues we expect to see low traffic;
+ # used for queues of the main context, since some background
+ # OS activity may use those queues while we're testing
+ # the value for each is a list, or some other iterable containing queue ids.
+
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+
+ directed = sum(cnts[i] for i in params['target'])
+
+ ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts))
+ if params.get('noise'):
+ ksft_lt(sum(cnts[i] for i in params['noise']), directed / 2,
+ "traffic on other queues:" + str(cnts))
+ if params.get('empty'):
+ ksft_eq(sum(cnts[i] for i in params['empty']), 0,
+ "traffic on inactive queues: " + str(cnts))
+
+
def test_rss_key_indir(cfg):
- """
- Test basics like updating the main RSS key and indirection table.
- """
+ """Test basics like updating the main RSS key and indirection table."""
+
if len(_get_rx_cnts(cfg)) < 2:
KsftSkipEx("Device has only one queue (or doesn't support queue stats)")
@@ -89,6 +112,7 @@ def test_rss_key_indir(cfg):
# Set the indirection table
ethtool(f"-X {cfg.ifname} equal 2")
+ reset_indir = defer(ethtool, f"-X {cfg.ifname} default")
data = get_rss(cfg)
ksft_eq(0, min(data['rss-indirection-table']))
ksft_eq(1, max(data['rss-indirection-table']))
@@ -104,7 +128,7 @@ def test_rss_key_indir(cfg):
ksft_eq(sum(cnts[2:]), 0, "traffic on unused queues: " + str(cnts))
# Restore, and check traffic gets spread again
- ethtool(f"-X {cfg.ifname} default")
+ reset_indir.exec()
cnts = _get_rx_cnts(cfg)
GenerateTraffic(cfg).wait_pkts_and_stop(20000)
@@ -113,6 +137,143 @@ def test_rss_key_indir(cfg):
ksft_lt(sum(cnts[:2]), sum(cnts[2:]), "traffic distributed: " + str(cnts))
+def test_rss_queue_reconfigure(cfg, main_ctx=True):
+ """Make sure queue changes can't override requested RSS config.
+
+ By default main RSS table should change to include all queues.
+ When user sets a specific RSS config the driver should preserve it,
+ even when queue count changes. Driver should refuse to deactivate
+ queues used in the user-set RSS config.
+ """
+
+ if not main_ctx:
+ require_ntuple(cfg)
+
+ # Start with 4 queues, an arbitrary known number.
+ try:
+ qcnt = len(_get_rx_cnts(cfg))
+ ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test or qstat not supported")
+
+ if main_ctx:
+ ctx_id = 0
+ ctx_ref = ""
+ else:
+ ctx_id = ethtool_create(cfg, "-X", "context new")
+ ctx_ref = f"context {ctx_id}"
+ defer(ethtool, f"-X {cfg.ifname} {ctx_ref} delete")
+
+ # Indirection table should be distributing to all queues.
+ data = get_rss(cfg, context=ctx_id)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(3, max(data['rss-indirection-table']))
+
+ # Increase queues, indirection table should be distributing to all queues.
+ # It's unclear whether tables of additional contexts should be reset, too.
+ if main_ctx:
+ ethtool(f"-L {cfg.ifname} combined 5")
+ data = get_rss(cfg)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(4, max(data['rss-indirection-table']))
+ ethtool(f"-L {cfg.ifname} combined 4")
+
+ # Configure the table explicitly
+ port = rand_port()
+ ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 0 1")
+ if main_ctx:
+ other_key = 'empty'
+ defer(ethtool, f"-X {cfg.ifname} default")
+ else:
+ other_key = 'noise'
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}"
+ ntuple = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple}")
+
+ _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3),
+ other_key: (1, 2) })
+
+ # We should be able to increase queues, but table should be left untouched
+ ethtool(f"-L {cfg.ifname} combined 5")
+ data = get_rss(cfg, context=ctx_id)
+ ksft_eq({0, 3}, set(data['rss-indirection-table']))
+
+ _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3),
+ other_key: (1, 2, 4) })
+
+ # Setting queue count to 3 should fail, queue 3 is used
+ try:
+ ethtool(f"-L {cfg.ifname} combined 3")
+ except CmdExitFailure:
+ pass
+ else:
+ raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})")
+
+
+def test_rss_resize(cfg):
+ """Test resizing of the RSS table.
+
+ Some devices dynamically increase and decrease the size of the RSS
+ indirection table based on the number of enabled queues.
+ When that happens driver must maintain the balance of entries
+ (preferably duplicating the smaller table).
+ """
+
+ channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ ch_max = channels['combined-max']
+ qcnt = channels['combined-count']
+
+ if ch_max < 2:
+ raise KsftSkipEx(f"Not enough queues for the test: {ch_max}")
+
+ ethtool(f"-L {cfg.ifname} combined 2")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+
+ ethtool(f"-X {cfg.ifname} weight 1 7")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ ethtool(f"-L {cfg.ifname} combined {ch_max}")
+ data = get_rss(cfg)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(1, max(data['rss-indirection-table']))
+
+ ksft_eq(7,
+ data['rss-indirection-table'].count(1) /
+ data['rss-indirection-table'].count(0),
+ f"Table imbalance after resize: {data['rss-indirection-table']}")
+
+
+def test_hitless_key_update(cfg):
+ """Test that flows may be rehashed without impacting traffic.
+
+ Some workloads may want to rehash the flows in response to an imbalance.
+ Most effective way to do that is changing the RSS key. Check that changing
+ the key does not cause link flaps or traffic disruption.
+
+ Disrupting traffic for key update is not a bug, but makes the key
+ update unusable for rehashing under load.
+ """
+ data = get_rss(cfg)
+ key_len = len(data['rss-hash-key'])
+
+ key = _rss_key_rand(key_len)
+
+ tgen = GenerateTraffic(cfg)
+ try:
+ errors0, carrier0 = get_drop_err_sum(cfg)
+ t0 = datetime.datetime.now()
+ ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key))
+ t1 = datetime.datetime.now()
+ errors1, carrier1 = get_drop_err_sum(cfg)
+ finally:
+ tgen.wait_pkts_and_stop(5000)
+
+ ksft_lt((t1 - t0).total_seconds(), 0.2)
+ ksft_eq(errors1 - errors1, 0)
+ ksft_eq(carrier1 - carrier0, 0)
+
+
def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
"""
Test separating traffic into RSS contexts.
@@ -127,64 +288,53 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
# Try to allocate more queues when necessary
qcnt = len(_get_rx_cnts(cfg))
- if qcnt >= 2 + 2 * ctx_cnt:
- qcnt = None
- else:
+ if qcnt < 2 + 2 * ctx_cnt:
try:
ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}")
ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
except:
raise KsftSkipEx("Not enough queues for the test")
- ntuple = []
- ctx_id = []
ports = []
- try:
- # Use queues 0 and 1 for normal traffic
- ethtool(f"-X {cfg.ifname} equal 2")
- for i in range(ctx_cnt):
- want_cfg = f"start {2 + i * 2} equal 2"
- create_cfg = want_cfg if create_with_cfg else ""
-
- try:
- ctx_id.append(ethtool_create(cfg, "-X", f"context new {create_cfg}"))
- except CmdExitFailure:
- # try to carry on and skip at the end
- if i == 0:
- raise
- ksft_pr(f"Failed to create context {i + 1}, trying to test what we got")
- ctx_cnt = i
- break
-
- if not create_with_cfg:
- ethtool(f"-X {cfg.ifname} context {ctx_id[i]} {want_cfg}")
-
- # Sanity check the context we just created
- data = get_rss(cfg, ctx_id[i])
- ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data))
- ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data))
-
- ports.append(rand_port())
- flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id[i]}"
- ntuple.append(ethtool_create(cfg, "-N", flow))
+ # Use queues 0 and 1 for normal traffic
+ ethtool(f"-X {cfg.ifname} equal 2")
+ defer(ethtool, f"-X {cfg.ifname} default")
- for i in range(ctx_cnt):
- cnts = _get_rx_cnts(cfg)
- GenerateTraffic(cfg, port=ports[i]).wait_pkts_and_stop(20000)
- cnts = _get_rx_cnts(cfg, prev=cnts)
+ for i in range(ctx_cnt):
+ want_cfg = f"start {2 + i * 2} equal 2"
+ create_cfg = want_cfg if create_with_cfg else ""
- ksft_lt(sum(cnts[ :2]), 10000, "traffic on main context:" + str(cnts))
- ksft_ge(sum(cnts[2+i*2:4+i*2]), 20000, f"traffic on context {i}: " + str(cnts))
- ksft_eq(sum(cnts[2:2+i*2] + cnts[4+i*2:]), 0, "traffic on other contexts: " + str(cnts))
- finally:
- for nid in ntuple:
- ethtool(f"-N {cfg.ifname} delete {nid}")
- for cid in ctx_id:
- ethtool(f"-X {cfg.ifname} context {cid} delete")
- ethtool(f"-X {cfg.ifname} default")
- if qcnt:
- ethtool(f"-L {cfg.ifname} combined {qcnt}")
+ try:
+ ctx_id = ethtool_create(cfg, "-X", f"context new {create_cfg}")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+ except CmdExitFailure:
+ # try to carry on and skip at the end
+ if i == 0:
+ raise
+ ksft_pr(f"Failed to create context {i + 1}, trying to test what we got")
+ ctx_cnt = i
+ break
+
+ if not create_with_cfg:
+ ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}")
+
+ # Sanity check the context we just created
+ data = get_rss(cfg, ctx_id)
+ ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data))
+ ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data))
+
+ ports.append(rand_port())
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}"
+ ntuple = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple}")
+
+ for i in range(ctx_cnt):
+ _send_traffic_check(cfg, ports[i], f"context {i}",
+ { 'target': (2+i*2, 3+i*2),
+ 'noise': (0, 1),
+ 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) })
if requested_ctx_cnt != ctx_cnt:
raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}")
@@ -202,6 +352,10 @@ def test_rss_context4_create_with_cfg(cfg):
test_rss_context(cfg, 4, create_with_cfg=True)
+def test_rss_context_queue_reconfigure(cfg):
+ test_rss_queue_reconfigure(cfg, main_ctx=False)
+
+
def test_rss_context_out_of_order(cfg, ctx_cnt=4):
"""
Test separating traffic into RSS contexts.
@@ -216,74 +370,66 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4):
# Try to allocate more queues when necessary
qcnt = len(_get_rx_cnts(cfg))
- if qcnt >= 2 + 2 * ctx_cnt:
- qcnt = None
- else:
+ if qcnt < 2 + 2 * ctx_cnt:
try:
ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}")
ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
except:
raise KsftSkipEx("Not enough queues for the test")
ntuple = []
- ctx_id = []
+ ctx = []
ports = []
def remove_ctx(idx):
- ethtool(f"-N {cfg.ifname} delete {ntuple[idx]}")
+ ntuple[idx].exec()
ntuple[idx] = None
- ethtool(f"-X {cfg.ifname} context {ctx_id[idx]} delete")
- ctx_id[idx] = None
+ ctx[idx].exec()
+ ctx[idx] = None
def check_traffic():
for i in range(ctx_cnt):
- cnts = _get_rx_cnts(cfg)
- GenerateTraffic(cfg, port=ports[i]).wait_pkts_and_stop(20000)
- cnts = _get_rx_cnts(cfg, prev=cnts)
-
- if ctx_id[i] is None:
- ksft_lt(sum(cnts[ :2]), 10000, "traffic on main context:" + str(cnts))
- ksft_ge(sum(cnts[2+i*2:4+i*2]), 20000, f"traffic on context {i}: " + str(cnts))
- ksft_eq(sum(cnts[2:2+i*2] + cnts[4+i*2:]), 0, "traffic on other contexts: " + str(cnts))
+ if ctx[i]:
+ expected = {
+ 'target': (2+i*2, 3+i*2),
+ 'noise': (0, 1),
+ 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt))
+ }
else:
- ksft_ge(sum(cnts[ :2]), 20000, "traffic on main context:" + str(cnts))
- ksft_eq(sum(cnts[2: ]), 0, "traffic on other contexts: " + str(cnts))
+ expected = {
+ 'target': (0, 1),
+ 'empty': range(2, 2+2*ctx_cnt)
+ }
- try:
- # Use queues 0 and 1 for normal traffic
- ethtool(f"-X {cfg.ifname} equal 2")
+ _send_traffic_check(cfg, ports[i], f"context {i}", expected)
- for i in range(ctx_cnt):
- ctx_id.append(ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2"))
+ # Use queues 0 and 1 for normal traffic
+ ethtool(f"-X {cfg.ifname} equal 2")
+ defer(ethtool, f"-X {cfg.ifname} default")
- ports.append(rand_port())
- flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id[i]}"
- ntuple.append(ethtool_create(cfg, "-N", flow))
+ for i in range(ctx_cnt):
+ ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2")
+ ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete"))
- check_traffic()
+ ports.append(rand_port())
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}"))
- # Remove middle context
- remove_ctx(ctx_cnt // 2)
- check_traffic()
+ check_traffic()
- # Remove first context
- remove_ctx(0)
- check_traffic()
+ # Remove middle context
+ remove_ctx(ctx_cnt // 2)
+ check_traffic()
- # Remove last context
- remove_ctx(-1)
- check_traffic()
+ # Remove first context
+ remove_ctx(0)
+ check_traffic()
- finally:
- for nid in ntuple:
- if nid is not None:
- ethtool(f"-N {cfg.ifname} delete {nid}")
- for cid in ctx_id:
- if cid is not None:
- ethtool(f"-X {cfg.ifname} context {cid} delete")
- ethtool(f"-X {cfg.ifname} default")
- if qcnt:
- ethtool(f"-L {cfg.ifname} combined {qcnt}")
+ # Remove last context
+ remove_ctx(-1)
+ check_traffic()
if requested_ctx_cnt != ctx_cnt:
raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}")
@@ -298,69 +444,59 @@ def test_rss_context_overlap(cfg, other_ctx=0):
require_ntuple(cfg)
queue_cnt = len(_get_rx_cnts(cfg))
- if queue_cnt >= 4:
- queue_cnt = None
- else:
+ if queue_cnt < 4:
try:
ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
except:
raise KsftSkipEx("Not enough queues for the test")
- ctx_id = None
- ntuple = None
if other_ctx == 0:
ethtool(f"-X {cfg.ifname} equal 4")
+ defer(ethtool, f"-X {cfg.ifname} default")
else:
other_ctx = ethtool_create(cfg, "-X", "context new")
ethtool(f"-X {cfg.ifname} context {other_ctx} equal 4")
+ defer(ethtool, f"-X {cfg.ifname} context {other_ctx} delete")
- try:
- ctx_id = ethtool_create(cfg, "-X", "context new")
- ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2")
-
- port = rand_port()
- if other_ctx:
- flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {other_ctx}"
- ntuple = ethtool_create(cfg, "-N", flow)
-
- # Test the main context
- cnts = _get_rx_cnts(cfg)
- GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
- cnts = _get_rx_cnts(cfg, prev=cnts)
-
- ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts))
- ksft_ge(sum(cnts[ :2]), 7000, "traffic on main context (1/2): " + str(cnts))
- ksft_ge(sum(cnts[2:4]), 7000, "traffic on main context (2/2): " + str(cnts))
- if other_ctx == 0:
- ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts))
-
- # Now create a rule for context 1 and make sure traffic goes to a subset
- if other_ctx:
- ethtool(f"-N {cfg.ifname} delete {ntuple}")
- ntuple = None
- flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}"
- ntuple = ethtool_create(cfg, "-N", flow)
+ ctx_id = ethtool_create(cfg, "-X", "context new")
+ ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
- cnts = _get_rx_cnts(cfg)
- GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
- cnts = _get_rx_cnts(cfg, prev=cnts)
+ port = rand_port()
+ if other_ctx:
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {other_ctx}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
- ksft_lt(sum(cnts[ :2]), 7000, "traffic on main context: " + str(cnts))
- ksft_ge(sum(cnts[2:4]), 20000, "traffic on extra context: " + str(cnts))
- if other_ctx == 0:
- ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts))
- finally:
- if ntuple is not None:
- ethtool(f"-N {cfg.ifname} delete {ntuple}")
- if ctx_id:
- ethtool(f"-X {cfg.ifname} context {ctx_id} delete")
- if other_ctx == 0:
- ethtool(f"-X {cfg.ifname} default")
- else:
- ethtool(f"-X {cfg.ifname} context {other_ctx} delete")
- if queue_cnt:
- ethtool(f"-L {cfg.ifname} combined {queue_cnt}")
+ # Test the main context
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+
+ ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts))
+ ksft_ge(sum(cnts[ :2]), 7000, "traffic on main context (1/2): " + str(cnts))
+ ksft_ge(sum(cnts[2:4]), 7000, "traffic on main context (2/2): " + str(cnts))
+ if other_ctx == 0:
+ ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts))
+
+ # Now create a rule for context 1 and make sure traffic goes to a subset
+ if other_ctx:
+ ntuple.exec()
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+
+ directed = sum(cnts[2:4])
+ ksft_lt(sum(cnts[ :2]), directed / 2, "traffic on main context: " + str(cnts))
+ ksft_ge(directed, 20000, "traffic on extra context: " + str(cnts))
+ if other_ctx == 0:
+ ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts))
def test_rss_context_overlap2(cfg):
@@ -369,10 +505,13 @@ def test_rss_context_overlap2(cfg):
def main() -> None:
with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
cfg.netdevnl = NetdevFamily()
- ksft_run([test_rss_key_indir,
+ ksft_run([test_rss_key_indir, test_rss_queue_reconfigure,
+ test_rss_resize, test_hitless_key_update,
test_rss_context, test_rss_context4, test_rss_context32,
+ test_rss_context_queue_reconfigure,
test_rss_context_overlap, test_rss_context_overlap2,
test_rss_context_out_of_order, test_rss_context4_create_with_cfg],
args=(cfg, ))
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
index 76f1ab4898d9..e1ad623146d7 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
@@ -15,6 +15,13 @@ source $lib_dir/mirror_lib.sh
source $lib_dir/mirror_gre_lib.sh
source $lib_dir/mirror_gre_topo_lib.sh
+ALL_TESTS="
+ test_keyful
+ test_soft
+ test_tos_fixed
+ test_ttl_inherit
+"
+
setup_keyful()
{
tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \
@@ -118,15 +125,15 @@ test_span_gre_ttl_inherit()
RET=0
ip link set dev $tundev type $type ttl inherit
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- fail_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
ip link set dev $tundev type $type ttl 100
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: no offload on TTL of inherit ($tcflags)"
+ log_test "$what: no offload on TTL of inherit"
}
test_span_gre_tos_fixed()
@@ -138,61 +145,49 @@ test_span_gre_tos_fixed()
RET=0
ip link set dev $tundev type $type tos 0x10
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- fail_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
ip link set dev $tundev type $type tos inherit
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: no offload on a fixed TOS ($tcflags)"
+ log_test "$what: no offload on a fixed TOS"
}
test_span_failable()
{
- local should_fail=$1; shift
local tundev=$1; shift
local what=$1; shift
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- if ((should_fail)); then
- fail_test_span_gre_dir $tundev ingress
- else
- quick_test_span_gre_dir $tundev ingress
- fi
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: should_fail=$should_fail ($tcflags)"
+ log_test "fail $what"
}
-test_failable()
+test_keyful()
{
- local should_fail=$1; shift
-
- test_span_failable $should_fail gt6-key "mirror to keyful gretap"
- test_span_failable $should_fail gt6-soft "mirror to gretap w/ soft underlay"
+ test_span_failable gt6-key "mirror to keyful gretap"
}
-test_sw()
+test_soft()
{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- test_failable 0
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
+ test_span_failable gt6-soft "mirror to gretap w/ soft underlay"
}
-test_hw()
+test_tos_fixed()
{
- test_failable 1
-
test_span_gre_tos_fixed gt4 gretap "mirror to gretap"
test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap"
+}
+
+test_ttl_inherit()
+{
test_span_gre_ttl_inherit gt4 gretap "mirror to gretap"
test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap"
}
@@ -202,16 +197,6 @@ trap cleanup EXIT
setup_prepare
setup_wait
-if ! tc_offload_check; then
- check_err 1 "Could not test offloaded functionality"
- log_test "mlxsw-specific tests for mirror to gretap"
- exit
-fi
-
-tcflags="skip_hw"
-test_sw
-
-tcflags="skip_sw"
-test_hw
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
index e5589e2fca85..d43093310e23 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
@@ -79,7 +79,7 @@ mirror_gre_tunnels_create()
cat >> $MIRROR_GRE_BATCH_FILE <<-EOF
filter add dev $swp1 ingress pref 1000 \
protocol ipv6 \
- flower $tcflags dst_ip $match_dip \
+ flower skip_sw dst_ip $match_dip \
action mirred egress mirror dev $tun
EOF
done
@@ -107,7 +107,7 @@ mirror_gre_tunnels_destroy()
done
}
-__mirror_gre_test()
+mirror_gre_test()
{
local count=$1; shift
local should_fail=$1; shift
@@ -131,20 +131,6 @@ __mirror_gre_test()
done
}
-mirror_gre_test()
-{
- local count=$1; shift
- local should_fail=$1; shift
-
- if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then
- check_err 1 "Could not test offloaded functionality"
- return
- fi
-
- tcflags="skip_sw"
- __mirror_gre_test $count $should_fail
-}
-
mirror_gre_setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index b634969cbb6f..40723a6a083f 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -66,8 +66,6 @@
#include <sys/wait.h>
#include <unistd.h>
#include <setjmp.h>
-#include <syscall.h>
-#include <linux/sched.h>
#include "kselftest.h"
@@ -82,17 +80,6 @@
# define TH_LOG_ENABLED 1
#endif
-/* Wait for the child process to end but without sharing memory mapping. */
-static inline pid_t clone3_vfork(void)
-{
- struct clone_args args = {
- .flags = CLONE_VFORK,
- .exit_signal = SIGCHLD,
- };
-
- return syscall(__NR_clone3, &args, sizeof(args));
-}
-
/**
* TH_LOG()
*
@@ -437,7 +424,7 @@ static inline pid_t clone3_vfork(void)
} \
if (setjmp(_metadata->env) == 0) { \
/* _metadata and potentially self are shared with all forks. */ \
- child = clone3_vfork(); \
+ child = fork(); \
if (child == 0) { \
fixture_name##_setup(_metadata, self, variant->data); \
/* Let setup failure terminate early. */ \
@@ -1016,7 +1003,14 @@ void __wait_for_test(struct __test_metadata *t)
.sa_flags = SA_SIGINFO,
};
struct sigaction saved_action;
- int status;
+ /*
+ * Sets status so that WIFEXITED(status) returns true and
+ * WEXITSTATUS(status) returns KSFT_FAIL. This safe default value
+ * should never be evaluated because of the waitpid(2) check and
+ * SIGALRM handling.
+ */
+ int status = KSFT_FAIL << 8;
+ int child;
if (sigaction(SIGALRM, &action, &saved_action)) {
t->exit_code = KSFT_FAIL;
@@ -1028,7 +1022,15 @@ void __wait_for_test(struct __test_metadata *t)
__active_test = t;
t->timed_out = false;
alarm(t->timeout);
- waitpid(t->pid, &status, 0);
+ child = waitpid(t->pid, &status, 0);
+ if (child == -1 && errno != EINTR) {
+ t->exit_code = KSFT_FAIL;
+ fprintf(TH_LOG_STREAM,
+ "# %s: Failed to wait for PID %d (errno: %d)\n",
+ t->name, t->pid, errno);
+ return;
+ }
+
alarm(0);
if (sigaction(SIGALRM, &saved_action, NULL)) {
t->exit_code = KSFT_FAIL;
@@ -1083,6 +1085,7 @@ void __wait_for_test(struct __test_metadata *t)
WTERMSIG(status));
}
} else {
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Test ended in some other way [%u]\n",
t->name,
@@ -1218,6 +1221,7 @@ void __run_test(struct __fixture_metadata *f,
struct __test_xfail *xfail;
char test_name[1024];
const char *diagnostic;
+ int child;
/* reset test struct */
t->exit_code = KSFT_PASS;
@@ -1236,15 +1240,16 @@ void __run_test(struct __fixture_metadata *f,
fflush(stdout);
fflush(stderr);
- t->pid = clone3_vfork();
- if (t->pid < 0) {
+ child = fork();
+ if (child < 0) {
ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
t->exit_code = KSFT_FAIL;
- } else if (t->pid == 0) {
+ } else if (child == 0) {
setpgrp();
t->fn(t, variant);
_exit(t->exit_code);
} else {
+ t->pid = child;
__wait_for_test(t);
}
ksft_print_msg(" %4s %s\n",
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
index 2bfed46e0b19..d66336256580 100644
--- a/tools/testing/selftests/net/af_unix/scm_rights.c
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -14,12 +14,12 @@
FIXTURE(scm_rights)
{
- int fd[16];
+ int fd[32];
};
FIXTURE_VARIANT(scm_rights)
{
- char name[16];
+ char name[32];
int type;
int flags;
bool test_listener;
@@ -172,6 +172,8 @@ static void __create_sockets(struct __test_metadata *_metadata,
const FIXTURE_VARIANT(scm_rights) *variant,
int n)
{
+ ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0]));
+
if (variant->test_listener)
create_listeners(_metadata, self, n);
else
@@ -283,4 +285,23 @@ TEST_F(scm_rights, cross_edge)
close_sockets(8);
}
+TEST_F(scm_rights, backtrack_from_scc)
+{
+ create_sockets(10);
+
+ send_fd(0, 1);
+ send_fd(0, 4);
+ send_fd(1, 2);
+ send_fd(2, 3);
+ send_fd(3, 1);
+
+ send_fd(5, 6);
+ send_fd(5, 9);
+ send_fd(6, 7);
+ send_fd(7, 8);
+ send_fd(8, 6);
+
+ close_sockets(10);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 9086d2015296..ff96bb7535ff 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1225,22 +1225,6 @@ trap_uninstall()
tc filter del dev $dev $direction pref 1 flower
}
-slow_path_trap_install()
-{
- # For slow-path testing, we need to install a trap to get to
- # slow path the packets that would otherwise be switched in HW.
- if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
- trap_install "$@"
- fi
-}
-
-slow_path_trap_uninstall()
-{
- if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
- trap_uninstall "$@"
- fi
-}
-
__icmp_capture_add_del()
{
local add_del=$1; shift
@@ -1257,22 +1241,34 @@ __icmp_capture_add_del()
icmp_capture_install()
{
- __icmp_capture_add_del add 100 "" "$@"
+ local tundev=$1; shift
+ local filter=$1; shift
+
+ __icmp_capture_add_del add 100 "" "$tundev" "$filter"
}
icmp_capture_uninstall()
{
- __icmp_capture_add_del del 100 "" "$@"
+ local tundev=$1; shift
+ local filter=$1; shift
+
+ __icmp_capture_add_del del 100 "" "$tundev" "$filter"
}
icmp6_capture_install()
{
- __icmp_capture_add_del add 100 v6 "$@"
+ local tundev=$1; shift
+ local filter=$1; shift
+
+ __icmp_capture_add_del add 100 v6 "$tundev" "$filter"
}
icmp6_capture_uninstall()
{
- __icmp_capture_add_del del 100 v6 "$@"
+ local tundev=$1; shift
+ local filter=$1; shift
+
+ __icmp_capture_add_del del 100 v6 "$tundev" "$filter"
}
__vlan_capture_add_del()
@@ -1290,12 +1286,18 @@ __vlan_capture_add_del()
vlan_capture_install()
{
- __vlan_capture_add_del add 100 "$@"
+ local dev=$1; shift
+ local filter=$1; shift
+
+ __vlan_capture_add_del add 100 "$dev" "$filter"
}
vlan_capture_uninstall()
{
- __vlan_capture_add_del del 100 "$@"
+ local dev=$1; shift
+ local filter=$1; shift
+
+ __vlan_capture_add_del del 100 "$dev" "$filter"
}
__dscp_capture_add_del()
@@ -1655,34 +1657,61 @@ __start_traffic()
local sip=$1; shift
local dip=$1; shift
local dmac=$1; shift
+ local -a mz_args=("$@")
$MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \
- -a own -b $dmac -t "$proto" -q "$@" &
+ -a own -b $dmac -t "$proto" -q "${mz_args[@]}" &
sleep 1
}
start_traffic_pktsize()
{
local pktsize=$1; shift
+ local h_in=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local dmac=$1; shift
+ local -a mz_args=("$@")
- __start_traffic $pktsize udp "$@"
+ __start_traffic $pktsize udp "$h_in" "$sip" "$dip" "$dmac" \
+ "${mz_args[@]}"
}
start_tcp_traffic_pktsize()
{
local pktsize=$1; shift
+ local h_in=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local dmac=$1; shift
+ local -a mz_args=("$@")
- __start_traffic $pktsize tcp "$@"
+ __start_traffic $pktsize tcp "$h_in" "$sip" "$dip" "$dmac" \
+ "${mz_args[@]}"
}
start_traffic()
{
- start_traffic_pktsize 8000 "$@"
+ local h_in=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local dmac=$1; shift
+ local -a mz_args=("$@")
+
+ start_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \
+ "${mz_args[@]}"
}
start_tcp_traffic()
{
- start_tcp_traffic_pktsize 8000 "$@"
+ local h_in=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local dmac=$1; shift
+ local -a mz_args=("$@")
+
+ start_tcp_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \
+ "${mz_args[@]}"
}
stop_traffic()
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
index 0266443601bc..921c733ee04f 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -74,7 +74,7 @@ test_span_gre_mac()
RET=0
- mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ mirror_install $swp1 $direction $tundev "matchall"
icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac"
mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10
@@ -82,29 +82,29 @@ test_span_gre_mac()
icmp_capture_uninstall h3-${tundev}
mirror_uninstall $swp1 $direction
- log_test "$direction $what: envelope MAC ($tcflags)"
+ log_test "$direction $what: envelope MAC"
}
test_two_spans()
{
RET=0
- mirror_install $swp1 ingress gt4 "matchall $tcflags"
- mirror_install $swp1 egress gt6 "matchall $tcflags"
- quick_test_span_gre_dir gt4 ingress
- quick_test_span_gre_dir gt6 egress
+ mirror_install $swp1 ingress gt4 "matchall"
+ mirror_install $swp1 egress gt6 "matchall"
+ quick_test_span_gre_dir gt4 8 0
+ quick_test_span_gre_dir gt6 0 8
mirror_uninstall $swp1 ingress
- fail_test_span_gre_dir gt4 ingress
- quick_test_span_gre_dir gt6 egress
+ fail_test_span_gre_dir gt4 8 0
+ quick_test_span_gre_dir gt6 0 8
- mirror_install $swp1 ingress gt4 "matchall $tcflags"
+ mirror_install $swp1 ingress gt4 "matchall"
mirror_uninstall $swp1 egress
- quick_test_span_gre_dir gt4 ingress
- fail_test_span_gre_dir gt6 egress
+ quick_test_span_gre_dir gt4 8 0
+ fail_test_span_gre_dir gt6 0 8
mirror_uninstall $swp1 ingress
- log_test "two simultaneously configured mirrors ($tcflags)"
+ log_test "two simultaneously configured mirrors"
}
test_gretap()
@@ -131,30 +131,11 @@ test_ip6gretap_mac()
test_span_gre_mac gt6 egress "mirror to ip6gretap"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
index 6c257ec03756..e3cd48e18eeb 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
@@ -196,32 +196,11 @@ test_ip6gretap()
full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap w/ UL"
}
-test_all()
-{
- RET=0
-
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
index 04fd14b0a9b7..6c7bd33332c2 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
@@ -108,30 +108,11 @@ test_ip6gretap()
full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
index f35313c76fac..909ec956a5e5 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -104,30 +104,11 @@ test_ip6gretap_stp()
full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
index 0cf4c47a46f9..40ac9dd3aff1 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
@@ -104,30 +104,11 @@ test_ip6gretap()
full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
}
-tests()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-tests
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- tests
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
index c53148b1dc63..fe4d7c906a70 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -227,10 +227,10 @@ test_lag_slave()
RET=0
tc filter add dev $swp1 ingress pref 999 \
- proto 802.1q flower vlan_ethtype arp $tcflags \
+ proto 802.1q flower vlan_ethtype arp \
action pass
mirror_install $swp1 ingress gt4 \
- "proto 802.1q flower vlan_id 333 $tcflags"
+ "proto 802.1q flower vlan_id 333"
# Test connectivity through $up_dev when $down_dev is set down.
ip link set dev $down_dev down
@@ -239,7 +239,7 @@ test_lag_slave()
setup_wait_dev $host_dev
$ARPING -I br1 192.0.2.130 -qfc 1
sleep 2
- mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 10
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 ">= 10"
# Test lack of connectivity when both slaves are down.
ip link set dev $up_dev down
@@ -252,7 +252,7 @@ test_lag_slave()
mirror_uninstall $swp1 ingress
tc filter del dev $swp1 ingress pref 999
- log_test "$what ($tcflags)"
+ log_test "$what"
}
test_mirror_gretap_first()
@@ -265,30 +265,11 @@ test_mirror_gretap_second()
test_lag_slave $h4 $swp4 $swp3 "mirror to gretap: LAG second slave"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
index 5ea9d63915f7..65ae9d960c18 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -73,7 +73,7 @@ test_span_gre_ttl()
RET=0
mirror_install $swp1 ingress $tundev \
- "prot ip flower $tcflags ip_prot icmp"
+ "prot ip flower ip_prot icmp"
tc filter add dev $h3 ingress pref 77 prot $prot \
flower skip_hw ip_ttl 50 action pass
@@ -81,13 +81,13 @@ test_span_gre_ttl()
ip link set dev $tundev type $type ttl 50
sleep 2
- mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+ mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 ">= 10"
ip link set dev $tundev type $type ttl 100
tc filter del dev $h3 ingress pref 77
mirror_uninstall $swp1 ingress
- log_test "$what: TTL change ($tcflags)"
+ log_test "$what: TTL change"
}
test_span_gre_tun_up()
@@ -98,15 +98,15 @@ test_span_gre_tun_up()
RET=0
ip link set dev $tundev down
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- fail_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
ip link set dev $tundev up
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: tunnel down/up ($tcflags)"
+ log_test "$what: tunnel down/up"
}
test_span_gre_egress_up()
@@ -118,8 +118,8 @@ test_span_gre_egress_up()
RET=0
ip link set dev $swp3 down
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- fail_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
# After setting the device up, wait for neighbor to get resolved so that
# we can expect mirroring to work.
@@ -127,10 +127,10 @@ test_span_gre_egress_up()
setup_wait_dev $swp3
ping -c 1 -I $swp3 $remote_ip &>/dev/null
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: egress down/up ($tcflags)"
+ log_test "$what: egress down/up"
}
test_span_gre_remote_ip()
@@ -144,14 +144,14 @@ test_span_gre_remote_ip()
RET=0
ip link set dev $tundev type $type remote $wrong_ip
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- fail_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
ip link set dev $tundev type $type remote $correct_ip
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: remote address change ($tcflags)"
+ log_test "$what: remote address change"
}
test_span_gre_tun_del()
@@ -165,10 +165,10 @@ test_span_gre_tun_del()
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- quick_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ quick_test_span_gre_dir $tundev
ip link del dev $tundev
- fail_test_span_gre_dir $tundev ingress
+ fail_test_span_gre_dir $tundev
tunnel_create $tundev $type $local_ip $remote_ip \
ttl 100 tos inherit $flags
@@ -176,11 +176,11 @@ test_span_gre_tun_del()
# Recreating the tunnel doesn't reestablish mirroring, so reinstall it
# and verify it works for the follow-up tests.
mirror_uninstall $swp1 ingress
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- quick_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: tunnel deleted ($tcflags)"
+ log_test "$what: tunnel deleted"
}
test_span_gre_route_del()
@@ -192,18 +192,18 @@ test_span_gre_route_del()
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- quick_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ quick_test_span_gre_dir $tundev
ip route del $route dev $edev
- fail_test_span_gre_dir $tundev ingress
+ fail_test_span_gre_dir $tundev
ip route add $route dev $edev
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: underlay route removal ($tcflags)"
+ log_test "$what: underlay route removal"
}
test_ttl()
@@ -244,30 +244,11 @@ test_route_del()
test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
index 09389f3b9369..3a84f3ab5856 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
@@ -64,12 +64,19 @@ cleanup()
test_span_gre_dir_acl()
{
- test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+ local tundev=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+
+ test_span_gre_dir_ips "$tundev" "$forward_type" \
+ "$backward_type" 192.0.2.3 192.0.2.4
}
fail_test_span_gre_dir_acl()
{
- fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+ local tundev=$1; shift
+
+ fail_test_span_gre_dir_ips "$tundev" 192.0.2.3 192.0.2.4
}
full_test_span_gre_dir_acl()
@@ -84,16 +91,15 @@ full_test_span_gre_dir_acl()
RET=0
mirror_install $swp1 $direction $tundev \
- "protocol ip flower $tcflags dst_ip $match_dip"
- fail_test_span_gre_dir $tundev $direction
- test_span_gre_dir_acl "$tundev" "$direction" \
- "$forward_type" "$backward_type"
+ "protocol ip flower dst_ip $match_dip"
+ fail_test_span_gre_dir $tundev
+ test_span_gre_dir_acl "$tundev" "$forward_type" "$backward_type"
mirror_uninstall $swp1 $direction
# Test lack of mirroring after ACL mirror is uninstalled.
- fail_test_span_gre_dir_acl "$tundev" "$direction"
+ fail_test_span_gre_dir_acl "$tundev"
- log_test "$direction $what ($tcflags)"
+ log_test "$direction $what"
}
test_gretap()
@@ -108,30 +114,11 @@ test_ip6gretap()
full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
index 9edf4cb104a8..1261e6f46e34 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
@@ -37,8 +37,14 @@
# | \ / |
# | \____________________________________________/ |
# | | |
-# | + lag2 (team) |
-# | 192.0.2.130/28 |
+# | + lag2 (team) ------> + gt4-dst (gretap) |
+# | 192.0.2.130/28 loc=192.0.2.130 |
+# | rem=192.0.2.129 |
+# | ttl=100 |
+# | tos=inherit |
+# | |
+# | |
+# | |
# | |
# +---------------------------------------------------------------------------+
@@ -50,9 +56,6 @@ ALL_TESTS="
NUM_NETIFS=6
source lib.sh
source mirror_lib.sh
-source mirror_gre_lib.sh
-
-require_command $ARPING
vlan_host_create()
{
@@ -122,16 +125,21 @@ h3_create()
{
vrf_create vrf-h3
ip link set dev vrf-h3 up
- tc qdisc add dev $h3 clsact
- tc qdisc add dev $h4 clsact
h3_create_team
+
+ tunnel_create gt4-dst gretap 192.0.2.130 192.0.2.129 \
+ ttl 100 tos inherit
+ ip link set dev gt4-dst master vrf-h3
+ tc qdisc add dev gt4-dst clsact
}
h3_destroy()
{
+ tc qdisc del dev gt4-dst clsact
+ ip link set dev gt4-dst nomaster
+ tunnel_destroy gt4-dst
+
h3_destroy_team
- tc qdisc del dev $h4 clsact
- tc qdisc del dev $h3 clsact
ip link set dev vrf-h3 down
vrf_destroy vrf-h3
}
@@ -188,18 +196,12 @@ setup_prepare()
h2_create
h3_create
switch_create
-
- trap_install $h3 ingress
- trap_install $h4 ingress
}
cleanup()
{
pre_cleanup
- trap_uninstall $h4 ingress
- trap_uninstall $h3 ingress
-
switch_destroy
h3_destroy
h2_destroy
@@ -218,7 +220,8 @@ test_lag_slave()
RET=0
mirror_install $swp1 ingress gt4 \
- "proto 802.1q flower vlan_id 333 $tcflags"
+ "proto 802.1q flower vlan_id 333"
+ vlan_capture_install gt4-dst "vlan_ethtype ipv4 ip_proto icmp type 8"
# Move $down_dev away from the team. That will prompt change in
# txability of the connected device, without changing its upness. The
@@ -226,13 +229,14 @@ test_lag_slave()
# other slave.
ip link set dev $down_dev nomaster
sleep 2
- mirror_test vrf-h1 192.0.2.1 192.0.2.18 $up_dev 1 10
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 gt4-dst 100 10
# Test lack of connectivity when neither slave is txable.
ip link set dev $up_dev nomaster
sleep 2
- mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0
- mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 gt4-dst 100 0
+
+ vlan_capture_uninstall gt4-dst
mirror_uninstall $swp1 ingress
# Recreate H3's team device, because mlxsw, which this test is
@@ -243,7 +247,7 @@ test_lag_slave()
# Wait for ${h,swp}{3,4}.
setup_wait
- log_test "$what ($tcflags)"
+ log_test "$what"
}
test_mirror_gretap_first()
@@ -256,30 +260,11 @@ test_mirror_gretap_second()
test_lag_slave $h4 $h3 "mirror to gretap: LAG second slave"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
index 0c36546e131e..20078cc55f24 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -5,22 +5,34 @@ source "$net_forwarding_dir/mirror_lib.sh"
quick_test_span_gre_dir_ips()
{
local tundev=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
- do_test_span_dir_ips 10 h3-$tundev "$@"
+ do_test_span_dir_ips 10 h3-$tundev "$ip1" "$ip2" \
+ "$forward_type" "$backward_type"
}
fail_test_span_gre_dir_ips()
{
local tundev=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
- do_test_span_dir_ips 0 h3-$tundev "$@"
+ do_test_span_dir_ips 0 h3-$tundev "$ip1" "$ip2"
}
test_span_gre_dir_ips()
{
local tundev=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
- test_span_dir_ips h3-$tundev "$@"
+ test_span_dir_ips h3-$tundev "$forward_type" \
+ "$backward_type" "$ip1" "$ip2"
}
full_test_span_gre_dir_ips()
@@ -35,12 +47,12 @@ full_test_span_gre_dir_ips()
RET=0
- mirror_install $swp1 $direction $tundev "matchall $tcflags"
- test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+ mirror_install $swp1 $direction $tundev "matchall"
+ test_span_dir_ips "h3-$tundev" "$forward_type" \
"$backward_type" "$ip1" "$ip2"
mirror_uninstall $swp1 $direction
- log_test "$direction $what ($tcflags)"
+ log_test "$direction $what"
}
full_test_span_gre_dir_vlan_ips()
@@ -56,45 +68,63 @@ full_test_span_gre_dir_vlan_ips()
RET=0
- mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ mirror_install $swp1 $direction $tundev "matchall"
- test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+ test_span_dir_ips "h3-$tundev" "$forward_type" \
"$backward_type" "$ip1" "$ip2"
tc filter add dev $h3 ingress pref 77 prot 802.1q \
flower $vlan_match \
action pass
- mirror_test v$h1 $ip1 $ip2 $h3 77 10
+ mirror_test v$h1 $ip1 $ip2 $h3 77 '>= 10'
tc filter del dev $h3 ingress pref 77
mirror_uninstall $swp1 $direction
- log_test "$direction $what ($tcflags)"
+ log_test "$direction $what"
}
quick_test_span_gre_dir()
{
- quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+ local tundev=$1; shift
+ local forward_type=${1-8}; shift
+ local backward_type=${1-0}; shift
+
+ quick_test_span_gre_dir_ips "$tundev" 192.0.2.1 192.0.2.2 \
+ "$forward_type" "$backward_type"
}
fail_test_span_gre_dir()
{
- fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
-}
+ local tundev=$1; shift
-test_span_gre_dir()
-{
- test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+ fail_test_span_gre_dir_ips "$tundev" 192.0.2.1 192.0.2.2
}
full_test_span_gre_dir()
{
- full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+ local tundev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local what=$1; shift
+
+ full_test_span_gre_dir_ips "$tundev" "$direction" "$forward_type" \
+ "$backward_type" "$what" 192.0.2.1 192.0.2.2
}
full_test_span_gre_dir_vlan()
{
- full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2
+ local tundev=$1; shift
+ local direction=$1; shift
+ local vlan_match=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local what=$1; shift
+
+ full_test_span_gre_dir_vlan_ips "$tundev" "$direction" "$vlan_match" \
+ "$forward_type" "$backward_type" \
+ "$what" 192.0.2.1 192.0.2.2
}
full_test_span_gre_stp_ips()
@@ -104,27 +134,39 @@ full_test_span_gre_stp_ips()
local what=$1; shift
local ip1=$1; shift
local ip2=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
local h3mac=$(mac_get $h3)
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+ mirror_install $swp1 ingress $tundev "matchall"
+ quick_test_span_gre_dir_ips $tundev $ip1 $ip2 \
+ "$forward_type" "$backward_type"
bridge link set dev $nbpdev state disabled
sleep 1
- fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+ fail_test_span_gre_dir_ips $tundev $ip1 $ip2
bridge link set dev $nbpdev state forwarding
sleep 1
- quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+ quick_test_span_gre_dir_ips $tundev $ip1 $ip2 \
+ "$forward_type" "$backward_type"
mirror_uninstall $swp1 ingress
- log_test "$what: STP state ($tcflags)"
+ log_test "$what: STP state"
}
full_test_span_gre_stp()
{
- full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2
+ local tundev=$1; shift
+ local nbpdev=$1; shift
+ local what=$1; shift
+ local forward_type=${1-8}; shift
+ local backward_type=${1-0}; shift
+
+ full_test_span_gre_stp_ips "$tundev" "$nbpdev" "$what" \
+ 192.0.2.1 192.0.2.2 \
+ "$forward_type" "$backward_type"
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
index fc0508e40fca..2cbfbecf25c8 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
@@ -60,41 +60,32 @@ test_span_gre_neigh()
local addr=$1; shift
local tundev=$1; shift
local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
local what=$1; shift
RET=0
ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55
- mirror_install $swp1 $direction $tundev "matchall $tcflags"
- fail_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 $direction $tundev "matchall"
+ fail_test_span_gre_dir $tundev "$forward_type" "$backward_type"
ip neigh del dev $swp3 $addr
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev "$forward_type" "$backward_type"
mirror_uninstall $swp1 $direction
- log_test "$direction $what: neighbor change ($tcflags)"
+ log_test "$direction $what: neighbor change"
}
test_gretap()
{
- test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap"
- test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap"
+ test_span_gre_neigh 192.0.2.130 gt4 ingress 8 0 "mirror to gretap"
+ test_span_gre_neigh 192.0.2.130 gt4 egress 0 8 "mirror to gretap"
}
test_ip6gretap()
{
- test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap"
- test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap"
-}
-
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
+ test_span_gre_neigh 2001:db8:2::2 gt6 ingress 8 0 "mirror to ip6gretap"
+ test_span_gre_neigh 2001:db8:2::2 gt6 egress 0 8 "mirror to ip6gretap"
}
trap cleanup EXIT
@@ -102,14 +93,6 @@ trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
index 6f9ef1820e93..34bc646938e3 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
@@ -75,42 +75,31 @@ cleanup()
test_gretap()
{
RET=0
- mirror_install $swp1 ingress gt4 "matchall $tcflags"
+ mirror_install $swp1 ingress gt4 "matchall"
# For IPv4, test that there's no mirroring without the route directing
# the traffic to tunnel remote address. Then add it and test that
# mirroring starts. For IPv6 we can't test this due to the limitation
# that routes for locally-specified IPv6 addresses can't be added.
- fail_test_span_gre_dir gt4 ingress
+ fail_test_span_gre_dir gt4
ip route add 192.0.2.130/32 via 192.0.2.162
- quick_test_span_gre_dir gt4 ingress
+ quick_test_span_gre_dir gt4
ip route del 192.0.2.130/32 via 192.0.2.162
mirror_uninstall $swp1 ingress
- log_test "mirror to gre with next-hop remote ($tcflags)"
+ log_test "mirror to gre with next-hop remote"
}
test_ip6gretap()
{
RET=0
- mirror_install $swp1 ingress gt6 "matchall $tcflags"
- quick_test_span_gre_dir gt6 ingress
+ mirror_install $swp1 ingress gt6 "matchall"
+ quick_test_span_gre_dir gt6
mirror_uninstall $swp1 ingress
- log_test "mirror to ip6gre with next-hop remote ($tcflags)"
-}
-
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
+ log_test "mirror to ip6gre with next-hop remote"
}
trap cleanup EXIT
@@ -118,14 +107,6 @@ trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
index 88cecdb9a861..63689928cb51 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
@@ -63,30 +63,11 @@ test_gretap()
full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
index c8a9b5bd841f..1b902cc579f6 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -153,21 +153,21 @@ test_span_gre_forbidden_cpu()
RET=0
# Run the pass-test first, to prime neighbor table.
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- quick_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ quick_test_span_gre_dir $tundev
# Now forbid the VLAN at the bridge and see it fail.
bridge vlan del dev br1 vid 555 self
sleep 1
- fail_test_span_gre_dir $tundev ingress
+ fail_test_span_gre_dir $tundev
bridge vlan add dev br1 vid 555 self
sleep 1
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: vlan forbidden at a bridge ($tcflags)"
+ log_test "$what: vlan forbidden at a bridge"
}
test_gretap_forbidden_cpu()
@@ -187,22 +187,22 @@ test_span_gre_forbidden_egress()
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- quick_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ quick_test_span_gre_dir $tundev
bridge vlan del dev $swp3 vid 555
sleep 1
- fail_test_span_gre_dir $tundev ingress
+ fail_test_span_gre_dir $tundev
bridge vlan add dev $swp3 vid 555
# Re-prime FDB
$ARPING -I br1.555 192.0.2.130 -fqc 1
sleep 1
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: vlan forbidden at a bridge egress ($tcflags)"
+ log_test "$what: vlan forbidden at a bridge egress"
}
test_gretap_forbidden_egress()
@@ -223,30 +223,30 @@ test_span_gre_untagged_egress()
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ mirror_install $swp1 ingress $tundev "matchall"
- quick_test_span_gre_dir $tundev ingress
- quick_test_span_vlan_dir $h3 555 ingress "$ul_proto"
+ quick_test_span_gre_dir $tundev
+ quick_test_span_vlan_dir $h3 555 "$ul_proto"
h3_addr_add_del del $h3.555
bridge vlan add dev $swp3 vid 555 pvid untagged
h3_addr_add_del add $h3
sleep 5
- quick_test_span_gre_dir $tundev ingress
- fail_test_span_vlan_dir $h3 555 ingress "$ul_proto"
+ quick_test_span_gre_dir $tundev
+ fail_test_span_vlan_dir $h3 555 "$ul_proto"
h3_addr_add_del del $h3
bridge vlan add dev $swp3 vid 555
h3_addr_add_del add $h3.555
sleep 5
- quick_test_span_gre_dir $tundev ingress
- quick_test_span_vlan_dir $h3 555 ingress "$ul_proto"
+ quick_test_span_gre_dir $tundev
+ quick_test_span_vlan_dir $h3 555 "$ul_proto"
mirror_uninstall $swp1 ingress
- log_test "$what: vlan untagged at a bridge egress ($tcflags)"
+ log_test "$what: vlan untagged at a bridge egress"
}
test_gretap_untagged_egress()
@@ -267,19 +267,19 @@ test_span_gre_fdb_roaming()
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- quick_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ quick_test_span_gre_dir $tundev
while ((RET == 0)); do
bridge fdb del dev $swp3 $h3mac vlan 555 master 2>/dev/null
bridge fdb add dev $swp2 $h3mac vlan 555 master static
sleep 1
- fail_test_span_gre_dir $tundev ingress
+ fail_test_span_gre_dir $tundev
if ! bridge fdb sh dev $swp2 vlan 555 master \
| grep -q $h3mac; then
printf "TEST: %-60s [RETRY]\n" \
- "$what: MAC roaming ($tcflags)"
+ "$what: MAC roaming"
# ARP or ND probably reprimed the FDB while the test
# was running. We would get a spurious failure.
RET=0
@@ -292,11 +292,11 @@ test_span_gre_fdb_roaming()
# Re-prime FDB
$ARPING -I br1.555 192.0.2.130 -fqc 1
sleep 1
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: MAC roaming ($tcflags)"
+ log_test "$what: MAC roaming"
}
test_gretap_fdb_roaming()
@@ -319,30 +319,11 @@ test_ip6gretap_stp()
full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index 3e8ebeff3019..6bf9d5ae933c 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -44,14 +44,17 @@ mirror_test()
local type="icmp echoreq"
fi
+ if [[ -z ${expect//[[:digit:]]/} ]]; then
+ expect="== $expect"
+ fi
+
local t0=$(tc_rule_stats_get $dev $pref)
$MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
-c 10 -d 100msec -t $type
sleep 0.5
local t1=$(tc_rule_stats_get $dev $pref)
local delta=$((t1 - t0))
- # Tolerate a couple stray extra packets.
- ((expect <= delta && delta <= expect + 2))
+ ((delta $expect))
check_err $? "Expected to capture $expect packets, got $delta."
}
@@ -59,36 +62,42 @@ do_test_span_dir_ips()
{
local expect=$1; shift
local dev=$1; shift
- local direction=$1; shift
local ip1=$1; shift
local ip2=$1; shift
+ local forward_type=${1-8}; shift
+ local backward_type=${1-0}; shift
- icmp_capture_install $dev
+ icmp_capture_install $dev "type $forward_type"
mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+ icmp_capture_uninstall $dev
+
+ icmp_capture_install $dev "type $backward_type"
mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
icmp_capture_uninstall $dev
}
quick_test_span_dir_ips()
{
- do_test_span_dir_ips 10 "$@"
-}
+ local dev=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+ local forward_type=${1-8}; shift
+ local backward_type=${1-0}; shift
-fail_test_span_dir_ips()
-{
- do_test_span_dir_ips 0 "$@"
+ do_test_span_dir_ips 10 "$dev" "$ip1" "$ip2" \
+ "$forward_type" "$backward_type"
}
test_span_dir_ips()
{
local dev=$1; shift
- local direction=$1; shift
local forward_type=$1; shift
local backward_type=$1; shift
local ip1=$1; shift
local ip2=$1; shift
- quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2"
+ quick_test_span_dir_ips "$dev" "$ip1" "$ip2" \
+ "$forward_type" "$backward_type"
icmp_capture_install $dev "type $forward_type"
mirror_test v$h1 $ip1 $ip2 $dev 100 10
@@ -99,14 +108,14 @@ test_span_dir_ips()
icmp_capture_uninstall $dev
}
-fail_test_span_dir()
-{
- fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
-}
-
test_span_dir()
{
- test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+ local dev=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+
+ test_span_dir_ips "$dev" "$forward_type" "$backward_type" \
+ 192.0.2.1 192.0.2.2
}
do_test_span_vlan_dir_ips()
@@ -114,7 +123,6 @@ do_test_span_vlan_dir_ips()
local expect=$1; shift
local dev=$1; shift
local vid=$1; shift
- local direction=$1; shift
local ul_proto=$1; shift
local ip1=$1; shift
local ip2=$1; shift
@@ -123,27 +131,50 @@ do_test_span_vlan_dir_ips()
# The traffic is meant for local box anyway, so will be trapped to
# kernel.
vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype $ul_proto"
- mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
- mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+ mirror_test v$h1 $ip1 $ip2 $dev 100 "$expect"
+ mirror_test v$h2 $ip2 $ip1 $dev 100 "$expect"
vlan_capture_uninstall $dev
}
quick_test_span_vlan_dir_ips()
{
- do_test_span_vlan_dir_ips 10 "$@"
+ local dev=$1; shift
+ local vid=$1; shift
+ local ul_proto=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ do_test_span_vlan_dir_ips '>= 10' "$dev" "$vid" "$ul_proto" \
+ "$ip1" "$ip2"
}
fail_test_span_vlan_dir_ips()
{
- do_test_span_vlan_dir_ips 0 "$@"
+ local dev=$1; shift
+ local vid=$1; shift
+ local ul_proto=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ do_test_span_vlan_dir_ips 0 "$dev" "$vid" "$ul_proto" "$ip1" "$ip2"
}
quick_test_span_vlan_dir()
{
- quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
+ local dev=$1; shift
+ local vid=$1; shift
+ local ul_proto=$1; shift
+
+ quick_test_span_vlan_dir_ips "$dev" "$vid" "$ul_proto" \
+ 192.0.2.1 192.0.2.2
}
fail_test_span_vlan_dir()
{
- fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
+ local dev=$1; shift
+ local vid=$1; shift
+ local ul_proto=$1; shift
+
+ fail_test_span_vlan_dir_ips "$dev" "$vid" "$ul_proto" \
+ 192.0.2.1 192.0.2.2
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
index 0b44e148235e..2f150a414d38 100755
--- a/tools/testing/selftests/net/forwarding/mirror_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
@@ -40,12 +40,16 @@ setup_prepare()
vlan_create $h2 111 v$h2 192.0.2.18/28
bridge vlan add dev $swp2 vid 111
+
+ trap_install $h3 ingress
}
cleanup()
{
pre_cleanup
+ trap_uninstall $h3 ingress
+
vlan_destroy $h2 111
vlan_destroy $h1 111
vlan_destroy $h3 555
@@ -63,11 +67,11 @@ test_vlan_dir()
RET=0
- mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
- test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type"
+ mirror_install $swp1 $direction $swp3.555 "matchall"
+ test_span_dir "$h3.555" "$forward_type" "$backward_type"
mirror_uninstall $swp1 $direction
- log_test "$direction mirror to vlan ($tcflags)"
+ log_test "$direction mirror to vlan"
}
test_vlan()
@@ -84,14 +88,12 @@ test_tagged_vlan_dir()
RET=0
- mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
- do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" ip \
- 192.0.2.17 192.0.2.18
- do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" ip \
- 192.0.2.17 192.0.2.18
+ mirror_install $swp1 $direction $swp3.555 "matchall"
+ do_test_span_vlan_dir_ips '>= 10' "$h3.555" 111 ip 192.0.2.17 192.0.2.18
+ do_test_span_vlan_dir_ips 0 "$h3.555" 555 ip 192.0.2.17 192.0.2.18
mirror_uninstall $swp1 $direction
- log_test "$direction mirror tagged to vlan ($tcflags)"
+ log_test "$direction mirror tagged to vlan"
}
test_tagged_vlan()
@@ -100,32 +102,11 @@ test_tagged_vlan()
test_tagged_vlan_dir egress 0 8
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
- trap_install $h3 ingress
-
- tests_run
-
- trap_uninstall $h3 ingress
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index 6f0a2e452ba1..3f9d50f1ef9e 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -680,9 +680,9 @@ test_learning()
local mac=de:ad:be:ef:13:37
local dst=192.0.2.100
- # Enable learning on the VxLAN device and set ageing time to 10 seconds
- ip link set dev br1 type bridge ageing_time 1000
- ip link set dev vx1 type vxlan ageing 10
+ # Enable learning on the VxLAN device and set ageing time to 30 seconds
+ ip link set dev br1 type bridge ageing_time 3000
+ ip link set dev vx1 type vxlan ageing 30
ip link set dev vx1 type vxlan learning
reapply_config
@@ -740,7 +740,7 @@ test_learning()
vxlan_flood_test $mac $dst 0 10 0
- sleep 20
+ sleep 60
bridge fdb show brport vx1 | grep $mac | grep -q self
check_fail $?
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 915f319bcc8b..d0219032f773 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -199,10 +199,10 @@ tc_rule_stats_get()
{
local dev=$1; shift
local pref=$1; shift
- local dir=$1; shift
+ local dir=${1:-ingress}; shift
local selector=${1:-.packets}; shift
- tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \
+ tc -j -s filter show dev $dev $dir pref $pref \
| jq ".[1].options.actions[].stats$selector"
}
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index b6ce3f33d41e..f26c20df9db4 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -6,6 +6,7 @@ import sys
import time
import traceback
from .consts import KSFT_MAIN_NAME
+from .utils import global_defer_queue
KSFT_RESULT = None
KSFT_RESULT_ALL = True
@@ -108,6 +109,24 @@ def ktap_result(ok, cnt=1, case="", comment=""):
print(res)
+def ksft_flush_defer():
+ global KSFT_RESULT
+
+ i = 0
+ qlen_start = len(global_defer_queue)
+ while global_defer_queue:
+ i += 1
+ entry = global_defer_queue.pop()
+ try:
+ entry.exec_only()
+ except:
+ ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!")
+ tb = traceback.format_exc()
+ for line in tb.strip().split('\n'):
+ ksft_pr("Defer Exception|", line)
+ KSFT_RESULT = False
+
+
def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cases = cases or []
@@ -127,32 +146,41 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
global KSFT_RESULT
cnt = 0
+ stop = False
for case in cases:
KSFT_RESULT = True
cnt += 1
+ comment = ""
+ cnt_key = ""
+
try:
case(*args)
except KsftSkipEx as e:
- ktap_result(True, cnt, case, comment="SKIP " + str(e))
- totals['skip'] += 1
- continue
+ comment = "SKIP " + str(e)
+ cnt_key = 'skip'
except KsftXfailEx as e:
- ktap_result(True, cnt, case, comment="XFAIL " + str(e))
- totals['xfail'] += 1
- continue
- except Exception as e:
+ comment = "XFAIL " + str(e)
+ cnt_key = 'xfail'
+ except BaseException as e:
+ stop |= isinstance(e, KeyboardInterrupt)
tb = traceback.format_exc()
for line in tb.strip().split('\n'):
ksft_pr("Exception|", line)
- ktap_result(False, cnt, case)
- totals['fail'] += 1
- continue
-
- ktap_result(KSFT_RESULT, cnt, case)
- if KSFT_RESULT:
- totals['pass'] += 1
- else:
- totals['fail'] += 1
+ if stop:
+ ksft_pr("Stopping tests due to KeyboardInterrupt.")
+ KSFT_RESULT = False
+ cnt_key = 'fail'
+
+ ksft_flush_defer()
+
+ if not cnt_key:
+ cnt_key = 'pass' if KSFT_RESULT else 'fail'
+
+ ktap_result(KSFT_RESULT, cnt, case, comment=comment)
+ totals[cnt_key] += 1
+
+ if stop:
+ break
print(
f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0"
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index 405aa510aaf2..72590c3f90f1 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -66,6 +66,40 @@ class bkg(cmd):
return self.process(terminate=self.terminate, fail=self.check_fail)
+global_defer_queue = []
+
+
+class defer:
+ def __init__(self, func, *args, **kwargs):
+ global global_defer_queue
+
+ if not callable(func):
+ raise Exception("defer created with un-callable object, did you call the function instead of passing its name?")
+
+ self.func = func
+ self.args = args
+ self.kwargs = kwargs
+
+ self._queue = global_defer_queue
+ self._queue.append(self)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ return self.exec()
+
+ def exec_only(self):
+ self.func(*self.args, **self.kwargs)
+
+ def cancel(self):
+ self._queue.remove(self)
+
+ def exec(self):
+ self.cancel()
+ self.exec_only()
+
+
def tool(name, args, json=None, ns=None, host=None):
cmd_str = name + ' '
if json:
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index bdc03a2097e8..7ea5fb28c93d 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -85,6 +85,7 @@ static bool cfg_rx;
static int cfg_runtime_ms = 4200;
static int cfg_verbose;
static int cfg_waittime_ms = 500;
+static int cfg_notification_limit = 32;
static bool cfg_zerocopy;
static socklen_t cfg_alen;
@@ -95,6 +96,7 @@ static char payload[IP_MAXPACKET];
static long packets, bytes, completions, expected_completions;
static int zerocopied = -1;
static uint32_t next_completion;
+static uint32_t sends_since_notify;
static unsigned long gettimeofday_ms(void)
{
@@ -208,6 +210,7 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
error(1, errno, "send");
if (cfg_verbose && ret != len)
fprintf(stderr, "send: ret=%u != %u\n", ret, len);
+ sends_since_notify++;
if (len) {
packets++;
@@ -435,7 +438,7 @@ static bool do_recv_completion(int fd, int domain)
/* Detect notification gaps. These should not happen often, if at all.
* Gaps can occur due to drops, reordering and retransmissions.
*/
- if (lo != next_completion)
+ if (cfg_verbose && lo != next_completion)
fprintf(stderr, "gap: %u..%u does not append to %u\n",
lo, hi, next_completion);
next_completion = hi + 1;
@@ -460,6 +463,7 @@ static bool do_recv_completion(int fd, int domain)
static void do_recv_completions(int fd, int domain)
{
while (do_recv_completion(fd, domain)) {}
+ sends_since_notify = 0;
}
/* Wait for all remaining completions on the errqueue */
@@ -549,6 +553,9 @@ static void do_tx(int domain, int type, int protocol)
else
do_sendmsg(fd, &msg, cfg_zerocopy, domain);
+ if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit)
+ do_recv_completions(fd, domain);
+
while (!do_poll(fd, POLLOUT)) {
if (cfg_zerocopy)
do_recv_completions(fd, domain);
@@ -708,7 +715,7 @@ static void parse_opts(int argc, char **argv)
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
+ while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
switch (c) {
case '4':
if (cfg_family != PF_UNSPEC)
@@ -736,6 +743,9 @@ static void parse_opts(int argc, char **argv)
if (cfg_ifindex == 0)
error(1, errno, "invalid iface: %s", optarg);
break;
+ case 'l':
+ cfg_notification_limit = strtoul(optarg, NULL, 0);
+ break;
case 'm':
cfg_cork_mixed = true;
break;
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 8538f08c64c2..c61d23a8c88d 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -375,6 +375,42 @@ EOF
wait 2>/dev/null
}
+test_queue_removal()
+{
+ read tainted_then < /proc/sys/kernel/tainted
+
+ ip netns exec "$ns1" nft -f - <<EOF
+flush ruleset
+table ip filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ ip protocol icmp queue num 0
+ }
+}
+EOF
+ ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" &
+ local nfqpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0
+
+ ip netns exec "$ns1" ping -w 2 -f -c 10 127.0.0.1 -q >/dev/null
+ kill $nfqpid
+
+ ip netns exec "$ns1" nft flush ruleset
+
+ if [ "$tainted_then" -ne 0 ];then
+ return
+ fi
+
+ read tainted_now < /proc/sys/kernel/tainted
+ if [ "$tainted_now" -eq 0 ];then
+ echo "PASS: queue program exiting while packets queued"
+ else
+ echo "TAINT: queue program exiting while packets queued"
+ ret=1
+ fi
+}
+
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
@@ -413,5 +449,6 @@ test_tcp_localhost
test_tcp_localhost_connectclose
test_tcp_localhost_requeue
test_icmp_vrf
+test_queue_removal
exit $ret
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index 15bca0708717..bc71dbc18b21 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -20,10 +20,13 @@ tests="
nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT
netlink_checks ovsnl: validate netlink attrs and settings
upcall_interfaces ovs: test the upcall interfaces
- drop_reason drop: test drop reasons are emitted"
+ drop_reason drop: test drop reasons are emitted
+ psample psample: Sampling packets with psample"
info() {
- [ $VERBOSE = 0 ] || echo $*
+ [ "${ovs_dir}" != "" ] &&
+ echo "`date +"[%m-%d %H:%M:%S]"` $*" >> ${ovs_dir}/debug.log
+ [ $VERBOSE = 0 ] || echo $*
}
ovs_base=`pwd`
@@ -65,7 +68,8 @@ ovs_setenv() {
ovs_sbx() {
if test "X$2" != X; then
- (ovs_setenv $1; shift; "$@" >> ${ovs_dir}/debug.log)
+ (ovs_setenv $1; shift;
+ info "run cmd: $@"; "$@" >> ${ovs_dir}/debug.log)
else
ovs_setenv $1
fi
@@ -102,12 +106,21 @@ ovs_netns_spawn_daemon() {
shift
netns=$1
shift
- info "spawning cmd: $*"
- ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr &
+ if [ "$netns" == "_default" ]; then
+ $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr &
+ else
+ ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr &
+ fi
pid=$!
ovs_sbx "$sbx" on_exit "kill -TERM $pid 2>/dev/null"
}
+ovs_spawn_daemon() {
+ sbx=$1
+ shift
+ ovs_netns_spawn_daemon $sbx "_default" $*
+}
+
ovs_add_netns_and_veths () {
info "Adding netns attached: sbx:$1 dp:$2 {$3, $4, $5}"
ovs_sbx "$1" ip netns add "$3" || return 1
@@ -139,7 +152,7 @@ ovs_add_flow () {
info "Adding flow to DP: sbx:$1 br:$2 flow:$3 act:$4"
ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-flow "$2" "$3" "$4"
if [ $? -ne 0 ]; then
- echo "Flow [ $3 : $4 ] failed" >> ${ovs_dir}/debug.log
+ info "Flow [ $3 : $4 ] failed"
return 1
fi
return 0
@@ -170,6 +183,19 @@ ovs_drop_reason_count()
return `echo "$perf_output" | grep "$pattern" | wc -l`
}
+ovs_test_flow_fails () {
+ ERR_MSG="Flow actions may not be safe on all matching packets"
+
+ PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
+ ovs_add_flow $@ &> /dev/null $@ && return 1
+ POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
+
+ if [ "$PRE_TEST" == "$POST_TEST" ]; then
+ return 1
+ fi
+ return 0
+}
+
usage() {
echo
echo "$0 [OPTIONS] [TEST]..."
@@ -184,6 +210,92 @@ usage() {
exit 1
}
+
+# psample test
+# - use psample to observe packets
+test_psample() {
+ sbx_add "test_psample" || return $?
+
+ # Add a datapath with per-vport dispatching.
+ ovs_add_dp "test_psample" psample -V 2:1 || return 1
+
+ info "create namespaces"
+ ovs_add_netns_and_veths "test_psample" "psample" \
+ client c0 c1 172.31.110.10/24 -u || return 1
+ ovs_add_netns_and_veths "test_psample" "psample" \
+ server s0 s1 172.31.110.20/24 -u || return 1
+
+ # Check if psample actions can be configured.
+ ovs_add_flow "test_psample" psample \
+ 'in_port(1),eth(),eth_type(0x0806),arp()' 'psample(group=1)' &> /dev/null
+ if [ $? == 1 ]; then
+ info "no support for psample - skipping"
+ ovs_exit_sig
+ return $ksft_skip
+ fi
+
+ ovs_del_flows "test_psample" psample
+
+ # Test action verification.
+ OLDIFS=$IFS
+ IFS='*'
+ min_key='in_port(1),eth(),eth_type(0x0800),ipv4()'
+ for testcase in \
+ "cookie to large"*"psample(group=1,cookie=1615141312111009080706050403020100)" \
+ "no group with cookie"*"psample(cookie=abcd)" \
+ "no group"*"psample()";
+ do
+ set -- $testcase;
+ ovs_test_flow_fails "test_psample" psample $min_key $2
+ if [ $? == 1 ]; then
+ info "failed - $1"
+ return 1
+ fi
+ done
+ IFS=$OLDIFS
+
+ ovs_del_flows "test_psample" psample
+ # Allow ARP
+ ovs_add_flow "test_psample" psample \
+ 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
+ ovs_add_flow "test_psample" psample \
+ 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1
+
+ # Sample first 14 bytes of all traffic.
+ ovs_add_flow "test_psample" psample \
+ "in_port(1),eth(),eth_type(0x0800),ipv4()" \
+ "trunc(14),psample(group=1,cookie=c0ffee),2"
+
+ # Sample all traffic. In this case, use a sample() action with both
+ # psample and an upcall emulating simultaneous local sampling and
+ # sFlow / IPFIX.
+ nlpid=$(grep -E "listening on upcall packet handler" \
+ $ovs_dir/s0.out | cut -d ":" -f 2 | tr -d ' ')
+
+ ovs_add_flow "test_psample" psample \
+ "in_port(2),eth(),eth_type(0x0800),ipv4()" \
+ "sample(sample=100%,actions(psample(group=2,cookie=eeff0c),userspace(pid=${nlpid},userdata=eeff0c))),1"
+
+ # Record psample data.
+ ovs_spawn_daemon "test_psample" python3 $ovs_base/ovs-dpctl.py psample-events
+
+ # Send a single ping.
+ sleep 1
+ ovs_sbx "test_psample" ip netns exec client ping -I c1 172.31.110.20 -c 1 || return 1
+ sleep 1
+
+ # We should have received one userspace action upcall and 2 psample packets.
+ grep -E "userspace action command" $ovs_dir/s0.out >/dev/null 2>&1 || return 1
+
+ # client -> server samples should only contain the first 14 bytes of the packet.
+ grep -E "rate:4294967295,group:1,cookie:c0ffee data:[0-9a-f]{28}$" \
+ $ovs_dir/stdout >/dev/null 2>&1 || return 1
+ grep -E "rate:4294967295,group:2,cookie:eeff0c" \
+ $ovs_dir/stdout >/dev/null 2>&1 || return 1
+
+ return 0
+}
+
# drop_reason test
# - drop packets and verify the right drop reason is reported
test_drop_reason() {
@@ -613,16 +725,20 @@ run_test() {
tname="$1"
tdesc="$2"
- if ! lsmod | grep openvswitch >/dev/null 2>&1; then
- stdbuf -o0 printf "TEST: %-60s [NOMOD]\n" "${tdesc}"
- return $ksft_skip
- fi
-
if python3 ovs-dpctl.py -h 2>&1 | \
grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then
stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}"
return $ksft_skip
fi
+
+ python3 ovs-dpctl.py show >/dev/null 2>&1 || \
+ echo "[DPCTL] show exception."
+
+ if ! lsmod | grep openvswitch >/dev/null 2>&1; then
+ stdbuf -o0 printf "TEST: %-60s [NOMOD]\n" "${tdesc}"
+ return $ksft_skip
+ fi
+
printf "TEST: %-60s [START]\n" "${tname}"
unset IFS
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 182a09975975..1e15b0818074 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -8,6 +8,7 @@ import argparse
import errno
import ipaddress
import logging
+import math
import multiprocessing
import re
import socket
@@ -27,8 +28,10 @@ try:
from pyroute2.netlink import genlmsg
from pyroute2.netlink import nla
from pyroute2.netlink import nlmsg_atoms
+ from pyroute2.netlink.event import EventSocket
from pyroute2.netlink.exceptions import NetlinkError
from pyroute2.netlink.generic import GenericNetlinkSocket
+ from pyroute2.netlink.nlsocket import Marshal
import pyroute2
import pyroute2.iproute
@@ -60,6 +63,7 @@ OVS_FLOW_CMD_DEL = 2
OVS_FLOW_CMD_GET = 3
OVS_FLOW_CMD_SET = 4
+UINT32_MAX = 0xFFFFFFFF
def macstr(mac):
outstr = ":".join(["%02X" % i for i in mac])
@@ -281,6 +285,75 @@ def parse_extract_field(
return str_skipped, data
+def parse_attrs(actstr, attr_desc):
+ """Parses the given action string and returns a list of netlink
+ attributes based on a list of attribute descriptions.
+
+ Each element in the attribute description list is a tuple such as:
+ (name, attr_name, parse_func)
+ where:
+ name: is the string representing the attribute
+ attr_name: is the name of the attribute as defined in the uAPI.
+ parse_func: is a callable accepting a string and returning either
+ a single object (the parsed attribute value) or a tuple of
+ two values (the parsed attribute value and the remaining string)
+
+ Returns a list of attributes and the remaining string.
+ """
+ def parse_attr(actstr, key, func):
+ actstr = actstr[len(key) :]
+
+ if not func:
+ return None, actstr
+
+ delim = actstr[0]
+ actstr = actstr[1:]
+
+ if delim == "=":
+ pos = strcspn(actstr, ",)")
+ ret = func(actstr[:pos])
+ else:
+ ret = func(actstr)
+
+ if isinstance(ret, tuple):
+ (datum, actstr) = ret
+ else:
+ datum = ret
+ actstr = actstr[strcspn(actstr, ",)"):]
+
+ if delim == "(":
+ if not actstr or actstr[0] != ")":
+ raise ValueError("Action contains unbalanced parentheses")
+
+ actstr = actstr[1:]
+
+ actstr = actstr[strspn(actstr, ", ") :]
+
+ return datum, actstr
+
+ attrs = []
+ attr_desc = list(attr_desc)
+ while actstr and actstr[0] != ")" and attr_desc:
+ found = False
+ for i, (key, attr, func) in enumerate(attr_desc):
+ if actstr.startswith(key):
+ datum, actstr = parse_attr(actstr, key, func)
+ attrs.append([attr, datum])
+ found = True
+ del attr_desc[i]
+
+ if not found:
+ raise ValueError("Unknown attribute: '%s'" % actstr)
+
+ actstr = actstr[strspn(actstr, ", ") :]
+
+ if actstr[0] != ")":
+ raise ValueError("Action string contains extra garbage or has "
+ "unbalanced parenthesis: '%s'" % actstr)
+
+ return attrs, actstr[1:]
+
+
class ovs_dp_msg(genlmsg):
# include the OVS version
# We need a custom header rather than just being able to rely on
@@ -299,7 +372,7 @@ class ovsactions(nla):
("OVS_ACTION_ATTR_SET", "ovskey"),
("OVS_ACTION_ATTR_PUSH_VLAN", "none"),
("OVS_ACTION_ATTR_POP_VLAN", "flag"),
- ("OVS_ACTION_ATTR_SAMPLE", "none"),
+ ("OVS_ACTION_ATTR_SAMPLE", "sample"),
("OVS_ACTION_ATTR_RECIRC", "uint32"),
("OVS_ACTION_ATTR_HASH", "none"),
("OVS_ACTION_ATTR_PUSH_MPLS", "none"),
@@ -318,8 +391,85 @@ class ovsactions(nla):
("OVS_ACTION_ATTR_ADD_MPLS", "none"),
("OVS_ACTION_ATTR_DEC_TTL", "none"),
("OVS_ACTION_ATTR_DROP", "uint32"),
+ ("OVS_ACTION_ATTR_PSAMPLE", "psample"),
)
+ class psample(nla):
+ nla_flags = NLA_F_NESTED
+
+ nla_map = (
+ ("OVS_PSAMPLE_ATTR_UNSPEC", "none"),
+ ("OVS_PSAMPLE_ATTR_GROUP", "uint32"),
+ ("OVS_PSAMPLE_ATTR_COOKIE", "array(uint8)"),
+ )
+
+ def dpstr(self, more=False):
+ args = "group=%d" % self.get_attr("OVS_PSAMPLE_ATTR_GROUP")
+
+ cookie = self.get_attr("OVS_PSAMPLE_ATTR_COOKIE")
+ if cookie:
+ args += ",cookie(%s)" % \
+ "".join(format(x, "02x") for x in cookie)
+
+ return "psample(%s)" % args
+
+ def parse(self, actstr):
+ desc = (
+ ("group", "OVS_PSAMPLE_ATTR_GROUP", int),
+ ("cookie", "OVS_PSAMPLE_ATTR_COOKIE",
+ lambda x: list(bytearray.fromhex(x)))
+ )
+
+ attrs, actstr = parse_attrs(actstr, desc)
+
+ for attr in attrs:
+ self["attrs"].append(attr)
+
+ return actstr
+
+ class sample(nla):
+ nla_flags = NLA_F_NESTED
+
+ nla_map = (
+ ("OVS_SAMPLE_ATTR_UNSPEC", "none"),
+ ("OVS_SAMPLE_ATTR_PROBABILITY", "uint32"),
+ ("OVS_SAMPLE_ATTR_ACTIONS", "ovsactions"),
+ )
+
+ def dpstr(self, more=False):
+ args = []
+
+ args.append("sample={:.2f}%".format(
+ 100 * self.get_attr("OVS_SAMPLE_ATTR_PROBABILITY") /
+ UINT32_MAX))
+
+ actions = self.get_attr("OVS_SAMPLE_ATTR_ACTIONS")
+ if actions:
+ args.append("actions(%s)" % actions.dpstr(more))
+
+ return "sample(%s)" % ",".join(args)
+
+ def parse(self, actstr):
+ def parse_nested_actions(actstr):
+ subacts = ovsactions()
+ parsed_len = subacts.parse(actstr)
+ return subacts, actstr[parsed_len :]
+
+ def percent_to_rate(percent):
+ percent = float(percent.strip('%'))
+ return int(math.floor(UINT32_MAX * (percent / 100.0) + .5))
+
+ desc = (
+ ("sample", "OVS_SAMPLE_ATTR_PROBABILITY", percent_to_rate),
+ ("actions", "OVS_SAMPLE_ATTR_ACTIONS", parse_nested_actions),
+ )
+ attrs, actstr = parse_attrs(actstr, desc)
+
+ for attr in attrs:
+ self["attrs"].append(attr)
+
+ return actstr
+
class ctact(nla):
nla_flags = NLA_F_NESTED
@@ -441,13 +591,27 @@ class ovsactions(nla):
print_str += "userdata="
for f in self.get_attr("OVS_USERSPACE_ATTR_USERDATA"):
print_str += "%x." % f
- if self.get_attr("OVS_USERSPACE_ATTR_TUN_PORT") is not None:
+ if self.get_attr("OVS_USERSPACE_ATTR_EGRESS_TUN_PORT") is not None:
print_str += "egress_tun_port=%d" % self.get_attr(
- "OVS_USERSPACE_ATTR_TUN_PORT"
+ "OVS_USERSPACE_ATTR_EGRESS_TUN_PORT"
)
print_str += ")"
return print_str
+ def parse(self, actstr):
+ attrs_desc = (
+ ("pid", "OVS_USERSPACE_ATTR_PID", int),
+ ("userdata", "OVS_USERSPACE_ATTR_USERDATA",
+ lambda x: list(bytearray.fromhex(x))),
+ ("egress_tun_port", "OVS_USERSPACE_ATTR_EGRESS_TUN_PORT", int)
+ )
+
+ attrs, actstr = parse_attrs(actstr, attrs_desc)
+ for attr in attrs:
+ self["attrs"].append(attr)
+
+ return actstr
+
def dpstr(self, more=False):
print_str = ""
@@ -683,6 +847,37 @@ class ovsactions(nla):
self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact])
parsed = True
+ elif parse_starts_block(actstr, "sample(", False):
+ sampleact = self.sample()
+ actstr = sampleact.parse(actstr[len("sample(") : ])
+ self["attrs"].append(["OVS_ACTION_ATTR_SAMPLE", sampleact])
+ parsed = True
+
+ elif parse_starts_block(actstr, "psample(", False):
+ psampleact = self.psample()
+ actstr = psampleact.parse(actstr[len("psample(") : ])
+ self["attrs"].append(["OVS_ACTION_ATTR_PSAMPLE", psampleact])
+ parsed = True
+
+ elif parse_starts_block(actstr, "userspace(", False):
+ uact = self.userspace()
+ actstr = uact.parse(actstr[len("userspace(") : ])
+ self["attrs"].append(["OVS_ACTION_ATTR_USERSPACE", uact])
+ parsed = True
+
+ elif parse_starts_block(actstr, "trunc(", False):
+ parencount += 1
+ actstr, val = parse_extract_field(
+ actstr,
+ "trunc(",
+ r"([0-9]+)",
+ int,
+ False,
+ None,
+ )
+ self["attrs"].append(["OVS_ACTION_ATTR_TRUNC", val])
+ parsed = True
+
actstr = actstr[strspn(actstr, ", ") :]
while parencount > 0:
parencount -= 1
@@ -2267,10 +2462,70 @@ class OvsFlow(GenericNetlinkSocket):
print("MISS upcall[%d/%s]: %s" % (seq, pktpres, keystr), flush=True)
def execute(self, packetmsg):
- print("userspace execute command")
+ print("userspace execute command", flush=True)
def action(self, packetmsg):
- print("userspace action command")
+ print("userspace action command", flush=True)
+
+
+class psample_sample(genlmsg):
+ nla_map = (
+ ("PSAMPLE_ATTR_IIFINDEX", "none"),
+ ("PSAMPLE_ATTR_OIFINDEX", "none"),
+ ("PSAMPLE_ATTR_ORIGSIZE", "none"),
+ ("PSAMPLE_ATTR_SAMPLE_GROUP", "uint32"),
+ ("PSAMPLE_ATTR_GROUP_SEQ", "none"),
+ ("PSAMPLE_ATTR_SAMPLE_RATE", "uint32"),
+ ("PSAMPLE_ATTR_DATA", "array(uint8)"),
+ ("PSAMPLE_ATTR_GROUP_REFCOUNT", "none"),
+ ("PSAMPLE_ATTR_TUNNEL", "none"),
+ ("PSAMPLE_ATTR_PAD", "none"),
+ ("PSAMPLE_ATTR_OUT_TC", "none"),
+ ("PSAMPLE_ATTR_OUT_TC_OCC", "none"),
+ ("PSAMPLE_ATTR_LATENCY", "none"),
+ ("PSAMPLE_ATTR_TIMESTAMP", "none"),
+ ("PSAMPLE_ATTR_PROTO", "none"),
+ ("PSAMPLE_ATTR_USER_COOKIE", "array(uint8)"),
+ )
+
+ def dpstr(self):
+ fields = []
+ data = ""
+ for (attr, value) in self["attrs"]:
+ if attr == "PSAMPLE_ATTR_SAMPLE_GROUP":
+ fields.append("group:%d" % value)
+ if attr == "PSAMPLE_ATTR_SAMPLE_RATE":
+ fields.append("rate:%d" % value)
+ if attr == "PSAMPLE_ATTR_USER_COOKIE":
+ value = "".join(format(x, "02x") for x in value)
+ fields.append("cookie:%s" % value)
+ if attr == "PSAMPLE_ATTR_DATA" and len(value) > 0:
+ data = "data:%s" % "".join(format(x, "02x") for x in value)
+
+ return ("%s %s" % (",".join(fields), data)).strip()
+
+
+class psample_msg(Marshal):
+ PSAMPLE_CMD_SAMPLE = 0
+ PSAMPLE_CMD_GET_GROUP = 1
+ PSAMPLE_CMD_NEW_GROUP = 2
+ PSAMPLE_CMD_DEL_GROUP = 3
+ PSAMPLE_CMD_SET_FILTER = 4
+ msg_map = {PSAMPLE_CMD_SAMPLE: psample_sample}
+
+
+class PsampleEvent(EventSocket):
+ genl_family = "psample"
+ mcast_groups = ["packets"]
+ marshal_class = psample_msg
+
+ def read_samples(self):
+ while True:
+ try:
+ for msg in self.get():
+ print(msg.dpstr(), flush=True)
+ except NetlinkError as ne:
+ raise ne
def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()):
@@ -2337,7 +2592,7 @@ def main(argv):
help="Increment 'verbose' output counter.",
default=0,
)
- subparsers = parser.add_subparsers()
+ subparsers = parser.add_subparsers(dest="subcommand")
showdpcmd = subparsers.add_parser("show")
showdpcmd.add_argument(
@@ -2412,6 +2667,8 @@ def main(argv):
delfscmd = subparsers.add_parser("del-flows")
delfscmd.add_argument("flsbr", help="Datapath name")
+ subparsers.add_parser("psample-events")
+
args = parser.parse_args()
if args.verbose > 0:
@@ -2426,6 +2683,9 @@ def main(argv):
sys.setrecursionlimit(100000)
+ if args.subcommand == "psample-events":
+ PsampleEvent().read_samples()
+
if hasattr(args, "showdp"):
found = False
for iface in ndb.interfaces:
diff --git a/tools/testing/selftests/net/openvswitch/settings b/tools/testing/selftests/net/openvswitch/settings
new file mode 100644
index 000000000000..e2206265f67c
--- /dev/null
+++ b/tools/testing/selftests/net/openvswitch/settings
@@ -0,0 +1 @@
+timeout=900
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 85b3baa3f7f3..3e74cfa1a2bf 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -53,6 +53,7 @@ static bool cfg_do_ipv6;
static bool cfg_do_connected;
static bool cfg_do_connectionless;
static bool cfg_do_msgmore;
+static bool cfg_do_recv = true;
static bool cfg_do_setsockopt;
static int cfg_specific_test_id = -1;
@@ -414,6 +415,9 @@ static void run_one(struct testcase *test, int fdt, int fdr,
if (!sent)
return;
+ if (!cfg_do_recv)
+ return;
+
if (test->gso_len)
mss = test->gso_len;
else
@@ -464,8 +468,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen)
if (fdr == -1)
error(1, errno, "socket r");
- if (bind(fdr, addr, alen))
- error(1, errno, "bind");
+ if (cfg_do_recv) {
+ if (bind(fdr, addr, alen))
+ error(1, errno, "bind");
+ }
/* Have tests fail quickly instead of hang */
if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
@@ -524,7 +530,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
+ while ((c = getopt(argc, argv, "46cCmRst:")) != -1) {
switch (c) {
case '4':
cfg_do_ipv4 = true;
@@ -541,6 +547,9 @@ static void parse_opts(int argc, char **argv)
case 'm':
cfg_do_msgmore = true;
break;
+ case 'R':
+ cfg_do_recv = false;
+ break;
case 's':
cfg_do_setsockopt = true;
break;
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
index 6c63178086b0..85d1fa3c1ff7 100755
--- a/tools/testing/selftests/net/udpgso.sh
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -27,6 +27,31 @@ test_route_mtu() {
ip route add local fd00::1/128 table local dev lo mtu 1500
}
+setup_dummy_sink() {
+ ip link add name sink mtu 1500 type dummy
+ ip addr add dev sink 10.0.0.0/24
+ ip addr add dev sink fd00::2/64 nodad
+ ip link set dev sink up
+}
+
+test_hw_gso_hw_csum() {
+ setup_dummy_sink
+ ethtool -K sink tx-checksum-ip-generic on >/dev/null
+ ethtool -K sink tx-udp-segmentation on >/dev/null
+}
+
+test_sw_gso_hw_csum() {
+ setup_dummy_sink
+ ethtool -K sink tx-checksum-ip-generic on >/dev/null
+ ethtool -K sink tx-udp-segmentation off >/dev/null
+}
+
+test_sw_gso_sw_csum() {
+ setup_dummy_sink
+ ethtool -K sink tx-checksum-ip-generic off >/dev/null
+ ethtool -K sink tx-udp-segmentation off >/dev/null
+}
+
if [ "$#" -gt 0 ]; then
"$1"
shift 2 # pop "test_*" arg and "--" delimiter
@@ -56,3 +81,21 @@ echo "ipv4 msg_more"
echo "ipv6 msg_more"
./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m
+
+echo "ipv4 hw-gso hw-csum"
+./in_netns.sh "$0" test_hw_gso_hw_csum -- ./udpgso -4 -C -R
+
+echo "ipv6 hw-gso hw-csum"
+./in_netns.sh "$0" test_hw_gso_hw_csum -- ./udpgso -6 -C -R
+
+echo "ipv4 sw-gso hw-csum"
+./in_netns.sh "$0" test_sw_gso_hw_csum -- ./udpgso -4 -C -R
+
+echo "ipv6 sw-gso hw-csum"
+./in_netns.sh "$0" test_sw_gso_hw_csum -- ./udpgso -6 -C -R
+
+echo "ipv4 sw-gso sw-csum"
+./in_netns.sh "$0" test_sw_gso_sw_csum -- ./udpgso -4 -C -R
+
+echo "ipv6 sw-gso sw-csum"
+./in_netns.sh "$0" test_sw_gso_sw_csum -- ./udpgso -6 -C -R
diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk
new file mode 100644
index 000000000000..59cb26cf3f73
--- /dev/null
+++ b/tools/testing/selftests/net/ynl.mk
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# YNL selftest build snippet
+
+# Inputs:
+#
+# YNL_GENS: families we need in the selftests
+# YNL_PROGS: TEST_PROGS which need YNL (TODO, none exist, yet)
+# YNL_GEN_FILES: TEST_GEN_FILES which need YNL
+
+YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES))
+
+$(YNL_OUTPUTS): $(OUTPUT)/libynl.a
+$(YNL_OUTPUTS): CFLAGS += \
+ -I$(top_srcdir)/usr/include/ $(KHDR_INCLUDES) \
+ -I$(top_srcdir)/tools/net/ynl/lib/ \
+ -I$(top_srcdir)/tools/net/ynl/generated/
+
+$(OUTPUT)/libynl.a:
+ $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a
+ $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a
diff --git a/tools/testing/selftests/powerpc/flags.mk b/tools/testing/selftests/powerpc/flags.mk
index b909bee3cb2a..abb9e58d95c4 100644
--- a/tools/testing/selftests/powerpc/flags.mk
+++ b/tools/testing/selftests/powerpc/flags.mk
@@ -5,8 +5,5 @@ GIT_VERSION := $(shell git describe --always --long --dirty || echo "unknown")
export GIT_VERSION
endif
-ifeq ($(CFLAGS),)
-CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(CFLAGS)
+CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(USERCFLAGS)
export CFLAGS
-endif
-
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index c7686fb6641a..55315ed695f4 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -291,11 +291,30 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param
return ret;
}
+static bool arch_supports_noncont_cat(const struct resctrl_test *test)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ /* AMD always supports non-contiguous CBM. */
+ if (get_vendor() == ARCH_AMD)
+ return true;
+
+ /* Intel support for non-contiguous CBM needs to be discovered. */
+ if (!strcmp(test->resource, "L3"))
+ __cpuid_count(0x10, 1, eax, ebx, ecx, edx);
+ else if (!strcmp(test->resource, "L2"))
+ __cpuid_count(0x10, 2, eax, ebx, ecx, edx);
+ else
+ return false;
+
+ return ((ecx >> 3) & 1);
+}
+
static int noncont_cat_run_test(const struct resctrl_test *test,
const struct user_params *uparams)
{
unsigned long full_cache_mask, cont_mask, noncont_mask;
- unsigned int eax, ebx, ecx, edx, sparse_masks;
+ unsigned int sparse_masks;
int bit_center, ret;
char schemata[64];
@@ -304,15 +323,8 @@ static int noncont_cat_run_test(const struct resctrl_test *test,
if (ret)
return ret;
- if (!strcmp(test->resource, "L3"))
- __cpuid_count(0x10, 1, eax, ebx, ecx, edx);
- else if (!strcmp(test->resource, "L2"))
- __cpuid_count(0x10, 2, eax, ebx, ecx, edx);
- else
- return -EINVAL;
-
- if (sparse_masks != ((ecx >> 3) & 1)) {
- ksft_print_msg("CPUID output doesn't match 'sparse_masks' file content!\n");
+ if (arch_supports_noncont_cat(test) != sparse_masks) {
+ ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n");
return 1;
}
diff --git a/tools/testing/selftests/riscv/sigreturn/sigreturn.c b/tools/testing/selftests/riscv/sigreturn/sigreturn.c
index 62397d5934f1..ed351a1cb917 100644
--- a/tools/testing/selftests/riscv/sigreturn/sigreturn.c
+++ b/tools/testing/selftests/riscv/sigreturn/sigreturn.c
@@ -51,7 +51,7 @@ static int vector_sigreturn(int data, void (*handler)(int, siginfo_t *, void *))
asm(".option push \n\
.option arch, +v \n\
- vsetivli x0, 1, e32, ta, ma \n\
+ vsetivli x0, 1, e32, m1, ta, ma \n\
vmv.s.x v0, %1 \n\
# Generate SIGSEGV \n\
lw a0, 0(x0) \n\
diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c
index e40dc5be2f66..d12ff955de0d 100644
--- a/tools/testing/selftests/timens/exec.c
+++ b/tools/testing/selftests/timens/exec.c
@@ -30,7 +30,7 @@ int main(int argc, char *argv[])
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now.tv_sec) > 5)
+ if (labs(tst.tv_sec - now.tv_sec) > 5)
return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec);
}
return 0;
@@ -50,7 +50,7 @@ int main(int argc, char *argv[])
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now.tv_sec) > 5)
+ if (labs(tst.tv_sec - now.tv_sec) > 5)
return pr_fail("%ld %ld\n",
now.tv_sec, tst.tv_sec);
}
@@ -70,7 +70,7 @@ int main(int argc, char *argv[])
/* Check that a child process is in the new timens. */
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now.tv_sec - OFFSET) > 5)
+ if (labs(tst.tv_sec - now.tv_sec - OFFSET) > 5)
return pr_fail("%ld %ld\n",
now.tv_sec + OFFSET, tst.tv_sec);
}
diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c
index 5e7f0051bd7b..5b939f59dfa4 100644
--- a/tools/testing/selftests/timens/timer.c
+++ b/tools/testing/selftests/timens/timer.c
@@ -56,7 +56,7 @@ int run_test(int clockid, struct timespec now)
return pr_perror("timerfd_gettime");
elapsed = new_value.it_value.tv_sec;
- if (abs(elapsed - 3600) > 60) {
+ if (llabs(elapsed - 3600) > 60) {
ksft_test_result_fail("clockid: %d elapsed: %lld\n",
clockid, elapsed);
return 1;
diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c
index 9edd43d6b2c1..a4196bbd6e33 100644
--- a/tools/testing/selftests/timens/timerfd.c
+++ b/tools/testing/selftests/timens/timerfd.c
@@ -61,7 +61,7 @@ int run_test(int clockid, struct timespec now)
return pr_perror("timerfd_gettime(%d)", clockid);
elapsed = new_value.it_value.tv_sec;
- if (abs(elapsed - 3600) > 60) {
+ if (llabs(elapsed - 3600) > 60) {
ksft_test_result_fail("clockid: %d elapsed: %lld\n",
clockid, elapsed);
return 1;
diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c
index beb7614941fb..5b8907bf451d 100644
--- a/tools/testing/selftests/timens/vfork_exec.c
+++ b/tools/testing/selftests/timens/vfork_exec.c
@@ -32,7 +32,7 @@ static void *tcheck(void *_args)
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now->tv_sec) > 5) {
+ if (labs(tst.tv_sec - now->tv_sec) > 5) {
pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n",
args->tst_name, tst.tv_sec, now->tv_sec);
return (void *)1UL;
@@ -64,7 +64,7 @@ static int check(char *tst_name, struct timespec *now)
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now->tv_sec) > 5)
+ if (labs(tst.tv_sec - now->tv_sec) > 5)
return pr_fail("%s: unexpected value: %ld (%ld)\n",
tst_name, tst.tv_sec, now->tv_sec);
}
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index d53a4d8008f9..98d8ba2afa00 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -1,35 +1,30 @@
# SPDX-License-Identifier: GPL-2.0
-include ../lib.mk
-
uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres
+TEST_GEN_PROGS := vdso_test_gettimeofday
+TEST_GEN_PROGS += vdso_test_getcpu
+TEST_GEN_PROGS += vdso_test_abi
+TEST_GEN_PROGS += vdso_test_clock_getres
ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
-TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
+TEST_GEN_PROGS += vdso_standalone_test_x86
endif
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness
+TEST_GEN_PROGS += vdso_test_correctness
CFLAGS := -std=gnu99
-CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
-LDFLAGS_vdso_test_correctness := -ldl
+
ifeq ($(CONFIG_X86_32),y)
LDLIBS += -lgcc_s
endif
-all: $(TEST_GEN_PROGS)
+include ../lib.mk
$(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
$(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
$(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c
+
$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
- $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
- vdso_standalone_test_x86.c parse_vdso.c \
- -o $@
+$(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
+
$(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c
- $(CC) $(CFLAGS) \
- vdso_test_correctness.c \
- -o $@ \
- $(LDFLAGS_vdso_test_correctness)
+$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
index 413f75620a35..4ae417372e9e 100644
--- a/tools/testing/selftests/vDSO/parse_vdso.c
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -55,14 +55,20 @@ static struct vdso_info
ELF(Verdef) *verdef;
} vdso_info;
-/* Straight from the ELF specification. */
-static unsigned long elf_hash(const unsigned char *name)
+/*
+ * Straight from the ELF specification...and then tweaked slightly, in order to
+ * avoid a few clang warnings.
+ */
+static unsigned long elf_hash(const char *name)
{
unsigned long h = 0, g;
- while (*name)
+ const unsigned char *uch_name = (const unsigned char *)name;
+
+ while (*uch_name)
{
- h = (h << 4) + *name++;
- if (g = h & 0xf0000000)
+ h = (h << 4) + *uch_name++;
+ g = h & 0xf0000000;
+ if (g)
h ^= g >> 24;
h &= ~g;
}
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
index 8a44ff973ee1..27f6fdf11969 100644
--- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -18,7 +18,7 @@
#include "parse_vdso.h"
-/* We need a libc functions... */
+/* We need some libc functions... */
int strcmp(const char *a, const char *b)
{
/* This implementation is buggy: it never returns -1. */
@@ -34,6 +34,20 @@ int strcmp(const char *a, const char *b)
return 0;
}
+/*
+ * The clang build needs this, although gcc does not.
+ * Stolen from lib/string.c.
+ */
+void *memcpy(void *dest, const void *src, size_t count)
+{
+ char *tmp = dest;
+ const char *s = src;
+
+ while (count--)
+ *tmp++ = *s++;
+ return dest;
+}
+
/* ...and two syscalls. This is x86-specific. */
static inline long x86_syscall3(long nr, long a0, long a1, long a2)
{
@@ -70,7 +84,7 @@ void to_base10(char *lastdig, time_t n)
}
}
-__attribute__((externally_visible)) void c_main(void **stack)
+void c_main(void **stack)
{
/* Parse the stack */
long argc = (long)*stack;
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index e95bd56b332f..35856b11c143 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -109,9 +109,9 @@ KERNEL_ARCH := x86_64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
-QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi
+QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off
else
-QEMU_MACHINE := -cpu max -machine microvm -no-acpi
+QEMU_MACHINE := -cpu max -machine microvm,acpi=off
endif
else ifeq ($(ARCH),i686)
CHOST := i686-linux-musl
@@ -120,9 +120,9 @@ KERNEL_ARCH := x86
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH))
-QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi
+QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off
else
-QEMU_MACHINE := -cpu coreduo -machine microvm -no-acpi
+QEMU_MACHINE := -cpu coreduo -machine microvm,acpi=off
endif
else ifeq ($(ARCH),mips64)
CHOST := mips64-linux-musl