aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.clang-format1
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml2
-rw-r--r--Documentation/loongarch/booting.rst42
-rw-r--r--Documentation/loongarch/index.rst1
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst46
-rw-r--r--Documentation/networking/devlink/devlink-port.rst122
-rw-r--r--Documentation/networking/ethtool-netlink.rst31
-rw-r--r--Documentation/networking/timestamping.rst32
-rw-r--r--Documentation/translations/zh_CN/loongarch/booting.rst48
-rw-r--r--Documentation/translations/zh_CN/loongarch/index.rst1
-rw-r--r--Documentation/virt/kvm/api.rst15
-rw-r--r--Documentation/virt/kvm/halt-polling.rst (renamed from Documentation/virt/kvm/x86/halt-polling.rst)13
-rw-r--r--Documentation/virt/kvm/index.rst1
-rw-r--r--Documentation/virt/kvm/x86/index.rst1
-rw-r--r--Makefile2
-rw-r--r--arch/arm/boot/dts/at91rm9200.dtsi2
-rw-r--r--arch/arm/mach-at91/sama5.c2
-rw-r--r--arch/arm64/include/asm/efi.h8
-rw-r--r--arch/arm64/kernel/efi-rt-wrapper.S33
-rw-r--r--arch/arm64/kernel/efi.c26
-rw-r--r--arch/arm64/mm/dma-mapping.c17
-rw-r--r--arch/arm64/mm/fault.c4
-rw-r--r--arch/loongarch/include/asm/pgtable.h1
-rw-r--r--arch/loongarch/include/asm/smp.h10
-rw-r--r--arch/loongarch/kernel/smp.c11
-rw-r--r--arch/loongarch/mm/tlbex.S30
-rw-r--r--arch/mips/include/asm/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/interrupt.h1
-rw-r--r--arch/powerpc/net/bpf_jit_comp32.c52
-rw-r--r--arch/riscv/Kconfig6
-rw-r--r--arch/riscv/include/asm/asm.h1
-rw-r--r--arch/riscv/include/asm/efi.h6
-rw-r--r--arch/riscv/include/asm/pgalloc.h11
-rw-r--r--arch/riscv/include/asm/pgtable.h1
-rw-r--r--arch/riscv/include/asm/smp.h3
-rw-r--r--arch/riscv/kernel/entry.S13
-rw-r--r--arch/riscv/kernel/machine_kexec.c46
-rw-r--r--arch/riscv/kernel/setup.c9
-rw-r--r--arch/riscv/kernel/smp.c97
-rw-r--r--arch/riscv/kernel/traps.c18
-rw-r--r--arch/riscv/kernel/vdso/Makefile1
-rw-r--r--arch/s390/include/asm/pgtable.h1
-rw-r--r--arch/s390/kvm/vsie.c4
-rw-r--r--arch/sparc/include/asm/pgtable_64.h1
-rw-r--r--arch/x86/include/asm/nospec-branch.h2
-rw-r--r--arch/x86/include/asm/pgtable.h9
-rw-r--r--arch/x86/kernel/cpu/bugs.c21
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--drivers/acpi/numa/hmat.c27
-rw-r--r--drivers/ata/libahci_platform.c2
-rw-r--r--drivers/bluetooth/btusb.c6
-rw-r--r--drivers/char/tpm/tpm-interface.c5
-rw-r--r--drivers/clk/at91/at91rm9200.c2
-rw-r--r--drivers/clk/qcom/gcc-sc8280xp.c6
-rw-r--r--drivers/clk/qcom/gdsc.c61
-rw-r--r--drivers/clk/qcom/gdsc.h2
-rw-r--r--drivers/clk/samsung/clk-exynos-clkout.c6
-rw-r--r--drivers/clk/samsung/clk-exynos7885.c4
-rw-r--r--drivers/clocksource/timer-riscv.c2
-rw-r--r--drivers/dax/hmem/device.c24
-rw-r--r--drivers/gpio/gpio-amd8111.c4
-rw-r--r--drivers/gpio/gpio-rockchip.c1
-rw-r--r--drivers/gpio/gpiolib.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c3
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig7
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.c2
-rw-r--r--drivers/gpu/drm/i915/intel_dram.c3
-rw-r--r--drivers/hid/hid-core.c3
-rw-r--r--drivers/hid/hid-ids.h4
-rw-r--r--drivers/hid/hid-ite.c5
-rw-r--r--drivers/hid/hid-lg4ff.c6
-rw-r--r--drivers/hid/hid-logitech-hidpp.c28
-rw-r--r--drivers/hid/hid-quirks.c3
-rw-r--r--drivers/hid/hid-uclogic-core.c1
-rw-r--r--drivers/hid/hid-uclogic-rdesc.c2
-rw-r--r--drivers/hid/i2c-hid/Kconfig4
-rw-r--r--drivers/hwmon/asus-ec-sensors.c2
-rw-r--r--drivers/hwmon/coretemp.c9
-rw-r--r--drivers/hwmon/i5500_temp.c2
-rw-r--r--drivers/hwmon/ibmpex.c1
-rw-r--r--drivers/hwmon/ina3221.c4
-rw-r--r--drivers/hwmon/ltc2947-core.c2
-rw-r--r--drivers/i2c/busses/i2c-cadence.c11
-rw-r--r--drivers/i2c/busses/i2c-imx.c6
-rw-r--r--drivers/i2c/busses/i2c-npcm7xx.c11
-rw-r--r--drivers/i2c/busses/i2c-qcom-geni.c1
-rw-r--r--drivers/i2c/i2c-core-base.c9
-rw-r--r--drivers/input/touchscreen/raydium_i2c_ts.c4
-rw-r--r--drivers/iommu/intel/dmar.c1
-rw-r--r--drivers/iommu/intel/iommu.c73
-rw-r--r--drivers/iommu/intel/iommu.h4
-rw-r--r--drivers/iommu/intel/svm.c19
-rw-r--r--drivers/media/common/videobuf2/frame_vector.c68
-rw-r--r--drivers/media/common/videobuf2/videobuf2-core.c102
-rw-r--r--drivers/mmc/core/core.c9
-rw-r--r--drivers/mmc/core/mmc_test.c3
-rw-r--r--drivers/mmc/host/mtk-sd.c6
-rw-r--r--drivers/mmc/host/sdhci-esdhc-imx.c2
-rw-r--r--drivers/mmc/host/sdhci-sprd.c4
-rw-r--r--drivers/mmc/host/sdhci.c61
-rw-r--r--drivers/mmc/host/sdhci.h2
-rw-r--r--drivers/net/bonding/bond_main.c2
-rw-r--r--drivers/net/can/can327.c17
-rw-r--r--drivers/net/can/slcan/slcan-core.c10
-rw-r--r--drivers/net/can/usb/esd_usb.c6
-rw-r--r--drivers/net/dsa/microchip/ksz8.h1
-rw-r--r--drivers/net/dsa/microchip/ksz8795.c75
-rw-r--r--drivers/net/dsa/microchip/ksz8795_reg.h3
-rw-r--r--drivers/net/dsa/microchip/ksz9477.c21
-rw-r--r--drivers/net/dsa/microchip/ksz9477.h1
-rw-r--r--drivers/net/dsa/microchip/ksz9477_reg.h2
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c116
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h14
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c7
-rw-r--r--drivers/net/dsa/sja1105/sja1105_devlink.c2
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c2
-rw-r--r--drivers/net/ethernet/aeroflex/greth.c1
-rw-r--r--drivers/net/ethernet/broadcom/Kconfig3
-rw-r--r--drivers/net/ethernet/broadcom/bnx2.c2
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c18
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c4
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c4
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c3
-rw-r--r--drivers/net/ethernet/hisilicon/hisi_femac.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hix5hd2_gmac.c2
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c12
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c19
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c546
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.h39
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp_hw.c336
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp_hw.h8
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c2
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.c78
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.h9
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h15
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rpm.c262
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rpm.h36
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h12
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c49
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c10
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c34
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c7
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c1
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.c50
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed_mcu.c3
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed_wo.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c86
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c402
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c94
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c436
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c235
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c79
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c300
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c77
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_definer.c151
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c69
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c160
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c102
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h4
-rw-r--r--drivers/net/ethernet/microchip/encx24j600-regmap.c4
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c19
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.h14
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c236
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c8
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c11
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c2
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.c3
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_packet.c41
-rw-r--r--drivers/net/ethernet/microchip/vcap/vcap_api.c828
-rw-r--r--drivers/net/ethernet/microchip/vcap/vcap_api_client.h8
-rw-r--r--drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c498
-rw-r--r--drivers/net/ethernet/microchip/vcap/vcap_api_private.h14
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c16
-rw-r--r--drivers/net/ethernet/netronome/nfp/ccm_mbox.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfdk/dp.c6
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net.h5
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c63
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h15
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ll2.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c2
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c2
-rw-r--r--drivers/net/ethernet/sfc/efx_common.c2
-rw-r--r--drivers/net/ethernet/sfc/siena/efx_common.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Kconfig9
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Makefile1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c388
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c7
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c8
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c199
-rw-r--r--drivers/net/ieee802154/ca8210.c2
-rw-r--r--drivers/net/ieee802154/cc2520.c2
-rw-r--r--drivers/net/ipa/ipa_sysfs.c6
-rw-r--r--drivers/net/macsec.c1
-rw-r--r--drivers/net/mdio/fwnode_mdio.c4
-rw-r--r--drivers/net/mdio/of_mdio.c3
-rw-r--r--drivers/net/phy/Kconfig3
-rw-r--r--drivers/net/phy/mdio_device.c2
-rw-r--r--drivers/net/phy/mxl-gpy.c93
-rw-r--r--drivers/net/phy/sfp.c77
-rw-r--r--drivers/net/plip/plip.c4
-rw-r--r--drivers/net/thunderbolt.c1
-rw-r--r--drivers/net/usb/asix_devices.c18
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c27
-rw-r--r--drivers/net/wireless/mediatek/mt76/dma.c2
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_mux.c1
-rw-r--r--drivers/net/xen-netback/common.h2
-rw-r--r--drivers/net/xen-netback/interface.c6
-rw-r--r--drivers/net/xen-netback/netback.c225
-rw-r--r--drivers/net/xen-netback/rx.c8
-rw-r--r--drivers/net/xen-netfront.c6
-rw-r--r--drivers/nvme/host/core.c2
-rw-r--r--drivers/nvme/host/multipath.c3
-rw-r--r--drivers/nvme/host/pci.c2
-rw-r--r--drivers/pinctrl/intel/pinctrl-intel.c27
-rw-r--r--drivers/pinctrl/mediatek/mtk-eint.c9
-rw-r--r--drivers/pinctrl/pinctrl-single.c2
-rw-r--r--drivers/platform/x86/amd/pmc.c6
-rw-r--r--drivers/s390/net/qeth_l2_main.c2
-rw-r--r--fs/afs/server.c2
-rw-r--r--fs/fscache/cookie.c8
-rw-r--r--fs/nilfs2/dat.c7
-rw-r--r--include/asm-generic/tlb.h4
-rw-r--r--include/linux/cgroup.h1
-rw-r--r--include/linux/gfp.h18
-rw-r--r--include/linux/license.h2
-rw-r--r--include/linux/mlx5/fs.h12
-rw-r--r--include/linux/mlx5/mlx5_ifc.h74
-rw-r--r--include/linux/mlx5/vport.h2
-rw-r--r--include/linux/mm.h29
-rw-r--r--include/linux/mmc/mmc.h2
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--include/linux/pgtable.h18
-rw-r--r--include/linux/rhashtable.h61
-rw-r--r--include/linux/skbuff.h1
-rw-r--r--include/linux/soc/mediatek/mtk_wed.h8
-rw-r--r--include/linux/stmmac.h1
-rw-r--r--include/net/act_api.h10
-rw-r--r--include/net/af_rxrpc.h2
-rw-r--r--include/net/bluetooth/hci.h12
-rw-r--r--include/net/cfg802154.h18
-rw-r--r--include/net/devlink.h39
-rw-r--r--include/net/mana/gdma.h9
-rw-r--r--include/net/nl802154.h43
-rw-r--r--include/net/ping.h3
-rw-r--r--include/net/sock.h6
-rw-r--r--include/net/tc_wrapper.h251
-rw-r--r--include/trace/events/fscache.h2
-rw-r--r--include/trace/events/rxrpc.h486
-rw-r--r--include/uapi/linux/devlink.h13
-rw-r--r--include/uapi/linux/ethtool_netlink.h14
-rw-r--r--include/uapi/linux/net_tstamp.h3
-rw-r--r--include/uapi/linux/openvswitch.h14
-rw-r--r--ipc/sem.c3
-rw-r--r--kernel/cgroup/cgroup-internal.h1
-rw-r--r--kernel/events/core.c17
-rw-r--r--kernel/sysctl.c30
-rw-r--r--lib/Kconfig.debug9
-rw-r--r--lib/rhashtable.c16
-rw-r--r--mm/compaction.c22
-rw-r--r--mm/damon/sysfs.c46
-rw-r--r--mm/hugetlb.c27
-rw-r--r--mm/khugepaged.c62
-rw-r--r--mm/madvise.c6
-rw-r--r--mm/memcontrol.c15
-rw-r--r--mm/memory.c25
-rw-r--r--mm/mmap.c3
-rw-r--r--mm/mmu_gather.c4
-rw-r--r--mm/vmscan.c10
-rw-r--r--net/bluetooth/6lowpan.c1
-rw-r--r--net/bluetooth/af_bluetooth.c4
-rw-r--r--net/bluetooth/hci_codec.c19
-rw-r--r--net/bluetooth/hci_core.c8
-rw-r--r--net/bluetooth/hci_request.c2
-rw-r--r--net/bluetooth/hci_sync.c19
-rw-r--r--net/bluetooth/iso.c1
-rw-r--r--net/bluetooth/l2cap_core.c3
-rw-r--r--net/bpf/test_run.c2
-rw-r--r--net/bridge/br_mdb.c312
-rw-r--r--net/bridge/br_multicast.c2
-rw-r--r--net/bridge/br_private.h10
-rw-r--r--net/can/af_can.c6
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/devlink.c207
-rw-r--r--net/core/skbuff.c70
-rw-r--r--net/core/sock.c9
-rw-r--r--net/dsa/tag_hellcreek.c3
-rw-r--r--net/dsa/tag_ksz.c3
-rw-r--r--net/dsa/tag_sja1105.c3
-rw-r--r--net/ethernet/eth.c2
-rw-r--r--net/ethtool/Makefile2
-rw-r--r--net/ethtool/common.c1
-rw-r--r--net/ethtool/netlink.c7
-rw-r--r--net/ethtool/netlink.h2
-rw-r--r--net/ethtool/rss.c153
-rw-r--r--net/ieee802154/nl802154.c103
-rw-r--r--net/ieee802154/nl802154.h2
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/fib_semantics.c1
-rw-r--r--net/ipv4/ip_gre.c48
-rw-r--r--net/ipv4/ping.c7
-rw-r--r--net/ipv6/ip6_output.c5
-rw-r--r--net/mac802154/iface.c16
-rw-r--r--net/mac802154/main.c2
-rw-r--r--net/mac802154/rx.c24
-rw-r--r--net/mac802154/trace.h25
-rw-r--r--net/mptcp/pm_netlink.c12
-rw-r--r--net/mptcp/sockopt.c2
-rw-r--r--net/ncsi/ncsi-cmd.c3
-rw-r--r--net/netfilter/nf_conntrack_core.c6
-rw-r--r--net/netfilter/nf_conntrack_netlink.c19
-rw-r--r--net/netfilter/nf_flow_table_offload.c6
-rw-r--r--net/netfilter/nft_set_pipapo.c5
-rw-r--r--net/nfc/nci/ntf.c6
-rw-r--r--net/openvswitch/datapath.c41
-rw-r--r--net/openvswitch/vport.c50
-rw-r--r--net/openvswitch/vport.h16
-rw-r--r--net/rxrpc/Kconfig7
-rw-r--r--net/rxrpc/Makefile4
-rw-r--r--net/rxrpc/af_rxrpc.c18
-rw-r--r--net/rxrpc/ar-internal.h211
-rw-r--r--net/rxrpc/call_accept.c191
-rw-r--r--net/rxrpc/call_event.c260
-rw-r--r--net/rxrpc/call_object.c318
-rw-r--r--net/rxrpc/conn_client.c143
-rw-r--r--net/rxrpc/conn_event.c128
-rw-r--r--net/rxrpc/conn_object.c309
-rw-r--r--net/rxrpc/conn_service.c29
-rw-r--r--net/rxrpc/input.c653
-rw-r--r--net/rxrpc/io_thread.c496
-rw-r--r--net/rxrpc/key.c16
-rw-r--r--net/rxrpc/local_event.c46
-rw-r--r--net/rxrpc/local_object.c167
-rw-r--r--net/rxrpc/net_ns.c2
-rw-r--r--net/rxrpc/output.c227
-rw-r--r--net/rxrpc/peer_event.c167
-rw-r--r--net/rxrpc/peer_object.c52
-rw-r--r--net/rxrpc/proc.c67
-rw-r--r--net/rxrpc/recvmsg.c88
-rw-r--r--net/rxrpc/rxkad.c63
-rw-r--r--net/rxrpc/rxperf.c619
-rw-r--r--net/rxrpc/security.c34
-rw-r--r--net/rxrpc/sendmsg.c105
-rw-r--r--net/rxrpc/server_key.c25
-rw-r--r--net/rxrpc/skbuff.c36
-rw-r--r--net/rxrpc/txbuf.c15
-rw-r--r--net/sched/act_api.c3
-rw-r--r--net/sched/act_bpf.c6
-rw-r--r--net/sched/act_connmark.c6
-rw-r--r--net/sched/act_csum.c6
-rw-r--r--net/sched/act_ct.c5
-rw-r--r--net/sched/act_ctinfo.c6
-rw-r--r--net/sched/act_gact.c6
-rw-r--r--net/sched/act_gate.c6
-rw-r--r--net/sched/act_ife.c6
-rw-r--r--net/sched/act_ipt.c6
-rw-r--r--net/sched/act_mirred.c6
-rw-r--r--net/sched/act_mpls.c6
-rw-r--r--net/sched/act_nat.c7
-rw-r--r--net/sched/act_pedit.c6
-rw-r--r--net/sched/act_police.c6
-rw-r--r--net/sched/act_sample.c6
-rw-r--r--net/sched/act_simple.c6
-rw-r--r--net/sched/act_skbedit.c6
-rw-r--r--net/sched/act_skbmod.c6
-rw-r--r--net/sched/act_tunnel_key.c6
-rw-r--r--net/sched/act_vlan.c6
-rw-r--r--net/sched/cls_api.c3
-rw-r--r--net/sched/cls_basic.c6
-rw-r--r--net/sched/cls_bpf.c6
-rw-r--r--net/sched/cls_cgroup.c6
-rw-r--r--net/sched/cls_flow.c6
-rw-r--r--net/sched/cls_flower.c6
-rw-r--r--net/sched/cls_fw.c6
-rw-r--r--net/sched/cls_matchall.c6
-rw-r--r--net/sched/cls_route.c6
-rw-r--r--net/sched/cls_rsvp.c2
-rw-r--r--net/sched/cls_rsvp.h6
-rw-r--r--net/sched/cls_rsvp6.c2
-rw-r--r--net/sched/cls_tcindex.c7
-rw-r--r--net/sched/cls_u32.c6
-rw-r--r--net/sched/sch_api.c5
-rw-r--r--net/tipc/link.c4
-rw-r--r--net/tipc/node.c12
-rw-r--r--net/unix/diag.c20
-rw-r--r--sound/firewire/dice/dice-stream.c12
-rw-r--r--sound/soc/codecs/cs42l51.c2
-rw-r--r--sound/soc/codecs/tlv320adc3xxx.c3
-rw-r--r--sound/soc/fsl/fsl_micfil.c19
-rw-r--r--sound/soc/soc-ops.c11
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh (renamed from tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh)2
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/diag_uid.c178
-rw-r--r--tools/testing/selftests/net/bpf/Makefile6
-rw-r--r--tools/testing/selftests/net/config2
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh37
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh2
-rwxr-xr-xtools/testing/selftests/net/toeplitz.sh2
-rw-r--r--tools/vm/slabinfo-gnuplot.sh4
446 files changed, 11820 insertions, 5027 deletions
diff --git a/.clang-format b/.clang-format
index 1247d54f9e49..8d01225bfcb7 100644
--- a/.clang-format
+++ b/.clang-format
@@ -535,6 +535,7 @@ ForEachMacros:
- 'perf_hpp_list__for_each_sort_list_safe'
- 'perf_pmu__for_each_hybrid_pmu'
- 'ping_portaddr_for_each_entry'
+ - 'ping_portaddr_for_each_entry_rcu'
- 'plist_for_each'
- 'plist_for_each_continue'
- 'plist_for_each_entry'
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml
index 2ab4642679c0..55c4f94a14d1 100644
--- a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml
@@ -148,7 +148,7 @@ allOf:
items:
- const: oscclk
- const: dout_clkcmu_fsys1_bus
- - const: dout_clkcmu_fsys1_mmc_card
+ - const: gout_clkcmu_fsys1_mmc_card
- const: dout_clkcmu_fsys1_usbdrd
- if:
diff --git a/Documentation/loongarch/booting.rst b/Documentation/loongarch/booting.rst
new file mode 100644
index 000000000000..91eccd410478
--- /dev/null
+++ b/Documentation/loongarch/booting.rst
@@ -0,0 +1,42 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+Booting Linux/LoongArch
+=======================
+
+:Author: Yanteng Si <[email protected]>
+:Date: 18 Nov 2022
+
+Information passed from BootLoader to kernel
+============================================
+
+LoongArch supports ACPI and FDT. The information that needs to be passed
+to the kernel includes the memmap, the initrd, the command line, optionally
+the ACPI/FDT tables, and so on.
+
+The kernel is passed the following arguments on `kernel_entry` :
+
+ - a0 = efi_boot: `efi_boot` is a flag indicating whether
+ this boot environment is fully UEFI-compliant.
+
+ - a1 = cmdline: `cmdline` is a pointer to the kernel command line.
+
+ - a2 = systemtable: `systemtable` points to the EFI system table.
+ All pointers involved at this stage are in physical addresses.
+
+Header of Linux/LoongArch kernel images
+=======================================
+
+Linux/LoongArch kernel images are EFI images. Being PE files, they have
+a 64-byte header structured like::
+
+ u32 MZ_MAGIC /* "MZ", MS-DOS header */
+ u32 res0 = 0 /* Reserved */
+ u64 kernel_entry /* Kernel entry point */
+ u64 _end - _text /* Kernel image effective size */
+ u64 load_offset /* Kernel image load offset from start of RAM */
+ u64 res1 = 0 /* Reserved */
+ u64 res2 = 0 /* Reserved */
+ u64 res3 = 0 /* Reserved */
+ u32 LINUX_PE_MAGIC /* Magic number */
+ u32 pe_header - _head /* Offset to the PE header */
diff --git a/Documentation/loongarch/index.rst b/Documentation/loongarch/index.rst
index aaba648db907..c779bfa00c05 100644
--- a/Documentation/loongarch/index.rst
+++ b/Documentation/loongarch/index.rst
@@ -9,6 +9,7 @@ LoongArch Architecture
:numbered:
introduction
+ booting
irq-chip-model
features
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
index e8fa7ac9e6b1..6969652f593c 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
@@ -351,42 +351,26 @@ driver.
MAC address setup
-----------------
-mlx5 driver provides mechanism to setup the MAC address of the PCI VF/SF.
+The mlx5 driver supports the devlink port function attr mechanism to set up the MAC
+address. (refer to Documentation/networking/devlink/devlink-port.rst)
-The configured MAC address of the PCI VF/SF will be used by netdevice and rdma
-device created for the PCI VF/SF.
+RoCE capability setup
+---------------------
+Not all mlx5 PCI devices/SFs require RoCE capability.
-- Get the MAC address of the VF identified by its unique devlink port index::
+When RoCE capability is disabled, it saves 1 Mbyte worth of system memory per
+PCI device/SF.
- $ devlink port show pci/0000:06:00.0/2
- pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
- function:
- hw_addr 00:00:00:00:00:00
-
-- Set the MAC address of the VF identified by its unique devlink port index::
-
- $ devlink port function set pci/0000:06:00.0/2 hw_addr 00:11:22:33:44:55
-
- $ devlink port show pci/0000:06:00.0/2
- pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
- function:
- hw_addr 00:11:22:33:44:55
-
-- Get the MAC address of the SF identified by its unique devlink port index::
-
- $ devlink port show pci/0000:06:00.0/32768
- pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88
- function:
- hw_addr 00:00:00:00:00:00
-
-- Set the MAC address of the SF identified by its unique devlink port index::
+The mlx5 driver supports the devlink port function attr mechanism to set up the RoCE
+capability. (refer to Documentation/networking/devlink/devlink-port.rst)
- $ devlink port function set pci/0000:06:00.0/32768 hw_addr 00:00:00:00:88:88
+migratable capability setup
+---------------------------
+Users who want mlx5 PCI VFs to be able to perform live migration need to
+explicitly enable the VF migratable capability.
- $ devlink port show pci/0000:06:00.0/32768
- pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88
- function:
- hw_addr 00:00:00:00:88:88
+The mlx5 driver supports the devlink port function attr mechanism to set up the migratable
+capability. (refer to Documentation/networking/devlink/devlink-port.rst)
SF state setup
--------------
diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst
index 98557c2ab1c1..3da590953ce8 100644
--- a/Documentation/networking/devlink/devlink-port.rst
+++ b/Documentation/networking/devlink/devlink-port.rst
@@ -110,7 +110,7 @@ devlink ports for both the controllers.
Function configuration
======================
-A user can configure the function attribute before enumerating the PCI
+Users can configure one or more function attributes before enumerating the PCI
function. Usually it means, user should configure function attribute
before a bus specific device for the function is created. However, when
SRIOV is enabled, virtual function devices are created on the PCI bus.
@@ -119,9 +119,127 @@ function device to the driver. For subfunctions, this means user should
configure port function attribute before activating the port function.
A user may set the hardware address of the function using
-'devlink port function set hw_addr' command. For Ethernet port function
+`devlink port function set hw_addr` command. For Ethernet port function
this means a MAC address.
+Users may also set the RoCE capability of the function using
+`devlink port function set roce` command.
+
+Users may also set the function as migratable using
+`devlink port function set migratable` command.
+
+Function attributes
+===================
+
+MAC address setup
+-----------------
+The configured MAC address of the PCI VF/SF will be used by netdevice and rdma
+device created for the PCI VF/SF.
+
+- Get the MAC address of the VF identified by its unique devlink port index::
+
+ $ devlink port show pci/0000:06:00.0/2
+ pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+ function:
+ hw_addr 00:00:00:00:00:00
+
+- Set the MAC address of the VF identified by its unique devlink port index::
+
+ $ devlink port function set pci/0000:06:00.0/2 hw_addr 00:11:22:33:44:55
+
+ $ devlink port show pci/0000:06:00.0/2
+ pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+ function:
+ hw_addr 00:11:22:33:44:55
+
+- Get the MAC address of the SF identified by its unique devlink port index::
+
+ $ devlink port show pci/0000:06:00.0/32768
+ pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88
+ function:
+ hw_addr 00:00:00:00:00:00
+
+- Set the MAC address of the SF identified by its unique devlink port index::
+
+ $ devlink port function set pci/0000:06:00.0/32768 hw_addr 00:00:00:00:88:88
+
+ $ devlink port show pci/0000:06:00.0/32768
+ pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88
+ function:
+ hw_addr 00:00:00:00:88:88
+
+RoCE capability setup
+---------------------
+Not all PCI VFs/SFs require RoCE capability.
+
+When RoCE capability is disabled, it saves system memory per PCI VF/SF.
+
+When user disables RoCE capability for a VF/SF, user application cannot send or
+receive any RoCE packets through this VF/SF and RoCE GID table for this PCI
+will be empty.
+
+When RoCE capability is disabled in the device using port function attribute,
+VF/SF driver cannot override it.
+
+- Get RoCE capability of the VF device::
+
+ $ devlink port show pci/0000:06:00.0/2
+ pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+ function:
+ hw_addr 00:00:00:00:00:00 roce enable
+
+- Set RoCE capability of the VF device::
+
+ $ devlink port function set pci/0000:06:00.0/2 roce disable
+
+ $ devlink port show pci/0000:06:00.0/2
+ pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+ function:
+ hw_addr 00:00:00:00:00:00 roce disable
+
+migratable capability setup
+---------------------------
+Live migration is the process of transferring a live virtual machine
+from one physical host to another without disrupting its normal
+operation.
+
+Users who want PCI VFs to be able to perform live migration need to
+explicitly enable the VF migratable capability.
+
+When user enables migratable capability for a VF, and the HV binds the VF to VFIO driver
+with migration support, the user can migrate the VM with this VF from one HV to a
+different one.
+
+However, when migratable capability is enabled, the device will disable features which cannot
+be migrated. Thus migratable cap can impose limitations on a VF so let the user decide.
+
+Example of LM with migratable function configuration:
+- Get migratable capability of the VF device::
+
+ $ devlink port show pci/0000:06:00.0/2
+ pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+ function:
+ hw_addr 00:00:00:00:00:00 migratable disable
+
+- Set migratable capability of the VF device::
+
+ $ devlink port function set pci/0000:06:00.0/2 migratable enable
+
+ $ devlink port show pci/0000:06:00.0/2
+ pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+ function:
+ hw_addr 00:00:00:00:00:00 migratable enable
+
+- Bind VF to VFIO driver with migration support::
+
+ $ echo <pci_id> > /sys/bus/pci/devices/0000:08:00.0/driver/unbind
+ $ echo mlx5_vfio_pci > /sys/bus/pci/devices/0000:08:00.0/driver_override
+ $ echo <pci_id> > /sys/bus/pci/devices/0000:08:00.0/driver/bind
+
+Attach VF to the VM.
+Start the VM.
+Perform live migration.
+
Subfunction
============
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index bede24ef44fd..f10f8eb44255 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -222,6 +222,7 @@ Userspace to kernel:
``ETHTOOL_MSG_MODULE_GET`` get transceiver module parameters
``ETHTOOL_MSG_PSE_SET`` set PSE parameters
``ETHTOOL_MSG_PSE_GET`` get PSE parameters
+ ``ETHTOOL_MSG_RSS_GET`` get RSS settings
===================================== =================================
Kernel to userspace:
@@ -263,6 +264,7 @@ Kernel to userspace:
``ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY`` PHC virtual clocks info
``ETHTOOL_MSG_MODULE_GET_REPLY`` transceiver module parameters
``ETHTOOL_MSG_PSE_GET_REPLY`` PSE parameters
+ ``ETHTOOL_MSG_RSS_GET_REPLY`` RSS settings
======================================== =================================
``GET`` requests are sent by userspace applications to retrieve device
@@ -1687,6 +1689,33 @@ to control PoDL PSE Admin functions. This option is implementing
``IEEE 802.3-2018`` 30.15.1.2.1 acPoDLPSEAdminControl. See
``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` for supported values.
+RSS_GET
+=======
+
+Get indirection table, hash key and hash function info associated with a
+RSS context of an interface similar to ``ETHTOOL_GRSSH`` ioctl request.
+
+Request contents:
+
+===================================== ====== ==========================
+ ``ETHTOOL_A_RSS_HEADER`` nested request header
+ ``ETHTOOL_A_RSS_CONTEXT`` u32 context number
+===================================== ====== ==========================
+
+Kernel response contents:
+
+===================================== ====== ==========================
+ ``ETHTOOL_A_RSS_HEADER`` nested reply header
+ ``ETHTOOL_A_RSS_HFUNC`` u32 RSS hash func
+ ``ETHTOOL_A_RSS_INDIR`` binary Indir table bytes
+ ``ETHTOOL_A_RSS_HKEY`` binary Hash key bytes
+===================================== ====== ==========================
+
+ETHTOOL_A_RSS_HFUNC attribute is a bitmap indicating the hash function
+being used. Currently supported options are toeplitz, xor or crc32.
+ETHTOOL_A_RSS_INDIR attribute returns RSS indirection table where each byte
+indicates queue number.
+
Request translation
===================
@@ -1768,7 +1797,7 @@ are netlink only.
``ETHTOOL_GMODULEEEPROM`` ``ETHTOOL_MSG_MODULE_EEPROM_GET``
``ETHTOOL_GEEE`` ``ETHTOOL_MSG_EEE_GET``
``ETHTOOL_SEEE`` ``ETHTOOL_MSG_EEE_SET``
- ``ETHTOOL_GRSSH`` n/a
+ ``ETHTOOL_GRSSH`` ``ETHTOOL_MSG_RSS_GET``
``ETHTOOL_SRSSH`` n/a
``ETHTOOL_GTUNABLE`` n/a
``ETHTOOL_STUNABLE`` n/a
diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst
index be4eb1242057..f17c01834a12 100644
--- a/Documentation/networking/timestamping.rst
+++ b/Documentation/networking/timestamping.rst
@@ -179,7 +179,8 @@ SOF_TIMESTAMPING_OPT_ID:
identifier and returns that along with the timestamp. The identifier
is derived from a per-socket u32 counter (that wraps). For datagram
sockets, the counter increments with each sent packet. For stream
- sockets, it increments with every byte.
+ sockets, it increments with every byte. For stream sockets, also set
+ SOF_TIMESTAMPING_OPT_ID_TCP, see the section below.
The counter starts at zero. It is initialized the first time that
the socket option is enabled. It is reset each time the option is
@@ -192,6 +193,35 @@ SOF_TIMESTAMPING_OPT_ID:
among all possibly concurrently outstanding timestamp requests for
that socket.
+SOF_TIMESTAMPING_OPT_ID_TCP:
+ Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP
+ timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the
+ counter increments for stream sockets, but its starting point is
+ not entirely trivial. This option fixes that.
+
+ For stream sockets, if SOF_TIMESTAMPING_OPT_ID is set, this should
+ always be set too. On datagram sockets the option has no effect.
+
+ A reasonable expectation is that the counter is reset to zero with
+ the system call, so that a subsequent write() of N bytes generates
+ a timestamp with counter N-1. SOF_TIMESTAMPING_OPT_ID_TCP
+ implements this behavior under all conditions.
+
+ SOF_TIMESTAMPING_OPT_ID without modifier often reports the same,
+ especially when the socket option is set when no data is in
+ transmission. If data is being transmitted, it may be off by the
+ length of the output queue (SIOCOUTQ).
+
+ The difference is due to being based on snd_una versus write_seq.
+ snd_una is the offset in the stream acknowledged by the peer. This
+ depends on factors outside of process control, such as network RTT.
+ write_seq is the last byte written by the process. This offset is
+ not affected by external inputs.
+
+ The difference is subtle and unlikely to be noticed when configured
+ at initial socket creation, when no data is queued or sent. But
+ SOF_TIMESTAMPING_OPT_ID_TCP behavior is more robust regardless of
+ when the socket option is set.
SOF_TIMESTAMPING_OPT_CMSG:
Support recv() cmsg for all timestamped packets. Control messages
diff --git a/Documentation/translations/zh_CN/loongarch/booting.rst b/Documentation/translations/zh_CN/loongarch/booting.rst
new file mode 100644
index 000000000000..fb6440c438f0
--- /dev/null
+++ b/Documentation/translations/zh_CN/loongarch/booting.rst
@@ -0,0 +1,48 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/loongarch/booting.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+====================
+启动 Linux/LoongArch
+====================
+
+:作者: 司延腾 <siyanteng@loongson.cn>
+:日期: 2022年11月18日
+
+BootLoader传递给内核的信息
+==========================
+
+LoongArch支持ACPI和FDT启动,需要传递给内核的信息包括memmap、initrd、cmdline、可
+选的ACPI/FDT表等。
+
+内核在 `kernel_entry` 入口处被传递以下参数:
+
+ - a0 = efi_boot: `efi_boot` 是一个标志,表示这个启动环境是否完全符合UEFI
+ 的要求。
+
+ - a1 = cmdline: `cmdline` 是一个指向内核命令行的指针。
+
+ - a2 = systemtable: `systemtable` 指向EFI的系统表,在这个阶段涉及的所有
+ 指针都是物理地址。
+
+Linux/LoongArch内核镜像文件头
+=============================
+
+内核镜像是EFI镜像。作为PE文件,它们有一个64字节的头部结构体,如下所示::
+
+ u32 MZ_MAGIC /* "MZ", MS-DOS 头 */
+ u32 res0 = 0 /* 保留 */
+ u64 kernel_entry /* 内核入口点 */
+ u64 _end - _text /* 内核镜像有效大小 */
+ u64 load_offset /* 加载内核镜像相对内存起始地址的偏移量 */
+ u64 res1 = 0 /* 保留 */
+ u64 res2 = 0 /* 保留 */
+ u64 res3 = 0 /* 保留 */
+ u32 LINUX_PE_MAGIC /* 魔术数 */
+ u32 pe_header - _head /* 到PE头的偏移量 */
diff --git a/Documentation/translations/zh_CN/loongarch/index.rst b/Documentation/translations/zh_CN/loongarch/index.rst
index 7d23eb78379d..0273a08342f7 100644
--- a/Documentation/translations/zh_CN/loongarch/index.rst
+++ b/Documentation/translations/zh_CN/loongarch/index.rst
@@ -14,6 +14,7 @@ LoongArch体系结构
:numbered:
introduction
+ booting
irq-chip-model
features
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index eee9f857a986..896914e3a847 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7213,14 +7213,13 @@ veto the transition.
:Parameters: args[0] is the maximum poll time in nanoseconds
:Returns: 0 on success; -1 on error
-This capability overrides the kvm module parameter halt_poll_ns for the
-target VM.
-
-VCPU polling allows a VCPU to poll for wakeup events instead of immediately
-scheduling during guest halts. The maximum time a VCPU can spend polling is
-controlled by the kvm module parameter halt_poll_ns. This capability allows
-the maximum halt time to specified on a per-VM basis, effectively overriding
-the module parameter for the target VM.
+KVM_CAP_HALT_POLL overrides the kvm.halt_poll_ns module parameter to set the
+maximum halt-polling time for all vCPUs in the target VM. This capability can
+be invoked at any time and any number of times to dynamically change the
+maximum halt-polling time.
+
+See Documentation/virt/kvm/halt-polling.rst for more information on halt
+polling.
7.21 KVM_CAP_X86_USER_SPACE_MSR
-------------------------------
diff --git a/Documentation/virt/kvm/x86/halt-polling.rst b/Documentation/virt/kvm/halt-polling.rst
index 4922e4a15f18..3fae39b1a5ba 100644
--- a/Documentation/virt/kvm/x86/halt-polling.rst
+++ b/Documentation/virt/kvm/halt-polling.rst
@@ -119,6 +119,19 @@ These module parameters can be set from the debugfs files in:
Note: that these module parameters are system wide values and are not able to
be tuned on a per vm basis.
+Any changes to these parameters will be picked up by new and existing vCPUs the
+next time they halt, with the notable exception of VMs using KVM_CAP_HALT_POLL
+(see next section).
+
+KVM_CAP_HALT_POLL
+=================
+
+KVM_CAP_HALT_POLL is a VM capability that allows userspace to override halt_poll_ns
+on a per-VM basis. VMs using KVM_CAP_HALT_POLL ignore halt_poll_ns completely (but
+still obey halt_poll_ns_grow, halt_poll_ns_grow_start, and halt_poll_ns_shrink).
+
+See Documentation/virt/kvm/api.rst for more information on this capability.
+
Further Notes
=============
diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst
index e0a2c74e1043..ad13ec55ddfe 100644
--- a/Documentation/virt/kvm/index.rst
+++ b/Documentation/virt/kvm/index.rst
@@ -17,4 +17,5 @@ KVM
locking
vcpu-requests
+ halt-polling
review-checklist
diff --git a/Documentation/virt/kvm/x86/index.rst b/Documentation/virt/kvm/x86/index.rst
index 7ff588826b9f..9ece6b8dc817 100644
--- a/Documentation/virt/kvm/x86/index.rst
+++ b/Documentation/virt/kvm/x86/index.rst
@@ -10,7 +10,6 @@ KVM for x86 systems
amd-memory-encryption
cpuid
errata
- halt-polling
hypercalls
mmu
msr
diff --git a/Makefile b/Makefile
index 78525ebea876..0992f827888d 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 1
SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*
diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi
index 7a113325abb9..6f9004ebf424 100644
--- a/arch/arm/boot/dts/at91rm9200.dtsi
+++ b/arch/arm/boot/dts/at91rm9200.dtsi
@@ -666,7 +666,7 @@
compatible = "atmel,at91rm9200-udc";
reg = <0xfffb0000 0x4000>;
interrupts = <11 IRQ_TYPE_LEVEL_HIGH 2>;
- clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 2>;
+ clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 1>;
clock-names = "pclk", "hclk";
status = "disabled";
};
diff --git a/arch/arm/mach-at91/sama5.c b/arch/arm/mach-at91/sama5.c
index 67ed68fbe3a5..bf2b5c6a18c6 100644
--- a/arch/arm/mach-at91/sama5.c
+++ b/arch/arm/mach-at91/sama5.c
@@ -26,7 +26,7 @@ static void sama5_l2c310_write_sec(unsigned long val, unsigned reg)
static void __init sama5_secure_cache_init(void)
{
sam_secure_init();
- if (sam_linux_is_optee_available())
+ if (IS_ENABLED(CONFIG_OUTER_CACHE) && sam_linux_is_optee_available())
outer_cache.write_sec = sama5_l2c310_write_sec;
}
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index d6cf535d8352..439e2bc5d5d8 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -14,16 +14,8 @@
#ifdef CONFIG_EFI
extern void efi_init(void);
-
-bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg);
#else
#define efi_init()
-
-static inline
-bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg)
-{
- return false;
-}
#endif
int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 67babd5f04c2..75691a2641c1 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -6,7 +6,7 @@
#include <linux/linkage.h>
SYM_FUNC_START(__efi_rt_asm_wrapper)
- stp x29, x30, [sp, #-112]!
+ stp x29, x30, [sp, #-32]!
mov x29, sp
/*
@@ -17,20 +17,6 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
stp x1, x18, [sp, #16]
/*
- * Preserve all callee saved registers and record the stack pointer
- * value in a per-CPU variable so we can recover from synchronous
- * exceptions occurring while running the firmware routines.
- */
- stp x19, x20, [sp, #32]
- stp x21, x22, [sp, #48]
- stp x23, x24, [sp, #64]
- stp x25, x26, [sp, #80]
- stp x27, x28, [sp, #96]
-
- adr_this_cpu x8, __efi_rt_asm_recover_sp, x9
- str x29, [x8]
-
- /*
* We are lucky enough that no EFI runtime services take more than
* 5 arguments, so all are passed in registers rather than via the
* stack.
@@ -45,7 +31,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
ldp x1, x2, [sp, #16]
cmp x2, x18
- ldp x29, x30, [sp], #112
+ ldp x29, x30, [sp], #32
b.ne 0f
ret
0:
@@ -59,18 +45,3 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
mov x18, x2
b efi_handle_corrupted_x18 // tail call
SYM_FUNC_END(__efi_rt_asm_wrapper)
-
-SYM_FUNC_START(__efi_rt_asm_recover)
- ldr_this_cpu x8, __efi_rt_asm_recover_sp, x9
- mov sp, x8
-
- ldp x0, x18, [sp, #16]
- ldp x19, x20, [sp, #32]
- ldp x21, x22, [sp, #48]
- ldp x23, x24, [sp, #64]
- ldp x25, x26, [sp, #80]
- ldp x27, x28, [sp, #96]
- ldp x29, x30, [sp], #112
-
- b efi_handle_runtime_exception
-SYM_FUNC_END(__efi_rt_asm_recover)
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index ee53f2a0aa03..a908a37f0367 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -9,7 +9,6 @@
#include <linux/efi.h>
#include <linux/init.h>
-#include <linux/percpu.h>
#include <asm/efi.h>
@@ -145,28 +144,3 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f);
return s;
}
-
-asmlinkage DEFINE_PER_CPU(u64, __efi_rt_asm_recover_sp);
-
-asmlinkage efi_status_t __efi_rt_asm_recover(void);
-
-asmlinkage efi_status_t efi_handle_runtime_exception(const char *f)
-{
- pr_err(FW_BUG "Synchronous exception occurred in EFI runtime service %s()\n", f);
- clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
- return EFI_ABORTED;
-}
-
-bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg)
-{
- /* Check whether the exception occurred while running the firmware */
- if (current_work() != &efi_rts_work.work || regs->pc >= TASK_SIZE_64)
- return false;
-
- pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg);
- add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
- dump_stack();
-
- regs->pc = (u64)__efi_rt_asm_recover;
- return true;
-}
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 3cb101e8cb29..5240f6acad64 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -36,7 +36,22 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
{
unsigned long start = (unsigned long)page_address(page);
- dcache_clean_poc(start, start + size);
+ /*
+ * The architecture only requires a clean to the PoC here in order to
+ * meet the requirements of the DMA API. However, some vendors (i.e.
+ * Qualcomm) abuse the DMA API for transferring buffers from the
+ * non-secure to the secure world, resetting the system if a non-secure
+ * access shows up after the buffer has been transferred:
+ *
+ * https://lore.kernel.org/r/[email protected]
+ *
+ * Using clean+invalidate appears to make this issue less likely, but
+ * the drivers themselves still need fixing as the CPU could issue a
+ * speculative read from the buffer via the linear mapping irrespective
+ * of the cache maintenance we use. Once the drivers are fixed, we can
+ * relax this to a clean operation.
+ */
+ dcache_clean_inval_poc(start, start + size);
}
#ifdef CONFIG_IOMMU_DMA
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 3e9cf9826417..5b391490e045 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -30,7 +30,6 @@
#include <asm/bug.h>
#include <asm/cmpxchg.h>
#include <asm/cpufeature.h>
-#include <asm/efi.h>
#include <asm/exception.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
@@ -392,9 +391,6 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr,
msg = "paging request";
}
- if (efi_runtime_fixup_exception(regs, msg))
- return;
-
die_kernel_fault(msg, addr, esr, regs);
}
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index aa0e0e0d4ee5..79d5bfd913e0 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -490,6 +490,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
return pmd;
}
+#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return !!(pmd_val(pmd) & _PAGE_ACCESSED);
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index 3dd172d9ffea..d82687390b4a 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -78,16 +78,6 @@ extern void calculate_cpu_foreign_map(void);
*/
extern void show_ipi_list(struct seq_file *p, int prec);
-/*
- * This function sends a 'reschedule' IPI to another CPU.
- * it goes straight through and wastes no time serializing
- * anything. Worst case is that we lose a reschedule ...
- */
-static inline void smp_send_reschedule(int cpu)
-{
- loongson_send_ipi_single(cpu, SMP_RESCHEDULE);
-}
-
static inline void arch_send_call_function_single_ipi(int cpu)
{
loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 6ed72f7ff278..14508d429ffa 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -149,6 +149,17 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
ipi_write_action(cpu_logical_map(i), (u32)action);
}
+/*
+ * This function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+void smp_send_reschedule(int cpu)
+{
+ loongson_send_ipi_single(cpu, SMP_RESCHEDULE);
+}
+EXPORT_SYMBOL_GPL(smp_send_reschedule);
+
irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
{
unsigned int action;
diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
index d8ee8fbc8c67..58781c6e4191 100644
--- a/arch/loongarch/mm/tlbex.S
+++ b/arch/loongarch/mm/tlbex.S
@@ -10,6 +10,8 @@
#include <asm/regdef.h>
#include <asm/stackframe.h>
+#define INVTLB_ADDR_GFALSE_AND_ASID 5
+
#define PTRS_PER_PGD_BITS (PAGE_SHIFT - 3)
#define PTRS_PER_PUD_BITS (PAGE_SHIFT - 3)
#define PTRS_PER_PMD_BITS (PAGE_SHIFT - 3)
@@ -136,13 +138,10 @@ tlb_huge_update_load:
ori t0, ra, _PAGE_VALID
st.d t0, t1, 0
#endif
- tlbsrch
- addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16)
- addi.d ra, t1, 0
- csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
- tlbwr
-
- csrxchg zero, t1, LOONGARCH_CSR_TLBIDX
+ csrrd ra, LOONGARCH_CSR_ASID
+ csrrd t1, LOONGARCH_CSR_BADV
+ andi ra, ra, CSR_ASID_ASID
+ invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1
/*
* A huge PTE describes an area the size of the
@@ -287,13 +286,11 @@ tlb_huge_update_store:
ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
st.d t0, t1, 0
#endif
- tlbsrch
- addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16)
- addi.d ra, t1, 0
- csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
- tlbwr
+ csrrd ra, LOONGARCH_CSR_ASID
+ csrrd t1, LOONGARCH_CSR_BADV
+ andi ra, ra, CSR_ASID_ASID
+ invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1
- csrxchg zero, t1, LOONGARCH_CSR_TLBIDX
/*
* A huge PTE describes an area the size of the
* configured huge page size. This is twice the
@@ -436,6 +433,11 @@ tlb_huge_update_modify:
ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
st.d t0, t1, 0
#endif
+ csrrd ra, LOONGARCH_CSR_ASID
+ csrrd t1, LOONGARCH_CSR_BADV
+ andi ra, ra, CSR_ASID_ASID
+ invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1
+
/*
* A huge PTE describes an area the size of the
* configured huge page size. This is twice the
@@ -466,7 +468,7 @@ tlb_huge_update_modify:
addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
- tlbwr
+ tlbfill
/* Reset default page size */
addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 6caec386ad2f..4678627673df 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -622,6 +622,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
return pmd;
}
+#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return !!(pmd_val(pmd) & _PAGE_ACCESSED);
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 4745bb9998bd..6d8492b6e2b8 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -602,6 +602,7 @@ ____##func(struct pt_regs *regs)
/* kernel/traps.c */
DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception);
#ifdef CONFIG_PPC_BOOK3S_64
+DECLARE_INTERRUPT_HANDLER_RAW(machine_check_early_boot);
DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async);
#endif
DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 43f1c76d48ce..a379b0ce19ff 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -113,23 +113,19 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
int i;
- /* First arg comes in as a 32 bits pointer. */
- EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3));
- EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0));
+ /* Initialize tail_call_cnt, to be skipped if we do tail calls. */
+ EMIT(PPC_RAW_LI(_R4, 0));
+
+#define BPF_TAILCALL_PROLOGUE_SIZE 4
+
EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx)));
- /*
- * Initialize tail_call_cnt in stack frame if we do tail calls.
- * Otherwise, put in NOPs so that it can be skipped when we are
- * invoked through a tail call.
- */
if (ctx->seen & SEEN_TAILCALL)
- EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1,
- bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
- else
- EMIT(PPC_RAW_NOP());
+ EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
-#define BPF_TAILCALL_PROLOGUE_SIZE 16
+ /* First arg comes in as a 32 bits pointer. */
+ EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3));
+ EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0));
/*
* We need a stack frame, but we don't necessarily need to
@@ -170,24 +166,24 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
if (bpf_is_seen_register(ctx, i))
EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
-}
-
-void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
-{
- EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
-
- bpf_jit_emit_common_epilogue(image, ctx);
-
- /* Tear down our stack frame */
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+ /* Tear down our stack frame */
EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx)));
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_MTLR(_R0));
+}
+
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+ EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
+
+ bpf_jit_emit_common_epilogue(image, ctx);
+
EMIT(PPC_RAW_BLR());
}
@@ -244,7 +240,6 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29));
EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array));
EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs)));
- EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
/*
* if (prog == NULL)
@@ -255,19 +250,14 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
/* goto *(prog->bpf_func + prologue_size); */
EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func)));
-
- if (ctx->seen & SEEN_FUNC)
- EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
-
EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE));
-
- if (ctx->seen & SEEN_FUNC)
- EMIT(PPC_RAW_MTLR(_R0));
-
EMIT(PPC_RAW_MTCTR(_R3));
EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1)));
+ /* Put tail_call_cnt in r4 */
+ EMIT(PPC_RAW_MR(_R4, _R0));
+
/* tear restore NVRs, ... */
bpf_jit_emit_common_epilogue(image, ctx);
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index fa78595a6089..593cf09264d8 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -317,9 +317,9 @@ config SMP
config NR_CPUS
int "Maximum number of CPUs (2-512)"
depends on SMP
- range 2 512 if !SBI_V01
- range 2 32 if SBI_V01 && 32BIT
- range 2 64 if SBI_V01 && 64BIT
+ range 2 512 if !RISCV_SBI_V01
+ range 2 32 if RISCV_SBI_V01 && 32BIT
+ range 2 64 if RISCV_SBI_V01 && 64BIT
default "32" if 32BIT
default "64" if 64BIT
diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index 1b471ff73178..816e753de636 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -23,6 +23,7 @@
#define REG_L __REG_SEL(ld, lw)
#define REG_S __REG_SEL(sd, sw)
#define REG_SC __REG_SEL(sc.d, sc.w)
+#define REG_AMOSWAP_AQ __REG_SEL(amoswap.d.aq, amoswap.w.aq)
#define REG_ASM __REG_SEL(.dword, .word)
#define SZREG __REG_SEL(8, 4)
#define LGREG __REG_SEL(3, 2)
diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
index f74879a8f1ea..e229d7be4b66 100644
--- a/arch/riscv/include/asm/efi.h
+++ b/arch/riscv/include/asm/efi.h
@@ -10,6 +10,7 @@
#include <asm/mmu_context.h>
#include <asm/ptrace.h>
#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
#ifdef CONFIG_EFI
extern void efi_init(void);
@@ -20,7 +21,10 @@ extern void efi_init(void);
int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
-#define arch_efi_call_virt_setup() efi_virtmap_load()
+#define arch_efi_call_virt_setup() ({ \
+ sync_kernel_mappings(efi_mm.pgd); \
+ efi_virtmap_load(); \
+ })
#define arch_efi_call_virt_teardown() efi_virtmap_unload()
#define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 947f23d7b6af..59dc12b5b7e8 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
#endif /* __PAGETABLE_PMD_FOLDED */
+static inline void sync_kernel_mappings(pgd_t *pgd)
+{
+ memcpy(pgd + USER_PTRS_PER_PGD,
+ init_mm.pgd + USER_PTRS_PER_PGD,
+ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+}
+
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
@@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
if (likely(pgd != NULL)) {
memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
/* Copy kernel mappings */
- memcpy(pgd + USER_PTRS_PER_PGD,
- init_mm.pgd + USER_PTRS_PER_PGD,
- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+ sync_kernel_mappings(pgd);
}
return pgd;
}
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 7ec936910a96..92ec2d9d7273 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -600,6 +600,7 @@ static inline int pmd_dirty(pmd_t pmd)
return pte_dirty(pmd_pte(pmd));
}
+#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return pte_young(pmd_pte(pmd));
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index d3443be7eedc..3831b638ecab 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -50,6 +50,9 @@ void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops);
/* Clear IPI for current CPU */
void riscv_clear_ipi(void);
+/* Check whether any other CPUs failed to stop for the crash dump */
+bool smp_crash_stop_failed(void);
+
/* Secondary hart entry */
asmlinkage void smp_callin(void);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index b9eda3fcbd6d..186abd146eaf 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -404,6 +404,19 @@ handle_syscall_trace_exit:
#ifdef CONFIG_VMAP_STACK
handle_kernel_stack_overflow:
+ /*
+ * Takes the pseudo-spinlock for the shadow stack, in case multiple
+ * harts are concurrently overflowing their kernel stacks. We could
+ * store any value here, but since we're overflowing the kernel stack
+ * already we only have SP to use as a scratch register. So we just
+ * swap in the address of the spinlock, as that's definitely non-zero.
+ *
+ * Pairs with a store_release in handle_bad_stack().
+ */
+1: la sp, spin_shadow_stack
+ REG_AMOSWAP_AQ sp, sp, (sp)
+ bnez sp, 1b
+
la sp, shadow_stack
addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index ee79e6839b86..2d139b724bc8 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -15,6 +15,8 @@
#include <linux/compiler.h> /* For unreachable() */
#include <linux/cpu.h> /* For cpu_down() */
#include <linux/reboot.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
/*
* kexec_image_info - Print received image details
@@ -138,20 +140,35 @@ void machine_shutdown(void)
#endif
}
-/* Override the weak function in kernel/panic.c */
-void crash_smp_send_stop(void)
+static void machine_kexec_mask_interrupts(void)
{
- static int cpus_stopped;
+ unsigned int i;
+ struct irq_desc *desc;
- /*
- * This function can be called twice in panic path, but obviously
- * we execute this only once.
- */
- if (cpus_stopped)
- return;
+ for_each_irq_desc(i, desc) {
+ struct irq_chip *chip;
+ int ret;
+
+ chip = irq_desc_get_chip(desc);
+ if (!chip)
+ continue;
+
+ /*
+ * First try to remove the active state. If this
+ * fails, try to EOI the interrupt.
+ */
+ ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
+
+ if (ret && irqd_irq_inprogress(&desc->irq_data) &&
+ chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
- smp_send_stop();
- cpus_stopped = 1;
+ if (chip->irq_mask)
+ chip->irq_mask(&desc->irq_data);
+
+ if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+ chip->irq_disable(&desc->irq_data);
+ }
}
/*
@@ -169,6 +186,8 @@ machine_crash_shutdown(struct pt_regs *regs)
crash_smp_send_stop();
crash_save_cpu(regs, smp_processor_id());
+ machine_kexec_mask_interrupts();
+
pr_info("Starting crashdump kernel...\n");
}
@@ -195,6 +214,11 @@ machine_kexec(struct kimage *image)
void *control_code_buffer = page_address(image->control_code_page);
riscv_kexec_method kexec_method = NULL;
+#ifdef CONFIG_SMP
+ WARN(smp_crash_stop_failed(),
+ "Some CPUs may be stale, kdump will be unreliable.\n");
+#endif
+
if (image->type != KEXEC_TYPE_CRASH)
kexec_method = control_code_buffer;
else
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 67ec1fadcfe2..86acd690d529 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -322,10 +322,11 @@ subsys_initcall(topology_init);
void free_initmem(void)
{
- if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
- set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end),
- IS_ENABLED(CONFIG_64BIT) ?
- set_memory_rw : set_memory_rw_nx);
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
+ set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx);
+ if (IS_ENABLED(CONFIG_64BIT))
+ set_kernel_memory(__init_begin, __init_end, set_memory_nx);
+ }
free_initmem_default(POISON_FREE_INITMEM);
}
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 760a64518c58..8c3b59f1f9b8 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -12,6 +12,7 @@
#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/kexec.h>
#include <linux/profile.h>
#include <linux/smp.h>
#include <linux/sched.h>
@@ -22,11 +23,13 @@
#include <asm/sbi.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
enum ipi_message_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
IPI_CPU_STOP,
+ IPI_CPU_CRASH_STOP,
IPI_IRQ_WORK,
IPI_TIMER,
IPI_MAX
@@ -71,6 +74,32 @@ static void ipi_stop(void)
wait_for_interrupt();
}
+#ifdef CONFIG_KEXEC_CORE
+static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
+
+static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+{
+ crash_save_cpu(regs, cpu);
+
+ atomic_dec(&waiting_for_crash_ipi);
+
+ local_irq_disable();
+
+#ifdef CONFIG_HOTPLUG_CPU
+ if (cpu_has_hotplug(cpu))
+ cpu_ops[cpu]->cpu_stop();
+#endif
+
+ for(;;)
+ wait_for_interrupt();
+}
+#else
+static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+{
+ unreachable();
+}
+#endif
+
static const struct riscv_ipi_ops *ipi_ops __ro_after_init;
void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops)
@@ -124,8 +153,9 @@ void arch_irq_work_raise(void)
void handle_IPI(struct pt_regs *regs)
{
- unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
- unsigned long *stats = ipi_data[smp_processor_id()].stats;
+ unsigned int cpu = smp_processor_id();
+ unsigned long *pending_ipis = &ipi_data[cpu].bits;
+ unsigned long *stats = ipi_data[cpu].stats;
riscv_clear_ipi();
@@ -154,6 +184,10 @@ void handle_IPI(struct pt_regs *regs)
ipi_stop();
}
+ if (ops & (1 << IPI_CPU_CRASH_STOP)) {
+ ipi_cpu_crash_stop(cpu, get_irq_regs());
+ }
+
if (ops & (1 << IPI_IRQ_WORK)) {
stats[IPI_IRQ_WORK]++;
irq_work_run();
@@ -176,6 +210,7 @@ static const char * const ipi_names[] = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNC] = "Function call interrupts",
[IPI_CPU_STOP] = "CPU stop interrupts",
+ [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
[IPI_TIMER] = "Timer broadcast interrupts",
};
@@ -235,6 +270,64 @@ void smp_send_stop(void)
cpumask_pr_args(cpu_online_mask));
}
+#ifdef CONFIG_KEXEC_CORE
+/*
+ * The number of CPUs online, not counting this CPU (which may not be
+ * fully online and so not counted in num_online_cpus()).
+ */
+static inline unsigned int num_other_online_cpus(void)
+{
+ unsigned int this_cpu_online = cpu_online(smp_processor_id());
+
+ return num_online_cpus() - this_cpu_online;
+}
+
+void crash_smp_send_stop(void)
+{
+ static int cpus_stopped;
+ cpumask_t mask;
+ unsigned long timeout;
+
+ /*
+ * This function can be called twice in panic path, but obviously
+ * we execute this only once.
+ */
+ if (cpus_stopped)
+ return;
+
+ cpus_stopped = 1;
+
+ /*
+ * If this cpu is the only one alive at this point in time, online or
+ * not, there are no stop messages to be sent around, so just back out.
+ */
+ if (num_other_online_cpus() == 0)
+ return;
+
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
+ atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
+
+ pr_crit("SMP: stopping secondary CPUs\n");
+ send_ipi_mask(&mask, IPI_CPU_CRASH_STOP);
+
+ /* Wait up to one second for other CPUs to stop */
+ timeout = USEC_PER_SEC;
+ while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
+ udelay(1);
+
+ if (atomic_read(&waiting_for_crash_ipi) > 0)
+ pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
+}
+
+bool smp_crash_stop_failed(void)
+{
+ return (atomic_read(&waiting_for_crash_ipi) > 0);
+}
+#endif
+
void smp_send_reschedule(int cpu)
{
send_ipi_single(cpu, IPI_RESCHEDULE);
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index f3e96d60a2ff..7abd8e4c4df6 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -221,11 +221,29 @@ asmlinkage unsigned long get_overflow_stack(void)
OVERFLOW_STACK_SIZE;
}
+/*
+ * A pseudo spinlock to protect the shadow stack from being used by multiple
+ * harts concurrently. This isn't a real spinlock because the lock side must
+ * be taken without a valid stack and only a single register, it's only taken
+ * while in the process of panicking anyway so the performance and error
+ * checking a proper spinlock gives us doesn't matter.
+ */
+unsigned long spin_shadow_stack;
+
asmlinkage void handle_bad_stack(struct pt_regs *regs)
{
unsigned long tsk_stk = (unsigned long)current->stack;
unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
+ /*
+ * We're done with the shadow stack by this point, as we're on the
+ * overflow stack. Tell any other concurrent overflowing harts that
+ * they can proceed with panicking by releasing the pseudo-spinlock.
+ *
+ * This pairs with an amoswap.aq in handle_kernel_stack_overflow.
+ */
+ smp_store_release(&spin_shadow_stack, 0);
+
console_verbose();
pr_emerg("Insufficient stack space to handle exception!\n");
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index db6548509bb3..06e6b27f3bcc 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -17,6 +17,7 @@ vdso-syms += flush_icache
obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
ccflags-y := -fno-stack-protector
+ccflags-y += -DDISABLE_BRANCH_PROFILING
ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f1cb9391190d..11e901286414 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -763,6 +763,7 @@ static inline int pmd_dirty(pmd_t pmd)
return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
}
+#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 94138f8f0c1c..ace2541ababd 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -546,8 +546,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
scb_s->eca |= scb_o->eca & ECA_CEI;
/* Epoch Extension */
- if (test_kvm_facility(vcpu->kvm, 139))
+ if (test_kvm_facility(vcpu->kvm, 139)) {
scb_s->ecd |= scb_o->ecd & ECD_MEF;
+ scb_s->epdx = scb_o->epdx;
+ }
/* etoken */
if (test_kvm_facility(vcpu->kvm, 156))
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index a779418ceba9..3bc9736bddb1 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -693,6 +693,7 @@ static inline unsigned long pmd_dirty(pmd_t pmd)
return pte_dirty(pte);
}
+#define pmd_young pmd_young
static inline unsigned long pmd_young(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c936ce9f0c47..dfdb103ae4f6 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -321,7 +321,7 @@ static inline void indirect_branch_prediction_barrier(void)
/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
-extern void write_spec_ctrl_current(u64 val, bool force);
+extern void update_spec_ctrl_cond(u64 val);
extern u64 spec_ctrl_current(void);
/*
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5059799bebe3..286a71810f9e 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -139,6 +139,7 @@ static inline int pmd_dirty(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_DIRTY;
}
+#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_ACCESSED;
@@ -1438,6 +1439,14 @@ static inline bool arch_has_hw_pte_young(void)
return true;
}
+#ifdef CONFIG_XEN_PV
+#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
+static inline bool arch_has_hw_nonleaf_pmd_young(void)
+{
+ return !cpu_feature_enabled(X86_FEATURE_XENPV);
+}
+#endif
+
#ifdef CONFIG_PAGE_TABLE_CHECK
static inline bool pte_user_accessible_page(pte_t pte)
{
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 3e3230cccaa7..6daf84229548 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -60,11 +60,18 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
static DEFINE_MUTEX(spec_ctrl_mutex);
+/* Update SPEC_CTRL MSR and its cached copy unconditionally */
+static void update_spec_ctrl(u64 val)
+{
+ this_cpu_write(x86_spec_ctrl_current, val);
+ wrmsrl(MSR_IA32_SPEC_CTRL, val);
+}
+
/*
* Keep track of the SPEC_CTRL MSR value for the current task, which may differ
* from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
*/
-void write_spec_ctrl_current(u64 val, bool force)
+void update_spec_ctrl_cond(u64 val)
{
if (this_cpu_read(x86_spec_ctrl_current) == val)
return;
@@ -75,7 +82,7 @@ void write_spec_ctrl_current(u64 val, bool force)
* When KERNEL_IBRS this MSR is written on return-to-user, unless
* forced the update can be delayed until that time.
*/
- if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
+ if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
wrmsrl(MSR_IA32_SPEC_CTRL, val);
}
@@ -1328,7 +1335,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void)
if (ia32_cap & ARCH_CAP_RRSBA) {
x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
}
}
@@ -1450,7 +1457,7 @@ static void __init spectre_v2_select_mitigation(void)
if (spectre_v2_in_ibrs_mode(mode)) {
x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
}
switch (mode) {
@@ -1564,7 +1571,7 @@ static void __init spectre_v2_select_mitigation(void)
static void update_stibp_msr(void * __unused)
{
u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
- write_spec_ctrl_current(val, true);
+ update_spec_ctrl(val);
}
/* Update x86_spec_ctrl_base in case SMT state changed. */
@@ -1797,7 +1804,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
}
}
@@ -2048,7 +2055,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
void x86_spec_ctrl_setup_ap(void)
{
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
x86_amd_ssb_disable();
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c21b7347a26d..e436c9c1ef3b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
}
if (updmsr)
- write_spec_ctrl_current(msr, false);
+ update_spec_ctrl_cond(msr);
}
static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2835bd796639..69227f77b201 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10574,8 +10574,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
vcpu->mmio_needed = 0;
r = 0;
+ goto out;
}
- goto out;
}
if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
/* Page is swapped out. Do synthetic halt */
diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index 23f49a2f4d14..6cceca64a6bc 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -562,17 +562,26 @@ static int initiator_cmp(void *priv, const struct list_head *a,
{
struct memory_initiator *ia;
struct memory_initiator *ib;
- unsigned long *p_nodes = priv;
ia = list_entry(a, struct memory_initiator, node);
ib = list_entry(b, struct memory_initiator, node);
- set_bit(ia->processor_pxm, p_nodes);
- set_bit(ib->processor_pxm, p_nodes);
-
return ia->processor_pxm - ib->processor_pxm;
}
+static int initiators_to_nodemask(unsigned long *p_nodes)
+{
+ struct memory_initiator *initiator;
+
+ if (list_empty(&initiators))
+ return -ENXIO;
+
+ list_for_each_entry(initiator, &initiators, node)
+ set_bit(initiator->processor_pxm, p_nodes);
+
+ return 0;
+}
+
static void hmat_register_target_initiators(struct memory_target *target)
{
static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
@@ -609,7 +618,10 @@ static void hmat_register_target_initiators(struct memory_target *target)
* initiators.
*/
bitmap_zero(p_nodes, MAX_NUMNODES);
- list_sort(p_nodes, &initiators, initiator_cmp);
+ list_sort(NULL, &initiators, initiator_cmp);
+ if (initiators_to_nodemask(p_nodes) < 0)
+ return;
+
if (!access0done) {
for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
loc = localities_types[i];
@@ -643,8 +655,9 @@ static void hmat_register_target_initiators(struct memory_target *target)
/* Access 1 ignores Generic Initiators */
bitmap_zero(p_nodes, MAX_NUMNODES);
- list_sort(p_nodes, &initiators, initiator_cmp);
- best = 0;
+ if (initiators_to_nodemask(p_nodes) < 0)
+ return;
+
for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
loc = localities_types[i];
if (!loc)
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index ddf17e2d266c..b9e336bacf17 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -109,7 +109,7 @@ struct clk *ahci_platform_find_clk(struct ahci_host_priv *hpriv, const char *con
int i;
for (i = 0; i < hpriv->n_clks; i++) {
- if (!strcmp(hpriv->clks[i].id, con_id))
+ if (hpriv->clks[i].id && !strcmp(hpriv->clks[i].id, con_id))
return hpriv->clks[i].clk;
}
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 271963805a38..f05018988a17 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2056,6 +2056,11 @@ static int btusb_setup_csr(struct hci_dev *hdev)
rp = (struct hci_rp_read_local_version *)skb->data;
+ bt_dev_info(hdev, "CSR: Setting up dongle with HCI ver=%u rev=%04x; LMP ver=%u subver=%04x; manufacturer=%u",
+ le16_to_cpu(rp->hci_ver), le16_to_cpu(rp->hci_rev),
+ le16_to_cpu(rp->lmp_ver), le16_to_cpu(rp->lmp_subver),
+ le16_to_cpu(rp->manufacturer));
+
/* Detect a wide host of Chinese controllers that aren't CSR.
*
* Known fake bcdDevices: 0x0100, 0x0134, 0x1915, 0x2520, 0x7558, 0x8891
@@ -2118,6 +2123,7 @@ static int btusb_setup_csr(struct hci_dev *hdev)
* without these the controller will lock up.
*/
set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks);
+ set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks);
set_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks);
set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks);
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 1621ce818705..d69905233aff 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -401,13 +401,14 @@ int tpm_pm_suspend(struct device *dev)
!pm_suspend_via_firmware())
goto suspended;
- if (!tpm_chip_start(chip)) {
+ rc = tpm_try_get_ops(chip);
+ if (!rc) {
if (chip->flags & TPM_CHIP_FLAG_TPM2)
tpm2_shutdown(chip, TPM2_SU_STATE);
else
rc = tpm1_pm_suspend(chip, tpm_suspend_pcr);
- tpm_chip_stop(chip);
+ tpm_put_ops(chip);
}
suspended:
diff --git a/drivers/clk/at91/at91rm9200.c b/drivers/clk/at91/at91rm9200.c
index b174f727a8ef..16870943a13e 100644
--- a/drivers/clk/at91/at91rm9200.c
+++ b/drivers/clk/at91/at91rm9200.c
@@ -40,7 +40,7 @@ static const struct clk_pll_characteristics rm9200_pll_characteristics = {
};
static const struct sck at91rm9200_systemck[] = {
- { .n = "udpck", .p = "usbck", .id = 2 },
+ { .n = "udpck", .p = "usbck", .id = 1 },
{ .n = "uhpck", .p = "usbck", .id = 4 },
{ .n = "pck0", .p = "prog0", .id = 8 },
{ .n = "pck1", .p = "prog1", .id = 9 },
diff --git a/drivers/clk/qcom/gcc-sc8280xp.c b/drivers/clk/qcom/gcc-sc8280xp.c
index a18ed88f3b82..b3198784e1c3 100644
--- a/drivers/clk/qcom/gcc-sc8280xp.c
+++ b/drivers/clk/qcom/gcc-sc8280xp.c
@@ -5364,6 +5364,8 @@ static struct clk_branch gcc_ufs_1_card_clkref_clk = {
.enable_mask = BIT(0),
.hw.init = &(const struct clk_init_data) {
.name = "gcc_ufs_1_card_clkref_clk",
+ .parent_data = &gcc_parent_data_tcxo,
+ .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -5432,6 +5434,8 @@ static struct clk_branch gcc_ufs_card_clkref_clk = {
.enable_mask = BIT(0),
.hw.init = &(const struct clk_init_data) {
.name = "gcc_ufs_card_clkref_clk",
+ .parent_data = &gcc_parent_data_tcxo,
+ .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -5848,6 +5852,8 @@ static struct clk_branch gcc_ufs_ref_clkref_clk = {
.enable_mask = BIT(0),
.hw.init = &(const struct clk_init_data) {
.name = "gcc_ufs_ref_clkref_clk",
+ .parent_data = &gcc_parent_data_tcxo,
+ .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c
index 7cf5e130e92f..0f21a8a767ac 100644
--- a/drivers/clk/qcom/gdsc.c
+++ b/drivers/clk/qcom/gdsc.c
@@ -11,7 +11,6 @@
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/pm_domain.h>
-#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
#include <linux/reset-controller.h>
@@ -56,22 +55,6 @@ enum gdsc_status {
GDSC_ON
};
-static int gdsc_pm_runtime_get(struct gdsc *sc)
-{
- if (!sc->dev)
- return 0;
-
- return pm_runtime_resume_and_get(sc->dev);
-}
-
-static int gdsc_pm_runtime_put(struct gdsc *sc)
-{
- if (!sc->dev)
- return 0;
-
- return pm_runtime_put_sync(sc->dev);
-}
-
/* Returns 1 if GDSC status is status, 0 if not, and < 0 on error */
static int gdsc_check_status(struct gdsc *sc, enum gdsc_status status)
{
@@ -271,8 +254,9 @@ static void gdsc_retain_ff_on(struct gdsc *sc)
regmap_update_bits(sc->regmap, sc->gdscr, mask, mask);
}
-static int _gdsc_enable(struct gdsc *sc)
+static int gdsc_enable(struct generic_pm_domain *domain)
{
+ struct gdsc *sc = domain_to_gdsc(domain);
int ret;
if (sc->pwrsts == PWRSTS_ON)
@@ -328,22 +312,11 @@ static int _gdsc_enable(struct gdsc *sc)
return 0;
}
-static int gdsc_enable(struct generic_pm_domain *domain)
+static int gdsc_disable(struct generic_pm_domain *domain)
{
struct gdsc *sc = domain_to_gdsc(domain);
int ret;
- ret = gdsc_pm_runtime_get(sc);
- if (ret)
- return ret;
-
- return _gdsc_enable(sc);
-}
-
-static int _gdsc_disable(struct gdsc *sc)
-{
- int ret;
-
if (sc->pwrsts == PWRSTS_ON)
return gdsc_assert_reset(sc);
@@ -388,18 +361,6 @@ static int _gdsc_disable(struct gdsc *sc)
return 0;
}
-static int gdsc_disable(struct generic_pm_domain *domain)
-{
- struct gdsc *sc = domain_to_gdsc(domain);
- int ret;
-
- ret = _gdsc_disable(sc);
-
- gdsc_pm_runtime_put(sc);
-
- return ret;
-}
-
static int gdsc_init(struct gdsc *sc)
{
u32 mask, val;
@@ -447,11 +408,6 @@ static int gdsc_init(struct gdsc *sc)
return ret;
}
- /* ...and the power-domain */
- ret = gdsc_pm_runtime_get(sc);
- if (ret)
- goto err_disable_supply;
-
/*
* Votable GDSCs can be ON due to Vote from other masters.
* If a Votable GDSC is ON, make sure we have a Vote.
@@ -459,14 +415,14 @@ static int gdsc_init(struct gdsc *sc)
if (sc->flags & VOTABLE) {
ret = gdsc_update_collapse_bit(sc, false);
if (ret)
- goto err_put_rpm;
+ goto err_disable_supply;
}
/* Turn on HW trigger mode if supported */
if (sc->flags & HW_CTRL) {
ret = gdsc_hwctrl(sc, true);
if (ret < 0)
- goto err_put_rpm;
+ goto err_disable_supply;
}
/*
@@ -496,13 +452,10 @@ static int gdsc_init(struct gdsc *sc)
ret = pm_genpd_init(&sc->pd, NULL, !on);
if (ret)
- goto err_put_rpm;
+ goto err_disable_supply;
return 0;
-err_put_rpm:
- if (on)
- gdsc_pm_runtime_put(sc);
err_disable_supply:
if (on && sc->rsupply)
regulator_disable(sc->rsupply);
@@ -541,8 +494,6 @@ int gdsc_register(struct gdsc_desc *desc,
for (i = 0; i < num; i++) {
if (!scs[i])
continue;
- if (pm_runtime_enabled(dev))
- scs[i]->dev = dev;
scs[i]->regmap = regmap;
scs[i]->rcdev = rcdev;
ret = gdsc_init(scs[i]);
diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h
index 981a12c8502d..803512688336 100644
--- a/drivers/clk/qcom/gdsc.h
+++ b/drivers/clk/qcom/gdsc.h
@@ -30,7 +30,6 @@ struct reset_controller_dev;
* @resets: ids of resets associated with this gdsc
* @reset_count: number of @resets
* @rcdev: reset controller
- * @dev: the device holding the GDSC, used for pm_runtime calls
*/
struct gdsc {
struct generic_pm_domain pd;
@@ -74,7 +73,6 @@ struct gdsc {
const char *supply;
struct regulator *rsupply;
- struct device *dev;
};
struct gdsc_desc {
diff --git a/drivers/clk/samsung/clk-exynos-clkout.c b/drivers/clk/samsung/clk-exynos-clkout.c
index 273f77d54dab..e6d6cbf8c4e6 100644
--- a/drivers/clk/samsung/clk-exynos-clkout.c
+++ b/drivers/clk/samsung/clk-exynos-clkout.c
@@ -81,17 +81,19 @@ MODULE_DEVICE_TABLE(of, exynos_clkout_ids);
static int exynos_clkout_match_parent_dev(struct device *dev, u32 *mux_mask)
{
const struct exynos_clkout_variant *variant;
+ const struct of_device_id *match;
if (!dev->parent) {
dev_err(dev, "not instantiated from MFD\n");
return -EINVAL;
}
- variant = of_device_get_match_data(dev->parent);
- if (!variant) {
+ match = of_match_device(exynos_clkout_ids, dev->parent);
+ if (!match) {
dev_err(dev, "cannot match parent device\n");
return -EINVAL;
}
+ variant = match->data;
*mux_mask = variant->mux_mask;
diff --git a/drivers/clk/samsung/clk-exynos7885.c b/drivers/clk/samsung/clk-exynos7885.c
index 62ce6814f141..0d2a950ed184 100644
--- a/drivers/clk/samsung/clk-exynos7885.c
+++ b/drivers/clk/samsung/clk-exynos7885.c
@@ -231,7 +231,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = {
CLK_CON_DIV_PLL_SHARED0_DIV2, 0, 1),
DIV(CLK_DOUT_SHARED0_DIV3, "dout_shared0_div3", "fout_shared0_pll",
CLK_CON_DIV_PLL_SHARED0_DIV3, 0, 2),
- DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "fout_shared0_pll",
+ DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "dout_shared0_div2",
CLK_CON_DIV_PLL_SHARED0_DIV4, 0, 1),
DIV(CLK_DOUT_SHARED0_DIV5, "dout_shared0_div5", "fout_shared0_pll",
CLK_CON_DIV_PLL_SHARED0_DIV5, 0, 3),
@@ -239,7 +239,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = {
CLK_CON_DIV_PLL_SHARED1_DIV2, 0, 1),
DIV(CLK_DOUT_SHARED1_DIV3, "dout_shared1_div3", "fout_shared1_pll",
CLK_CON_DIV_PLL_SHARED1_DIV3, 0, 2),
- DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "fout_shared1_pll",
+ DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "dout_shared1_div2",
CLK_CON_DIV_PLL_SHARED1_DIV4, 0, 1),
/* CORE */
diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c
index 969a552da8d2..a0d66fabf073 100644
--- a/drivers/clocksource/timer-riscv.c
+++ b/drivers/clocksource/timer-riscv.c
@@ -51,7 +51,7 @@ static int riscv_clock_next_event(unsigned long delta,
static unsigned int riscv_clock_event_irq;
static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = {
.name = "riscv_timer_clockevent",
- .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP,
+ .features = CLOCK_EVT_FEAT_ONESHOT,
.rating = 100,
.set_next_event = riscv_clock_next_event,
};
diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
index 97086fab698e..903325aac991 100644
--- a/drivers/dax/hmem/device.c
+++ b/drivers/dax/hmem/device.c
@@ -8,6 +8,13 @@
static bool nohmem;
module_param_named(disable, nohmem, bool, 0444);
+static struct resource hmem_active = {
+ .name = "HMEM devices",
+ .start = 0,
+ .end = -1,
+ .flags = IORESOURCE_MEM,
+};
+
void hmem_register_device(int target_nid, struct resource *r)
{
/* define a clean / non-busy resource for the platform device */
@@ -41,6 +48,12 @@ void hmem_register_device(int target_nid, struct resource *r)
goto out_pdev;
}
+ if (!__request_region(&hmem_active, res.start, resource_size(&res),
+ dev_name(&pdev->dev), 0)) {
+ dev_dbg(&pdev->dev, "hmem range %pr already active\n", &res);
+ goto out_active;
+ }
+
pdev->dev.numa_node = numa_map_to_online_node(target_nid);
info = (struct memregion_info) {
.target_node = target_nid,
@@ -66,6 +79,8 @@ void hmem_register_device(int target_nid, struct resource *r)
return;
out_resource:
+ __release_region(&hmem_active, res.start, resource_size(&res));
+out_active:
platform_device_put(pdev);
out_pdev:
memregion_free(id);
@@ -73,15 +88,6 @@ out_pdev:
static __init int hmem_register_one(struct resource *res, void *data)
{
- /*
- * If the resource is not a top-level resource it was already
- * assigned to a device by the HMAT parsing.
- */
- if (res->parent != &iomem_resource) {
- pr_info("HMEM: skip %pr, already claimed\n", res);
- return 0;
- }
-
hmem_register_device(phys_to_target_node(res->start), res);
return 0;
diff --git a/drivers/gpio/gpio-amd8111.c b/drivers/gpio/gpio-amd8111.c
index 14e6b3e64add..6f3ded619c8b 100644
--- a/drivers/gpio/gpio-amd8111.c
+++ b/drivers/gpio/gpio-amd8111.c
@@ -226,7 +226,10 @@ found:
ioport_unmap(gp.pm);
goto out;
}
+ return 0;
+
out:
+ pci_dev_put(pdev);
return err;
}
@@ -234,6 +237,7 @@ static void __exit amd_gpio_exit(void)
{
gpiochip_remove(&gp.chip);
ioport_unmap(gp.pm);
+ pci_dev_put(gp.pdev);
}
module_init(amd_gpio_init);
diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c
index 870910bb9dd3..200e43a6f4b4 100644
--- a/drivers/gpio/gpio-rockchip.c
+++ b/drivers/gpio/gpio-rockchip.c
@@ -610,6 +610,7 @@ static int rockchip_gpiolib_register(struct rockchip_pin_bank *bank)
return -ENODATA;
pctldev = of_pinctrl_get(pctlnp);
+ of_node_put(pctlnp);
if (!pctldev)
return -ENODEV;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 4756ea08894f..a70522aef355 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -526,12 +526,13 @@ static int gpiochip_setup_dev(struct gpio_device *gdev)
if (ret)
return ret;
+ /* From this point, the .release() function cleans up gpio_device */
+ gdev->dev.release = gpiodevice_release;
+
ret = gpiochip_sysfs_register(gdev);
if (ret)
goto err_remove_device;
- /* From this point, the .release() function cleans up gpio_device */
- gdev->dev.release = gpiodevice_release;
dev_dbg(&gdev->dev, "registered GPIOs %d to %d on %s\n", gdev->base,
gdev->base + gdev->ngpio - 1, gdev->chip->label ? : "generic");
@@ -597,10 +598,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
struct fwnode_handle *fwnode = NULL;
struct gpio_device *gdev;
unsigned long flags;
- int base = gc->base;
unsigned int i;
+ u32 ngpios = 0;
+ int base = 0;
int ret = 0;
- u32 ngpios;
if (gc->fwnode)
fwnode = gc->fwnode;
@@ -647,17 +648,12 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
else
gdev->owner = THIS_MODULE;
- gdev->descs = kcalloc(gc->ngpio, sizeof(gdev->descs[0]), GFP_KERNEL);
- if (!gdev->descs) {
- ret = -ENOMEM;
- goto err_free_dev_name;
- }
-
/*
* Try the device properties if the driver didn't supply the number
* of GPIO lines.
*/
- if (gc->ngpio == 0) {
+ ngpios = gc->ngpio;
+ if (ngpios == 0) {
ret = device_property_read_u32(&gdev->dev, "ngpios", &ngpios);
if (ret == -ENODATA)
/*
@@ -668,7 +664,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
*/
ngpios = 0;
else if (ret)
- goto err_free_descs;
+ goto err_free_dev_name;
gc->ngpio = ngpios;
}
@@ -676,13 +672,19 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
if (gc->ngpio == 0) {
chip_err(gc, "tried to insert a GPIO chip with zero lines\n");
ret = -EINVAL;
- goto err_free_descs;
+ goto err_free_dev_name;
}
if (gc->ngpio > FASTPATH_NGPIO)
chip_warn(gc, "line cnt %u is greater than fast path cnt %u\n",
gc->ngpio, FASTPATH_NGPIO);
+ gdev->descs = kcalloc(gc->ngpio, sizeof(*gdev->descs), GFP_KERNEL);
+ if (!gdev->descs) {
+ ret = -ENOMEM;
+ goto err_free_dev_name;
+ }
+
gdev->label = kstrdup_const(gc->label ?: "unknown", GFP_KERNEL);
if (!gdev->label) {
ret = -ENOMEM;
@@ -701,11 +703,13 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
* it may be a pipe dream. It will not happen before we get rid
* of the sysfs interface anyways.
*/
+ base = gc->base;
if (base < 0) {
base = gpiochip_find_base(gc->ngpio);
if (base < 0) {
- ret = base;
spin_unlock_irqrestore(&gpio_lock, flags);
+ ret = base;
+ base = 0;
goto err_free_label;
}
/*
@@ -816,6 +820,11 @@ err_remove_of_chip:
err_free_gpiochip_mask:
gpiochip_remove_pin_ranges(gc);
gpiochip_free_valid_mask(gc);
+ if (gdev->dev.release) {
+ /* release() has been registered by gpiochip_setup_dev() */
+ put_device(&gdev->dev);
+ goto err_print_message;
+ }
err_remove_from_list:
spin_lock_irqsave(&gpio_lock, flags);
list_del(&gdev->list);
@@ -829,13 +838,14 @@ err_free_dev_name:
err_free_ida:
ida_free(&gpio_ida, gdev->id);
err_free_gdev:
+ kfree(gdev);
+err_print_message:
/* failures here can mean systems won't boot... */
if (ret != -EPROBE_DEFER) {
pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__,
- gdev->base, gdev->base + gdev->ngpio - 1,
+ base, base + (int)ngpios - 1,
gc->label ? : "generic", ret);
}
- kfree(gdev);
return ret;
}
EXPORT_SYMBOL_GPL(gpiochip_add_data_with_key);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 0b52af415b28..ce64ca1c6e66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -156,6 +156,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
break;
case IP_VERSION(3, 0, 2):
fw_name = FIRMWARE_VANGOGH;
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.indirect_sram = true;
break;
case IP_VERSION(3, 0, 16):
fw_name = FIRMWARE_DIMGREY_CAVEFISH;
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 6925e0280dbe..f4f3d2665a6b 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -5,6 +5,7 @@ menu "Display Engine Configuration"
config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
+ depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64
select SND_HDA_COMPONENT if SND_HDA_CORE
select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128)
help
@@ -12,6 +13,12 @@ config DRM_AMD_DC
support for AMDGPU. This adds required support for Vega and
Raven ASICs.
+ calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64)
+ architectures built with Clang (all released versions), whereby the stack
+ frame gets blown up to well over 5k. This would cause an immediate kernel
+ panic on most architectures. We'll revert this when the following bug report
+ has been resolved: https://github.com/llvm/llvm-project/issues/41896.
+
config DRM_AMD_DC_DCN
def_bool n
help
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 461c62c88413..de77054195c6 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -3723,12 +3723,16 @@ out:
static u8 bigjoiner_pipes(struct drm_i915_private *i915)
{
+ u8 pipes;
+
if (DISPLAY_VER(i915) >= 12)
- return BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D);
+ pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D);
else if (DISPLAY_VER(i915) >= 11)
- return BIT(PIPE_B) | BIT(PIPE_C);
+ pipes = BIT(PIPE_B) | BIT(PIPE_C);
else
- return 0;
+ pipes = 0;
+
+ return pipes & RUNTIME_INFO(i915)->pipe_mask;
}
static bool transcoder_ddi_func_is_enabled(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index d0b03a928b9a..7caa3412a244 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -625,8 +625,13 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
return -EINTR;
}
- return timeout ? timeout : intel_uc_wait_for_idle(&gt->uc,
- remaining_timeout);
+ if (timeout)
+ return timeout;
+
+ if (remaining_timeout < 0)
+ remaining_timeout = 0;
+
+ return intel_uc_wait_for_idle(&gt->uc, remaining_timeout);
}
int intel_gt_init(struct intel_gt *gt)
@@ -1017,6 +1022,11 @@ static void mmio_invalidate_full(struct intel_gt *gt)
if (!i915_mmio_reg_offset(rb.reg))
continue;
+ if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS ||
+ engine->class == VIDEO_ENHANCEMENT_CLASS ||
+ engine->class == COMPUTE_CLASS))
+ rb.bit = _MASKED_BIT_ENABLE(rb.bit);
+
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
awake |= engine->mask;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index edb881d75630..1dfd01668c79 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -199,7 +199,7 @@ out_active: spin_lock(&timelines->lock);
if (remaining_timeout)
*remaining_timeout = timeout;
- return active_count ? timeout : 0;
+ return active_count ? timeout ?: -ETIME : 0;
}
static void retire_work_handler(struct work_struct *work)
diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c
index 2403ccd52c74..bba8cb6e8ae4 100644
--- a/drivers/gpu/drm/i915/intel_dram.c
+++ b/drivers/gpu/drm/i915/intel_dram.c
@@ -471,8 +471,7 @@ static int xelpdp_get_dram_info(struct drm_i915_private *i915)
u32 val = intel_uncore_read(&i915->uncore, MTL_MEM_SS_INFO_GLOBAL);
struct dram_info *dram_info = &i915->dram_info;
- val = REG_FIELD_GET(MTL_DDR_TYPE_MASK, val);
- switch (val) {
+ switch (REG_FIELD_GET(MTL_DDR_TYPE_MASK, val)) {
case 0:
dram_info->type = INTEL_DRAM_DDR4;
break;
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 9c1d31f63f85..bd47628da6be 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1315,6 +1315,9 @@ static s32 snto32(__u32 value, unsigned n)
if (!value || !n)
return 0;
+ if (n > 32)
+ n = 32;
+
switch (n) {
case 8: return ((__s8)value);
case 16: return ((__s16)value);
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index dad953f66996..8f58c3c1bec3 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -274,6 +274,7 @@
#define USB_DEVICE_ID_CH_AXIS_295 0x001c
#define USB_VENDOR_ID_CHERRY 0x046a
+#define USB_DEVICE_ID_CHERRY_MOUSE_000C 0x000c
#define USB_DEVICE_ID_CHERRY_CYMOTION 0x0023
#define USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR 0x0027
@@ -917,6 +918,7 @@
#define USB_DEVICE_ID_MS_XBOX_ONE_S_CONTROLLER 0x02fd
#define USB_DEVICE_ID_MS_PIXART_MOUSE 0x00cb
#define USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS 0x02e0
+#define USB_DEVICE_ID_MS_MOUSE_0783 0x0783
#define USB_VENDOR_ID_MOJO 0x8282
#define USB_DEVICE_ID_RETRO_ADAPTER 0x3201
@@ -1215,6 +1217,7 @@
#define USB_DEVICE_ID_SYNAPTICS_DELL_K15A 0x6e21
#define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1002 0x73f4
#define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003 0x73f5
+#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017 0x73f6
#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5 0x81a7
#define USB_VENDOR_ID_TEXAS_INSTRUMENTS 0x2047
@@ -1381,6 +1384,7 @@
#define USB_VENDOR_ID_PRIMAX 0x0461
#define USB_DEVICE_ID_PRIMAX_MOUSE_4D22 0x4d22
+#define USB_DEVICE_ID_PRIMAX_MOUSE_4E2A 0x4e2a
#define USB_DEVICE_ID_PRIMAX_KEYBOARD 0x4e05
#define USB_DEVICE_ID_PRIMAX_REZEL 0x4e72
#define USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F 0x4d0f
diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
index 430fa4f52ed3..75ebfcf31889 100644
--- a/drivers/hid/hid-ite.c
+++ b/drivers/hid/hid-ite.c
@@ -121,6 +121,11 @@ static const struct hid_device_id ite_devices[] = {
USB_VENDOR_ID_SYNAPTICS,
USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003),
.driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT },
+ /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_SYNAPTICS,
+ USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017),
+ .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT },
{ }
};
MODULE_DEVICE_TABLE(hid, ite_devices);
diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c
index 5e6a0cef2a06..e3fcf1353fb3 100644
--- a/drivers/hid/hid-lg4ff.c
+++ b/drivers/hid/hid-lg4ff.c
@@ -872,6 +872,12 @@ static ssize_t lg4ff_alternate_modes_store(struct device *dev, struct device_att
return -ENOMEM;
i = strlen(lbuf);
+
+ if (i == 0) {
+ kfree(lbuf);
+ return -EINVAL;
+ }
+
if (lbuf[i-1] == '\n') {
if (i == 1) {
kfree(lbuf);
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index 71a9c258a20b..8a2aac18dcc5 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -4269,21 +4269,6 @@ static void hidpp_remove(struct hid_device *hdev)
mutex_destroy(&hidpp->send_mutex);
}
-static const struct hid_device_id unhandled_hidpp_devices[] = {
- /* Logitech Harmony Adapter for PS3, handled in hid-sony */
- { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) },
- /* Handled in hid-generic */
- { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD) },
- {}
-};
-
-static bool hidpp_match(struct hid_device *hdev,
- bool ignore_special_driver)
-{
- /* Refuse to handle devices handled by other HID drivers */
- return !hid_match_id(hdev, unhandled_hidpp_devices);
-}
-
#define LDJ_DEVICE(product) \
HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE, \
USB_VENDOR_ID_LOGITECH, (product))
@@ -4367,9 +4352,15 @@ static const struct hid_device_id hidpp_devices[] = {
{ /* MX5500 keyboard over Bluetooth */
HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb30b),
.driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS },
-
- { /* And try to enable HID++ for all the Logitech Bluetooth devices */
- HID_DEVICE(BUS_BLUETOOTH, HID_GROUP_ANY, USB_VENDOR_ID_LOGITECH, HID_ANY_ID) },
+ { /* M-RCQ142 V470 Cordless Laser Mouse over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb008) },
+ { /* MX Master mouse over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb012) },
+ { /* MX Ergo trackball over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01d) },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e) },
+ { /* MX Master 3 mouse over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb023) },
{}
};
@@ -4383,7 +4374,6 @@ static const struct hid_usage_id hidpp_usages[] = {
static struct hid_driver hidpp_driver = {
.name = "logitech-hidpp-device",
.id_table = hidpp_devices,
- .match = hidpp_match,
.report_fixup = hidpp_report_fixup,
.probe = hidpp_probe,
.remove = hidpp_remove,
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index 50e1c717fc0a..0e9702c7f7d6 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -54,6 +54,7 @@ static const struct hid_device_id hid_quirks[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE), HID_QUIRK_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_PEDALS), HID_QUIRK_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_THROTTLE), HID_QUIRK_NOGET },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_MOUSE_000C), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB), HID_QUIRK_NO_INIT_REPORTS },
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB), HID_QUIRK_NO_INIT_REPORTS },
@@ -122,6 +123,7 @@ static const struct hid_device_id hid_quirks[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_MOUSE_0783), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PIXART_MOUSE), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE3_COVER), HID_QUIRK_NO_INIT_REPORTS },
@@ -146,6 +148,7 @@ static const struct hid_device_id hid_quirks[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN), HID_QUIRK_NO_INIT_REPORTS },
{ HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4D22), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4E2A), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D65), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4E22), HID_QUIRK_ALWAYS_POLL },
diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c
index 0fbc408c2607..7fa6fe04f1b2 100644
--- a/drivers/hid/hid-uclogic-core.c
+++ b/drivers/hid/hid-uclogic-core.c
@@ -192,6 +192,7 @@ static int uclogic_probe(struct hid_device *hdev,
* than the pen, so use QUIRK_MULTI_INPUT for all tablets.
*/
hdev->quirks |= HID_QUIRK_MULTI_INPUT;
+ hdev->quirks |= HID_QUIRK_HIDINPUT_FORCE;
/* Allocate and assign driver data */
drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL);
diff --git a/drivers/hid/hid-uclogic-rdesc.c b/drivers/hid/hid-uclogic-rdesc.c
index 4bd54c4fb5b0..6b73eb0df6bd 100644
--- a/drivers/hid/hid-uclogic-rdesc.c
+++ b/drivers/hid/hid-uclogic-rdesc.c
@@ -1193,7 +1193,7 @@ __u8 *uclogic_rdesc_template_apply(const __u8 *template_ptr,
p[sizeof(btn_head)] < param_num) {
v = param_list[p[sizeof(btn_head)]];
put_unaligned((__u8)0x2A, p); /* Usage Maximum */
- put_unaligned_le16((__force u16)cpu_to_le16(v), p + 1);
+ put_unaligned((__force u16)cpu_to_le16(v), (s16 *)(p + 1));
p += sizeof(btn_head) + 1;
} else {
p++;
diff --git a/drivers/hid/i2c-hid/Kconfig b/drivers/hid/i2c-hid/Kconfig
index 5273ee2bb134..d65abe65ce73 100644
--- a/drivers/hid/i2c-hid/Kconfig
+++ b/drivers/hid/i2c-hid/Kconfig
@@ -66,6 +66,6 @@ endmenu
config I2C_HID_CORE
tristate
- default y if I2C_HID_ACPI=y || I2C_HID_OF=y || I2C_HID_OF_GOODIX=y
- default m if I2C_HID_ACPI=m || I2C_HID_OF=m || I2C_HID_OF_GOODIX=m
+ default y if I2C_HID_ACPI=y || I2C_HID_OF=y || I2C_HID_OF_ELAN=y || I2C_HID_OF_GOODIX=y
+ default m if I2C_HID_ACPI=m || I2C_HID_OF=m || I2C_HID_OF_ELAN=m || I2C_HID_OF_GOODIX=m
select HID
diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c
index 81e688975c6a..a901e4e33d81 100644
--- a/drivers/hwmon/asus-ec-sensors.c
+++ b/drivers/hwmon/asus-ec-sensors.c
@@ -938,6 +938,8 @@ static int asus_ec_probe(struct platform_device *pdev)
ec_data->nr_sensors = hweight_long(ec_data->board_info->sensors);
ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors,
sizeof(struct ec_sensor), GFP_KERNEL);
+ if (!ec_data->sensors)
+ return -ENOMEM;
status = setup_lock_data(dev);
if (status) {
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 8bf32c6c85d9..9bee4d33fbdf 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -242,10 +242,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
*/
if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL) {
for (i = 0; i < ARRAY_SIZE(tjmax_pci_table); i++) {
- if (host_bridge->device == tjmax_pci_table[i].device)
+ if (host_bridge->device == tjmax_pci_table[i].device) {
+ pci_dev_put(host_bridge);
return tjmax_pci_table[i].tjmax;
+ }
}
}
+ pci_dev_put(host_bridge);
for (i = 0; i < ARRAY_SIZE(tjmax_table); i++) {
if (strstr(c->x86_model_id, tjmax_table[i].id))
@@ -533,6 +536,10 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx)
{
struct temp_data *tdata = pdata->core_data[indx];
+ /* if we errored on add then this is already gone */
+ if (!tdata)
+ return;
+
/* Remove the sysfs attributes */
sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group);
diff --git a/drivers/hwmon/i5500_temp.c b/drivers/hwmon/i5500_temp.c
index 05f68e9c9477..23b9f94fe0a9 100644
--- a/drivers/hwmon/i5500_temp.c
+++ b/drivers/hwmon/i5500_temp.c
@@ -117,7 +117,7 @@ static int i5500_temp_probe(struct pci_dev *pdev,
u32 tstimer;
s8 tsfsc;
- err = pci_enable_device(pdev);
+ err = pcim_enable_device(pdev);
if (err) {
dev_err(&pdev->dev, "Failed to enable device\n");
return err;
diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c
index f6ec165c0fa8..1837cccd993c 100644
--- a/drivers/hwmon/ibmpex.c
+++ b/drivers/hwmon/ibmpex.c
@@ -502,6 +502,7 @@ static void ibmpex_register_bmc(int iface, struct device *dev)
return;
out_register:
+ list_del(&data->list);
hwmon_device_unregister(data->hwmon_dev);
out_user:
ipmi_destroy_user(data->user);
diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c
index 2a57f4b60c29..e06186986444 100644
--- a/drivers/hwmon/ina3221.c
+++ b/drivers/hwmon/ina3221.c
@@ -228,7 +228,7 @@ static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg,
* Shunt Voltage Sum register has 14-bit value with 1-bit shift
* Other Shunt Voltage registers have 12 bits with 3-bit shift
*/
- if (reg == INA3221_SHUNT_SUM)
+ if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM)
*val = sign_extend32(regval >> 1, 14);
else
*val = sign_extend32(regval >> 3, 12);
@@ -465,7 +465,7 @@ static int ina3221_write_curr(struct device *dev, u32 attr,
* SHUNT_SUM: (1 / 40uV) << 1 = 1 / 20uV
* SHUNT[1-3]: (1 / 40uV) << 3 = 1 / 5uV
*/
- if (reg == INA3221_SHUNT_SUM)
+ if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM)
regval = DIV_ROUND_CLOSEST(voltage_uv, 20) & 0xfffe;
else
regval = DIV_ROUND_CLOSEST(voltage_uv, 5) & 0xfff8;
diff --git a/drivers/hwmon/ltc2947-core.c b/drivers/hwmon/ltc2947-core.c
index 7404e974762f..2dbbbac9de09 100644
--- a/drivers/hwmon/ltc2947-core.c
+++ b/drivers/hwmon/ltc2947-core.c
@@ -396,7 +396,7 @@ static int ltc2947_read_temp(struct device *dev, const u32 attr, long *val,
return ret;
/* in milidegrees celcius, temp is given by: */
- *val = (__val * 204) + 550;
+ *val = (__val * 204) + 5500;
return 0;
}
diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
index fe0cd205502d..f58943cb1341 100644
--- a/drivers/i2c/busses/i2c-cadence.c
+++ b/drivers/i2c/busses/i2c-cadence.c
@@ -852,7 +852,8 @@ static int cdns_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
CDNS_I2C_POLL_US, CDNS_I2C_TIMEOUT_US);
if (ret) {
ret = -EAGAIN;
- i2c_recover_bus(adap);
+ if (id->adap.bus_recovery_info)
+ i2c_recover_bus(adap);
goto out;
}
@@ -1263,8 +1264,13 @@ static int cdns_i2c_probe(struct platform_device *pdev)
id->rinfo.pinctrl = devm_pinctrl_get(&pdev->dev);
if (IS_ERR(id->rinfo.pinctrl)) {
+ int err = PTR_ERR(id->rinfo.pinctrl);
+
dev_info(&pdev->dev, "can't get pinctrl, bus recovery not supported\n");
- return PTR_ERR(id->rinfo.pinctrl);
+ if (err != -ENODEV)
+ return err;
+ } else {
+ id->adap.bus_recovery_info = &id->rinfo;
}
id->membase = devm_platform_get_and_ioremap_resource(pdev, 0, &r_mem);
@@ -1283,7 +1289,6 @@ static int cdns_i2c_probe(struct platform_device *pdev)
id->adap.retries = 3; /* Default retry value. */
id->adap.algo_data = id;
id->adap.dev.parent = &pdev->dev;
- id->adap.bus_recovery_info = &id->rinfo;
init_completion(&id->xfer_done);
snprintf(id->adap.name, sizeof(id->adap.name),
"Cadence I2C at %08lx", (unsigned long)r_mem->start);
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 3082183bd66a..fc70920c4dda 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -1132,7 +1132,8 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs,
int i, result;
unsigned int temp;
int block_data = msgs->flags & I2C_M_RECV_LEN;
- int use_dma = i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data;
+ int use_dma = i2c_imx->dma && msgs->flags & I2C_M_DMA_SAFE &&
+ msgs->len >= DMA_THRESHOLD && !block_data;
dev_dbg(&i2c_imx->adapter.dev,
"<%s> write slave address: addr=0x%x\n",
@@ -1298,7 +1299,8 @@ static int i2c_imx_xfer_common(struct i2c_adapter *adapter,
result = i2c_imx_read(i2c_imx, &msgs[i], is_lastmsg, atomic);
} else {
if (!atomic &&
- i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD)
+ i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD &&
+ msgs[i].flags & I2C_M_DMA_SAFE)
result = i2c_imx_dma_write(i2c_imx, &msgs[i]);
else
result = i2c_imx_write(i2c_imx, &msgs[i], atomic);
diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c
index 0c365b57d957..83457359ec45 100644
--- a/drivers/i2c/busses/i2c-npcm7xx.c
+++ b/drivers/i2c/busses/i2c-npcm7xx.c
@@ -2393,8 +2393,17 @@ static struct platform_driver npcm_i2c_bus_driver = {
static int __init npcm_i2c_init(void)
{
+ int ret;
+
npcm_i2c_debugfs_dir = debugfs_create_dir("npcm_i2c", NULL);
- return platform_driver_register(&npcm_i2c_bus_driver);
+
+ ret = platform_driver_register(&npcm_i2c_bus_driver);
+ if (ret) {
+ debugfs_remove_recursive(npcm_i2c_debugfs_dir);
+ return ret;
+ }
+
+ return 0;
}
module_init(npcm_i2c_init);
diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index 84a77512614d..8fce98bb77ff 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -626,7 +626,6 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i
dev_err(gi2c->se.dev, "I2C timeout gpi flags:%d addr:0x%x\n",
gi2c->cur->flags, gi2c->cur->addr);
gi2c->err = -ETIMEDOUT;
- goto err;
}
if (gi2c->err) {
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 9aa7b9d9a485..13fafb74bab8 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -467,6 +467,7 @@ static int i2c_device_probe(struct device *dev)
{
struct i2c_client *client = i2c_verify_client(dev);
struct i2c_driver *driver;
+ bool do_power_on;
int status;
if (!client)
@@ -545,8 +546,8 @@ static int i2c_device_probe(struct device *dev)
if (status < 0)
goto err_clear_wakeup_irq;
- status = dev_pm_domain_attach(&client->dev,
- !i2c_acpi_waive_d0_probe(dev));
+ do_power_on = !i2c_acpi_waive_d0_probe(dev);
+ status = dev_pm_domain_attach(&client->dev, do_power_on);
if (status)
goto err_clear_wakeup_irq;
@@ -585,7 +586,7 @@ static int i2c_device_probe(struct device *dev)
err_release_driver_resources:
devres_release_group(&client->dev, client->devres_group_id);
err_detach_pm_domain:
- dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev));
+ dev_pm_domain_detach(&client->dev, do_power_on);
err_clear_wakeup_irq:
dev_pm_clear_wake_irq(&client->dev);
device_init_wakeup(&client->dev, false);
@@ -610,7 +611,7 @@ static void i2c_device_remove(struct device *dev)
devres_release_group(&client->dev, client->devres_group_id);
- dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev));
+ dev_pm_domain_detach(&client->dev, true);
dev_pm_clear_wake_irq(&client->dev);
device_init_wakeup(&client->dev, false);
diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c
index 3a4952935366..3d9c5758d8a4 100644
--- a/drivers/input/touchscreen/raydium_i2c_ts.c
+++ b/drivers/input/touchscreen/raydium_i2c_ts.c
@@ -211,12 +211,14 @@ static int raydium_i2c_send(struct i2c_client *client,
error = raydium_i2c_xfer(client, addr, xfer, ARRAY_SIZE(xfer));
if (likely(!error))
- return 0;
+ goto out;
msleep(RM_RETRY_DELAY_MS);
} while (++tries < RM_MAX_RETRIES);
dev_err(&client->dev, "%s failed: %d\n", __func__, error);
+out:
+ kfree(tx_buf);
return error;
}
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 5a8f780e7ffd..bc94059a5b87 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -820,6 +820,7 @@ int __init dmar_dev_scope_init(void)
info = dmar_alloc_pci_notify_info(dev,
BUS_NOTIFY_ADD_DEVICE);
if (!info) {
+ pci_dev_put(dev);
return dmar_dev_scope_status;
} else {
dmar_pci_bus_add_dev(info);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 996a8b5ee5ee..5287efe247b1 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1396,6 +1396,24 @@ static void domain_update_iotlb(struct dmar_domain *domain)
spin_unlock_irqrestore(&domain->lock, flags);
}
+/*
+ * The extra devTLB flush quirk impacts those QAT devices with PCI device
+ * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device()
+ * check because it applies only to the built-in QAT devices and it doesn't
+ * grant additional privileges.
+ *
+ * The four affected IDs differ only in their low two bits, so the device ID
+ * is masked with 0xfffc and compared against the base ID 0x4940.
+ */
+#define BUGGY_QAT_DEVID_MASK 0x4940
+
+/* Return true if @pdev is an Intel device with an ID in 0x4940..0x4943. */
+static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev)
+{
+	if (pdev->vendor != PCI_VENDOR_ID_INTEL)
+		return false;
+
+	/* Drop the low two ID bits so the whole 4-ID range hits one value. */
+	if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK)
+		return false;
+
+	return true;
+}
+
static void iommu_enable_pci_caps(struct device_domain_info *info)
{
struct pci_dev *pdev;
@@ -1478,6 +1496,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
qdep = info->ats_qdep;
qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
qdep, addr, mask);
+ quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep);
}
static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
@@ -3854,8 +3873,10 @@ static inline bool has_external_pci(void)
struct pci_dev *pdev = NULL;
for_each_pci_dev(pdev)
- if (pdev->external_facing)
+ if (pdev->external_facing) {
+ pci_dev_put(pdev);
return true;
+ }
return false;
}
@@ -4490,9 +4511,10 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
if (dev_is_pci(dev)) {
if (ecap_dev_iotlb_support(iommu->ecap) &&
pci_ats_supported(pdev) &&
- dmar_ats_supported(pdev, iommu))
+ dmar_ats_supported(pdev, iommu)) {
info->ats_supported = 1;
-
+ info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev);
+ }
if (sm_supported(iommu)) {
if (pasid_supported(iommu)) {
int features = pci_pasid_features(pdev);
@@ -4931,3 +4953,48 @@ static void __init check_tylersburg_isoch(void)
pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
vtisochctrl);
}
+
+/*
+ * Here we deal with a device TLB defect where device may inadvertently issue ATS
+ * invalidation completion before posted writes initiated with translated address
+ * that utilized translations matching the invalidation address range, violating
+ * the invalidation completion ordering.
+ * Therefore, any use cases that cannot guarantee DMA is stopped before unmap is
+ * vulnerable to this defect. In other words, any dTLB invalidation initiated not
+ * under the control of the trusted/privileged host device driver must use this
+ * quirk.
+ * Device TLBs are invalidated under the following six conditions:
+ * 1. Device driver does DMA API unmap IOVA
+ * 2. Device driver unbind a PASID from a process, sva_unbind_device()
+ * 3. PASID is torn down, after PASID cache is flushed. e.g. process
+ * exit_mmap() due to crash
+ * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where
+ * VM has to free pages that were unmapped
+ * 5. Userspace driver unmaps a DMA buffer
+ * 6. Cache invalidation in vSVA usage (upcoming)
+ *
+ * For #1 and #2, device drivers are responsible for stopping DMA traffic
+ * before unmap/unbind. For #3, iommu driver gets mmu_notifier to
+ * invalidate TLB the same way as normal user unmap which will use this quirk.
+ * The dTLB invalidation after PASID cache flush does not need this quirk.
+ *
+ * As a reminder, #6 will *NEED* this quirk as we enable nested translation.
+ */
+void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
+			       unsigned long address, unsigned long mask,
+			       u32 pasid, u16 qdep)
+{
+	u16 sid;
+
+	/* Nothing to do unless this device was flagged with the quirk bit. */
+	if (likely(!info->dtlb_extra_inval))
+		return;
+
+	sid = PCI_DEVID(info->bus, info->devfn);
+
+	/* A real PASID takes the PASID-tagged device-TLB flush. */
+	if (pasid != PASID_RID2PASID) {
+		qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid,
+					 pasid, qdep, address, mask);
+		return;
+	}
+
+	/* PASID_RID2PASID uses the plain (request-ID based) flush. */
+	qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, address, mask);
+}
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 92023dff9513..db9df7c3790c 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -623,6 +623,7 @@ struct device_domain_info {
u8 pri_enabled:1;
u8 ats_supported:1;
u8 ats_enabled:1;
+ u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */
u8 ats_qdep;
struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
struct intel_iommu *iommu; /* IOMMU used by this device */
@@ -728,6 +729,9 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u32 pasid, u16 qdep, u64 addr,
unsigned int size_order);
+void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
+ unsigned long address, unsigned long pages,
+ u32 pasid, u16 qdep);
void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu,
u32 pasid);
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 7d08eb034f2d..03b25358946c 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -184,10 +184,13 @@ static void __flush_svm_range_dev(struct intel_svm *svm,
return;
qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
- if (info->ats_enabled)
+ if (info->ats_enabled) {
qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
svm->pasid, sdev->qdep, address,
order_base_2(pages));
+ quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
+ svm->pasid, sdev->qdep);
+ }
}
static void intel_flush_svm_range_dev(struct intel_svm *svm,
@@ -745,12 +748,16 @@ bad_req:
* If prq is to be handled outside iommu driver via receiver of
* the fault notifiers, we skip the page response here.
*/
- if (!pdev || intel_svm_prq_report(iommu, &pdev->dev, req))
- handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
+ if (!pdev)
+ goto bad_req;
- trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
- req->priv_data[0], req->priv_data[1],
- iommu->prq_seq_number++);
+ if (intel_svm_prq_report(iommu, &pdev->dev, req))
+ handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
+ else
+ trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
+ req->priv_data[0], req->priv_data[1],
+ iommu->prq_seq_number++);
+ pci_dev_put(pdev);
prq_advance:
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c
index 542dde9d2609..144027035892 100644
--- a/drivers/media/common/videobuf2/frame_vector.c
+++ b/drivers/media/common/videobuf2/frame_vector.c
@@ -35,11 +35,7 @@
int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
struct frame_vector *vec)
{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- int ret_pin_user_pages_fast = 0;
- int ret = 0;
- int err;
+ int ret;
if (nr_frames == 0)
return 0;
@@ -52,57 +48,17 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
ret = pin_user_pages_fast(start, nr_frames,
FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
(struct page **)(vec->ptrs));
- if (ret > 0) {
- vec->got_ref = true;
- vec->is_pfns = false;
- goto out_unlocked;
- }
- ret_pin_user_pages_fast = ret;
-
- mmap_read_lock(mm);
- vec->got_ref = false;
- vec->is_pfns = true;
- ret = 0;
- do {
- unsigned long *nums = frame_vector_pfns(vec);
-
- vma = vma_lookup(mm, start);
- if (!vma)
- break;
-
- while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) {
- err = follow_pfn(vma, start, &nums[ret]);
- if (err) {
- if (ret)
- goto out;
- // If follow_pfn() returns -EINVAL, then this
- // is not an IO mapping or a raw PFN mapping.
- // In that case, return the original error from
- // pin_user_pages_fast(). Otherwise this
- // function would return -EINVAL when
- // pin_user_pages_fast() returned -ENOMEM,
- // which makes debugging hard.
- if (err == -EINVAL && ret_pin_user_pages_fast)
- ret = ret_pin_user_pages_fast;
- else
- ret = err;
- goto out;
- }
- start += PAGE_SIZE;
- ret++;
- }
- /* Bail out if VMA doesn't completely cover the tail page. */
- if (start < vma->vm_end)
- break;
- } while (ret < nr_frames);
-out:
- mmap_read_unlock(mm);
-out_unlocked:
- if (!ret)
- ret = -EFAULT;
- if (ret > 0)
- vec->nr_frames = ret;
- return ret;
+ vec->got_ref = true;
+ vec->is_pfns = false;
+ vec->nr_frames = ret;
+
+ if (likely(ret > 0))
+ return ret;
+
+ /* This used to (racily) return non-refcounted pfns. Let people know */
+ WARN_ONCE(1, "get_vaddr_frames() cannot follow VM_IO mapping");
+ vec->nr_frames = 0;
+ return ret ? ret : -EFAULT;
}
EXPORT_SYMBOL(get_vaddr_frames);
diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
index ab9697f3b5f1..92efc4676df6 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -813,7 +813,13 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
num_buffers = max_t(unsigned int, *count, q->min_buffers_needed);
num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME);
memset(q->alloc_devs, 0, sizeof(q->alloc_devs));
+ /*
+ * Set this now to ensure that drivers see the correct q->memory value
+ * in the queue_setup op.
+ */
+ mutex_lock(&q->mmap_lock);
q->memory = memory;
+ mutex_unlock(&q->mmap_lock);
set_queue_coherency(q, non_coherent_mem);
/*
@@ -823,22 +829,27 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes,
plane_sizes, q->alloc_devs);
if (ret)
- return ret;
+ goto error;
/* Check that driver has set sane values */
- if (WARN_ON(!num_planes))
- return -EINVAL;
+ if (WARN_ON(!num_planes)) {
+ ret = -EINVAL;
+ goto error;
+ }
for (i = 0; i < num_planes; i++)
- if (WARN_ON(!plane_sizes[i]))
- return -EINVAL;
+ if (WARN_ON(!plane_sizes[i])) {
+ ret = -EINVAL;
+ goto error;
+ }
/* Finally, allocate buffers and video memory */
allocated_buffers =
__vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes);
if (allocated_buffers == 0) {
dprintk(q, 1, "memory allocation failed\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto error;
}
/*
@@ -879,7 +890,8 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
if (ret < 0) {
/*
* Note: __vb2_queue_free() will subtract 'allocated_buffers'
- * from q->num_buffers.
+ * from q->num_buffers and it will reset q->memory to
+ * VB2_MEMORY_UNKNOWN.
*/
__vb2_queue_free(q, allocated_buffers);
mutex_unlock(&q->mmap_lock);
@@ -895,6 +907,12 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
q->waiting_for_buffers = !q->is_output;
return 0;
+
+error:
+ mutex_lock(&q->mmap_lock);
+ q->memory = VB2_MEMORY_UNKNOWN;
+ mutex_unlock(&q->mmap_lock);
+ return ret;
}
EXPORT_SYMBOL_GPL(vb2_core_reqbufs);
@@ -906,6 +924,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
unsigned int num_planes = 0, num_buffers, allocated_buffers;
unsigned plane_sizes[VB2_MAX_PLANES] = { };
bool non_coherent_mem = flags & V4L2_MEMORY_FLAG_NON_COHERENT;
+ bool no_previous_buffers = !q->num_buffers;
int ret;
if (q->num_buffers == VB2_MAX_FRAME) {
@@ -913,13 +932,19 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
return -ENOBUFS;
}
- if (!q->num_buffers) {
+ if (no_previous_buffers) {
if (q->waiting_in_dqbuf && *count) {
dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n");
return -EBUSY;
}
memset(q->alloc_devs, 0, sizeof(q->alloc_devs));
+ /*
+ * Set this now to ensure that drivers see the correct q->memory
+ * value in the queue_setup op.
+ */
+ mutex_lock(&q->mmap_lock);
q->memory = memory;
+ mutex_unlock(&q->mmap_lock);
q->waiting_for_buffers = !q->is_output;
set_queue_coherency(q, non_coherent_mem);
} else {
@@ -945,14 +970,15 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
ret = call_qop(q, queue_setup, q, &num_buffers,
&num_planes, plane_sizes, q->alloc_devs);
if (ret)
- return ret;
+ goto error;
/* Finally, allocate buffers and video memory */
allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers,
num_planes, plane_sizes);
if (allocated_buffers == 0) {
dprintk(q, 1, "memory allocation failed\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto error;
}
/*
@@ -983,7 +1009,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
if (ret < 0) {
/*
* Note: __vb2_queue_free() will subtract 'allocated_buffers'
- * from q->num_buffers.
+ * from q->num_buffers and it will reset q->memory to
+ * VB2_MEMORY_UNKNOWN.
*/
__vb2_queue_free(q, allocated_buffers);
mutex_unlock(&q->mmap_lock);
@@ -998,6 +1025,14 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
*count = allocated_buffers;
return 0;
+
+error:
+ if (no_previous_buffers) {
+ mutex_lock(&q->mmap_lock);
+ q->memory = VB2_MEMORY_UNKNOWN;
+ mutex_unlock(&q->mmap_lock);
+ }
+ return ret;
}
EXPORT_SYMBOL_GPL(vb2_core_create_bufs);
@@ -2165,6 +2200,22 @@ static int __find_plane_by_offset(struct vb2_queue *q, unsigned long off,
unsigned int buffer, plane;
/*
+ * Sanity checks to ensure the lock is held, MEMORY_MMAP is
+ * used and fileio isn't active.
+ */
+ lockdep_assert_held(&q->mmap_lock);
+
+ if (q->memory != VB2_MEMORY_MMAP) {
+ dprintk(q, 1, "queue is not currently set up for mmap\n");
+ return -EINVAL;
+ }
+
+ if (vb2_fileio_is_active(q)) {
+ dprintk(q, 1, "file io in progress\n");
+ return -EBUSY;
+ }
+
+ /*
* Go over all buffers and their planes, comparing the given offset
* with an offset assigned to each plane. If a match is found,
* return its buffer and plane numbers.
@@ -2265,11 +2316,6 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma)
int ret;
unsigned long length;
- if (q->memory != VB2_MEMORY_MMAP) {
- dprintk(q, 1, "queue is not currently set up for mmap\n");
- return -EINVAL;
- }
-
/*
* Check memory area access mode.
*/
@@ -2291,14 +2337,9 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma)
mutex_lock(&q->mmap_lock);
- if (vb2_fileio_is_active(q)) {
- dprintk(q, 1, "mmap: file io in progress\n");
- ret = -EBUSY;
- goto unlock;
- }
-
/*
- * Find the plane corresponding to the offset passed by userspace.
+ * Find the plane corresponding to the offset passed by userspace. This
+ * will return an error if not MEMORY_MMAP or file I/O is in progress.
*/
ret = __find_plane_by_offset(q, off, &buffer, &plane);
if (ret)
@@ -2351,22 +2392,25 @@ unsigned long vb2_get_unmapped_area(struct vb2_queue *q,
void *vaddr;
int ret;
- if (q->memory != VB2_MEMORY_MMAP) {
- dprintk(q, 1, "queue is not currently set up for mmap\n");
- return -EINVAL;
- }
+ mutex_lock(&q->mmap_lock);
/*
- * Find the plane corresponding to the offset passed by userspace.
+ * Find the plane corresponding to the offset passed by userspace. This
+ * will return an error if not MEMORY_MMAP or file I/O is in progress.
*/
ret = __find_plane_by_offset(q, off, &buffer, &plane);
if (ret)
- return ret;
+ goto unlock;
vb = q->bufs[buffer];
vaddr = vb2_plane_vaddr(vb, plane);
+ mutex_unlock(&q->mmap_lock);
return vaddr ? (unsigned long)vaddr : -EINVAL;
+
+unlock:
+ mutex_unlock(&q->mmap_lock);
+ return ret;
}
EXPORT_SYMBOL_GPL(vb2_get_unmapped_area);
#endif
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index c5de202f530a..de1cc9e1ae57 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1484,6 +1484,11 @@ void mmc_init_erase(struct mmc_card *card)
card->pref_erase = 0;
}
+static bool is_trim_arg(unsigned int arg)
+{
+ return (arg & MMC_TRIM_OR_DISCARD_ARGS) && arg != MMC_DISCARD_ARG;
+}
+
static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card,
unsigned int arg, unsigned int qty)
{
@@ -1766,7 +1771,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr,
!(card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN))
return -EOPNOTSUPP;
- if (mmc_card_mmc(card) && (arg & MMC_TRIM_ARGS) &&
+ if (mmc_card_mmc(card) && is_trim_arg(arg) &&
!(card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN))
return -EOPNOTSUPP;
@@ -1796,7 +1801,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr,
* identified by the card->eg_boundary flag.
*/
rem = card->erase_size - (from % card->erase_size);
- if ((arg & MMC_TRIM_ARGS) && (card->eg_boundary) && (nr > rem)) {
+ if ((arg & MMC_TRIM_OR_DISCARD_ARGS) && card->eg_boundary && nr > rem) {
err = mmc_do_erase(card, from, from + rem - 1, arg);
from += rem;
if ((err) || (to <= from))
diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c
index 8d9bceeff986..155ce2bdfe62 100644
--- a/drivers/mmc/core/mmc_test.c
+++ b/drivers/mmc/core/mmc_test.c
@@ -3179,7 +3179,8 @@ static int __mmc_test_register_dbgfs_file(struct mmc_card *card,
struct mmc_test_dbgfs_file *df;
if (card->debugfs_root)
- debugfs_create_file(name, mode, card->debugfs_root, card, fops);
+ file = debugfs_create_file(name, mode, card->debugfs_root,
+ card, fops);
df = kmalloc(sizeof(*df), GFP_KERNEL);
if (!df) {
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index df941438aef5..26bc59b5a7cc 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -2588,13 +2588,11 @@ static int msdc_of_clock_parse(struct platform_device *pdev,
return PTR_ERR(host->src_clk_cg);
}
- host->sys_clk_cg = devm_clk_get_optional(&pdev->dev, "sys_cg");
+ /* If present, always enable for this clock gate */
+ host->sys_clk_cg = devm_clk_get_optional_enabled(&pdev->dev, "sys_cg");
if (IS_ERR(host->sys_clk_cg))
host->sys_clk_cg = NULL;
- /* If present, always enable for this clock gate */
- clk_prepare_enable(host->sys_clk_cg);
-
host->bulk_clks[0].id = "pclk_cg";
host->bulk_clks[1].id = "axi_cg";
host->bulk_clks[2].id = "ahb_cg";
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 31ea0a2fce35..ffeb5759830f 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -1512,7 +1512,7 @@ static void esdhc_cqe_enable(struct mmc_host *mmc)
* system resume back.
*/
cqhci_writel(cq_host, 0, CQHCI_CTL);
- if (cqhci_readl(cq_host, CQHCI_CTL) && CQHCI_HALT)
+ if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT)
dev_err(mmc_dev(host->mmc),
"failed to exit halt state when enable CQE\n");
diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c
index b92a408f138d..bec3f9e3cd3f 100644
--- a/drivers/mmc/host/sdhci-sprd.c
+++ b/drivers/mmc/host/sdhci-sprd.c
@@ -470,7 +470,7 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios)
}
if (IS_ERR(sprd_host->pinctrl))
- return 0;
+ goto reset;
switch (ios->signal_voltage) {
case MMC_SIGNAL_VOLTAGE_180:
@@ -498,6 +498,8 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios)
/* Wait for 300 ~ 500 us for pin state stable */
usleep_range(300, 500);
+
+reset:
sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
return 0;
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index fef03de85b99..c7ad32a75b57 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -373,6 +373,7 @@ static void sdhci_init(struct sdhci_host *host, int soft)
if (soft) {
/* force clock reconfiguration */
host->clock = 0;
+ host->reinit_uhs = true;
mmc->ops->set_ios(mmc, &mmc->ios);
}
}
@@ -2293,11 +2294,46 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing)
}
EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling);
+static bool sdhci_timing_has_preset(unsigned char timing)
+{
+ switch (timing) {
+ case MMC_TIMING_UHS_SDR12:
+ case MMC_TIMING_UHS_SDR25:
+ case MMC_TIMING_UHS_SDR50:
+ case MMC_TIMING_UHS_SDR104:
+ case MMC_TIMING_UHS_DDR50:
+ case MMC_TIMING_MMC_DDR52:
+ return true;
+ };
+ return false;
+}
+
+static bool sdhci_preset_needed(struct sdhci_host *host, unsigned char timing)
+{
+ return !(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) &&
+ sdhci_timing_has_preset(timing);
+}
+
+static bool sdhci_presetable_values_change(struct sdhci_host *host, struct mmc_ios *ios)
+{
+ /*
+ * Preset Values are: Driver Strength, Clock Generator and SDCLK/RCLK
+ * Frequency. Check if preset values need to be enabled, or the Driver
+ * Strength needs updating. Note, clock changes are handled separately.
+ */
+ return !host->preset_enabled &&
+ (sdhci_preset_needed(host, ios->timing) || host->drv_type != ios->drv_type);
+}
+
void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
{
struct sdhci_host *host = mmc_priv(mmc);
+ bool reinit_uhs = host->reinit_uhs;
+ bool turning_on_clk = false;
u8 ctrl;
+ host->reinit_uhs = false;
+
if (ios->power_mode == MMC_POWER_UNDEFINED)
return;
@@ -2323,6 +2359,8 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
sdhci_enable_preset_value(host, false);
if (!ios->clock || ios->clock != host->clock) {
+ turning_on_clk = ios->clock && !host->clock;
+
host->ops->set_clock(host, ios->clock);
host->clock = ios->clock;
@@ -2349,6 +2387,17 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
host->ops->set_bus_width(host, ios->bus_width);
+ /*
+ * Special case to avoid multiple clock changes during voltage
+ * switching.
+ */
+ if (!reinit_uhs &&
+ turning_on_clk &&
+ host->timing == ios->timing &&
+ host->version >= SDHCI_SPEC_300 &&
+ !sdhci_presetable_values_change(host, ios))
+ return;
+
ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
if (!(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT)) {
@@ -2392,6 +2441,7 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
}
sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
+ host->drv_type = ios->drv_type;
} else {
/*
* According to SDHC Spec v3.00, if the Preset Value
@@ -2419,19 +2469,14 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
host->ops->set_uhs_signaling(host, ios->timing);
host->timing = ios->timing;
- if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) &&
- ((ios->timing == MMC_TIMING_UHS_SDR12) ||
- (ios->timing == MMC_TIMING_UHS_SDR25) ||
- (ios->timing == MMC_TIMING_UHS_SDR50) ||
- (ios->timing == MMC_TIMING_UHS_SDR104) ||
- (ios->timing == MMC_TIMING_UHS_DDR50) ||
- (ios->timing == MMC_TIMING_MMC_DDR52))) {
+ if (sdhci_preset_needed(host, ios->timing)) {
u16 preset;
sdhci_enable_preset_value(host, true);
preset = sdhci_get_preset_value(host);
ios->drv_type = FIELD_GET(SDHCI_PRESET_DRV_MASK,
preset);
+ host->drv_type = ios->drv_type;
}
/* Re-enable SD Clock */
@@ -3768,6 +3813,7 @@ int sdhci_resume_host(struct sdhci_host *host)
sdhci_init(host, 0);
host->pwr = 0;
host->clock = 0;
+ host->reinit_uhs = true;
mmc->ops->set_ios(mmc, &mmc->ios);
} else {
sdhci_init(host, (mmc->pm_flags & MMC_PM_KEEP_POWER));
@@ -3830,6 +3876,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host, int soft_reset)
/* Force clock and power re-program */
host->pwr = 0;
host->clock = 0;
+ host->reinit_uhs = true;
mmc->ops->start_signal_voltage_switch(mmc, &mmc->ios);
mmc->ops->set_ios(mmc, &mmc->ios);
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index d750c464bd1e..87a3aaa07438 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -524,6 +524,8 @@ struct sdhci_host {
unsigned int clock; /* Current clock (MHz) */
u8 pwr; /* Current voltage */
+ u8 drv_type; /* Current UHS-I driver type */
+ bool reinit_uhs; /* Force UHS-related re-initialization */
bool runtime_suspended; /* Host is runtime suspended */
bool bus_on; /* Bus power prevents runtime suspend */
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e01bb0412f1c..4048876f842c 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3249,7 +3249,7 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond,
goto out;
saddr = &combined->ip6.saddr;
- daddr = &combined->ip6.saddr;
+ daddr = &combined->ip6.daddr;
slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI6c tip %pI6c\n",
__func__, slave->dev->name, bond_slave_state(slave),
diff --git a/drivers/net/can/can327.c b/drivers/net/can/can327.c
index ed3d0b8989a0..dc7192ecb001 100644
--- a/drivers/net/can/can327.c
+++ b/drivers/net/can/can327.c
@@ -796,9 +796,9 @@ static int can327_netdev_close(struct net_device *dev)
netif_stop_queue(dev);
- /* Give UART one final chance to flush. */
- clear_bit(TTY_DO_WRITE_WAKEUP, &elm->tty->flags);
- flush_work(&elm->tx_work);
+ /* We don't flush the UART TX queue here, as we want final stop
+ * commands (like the above dummy char) to be flushed out.
+ */
can_rx_offload_disable(&elm->offload);
elm->can.state = CAN_STATE_STOPPED;
@@ -1069,12 +1069,15 @@ static void can327_ldisc_close(struct tty_struct *tty)
{
struct can327 *elm = (struct can327 *)tty->disc_data;
- /* unregister_netdev() calls .ndo_stop() so we don't have to.
- * Our .ndo_stop() also flushes the TTY write wakeup handler,
- * so we can safely set elm->tty = NULL after this.
- */
+ /* unregister_netdev() calls .ndo_stop() so we don't have to. */
unregister_candev(elm->dev);
+ /* Give UART one final chance to flush.
+ * No need to clear TTY_DO_WRITE_WAKEUP since .write_wakeup() is
+ * serialised against .close() and will not be called once we return.
+ */
+ flush_work(&elm->tx_work);
+
/* Mark channel as dead */
spin_lock_bh(&elm->lock);
tty->disc_data = NULL;
diff --git a/drivers/net/can/slcan/slcan-core.c b/drivers/net/can/slcan/slcan-core.c
index fbb34139daa1..f4db77007c13 100644
--- a/drivers/net/can/slcan/slcan-core.c
+++ b/drivers/net/can/slcan/slcan-core.c
@@ -864,12 +864,14 @@ static void slcan_close(struct tty_struct *tty)
{
struct slcan *sl = (struct slcan *)tty->disc_data;
- /* unregister_netdev() calls .ndo_stop() so we don't have to.
- * Our .ndo_stop() also flushes the TTY write wakeup handler,
- * so we can safely set sl->tty = NULL after this.
- */
unregister_candev(sl->dev);
+ /*
+ * The netdev needn't be UP (so .ndo_stop() is not called). Hence make
+ * sure this is not running before freeing it up.
+ */
+ flush_work(&sl->tx_work);
+
/* Mark channel as dead */
spin_lock_bh(&sl->lock);
tty->disc_data = NULL;
diff --git a/drivers/net/can/usb/esd_usb.c b/drivers/net/can/usb/esd_usb.c
index 81b88e9e5bdc..42323f5e6f3a 100644
--- a/drivers/net/can/usb/esd_usb.c
+++ b/drivers/net/can/usb/esd_usb.c
@@ -234,6 +234,10 @@ static void esd_usb_rx_event(struct esd_usb_net_priv *priv,
u8 rxerr = msg->msg.rx.data[2];
u8 txerr = msg->msg.rx.data[3];
+ netdev_dbg(priv->netdev,
+ "CAN_ERR_EV_EXT: dlc=%#02x state=%02x ecc=%02x rec=%02x tec=%02x\n",
+ msg->msg.rx.dlc, state, ecc, rxerr, txerr);
+
skb = alloc_can_err_skb(priv->netdev, &cf);
if (skb == NULL) {
stats->rx_dropped++;
@@ -260,6 +264,8 @@ static void esd_usb_rx_event(struct esd_usb_net_priv *priv,
break;
default:
priv->can.state = CAN_STATE_ERROR_ACTIVE;
+ txerr = 0;
+ rxerr = 0;
break;
}
} else {
diff --git a/drivers/net/dsa/microchip/ksz8.h b/drivers/net/dsa/microchip/ksz8.h
index 8582b4b67d98..ea05abfbd51d 100644
--- a/drivers/net/dsa/microchip/ksz8.h
+++ b/drivers/net/dsa/microchip/ksz8.h
@@ -57,5 +57,6 @@ int ksz8_reset_switch(struct ksz_device *dev);
int ksz8_switch_detect(struct ksz_device *dev);
int ksz8_switch_init(struct ksz_device *dev);
void ksz8_switch_exit(struct ksz_device *dev);
+int ksz8_change_mtu(struct ksz_device *dev, int port, int mtu);
#endif
diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index bd3b133e7085..003b0ac2854c 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -76,6 +76,57 @@ int ksz8_reset_switch(struct ksz_device *dev)
return 0;
}
+static int ksz8863_change_mtu(struct ksz_device *dev, int frame_size)
+{
+ u8 ctrl2 = 0;
+
+ if (frame_size <= KSZ8_LEGAL_PACKET_SIZE)
+ ctrl2 |= KSZ8863_LEGAL_PACKET_ENABLE;
+ else if (frame_size > KSZ8863_NORMAL_PACKET_SIZE)
+ ctrl2 |= KSZ8863_HUGE_PACKET_ENABLE;
+
+ return ksz_rmw8(dev, REG_SW_CTRL_2, KSZ8863_LEGAL_PACKET_ENABLE |
+ KSZ8863_HUGE_PACKET_ENABLE, ctrl2);
+}
+
+static int ksz8795_change_mtu(struct ksz_device *dev, int frame_size)
+{
+ u8 ctrl1 = 0, ctrl2 = 0;
+ int ret;
+
+ if (frame_size > KSZ8_LEGAL_PACKET_SIZE)
+ ctrl2 |= SW_LEGAL_PACKET_DISABLE;
+ else if (frame_size > KSZ8863_NORMAL_PACKET_SIZE)
+ ctrl1 |= SW_HUGE_PACKET;
+
+ ret = ksz_rmw8(dev, REG_SW_CTRL_1, SW_HUGE_PACKET, ctrl1);
+ if (ret)
+ return ret;
+
+ return ksz_rmw8(dev, REG_SW_CTRL_2, SW_LEGAL_PACKET_DISABLE, ctrl2);
+}
+
+int ksz8_change_mtu(struct ksz_device *dev, int port, int mtu)
+{
+ u16 frame_size;
+
+ if (!dsa_is_cpu_port(dev->ds, port))
+ return 0;
+
+ frame_size = mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
+
+ switch (dev->chip_id) {
+ case KSZ8795_CHIP_ID:
+ case KSZ8794_CHIP_ID:
+ case KSZ8765_CHIP_ID:
+ return ksz8795_change_mtu(dev, frame_size);
+ case KSZ8830_CHIP_ID:
+ return ksz8863_change_mtu(dev, frame_size);
+ }
+
+ return -EOPNOTSUPP;
+}
+
static void ksz8795_set_prio_queue(struct ksz_device *dev, int port, int queue)
{
u8 hi, lo;
@@ -1233,8 +1284,6 @@ void ksz8_config_cpu_port(struct dsa_switch *ds)
masks = dev->info->masks;
regs = dev->info->regs;
- /* Switch marks the maximum frame with extra byte as oversize. */
- ksz_cfg(dev, REG_SW_CTRL_2, SW_LEGAL_PACKET_DISABLE, true);
ksz_cfg(dev, regs[S_TAIL_TAG_CTRL], masks[SW_TAIL_TAG_ENABLE], true);
p = &dev->ports[dev->cpu_port];
@@ -1308,6 +1357,18 @@ int ksz8_setup(struct dsa_switch *ds)
struct ksz_device *dev = ds->priv;
int i;
+ ds->mtu_enforcement_ingress = true;
+
+ /* We rely on software untagging on the CPU port, so that we
+ * can support both tagged and untagged VLANs
+ */
+ ds->untag_bridge_pvid = true;
+
+ /* VLAN filtering is partly controlled by the global VLAN
+ * Enable flag
+ */
+ ds->vlan_filtering_is_global = true;
+
ksz_cfg(dev, S_REPLACE_VID_CTRL, SW_FLOW_CTRL, true);
/* Enable automatic fast aging when link changed detected. */
@@ -1367,16 +1428,6 @@ int ksz8_switch_init(struct ksz_device *dev)
dev->phy_port_cnt = dev->info->port_cnt - 1;
dev->port_mask = (BIT(dev->phy_port_cnt) - 1) | dev->info->cpu_ports;
- /* We rely on software untagging on the CPU port, so that we
- * can support both tagged and untagged VLANs
- */
- dev->ds->untag_bridge_pvid = true;
-
- /* VLAN filtering is partly controlled by the global VLAN
- * Enable flag
- */
- dev->ds->vlan_filtering_is_global = true;
-
return 0;
}
diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h
index 77487d611824..7a57c6088f80 100644
--- a/drivers/net/dsa/microchip/ksz8795_reg.h
+++ b/drivers/net/dsa/microchip/ksz8795_reg.h
@@ -48,6 +48,9 @@
#define NO_EXC_COLLISION_DROP BIT(3)
#define SW_LEGAL_PACKET_DISABLE BIT(1)
+#define KSZ8863_HUGE_PACKET_ENABLE BIT(2)
+#define KSZ8863_LEGAL_PACKET_ENABLE BIT(1)
+
#define REG_SW_CTRL_3 0x05
#define WEIGHTED_FAIR_QUEUE_ENABLE BIT(3)
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 0d6b40968657..47b54ecf2c6f 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -45,24 +45,15 @@ static void ksz9477_port_cfg32(struct ksz_device *dev, int port, int offset,
int ksz9477_change_mtu(struct ksz_device *dev, int port, int mtu)
{
- u16 frame_size, max_frame = 0;
- int i;
-
- frame_size = mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
+ u16 frame_size;
- /* Cache the per-port MTU setting */
- dev->ports[port].max_frame = frame_size;
+ if (!dsa_is_cpu_port(dev->ds, port))
+ return 0;
- for (i = 0; i < dev->info->port_cnt; i++)
- max_frame = max(max_frame, dev->ports[i].max_frame);
+ frame_size = mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
return regmap_update_bits(dev->regmap[1], REG_SW_MTU__2,
- REG_SW_MTU_MASK, max_frame);
-}
-
-int ksz9477_max_mtu(struct ksz_device *dev, int port)
-{
- return KSZ9477_MAX_FRAME_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN;
+ REG_SW_MTU_MASK, frame_size);
}
static int ksz9477_wait_vlan_ctrl_ready(struct ksz_device *dev)
@@ -1143,6 +1134,8 @@ int ksz9477_setup(struct dsa_switch *ds)
struct ksz_device *dev = ds->priv;
int ret = 0;
+ ds->mtu_enforcement_ingress = true;
+
/* Required for port partitioning. */
ksz9477_cfg32(dev, REG_SW_QM_CTRL__4, UNICAST_VLAN_BOUNDARY,
true);
diff --git a/drivers/net/dsa/microchip/ksz9477.h b/drivers/net/dsa/microchip/ksz9477.h
index 00862c4cfb7f..7c5bb3032772 100644
--- a/drivers/net/dsa/microchip/ksz9477.h
+++ b/drivers/net/dsa/microchip/ksz9477.h
@@ -50,7 +50,6 @@ int ksz9477_mdb_add(struct ksz_device *dev, int port,
int ksz9477_mdb_del(struct ksz_device *dev, int port,
const struct switchdev_obj_port_mdb *mdb, struct dsa_db db);
int ksz9477_change_mtu(struct ksz_device *dev, int port, int mtu);
-int ksz9477_max_mtu(struct ksz_device *dev, int port);
void ksz9477_config_cpu_port(struct dsa_switch *ds);
int ksz9477_enable_stp_addr(struct ksz_device *dev);
int ksz9477_reset_switch(struct ksz_device *dev);
diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h
index 53c68d286dd3..cc457fa64939 100644
--- a/drivers/net/dsa/microchip/ksz9477_reg.h
+++ b/drivers/net/dsa/microchip/ksz9477_reg.h
@@ -1615,6 +1615,4 @@
#define PTP_TRIG_UNIT_M (BIT(MAX_TRIG_UNIT) - 1)
#define PTP_TS_UNIT_M (BIT(MAX_TIMESTAMP_UNIT) - 1)
-#define KSZ9477_MAX_FRAME_SIZE 9000
-
#endif /* KSZ9477_REGS_H */
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 8c8db315317d..423f944cc34c 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -14,6 +14,7 @@
#include <linux/phy.h>
#include <linux/etherdevice.h>
#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/of_mdio.h>
@@ -69,6 +70,43 @@ struct ksz_stats_raw {
u64 tx_discards;
};
+struct ksz88xx_stats_raw {
+ u64 rx;
+ u64 rx_hi;
+ u64 rx_undersize;
+ u64 rx_fragments;
+ u64 rx_oversize;
+ u64 rx_jabbers;
+ u64 rx_symbol_err;
+ u64 rx_crc_err;
+ u64 rx_align_err;
+ u64 rx_mac_ctrl;
+ u64 rx_pause;
+ u64 rx_bcast;
+ u64 rx_mcast;
+ u64 rx_ucast;
+ u64 rx_64_or_less;
+ u64 rx_65_127;
+ u64 rx_128_255;
+ u64 rx_256_511;
+ u64 rx_512_1023;
+ u64 rx_1024_1522;
+ u64 tx;
+ u64 tx_hi;
+ u64 tx_late_col;
+ u64 tx_pause;
+ u64 tx_bcast;
+ u64 tx_mcast;
+ u64 tx_ucast;
+ u64 tx_deferred;
+ u64 tx_total_col;
+ u64 tx_exc_col;
+ u64 tx_single_col;
+ u64 tx_mult_col;
+ u64 rx_discards;
+ u64 tx_discards;
+};
+
static const struct ksz_mib_names ksz88xx_mib_names[] = {
{ 0x00, "rx" },
{ 0x01, "rx_hi" },
@@ -155,6 +193,7 @@ static const struct ksz_dev_ops ksz8_dev_ops = {
.w_phy = ksz8_w_phy,
.r_mib_cnt = ksz8_r_mib_cnt,
.r_mib_pkt = ksz8_r_mib_pkt,
+ .r_mib_stat64 = ksz88xx_r_mib_stats64,
.freeze_mib = ksz8_freeze_mib,
.port_init_cnt = ksz8_port_init_cnt,
.fdb_dump = ksz8_fdb_dump,
@@ -171,6 +210,7 @@ static const struct ksz_dev_ops ksz8_dev_ops = {
.reset = ksz8_reset_switch,
.init = ksz8_switch_init,
.exit = ksz8_switch_exit,
+ .change_mtu = ksz8_change_mtu,
};
static void ksz9477_phylink_mac_link_up(struct ksz_device *dev, int port,
@@ -206,7 +246,6 @@ static const struct ksz_dev_ops ksz9477_dev_ops = {
.mdb_add = ksz9477_mdb_add,
.mdb_del = ksz9477_mdb_del,
.change_mtu = ksz9477_change_mtu,
- .max_mtu = ksz9477_max_mtu,
.phylink_mac_link_up = ksz9477_phylink_mac_link_up,
.config_cpu_port = ksz9477_config_cpu_port,
.enable_stp_addr = ksz9477_enable_stp_addr,
@@ -243,7 +282,6 @@ static const struct ksz_dev_ops lan937x_dev_ops = {
.mdb_add = ksz9477_mdb_add,
.mdb_del = ksz9477_mdb_del,
.change_mtu = lan937x_change_mtu,
- .max_mtu = ksz9477_max_mtu,
.phylink_mac_link_up = ksz9477_phylink_mac_link_up,
.config_cpu_port = lan937x_config_cpu_port,
.enable_stp_addr = ksz9477_enable_stp_addr,
@@ -1583,6 +1621,55 @@ void ksz_r_mib_stats64(struct ksz_device *dev, int port)
spin_unlock(&mib->stats64_lock);
}
+void ksz88xx_r_mib_stats64(struct ksz_device *dev, int port)
+{
+ struct ethtool_pause_stats *pstats;
+ struct rtnl_link_stats64 *stats;
+ struct ksz88xx_stats_raw *raw;
+ struct ksz_port_mib *mib;
+
+ mib = &dev->ports[port].mib;
+ stats = &mib->stats64;
+ pstats = &mib->pause_stats;
+ raw = (struct ksz88xx_stats_raw *)mib->counters;
+
+ spin_lock(&mib->stats64_lock);
+
+ stats->rx_packets = raw->rx_bcast + raw->rx_mcast + raw->rx_ucast +
+ raw->rx_pause;
+ stats->tx_packets = raw->tx_bcast + raw->tx_mcast + raw->tx_ucast +
+ raw->tx_pause;
+
+ /* HW counters are counting bytes + FCS which is not acceptable
+ * for rtnl_link_stats64 interface
+ */
+ stats->rx_bytes = raw->rx + raw->rx_hi - stats->rx_packets * ETH_FCS_LEN;
+ stats->tx_bytes = raw->tx + raw->tx_hi - stats->tx_packets * ETH_FCS_LEN;
+
+ stats->rx_length_errors = raw->rx_undersize + raw->rx_fragments +
+ raw->rx_oversize;
+
+ stats->rx_crc_errors = raw->rx_crc_err;
+ stats->rx_frame_errors = raw->rx_align_err;
+ stats->rx_dropped = raw->rx_discards;
+ stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
+ stats->rx_frame_errors + stats->rx_dropped;
+
+ stats->tx_window_errors = raw->tx_late_col;
+ stats->tx_fifo_errors = raw->tx_discards;
+ stats->tx_aborted_errors = raw->tx_exc_col;
+ stats->tx_errors = stats->tx_window_errors + stats->tx_fifo_errors +
+ stats->tx_aborted_errors;
+
+ stats->multicast = raw->rx_mcast;
+ stats->collisions = raw->tx_total_col;
+
+ pstats->tx_pause_frames = raw->tx_pause;
+ pstats->rx_pause_frames = raw->rx_pause;
+
+ spin_unlock(&mib->stats64_lock);
+}
+
static void ksz_get_stats64(struct dsa_switch *ds, int port,
struct rtnl_link_stats64 *s)
{
@@ -2500,10 +2587,29 @@ static int ksz_max_mtu(struct dsa_switch *ds, int port)
{
struct ksz_device *dev = ds->priv;
- if (!dev->dev_ops->max_mtu)
- return -EOPNOTSUPP;
+ switch (dev->chip_id) {
+ case KSZ8795_CHIP_ID:
+ case KSZ8794_CHIP_ID:
+ case KSZ8765_CHIP_ID:
+ return KSZ8795_HUGE_PACKET_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN;
+ case KSZ8830_CHIP_ID:
+ return KSZ8863_HUGE_PACKET_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN;
+ case KSZ8563_CHIP_ID:
+ case KSZ9477_CHIP_ID:
+ case KSZ9563_CHIP_ID:
+ case KSZ9567_CHIP_ID:
+ case KSZ9893_CHIP_ID:
+ case KSZ9896_CHIP_ID:
+ case KSZ9897_CHIP_ID:
+ case LAN9370_CHIP_ID:
+ case LAN9371_CHIP_ID:
+ case LAN9372_CHIP_ID:
+ case LAN9373_CHIP_ID:
+ case LAN9374_CHIP_ID:
+ return KSZ9477_MAX_FRAME_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN;
+ }
- return dev->dev_ops->max_mtu(dev, port);
+ return -EOPNOTSUPP;
}
static void ksz_set_xmii(struct ksz_device *dev, int port,
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index c6726cbd5465..055d61ff3fb8 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -95,7 +95,6 @@ struct ksz_port {
struct ksz_port_mib mib;
phy_interface_t interface;
- u16 max_frame;
u32 rgmii_tx_val;
u32 rgmii_rx_val;
struct ksz_device *ksz_dev;
@@ -322,7 +321,6 @@ struct ksz_dev_ops {
void (*get_caps)(struct ksz_device *dev, int port,
struct phylink_config *config);
int (*change_mtu)(struct ksz_device *dev, int port, int mtu);
- int (*max_mtu)(struct ksz_device *dev, int port);
void (*freeze_mib)(struct ksz_device *dev, int port, bool freeze);
void (*port_init_cnt)(struct ksz_device *dev, int port);
void (*phylink_mac_config)(struct ksz_device *dev, int port,
@@ -347,6 +345,7 @@ void ksz_switch_remove(struct ksz_device *dev);
void ksz_init_mib_timer(struct ksz_device *dev);
void ksz_r_mib_stats64(struct ksz_device *dev, int port);
+void ksz88xx_r_mib_stats64(struct ksz_device *dev, int port);
void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state);
bool ksz_get_gbit(struct ksz_device *dev, int port);
phy_interface_t ksz_get_xmii(struct ksz_device *dev, int port, bool gbit);
@@ -456,6 +455,11 @@ static inline int ksz_write64(struct ksz_device *dev, u32 reg, u64 value)
return regmap_bulk_write(dev->regmap[2], reg, val, 2);
}
+static inline int ksz_rmw8(struct ksz_device *dev, int offset, u8 mask, u8 val)
+{
+ return regmap_update_bits(dev->regmap[0], offset, mask, val);
+}
+
static inline int ksz_pread8(struct ksz_device *dev, int port, int offset,
u8 *data)
{
@@ -588,6 +592,12 @@ static inline int is_lan937x(struct ksz_device *dev)
#define PORT_SRC_PHY_INT 1
+#define KSZ8795_HUGE_PACKET_SIZE 2000
+#define KSZ8863_HUGE_PACKET_SIZE 1916
+#define KSZ8863_NORMAL_PACKET_SIZE 1536
+#define KSZ8_LEGAL_PACKET_SIZE 1518
+#define KSZ9477_MAX_FRAME_SIZE 9000
+
/* Regmap tables generation */
#define KSZ_SPI_OP_RD 3
#define KSZ_SPI_OP_WR 2
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index ccfa4751d3b7..ba4fff8690aa 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -833,10 +833,13 @@ static void mv88e6xxx_get_caps(struct dsa_switch *ds, int port,
chip->info->ops->phylink_get_caps(chip, port, config);
- /* Internal ports need GMII for PHYLIB */
- if (mv88e6xxx_phy_is_internal(ds, port))
+ if (mv88e6xxx_phy_is_internal(ds, port)) {
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+ config->supported_interfaces);
+ /* Internal ports with no phy-mode need GMII for PHYLIB */
__set_bit(PHY_INTERFACE_MODE_GMII,
config->supported_interfaces);
+ }
}
static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c
index da532614f34a..30b1f1ba762f 100644
--- a/drivers/net/dsa/sja1105/sja1105_devlink.c
+++ b/drivers/net/dsa/sja1105/sja1105_devlink.c
@@ -95,6 +95,8 @@ static int sja1105_setup_devlink_regions(struct dsa_switch *ds)
if (IS_ERR(region)) {
while (--i >= 0)
dsa_devlink_region_destroy(priv->regions[i]);
+
+ kfree(priv->regions);
return PTR_ERR(region);
}
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 412666111b0c..b70dcf32a26d 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1038,7 +1038,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv)
policing[bcast].sharindx = port;
/* Only SJA1110 has multicast policers */
- if (mcast <= table->ops->max_entry_count)
+ if (mcast < table->ops->max_entry_count)
policing[mcast].sharindx = port;
}
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index e104fb02817d..aa0d2f3aaeaa 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -258,6 +258,7 @@ static int greth_init_rings(struct greth_private *greth)
if (dma_mapping_error(greth->dev, dma_addr)) {
if (netif_msg_ifup(greth))
dev_err(greth->dev, "Could not create initial DMA mapping\n");
+ dev_kfree_skb(skb);
goto cleanup;
}
greth->rx_skbuff[i] = skb;
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 55dfdb34e37b..f4ca0c6c0f51 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -71,13 +71,14 @@ config BCM63XX_ENET
config BCMGENET
tristate "Broadcom GENET internal MAC support"
depends on HAS_IOMEM
+ depends on PTP_1588_CLOCK_OPTIONAL || !ARCH_BCM2835
select MII
select PHYLIB
select FIXED_PHY
select BCM7XXX_PHY
select MDIO_BCM_UNIMAC
select DIMLIB
- select BROADCOM_PHY if (ARCH_BCM2835 && PTP_1588_CLOCK_OPTIONAL)
+ select BROADCOM_PHY if ARCH_BCM2835
help
This driver supports the built-in Ethernet MACs found in the
Broadcom BCM7xxx Set Top Box family chipset.
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index dbe310144780..9f473854b0f4 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -3045,7 +3045,7 @@ error:
dma_unmap_single(&bp->pdev->dev, dma_addr, bp->rx_buf_use_size,
DMA_FROM_DEVICE);
- skb = build_skb(data, 0);
+ skb = slab_build_skb(data);
if (!skb) {
kfree(data);
goto error;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index a8ce8d0cf9c4..21973046b12b 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -117,24 +117,6 @@ static inline void dmadesc_set(struct bcmgenet_priv *priv,
dmadesc_set_length_status(priv, d, val);
}
-static inline dma_addr_t dmadesc_get_addr(struct bcmgenet_priv *priv,
- void __iomem *d)
-{
- dma_addr_t addr;
-
- addr = bcmgenet_readl(d + DMA_DESC_ADDRESS_LO);
-
- /* Register writes to GISB bus can take couple hundred nanoseconds
- * and are done for each packet, save these expensive writes unless
- * the platform is explicitly configured for 64-bits/LPAE.
- */
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
- if (priv->hw_params->flags & GENET_HAS_40BITS)
- addr |= (u64)bcmgenet_readl(d + DMA_DESC_ADDRESS_HI) << 32;
-#endif
- return addr;
-}
-
#define GENET_VER_FMT "%1d.%1d EPHY: 0x%04x"
#define GENET_MSG_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | \
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 98f3dc460ca7..f2f95493ec89 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -2239,7 +2239,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err = register_netdev(netdev);
if (err) {
dev_err(dev, "Failed to register netdevice\n");
- goto err_unregister_interrupts;
+ goto err_destroy_workqueue;
}
nic->msg_enable = debug;
@@ -2248,6 +2248,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return 0;
+err_destroy_workqueue:
+ destroy_workqueue(nic->nicvf_rx_mode_wq);
err_unregister_interrupts:
nicvf_unregister_interrupts(nic);
err_free_netdev:
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
index cacd454ac696..c39b866e2582 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
@@ -132,6 +132,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block,
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) {
dev_err(dev, "DMA mapping failed\n");
+ kfree(cmd_buff);
return -EFAULT;
}
@@ -142,6 +143,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block,
DMA_TO_DEVICE);
if (err) {
dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err);
+ kfree(cmd_buff);
return err;
}
@@ -172,6 +174,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block,
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) {
dev_err(dev, "DMA mapping failed\n");
+ kfree(cmd_buff);
return -EFAULT;
}
@@ -182,6 +185,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block,
DMA_TO_DEVICE);
if (err) {
dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err);
+ kfree(cmd_buff);
return err;
}
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 9471baa11d39..5528b0af82ae 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1216,7 +1216,8 @@ fec_restart(struct net_device *ndev)
writel(0, fep->hwp + FEC_IMASK);
/* Init the interrupt coalescing */
- fec_enet_itr_coal_set(ndev);
+ if (fep->quirks & FEC_QUIRK_HAS_COALESCE)
+ fec_enet_itr_coal_set(ndev);
}
static int fec_enet_ipc_handle_init(struct fec_enet_private *fep)
diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c
index 93846bace028..ce2571c16e43 100644
--- a/drivers/net/ethernet/hisilicon/hisi_femac.c
+++ b/drivers/net/ethernet/hisilicon/hisi_femac.c
@@ -283,7 +283,7 @@ static int hisi_femac_rx(struct net_device *dev, int limit)
skb->protocol = eth_type_trans(skb, dev);
napi_gro_receive(&priv->napi, skb);
dev->stats.rx_packets++;
- dev->stats.rx_bytes += skb->len;
+ dev->stats.rx_bytes += len;
next:
pos = (pos + 1) % rxq->num;
if (rx_pkts_num >= limit)
diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index ffcf797dfa90..f867e9531117 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -550,7 +550,7 @@ static int hix5hd2_rx(struct net_device *dev, int limit)
skb->protocol = eth_type_trans(skb, dev);
napi_gro_receive(&priv->napi, skb);
dev->stats.rx_packets++;
- dev->stats.rx_bytes += skb->len;
+ dev->stats.rx_bytes += len;
next:
pos = dma_ring_incr(pos, RX_DESC_NUM);
}
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 36bc4fd91ef4..04acd1a992fa 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5931,9 +5931,9 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
e1000_tx_queue(tx_ring, tx_flags, count);
/* Make sure there is space in the ring for the next send. */
e1000_maybe_stop_tx(tx_ring,
- (MAX_SKB_FRAGS *
+ ((MAX_SKB_FRAGS + 1) *
DIV_ROUND_UP(PAGE_SIZE,
- adapter->tx_fifo_limit) + 2));
+ adapter->tx_fifo_limit) + 4));
if (!netdev_xmit_more() ||
netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 616d27ec3226..887a735fe2a7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -4466,11 +4466,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
return -EOPNOTSUPP;
/* First 4 bytes of L4 header */
- if (usr_ip4_spec->l4_4_bytes == htonl(0xFFFFFFFF))
- new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK;
- else if (!usr_ip4_spec->l4_4_bytes)
- new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
- else
+ if (usr_ip4_spec->l4_4_bytes)
return -EOPNOTSUPP;
/* Filtering on Type of Service is not supported. */
@@ -4509,11 +4505,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
else
return -EOPNOTSUPP;
- if (usr_ip6_spec->l4_4_bytes == htonl(0xFFFFFFFF))
- new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK;
- else if (!usr_ip6_spec->l4_4_bytes)
- new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
- else
+ if (usr_ip6_spec->l4_4_bytes)
return -EOPNOTSUPP;
/* Filtering on Traffic class is not supported. */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 6861b3e2ced3..95485b56d6c3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10656,6 +10656,21 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi)
}
/**
+ * i40e_clean_xps_state - clean xps state for every tx_ring
+ * @vsi: ptr to the VSI
+ **/
+static void i40e_clean_xps_state(struct i40e_vsi *vsi)
+{
+ int i;
+
+ if (vsi->tx_rings)
+ for (i = 0; i < vsi->num_queue_pairs; i++)
+ if (vsi->tx_rings[i])
+ clear_bit(__I40E_TX_XPS_INIT_DONE,
+ vsi->tx_rings[i]->state);
+}
+
+/**
* i40e_prep_for_reset - prep for the core to reset
* @pf: board private structure
*
@@ -10679,8 +10694,10 @@ static void i40e_prep_for_reset(struct i40e_pf *pf)
i40e_pf_quiesce_all_vsi(pf);
for (v = 0; v < pf->num_alloc_vsi; v++) {
- if (pf->vsi[v])
+ if (pf->vsi[v]) {
+ i40e_clean_xps_state(pf->vsi[v]);
pf->vsi[v]->seid = 0;
+ }
}
i40e_shutdown_adminq(&pf->hw);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 72ddcefc45b1..635f93d60318 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1578,6 +1578,7 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr)
i40e_cleanup_reset_vf(vf);
i40e_flush(hw);
+ usleep_range(20000, 40000);
clear_bit(I40E_VF_STATE_RESETTING, &vf->vf_states);
return true;
@@ -1701,6 +1702,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
}
i40e_flush(hw);
+ usleep_range(20000, 40000);
clear_bit(__I40E_VF_DISABLE, pf->state);
return true;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 2b23b4714a26..a9a7f8b52140 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1111,8 +1111,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
if (link_up == old_link && link_speed == old_link_speed)
return 0;
- if (!ice_is_e810(&pf->hw))
- ice_ptp_link_change(pf, pf->hw.pf_id, link_up);
+ ice_ptp_link_change(pf, pf->hw.pf_id, link_up);
if (ice_is_dcb_active(pf)) {
if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
@@ -6340,8 +6339,7 @@ static int ice_up_complete(struct ice_vsi *vsi)
ice_print_link_msg(vsi, true);
netif_tx_start_all_queues(vsi->netdev);
netif_carrier_on(vsi->netdev);
- if (!ice_is_e810(&pf->hw))
- ice_ptp_link_change(pf, pf->hw.pf_id, true);
+ ice_ptp_link_change(pf, pf->hw.pf_id, true);
}
/* Perform an initial read of the statistics registers now to
@@ -6773,8 +6771,7 @@ int ice_down(struct ice_vsi *vsi)
if (vsi->netdev && vsi->type == ICE_VSI_PF) {
vlan_err = ice_vsi_del_vlan_zero(vsi);
- if (!ice_is_e810(&vsi->back->hw))
- ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false);
+ ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false);
netif_carrier_off(vsi->netdev);
netif_tx_disable(vsi->netdev);
} else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) {
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 13e75279e71c..d63161d73eb1 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -600,6 +600,23 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp)
}
/**
+ * ice_ptp_is_tx_tracker_up - Check if Tx tracker is ready for new timestamps
+ * @tx: the PTP Tx timestamp tracker to check
+ *
+ * Check that a given PTP Tx timestamp tracker is up, i.e. that it is ready
+ * to accept new timestamp requests.
+ *
+ * Assumes the tx->lock spinlock is already held.
+ */
+static bool
+ice_ptp_is_tx_tracker_up(struct ice_ptp_tx *tx)
+{
+ lockdep_assert_held(&tx->lock);
+
+ return tx->init && !tx->calibrating;
+}
+
+/**
* ice_ptp_tx_tstamp - Process Tx timestamps for a port
* @tx: the PTP Tx timestamp tracker
*
@@ -608,11 +625,13 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp)
*
* If a given index has a valid timestamp, perform the following steps:
*
- * 1) copy the timestamp out of the PHY register
- * 4) clear the timestamp valid bit in the PHY register
- * 5) unlock the index by clearing the associated in_use bit.
- * 2) extend the 40b timestamp value to get a 64bit timestamp
- * 3) send that timestamp to the stack
+ * 1) check whether the timestamp request has timed out (waited over 2 seconds)
+ * 2) check that a timestamp is ready and available in the PHY memory bank
+ * 3) read and copy the timestamp out of the PHY register
+ * 4) unlock the index by clearing the associated in_use bit
+ * 5) check if the timestamp is stale, and discard if so
+ * 6) extend the 40 bit timestamp value to get a 64 bit timestamp value
+ * 7) send this 64 bit timestamp to the stack
*
* Returns true if all timestamps were handled, and false if any slots remain
* without a timestamp.
@@ -623,24 +642,45 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp)
* interrupt. In some cases hardware might not interrupt us again when the
* timestamp is captured.
*
- * Note that we only take the tracking lock when clearing the bit and when
- * checking if we need to re-queue this task. The only place where bits can be
- * set is the hard xmit routine where an SKB has a request flag set. The only
- * places where we clear bits are this work function, or the periodic cleanup
- * thread. If the cleanup thread clears a bit we're processing we catch it
- * when we lock to clear the bit and then grab the SKB pointer. If a Tx thread
- * starts a new timestamp, we might not begin processing it right away but we
- * will notice it at the end when we re-queue the task. If a Tx thread starts
- * a new timestamp just after this function exits without re-queuing,
- * the interrupt when the timestamp finishes should trigger. Avoiding holding
- * the lock for the entire function is important in order to ensure that Tx
- * threads do not get blocked while waiting for the lock.
+ * Note that we do not hold the tracking lock while reading the Tx timestamp.
+ * This is because reading the timestamp requires taking a mutex that might
+ * sleep.
+ *
+ * The only place where we set in_use is when a new timestamp is initiated
+ * with a slot index. This is only called in the hard xmit routine where an
+ * SKB has a request flag set. The only places where we clear this bit are this
+ * function, or during teardown when the Tx timestamp tracker is being
+ * removed. A timestamp index will never be re-used until the in_use bit for
+ * that index is cleared.
+ *
+ * If a Tx thread starts a new timestamp, we might not begin processing it
+ * right away but we will notice it at the end when we re-queue the task.
+ *
+ * If a Tx thread starts a new timestamp just after this function exits, the
+ * interrupt for that timestamp should re-trigger this function once
+ * a timestamp is ready.
+ *
+ * In cases where the PTP hardware clock was directly adjusted, some
+ * timestamps may not be able to safely use the timestamp extension math. In
+ * this case, software will set the stale bit for any outstanding Tx
+ * timestamps when the clock is adjusted. Then this function will discard
+ * those captured timestamps instead of sending them to the stack.
+ *
+ * If a Tx packet has been waiting for more than 2 seconds, it is not possible
+ * to correctly extend the timestamp using the cached PHC time. It is
+ * extremely unlikely that a packet will ever take this long to timestamp. If
+ * we detect a Tx timestamp request that has waited for this long we assume
+ * the packet will never be sent by hardware and discard it without reading
+ * the timestamp register.
*/
static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx)
{
struct ice_ptp_port *ptp_port;
- bool ts_handled = true;
+ bool more_timestamps;
struct ice_pf *pf;
+ struct ice_hw *hw;
+ u64 tstamp_ready;
+ int err;
u8 idx;
if (!tx->init)
@@ -648,44 +688,86 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx)
ptp_port = container_of(tx, struct ice_ptp_port, tx);
pf = ptp_port_to_pf(ptp_port);
+ hw = &pf->hw;
+
+ /* Read the Tx ready status first */
+ err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready);
+ if (err)
+ return false;
for_each_set_bit(idx, tx->in_use, tx->len) {
struct skb_shared_hwtstamps shhwtstamps = {};
- u8 phy_idx = idx + tx->quad_offset;
- u64 raw_tstamp, tstamp;
+ u8 phy_idx = idx + tx->offset;
+ u64 raw_tstamp = 0, tstamp;
+ bool drop_ts = false;
struct sk_buff *skb;
- int err;
+
+ /* Drop packets which have waited for more than 2 seconds */
+ if (time_is_before_jiffies(tx->tstamps[idx].start + 2 * HZ)) {
+ drop_ts = true;
+
+ /* Count the number of Tx timestamps that timed out */
+ pf->ptp.tx_hwtstamp_timeouts++;
+ }
+
+ /* Only read a timestamp from the PHY if it's marked as ready
+ * by the tstamp_ready register. This avoids unnecessary
+ * reading of timestamps which are not yet valid. This is
+ * important as we must read all timestamps which are valid
+ * and only timestamps which are valid during each interrupt.
+ * If we do not, the hardware logic for generating a new
+ * interrupt can get stuck on some devices.
+ */
+ if (!(tstamp_ready & BIT_ULL(phy_idx))) {
+ if (drop_ts)
+ goto skip_ts_read;
+
+ continue;
+ }
ice_trace(tx_tstamp_fw_req, tx->tstamps[idx].skb, idx);
- err = ice_read_phy_tstamp(&pf->hw, tx->quad, phy_idx,
- &raw_tstamp);
+ err = ice_read_phy_tstamp(hw, tx->block, phy_idx, &raw_tstamp);
if (err)
continue;
ice_trace(tx_tstamp_fw_done, tx->tstamps[idx].skb, idx);
- /* Check if the timestamp is invalid or stale */
- if (!(raw_tstamp & ICE_PTP_TS_VALID) ||
+ /* For PHYs which don't implement a proper timestamp ready
+ * bitmap, verify that the timestamp value is different
+ * from the last cached timestamp. If it is not, skip this for
+ * now assuming it hasn't yet been captured by hardware.
+ */
+ if (!drop_ts && tx->verify_cached &&
raw_tstamp == tx->tstamps[idx].cached_tstamp)
continue;
- /* The timestamp is valid, so we'll go ahead and clear this
- * index and then send the timestamp up to the stack.
- */
+ /* Discard any timestamp value without the valid bit set */
+ if (!(raw_tstamp & ICE_PTP_TS_VALID))
+ drop_ts = true;
+
+skip_ts_read:
spin_lock(&tx->lock);
- tx->tstamps[idx].cached_tstamp = raw_tstamp;
+ if (tx->verify_cached && raw_tstamp)
+ tx->tstamps[idx].cached_tstamp = raw_tstamp;
clear_bit(idx, tx->in_use);
skb = tx->tstamps[idx].skb;
tx->tstamps[idx].skb = NULL;
+ if (test_and_clear_bit(idx, tx->stale))
+ drop_ts = true;
spin_unlock(&tx->lock);
- /* it's (unlikely but) possible we raced with the cleanup
- * thread for discarding old timestamp requests.
+ /* It is unlikely but possible that the SKB will have been
+ * flushed at this point due to link change or teardown.
*/
if (!skb)
continue;
+ if (drop_ts) {
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+
/* Extend the timestamp using cached PHC time */
tstamp = ice_ptp_extend_40b_ts(pf, raw_tstamp);
if (tstamp) {
@@ -701,11 +783,10 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx)
* poll for remaining timestamps.
*/
spin_lock(&tx->lock);
- if (!bitmap_empty(tx->in_use, tx->len))
- ts_handled = false;
+ more_timestamps = tx->init && !bitmap_empty(tx->in_use, tx->len);
spin_unlock(&tx->lock);
- return ts_handled;
+ return !more_timestamps;
}
/**
@@ -713,26 +794,33 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx)
* @tx: Tx tracking structure to initialize
*
* Assumes that the length has already been initialized. Do not call directly,
- * use the ice_ptp_init_tx_e822 or ice_ptp_init_tx_e810 instead.
+ * use one of the ice_ptp_init_tx_* functions instead.
*/
static int
ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx)
{
- tx->tstamps = kcalloc(tx->len, sizeof(*tx->tstamps), GFP_KERNEL);
- if (!tx->tstamps)
- return -ENOMEM;
+ unsigned long *in_use, *stale;
+ struct ice_tx_tstamp *tstamps;
+
+ tstamps = kcalloc(tx->len, sizeof(*tstamps), GFP_KERNEL);
+ in_use = bitmap_zalloc(tx->len, GFP_KERNEL);
+ stale = bitmap_zalloc(tx->len, GFP_KERNEL);
+
+ if (!tstamps || !in_use || !stale) {
+ kfree(tstamps);
+ bitmap_free(in_use);
+ bitmap_free(stale);
- tx->in_use = bitmap_zalloc(tx->len, GFP_KERNEL);
- if (!tx->in_use) {
- kfree(tx->tstamps);
- tx->tstamps = NULL;
return -ENOMEM;
}
- spin_lock_init(&tx->lock);
-
+ tx->tstamps = tstamps;
+ tx->in_use = in_use;
+ tx->stale = stale;
tx->init = 1;
+ spin_lock_init(&tx->lock);
+
return 0;
}
@@ -740,31 +828,71 @@ ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx)
* ice_ptp_flush_tx_tracker - Flush any remaining timestamps from the tracker
* @pf: Board private structure
* @tx: the tracker to flush
+ *
+ * Called during teardown when a Tx tracker is being removed.
*/
static void
ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
{
+ struct ice_hw *hw = &pf->hw;
+ u64 tstamp_ready;
+ int err;
u8 idx;
- for (idx = 0; idx < tx->len; idx++) {
- u8 phy_idx = idx + tx->quad_offset;
+ err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready);
+ if (err) {
+ dev_dbg(ice_pf_to_dev(pf), "Failed to get the Tx tstamp ready bitmap for block %u, err %d\n",
+ tx->block, err);
+
+ /* If we fail to read the Tx timestamp ready bitmap just
+ * skip clearing the PHY timestamps.
+ */
+ tstamp_ready = 0;
+ }
+
+ for_each_set_bit(idx, tx->in_use, tx->len) {
+ u8 phy_idx = idx + tx->offset;
+ struct sk_buff *skb;
+
+ /* In case this timestamp is ready, we need to clear it. */
+ if (!hw->reset_ongoing && (tstamp_ready & BIT_ULL(phy_idx)))
+ ice_clear_phy_tstamp(hw, tx->block, phy_idx);
spin_lock(&tx->lock);
- if (tx->tstamps[idx].skb) {
- dev_kfree_skb_any(tx->tstamps[idx].skb);
- tx->tstamps[idx].skb = NULL;
- pf->ptp.tx_hwtstamp_flushed++;
- }
+ skb = tx->tstamps[idx].skb;
+ tx->tstamps[idx].skb = NULL;
clear_bit(idx, tx->in_use);
+ clear_bit(idx, tx->stale);
spin_unlock(&tx->lock);
- /* Clear any potential residual timestamp in the PHY block */
- if (!pf->hw.reset_ongoing)
- ice_clear_phy_tstamp(&pf->hw, tx->quad, phy_idx);
+ /* Count the number of Tx timestamps flushed */
+ pf->ptp.tx_hwtstamp_flushed++;
+
+ /* Free the SKB after we've cleared the bit */
+ dev_kfree_skb_any(skb);
}
}
/**
+ * ice_ptp_mark_tx_tracker_stale - Mark unfinished timestamps as stale
+ * @tx: the tracker to mark
+ *
+ * Mark currently outstanding Tx timestamps as stale. This prevents sending
+ * their timestamp value to the stack. This is required to prevent extending
+ * the 40bit hardware timestamp incorrectly.
+ *
+ * This should be called when the PTP clock is modified such as after a set
+ * time request.
+ */
+static void
+ice_ptp_mark_tx_tracker_stale(struct ice_ptp_tx *tx)
+{
+ spin_lock(&tx->lock);
+ bitmap_or(tx->stale, tx->stale, tx->in_use, tx->len);
+ spin_unlock(&tx->lock);
+}
+
+/**
* ice_ptp_release_tx_tracker - Release allocated memory for Tx tracker
* @pf: Board private structure
* @tx: Tx tracking structure to release
@@ -774,7 +902,12 @@ ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
static void
ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
{
+ spin_lock(&tx->lock);
tx->init = 0;
+ spin_unlock(&tx->lock);
+
+ /* wait for potentially outstanding interrupt to complete */
+ synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
ice_ptp_flush_tx_tracker(pf, tx);
@@ -784,6 +917,9 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
bitmap_free(tx->in_use);
tx->in_use = NULL;
+ bitmap_free(tx->stale);
+ tx->stale = NULL;
+
tx->len = 0;
}
@@ -801,9 +937,10 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
static int
ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
{
- tx->quad = port / ICE_PORTS_PER_QUAD;
- tx->quad_offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT;
- tx->len = INDEX_PER_PORT;
+ tx->block = port / ICE_PORTS_PER_QUAD;
+ tx->offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT_E822;
+ tx->len = INDEX_PER_PORT_E822;
+ tx->verify_cached = 0;
return ice_ptp_alloc_tx_tracker(tx);
}
@@ -819,59 +956,19 @@ ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
static int
ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
{
- tx->quad = pf->hw.port_info->lport;
- tx->quad_offset = 0;
- tx->len = INDEX_PER_QUAD;
+ tx->block = pf->hw.port_info->lport;
+ tx->offset = 0;
+ tx->len = INDEX_PER_PORT_E810;
+ /* The E810 PHY does not provide a timestamp ready bitmap. Instead,
+ * verify new timestamps against cached copy of the last read
+ * timestamp.
+ */
+ tx->verify_cached = 1;
return ice_ptp_alloc_tx_tracker(tx);
}
/**
- * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped
- * @pf: pointer to the PF struct
- * @tx: PTP Tx tracker to clean up
- *
- * Loop through the Tx timestamp requests and see if any of them have been
- * waiting for a long time. Discard any SKBs that have been waiting for more
- * than 2 seconds. This is long enough to be reasonably sure that the
- * timestamp will never be captured. This might happen if the packet gets
- * discarded before it reaches the PHY timestamping block.
- */
-static void ice_ptp_tx_tstamp_cleanup(struct ice_pf *pf, struct ice_ptp_tx *tx)
-{
- struct ice_hw *hw = &pf->hw;
- u8 idx;
-
- if (!tx->init)
- return;
-
- for_each_set_bit(idx, tx->in_use, tx->len) {
- struct sk_buff *skb;
- u64 raw_tstamp;
-
- /* Check if this SKB has been waiting for too long */
- if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ))
- continue;
-
- /* Read tstamp to be able to use this register again */
- ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset,
- &raw_tstamp);
-
- spin_lock(&tx->lock);
- skb = tx->tstamps[idx].skb;
- tx->tstamps[idx].skb = NULL;
- clear_bit(idx, tx->in_use);
- spin_unlock(&tx->lock);
-
- /* Count the number of Tx timestamps which have timed out */
- pf->ptp.tx_hwtstamp_timeouts++;
-
- /* Free the SKB after we've cleared the bit */
- dev_kfree_skb_any(skb);
- }
-}
-
-/**
* ice_ptp_update_cached_phctime - Update the cached PHC time values
* @pf: Board specific private structure
*
@@ -941,20 +1038,13 @@ static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
* @pf: Board specific private structure
*
* This function must be called when the cached PHC time is no longer valid,
- * such as after a time adjustment. It discards any outstanding Tx timestamps,
- * and updates the cached PHC time for both the PF and Rx rings. If updating
- * the PHC time cannot be done immediately, a warning message is logged and
- * the work item is scheduled.
- *
- * These steps are required in order to ensure that we do not accidentally
- * report a timestamp extended by the wrong PHC cached copy. Note that we
- * do not directly update the cached timestamp here because it is possible
- * this might produce an error when ICE_CFG_BUSY is set. If this occurred, we
- * would have to try again. During that time window, timestamps might be
- * requested and returned with an invalid extension. Thus, on failure to
- * immediately update the cached PHC time we would need to zero the value
- * anyways. For this reason, we just zero the value immediately and queue the
- * update work item.
+ * such as after a time adjustment. It marks any currently outstanding Tx
+ * timestamps as stale and updates the cached PHC time for both the PF and Rx
+ * rings.
+ *
+ * If updating the PHC time cannot be done immediately, a warning message is
+ * logged and the work item is scheduled immediately to minimize the window
+ * with a wrong cached timestamp.
*/
static void ice_ptp_reset_cached_phctime(struct ice_pf *pf)
{
@@ -978,8 +1068,12 @@ static void ice_ptp_reset_cached_phctime(struct ice_pf *pf)
msecs_to_jiffies(10));
}
- /* Flush any outstanding Tx timestamps */
- ice_ptp_flush_tx_tracker(pf, &pf->ptp.port.tx);
+ /* Mark any outstanding timestamps as stale, since they might have
+ * been captured in hardware before the time update. This could lead
+ * to us extending them with the wrong cached value resulting in
+ * incorrect timestamp values.
+ */
+ ice_ptp_mark_tx_tracker_stale(&pf->ptp.port.tx);
}
/**
@@ -1060,19 +1154,6 @@ static u64 ice_base_incval(struct ice_pf *pf)
}
/**
- * ice_ptp_reset_ts_memory_quad - Reset timestamp memory for one quad
- * @pf: The PF private data structure
- * @quad: The quad (0-4)
- */
-static void ice_ptp_reset_ts_memory_quad(struct ice_pf *pf, int quad)
-{
- struct ice_hw *hw = &pf->hw;
-
- ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, Q_REG_TS_CTRL_M);
- ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, ~(u32)Q_REG_TS_CTRL_M);
-}
-
-/**
* ice_ptp_check_tx_fifo - Check whether Tx FIFO is in an OK state
* @port: PTP port for which Tx FIFO is checked
*/
@@ -1124,7 +1205,7 @@ static int ice_ptp_check_tx_fifo(struct ice_ptp_port *port)
dev_dbg(ice_pf_to_dev(pf),
"Port %d Tx FIFO still not empty; resetting quad %d\n",
port->port_num, quad);
- ice_ptp_reset_ts_memory_quad(pf, quad);
+ ice_ptp_reset_ts_memory_quad_e822(hw, quad);
port->tx_fifo_busy_cnt = FIFO_OK;
return 0;
}
@@ -1133,130 +1214,49 @@ static int ice_ptp_check_tx_fifo(struct ice_ptp_port *port)
}
/**
- * ice_ptp_check_tx_offset_valid - Check if the Tx PHY offset is valid
- * @port: the PTP port to check
- *
- * Checks whether the Tx offset for the PHY associated with this port is
- * valid. Returns 0 if the offset is valid, and a non-zero error code if it is
- * not.
- */
-static int ice_ptp_check_tx_offset_valid(struct ice_ptp_port *port)
-{
- struct ice_pf *pf = ptp_port_to_pf(port);
- struct device *dev = ice_pf_to_dev(pf);
- struct ice_hw *hw = &pf->hw;
- u32 val;
- int err;
-
- err = ice_ptp_check_tx_fifo(port);
- if (err)
- return err;
-
- err = ice_read_phy_reg_e822(hw, port->port_num, P_REG_TX_OV_STATUS,
- &val);
- if (err) {
- dev_err(dev, "Failed to read TX_OV_STATUS for port %d, err %d\n",
- port->port_num, err);
- return -EAGAIN;
- }
-
- if (!(val & P_REG_TX_OV_STATUS_OV_M))
- return -EAGAIN;
-
- return 0;
-}
-
-/**
- * ice_ptp_check_rx_offset_valid - Check if the Rx PHY offset is valid
- * @port: the PTP port to check
- *
- * Checks whether the Rx offset for the PHY associated with this port is
- * valid. Returns 0 if the offset is valid, and a non-zero error code if it is
- * not.
- */
-static int ice_ptp_check_rx_offset_valid(struct ice_ptp_port *port)
-{
- struct ice_pf *pf = ptp_port_to_pf(port);
- struct device *dev = ice_pf_to_dev(pf);
- struct ice_hw *hw = &pf->hw;
- int err;
- u32 val;
-
- err = ice_read_phy_reg_e822(hw, port->port_num, P_REG_RX_OV_STATUS,
- &val);
- if (err) {
- dev_err(dev, "Failed to read RX_OV_STATUS for port %d, err %d\n",
- port->port_num, err);
- return err;
- }
-
- if (!(val & P_REG_RX_OV_STATUS_OV_M))
- return -EAGAIN;
-
- return 0;
-}
-
-/**
- * ice_ptp_check_offset_valid - Check port offset valid bit
- * @port: Port for which offset valid bit is checked
- *
- * Returns 0 if both Tx and Rx offset are valid, and -EAGAIN if one of the
- * offset is not ready.
- */
-static int ice_ptp_check_offset_valid(struct ice_ptp_port *port)
-{
- int tx_err, rx_err;
-
- /* always check both Tx and Rx offset validity */
- tx_err = ice_ptp_check_tx_offset_valid(port);
- rx_err = ice_ptp_check_rx_offset_valid(port);
-
- if (tx_err || rx_err)
- return -EAGAIN;
-
- return 0;
-}
-
-/**
- * ice_ptp_wait_for_offset_valid - Check for valid Tx and Rx offsets
+ * ice_ptp_wait_for_offsets - Check for valid Tx and Rx offsets
* @work: Pointer to the kthread_work structure for this task
*
- * Check whether both the Tx and Rx offsets are valid for enabling the vernier
- * calibration.
+ * Check whether hardware has completed measuring the Tx and Rx offset values
+ * used to configure and enable vernier timestamp calibration.
+ *
+ * Once the offset in either direction is measured, configure the associated
+ * registers with the calibrated offset values and enable timestamping. The Tx
+ * and Rx directions are configured independently as soon as their associated
+ * offsets are known.
*
- * Once we have valid offsets from hardware, update the total Tx and Rx
- * offsets, and exit bypass mode. This enables more precise timestamps using
- * the extra data measured during the vernier calibration process.
+ * This function reschedules itself until both Tx and Rx calibration have
+ * completed.
*/
-static void ice_ptp_wait_for_offset_valid(struct kthread_work *work)
+static void ice_ptp_wait_for_offsets(struct kthread_work *work)
{
struct ice_ptp_port *port;
- int err;
- struct device *dev;
struct ice_pf *pf;
struct ice_hw *hw;
+ int tx_err;
+ int rx_err;
port = container_of(work, struct ice_ptp_port, ov_work.work);
pf = ptp_port_to_pf(port);
hw = &pf->hw;
- dev = ice_pf_to_dev(pf);
-
- if (ice_is_reset_in_progress(pf->state))
- return;
- if (ice_ptp_check_offset_valid(port)) {
- /* Offsets not ready yet, try again later */
+ if (ice_is_reset_in_progress(pf->state)) {
+ /* wait for device driver to complete reset */
kthread_queue_delayed_work(pf->ptp.kworker,
&port->ov_work,
msecs_to_jiffies(100));
return;
}
- /* Offsets are valid, so it is safe to exit bypass mode */
- err = ice_phy_exit_bypass_e822(hw, port->port_num);
- if (err) {
- dev_warn(dev, "Failed to exit bypass mode for PHY port %u, err %d\n",
- port->port_num, err);
+ tx_err = ice_ptp_check_tx_fifo(port);
+ if (!tx_err)
+ tx_err = ice_phy_cfg_tx_offset_e822(hw, port->port_num);
+ rx_err = ice_phy_cfg_rx_offset_e822(hw, port->port_num);
+ if (tx_err || rx_err) {
+ /* Tx and/or Rx offset not yet configured, try again later */
+ kthread_queue_delayed_work(pf->ptp.kworker,
+ &port->ov_work,
+ msecs_to_jiffies(100));
return;
}
}
@@ -1317,16 +1317,20 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port)
kthread_cancel_delayed_work_sync(&ptp_port->ov_work);
/* temporarily disable Tx timestamps while calibrating PHY offset */
+ spin_lock(&ptp_port->tx.lock);
ptp_port->tx.calibrating = true;
+ spin_unlock(&ptp_port->tx.lock);
ptp_port->tx_fifo_busy_cnt = 0;
- /* Start the PHY timer in bypass mode */
- err = ice_start_phy_timer_e822(hw, port, true);
+ /* Start the PHY timer in Vernier mode */
+ err = ice_start_phy_timer_e822(hw, port);
if (err)
goto out_unlock;
/* Enable Tx timestamps right away */
+ spin_lock(&ptp_port->tx.lock);
ptp_port->tx.calibrating = false;
+ spin_unlock(&ptp_port->tx.lock);
kthread_queue_delayed_work(pf->ptp.kworker, &ptp_port->ov_work, 0);
@@ -1341,45 +1345,33 @@ out_unlock:
}
/**
- * ice_ptp_link_change - Set or clear port registers for timestamping
+ * ice_ptp_link_change - Reconfigure PTP after link status change
* @pf: Board private structure
* @port: Port for which the PHY start is set
* @linkup: Link is up or down
*/
-int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
+void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
{
struct ice_ptp_port *ptp_port;
- if (!test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
- return 0;
+ if (!test_bit(ICE_FLAG_PTP, pf->flags))
+ return;
- if (port >= ICE_NUM_EXTERNAL_PORTS)
- return -EINVAL;
+ if (WARN_ON_ONCE(port >= ICE_NUM_EXTERNAL_PORTS))
+ return;
ptp_port = &pf->ptp.port;
- if (ptp_port->port_num != port)
- return -EINVAL;
+ if (WARN_ON_ONCE(ptp_port->port_num != port))
+ return;
- /* Update cached link err for this port immediately */
+ /* Update cached link status for this port immediately */
ptp_port->link_up = linkup;
- if (!test_bit(ICE_FLAG_PTP, pf->flags))
- /* PTP is not setup */
- return -EAGAIN;
-
- return ice_ptp_port_phy_restart(ptp_port);
-}
-
-/**
- * ice_ptp_reset_ts_memory - Reset timestamp memory for all quads
- * @pf: The PF private data structure
- */
-static void ice_ptp_reset_ts_memory(struct ice_pf *pf)
-{
- int quad;
+ /* E810 devices do not need to reconfigure the PHY */
+ if (ice_is_e810(&pf->hw))
+ return;
- quad = pf->hw.port_info->lport / ICE_PORTS_PER_QUAD;
- ice_ptp_reset_ts_memory_quad(pf, quad);
+ ice_ptp_port_phy_restart(ptp_port);
}
/**
@@ -1397,7 +1389,7 @@ static int ice_ptp_tx_ena_intr(struct ice_pf *pf, bool ena, u32 threshold)
int quad;
u32 val;
- ice_ptp_reset_ts_memory(pf);
+ ice_ptp_reset_ts_memory(hw);
for (quad = 0; quad < ICE_MAX_QUAD; quad++) {
err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG,
@@ -2332,11 +2324,14 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
{
u8 idx;
- /* Check if this tracker is initialized */
- if (!tx->init || tx->calibrating)
+ spin_lock(&tx->lock);
+
+ /* Check that this tracker is accepting new timestamp requests */
+ if (!ice_ptp_is_tx_tracker_up(tx)) {
+ spin_unlock(&tx->lock);
return -1;
+ }
- spin_lock(&tx->lock);
/* Find and set the first available index */
idx = find_first_zero_bit(tx->in_use, tx->len);
if (idx < tx->len) {
@@ -2345,6 +2340,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
* requests.
*/
set_bit(idx, tx->in_use);
+ clear_bit(idx, tx->stale);
tx->tstamps[idx].start = jiffies;
tx->tstamps[idx].skb = skb_get(skb);
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
@@ -2359,7 +2355,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
if (idx >= tx->len)
return -1;
else
- return idx + tx->quad_offset;
+ return idx + tx->offset;
}
/**
@@ -2384,8 +2380,6 @@ static void ice_ptp_periodic_work(struct kthread_work *work)
err = ice_ptp_update_cached_phctime(pf);
- ice_ptp_tx_tstamp_cleanup(pf, &pf->ptp.port.tx);
-
/* Run twice a second or reschedule if phc update failed */
kthread_queue_delayed_work(ptp->kworker, &ptp->work,
msecs_to_jiffies(err ? 10 : 500));
@@ -2462,7 +2456,7 @@ pfr:
err = ice_ptp_init_tx_e810(pf, &ptp->port.tx);
} else {
kthread_init_delayed_work(&ptp->port.ov_work,
- ice_ptp_wait_for_offset_valid);
+ ice_ptp_wait_for_offsets);
err = ice_ptp_init_tx_e822(pf, &ptp->port.tx,
ptp->port.port_num);
}
@@ -2625,7 +2619,7 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port)
return ice_ptp_init_tx_e810(pf, &ptp_port->tx);
kthread_init_delayed_work(&ptp_port->ov_work,
- ice_ptp_wait_for_offset_valid);
+ ice_ptp_wait_for_offsets);
return ice_ptp_init_tx_e822(pf, &ptp_port->tx, ptp_port->port_num);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index 028349295b71..9cda2f43e0e5 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -93,9 +93,14 @@ struct ice_perout_channel {
* we discard old requests that were not fulfilled within a 2 second time
* window.
* Timestamp values in the PHY are read only and do not get cleared except at
- * hardware reset or when a new timestamp value is captured. The cached_tstamp
- * field is used to detect the case where a new timestamp has not yet been
- * captured, ensuring that we avoid sending stale timestamp data to the stack.
+ * hardware reset or when a new timestamp value is captured.
+ *
+ * Some PHY types do not provide a "ready" bitmap indicating which timestamp
+ * indexes are valid. In these cases, we use a cached_tstamp to keep track of
+ * the last timestamp we read for a given index. If the current timestamp
+ * value is the same as the cached value, we assume a new timestamp hasn't
+ * been captured. This avoids reporting stale timestamps to the stack. This is
+ * only done if the verify_cached flag is set in ice_ptp_tx structure.
*/
struct ice_tx_tstamp {
struct sk_buff *skb;
@@ -105,30 +110,35 @@ struct ice_tx_tstamp {
/**
* struct ice_ptp_tx - Tracking structure for all Tx timestamp requests on a port
- * @lock: lock to prevent concurrent write to in_use bitmap
+ * @lock: lock to prevent concurrent access to fields of this struct
* @tstamps: array of len to store outstanding requests
* @in_use: bitmap of len to indicate which slots are in use
- * @quad: which quad the timestamps are captured in
- * @quad_offset: offset into timestamp block of the quad to get the real index
+ * @stale: bitmap of len to indicate slots which have stale timestamps
+ * @block: which memory block (quad or port) the timestamps are captured in
+ * @offset: offset into timestamp block to get the real index
* @len: length of the tstamps and in_use fields.
* @init: if true, the tracker is initialized;
* @calibrating: if true, the PHY is calibrating the Tx offset. During this
* window, timestamps are temporarily disabled.
+ * @verify_cached: if true, verify new timestamp differs from last read value
*/
struct ice_ptp_tx {
spinlock_t lock; /* lock protecting in_use bitmap */
struct ice_tx_tstamp *tstamps;
unsigned long *in_use;
- u8 quad;
- u8 quad_offset;
+ unsigned long *stale;
+ u8 block;
+ u8 offset;
u8 len;
- u8 init;
- u8 calibrating;
+ u8 init : 1;
+ u8 calibrating : 1;
+ u8 verify_cached : 1;
};
/* Quad and port information for initializing timestamp blocks */
#define INDEX_PER_QUAD 64
-#define INDEX_PER_PORT (INDEX_PER_QUAD / ICE_PORTS_PER_QUAD)
+#define INDEX_PER_PORT_E822 16
+#define INDEX_PER_PORT_E810 64
/**
* struct ice_ptp_port - data used to initialize an external port for PTP
@@ -256,7 +266,7 @@ void ice_ptp_reset(struct ice_pf *pf);
void ice_ptp_prepare_for_reset(struct ice_pf *pf);
void ice_ptp_init(struct ice_pf *pf);
void ice_ptp_release(struct ice_pf *pf);
-int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup);
+void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup);
#else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
static inline int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr)
{
@@ -291,7 +301,8 @@ static inline void ice_ptp_reset(struct ice_pf *pf) { }
static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf) { }
static inline void ice_ptp_init(struct ice_pf *pf) { }
static inline void ice_ptp_release(struct ice_pf *pf) { }
-static inline int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
-{ return 0; }
+static inline void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
+{
+}
#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
#endif /* _ICE_PTP_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
index 1f8dd50db524..a38614d21ea8 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -656,6 +656,32 @@ ice_clear_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx)
}
/**
+ * ice_ptp_reset_ts_memory_quad_e822 - Clear all timestamps from the quad block
+ * @hw: pointer to the HW struct
+ * @quad: the quad to read from
+ *
+ * Clear all timestamps from the PHY quad block that is shared between the
+ * internal PHYs on the E822 devices.
+ */
+void ice_ptp_reset_ts_memory_quad_e822(struct ice_hw *hw, u8 quad)
+{
+ ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, Q_REG_TS_CTRL_M);
+ ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, ~(u32)Q_REG_TS_CTRL_M);
+}
+
+/**
+ * ice_ptp_reset_ts_memory_e822 - Clear all timestamps from all quad blocks
+ * @hw: pointer to the HW struct
+ */
+static void ice_ptp_reset_ts_memory_e822(struct ice_hw *hw)
+{
+ unsigned int quad;
+
+ for (quad = 0; quad < ICE_MAX_QUAD; quad++)
+ ice_ptp_reset_ts_memory_quad_e822(hw, quad);
+}
+
+/**
* ice_read_cgu_reg_e822 - Read a CGU register
* @hw: pointer to the HW struct
* @addr: Register address to read
@@ -1715,21 +1741,48 @@ ice_calc_fixed_tx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd)
* adjust Tx timestamps by. This is calculated by combining some known static
* latency along with the Vernier offset computations done by hardware.
*
- * This function must be called only after the offset registers are valid,
- * i.e. after the Vernier calibration wait has passed, to ensure that the PHY
- * has measured the offset.
+ * This function will not return successfully until the Tx offset calculations
+ * have been completed, which requires waiting until at least one packet has
+ * been transmitted by the device. It is safe to call this function
+ * periodically until calibration succeeds, as it will only program the offset
+ * once.
*
* To avoid overflow, when calculating the offset based on the known static
* latency values, we use measurements in 1/100th of a nanosecond, and divide
* the TUs per second up front. This avoids overflow while allowing
* calculation of the adjustment using integer arithmetic.
+ *
+ * Returns zero on success, -EBUSY if the hardware vernier offset
+ * calibration has not completed, or another error code on failure.
*/
-static int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port)
+int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port)
{
enum ice_ptp_link_spd link_spd;
enum ice_ptp_fec_mode fec_mode;
u64 total_offset, val;
int err;
+ u32 reg;
+
+ /* Nothing to do if we've already programmed the offset */
+ err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OR, &reg);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OR for port %u, err %d\n",
+ port, err);
+ return err;
+ }
+
+ if (reg)
+ return 0;
+
+ err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OV_STATUS, &reg);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OV_STATUS for port %u, err %d\n",
+ port, err);
+ return err;
+ }
+
+ if (!(reg & P_REG_TX_OV_STATUS_OV_M))
+ return -EBUSY;
err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
if (err)
@@ -1783,46 +1836,8 @@ static int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port)
if (err)
return err;
- return 0;
-}
-
-/**
- * ice_phy_cfg_fixed_tx_offset_e822 - Configure Tx offset for bypass mode
- * @hw: pointer to the HW struct
- * @port: the PHY port to configure
- *
- * Calculate and program the fixed Tx offset, and indicate that the offset is
- * ready. This can be used when operating in bypass mode.
- */
-static int
-ice_phy_cfg_fixed_tx_offset_e822(struct ice_hw *hw, u8 port)
-{
- enum ice_ptp_link_spd link_spd;
- enum ice_ptp_fec_mode fec_mode;
- u64 total_offset;
- int err;
-
- err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
- if (err)
- return err;
-
- total_offset = ice_calc_fixed_tx_offset_e822(hw, link_spd);
-
- /* Program the fixed Tx offset into the P_REG_TOTAL_TX_OFFSET_L
- * register, then indicate that the Tx offset is ready. After this,
- * timestamps will be enabled.
- *
- * Note that this skips including the more precise offsets generated
- * by the Vernier calibration.
- */
- err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_TX_OFFSET_L,
- total_offset);
- if (err)
- return err;
-
- err = ice_write_phy_reg_e822(hw, port, P_REG_TX_OR, 1);
- if (err)
- return err;
+ dev_info(ice_hw_to_dev(hw), "Port=%d Tx vernier offset calibration complete\n",
+ port);
return 0;
}
@@ -2026,6 +2041,11 @@ ice_calc_fixed_rx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd)
* measurements taken in hardware with some data about known fixed delay as
* well as adjusting for multi-lane alignment delay.
*
+ * This function will not return successfully until the Rx offset calculations
+ * have been completed, which requires waiting until at least one packet has
+ * been received by the device. It is safe to call this function periodically
+ * until calibration succeeds, as it will only program the offset once.
+ *
* This function must be called only after the offset registers are valid,
* i.e. after the Vernier calibration wait has passed, to ensure that the PHY
* has measured the offset.
@@ -2034,13 +2054,38 @@ ice_calc_fixed_rx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd)
* latency values, we use measurements in 1/100th of a nanosecond, and divide
* the TUs per second up front. This avoids overflow while allowing
* calculation of the adjustment using integer arithmetic.
+ *
+ * Returns zero on success, -EBUSY if the hardware vernier offset
+ * calibration has not completed, or another error code on failure.
*/
-static int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port)
+int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port)
{
enum ice_ptp_link_spd link_spd;
enum ice_ptp_fec_mode fec_mode;
u64 total_offset, pmd, val;
int err;
+ u32 reg;
+
+ /* Nothing to do if we've already programmed the offset */
+ err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OR, &reg);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OR for port %u, err %d\n",
+ port, err);
+ return err;
+ }
+
+ if (reg)
+ return 0;
+
+ err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OV_STATUS, &reg);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OV_STATUS for port %u, err %d\n",
+ port, err);
+ return err;
+ }
+
+ if (!(reg & P_REG_RX_OV_STATUS_OV_M))
+ return -EBUSY;
err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
if (err)
@@ -2101,46 +2146,8 @@ static int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port)
if (err)
return err;
- return 0;
-}
-
-/**
- * ice_phy_cfg_fixed_rx_offset_e822 - Configure fixed Rx offset for bypass mode
- * @hw: pointer to the HW struct
- * @port: the PHY port to configure
- *
- * Calculate and program the fixed Rx offset, and indicate that the offset is
- * ready. This can be used when operating in bypass mode.
- */
-static int
-ice_phy_cfg_fixed_rx_offset_e822(struct ice_hw *hw, u8 port)
-{
- enum ice_ptp_link_spd link_spd;
- enum ice_ptp_fec_mode fec_mode;
- u64 total_offset;
- int err;
-
- err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
- if (err)
- return err;
-
- total_offset = ice_calc_fixed_rx_offset_e822(hw, link_spd);
-
- /* Program the fixed Rx offset into the P_REG_TOTAL_RX_OFFSET_L
- * register, then indicate that the Rx offset is ready. After this,
- * timestamps will be enabled.
- *
- * Note that this skips including the more precise offsets generated
- * by Vernier calibration.
- */
- err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_RX_OFFSET_L,
- total_offset);
- if (err)
- return err;
-
- err = ice_write_phy_reg_e822(hw, port, P_REG_RX_OR, 1);
- if (err)
- return err;
+ dev_info(ice_hw_to_dev(hw), "Port=%d Rx vernier offset calibration complete\n",
+ port);
return 0;
}
@@ -2323,20 +2330,14 @@ ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset)
* ice_start_phy_timer_e822 - Start the PHY clock timer
* @hw: pointer to the HW struct
* @port: the PHY port to start
- * @bypass: if true, start the PHY in bypass mode
*
* Start the clock of a PHY port. This must be done as part of the flow to
* re-calibrate Tx and Rx timestamping offsets whenever the clock time is
* initialized or when link speed changes.
*
- * Bypass mode enables timestamps immediately without waiting for Vernier
- * calibration to complete. Hardware will still continue taking Vernier
- * measurements on Tx or Rx of packets, but they will not be applied to
- * timestamps. Use ice_phy_exit_bypass_e822 to exit bypass mode once hardware
- * has completed offset calculation.
+ * Hardware will take Vernier measurements on Tx or Rx of packets.
*/
-int
-ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass)
+int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port)
{
u32 lo, hi, val;
u64 incval;
@@ -2414,110 +2415,42 @@ ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass)
if (err)
return err;
- if (bypass) {
- val |= P_REG_PS_BYPASS_MODE_M;
- /* Enter BYPASS mode, enabling timestamps immediately. */
- err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
- if (err)
- return err;
-
- /* Program the fixed Tx offset */
- err = ice_phy_cfg_fixed_tx_offset_e822(hw, port);
- if (err)
- return err;
-
- /* Program the fixed Rx offset */
- err = ice_phy_cfg_fixed_rx_offset_e822(hw, port);
- if (err)
- return err;
- }
-
ice_debug(hw, ICE_DBG_PTP, "Enabled clock on PHY port %u\n", port);
return 0;
}
/**
- * ice_phy_exit_bypass_e822 - Exit bypass mode, after vernier calculations
+ * ice_get_phy_tx_tstamp_ready_e822 - Read Tx memory status register
* @hw: pointer to the HW struct
- * @port: the PHY port to configure
- *
- * After hardware finishes vernier calculations for the Tx and Rx offset, this
- * function can be used to exit bypass mode by updating the total Tx and Rx
- * offsets, and then disabling bypass. This will enable hardware to include
- * the more precise offset calibrations, increasing precision of the generated
- * timestamps.
+ * @quad: the timestamp quad to read from
+ * @tstamp_ready: contents of the Tx memory status register
*
- * This cannot be done until hardware has measured the offsets, which requires
- * waiting until at least one packet has been sent and received by the device.
+ * Read the Q_REG_TX_MEMORY_STATUS register indicating which timestamps in
+ * the PHY are ready. A set bit means the corresponding timestamp is valid and
+ * ready to be captured from the PHY timestamp block.
*/
-int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port)
+static int
+ice_get_phy_tx_tstamp_ready_e822(struct ice_hw *hw, u8 quad, u64 *tstamp_ready)
{
+ u32 hi, lo;
int err;
- u32 val;
-
- err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OV_STATUS, &val);
- if (err) {
- ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OV_STATUS for port %u, err %d\n",
- port, err);
- return err;
- }
-
- if (!(val & P_REG_TX_OV_STATUS_OV_M)) {
- ice_debug(hw, ICE_DBG_PTP, "Tx offset is not yet valid for port %u\n",
- port);
- return -EBUSY;
- }
-
- err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OV_STATUS, &val);
- if (err) {
- ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OV_STATUS for port %u, err %d\n",
- port, err);
- return err;
- }
-
- if (!(val & P_REG_TX_OV_STATUS_OV_M)) {
- ice_debug(hw, ICE_DBG_PTP, "Rx offset is not yet valid for port %u\n",
- port);
- return -EBUSY;
- }
-
- err = ice_phy_cfg_tx_offset_e822(hw, port);
- if (err) {
- ice_debug(hw, ICE_DBG_PTP, "Failed to program total Tx offset for port %u, err %d\n",
- port, err);
- return err;
- }
-
- err = ice_phy_cfg_rx_offset_e822(hw, port);
- if (err) {
- ice_debug(hw, ICE_DBG_PTP, "Failed to program total Rx offset for port %u, err %d\n",
- port, err);
- return err;
- }
- /* Exit bypass mode now that the offset has been updated */
- err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val);
+ err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEMORY_STATUS_U, &hi);
if (err) {
- ice_debug(hw, ICE_DBG_PTP, "Failed to read P_REG_PS for port %u, err %d\n",
- port, err);
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEMORY_STATUS_U for quad %u, err %d\n",
+ quad, err);
return err;
}
- if (!(val & P_REG_PS_BYPASS_MODE_M))
- ice_debug(hw, ICE_DBG_PTP, "Port %u not in bypass mode\n",
- port);
-
- val &= ~P_REG_PS_BYPASS_MODE_M;
- err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+ err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEMORY_STATUS_L, &lo);
if (err) {
- ice_debug(hw, ICE_DBG_PTP, "Failed to disable bypass for port %u, err %d\n",
- port, err);
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEMORY_STATUS_L for quad %u, err %d\n",
+ quad, err);
return err;
}
- dev_info(ice_hw_to_dev(hw), "Exiting bypass mode on PHY port %u\n",
- port);
+ *tstamp_ready = (u64)hi << 32 | (u64)lo;
return 0;
}
@@ -3196,6 +3129,22 @@ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx)
return ice_clear_phy_tstamp_e822(hw, block, idx);
}
+/**
+ * ice_get_phy_tx_tstamp_ready_e810 - Read Tx memory status register
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to read
+ * @tstamp_ready: contents of the Tx memory status register
+ *
+ * E810 devices do not use a Tx memory status register. Instead simply
+ * indicate that all timestamps are currently ready.
+ */
+static int
+ice_get_phy_tx_tstamp_ready_e810(struct ice_hw *hw, u8 port, u64 *tstamp_ready)
+{
+ *tstamp_ready = 0xFFFFFFFFFFFFFFFF;
+ return 0;
+}
+
/* E810T SMA functions
*
* The following functions operate specifically on E810T hardware and are used
@@ -3379,6 +3328,18 @@ bool ice_is_pca9575_present(struct ice_hw *hw)
}
/**
+ * ice_ptp_reset_ts_memory - Reset timestamp memory for all blocks
+ * @hw: pointer to the HW struct
+ */
+void ice_ptp_reset_ts_memory(struct ice_hw *hw)
+{
+ if (ice_is_e810(hw))
+ return;
+
+ ice_ptp_reset_ts_memory_e822(hw);
+}
+
+/**
* ice_ptp_init_phc - Initialize PTP hardware clock
* @hw: pointer to the HW struct
*
@@ -3399,3 +3360,24 @@ int ice_ptp_init_phc(struct ice_hw *hw)
else
return ice_ptp_init_phc_e822(hw);
}
+
+/**
+ * ice_get_phy_tx_tstamp_ready - Read PHY Tx memory status indication
+ * @hw: pointer to the HW struct
+ * @block: the timestamp block to check
+ * @tstamp_ready: storage for the PHY Tx memory status information
+ *
+ * Check the PHY for Tx timestamp memory status. This reports a 64 bit value
+ * which indicates which timestamps in the block may be captured. A set bit
+ * means the timestamp can be read. An unset bit means the timestamp is not
+ * ready and software should avoid reading the register.
+ */
+int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready)
+{
+ if (ice_is_e810(hw))
+ return ice_get_phy_tx_tstamp_ready_e810(hw, block,
+ tstamp_ready);
+ else
+ return ice_get_phy_tx_tstamp_ready_e822(hw, block,
+ tstamp_ready);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
index 2bda64c76abc..3b68cb91bd81 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
@@ -133,7 +133,9 @@ int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval);
int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj);
int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp);
int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx);
+void ice_ptp_reset_ts_memory(struct ice_hw *hw);
int ice_ptp_init_phc(struct ice_hw *hw);
+int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready);
/* E822 family functions */
int ice_read_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 *val);
@@ -141,6 +143,7 @@ int ice_write_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 val);
int ice_read_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 *val);
int ice_write_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 val);
int ice_ptp_prep_port_adj_e822(struct ice_hw *hw, u8 port, s64 time);
+void ice_ptp_reset_ts_memory_quad_e822(struct ice_hw *hw, u8 quad);
/**
* ice_e822_time_ref - Get the current TIME_REF from capabilities
@@ -184,8 +187,9 @@ static inline u64 ice_e822_pps_delay(enum ice_time_ref_freq time_ref)
/* E822 Vernier calibration functions */
int ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset);
-int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass);
-int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port);
+int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port);
+int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port);
+int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port);
/* E810 family functions */
int ice_ptp_init_phy_e810(struct ice_hw *hw);
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 36acec89d3d4..7d60da1b7bf4 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1413,6 +1413,8 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
*data = 1;
return -1;
}
+ wr32(E1000_IVAR_MISC, E1000_IVAR_VALID << 8);
+ wr32(E1000_EIMS, BIT(0));
} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
shared_int = false;
if (request_irq(irq,
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index c2cb98d24f5c..f8925cac61e4 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -4270,7 +4270,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp)
/* Use the cpu associated to the rxq when it is online, in all
* the other cases, use the cpu 0 which can't be offline.
*/
- if (cpu_online(pp->rxq_def))
+ if (pp->rxq_def < nr_cpu_ids && cpu_online(pp->rxq_def))
elected_cpu = pp->rxq_def;
max_cpu = num_present_cpus();
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index c8724bfa86b0..b2b71fe80d61 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -64,6 +64,7 @@ static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool en);
static const struct pci_device_id cgx_id_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_CGX) },
{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10K_RPM) },
+ { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10KB_RPM) },
{ 0, } /* end of table */
};
@@ -73,12 +74,13 @@ static bool is_dev_rpm(void *cgxd)
{
struct cgx *cgx = cgxd;
- return (cgx->pdev->device == PCI_DEVID_CN10K_RPM);
+ return (cgx->pdev->device == PCI_DEVID_CN10K_RPM) ||
+ (cgx->pdev->device == PCI_DEVID_CN10KB_RPM);
}
bool is_lmac_valid(struct cgx *cgx, int lmac_id)
{
- if (!cgx || lmac_id < 0 || lmac_id >= MAX_LMAC_PER_CGX)
+ if (!cgx || lmac_id < 0 || lmac_id >= cgx->max_lmac_per_mac)
return false;
return test_bit(lmac_id, &cgx->lmac_bmap);
}
@@ -90,7 +92,7 @@ static int get_sequence_id_of_lmac(struct cgx *cgx, int lmac_id)
{
int tmp, id = 0;
- for_each_set_bit(tmp, &cgx->lmac_bmap, MAX_LMAC_PER_CGX) {
+ for_each_set_bit(tmp, &cgx->lmac_bmap, cgx->max_lmac_per_mac) {
if (tmp == lmac_id)
break;
id++;
@@ -121,7 +123,7 @@ u64 cgx_read(struct cgx *cgx, u64 lmac, u64 offset)
struct lmac *lmac_pdata(u8 lmac_id, struct cgx *cgx)
{
- if (!cgx || lmac_id >= MAX_LMAC_PER_CGX)
+ if (!cgx || lmac_id >= cgx->max_lmac_per_mac)
return NULL;
return cgx->lmac_idmap[lmac_id];
@@ -485,7 +487,7 @@ int cgx_set_pkind(void *cgxd, u8 lmac_id, int pkind)
if (!is_lmac_valid(cgx, lmac_id))
return -ENODEV;
- cgx_write(cgx, lmac_id, CGXX_CMRX_RX_ID_MAP, (pkind & 0x3F));
+ cgx_write(cgx, lmac_id, cgx->mac_ops->rxid_map_offset, (pkind & 0x3F));
return 0;
}
@@ -740,6 +742,10 @@ int cgx_get_fec_stats(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp)
if (!cgx || lmac_id >= cgx->lmac_count)
return -ENODEV;
+
+ if (cgx->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_NONE)
+ return 0;
+
fec_stats_count =
cgx_set_fec_stats_count(&cgx->lmac_idmap[lmac_id]->link_info);
if (cgx->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_BASER) {
@@ -1224,7 +1230,7 @@ static inline void link_status_user_format(u64 lstat,
linfo->speed = cgx_speed_mbps[FIELD_GET(RESP_LINKSTAT_SPEED, lstat)];
linfo->an = FIELD_GET(RESP_LINKSTAT_AN, lstat);
linfo->fec = FIELD_GET(RESP_LINKSTAT_FEC, lstat);
- linfo->lmac_type_id = cgx_get_lmac_type(cgx, lmac_id);
+ linfo->lmac_type_id = FIELD_GET(RESP_LINKSTAT_LMAC_TYPE, lstat);
lmac_string = cgx_lmactype_string[linfo->lmac_type_id];
strncpy(linfo->lmac_type, lmac_string, LMACTYPE_STR_LEN - 1);
}
@@ -1395,7 +1401,7 @@ int cgx_get_fwdata_base(u64 *base)
if (!cgx)
return -ENXIO;
- first_lmac = find_first_bit(&cgx->lmac_bmap, MAX_LMAC_PER_CGX);
+ first_lmac = find_first_bit(&cgx->lmac_bmap, cgx->max_lmac_per_mac);
req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_FWD_BASE, req);
err = cgx_fwi_cmd_generic(req, &resp, cgx, first_lmac);
if (!err)
@@ -1484,7 +1490,7 @@ static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool enable)
static inline int cgx_fwi_read_version(u64 *resp, struct cgx *cgx)
{
- int first_lmac = find_first_bit(&cgx->lmac_bmap, MAX_LMAC_PER_CGX);
+ int first_lmac = find_first_bit(&cgx->lmac_bmap, cgx->max_lmac_per_mac);
u64 req = 0;
req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_FW_VER, req);
@@ -1522,7 +1528,7 @@ static void cgx_lmac_linkup_work(struct work_struct *work)
int i, err;
/* Do Link up for all the enabled lmacs */
- for_each_set_bit(i, &cgx->lmac_bmap, MAX_LMAC_PER_CGX) {
+ for_each_set_bit(i, &cgx->lmac_bmap, cgx->max_lmac_per_mac) {
err = cgx_fwi_link_change(cgx, i, true);
if (err)
dev_info(dev, "cgx port %d:%d Link up command failed\n",
@@ -1542,14 +1548,6 @@ int cgx_lmac_linkup_start(void *cgxd)
return 0;
}
-static void cgx_lmac_get_fifolen(struct cgx *cgx)
-{
- u64 cfg;
-
- cfg = cgx_read(cgx, 0, CGX_CONST);
- cgx->mac_ops->fifo_len = FIELD_GET(CGX_CONST_RXFIFO_SIZE, cfg);
-}
-
static int cgx_configure_interrupt(struct cgx *cgx, struct lmac *lmac,
int cnt, bool req_free)
{
@@ -1604,17 +1602,20 @@ static int cgx_lmac_init(struct cgx *cgx)
u64 lmac_list;
int i, err;
- cgx_lmac_get_fifolen(cgx);
-
- cgx->lmac_count = cgx->mac_ops->get_nr_lmacs(cgx);
/* lmac_list specifies which lmacs are enabled
* when bit n is set to 1, LMAC[n] is enabled
*/
- if (cgx->mac_ops->non_contiguous_serdes_lane)
- lmac_list = cgx_read(cgx, 0, CGXX_CMRX_RX_LMACS) & 0xFULL;
+ if (cgx->mac_ops->non_contiguous_serdes_lane) {
+ if (is_dev_rpm2(cgx))
+ lmac_list =
+ cgx_read(cgx, 0, RPM2_CMRX_RX_LMACS) & 0xFFULL;
+ else
+ lmac_list =
+ cgx_read(cgx, 0, CGXX_CMRX_RX_LMACS) & 0xFULL;
+ }
- if (cgx->lmac_count > MAX_LMAC_PER_CGX)
- cgx->lmac_count = MAX_LMAC_PER_CGX;
+ if (cgx->lmac_count > cgx->max_lmac_per_mac)
+ cgx->lmac_count = cgx->max_lmac_per_mac;
for (i = 0; i < cgx->lmac_count; i++) {
lmac = kzalloc(sizeof(struct lmac), GFP_KERNEL);
@@ -1635,7 +1636,9 @@ static int cgx_lmac_init(struct cgx *cgx)
lmac->cgx = cgx;
lmac->mac_to_index_bmap.max =
- MAX_DMAC_ENTRIES_PER_CGX / cgx->lmac_count;
+ cgx->mac_ops->dmac_filter_count /
+ cgx->lmac_count;
+
err = rvu_alloc_bitmap(&lmac->mac_to_index_bmap);
if (err)
goto err_name_free;
@@ -1692,7 +1695,7 @@ static int cgx_lmac_exit(struct cgx *cgx)
}
/* Free all lmac related resources */
- for_each_set_bit(i, &cgx->lmac_bmap, MAX_LMAC_PER_CGX) {
+ for_each_set_bit(i, &cgx->lmac_bmap, cgx->max_lmac_per_mac) {
lmac = cgx->lmac_idmap[i];
if (!lmac)
continue;
@@ -1708,6 +1711,12 @@ static int cgx_lmac_exit(struct cgx *cgx)
static void cgx_populate_features(struct cgx *cgx)
{
+ u64 cfg;
+
+ cfg = cgx_read(cgx, 0, CGX_CONST);
+ cgx->mac_ops->fifo_len = FIELD_GET(CGX_CONST_RXFIFO_SIZE, cfg);
+ cgx->max_lmac_per_mac = FIELD_GET(CGX_CONST_MAX_LMACS, cfg);
+
if (is_dev_rpm(cgx))
cgx->hw_features = (RVU_LMAC_FEAT_DMACF | RVU_MAC_RPM |
RVU_LMAC_FEAT_FC | RVU_LMAC_FEAT_PTP);
@@ -1716,6 +1725,15 @@ static void cgx_populate_features(struct cgx *cgx)
RVU_LMAC_FEAT_PTP | RVU_LMAC_FEAT_DMACF);
}
+static u8 cgx_get_rxid_mapoffset(struct cgx *cgx)
+{
+ if (cgx->pdev->subsystem_device == PCI_SUBSYS_DEVID_CNF10KB_RPM ||
+ is_dev_rpm2(cgx))
+ return 0x80;
+ else
+ return 0x60;
+}
+
static struct mac_ops cgx_mac_ops = {
.name = "cgx",
.csr_offset = 0,
@@ -1728,12 +1746,14 @@ static struct mac_ops cgx_mac_ops = {
.non_contiguous_serdes_lane = false,
.rx_stats_cnt = 9,
.tx_stats_cnt = 18,
+ .dmac_filter_count = 32,
.get_nr_lmacs = cgx_get_nr_lmacs,
.get_lmac_type = cgx_get_lmac_type,
.lmac_fifo_len = cgx_get_lmac_fifo_len,
.mac_lmac_intl_lbk = cgx_lmac_internal_loopback,
.mac_get_rx_stats = cgx_get_rx_stats,
.mac_get_tx_stats = cgx_get_tx_stats,
+ .get_fec_stats = cgx_get_fec_stats,
.mac_enadis_rx_pause_fwding = cgx_lmac_enadis_rx_pause_fwding,
.mac_get_pause_frm_status = cgx_lmac_get_pause_frm_status,
.mac_enadis_pause_frm = cgx_lmac_enadis_pause_frm,
@@ -1759,11 +1779,13 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_drvdata(pdev, cgx);
/* Use mac_ops to get MAC specific features */
- if (pdev->device == PCI_DEVID_CN10K_RPM)
- cgx->mac_ops = rpm_get_mac_ops();
+ if (is_dev_rpm(cgx))
+ cgx->mac_ops = rpm_get_mac_ops(cgx);
else
cgx->mac_ops = &cgx_mac_ops;
+ cgx->mac_ops->rxid_map_offset = cgx_get_rxid_mapoffset(cgx);
+
err = pci_enable_device(pdev);
if (err) {
dev_err(dev, "Failed to enable PCI device\n");
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
index 0b06788b8d80..fb2d37676d84 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
@@ -18,11 +18,7 @@
/* PCI BAR nos */
#define PCI_CFG_REG_BAR_NUM 0
-#define CGX_ID_MASK 0x7
-#define MAX_LMAC_PER_CGX 4
-#define MAX_DMAC_ENTRIES_PER_CGX 32
-#define CGX_FIFO_LEN 65536 /* 64K for both Rx & Tx */
-#define CGX_OFFSET(x) ((x) * MAX_LMAC_PER_CGX)
+#define CGX_ID_MASK 0xF
/* Registers */
#define CGXX_CMRX_CFG 0x00
@@ -56,7 +52,8 @@
#define CGXX_SCRATCH0_REG 0x1050
#define CGXX_SCRATCH1_REG 0x1058
#define CGX_CONST 0x2000
-#define CGX_CONST_RXFIFO_SIZE GENMASK_ULL(23, 0)
+#define CGX_CONST_RXFIFO_SIZE GENMASK_ULL(55, 32)
+#define CGX_CONST_MAX_LMACS GENMASK_ULL(31, 24)
#define CGXX_SPUX_CONTROL1 0x10000
#define CGXX_SPUX_LNX_FEC_CORR_BLOCKS 0x10700
#define CGXX_SPUX_LNX_FEC_UNCORR_BLOCKS 0x10800
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
index 52b6016789fa..39aaf0e4467d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
@@ -75,6 +75,11 @@ struct mac_ops {
/* RPM & CGX differs in number of Receive/transmit stats */
u8 rx_stats_cnt;
u8 tx_stats_cnt;
+ /* Unlike CN10K which shares same CSR offset with CGX
+ * CNF10KB has different csr offset
+ */
+ u64 rxid_map_offset;
+ u8 dmac_filter_count;
/* Incase of RPM get number of lmacs from RPMX_CMR_RX_LMACS[LMAC_EXIST]
* number of setbits in lmac_exist tells number of lmacs
*/
@@ -121,6 +126,9 @@ struct mac_ops {
int (*mac_get_pfc_frm_cfg)(void *cgxd, int lmac_id,
u8 *tx_pause, u8 *rx_pause);
+ /* FEC stats */
+ int (*get_fec_stats)(void *cgxd, int lmac_id,
+ struct cgx_fec_stats_rsp *rsp);
};
struct cgx {
@@ -128,7 +136,10 @@ struct cgx {
struct pci_dev *pdev;
u8 cgx_id;
u8 lmac_count;
- struct lmac *lmac_idmap[MAX_LMAC_PER_CGX];
+ /* number of LMACs per MAC could be 4 or 8 */
+ u8 max_lmac_per_mac;
+#define MAX_LMAC_COUNT 8
+ struct lmac *lmac_idmap[MAX_LMAC_COUNT];
struct work_struct cgx_cmd_work;
struct workqueue_struct *cgx_cmd_workq;
struct list_head cgx_list;
@@ -150,6 +161,6 @@ struct lmac *lmac_pdata(u8 lmac_id, struct cgx *cgx);
int cgx_fwi_cmd_send(u64 req, u64 *resp, struct lmac *lmac);
int cgx_fwi_cmd_generic(u64 req, u64 *resp, struct cgx *cgx, int lmac_id);
bool is_lmac_valid(struct cgx *cgx, int lmac_id);
-struct mac_ops *rpm_get_mac_ops(void);
+struct mac_ops *rpm_get_mac_ops(struct cgx *cgx);
#endif /* LMAC_COMMON_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
index a70e1153fa04..de0d88dd10d6 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
@@ -8,7 +8,7 @@
#include "cgx.h"
#include "lmac_common.h"
-static struct mac_ops rpm_mac_ops = {
+static struct mac_ops rpm_mac_ops = {
.name = "rpm",
.csr_offset = 0x4e00,
.lmac_offset = 20,
@@ -20,12 +20,14 @@ static struct mac_ops rpm_mac_ops = {
.non_contiguous_serdes_lane = true,
.rx_stats_cnt = 43,
.tx_stats_cnt = 34,
+ .dmac_filter_count = 32,
.get_nr_lmacs = rpm_get_nr_lmacs,
.get_lmac_type = rpm_get_lmac_type,
.lmac_fifo_len = rpm_get_lmac_fifo_len,
.mac_lmac_intl_lbk = rpm_lmac_internal_loopback,
.mac_get_rx_stats = rpm_get_rx_stats,
.mac_get_tx_stats = rpm_get_tx_stats,
+ .get_fec_stats = rpm_get_fec_stats,
.mac_enadis_rx_pause_fwding = rpm_lmac_enadis_rx_pause_fwding,
.mac_get_pause_frm_status = rpm_lmac_get_pause_frm_status,
.mac_enadis_pause_frm = rpm_lmac_enadis_pause_frm,
@@ -37,9 +39,50 @@ static struct mac_ops rpm_mac_ops = {
.mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg,
};
-struct mac_ops *rpm_get_mac_ops(void)
+static struct mac_ops rpm2_mac_ops = {
+ .name = "rpm",
+ .csr_offset = RPM2_CSR_OFFSET,
+ .lmac_offset = 20,
+ .int_register = RPM2_CMRX_SW_INT,
+ .int_set_reg = RPM2_CMRX_SW_INT_ENA_W1S,
+ .irq_offset = 1,
+ .int_ena_bit = BIT_ULL(0),
+ .lmac_fwi = RPM_LMAC_FWI,
+ .non_contiguous_serdes_lane = true,
+ .rx_stats_cnt = 43,
+ .tx_stats_cnt = 34,
+ .dmac_filter_count = 64,
+ .get_nr_lmacs = rpm2_get_nr_lmacs,
+ .get_lmac_type = rpm_get_lmac_type,
+ .lmac_fifo_len = rpm2_get_lmac_fifo_len,
+ .mac_lmac_intl_lbk = rpm_lmac_internal_loopback,
+ .mac_get_rx_stats = rpm_get_rx_stats,
+ .mac_get_tx_stats = rpm_get_tx_stats,
+ .get_fec_stats = rpm_get_fec_stats,
+ .mac_enadis_rx_pause_fwding = rpm_lmac_enadis_rx_pause_fwding,
+ .mac_get_pause_frm_status = rpm_lmac_get_pause_frm_status,
+ .mac_enadis_pause_frm = rpm_lmac_enadis_pause_frm,
+ .mac_pause_frm_config = rpm_lmac_pause_frm_config,
+ .mac_enadis_ptp_config = rpm_lmac_ptp_config,
+ .mac_rx_tx_enable = rpm_lmac_rx_tx_enable,
+ .mac_tx_enable = rpm_lmac_tx_enable,
+ .pfc_config = rpm_lmac_pfc_config,
+ .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg,
+};
+
+bool is_dev_rpm2(void *rpmd)
+{
+ rpm_t *rpm = rpmd;
+
+ return (rpm->pdev->device == PCI_DEVID_CN10KB_RPM);
+}
+
+struct mac_ops *rpm_get_mac_ops(rpm_t *rpm)
{
- return &rpm_mac_ops;
+ if (is_dev_rpm2(rpm))
+ return &rpm2_mac_ops;
+ else
+ return &rpm_mac_ops;
}
static void rpm_write(rpm_t *rpm, u64 lmac, u64 offset, u64 val)
@@ -52,6 +95,16 @@ static u64 rpm_read(rpm_t *rpm, u64 lmac, u64 offset)
return cgx_read(rpm, lmac, offset);
}
+/* Read HW major version to determine RPM
+ * MAC type 100/USX
+ */
+static bool is_mac_rpmusx(void *rpmd)
+{
+ rpm_t *rpm = rpmd;
+
+ return rpm_read(rpm, 0, RPMX_CONST1) & 0x700ULL;
+}
+
int rpm_get_nr_lmacs(void *rpmd)
{
rpm_t *rpm = rpmd;
@@ -59,6 +112,13 @@ int rpm_get_nr_lmacs(void *rpmd)
return hweight8(rpm_read(rpm, 0, CGXX_CMRX_RX_LMACS) & 0xFULL);
}
+int rpm2_get_nr_lmacs(void *rpmd)
+{
+ rpm_t *rpm = rpmd;
+
+ return hweight8(rpm_read(rpm, 0, RPM2_CMRX_RX_LMACS) & 0xFFULL);
+}
+
int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable)
{
rpm_t *rpm = rpmd;
@@ -222,6 +282,46 @@ static void rpm_cfg_pfc_quanta_thresh(rpm_t *rpm, int lmac_id,
}
}
+static void rpm2_lmac_cfg_bp(rpm_t *rpm, int lmac_id, u8 tx_pause, u8 rx_pause)
+{
+ u64 cfg;
+
+ cfg = rpm_read(rpm, lmac_id, RPM2_CMR_RX_OVR_BP);
+ if (tx_pause) {
+ /* Configure CL0 Pause Quanta & threshold
+ * for 802.3X frames
+ */
+ rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 1, true);
+ cfg &= ~RPM2_CMR_RX_OVR_BP_EN;
+ } else {
+ /* Disable all Pause Quanta & threshold values */
+ rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xffff, false);
+ cfg |= RPM2_CMR_RX_OVR_BP_EN;
+ cfg &= ~RPM2_CMR_RX_OVR_BP_BP;
+ }
+ rpm_write(rpm, lmac_id, RPM2_CMR_RX_OVR_BP, cfg);
+}
+
+static void rpm_lmac_cfg_bp(rpm_t *rpm, int lmac_id, u8 tx_pause, u8 rx_pause)
+{
+ u64 cfg;
+
+ cfg = rpm_read(rpm, 0, RPMX_CMR_RX_OVR_BP);
+ if (tx_pause) {
+ /* Configure CL0 Pause Quanta & threshold for
+ * 802.3X frames
+ */
+ rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 1, true);
+ cfg &= ~RPMX_CMR_RX_OVR_BP_EN(lmac_id);
+ } else {
+ /* Disable all Pause Quanta & threshold values */
+ rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xffff, false);
+ cfg |= RPMX_CMR_RX_OVR_BP_EN(lmac_id);
+ cfg &= ~RPMX_CMR_RX_OVR_BP_BP(lmac_id);
+ }
+ rpm_write(rpm, 0, RPMX_CMR_RX_OVR_BP, cfg);
+}
+
int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause,
u8 rx_pause)
{
@@ -243,18 +343,11 @@ int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause,
cfg |= tx_pause ? 0x0 : RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE;
rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
- cfg = rpm_read(rpm, 0, RPMX_CMR_RX_OVR_BP);
- if (tx_pause) {
- /* Configure CL0 Pause Quanta & threshold for 802.3X frames */
- rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 1, true);
- cfg &= ~RPMX_CMR_RX_OVR_BP_EN(lmac_id);
- } else {
- /* Disable all Pause Quanta & threshold values */
- rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xffff, false);
- cfg |= RPMX_CMR_RX_OVR_BP_EN(lmac_id);
- cfg &= ~RPMX_CMR_RX_OVR_BP_BP(lmac_id);
- }
- rpm_write(rpm, 0, RPMX_CMR_RX_OVR_BP, cfg);
+ if (is_dev_rpm2(rpm))
+ rpm2_lmac_cfg_bp(rpm, lmac_id, tx_pause, rx_pause);
+ else
+ rpm_lmac_cfg_bp(rpm, lmac_id, tx_pause, rx_pause);
+
return 0;
}
@@ -278,13 +371,16 @@ void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable)
cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE;
rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
+ /* Enable channel mask for all LMACS */
+ if (is_dev_rpm2(rpm))
+ rpm_write(rpm, lmac_id, RPM2_CMR_CHAN_MSK_OR, 0xffff);
+ else
+ rpm_write(rpm, 0, RPMX_CMR_CHAN_MSK_OR, ~0ULL);
+
/* Disable all PFC classes */
cfg = rpm_read(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL);
cfg = FIELD_SET(RPM_PFC_CLASS_MASK, 0, cfg);
rpm_write(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL, cfg);
-
- /* Enable channel mask for all LMACS */
- rpm_write(rpm, 0, RPMX_CMR_CHAN_MSK_OR, ~0ULL);
}
int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat)
@@ -292,7 +388,7 @@ int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat)
rpm_t *rpm = rpmd;
u64 val_lo, val_hi;
- if (!rpm || lmac_id >= rpm->lmac_count)
+ if (!is_lmac_valid(rpm, lmac_id))
return -ENODEV;
mutex_lock(&rpm->lock);
@@ -320,7 +416,7 @@ int rpm_get_tx_stats(void *rpmd, int lmac_id, int idx, u64 *tx_stat)
rpm_t *rpm = rpmd;
u64 val_lo, val_hi;
- if (!rpm || lmac_id >= rpm->lmac_count)
+ if (!is_lmac_valid(rpm, lmac_id))
return -ENODEV;
mutex_lock(&rpm->lock);
@@ -380,13 +476,71 @@ u32 rpm_get_lmac_fifo_len(void *rpmd, int lmac_id)
return 0;
}
+static int rpmusx_lmac_internal_loopback(rpm_t *rpm, int lmac_id, bool enable)
+{
+ u64 cfg;
+
+ cfg = rpm_read(rpm, lmac_id, RPM2_USX_PCSX_CONTROL1);
+
+ if (enable)
+ cfg |= RPM2_USX_PCS_LBK;
+ else
+ cfg &= ~RPM2_USX_PCS_LBK;
+ rpm_write(rpm, lmac_id, RPM2_USX_PCSX_CONTROL1, cfg);
+
+ return 0;
+}
+
+u32 rpm2_get_lmac_fifo_len(void *rpmd, int lmac_id)
+{
+ u64 hi_perf_lmac, lmac_info;
+ rpm_t *rpm = rpmd;
+ u8 num_lmacs;
+ u32 fifo_len;
+
+ lmac_info = rpm_read(rpm, 0, RPM2_CMRX_RX_LMACS);
+ /* LMACs are divided into two groups and each group
+ * gets half of the FIFO
+ * Group0 lmac_id range {0..3}
+ * Group1 lmac_id range {4..7}
+ */
+ fifo_len = rpm->mac_ops->fifo_len / 2;
+
+ if (lmac_id < 4) {
+ num_lmacs = hweight8(lmac_info & 0xF);
+ hi_perf_lmac = (lmac_info >> 8) & 0x3ULL;
+ } else {
+ num_lmacs = hweight8(lmac_info & 0xF0);
+ hi_perf_lmac = (lmac_info >> 10) & 0x3ULL;
+ hi_perf_lmac += 4;
+ }
+
+ switch (num_lmacs) {
+ case 1:
+ return fifo_len;
+ case 2:
+ return fifo_len / 2;
+ case 3:
+ /* LMAC marked as hi_perf gets half of the FIFO
+ * and rest 1/4th
+ */
+ if (lmac_id == hi_perf_lmac)
+ return fifo_len / 2;
+ return fifo_len / 4;
+ case 4:
+ default:
+ return fifo_len / 4;
+ }
+ return 0;
+}
+
int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable)
{
rpm_t *rpm = rpmd;
u8 lmac_type;
u64 cfg;
- if (!rpm || lmac_id >= rpm->lmac_count)
+ if (!is_lmac_valid(rpm, lmac_id))
return -ENODEV;
lmac_type = rpm->mac_ops->get_lmac_type(rpm, lmac_id);
@@ -395,6 +549,9 @@ int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable)
return 0;
}
+ if (is_dev_rpm2(rpm) && is_mac_rpmusx(rpm))
+ return rpmusx_lmac_internal_loopback(rpm, lmac_id, enable);
+
cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1);
if (enable)
@@ -439,8 +596,8 @@ void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable)
int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 pfc_en)
{
+ u64 cfg, class_en, pfc_class_mask_cfg;
rpm_t *rpm = rpmd;
- u64 cfg, class_en;
if (!is_lmac_valid(rpm, lmac_id))
return -ENODEV;
@@ -476,7 +633,10 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 p
rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
- rpm_write(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL, class_en);
+ pfc_class_mask_cfg = is_dev_rpm2(rpm) ? RPM2_CMRX_PRT_CBFC_CTL :
+ RPMX_CMRX_PRT_CBFC_CTL;
+
+ rpm_write(rpm, lmac_id, pfc_class_mask_cfg, class_en);
return 0;
}
@@ -497,3 +657,59 @@ int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause, u8 *rx_paus
return 0;
}
+
+int rpm_get_fec_stats(void *rpmd, int lmac_id, struct cgx_fec_stats_rsp *rsp)
+{
+ u64 val_lo, val_hi;
+ rpm_t *rpm = rpmd;
+ u64 cfg;
+
+ if (!is_lmac_valid(rpm, lmac_id))
+ return -ENODEV;
+
+ if (rpm->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_NONE)
+ return 0;
+
+ if (rpm->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_BASER) {
+ val_lo = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_VL0_CCW_LO);
+ val_hi = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_CW_HI);
+ rsp->fec_corr_blks = (val_hi << 16 | val_lo);
+
+ val_lo = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_VL0_NCCW_LO);
+ val_hi = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_CW_HI);
+ rsp->fec_uncorr_blks = (val_hi << 16 | val_lo);
+
+ /* 50G uses 2 Physical serdes lines */
+ if (rpm->lmac_idmap[lmac_id]->link_info.lmac_type_id ==
+ LMAC_MODE_50G_R) {
+ val_lo = rpm_read(rpm, lmac_id,
+ RPMX_MTI_FCFECX_VL1_CCW_LO);
+ val_hi = rpm_read(rpm, lmac_id,
+ RPMX_MTI_FCFECX_CW_HI);
+ rsp->fec_corr_blks += (val_hi << 16 | val_lo);
+
+ val_lo = rpm_read(rpm, lmac_id,
+ RPMX_MTI_FCFECX_VL1_NCCW_LO);
+ val_hi = rpm_read(rpm, lmac_id,
+ RPMX_MTI_FCFECX_CW_HI);
+ rsp->fec_uncorr_blks += (val_hi << 16 | val_lo);
+ }
+ } else {
+ /* enable RS-FEC capture */
+ cfg = rpm_read(rpm, 0, RPMX_MTI_STAT_STATN_CONTROL);
+ cfg |= RPMX_RSFEC_RX_CAPTURE | BIT(lmac_id);
+ rpm_write(rpm, 0, RPMX_MTI_STAT_STATN_CONTROL, cfg);
+
+ val_lo = rpm_read(rpm, 0,
+ RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_2);
+ val_hi = rpm_read(rpm, 0, RPMX_MTI_STAT_DATA_HI_CDC);
+ rsp->fec_corr_blks = (val_hi << 32 | val_lo);
+
+ val_lo = rpm_read(rpm, 0,
+ RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_3);
+ val_hi = rpm_read(rpm, 0, RPMX_MTI_STAT_DATA_HI_CDC);
+ rsp->fec_uncorr_blks = (val_hi << 32 | val_lo);
+ }
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
index 77f2ef9e1425..22147b4c2137 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h
@@ -12,17 +12,19 @@
/* PCI device IDs */
#define PCI_DEVID_CN10K_RPM 0xA060
+#define PCI_SUBSYS_DEVID_CNF10KB_RPM 0xBC00
+#define PCI_DEVID_CN10KB_RPM 0xA09F
/* Registers */
#define RPMX_CMRX_CFG 0x00
#define RPMX_RX_TS_PREPEND BIT_ULL(22)
#define RPMX_TX_PTP_1S_SUPPORT BIT_ULL(17)
+#define RPMX_CMRX_RX_ID_MAP 0x80
#define RPMX_CMRX_SW_INT 0x180
#define RPMX_CMRX_SW_INT_W1S 0x188
#define RPMX_CMRX_SW_INT_ENA_W1S 0x198
#define RPMX_CMRX_LINK_CFG 0x1070
#define RPMX_MTI_PCS100X_CONTROL1 0x20000
-#define RPMX_MTI_LPCSX_CONTROL1 0x30000
#define RPMX_MTI_PCS_LBK BIT_ULL(14)
#define RPMX_MTI_LPCSX_CONTROL(id) (0x30000 | ((id) * 0x100))
@@ -76,11 +78,40 @@
#define RPMX_MTI_MAC100X_XIF_MODE 0x8100
#define RPMX_ONESTEP_ENABLE BIT_ULL(5)
#define RPMX_TS_BINARY_MODE BIT_ULL(11)
+#define RPMX_CONST1 0x2008
+
+/* FEC stats */
+#define RPMX_MTI_STAT_STATN_CONTROL 0x10018
+#define RPMX_MTI_STAT_DATA_HI_CDC 0x10038
+#define RPMX_RSFEC_RX_CAPTURE BIT_ULL(27)
+#define RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_2 0x40050
+#define RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_3 0x40058
+#define RPMX_MTI_FCFECX_VL0_CCW_LO 0x38618
+#define RPMX_MTI_FCFECX_VL0_NCCW_LO 0x38620
+#define RPMX_MTI_FCFECX_VL1_CCW_LO 0x38628
+#define RPMX_MTI_FCFECX_VL1_NCCW_LO 0x38630
+#define RPMX_MTI_FCFECX_CW_HI 0x38638
+
+/* CN10KB CSR Declaration */
+#define RPM2_CMRX_SW_INT 0x1b0
+#define RPM2_CMRX_SW_INT_ENA_W1S 0x1b8
+#define RPM2_CMR_CHAN_MSK_OR 0x3120
+#define RPM2_CMR_RX_OVR_BP_EN BIT_ULL(2)
+#define RPM2_CMR_RX_OVR_BP_BP BIT_ULL(1)
+#define RPM2_CMR_RX_OVR_BP 0x3130
+#define RPM2_CSR_OFFSET 0x3e00
+#define RPM2_CMRX_PRT_CBFC_CTL 0x6510
+#define RPM2_CMRX_RX_LMACS 0x100
+#define RPM2_CMRX_RX_LOGL_XON 0x3100
+#define RPM2_CMRX_RX_STAT2 0x3010
+#define RPM2_USX_PCSX_CONTROL1 0x80000
+#define RPM2_USX_PCS_LBK BIT_ULL(14)
/* Function Declarations */
int rpm_get_nr_lmacs(void *rpmd);
u8 rpm_get_lmac_type(void *rpmd, int lmac_id);
u32 rpm_get_lmac_fifo_len(void *rpmd, int lmac_id);
+u32 rpm2_get_lmac_fifo_len(void *rpmd, int lmac_id);
int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable);
void rpm_lmac_enadis_rx_pause_fwding(void *rpmd, int lmac_id, bool enable);
int rpm_lmac_get_pause_frm_status(void *cgxd, int lmac_id, u8 *tx_pause,
@@ -97,4 +128,7 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause,
u16 pfc_en);
int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause,
u8 *rx_pause);
+int rpm2_get_nr_lmacs(void *rpmd);
+bool is_dev_rpm2(void *rpmd);
+int rpm_get_fec_stats(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp);
#endif /* RPM_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index f718cbd32a94..7f0a64731c67 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -410,9 +410,15 @@ struct rvu_fwdata {
u32 ptp_ext_tstamp;
#define FWDATA_RESERVED_MEM 1022
u64 reserved[FWDATA_RESERVED_MEM];
-#define CGX_MAX 5
+#define CGX_MAX 9
#define CGX_LMACS_MAX 4
- struct cgx_lmac_fwdata_s cgx_fw_data[CGX_MAX][CGX_LMACS_MAX];
+#define CGX_LMACS_USX 8
+ union {
+ struct cgx_lmac_fwdata_s
+ cgx_fw_data[CGX_MAX][CGX_LMACS_MAX];
+ struct cgx_lmac_fwdata_s
+ cgx_fw_data_usx[CGX_MAX][CGX_LMACS_USX];
+ };
/* Do not add new fields below this line */
};
@@ -478,7 +484,7 @@ struct rvu {
u8 cgx_mapped_pfs;
u8 cgx_cnt_max; /* CGX port count max */
u8 *pf2cgxlmac_map; /* pf to cgx_lmac map */
- u16 *cgxlmac2pf_map; /* bitmap of mapped pfs for
+ u64 *cgxlmac2pf_map; /* bitmap of mapped pfs for
* every cgx lmac port
*/
unsigned long pf_notify_bmap; /* Flags for PF notification */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index addc69f4b65c..438b212fb54a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -55,8 +55,9 @@ bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature)
return (cgx_features_get(cgxd) & feature);
}
+#define CGX_OFFSET(x) ((x) * rvu->hw->lmac_per_cgx)
/* Returns bitmap of mapped PFs */
-static u16 cgxlmac_to_pfmap(struct rvu *rvu, u8 cgx_id, u8 lmac_id)
+static u64 cgxlmac_to_pfmap(struct rvu *rvu, u8 cgx_id, u8 lmac_id)
{
return rvu->cgxlmac2pf_map[CGX_OFFSET(cgx_id) + lmac_id];
}
@@ -71,7 +72,8 @@ int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id)
if (!pfmap)
return -ENODEV;
else
- return find_first_bit(&pfmap, 16);
+ return find_first_bit(&pfmap,
+ rvu->cgx_cnt_max * rvu->hw->lmac_per_cgx);
}
static u8 cgxlmac_id_to_bmap(u8 cgx_id, u8 lmac_id)
@@ -129,14 +131,14 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
if (!cgx_cnt_max)
return 0;
- if (cgx_cnt_max > 0xF || MAX_LMAC_PER_CGX > 0xF)
+ if (cgx_cnt_max > 0xF || rvu->hw->lmac_per_cgx > 0xF)
return -EINVAL;
/* Alloc map table
* An additional entry is required since PF id starts from 1 and
* hence entry at offset 0 is invalid.
*/
- size = (cgx_cnt_max * MAX_LMAC_PER_CGX + 1) * sizeof(u8);
+ size = (cgx_cnt_max * rvu->hw->lmac_per_cgx + 1) * sizeof(u8);
rvu->pf2cgxlmac_map = devm_kmalloc(rvu->dev, size, GFP_KERNEL);
if (!rvu->pf2cgxlmac_map)
return -ENOMEM;
@@ -145,9 +147,10 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
memset(rvu->pf2cgxlmac_map, 0xFF, size);
/* Reverse map table */
- rvu->cgxlmac2pf_map = devm_kzalloc(rvu->dev,
- cgx_cnt_max * MAX_LMAC_PER_CGX * sizeof(u16),
- GFP_KERNEL);
+ rvu->cgxlmac2pf_map =
+ devm_kzalloc(rvu->dev,
+ cgx_cnt_max * rvu->hw->lmac_per_cgx * sizeof(u64),
+ GFP_KERNEL);
if (!rvu->cgxlmac2pf_map)
return -ENOMEM;
@@ -156,7 +159,7 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
if (!rvu_cgx_pdata(cgx, rvu))
continue;
lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu));
- for_each_set_bit(iter, &lmac_bmap, MAX_LMAC_PER_CGX) {
+ for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) {
lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu),
iter);
rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac);
@@ -235,7 +238,8 @@ static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu)
pfmap = cgxlmac_to_pfmap(rvu, event->cgx_id, event->lmac_id);
do {
- pfid = find_first_bit(&pfmap, 16);
+ pfid = find_first_bit(&pfmap,
+ rvu->cgx_cnt_max * rvu->hw->lmac_per_cgx);
clear_bit(pfid, &pfmap);
/* check if notification is enabled */
@@ -310,7 +314,7 @@ static int cgx_lmac_event_handler_init(struct rvu *rvu)
if (!cgxd)
continue;
lmac_bmap = cgx_get_lmac_bmap(cgxd);
- for_each_set_bit(lmac, &lmac_bmap, MAX_LMAC_PER_CGX) {
+ for_each_set_bit(lmac, &lmac_bmap, rvu->hw->lmac_per_cgx) {
err = cgx_lmac_evh_register(&cb, cgxd, lmac);
if (err)
dev_err(rvu->dev,
@@ -396,7 +400,7 @@ int rvu_cgx_exit(struct rvu *rvu)
if (!cgxd)
continue;
lmac_bmap = cgx_get_lmac_bmap(cgxd);
- for_each_set_bit(lmac, &lmac_bmap, MAX_LMAC_PER_CGX)
+ for_each_set_bit(lmac, &lmac_bmap, rvu->hw->lmac_per_cgx)
cgx_lmac_evh_unregister(cgxd, lmac);
}
@@ -468,6 +472,7 @@ void rvu_cgx_disable_dmac_entries(struct rvu *rvu, u16 pcifunc)
{
int pf = rvu_get_pf(pcifunc);
int i = 0, lmac_count = 0;
+ struct mac_ops *mac_ops;
u8 max_dmac_filters;
u8 cgx_id, lmac_id;
void *cgx_dev;
@@ -483,7 +488,12 @@ void rvu_cgx_disable_dmac_entries(struct rvu *rvu, u16 pcifunc)
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
cgx_dev = cgx_get_pdata(cgx_id);
lmac_count = cgx_get_lmac_cnt(cgx_dev);
- max_dmac_filters = MAX_DMAC_ENTRIES_PER_CGX / lmac_count;
+
+ mac_ops = get_mac_ops(cgx_dev);
+ if (!mac_ops)
+ return;
+
+ max_dmac_filters = mac_ops->dmac_filter_count / lmac_count;
for (i = 0; i < max_dmac_filters; i++)
cgx_lmac_addr_del(cgx_id, lmac_id, i);
@@ -569,6 +579,7 @@ int rvu_mbox_handler_cgx_fec_stats(struct rvu *rvu,
struct cgx_fec_stats_rsp *rsp)
{
int pf = rvu_get_pf(req->hdr.pcifunc);
+ struct mac_ops *mac_ops;
u8 cgx_idx, lmac;
void *cgxd;
@@ -577,7 +588,8 @@ int rvu_mbox_handler_cgx_fec_stats(struct rvu *rvu,
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac);
cgxd = rvu_cgx_pdata(cgx_idx, rvu);
- return cgx_get_fec_stats(cgxd, lmac, rsp);
+ mac_ops = get_mac_ops(cgxd);
+ return mac_ops->get_fec_stats(cgxd, lmac, rsp);
}
int rvu_mbox_handler_cgx_mac_addr_set(struct rvu *rvu,
@@ -1110,8 +1122,15 @@ int rvu_mbox_handler_cgx_get_aux_link_info(struct rvu *rvu, struct msg_req *req,
rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
- memcpy(&rsp->fwdata, &rvu->fwdata->cgx_fw_data[cgx_id][lmac_id],
- sizeof(struct cgx_lmac_fwdata_s));
+ if (rvu->hw->lmac_per_cgx == CGX_LMACS_USX)
+ memcpy(&rsp->fwdata,
+ &rvu->fwdata->cgx_fw_data_usx[cgx_id][lmac_id],
+ sizeof(struct cgx_lmac_fwdata_s));
+ else
+ memcpy(&rsp->fwdata,
+ &rvu->fwdata->cgx_fw_data[cgx_id][lmac_id],
+ sizeof(struct cgx_lmac_fwdata_s));
+
return 0;
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index 0eb3085c4c21..fa280ebd3052 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -2613,7 +2613,7 @@ static void rvu_dbg_cgx_init(struct rvu *rvu)
rvu->rvu_dbg.cgx = debugfs_create_dir(dname,
rvu->rvu_dbg.cgx_root);
- for_each_set_bit(lmac_id, &lmac_bmap, MAX_LMAC_PER_CGX) {
+ for_each_set_bit(lmac_id, &lmac_bmap, rvu->hw->lmac_per_cgx) {
/* lmac debugfs dir */
sprintf(dname, "lmac%d", lmac_id);
rvu->rvu_dbg.lmac =
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index a62c1b322012..6b8747ebc08c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -3197,8 +3197,12 @@ static void rvu_get_lbk_link_max_frs(struct rvu *rvu, u16 *max_mtu)
static void rvu_get_lmac_link_max_frs(struct rvu *rvu, u16 *max_mtu)
{
- /* RPM supports FIFO len 128 KB */
- if (rvu_cgx_get_fifolen(rvu) == 0x20000)
+ int fifo_size = rvu_cgx_get_fifolen(rvu);
+
+ /* RPM supports FIFO len 128 KB and RPM2 supports double the
+ * FIFO len to accommodate 8 LMACS
+ */
+ if (fifo_size == 0x20000 || fifo_size == 0x40000)
*max_mtu = CN10K_LMAC_LINK_MAX_FRS;
else
*max_mtu = NIC_HW_MAX_FRS;
@@ -4109,7 +4113,7 @@ static void nix_link_config(struct rvu *rvu, int blkaddr,
/* Get LMAC id's from bitmap */
lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu));
- for_each_set_bit(iter, &lmac_bmap, MAX_LMAC_PER_CGX) {
+ for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) {
lmac_fifo_len = rvu_cgx_get_lmac_fifolen(rvu, cgx, iter);
if (!lmac_fifo_len) {
dev_err(rvu->dev,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
index 00aef8f5ac29..f69102d20c90 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
@@ -1956,7 +1956,9 @@ int rvu_npc_exact_init(struct rvu *rvu)
/* Install SDP drop rule */
drop_mcam_idx = &table->num_drop_rules;
- max_lmac_cnt = rvu->cgx_cnt_max * MAX_LMAC_PER_CGX + PF_CGXMAP_BASE;
+ max_lmac_cnt = rvu->cgx_cnt_max * rvu->hw->lmac_per_cgx +
+ PF_CGXMAP_BASE;
+
for (i = PF_CGXMAP_BASE; i < max_lmac_cnt; i++) {
if (rvu->pf2cgxlmac_map[i] == 0xFF)
continue;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index 0eb74e8c553d..0f8d1a69139f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -1268,6 +1268,39 @@ end:
return err;
}
+static void otx2_get_fec_stats(struct net_device *netdev,
+ struct ethtool_fec_stats *fec_stats)
+{
+ struct otx2_nic *pfvf = netdev_priv(netdev);
+ struct cgx_fw_data *rsp;
+
+ otx2_update_lmac_fec_stats(pfvf);
+
+ /* Report MAC FEC stats */
+ fec_stats->corrected_blocks.total = pfvf->hw.cgx_fec_corr_blks;
+ fec_stats->uncorrectable_blocks.total = pfvf->hw.cgx_fec_uncorr_blks;
+
+ rsp = otx2_get_fwdata(pfvf);
+ if (!IS_ERR(rsp) && rsp->fwdata.phy.misc.has_fec_stats &&
+ !otx2_get_phy_fec_stats(pfvf)) {
+ /* Fetch fwdata again because it's been recently populated with
+ * latest PHY FEC stats.
+ */
+ rsp = otx2_get_fwdata(pfvf);
+ if (!IS_ERR(rsp)) {
+ struct fec_stats_s *p = &rsp->fwdata.phy.fec_stats;
+
+ if (pfvf->linfo.fec == OTX2_FEC_BASER) {
+ fec_stats->corrected_blocks.total = p->brfec_corr_blks;
+ fec_stats->uncorrectable_blocks.total = p->brfec_uncorr_blks;
+ } else {
+ fec_stats->corrected_blocks.total = p->rsfec_corr_cws;
+ fec_stats->uncorrectable_blocks.total = p->rsfec_uncorr_cws;
+ }
+ }
+ }
+}
+
static const struct ethtool_ops otx2_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
ETHTOOL_COALESCE_MAX_FRAMES |
@@ -1298,6 +1331,7 @@ static const struct ethtool_ops otx2_ethtool_ops = {
.get_pauseparam = otx2_get_pauseparam,
.set_pauseparam = otx2_set_pauseparam,
.get_ts_info = otx2_get_ts_info,
+ .get_fec_stats = otx2_get_fec_stats,
.get_fecparam = otx2_get_fecparam,
.set_fecparam = otx2_set_fecparam,
.get_link_ksettings = otx2_get_link_ksettings,
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index e421714524c2..044cc211424e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -1159,7 +1159,12 @@ int otx2_init_tc(struct otx2_nic *nic)
return err;
tc->flow_ht_params = tc_flow_ht_params;
- return rhashtable_init(&tc->flow_table, &tc->flow_ht_params);
+ err = rhashtable_init(&tc->flow_table, &tc->flow_ht_params);
+ if (err) {
+ kfree(tc->tc_entries_bitmap);
+ tc->tc_entries_bitmap = NULL;
+ }
+ return err;
}
EXPORT_SYMBOL(otx2_init_tc);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 8b93dab79141..e3de9a53b2d9 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -4593,6 +4593,7 @@ static const struct mtk_soc_data mt7986_data = {
.hw_features = MTK_HW_FEATURES,
.required_clks = MT7986_CLKS_BITMAP,
.required_pctl = false,
+ .offload_version = 2,
.hash_offset = 4,
.foe_entry_size = sizeof(struct mtk_foe_entry),
.txrx = {
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
index d041615b2bac..a6271449617f 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -174,9 +174,10 @@ mtk_wed_wo_reset(struct mtk_wed_device *dev)
mtk_wdma_tx_reset(dev);
mtk_wed_reset(dev, MTK_WED_RESET_WED);
- mtk_wed_mcu_send_msg(wo, MTK_WED_MODULE_ID_WO,
- MTK_WED_WO_CMD_CHANGE_STATE, &state,
- sizeof(state), false);
+ if (mtk_wed_mcu_send_msg(wo, MTK_WED_MODULE_ID_WO,
+ MTK_WED_WO_CMD_CHANGE_STATE, &state,
+ sizeof(state), false))
+ return;
if (readx_poll_timeout(mtk_wed_wo_read_status, dev, val,
val == MTK_WED_WOIF_DISABLE_DONE,
@@ -576,12 +577,10 @@ mtk_wed_deinit(struct mtk_wed_device *dev)
}
static void
-mtk_wed_detach(struct mtk_wed_device *dev)
+__mtk_wed_detach(struct mtk_wed_device *dev)
{
struct mtk_wed_hw *hw = dev->hw;
- mutex_lock(&hw_lock);
-
mtk_wed_deinit(dev);
mtk_wdma_rx_reset(dev);
@@ -590,9 +589,11 @@ mtk_wed_detach(struct mtk_wed_device *dev)
mtk_wed_free_tx_rings(dev);
if (mtk_wed_get_rx_capa(dev)) {
- mtk_wed_wo_reset(dev);
+ if (hw->wed_wo)
+ mtk_wed_wo_reset(dev);
mtk_wed_free_rx_rings(dev);
- mtk_wed_wo_deinit(hw);
+ if (hw->wed_wo)
+ mtk_wed_wo_deinit(hw);
}
if (dev->wlan.bus_type == MTK_WED_BUS_PCIE) {
@@ -612,6 +613,13 @@ mtk_wed_detach(struct mtk_wed_device *dev)
module_put(THIS_MODULE);
hw->wed_dev = NULL;
+}
+
+static void
+mtk_wed_detach(struct mtk_wed_device *dev)
+{
+ mutex_lock(&hw_lock);
+ __mtk_wed_detach(dev);
mutex_unlock(&hw_lock);
}
@@ -1210,7 +1218,8 @@ mtk_wed_wdma_rx_ring_setup(struct mtk_wed_device *dev, int idx, int size,
}
static int
-mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size)
+mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size,
+ bool reset)
{
u32 desc_size = sizeof(struct mtk_wdma_desc) * dev->hw->version;
struct mtk_wed_ring *wdma;
@@ -1219,8 +1228,8 @@ mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size)
return -EINVAL;
wdma = &dev->tx_wdma[idx];
- if (mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE, desc_size,
- true))
+ if (!reset && mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE,
+ desc_size, true))
return -ENOMEM;
wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_BASE,
@@ -1230,6 +1239,9 @@ mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size)
wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_CPU_IDX, 0);
wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_DMA_IDX, 0);
+ if (reset)
+ mtk_wed_ring_reset(wdma, MTK_WED_WDMA_RING_SIZE, true);
+
if (!idx) {
wed_w32(dev, MTK_WED_WDMA_RING_TX + MTK_WED_RING_OFS_BASE,
wdma->desc_phys);
@@ -1490,8 +1502,10 @@ mtk_wed_attach(struct mtk_wed_device *dev)
ret = mtk_wed_wo_init(hw);
}
out:
- if (ret)
- mtk_wed_detach(dev);
+ if (ret) {
+ dev_err(dev->hw->dev, "failed to attach wed device\n");
+ __mtk_wed_detach(dev);
+ }
unlock:
mutex_unlock(&hw_lock);
@@ -1569,18 +1583,20 @@ mtk_wed_txfree_ring_setup(struct mtk_wed_device *dev, void __iomem *regs)
}
static int
-mtk_wed_rx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs)
+mtk_wed_rx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs,
+ bool reset)
{
struct mtk_wed_ring *ring = &dev->rx_ring[idx];
if (WARN_ON(idx >= ARRAY_SIZE(dev->rx_ring)))
return -EINVAL;
- if (mtk_wed_ring_alloc(dev, ring, MTK_WED_RX_RING_SIZE,
- sizeof(*ring->desc), false))
+ if (!reset && mtk_wed_ring_alloc(dev, ring, MTK_WED_RX_RING_SIZE,
+ sizeof(*ring->desc), false))
return -ENOMEM;
- if (mtk_wed_wdma_tx_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE))
+ if (mtk_wed_wdma_tx_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE,
+ reset))
return -ENOMEM;
ring->reg_base = MTK_WED_RING_RX_DATA(idx);
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c
index f9539e6233c9..6bad0d262f28 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c
@@ -207,6 +207,9 @@ int mtk_wed_mcu_msg_update(struct mtk_wed_device *dev, int id, void *data,
if (dev->hw->version == 1)
return 0;
+ if (WARN_ON(!wo))
+ return -ENODEV;
+
return mtk_wed_mcu_send_msg(wo, MTK_WED_MODULE_ID_WO, id, data, len,
true);
}
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.c b/drivers/net/ethernet/mediatek/mtk_wed_wo.c
index a219da85f4db..a0a39643caf7 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_wo.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.c
@@ -408,8 +408,10 @@ mtk_wed_wo_hardware_init(struct mtk_wed_wo *wo)
return -ENODEV;
wo->mmio.regs = syscon_regmap_lookup_by_phandle(np, NULL);
- if (IS_ERR_OR_NULL(wo->mmio.regs))
- return PTR_ERR(wo->mmio.regs);
+ if (IS_ERR(wo->mmio.regs)) {
+ ret = PTR_ERR(wo->mmio.regs);
+ goto error_put;
+ }
wo->mmio.irq = irq_of_parse_and_map(np, 0);
wo->mmio.irq_mask = MTK_WED_WO_ALL_INT_MASK;
@@ -457,7 +459,8 @@ mtk_wed_wo_hardware_init(struct mtk_wed_wo *wo)
error:
devm_free_irq(wo->hw->dev, wo->mmio.irq, wo);
-
+error_put:
+ of_node_put(np);
return ret;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 43a4102e9c09..c5758637b7be 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -65,7 +65,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
ring->size = size;
ring->size_mask = size - 1;
ring->sp_stride = stride;
- ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS;
+ ring->full_size = ring->size - HEADROOM - MLX4_MAX_DESC_TXBBS;
tmp = size * sizeof(struct mlx4_en_tx_info);
ring->tx_info = kvmalloc_node(tmp, GFP_KERNEL, node);
@@ -77,9 +77,11 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
ring->tx_info, tmp);
- ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node);
+ ring->bounce_buf = kmalloc_node(MLX4_TX_BOUNCE_BUFFER_SIZE,
+ GFP_KERNEL, node);
if (!ring->bounce_buf) {
- ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
+ ring->bounce_buf = kmalloc(MLX4_TX_BOUNCE_BUFFER_SIZE,
+ GFP_KERNEL);
if (!ring->bounce_buf) {
err = -ENOMEM;
goto err_info;
@@ -909,11 +911,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
/* Align descriptor to TXBB size */
desc_size = ALIGN(real_size, TXBB_SIZE);
nr_txbb = desc_size >> LOG_TXBB_SIZE;
- if (unlikely(nr_txbb > MAX_DESC_TXBBS)) {
- if (netif_msg_tx_err(priv))
- en_warn(priv, "Oversized header or SG list\n");
- goto tx_drop_count;
- }
bf_ok = ring->bf_enabled;
if (skb_vlan_tag_present(skb)) {
@@ -941,6 +938,11 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
if (likely(index + nr_txbb <= ring->size))
tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
else {
+ if (unlikely(nr_txbb > MLX4_MAX_DESC_TXBBS)) {
+ if (netif_msg_tx_err(priv))
+ en_warn(priv, "Oversized header or SG list\n");
+ goto tx_drop_count;
+ }
tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf;
bounce = true;
bf_ok = false;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index e132ff4c82f2..3d4226ddba5e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -89,9 +89,19 @@
#define MLX4_EN_FILTER_HASH_SHIFT 4
#define MLX4_EN_FILTER_EXPIRY_QUOTA 60
-/* Typical TSO descriptor with 16 gather entries is 352 bytes... */
-#define MAX_DESC_SIZE 512
-#define MAX_DESC_TXBBS (MAX_DESC_SIZE / TXBB_SIZE)
+#define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg)
+#define DS_SIZE sizeof(struct mlx4_wqe_data_seg)
+
+/* Maximal size of the bounce buffer:
+ * 256 bytes for LSO headers.
+ * CTRL_SIZE for control desc.
+ * DS_SIZE if skb->head contains some payload.
+ * MAX_SKB_FRAGS frags.
+ */
+#define MLX4_TX_BOUNCE_BUFFER_SIZE \
+ ALIGN(256 + CTRL_SIZE + DS_SIZE + MAX_SKB_FRAGS * DS_SIZE, TXBB_SIZE)
+
+#define MLX4_MAX_DESC_TXBBS (MLX4_TX_BOUNCE_BUFFER_SIZE / TXBB_SIZE)
/*
* OS related constants and tunables
@@ -217,9 +227,7 @@ struct mlx4_en_tx_info {
#define MLX4_EN_BIT_DESC_OWN 0x80000000
-#define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg)
#define MLX4_EN_MEMTYPE_PAD 0x100
-#define DS_SIZE sizeof(struct mlx4_wqe_data_seg)
struct mlx4_en_tx_desc {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index a22c32aabf11..cd4a1ab0ea78 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -111,6 +111,7 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o
steering/dr_ste_v2.o \
steering/dr_cmd.o steering/dr_fw.o \
steering/dr_action.o steering/fs_dr.o \
+ steering/dr_definer.o \
steering/dr_dbg.o lib/smfs.o
#
# SF device
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 751bc4a9edcf..ddb197970c22 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -314,6 +314,10 @@ static const struct devlink_ops mlx5_devlink_ops = {
.rate_node_new = mlx5_esw_devlink_rate_node_new,
.rate_node_del = mlx5_esw_devlink_rate_node_del,
.rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set,
+ .port_fn_roce_get = mlx5_devlink_port_fn_roce_get,
+ .port_fn_roce_set = mlx5_devlink_port_fn_roce_set,
+ .port_fn_migratable_get = mlx5_devlink_port_fn_migratable_get,
+ .port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set,
#endif
#ifdef CONFIG_MLX5_SF_MANAGER
.port_new = mlx5_devlink_sf_port_new,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index c5bb79a4fa57..2732128e7a6e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -228,6 +228,17 @@ const char *parse_fs_hdrs(struct trace_seq *p,
return ret;
}
+static const char
+*fs_dest_range_field_to_str(enum mlx5_flow_dest_range_field field)
+{
+ switch (field) {
+ case MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN:
+ return "packet len";
+ default:
+ return "unknown dest range field";
+ }
+}
+
const char *parse_fs_dst(struct trace_seq *p,
const struct mlx5_flow_destination *dst,
u32 counter_id)
@@ -259,6 +270,11 @@ const char *parse_fs_dst(struct trace_seq *p,
case MLX5_FLOW_DESTINATION_TYPE_PORT:
trace_seq_printf(p, "port\n");
break;
+ case MLX5_FLOW_DESTINATION_TYPE_RANGE:
+ trace_seq_printf(p, "field=%s min=%d max=%d\n",
+ fs_dest_range_field_to_str(dst->range.field),
+ dst->range.min, dst->range.max);
+ break;
case MLX5_FLOW_DESTINATION_TYPE_NONE:
trace_seq_printf(p, "none\n");
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
index 21aab96357b5..a278f52d52b0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
@@ -28,4 +28,5 @@ tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state,
struct mlx5e_tc_act mlx5e_tc_act_accept = {
.can_offload = tc_act_can_offload_accept,
.parse_action = tc_act_parse_accept,
+ .is_terminating_action = true,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
index 3337241cfd84..eba0c8698926 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
@@ -11,7 +11,7 @@ static struct mlx5e_tc_act *tc_acts_fdb[NUM_FLOW_ACTIONS] = {
[FLOW_ACTION_DROP] = &mlx5e_tc_act_drop,
[FLOW_ACTION_TRAP] = &mlx5e_tc_act_trap,
[FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto,
- [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred,
+ [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_redirect,
[FLOW_ACTION_MIRRED] = &mlx5e_tc_act_mirred,
[FLOW_ACTION_REDIRECT_INGRESS] = &mlx5e_tc_act_redirect_ingress,
[FLOW_ACTION_VLAN_PUSH] = &mlx5e_tc_act_vlan,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
index e1570ff056ae..8346557eeaf6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
@@ -32,6 +32,11 @@ struct mlx5e_tc_act_parse_state {
struct mlx5_tc_ct_priv *ct_priv;
};
+struct mlx5e_tc_act_branch_ctrl {
+ enum flow_action_id act_id;
+ u32 extval;
+};
+
struct mlx5e_tc_act {
bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state,
const struct flow_action_entry *act,
@@ -60,6 +65,12 @@ struct mlx5e_tc_act {
int (*stats_action)(struct mlx5e_priv *priv,
struct flow_offload_action *fl_act);
+
+ bool (*get_branch_ctrl)(const struct flow_action_entry *act,
+ struct mlx5e_tc_act_branch_ctrl *cond_true,
+ struct mlx5e_tc_act_branch_ctrl *cond_false);
+
+ bool is_terminating_action;
};
struct mlx5e_tc_flow_action {
@@ -81,6 +92,7 @@ extern struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle;
extern struct mlx5e_tc_act mlx5e_tc_act_mpls_push;
extern struct mlx5e_tc_act mlx5e_tc_act_mpls_pop;
extern struct mlx5e_tc_act mlx5e_tc_act_mirred;
+extern struct mlx5e_tc_act mlx5e_tc_act_redirect;
extern struct mlx5e_tc_act mlx5e_tc_act_mirred_nic;
extern struct mlx5e_tc_act mlx5e_tc_act_ct;
extern struct mlx5e_tc_act mlx5e_tc_act_sample;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
index dd025a95c439..7d16aeabb119 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
@@ -27,4 +27,5 @@ tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state,
struct mlx5e_tc_act mlx5e_tc_act_drop = {
.can_offload = tc_act_can_offload_drop,
.parse_action = tc_act_parse_drop,
+ .is_terminating_action = true,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
index 25174f68613e..0923e6db2d0a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
@@ -121,4 +121,5 @@ struct mlx5e_tc_act mlx5e_tc_act_goto = {
.can_offload = tc_act_can_offload_goto,
.parse_action = tc_act_parse_goto,
.post_parse = tc_act_post_parse_goto,
+ .is_terminating_action = true,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
index 4ac7de3f6afa..78c427b38048 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
@@ -334,4 +334,11 @@ tc_act_parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
struct mlx5e_tc_act mlx5e_tc_act_mirred = {
.can_offload = tc_act_can_offload_mirred,
.parse_action = tc_act_parse_mirred,
+ .is_terminating_action = false,
+};
+
+struct mlx5e_tc_act mlx5e_tc_act_redirect = {
+ .can_offload = tc_act_can_offload_mirred,
+ .parse_action = tc_act_parse_mirred,
+ .is_terminating_action = true,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
index 90b4c1b34776..7f409692b18f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
@@ -48,4 +48,5 @@ tc_act_parse_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state,
struct mlx5e_tc_act mlx5e_tc_act_mirred_nic = {
.can_offload = tc_act_can_offload_mirred_nic,
.parse_action = tc_act_parse_mirred_nic,
+ .is_terminating_action = true,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
index c8e5ca65bb6e..512d43148922 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
@@ -3,6 +3,45 @@
#include "act.h"
#include "en/tc_priv.h"
+#include "fs_core.h"
+
+static bool police_act_validate_control(enum flow_action_id act_id,
+ struct netlink_ext_ack *extack)
+{
+ if (act_id != FLOW_ACTION_PIPE &&
+ act_id != FLOW_ACTION_ACCEPT &&
+ act_id != FLOW_ACTION_JUMP &&
+ act_id != FLOW_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when conform-exceed action is not pipe, ok, jump or drop");
+ return false;
+ }
+
+ return true;
+}
+
+static int police_act_validate(const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ if (!police_act_validate_control(act->police.exceed.act_id, extack) ||
+ !police_act_validate_control(act->police.notexceed.act_id, extack))
+ return -EOPNOTSUPP;
+
+ if (act->police.peakrate_bytes_ps ||
+ act->police.avrate || act->police.overhead) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when peakrate/avrate/overhead is configured");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.rate_pkt_ps) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "QoS offload not support packets per second");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
static bool
tc_act_can_offload_police(struct mlx5e_tc_act_parse_state *parse_state,
@@ -10,14 +49,10 @@ tc_act_can_offload_police(struct mlx5e_tc_act_parse_state *parse_state,
int act_index,
struct mlx5_flow_attr *attr)
{
- if (act->police.notexceed.act_id != FLOW_ACTION_PIPE &&
- act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) {
- NL_SET_ERR_MSG_MOD(parse_state->extack,
- "Offload not supported when conform action is not pipe or ok");
- return false;
- }
- if (mlx5e_policer_validate(parse_state->flow_action, act,
- parse_state->extack))
+ int err;
+
+ err = police_act_validate(act, parse_state->extack);
+ if (err)
return false;
return !!mlx5e_get_flow_meters(parse_state->flow->priv->mdev);
@@ -37,6 +72,8 @@ fill_meter_params_from_act(const struct flow_action_entry *act,
params->mode = MLX5_RATE_LIMIT_PPS;
params->rate = act->police.rate_pkt_ps;
params->burst = act->police.burst_pkt;
+ } else if (act->police.mtu) {
+ params->mtu = act->police.mtu;
} else {
return -EOPNOTSUPP;
}
@@ -50,14 +87,25 @@ tc_act_parse_police(struct mlx5e_tc_act_parse_state *parse_state,
struct mlx5e_priv *priv,
struct mlx5_flow_attr *attr)
{
+ enum mlx5_flow_namespace_type ns = mlx5e_get_flow_namespace(parse_state->flow);
+ struct mlx5e_flow_meter_params *params = &attr->meter_attr.params;
int err;
- err = fill_meter_params_from_act(act, &attr->meter_attr.params);
+ err = fill_meter_params_from_act(act, params);
if (err)
return err;
- attr->action |= MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO;
- attr->exe_aso_type = MLX5_EXE_ASO_FLOW_METER;
+ if (params->mtu) {
+ if (!(mlx5_fs_get_capabilities(priv->mdev, ns) &
+ MLX5_FLOW_STEERING_CAP_MATCH_RANGES))
+ return -EOPNOTSUPP;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->flags |= MLX5_ATTR_FLAG_MTU;
+ } else {
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO;
+ attr->exe_aso_type = MLX5_EXE_ASO_FLOW_METER;
+ }
return 0;
}
@@ -79,7 +127,7 @@ tc_act_police_offload(struct mlx5e_priv *priv,
struct mlx5e_flow_meter_handle *meter;
int err = 0;
- err = mlx5e_policer_validate(&fl_act->action, act, fl_act->extack);
+ err = police_act_validate(act, fl_act->extack);
if (err)
return err;
@@ -147,6 +195,19 @@ tc_act_police_stats(struct mlx5e_priv *priv,
return 0;
}
+static bool
+tc_act_police_get_branch_ctrl(const struct flow_action_entry *act,
+ struct mlx5e_tc_act_branch_ctrl *cond_true,
+ struct mlx5e_tc_act_branch_ctrl *cond_false)
+{
+ cond_true->act_id = act->police.notexceed.act_id;
+ cond_true->extval = act->police.notexceed.extval;
+
+ cond_false->act_id = act->police.exceed.act_id;
+ cond_false->extval = act->police.exceed.extval;
+ return true;
+}
+
struct mlx5e_tc_act mlx5e_tc_act_police = {
.can_offload = tc_act_can_offload_police,
.parse_action = tc_act_parse_police,
@@ -154,4 +215,5 @@ struct mlx5e_tc_act mlx5e_tc_act_police = {
.offload_action = tc_act_police_offload,
.destroy_action = tc_act_police_destroy,
.stats_action = tc_act_police_stats,
+ .get_branch_ctrl = tc_act_police_get_branch_ctrl,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c
index 25cd449e8aad..78af8a3175bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c
@@ -240,7 +240,7 @@ mlx5e_flow_meter_destroy_aso_obj(struct mlx5_core_dev *mdev, u32 obj_id)
}
static struct mlx5e_flow_meter_handle *
-__mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters)
+__mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters, bool alloc_aso)
{
struct mlx5_core_dev *mdev = flow_meters->mdev;
struct mlx5e_flow_meter_aso_obj *meters_obj;
@@ -256,16 +256,19 @@ __mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters)
counter = mlx5_fc_create(mdev, true);
if (IS_ERR(counter)) {
err = PTR_ERR(counter);
- goto err_red_counter;
+ goto err_drop_counter;
}
- meter->red_counter = counter;
+ meter->drop_counter = counter;
counter = mlx5_fc_create(mdev, true);
if (IS_ERR(counter)) {
err = PTR_ERR(counter);
- goto err_green_counter;
+ goto err_act_counter;
}
- meter->green_counter = counter;
+ meter->act_counter = counter;
+
+ if (!alloc_aso)
+ goto no_aso;
meters_obj = list_first_entry_or_null(&flow_meters->partial_list,
struct mlx5e_flow_meter_aso_obj,
@@ -299,11 +302,12 @@ __mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters)
}
bitmap_set(meters_obj->meters_map, pos, 1);
- meter->flow_meters = flow_meters;
meter->meters_obj = meters_obj;
meter->obj_id = meters_obj->base_id + pos / 2;
meter->idx = pos % 2;
+no_aso:
+ meter->flow_meters = flow_meters;
mlx5_core_dbg(mdev, "flow meter allocated, obj_id=0x%x, index=%d\n",
meter->obj_id, meter->idx);
@@ -312,10 +316,10 @@ __mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters)
err_mem:
mlx5e_flow_meter_destroy_aso_obj(mdev, id);
err_create:
- mlx5_fc_destroy(mdev, meter->green_counter);
-err_green_counter:
- mlx5_fc_destroy(mdev, meter->red_counter);
-err_red_counter:
+ mlx5_fc_destroy(mdev, meter->act_counter);
+err_act_counter:
+ mlx5_fc_destroy(mdev, meter->drop_counter);
+err_drop_counter:
kfree(meter);
return ERR_PTR(err);
}
@@ -328,8 +332,11 @@ __mlx5e_flow_meter_free(struct mlx5e_flow_meter_handle *meter)
struct mlx5e_flow_meter_aso_obj *meters_obj;
int n, pos;
- mlx5_fc_destroy(mdev, meter->green_counter);
- mlx5_fc_destroy(mdev, meter->red_counter);
+ mlx5_fc_destroy(mdev, meter->act_counter);
+ mlx5_fc_destroy(mdev, meter->drop_counter);
+
+ if (meter->params.mtu)
+ goto out_no_aso;
meters_obj = meter->meters_obj;
pos = (meter->obj_id - meters_obj->base_id) * 2 + meter->idx;
@@ -344,6 +351,7 @@ __mlx5e_flow_meter_free(struct mlx5e_flow_meter_handle *meter)
list_add(&meters_obj->entry, &flow_meters->partial_list);
}
+out_no_aso:
mlx5_core_dbg(mdev, "flow meter freed, obj_id=0x%x, index=%d\n",
meter->obj_id, meter->idx);
kfree(meter);
@@ -408,12 +416,13 @@ mlx5e_tc_meter_alloc(struct mlx5e_flow_meters *flow_meters,
{
struct mlx5e_flow_meter_handle *meter;
- meter = __mlx5e_flow_meter_alloc(flow_meters);
+ meter = __mlx5e_flow_meter_alloc(flow_meters, !params->mtu);
if (IS_ERR(meter))
return meter;
hash_add(flow_meters->hashtbl, &meter->hlist, params->index);
meter->params.index = params->index;
+ meter->params.mtu = params->mtu;
meter->refcnt++;
return meter;
@@ -574,8 +583,8 @@ mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter,
u64 bytes1, packets1, lastuse1;
u64 bytes2, packets2, lastuse2;
- mlx5_fc_query_cached(meter->green_counter, &bytes1, &packets1, &lastuse1);
- mlx5_fc_query_cached(meter->red_counter, &bytes2, &packets2, &lastuse2);
+ mlx5_fc_query_cached(meter->act_counter, &bytes1, &packets1, &lastuse1);
+ mlx5_fc_query_cached(meter->drop_counter, &bytes2, &packets2, &lastuse2);
*bytes = bytes1 + bytes2;
*packets = packets1 + packets2;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h
index 6de6e8a16327..9b795cd106bb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h
@@ -20,6 +20,7 @@ struct mlx5e_flow_meter_params {
u32 index;
u64 rate;
u64 burst;
+ u32 mtu;
};
struct mlx5e_flow_meter_handle {
@@ -32,8 +33,8 @@ struct mlx5e_flow_meter_handle {
struct hlist_node hlist;
struct mlx5e_flow_meter_params params;
- struct mlx5_fc *green_counter;
- struct mlx5_fc *red_counter;
+ struct mlx5_fc *act_counter;
+ struct mlx5_fc *drop_counter;
};
struct mlx5e_meter_attr {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
index 8b77e822810e..8d7d761482d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
@@ -8,23 +8,56 @@
#define MLX5_PACKET_COLOR_BITS MLX5_REG_MAPPING_MBITS(PACKET_COLOR_TO_REG)
#define MLX5_PACKET_COLOR_MASK MLX5_REG_MAPPING_MASK(PACKET_COLOR_TO_REG)
-struct mlx5e_post_meter_priv {
+struct mlx5e_post_meter_rate_table {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct mlx5_flow_handle *green_rule;
+ struct mlx5_flow_attr *green_attr;
+ struct mlx5_flow_handle *red_rule;
+ struct mlx5_flow_attr *red_attr;
+};
+
+struct mlx5e_post_meter_mtu_table {
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg;
- struct mlx5_flow_handle *fwd_green_rule;
- struct mlx5_flow_handle *drop_red_rule;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+};
+
+struct mlx5e_post_meter_mtu_tables {
+ struct mlx5e_post_meter_mtu_table green_table;
+ struct mlx5e_post_meter_mtu_table red_table;
+};
+
+struct mlx5e_post_meter_priv {
+ enum mlx5e_post_meter_type type;
+ union {
+ struct mlx5e_post_meter_rate_table rate_steering_table;
+ struct mlx5e_post_meter_mtu_tables mtu_tables;
+ };
};
struct mlx5_flow_table *
mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter)
{
- return post_meter->ft;
+ return post_meter->rate_steering_table.ft;
}
-static int
+struct mlx5_flow_table *
+mlx5e_post_meter_get_mtu_true_ft(struct mlx5e_post_meter_priv *post_meter)
+{
+ return post_meter->mtu_tables.green_table.ft;
+}
+
+struct mlx5_flow_table *
+mlx5e_post_meter_get_mtu_false_ft(struct mlx5e_post_meter_priv *post_meter)
+{
+ return post_meter->mtu_tables.red_table.ft;
+}
+
+static struct mlx5_flow_table *
mlx5e_post_meter_table_create(struct mlx5e_priv *priv,
- enum mlx5_flow_namespace_type ns_type,
- struct mlx5e_post_meter_priv *post_meter)
+ enum mlx5_flow_namespace_type ns_type)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_namespace *root_ns;
@@ -32,7 +65,7 @@ mlx5e_post_meter_table_create(struct mlx5e_priv *priv,
root_ns = mlx5_get_flow_namespace(priv->mdev, ns_type);
if (!root_ns) {
mlx5_core_warn(priv->mdev, "Failed to get namespace for flow meter\n");
- return -EOPNOTSUPP;
+ return ERR_PTR(-EOPNOTSUPP);
}
ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
@@ -40,19 +73,14 @@ mlx5e_post_meter_table_create(struct mlx5e_priv *priv,
ft_attr.max_fte = 2;
ft_attr.level = 1;
- post_meter->ft = mlx5_create_flow_table(root_ns, &ft_attr);
- if (IS_ERR(post_meter->ft)) {
- mlx5_core_warn(priv->mdev, "Failed to create post_meter table\n");
- return PTR_ERR(post_meter->ft);
- }
-
- return 0;
+ return mlx5_create_flow_table(root_ns, &ft_attr);
}
static int
-mlx5e_post_meter_fg_create(struct mlx5e_priv *priv,
- struct mlx5e_post_meter_priv *post_meter)
+mlx5e_post_meter_rate_fg_create(struct mlx5e_priv *priv,
+ struct mlx5e_post_meter_priv *post_meter)
{
+ struct mlx5e_post_meter_rate_table *table = &post_meter->rate_steering_table;
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
void *misc2, *match_criteria;
u32 *flow_group_in;
@@ -71,25 +99,58 @@ mlx5e_post_meter_fg_create(struct mlx5e_priv *priv,
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
- post_meter->fg = mlx5_create_flow_group(post_meter->ft, flow_group_in);
- if (IS_ERR(post_meter->fg)) {
+ table->fg = mlx5_create_flow_group(table->ft, flow_group_in);
+ if (IS_ERR(table->fg)) {
mlx5_core_warn(priv->mdev, "Failed to create post_meter flow group\n");
- err = PTR_ERR(post_meter->fg);
+ err = PTR_ERR(table->fg);
}
kvfree(flow_group_in);
return err;
}
+static struct mlx5_flow_handle *
+mlx5e_post_meter_add_rule(struct mlx5e_priv *priv,
+ struct mlx5e_post_meter_priv *post_meter,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_fc *act_counter,
+ struct mlx5_fc *drop_counter)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_flow_handle *ret;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_DROP)
+ attr->counter = drop_counter;
+ else
+ attr->counter = act_counter;
+
+ attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
+ attr->outer_match_level = MLX5_MATCH_NONE;
+ attr->chain = 0;
+ attr->prio = 0;
+
+ ret = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+
+ /* We did not create the counter, so we can't delete it.
+ * Avoid freeing the counter when the attr is deleted in free_branching_attr
+ */
+ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
+
+ return ret;
+}
+
static int
-mlx5e_post_meter_rules_create(struct mlx5e_priv *priv,
- struct mlx5e_post_meter_priv *post_meter,
- struct mlx5e_post_act *post_act,
- struct mlx5_fc *green_counter,
- struct mlx5_fc *red_counter)
-{
- struct mlx5_flow_destination dest[2] = {};
- struct mlx5_flow_act flow_act = {};
+mlx5e_post_meter_rate_rules_create(struct mlx5e_priv *priv,
+ struct mlx5e_post_meter_priv *post_meter,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *act_counter,
+ struct mlx5_fc *drop_counter,
+ struct mlx5_flow_attr *green_attr,
+ struct mlx5_flow_attr *red_attr)
+{
+ struct mlx5e_post_meter_rate_table *table = &post_meter->rate_steering_table;
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
int err;
@@ -100,72 +161,242 @@ mlx5e_post_meter_rules_create(struct mlx5e_priv *priv,
mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG,
MLX5_FLOW_METER_COLOR_RED, MLX5_PACKET_COLOR_MASK);
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
- dest[0].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest[0].counter_id = mlx5_fc_id(red_counter);
-
- rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 1);
+ red_attr->ft = post_meter->rate_steering_table.ft;
+ rule = mlx5e_post_meter_add_rule(priv, post_meter, spec, red_attr,
+ act_counter, drop_counter);
if (IS_ERR(rule)) {
- mlx5_core_warn(priv->mdev, "Failed to create post_meter flow drop rule\n");
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter exceed rule\n");
err = PTR_ERR(rule);
goto err_red;
}
- post_meter->drop_red_rule = rule;
+ table->red_rule = rule;
+ table->red_attr = red_attr;
mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG,
MLX5_FLOW_METER_COLOR_GREEN, MLX5_PACKET_COLOR_MASK);
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
- MLX5_FLOW_CONTEXT_ACTION_COUNT;
- dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest[0].ft = mlx5e_tc_post_act_get_ft(post_act);
- dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest[1].counter_id = mlx5_fc_id(green_counter);
-
- rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 2);
+ green_attr->ft = post_meter->rate_steering_table.ft;
+ rule = mlx5e_post_meter_add_rule(priv, post_meter, spec, green_attr,
+ act_counter, drop_counter);
if (IS_ERR(rule)) {
- mlx5_core_warn(priv->mdev, "Failed to create post_meter flow fwd rule\n");
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter notexceed rule\n");
err = PTR_ERR(rule);
goto err_green;
}
- post_meter->fwd_green_rule = rule;
+ table->green_rule = rule;
+ table->green_attr = green_attr;
kvfree(spec);
return 0;
err_green:
- mlx5_del_flow_rules(post_meter->drop_red_rule);
+ mlx5_del_flow_rules(table->red_rule);
err_red:
kvfree(spec);
return err;
}
static void
-mlx5e_post_meter_rules_destroy(struct mlx5e_post_meter_priv *post_meter)
+mlx5e_post_meter_rate_rules_destroy(struct mlx5_eswitch *esw,
+ struct mlx5e_post_meter_priv *post_meter)
{
- mlx5_del_flow_rules(post_meter->drop_red_rule);
- mlx5_del_flow_rules(post_meter->fwd_green_rule);
+ struct mlx5e_post_meter_rate_table *rate_table = &post_meter->rate_steering_table;
+
+ mlx5_eswitch_del_offloaded_rule(esw, rate_table->red_rule, rate_table->red_attr);
+ mlx5_eswitch_del_offloaded_rule(esw, rate_table->green_rule, rate_table->green_attr);
}
static void
-mlx5e_post_meter_fg_destroy(struct mlx5e_post_meter_priv *post_meter)
+mlx5e_post_meter_rate_fg_destroy(struct mlx5e_post_meter_priv *post_meter)
{
- mlx5_destroy_flow_group(post_meter->fg);
+ mlx5_destroy_flow_group(post_meter->rate_steering_table.fg);
}
static void
-mlx5e_post_meter_table_destroy(struct mlx5e_post_meter_priv *post_meter)
+mlx5e_post_meter_rate_table_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_destroy_flow_table(post_meter->rate_steering_table.ft);
+}
+
+static void
+mlx5e_post_meter_mtu_rules_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ struct mlx5e_post_meter_mtu_tables *mtu_tables = &post_meter->mtu_tables;
+
+ mlx5_del_flow_rules(mtu_tables->green_table.rule);
+ mlx5_del_flow_rules(mtu_tables->red_table.rule);
+}
+
+static void
+mlx5e_post_meter_mtu_fg_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ struct mlx5e_post_meter_mtu_tables *mtu_tables = &post_meter->mtu_tables;
+
+ mlx5_destroy_flow_group(mtu_tables->green_table.fg);
+ mlx5_destroy_flow_group(mtu_tables->red_table.fg);
+}
+
+static void
+mlx5e_post_meter_mtu_table_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ struct mlx5e_post_meter_mtu_tables *mtu_tables = &post_meter->mtu_tables;
+
+ mlx5_destroy_flow_table(mtu_tables->green_table.ft);
+ mlx5_destroy_flow_table(mtu_tables->red_table.ft);
+}
+
+static int
+mlx5e_post_meter_rate_create(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *act_counter,
+ struct mlx5_fc *drop_counter,
+ struct mlx5e_post_meter_priv *post_meter,
+ struct mlx5_flow_attr *green_attr,
+ struct mlx5_flow_attr *red_attr)
+{
+ struct mlx5_flow_table *ft;
+ int err;
+
+ post_meter->type = MLX5E_POST_METER_RATE;
+
+ ft = mlx5e_post_meter_table_create(priv, ns_type);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter table\n");
+ goto err_ft;
+ }
+
+ post_meter->rate_steering_table.ft = ft;
+
+ err = mlx5e_post_meter_rate_fg_create(priv, post_meter);
+ if (err)
+ goto err_fg;
+
+ err = mlx5e_post_meter_rate_rules_create(priv, post_meter, post_act,
+ act_counter, drop_counter,
+ green_attr, red_attr);
+ if (err)
+ goto err_rules;
+
+ return 0;
+
+err_rules:
+ mlx5e_post_meter_rate_fg_destroy(post_meter);
+err_fg:
+ mlx5e_post_meter_rate_table_destroy(post_meter);
+err_ft:
+ return err;
+}
+
+static int
+mlx5e_post_meter_create_mtu_table(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_meter_mtu_table *table)
{
- mlx5_destroy_flow_table(post_meter->ft);
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ u32 *flow_group_in;
+ int err;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ table->ft = mlx5e_post_meter_table_create(priv, ns_type);
+ if (IS_ERR(table->ft)) {
+ err = PTR_ERR(table->ft);
+ goto err_ft;
+ }
+
+ /* create miss group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+ fg = mlx5_create_flow_group(table->ft, flow_group_in);
+ if (IS_ERR(fg)) {
+ err = PTR_ERR(fg);
+ goto err_miss_grp;
+ }
+ table->fg = fg;
+
+ kvfree(flow_group_in);
+ return 0;
+
+err_miss_grp:
+ mlx5_destroy_flow_table(table->ft);
+err_ft:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static int
+mlx5e_post_meter_mtu_create(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *act_counter,
+ struct mlx5_fc *drop_counter,
+ struct mlx5e_post_meter_priv *post_meter,
+ struct mlx5_flow_attr *green_attr,
+ struct mlx5_flow_attr *red_attr)
+{
+ struct mlx5e_post_meter_mtu_tables *mtu_tables = &post_meter->mtu_tables;
+ static struct mlx5_flow_spec zero_spec = {};
+ struct mlx5_flow_handle *rule;
+ int err;
+
+ post_meter->type = MLX5E_POST_METER_MTU;
+
+ err = mlx5e_post_meter_create_mtu_table(priv, ns_type, &mtu_tables->green_table);
+ if (err)
+ goto err_green_ft;
+
+ green_attr->ft = mtu_tables->green_table.ft;
+ rule = mlx5e_post_meter_add_rule(priv, post_meter, &zero_spec, green_attr,
+ act_counter, drop_counter);
+ if (IS_ERR(rule)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter conform rule\n");
+ err = PTR_ERR(rule);
+ goto err_green_rule;
+ }
+ mtu_tables->green_table.rule = rule;
+ mtu_tables->green_table.attr = green_attr;
+
+ err = mlx5e_post_meter_create_mtu_table(priv, ns_type, &mtu_tables->red_table);
+ if (err)
+ goto err_red_ft;
+
+ red_attr->ft = mtu_tables->red_table.ft;
+ rule = mlx5e_post_meter_add_rule(priv, post_meter, &zero_spec, red_attr,
+ act_counter, drop_counter);
+ if (IS_ERR(rule)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter exceed rule\n");
+ err = PTR_ERR(rule);
+ goto err_red_rule;
+ }
+ mtu_tables->red_table.rule = rule;
+ mtu_tables->red_table.attr = red_attr;
+
+ return 0;
+
+err_red_rule:
+ mlx5_destroy_flow_table(mtu_tables->red_table.ft);
+err_red_ft:
+ mlx5_del_flow_rules(mtu_tables->green_table.rule);
+err_green_rule:
+ mlx5_destroy_flow_table(mtu_tables->green_table.ft);
+err_green_ft:
+ return err;
}
struct mlx5e_post_meter_priv *
mlx5e_post_meter_init(struct mlx5e_priv *priv,
enum mlx5_flow_namespace_type ns_type,
struct mlx5e_post_act *post_act,
- struct mlx5_fc *green_counter,
- struct mlx5_fc *red_counter)
+ enum mlx5e_post_meter_type type,
+ struct mlx5_fc *act_counter,
+ struct mlx5_fc *drop_counter,
+ struct mlx5_flow_attr *branch_true,
+ struct mlx5_flow_attr *branch_false)
{
struct mlx5e_post_meter_priv *post_meter;
int err;
@@ -174,36 +405,55 @@ mlx5e_post_meter_init(struct mlx5e_priv *priv,
if (!post_meter)
return ERR_PTR(-ENOMEM);
- err = mlx5e_post_meter_table_create(priv, ns_type, post_meter);
- if (err)
- goto err_ft;
-
- err = mlx5e_post_meter_fg_create(priv, post_meter);
- if (err)
- goto err_fg;
+ switch (type) {
+ case MLX5E_POST_METER_MTU:
+ err = mlx5e_post_meter_mtu_create(priv, ns_type, post_act,
+ act_counter, drop_counter, post_meter,
+ branch_true, branch_false);
+ break;
+ case MLX5E_POST_METER_RATE:
+ err = mlx5e_post_meter_rate_create(priv, ns_type, post_act,
+ act_counter, drop_counter, post_meter,
+ branch_true, branch_false);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
- err = mlx5e_post_meter_rules_create(priv, post_meter, post_act, green_counter,
- red_counter);
if (err)
- goto err_rules;
+ goto err;
return post_meter;
-err_rules:
- mlx5e_post_meter_fg_destroy(post_meter);
-err_fg:
- mlx5e_post_meter_table_destroy(post_meter);
-err_ft:
+err:
kfree(post_meter);
return ERR_PTR(err);
}
+/* Release the RATE-type post-meter resources by calling the rate rules,
+ * flow-group and table destroy helpers, in that order.
+ */
+static void
+mlx5e_post_meter_rate_destroy(struct mlx5_eswitch *esw, struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5e_post_meter_rate_rules_destroy(esw, post_meter);
+ mlx5e_post_meter_rate_fg_destroy(post_meter);
+ mlx5e_post_meter_rate_table_destroy(post_meter);
+}
+
+/* Release the MTU-type post-meter resources by calling the mtu rules,
+ * flow-group and table destroy helpers, in that order.
+ */
+static void
+mlx5e_post_meter_mtu_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5e_post_meter_mtu_rules_destroy(post_meter);
+ mlx5e_post_meter_mtu_fg_destroy(post_meter);
+ mlx5e_post_meter_mtu_table_destroy(post_meter);
+}
+
void
-mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter)
+mlx5e_post_meter_cleanup(struct mlx5_eswitch *esw, struct mlx5e_post_meter_priv *post_meter)
{
- mlx5e_post_meter_rules_destroy(post_meter);
- mlx5e_post_meter_fg_destroy(post_meter);
- mlx5e_post_meter_table_destroy(post_meter);
+ if (post_meter->type == MLX5E_POST_METER_RATE)
+ mlx5e_post_meter_rate_destroy(esw, post_meter);
+ else
+ mlx5e_post_meter_mtu_destroy(post_meter);
+
kfree(post_meter);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h
index 34d0e4b9fc7a..e013b77186b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h
@@ -14,16 +14,49 @@
struct mlx5e_post_meter_priv;
+/* Selects which post-meter layout mlx5e_post_meter_init() builds and which
+ * teardown path mlx5e_post_meter_cleanup() takes.
+ */
+enum mlx5e_post_meter_type {
+ MLX5E_POST_METER_RATE = 0,
+ MLX5E_POST_METER_MTU
+};
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
struct mlx5_flow_table *
mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter);
+struct mlx5_flow_table *
+mlx5e_post_meter_get_mtu_true_ft(struct mlx5e_post_meter_priv *post_meter);
+
+struct mlx5_flow_table *
+mlx5e_post_meter_get_mtu_false_ft(struct mlx5e_post_meter_priv *post_meter);
+
struct mlx5e_post_meter_priv *
mlx5e_post_meter_init(struct mlx5e_priv *priv,
enum mlx5_flow_namespace_type ns_type,
struct mlx5e_post_act *post_act,
- struct mlx5_fc *green_counter,
- struct mlx5_fc *red_counter);
+ enum mlx5e_post_meter_type type,
+ struct mlx5_fc *act_counter,
+ struct mlx5_fc *drop_counter,
+ struct mlx5_flow_attr *branch_true,
+ struct mlx5_flow_attr *branch_false);
+
void
-mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter);
+mlx5e_post_meter_cleanup(struct mlx5_eswitch *esw, struct mlx5e_post_meter_priv *post_meter);
+
+#else /* CONFIG_MLX5_CLS_ACT */
+
+/* Stub used when CONFIG_MLX5_CLS_ACT is disabled: always returns NULL. */
+static inline struct mlx5_flow_table *
+mlx5e_post_meter_get_mtu_true_ft(struct mlx5e_post_meter_priv *post_meter)
+{
+ return NULL;
+}
+
+/* Stub used when CONFIG_MLX5_CLS_ACT is disabled: always returns NULL. */
+static inline struct mlx5_flow_table *
+mlx5e_post_meter_get_mtu_false_ft(struct mlx5e_post_meter_priv *post_meter)
+{
+ return NULL;
+}
+
+#endif
#endif /* __MLX5_EN_POST_METER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
index 2e42d7c5451e..2b7fd1c0e643 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -211,8 +211,4 @@ struct mlx5e_flow_meters *mlx5e_get_flow_meters(struct mlx5_core_dev *dev);
void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec);
void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec);
-int mlx5e_policer_validate(const struct flow_action *action,
- const struct flow_action_entry *act,
- struct netlink_ext_ack *extack);
-
#endif /* __MLX5_EN_TC_PRIV_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 623886462c10..75b9e1528fd2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -85,18 +85,25 @@ static const struct counter_desc sw_rep_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
};
-struct vport_stats {
- u64 vport_rx_packets;
- u64 vport_tx_packets;
- u64 vport_rx_bytes;
- u64 vport_tx_bytes;
-};
-
static const struct counter_desc vport_rep_stats_desc[] = {
- { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_packets) },
- { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_bytes) },
- { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_packets) },
- { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, vport_rx_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, vport_rx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, vport_tx_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, vport_tx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats,
+ rx_vport_rdma_unicast_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, rx_vport_rdma_unicast_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats,
+ tx_vport_rdma_unicast_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, tx_vport_rdma_unicast_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats,
+ rx_vport_rdma_multicast_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats,
+ rx_vport_rdma_multicast_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats,
+ tx_vport_rdma_multicast_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats,
+ tx_vport_rdma_multicast_bytes) },
};
#define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc)
@@ -161,33 +168,80 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport_rep)
int i;
for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++)
- data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.vf_vport,
+ data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.rep_stats,
vport_rep_stats_desc, i);
return idx;
}
+/* Refresh priv->stats.rep_stats for this representor from the vport counters
+ * returned by mlx5_core_query_vport_counter(). Directions are swapped on
+ * purpose: the counters are reported for the switch side of the vport, so
+ * what the vport transmitted is accounted as rx here and vice versa.
+ * On allocation or query failure the previous values are left untouched.
+ */
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep)
{
+ struct mlx5e_rep_stats *rep_stats = &priv->stats.rep_stats;
+ int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;
- struct rtnl_link_stats64 *vport_stats;
- struct ifla_vf_stats vf_stats;
+ u32 *out;
int err;
- err = mlx5_eswitch_get_vport_stats(esw, rep->vport, &vf_stats);
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return;
+
+ err = mlx5_core_query_vport_counter(esw->dev, 1, rep->vport - 1, 0, out);
if (err) {
netdev_warn(priv->netdev, "vport %d error %d reading stats\n",
rep->vport, err);
+ /* the query buffer must be released on the failure path too */
+ kvfree(out);
return;
}
- vport_stats = &priv->stats.vf_vport;
+ #define MLX5_GET_CTR(p, x) \
+ MLX5_GET64(query_vport_counter_out, p, x)
/* flip tx/rx as we are reporting the counters for the switch vport */
- vport_stats->rx_packets = vf_stats.tx_packets;
- vport_stats->rx_bytes = vf_stats.tx_bytes;
- vport_stats->tx_packets = vf_stats.rx_packets;
- vport_stats->tx_bytes = vf_stats.rx_bytes;
+ rep_stats->vport_rx_packets =
+ MLX5_GET_CTR(out, transmitted_ib_unicast.packets) +
+ MLX5_GET_CTR(out, transmitted_eth_unicast.packets) +
+ MLX5_GET_CTR(out, transmitted_ib_multicast.packets) +
+ MLX5_GET_CTR(out, transmitted_eth_multicast.packets) +
+ MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
+
+ rep_stats->vport_tx_packets =
+ MLX5_GET_CTR(out, received_ib_unicast.packets) +
+ MLX5_GET_CTR(out, received_eth_unicast.packets) +
+ MLX5_GET_CTR(out, received_ib_multicast.packets) +
+ MLX5_GET_CTR(out, received_eth_multicast.packets) +
+ MLX5_GET_CTR(out, received_eth_broadcast.packets);
+
+ /* byte totals cover the same five traffic classes as the packet totals */
+ rep_stats->vport_rx_bytes =
+ MLX5_GET_CTR(out, transmitted_ib_unicast.octets) +
+ MLX5_GET_CTR(out, transmitted_eth_unicast.octets) +
+ MLX5_GET_CTR(out, transmitted_ib_multicast.octets) +
+ MLX5_GET_CTR(out, transmitted_eth_multicast.octets) +
+ MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
+
+ rep_stats->vport_tx_bytes =
+ MLX5_GET_CTR(out, received_ib_unicast.octets) +
+ MLX5_GET_CTR(out, received_eth_unicast.octets) +
+ MLX5_GET_CTR(out, received_ib_multicast.octets) +
+ MLX5_GET_CTR(out, received_eth_multicast.octets) +
+ MLX5_GET_CTR(out, received_eth_broadcast.octets);
+
+ rep_stats->rx_vport_rdma_unicast_packets =
+ MLX5_GET_CTR(out, transmitted_ib_unicast.packets);
+ rep_stats->tx_vport_rdma_unicast_packets =
+ MLX5_GET_CTR(out, received_ib_unicast.packets);
+ rep_stats->rx_vport_rdma_unicast_bytes =
+ MLX5_GET_CTR(out, transmitted_ib_unicast.octets);
+ rep_stats->tx_vport_rdma_unicast_bytes =
+ MLX5_GET_CTR(out, received_ib_unicast.octets);
+ rep_stats->rx_vport_rdma_multicast_packets =
+ MLX5_GET_CTR(out, transmitted_ib_multicast.packets);
+ rep_stats->tx_vport_rdma_multicast_packets =
+ MLX5_GET_CTR(out, received_ib_multicast.packets);
+ rep_stats->rx_vport_rdma_multicast_bytes =
+ MLX5_GET_CTR(out, transmitted_ib_multicast.octets);
+ rep_stats->tx_vport_rdma_multicast_bytes =
+ MLX5_GET_CTR(out, received_ib_multicast.octets);
+
+ kvfree(out);
}
static void mlx5e_rep_get_strings(struct net_device *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 712cac10ba49..375752d6546d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -463,6 +463,21 @@ struct mlx5e_ptp_cq_stats {
u64 resync_event;
};
+/* Per-representor vport counters exposed through vport_rep_stats_desc.
+ * The vport_* fields are totals; the *_rdma_* fields hold the IB unicast and
+ * multicast portions on their own. rx/tx are from the representor's point of
+ * view, i.e. flipped relative to the queried vport counters.
+ */
+struct mlx5e_rep_stats {
+ u64 vport_rx_packets;
+ u64 vport_tx_packets;
+ u64 vport_rx_bytes;
+ u64 vport_tx_bytes;
+ u64 rx_vport_rdma_unicast_packets;
+ u64 tx_vport_rdma_unicast_packets;
+ u64 rx_vport_rdma_unicast_bytes;
+ u64 tx_vport_rdma_unicast_bytes;
+ u64 rx_vport_rdma_multicast_packets;
+ u64 tx_vport_rdma_multicast_packets;
+ u64 rx_vport_rdma_multicast_bytes;
+ u64 tx_vport_rdma_multicast_bytes;
+};
+
struct mlx5e_stats {
struct mlx5e_sw_stats sw;
struct mlx5e_qcounter_stats qcnt;
@@ -471,6 +486,7 @@ struct mlx5e_stats {
struct mlx5e_pport_stats pport;
struct rtnl_link_stats64 vf_vport;
struct mlx5e_pcie_stats pcie;
+ struct mlx5e_rep_stats rep_stats;
};
extern mlx5e_stats_grp_t mlx5e_nic_stats_grps[];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 10d1609ece58..9af2aa2922f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -132,6 +132,15 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
[PACKET_COLOR_TO_REG] = packet_color_to_reg,
};
+/* Bookkeeping kept by parse_tc_actions() while walking the actions that a
+ * branching action jumps over.
+ */
+struct mlx5e_tc_jump_state {
+ /* remaining jump distance; loaded from the branch control's extval and
+ * counted down by dec_jump_count()
+ */
+ u32 jump_count;
+ /* set when the current action is the last one jumped over, which makes
+ * parse_tc_actions() split the attribute after it; reset per action
+ */
+ bool jump_target;
+ /* branch attribute (true or false side) that performs the jump */
+ struct mlx5_flow_attr *jumping_attr;
+
+ /* id and hw_index of the last action counted, so that one tc action
+ * expanded into several offload actions is counted only once
+ */
+ enum flow_action_id last_id;
+ u32 last_index;
+};
+
struct mlx5e_tc_table *mlx5e_tc_table_alloc(void)
{
struct mlx5e_tc_table *tc;
@@ -160,6 +169,7 @@ static struct lock_class_key tc_ht_lock_key;
static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
+static void mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr);
void
mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
@@ -392,8 +402,9 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv,
static bool
is_flow_meter_action(struct mlx5_flow_attr *attr)
{
- return ((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
- (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER));
+ return (((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
+ (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)) ||
+ attr->flags & MLX5_ATTR_FLAG_MTU);
}
static int
@@ -404,6 +415,7 @@ mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
struct mlx5e_post_meter_priv *post_meter;
enum mlx5_flow_namespace_type ns_type;
struct mlx5e_flow_meter_handle *meter;
+ enum mlx5e_post_meter_type type;
meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params);
if (IS_ERR(meter)) {
@@ -412,8 +424,11 @@ mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
}
ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters);
- post_meter = mlx5e_post_meter_init(priv, ns_type, post_act, meter->green_counter,
- meter->red_counter);
+ type = meter->params.mtu ? MLX5E_POST_METER_MTU : MLX5E_POST_METER_RATE;
+ post_meter = mlx5e_post_meter_init(priv, ns_type, post_act,
+ type,
+ meter->act_counter, meter->drop_counter,
+ attr->branch_true, attr->branch_false);
if (IS_ERR(post_meter)) {
mlx5_core_err(priv->mdev, "Failed to init post meter\n");
goto err_meter_init;
@@ -432,9 +447,9 @@ err_meter_init:
}
static void
-mlx5e_tc_del_flow_meter(struct mlx5_flow_attr *attr)
+mlx5e_tc_del_flow_meter(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
{
- mlx5e_post_meter_cleanup(attr->meter_attr.post_meter);
+ mlx5e_post_meter_cleanup(esw, attr->meter_attr.post_meter);
mlx5e_tc_meter_put(attr->meter_attr.meter);
}
@@ -495,7 +510,7 @@ mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
if (attr->meter_attr.meter)
- mlx5e_tc_del_flow_meter(attr);
+ mlx5e_tc_del_flow_meter(esw, attr);
}
int
@@ -606,6 +621,12 @@ int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
}
+/* Return the device used for the flow's counters: the eswitch attribute's
+ * counter_dev for eswitch flows, otherwise the flow's own mdev.
+ */
+static struct mlx5_core_dev *
+get_flow_counter_dev(struct mlx5e_tc_flow *flow)
+{
+ return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev;
+}
+
static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
@@ -1719,6 +1740,90 @@ clean_encap_dests(struct mlx5e_priv *priv,
}
+/* Check the action bitmask of a single attribute.
+ * Returns 0 when the combination is acceptable, otherwise -EOPNOTSUPP with
+ * an explanatory message stored in extack.
+ */
static int
+verify_attr_actions(u32 actions, struct netlink_ext_ack *extack)
+{
+ /* neither forward nor drop is set */
+ if (!(actions &
+ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
+ NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
+ return -EOPNOTSUPP;
+ }
+
+ /* no bit of the pair is clear, i.e. forward and drop are both set */
+ if (!(~actions &
+ (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
+ NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
+ return -EOPNOTSUPP;
+ }
+
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/* Common per-attribute finalization shared by the main flow attribute
+ * (is_post_act_attr == false) and by post-action attributes
+ * (is_post_act_attr == true): validate the action mask, set up encap
+ * destinations, add the vlan action for eswitch flows, set up modify-header
+ * for the attribute and its branch attributes, and allocate a counter when
+ * the COUNT action is requested.
+ *
+ * Returns 0 or the first error encountered; nothing is undone here.
+ */
+static int
+post_process_attr(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ bool is_post_act_attr,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
+ bool vf_tun;
+ int err = 0;
+
+ err = verify_attr_actions(attr->action, extack);
+ if (err)
+ goto err_out;
+
+ /* NOTE(review): vf_tun is presumably filled in by set_encap_dests() - confirm */
+ err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun);
+ if (err)
+ goto err_out;
+
+ if (mlx5e_is_eswitch_flow(flow)) {
+ err = mlx5_eswitch_add_vlan_action(esw, attr);
+ if (err)
+ goto err_out;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ /* vf tunnel and post-action attributes take the add_flow_mod_hdr
+ * path; everything else attaches via mlx5e_attach_mod_hdr()
+ */
+ if (vf_tun || is_post_act_attr) {
+ err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr);
+ if (err)
+ goto err_out;
+ } else {
+ err = mlx5e_attach_mod_hdr(flow->priv, flow, attr->parse_attr);
+ if (err)
+ goto err_out;
+ }
+ }
+
+ if (attr->branch_true &&
+ attr->branch_true->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr->branch_true);
+ if (err)
+ goto err_out;
+ }
+
+ if (attr->branch_false &&
+ attr->branch_false->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr->branch_false);
+ if (err)
+ goto err_out;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
+ if (err)
+ goto err_out;
+ }
+
+err_out:
+ return err;
+}
+
+static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
@@ -1728,7 +1833,6 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
u32 max_prio, max_chain;
- bool vf_tun;
int err = 0;
parse_attr = attr->parse_attr;
@@ -1818,32 +1922,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
esw_attr->int_port = int_port;
}
- err = set_encap_dests(priv, flow, attr, extack, &vf_tun);
- if (err)
- goto err_out;
-
- err = mlx5_eswitch_add_vlan_action(esw, attr);
+ err = post_process_attr(flow, attr, false, extack);
if (err)
goto err_out;
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
- if (vf_tun) {
- err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
- if (err)
- goto err_out;
- } else {
- err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
- if (err)
- goto err_out;
- }
- }
-
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- err = alloc_flow_attr_counter(esw_attr->counter_dev, attr);
- if (err)
- goto err_out;
- }
-
/* we get here if one of the following takes place:
* (1) there's no error
* (2) there's an encap action and we don't have valid neigh
@@ -1879,6 +1961,16 @@ static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
return !!geneve_tlv_opt_0_data;
}
+/* Release a branch attribute: drop the resources it holds via
+ * mlx5_free_flow_attr(), then free its parse_attr and the attribute itself.
+ * A NULL attr is accepted and ignored.
+ */
+static void free_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
+{
+ if (!attr)
+ return;
+
+ mlx5_free_flow_attr(flow, attr);
+ kvfree(attr->parse_attr);
+ kfree(attr);
+}
+
static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
@@ -1934,6 +2026,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5e_detach_decap(priv, flow);
free_flow_post_acts(flow);
+ free_branch_attr(flow, attr->branch_true);
+ free_branch_attr(flow, attr->branch_false);
if (flow->attr->lag.count)
mlx5_lag_del_mpesw_rule(esw->dev);
@@ -3507,36 +3601,6 @@ actions_match_supported(struct mlx5e_priv *priv,
ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
ct_flow = flow_flag_test(flow, CT) && !ct_clear;
- if (!(actions &
- (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
- NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
- return false;
- }
-
- if (!(~actions &
- (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
- NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
- return false;
- }
-
- if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
- actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
- NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
- return false;
- }
-
- if (!(~actions &
- (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
- NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
- return false;
- }
-
- if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
- actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
- NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
- return false;
- }
-
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
!modify_header_match_supported(priv, &parse_attr->spec, flow_action,
actions, ct_flow, ct_clear, extack))
@@ -3636,15 +3700,12 @@ mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
attr2->esw_attr->split_count = 0;
}
+ attr2->branch_true = NULL;
+ attr2->branch_false = NULL;
+ attr2->jumping_attr = NULL;
return attr2;
}
-static struct mlx5_core_dev *
-get_flow_counter_dev(struct mlx5e_tc_flow *flow)
-{
- return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev;
-}
-
struct mlx5_flow_attr *
mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow)
{
@@ -3680,28 +3741,15 @@ mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow)
static void
free_flow_post_acts(struct mlx5e_tc_flow *flow)
{
- struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
- struct mlx5e_post_act *post_act = get_post_action(flow->priv);
struct mlx5_flow_attr *attr, *tmp;
- bool vf_tun;
list_for_each_entry_safe(attr, tmp, &flow->attrs, list) {
if (list_is_last(&attr->list, &flow->attrs))
break;
- if (attr->post_act_handle)
- mlx5e_tc_post_act_del(post_act, attr->post_act_handle);
-
- clean_encap_dests(flow->priv, flow, attr, &vf_tun);
-
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- mlx5_fc_destroy(counter_dev, attr->counter);
-
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
- mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
- if (attr->modify_hdr)
- mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr);
- }
+ mlx5_free_flow_attr(flow, attr);
+ free_branch_attr(flow, attr->branch_true);
+ free_branch_attr(flow, attr->branch_false);
list_del(&attr->list);
kvfree(attr->parse_attr);
@@ -3754,7 +3802,6 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
struct mlx5e_post_act *post_act = get_post_action(flow->priv);
struct mlx5_flow_attr *attr, *next_attr = NULL;
struct mlx5e_post_act_handle *handle;
- bool vf_tun;
int err;
/* This is going in reverse order as needed.
@@ -3764,7 +3811,9 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
if (!next_attr) {
/* Set counter action on last post act rule. */
attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- } else {
+ }
+
+ if (next_attr && !(attr->flags & MLX5_ATTR_FLAG_TERMINATING)) {
err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr);
if (err)
goto out_free;
@@ -3776,26 +3825,14 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
if (list_is_last(&attr->list, &flow->attrs))
break;
- err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun);
+ err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
if (err)
goto out_free;
- err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
+ err = post_process_attr(flow, attr, true, extack);
if (err)
goto out_free;
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
- err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr);
- if (err)
- goto out_free;
- }
-
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
- if (err)
- goto out_free;
- }
-
handle = mlx5e_tc_post_act_add(post_act, attr);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
@@ -3803,6 +3840,13 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
}
attr->post_act_handle = handle;
+
+ if (attr->jumping_attr) {
+ err = mlx5e_tc_act_set_next_post_act(flow, attr->jumping_attr, attr);
+ if (err)
+ goto out_free;
+ }
+
next_attr = attr;
}
@@ -3822,12 +3866,145 @@ out_free:
}
+/* Allocate the attribute for one side of a branching action by cloning
+ * flow->attr, and set its action according to the branch control:
+ * drop, or forward to the post-action table for accept/pipe/jump.
+ *
+ * For a jump, *jump_count is loaded with cond->extval; a jump while
+ * *jump_count is already non-zero is rejected as a nested jump.
+ *
+ * Returns 0 with *cond_attr set, or a negative errno with *cond_attr NULL.
+ */
static int
+alloc_branch_attr(struct mlx5e_tc_flow *flow,
+ struct mlx5e_tc_act_branch_ctrl *cond,
+ struct mlx5_flow_attr **cond_attr,
+ u32 *jump_count,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_flow_attr *attr;
+ int err = 0;
+
+ *cond_attr = mlx5e_clone_flow_attr_for_post_act(flow->attr,
+ mlx5e_get_flow_namespace(flow));
+ if (!(*cond_attr))
+ return -ENOMEM;
+
+ attr = *cond_attr;
+
+ switch (cond->act_id) {
+ case FLOW_ACTION_DROP:
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+ break;
+ case FLOW_ACTION_ACCEPT:
+ case FLOW_ACTION_PIPE:
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv));
+ break;
+ case FLOW_ACTION_JUMP:
+ if (*jump_count) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot offload flows with nested jumps");
+ err = -EOPNOTSUPP;
+ goto out_err;
+ }
+ *jump_count = cond->extval;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv));
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto out_err;
+ }
+
+ return err;
+out_err:
+ /* the clone owns its parse_attr (see free_branch_attr()); release it
+ * together with the attribute so the error path does not leak it
+ */
+ kvfree(attr->parse_attr);
+ kfree(*cond_attr);
+ *cond_attr = NULL;
+ return err;
+}
+
+/* Advance the jump bookkeeping for one parsed action; called by
+ * parse_tc_actions() for every action. Does nothing unless a jump is in
+ * progress. Each distinct tc action (id + hw_index) decrements jump_count
+ * once: when one step remains the current attribute becomes the end of the
+ * jumped-over list, and when the count reaches zero the current attribute is
+ * linked to the attribute that performed the jump.
+ */
+static void
+dec_jump_count(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act,
+ struct mlx5_flow_attr *attr, struct mlx5e_priv *priv,
+ struct mlx5e_tc_jump_state *jump_state)
+{
+ if (!jump_state->jump_count)
+ return;
+
+ /* Single tc action can instantiate multiple offload actions (e.g. pedit)
+ * Jump only over a tc action
+ */
+ if (act->id == jump_state->last_id && act->hw_index == jump_state->last_index)
+ return;
+
+ jump_state->last_id = act->id;
+ jump_state->last_index = act->hw_index;
+
+ /* nothing to do for intermediate actions */
+ if (--jump_state->jump_count > 1)
+ return;
+
+ if (jump_state->jump_count == 1) { /* last action in the jump action list */
+
+ /* create a new attribute after this action */
+ jump_state->jump_target = true;
+
+ if (tc_act->is_terminating_action) { /* the branch ends here */
+ attr->flags |= MLX5_ATTR_FLAG_TERMINATING;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ } else { /* the branch continues executing the rest of the actions */
+ struct mlx5e_post_act *post_act;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ post_act = get_post_action(priv);
+ attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act);
+ }
+ } else if (jump_state->jump_count == 0) { /* first attr after the jump action list */
+ /* This is the post action for the jumping attribute (either red or green)
+ * Use the stored jumping_attr to set the post act id on the jumping attribute
+ */
+ attr->jumping_attr = jump_state->jumping_attr;
+ }
+}
+
+/* For an action that provides get_branch_ctrl(), allocate the branch_true
+ * and branch_false attributes of attr and record in jump_state which of them
+ * (if any) performs a jump and over how many actions.
+ *
+ * Returns 0 (also when the action has no branch control) or a negative
+ * errno. On failure attr->branch_true/branch_false are left NULL and
+ * jump_state is left as it was on entry, so later teardown cannot touch a
+ * freed branch attribute.
+ */
+static int
+parse_branch_ctrl(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act,
+ struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_jump_state *jump_state,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_flow_attr *prev_jumping_attr = jump_state->jumping_attr;
+ struct mlx5e_tc_act_branch_ctrl cond_true, cond_false;
+ u32 jump_count = jump_state->jump_count;
+ int err;
+
+ if (!tc_act->get_branch_ctrl)
+ return 0;
+
+ tc_act->get_branch_ctrl(act, &cond_true, &cond_false);
+
+ err = alloc_branch_attr(flow, &cond_true,
+ &attr->branch_true, &jump_count, extack);
+ if (err)
+ goto out_err;
+
+ if (jump_count)
+ jump_state->jumping_attr = attr->branch_true;
+
+ err = alloc_branch_attr(flow, &cond_false,
+ &attr->branch_false, &jump_count, extack);
+ if (err)
+ goto err_branch_false;
+
+ if (jump_count && !jump_state->jumping_attr)
+ jump_state->jumping_attr = attr->branch_false;
+
+ jump_state->jump_count = jump_count;
+ return 0;
+
+err_branch_false:
+ /* flow teardown frees attr->branch_true again unless it is cleared,
+ * and jumping_attr may already point at the attribute freed here
+ */
+ jump_state->jumping_attr = prev_jumping_attr;
+ free_branch_attr(flow, attr->branch_true);
+ attr->branch_true = NULL;
+out_err:
+ return err;
+}
+
+static int
parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
struct flow_action *flow_action)
{
struct netlink_ext_ack *extack = parse_state->extack;
struct mlx5e_tc_flow_action flow_action_reorder;
struct mlx5e_tc_flow *flow = parse_state->flow;
+ struct mlx5e_tc_jump_state jump_state = {};
struct mlx5_flow_attr *attr = flow->attr;
enum mlx5_flow_namespace_type ns_type;
struct mlx5e_priv *priv = flow->priv;
@@ -3847,6 +4024,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
list_add(&attr->list, &flow->attrs);
flow_action_for_each(i, _act, &flow_action_reorder) {
+ jump_state.jump_target = false;
act = *_act;
tc_act = mlx5e_tc_act_get(act->id, ns_type);
if (!tc_act) {
@@ -3864,12 +4042,19 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
if (err)
goto out_free;
+ dec_jump_count(act, tc_act, attr, priv, &jump_state);
+
+ err = parse_branch_ctrl(act, tc_act, flow, attr, &jump_state, extack);
+ if (err)
+ goto out_free;
+
parse_state->actions |= attr->action;
/* Split attr for multi table act if not the last act. */
- if (tc_act->is_multi_table_act &&
+ if (jump_state.jump_target ||
+ (tc_act->is_multi_table_act &&
tc_act->is_multi_table_act(priv, act, attr) &&
- i < flow_action_reorder.num_entries - 1) {
+ i < flow_action_reorder.num_entries - 1)) {
err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
if (err)
goto out_free;
@@ -3951,6 +4136,10 @@ parse_tc_nic_actions(struct mlx5e_priv *priv,
if (err)
return err;
+ err = verify_attr_actions(attr->action, extack);
+ if (err)
+ return err;
+
if (!actions_match_supported(priv, flow_action, parse_state->actions,
parse_attr, flow, extack))
return -EOPNOTSUPP;
@@ -4188,6 +4377,30 @@ mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
return attr;
}
+/* Release the resources held by one flow attribute: its post-action handle,
+ * encap destinations, counter (when the COUNT action is set) and
+ * modify-header state (when the MOD_HDR action is set). The attribute and
+ * its parse_attr are not freed here; callers do that. NULL attr is ignored.
+ */
+static void
+mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
+{
+ struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
+ bool vf_tun;
+
+ if (!attr)
+ return;
+
+ if (attr->post_act_handle)
+ mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle);
+
+ /* vf_tun is only an out-parameter here; its value is not used */
+ clean_encap_dests(flow->priv, flow, attr, &vf_tun);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ mlx5_fc_destroy(counter_dev, attr->counter);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
+ if (attr->modify_hdr)
+ mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr);
+ }
+}
+
static int
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
struct flow_cls_offload *f, unsigned long flow_flags,
@@ -4730,10 +4943,17 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
return err;
}
-int mlx5e_policer_validate(const struct flow_action *action,
- const struct flow_action_entry *act,
- struct netlink_ext_ack *extack)
+static int
+tc_matchall_police_validate(const struct flow_action *action,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
{
+ if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when conform action is not continue");
+ return -EOPNOTSUPP;
+ }
+
if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
NL_SET_ERR_MSG_MOD(extack,
"Offload not supported when exceed action is not drop");
@@ -4784,13 +5004,7 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
case FLOW_ACTION_POLICE:
- if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
- NL_SET_ERR_MSG_MOD(extack,
- "Offload not supported when conform action is not continue");
- return -EOPNOTSUPP;
- }
-
- err = mlx5e_policer_validate(flow_action, act, extack);
+ err = tc_matchall_police_validate(flow_action, act, extack);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 0db41fa4a9a6..50af70ef22f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -95,6 +95,9 @@ struct mlx5_flow_attr {
*/
bool count;
} lag;
+ struct mlx5_flow_attr *branch_true;
+ struct mlx5_flow_attr *branch_false;
+ struct mlx5_flow_attr *jumping_attr;
/* keep this union last */
union {
DECLARE_FLEX_ARRAY(struct mlx5_esw_flow_attr, esw_attr);
@@ -110,6 +113,8 @@ enum {
MLX5_ATTR_FLAG_SAMPLE = BIT(4),
MLX5_ATTR_FLAG_ACCEPT = BIT(5),
MLX5_ATTR_FLAG_CT = BIT(6),
+ MLX5_ATTR_FLAG_TERMINATING = BIT(7),
+ MLX5_ATTR_FLAG_MTU = BIT(8),
};
/* Returns true if any of the flags that require skipping further TC/NF processing are set. */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
index 2db13c71e88c..3d0bbcca1cb9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
@@ -12,10 +12,11 @@ enum vnic_diag_counter {
MLX5_VNIC_DIAG_CQ_OVERRUN,
MLX5_VNIC_DIAG_INVALID_COMMAND,
MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND,
+ MLX5_VNIC_DIAG_RX_STEERING_DISCARD,
};
static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter,
- u32 *val)
+ u64 *val)
{
u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
@@ -57,6 +58,10 @@ static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_cou
case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND:
*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, quota_exceeded_command);
break;
+ case MLX5_VNIC_DIAG_RX_STEERING_DISCARD:
+ *val = MLX5_GET64(vnic_diagnostic_statistics, vnic_diag_out,
+ nic_receive_steering_discard);
+ break;
}
return 0;
@@ -65,14 +70,14 @@ static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_cou
static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport,
enum vnic_diag_counter type)
{
- u32 val = 0;
+ u64 val = 0;
int ret;
ret = mlx5_esw_query_vnic_diag(vport, type, &val);
if (ret)
return ret;
- seq_printf(file, "%d\n", val);
+ seq_printf(file, "%llu\n", val);
return 0;
}
@@ -112,6 +117,11 @@ static int quota_exceeded_command_show(struct seq_file *file, void *priv)
return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND);
}
+/* seq_file show handler for the RX steering discard vnic diagnostic counter.
+ * The seq_file's private pointer is handed to __show_vnic_diag() as the vport.
+ */
+static int rx_steering_discard_show(struct seq_file *file, void *priv)
+{
+	struct mlx5_vport *vport = file->private;
+
+	return __show_vnic_diag(file, vport, MLX5_VNIC_DIAG_RX_STEERING_DISCARD);
+}
+
DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle);
DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow);
DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun);
@@ -119,6 +129,7 @@ DEFINE_SHOW_ATTRIBUTE(async_eq_overrun);
DEFINE_SHOW_ATTRIBUTE(cq_overrun);
DEFINE_SHOW_ATTRIBUTE(invalid_command);
DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command);
+DEFINE_SHOW_ATTRIBUTE(rx_steering_discard);
void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num)
{
@@ -179,4 +190,9 @@ void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool
if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count))
debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport,
&quota_exceeded_command_fops);
+
+ if (MLX5_CAP_GEN(esw->dev, nic_receive_steering_discard))
+ debugfs_create_file("rx_steering_discard", 0444, vnic_diag, vport,
+ &rx_steering_discard_fops);
+
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 374e3fbdc2cf..527e4bffda8d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -772,6 +772,41 @@ static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw,
esw_vport_destroy_offloads_acl_tables(esw, vport);
}
+/* Cache the vport's current RoCE and migratable HCA capability bits in
+ * vport->info.
+ *
+ * Does nothing and returns 0 when the device lacks the vhca_resource_manager
+ * capability. Otherwise queries MLX5_CAP_GENERAL (roce bit) and then
+ * MLX5_CAP_GENERAL_2 (migratable bit) for the vport, reusing one zeroed
+ * output buffer. Returns 0 on success, -ENOMEM if the buffer cannot be
+ * allocated, or the error of the first failing query.
+ */
+static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	void *caps;
+	void *out;
+	int ret;
+
+	if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
+		return 0;
+
+	out = kzalloc(out_sz, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	ret = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, out,
+					    MLX5_CAP_GENERAL);
+	if (!ret) {
+		caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
+		vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, caps, roce);
+
+		/* Start the second query from a clean buffer. */
+		memset(out, 0, out_sz);
+		ret = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, out,
+						    MLX5_CAP_GENERAL_2);
+	}
+
+	if (!ret) {
+		caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
+		vport->info.mig_enabled = MLX5_GET(cmd_hca_cap_2, caps, migratable);
+	}
+
+	kfree(out);
+	return ret;
+}
+
static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
u16 vport_num = vport->vport;
@@ -785,6 +820,10 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
if (mlx5_esw_is_manager_vport(esw, vport_num))
return 0;
+ err = mlx5_esw_vport_caps_get(esw, vport);
+ if (err)
+ goto err_caps;
+
mlx5_modify_vport_admin_state(esw->dev,
MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
vport_num, 1,
@@ -804,6 +843,10 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
vport->info.qos, flags);
return 0;
+
+err_caps:
+ esw_vport_cleanup_acl(esw, vport);
+ return err;
}
/* Don't cleanup vport->info, it's needed to restore vport configuration */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 42d9df417e20..5a85a5d32be7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -153,6 +153,8 @@ struct mlx5_vport_info {
u8 qos;
u8 spoofchk: 1;
u8 trusted: 1;
+ u8 roce_enabled: 1;
+ u8 mig_enabled: 1;
};
/* Vport context events */
@@ -508,7 +510,14 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
const u8 *hw_addr, int hw_addr_len,
struct netlink_ext_ack *extack);
-
+int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_migratable_get(struct devlink_port *port, bool *is_enabled,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable,
+ struct netlink_ext_ack *extack);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 9b6fbb19c22a..e455b215c708 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -50,6 +50,7 @@
#include "en/mapping.h"
#include "devlink.h"
#include "lag/lag.h"
+#include "en/tc/post_meter.h"
#define mlx5_esw_for_each_rep(esw, i, rep) \
xa_for_each(&((esw)->offloads.vport_reps), i, rep)
@@ -202,6 +203,21 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw,
}
+/* Fill dest[i] as a MLX5_FLOW_DESTINATION_TYPE_RANGE destination keyed on
+ * packet length: range min is 0, range max is meter->params.mtu, and the
+ * hit/miss flow tables come from the meter's post_meter MTU true/false
+ * tables. Always returns 0.
+ */
static int
+esw_setup_mtu_dest(struct mlx5_flow_destination *dest,
+		   struct mlx5e_meter_attr *meter,
+		   int i)
+{
+	struct mlx5_flow_destination *slot = &dest[i];
+
+	slot->type = MLX5_FLOW_DESTINATION_TYPE_RANGE;
+	slot->range.field = MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN;
+	slot->range.min = 0;
+	slot->range.max = meter->params.mtu;
+	slot->range.hit_ft = mlx5e_post_meter_get_mtu_true_ft(meter->post_meter);
+	slot->range.miss_ft = mlx5e_post_meter_get_mtu_false_ft(meter->post_meter);
+
+	return 0;
+}
+
+static int
esw_setup_sampler_dest(struct mlx5_flow_destination *dest,
struct mlx5_flow_act *flow_act,
u32 sampler_id,
@@ -491,6 +507,9 @@ esw_setup_dests(struct mlx5_flow_destination *dest,
} else if (attr->flags & MLX5_ATTR_FLAG_ACCEPT) {
esw_setup_accept_dest(dest, flow_act, chains, *i);
(*i)++;
+ } else if (attr->flags & MLX5_ATTR_FLAG_MTU) {
+ err = esw_setup_mtu_dest(dest, &attr->meter_attr, *i);
+ (*i)++;
} else if (esw_is_indir_table(esw, attr)) {
err = esw_setup_indir_table(dest, flow_act, esw, attr, spec, true, i);
} else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) {
@@ -640,6 +659,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
goto err_esw_get;
}
+ if (!i) {
+ kfree(dest);
+ dest = NULL;
+ }
+
if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec))
rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr,
&flow_act, dest, i);
@@ -3889,7 +3913,7 @@ static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num,
if (!query_ctx)
return -ENOMEM;
- err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx);
+ err = mlx5_vport_get_other_func_general_cap(esw->dev, vport_num, query_ctx);
if (err)
goto out_free;
@@ -4022,3 +4046,212 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
return mlx5_eswitch_set_vport_mac(esw, vport_num, hw_addr);
}
+
+/* Map a devlink port to its eswitch vport.
+ *
+ * Returns ERR_PTR(-EOPNOTSUPP) when the device lacks the
+ * vhca_resource_manager capability or when port functions are not supported
+ * for the vport number derived from port->index; otherwise returns whatever
+ * mlx5_eswitch_get_vport() yields for that vport number.
+ */
+static struct mlx5_vport *
+mlx5_devlink_port_fn_get_vport(struct devlink_port *port, struct mlx5_eswitch *esw)
+{
+	u16 num;
+
+	if (MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
+		num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
+		if (is_port_function_supported(esw, num))
+			return mlx5_eswitch_get_vport(esw, num);
+	}
+
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+/* Report the cached "migratable" state of a devlink port function.
+ *
+ * @port:       devlink port whose function is queried
+ * @is_enabled: written with vport->info.mig_enabled on success
+ * @extack:     receives an error message on failure
+ *
+ * Returns 0 on success; -EOPNOTSUPP when the device has no migration
+ * capability or the vport is not enabled; or the error from the eswitch or
+ * vport lookup. The vport state is read under esw->state_lock.
+ */
+int mlx5_devlink_port_fn_migratable_get(struct devlink_port *port, bool *is_enabled,
+					struct netlink_ext_ack *extack)
+{
+	struct mlx5_eswitch *esw = mlx5_devlink_eswitch_get(port->devlink);
+	struct mlx5_vport *vport;
+	int ret;
+
+	if (IS_ERR(esw))
+		return PTR_ERR(esw);
+
+	if (!MLX5_CAP_GEN(esw->dev, migration)) {
+		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support migration");
+		return -EOPNOTSUPP;
+	}
+
+	vport = mlx5_devlink_port_fn_get_vport(port, esw);
+	if (IS_ERR(vport)) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+		return PTR_ERR(vport);
+	}
+
+	mutex_lock(&esw->state_lock);
+	if (!vport->enabled) {
+		ret = -EOPNOTSUPP;
+	} else {
+		*is_enabled = vport->info.mig_enabled;
+		ret = 0;
+	}
+	mutex_unlock(&esw->state_lock);
+
+	return ret;
+}
+
+/* Enable or disable the "migratable" HCA capability of a devlink port function.
+ *
+ * @port:   devlink port whose function is modified
+ * @enable: requested state of the migratable capability
+ * @extack: receives an error message on failure
+ *
+ * Reads the function's current MLX5_CAP_GENERAL_2 capabilities, updates the
+ * migratable bit to @enable and writes them back, then caches the new state
+ * in vport->info.mig_enabled. All vport state is accessed under
+ * esw->state_lock.
+ *
+ * Returns 0 on success (including when the cached state already equals
+ * @enable); -EOPNOTSUPP when the device has no migration capability or the
+ * vport is disabled; -ENOMEM on allocation failure; or the error from the
+ * eswitch/vport lookup or the capability query/set commands.
+ */
+int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable,
+					struct netlink_ext_ack *extack)
+{
+	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	struct mlx5_eswitch *esw;
+	struct mlx5_vport *vport;
+	void *query_ctx;
+	void *hca_caps;
+	int err = -EOPNOTSUPP;
+
+	esw = mlx5_devlink_eswitch_get(port->devlink);
+	if (IS_ERR(esw))
+		return PTR_ERR(esw);
+
+	if (!MLX5_CAP_GEN(esw->dev, migration)) {
+		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support migration");
+		return err;
+	}
+
+	vport = mlx5_devlink_port_fn_get_vport(port, esw);
+	if (IS_ERR(vport)) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+		return PTR_ERR(vport);
+	}
+
+	mutex_lock(&esw->state_lock);
+	if (!vport->enabled) {
+		NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled");
+		goto out;
+	}
+
+	/* Nothing to do when the cached state already matches the request. */
+	if (vport->info.mig_enabled == enable) {
+		err = 0;
+		goto out;
+	}
+
+	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+	if (!query_ctx) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx,
+					    MLX5_CAP_GENERAL_2);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
+		goto out_free;
+	}
+
+	/* The capabilities are modified in place inside the query output
+	 * buffer, so no copy is needed before updating the bit.
+	 */
+	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+	/* Program the requested value; a hard-coded 1 would make "disable"
+	 * re-enable the capability while the cached state says otherwise.
+	 */
+	MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, enable);
+
+	err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport,
+					    MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA migratable cap");
+		goto out_free;
+	}
+
+	vport->info.mig_enabled = enable;
+
+out_free:
+	kfree(query_ctx);
+out:
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
+
+/* Report the cached RoCE state of a devlink port function.
+ *
+ * @port:       devlink port whose function is queried
+ * @is_enabled: written with vport->info.roce_enabled on success
+ * @extack:     receives an error message on failure
+ *
+ * Returns 0 on success; -EOPNOTSUPP when the vport is not enabled; or the
+ * error from the eswitch or vport lookup. The vport state is read under
+ * esw->state_lock.
+ */
+int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled,
+				  struct netlink_ext_ack *extack)
+{
+	struct mlx5_eswitch *esw = mlx5_devlink_eswitch_get(port->devlink);
+	struct mlx5_vport *vport;
+	int ret;
+
+	if (IS_ERR(esw))
+		return PTR_ERR(esw);
+
+	vport = mlx5_devlink_port_fn_get_vport(port, esw);
+	if (IS_ERR(vport)) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+		return PTR_ERR(vport);
+	}
+
+	mutex_lock(&esw->state_lock);
+	if (!vport->enabled) {
+		ret = -EOPNOTSUPP;
+	} else {
+		*is_enabled = vport->info.roce_enabled;
+		ret = 0;
+	}
+	mutex_unlock(&esw->state_lock);
+
+	return ret;
+}
+
+/* Enable or disable the RoCE HCA capability of a devlink port function.
+ *
+ * @port:   devlink port whose function is modified
+ * @enable: requested state of the roce capability
+ * @extack: receives an error message on failure
+ *
+ * Reads the function's current MLX5_CAP_GENERAL capabilities, updates the
+ * roce bit to @enable and writes them back, then caches the new state in
+ * vport->info.roce_enabled. All vport state is accessed under
+ * esw->state_lock.
+ *
+ * Returns 0 on success (including when the cached state already equals
+ * @enable); -EOPNOTSUPP when the vport is disabled; -ENOMEM on allocation
+ * failure; or the error from the eswitch/vport lookup or the capability
+ * query/set commands.
+ */
+int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable,
+				  struct netlink_ext_ack *extack)
+{
+	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	struct mlx5_eswitch *esw;
+	struct mlx5_vport *vport;
+	int err = -EOPNOTSUPP;
+	void *query_ctx;
+	void *hca_caps;
+	u16 vport_num;
+
+	esw = mlx5_devlink_eswitch_get(port->devlink);
+	if (IS_ERR(esw))
+		return PTR_ERR(esw);
+
+	vport = mlx5_devlink_port_fn_get_vport(port, esw);
+	if (IS_ERR(vport)) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+		return PTR_ERR(vport);
+	}
+	vport_num = vport->vport;
+
+	mutex_lock(&esw->state_lock);
+	if (!vport->enabled) {
+		NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled");
+		goto out;
+	}
+
+	/* Nothing to do when the cached state already matches the request. */
+	if (vport->info.roce_enabled == enable) {
+		err = 0;
+		goto out;
+	}
+
+	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+	if (!query_ctx) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
+					    MLX5_CAP_GENERAL);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
+		goto out_free;
+	}
+
+	/* The capabilities are modified in place inside the query output
+	 * buffer; copying that region onto itself (same source and
+	 * destination) is pointless, so no memcpy is done here.
+	 */
+	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+	MLX5_SET(cmd_hca_cap, hca_caps, roce, enable);
+
+	err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num,
+					    MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA roce cap");
+		goto out_free;
+	}
+
+	vport->info.roce_enabled = enable;
+
+out_free:
+	kfree(query_ctx);
+out:
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 9995307d374b..5a85d8c1e797 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -448,7 +448,8 @@ static bool is_fwd_dest_type(enum mlx5_flow_destination_type type)
type == MLX5_FLOW_DESTINATION_TYPE_UPLINK ||
type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER ||
- type == MLX5_FLOW_DESTINATION_TYPE_TIR;
+ type == MLX5_FLOW_DESTINATION_TYPE_TIR ||
+ type == MLX5_FLOW_DESTINATION_TYPE_RANGE;
}
static bool check_valid_spec(const struct mlx5_flow_spec *spec)
@@ -1578,7 +1579,13 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM &&
d1->ft_num == d2->ft_num) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER &&
- d1->sampler_id == d2->sampler_id))
+ d1->sampler_id == d2->sampler_id) ||
+ (d1->type == MLX5_FLOW_DESTINATION_TYPE_RANGE &&
+ d1->range.field == d2->range.field &&
+ d1->range.hit_ft == d2->range.hit_ft &&
+ d1->range.miss_ft == d2->range.miss_ft &&
+ d1->range.min == d2->range.min &&
+ d1->range.max == d2->range.max))
return true;
}
@@ -1962,6 +1969,9 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
if (flow_act->fg && ft->autogroup.active)
return ERR_PTR(-EINVAL);
+ if (dest && dest_num <= 0)
+ return ERR_PTR(-EINVAL);
+
for (i = 0; i < dest_num; i++) {
if (!dest_is_valid(&dest[i], flow_act, ft))
return ERR_PTR(-EINVAL);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 3af50fd04d28..f137a0611b77 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -123,6 +123,7 @@ enum mlx5_flow_steering_mode {
enum mlx5_flow_steering_capabilty {
MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX = 1UL << 0,
MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX = 1UL << 1,
+ MLX5_FLOW_STEERING_CAP_MATCH_RANGES = 1UL << 2,
};
struct mlx5_flow_steering {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
index 0259a149a64c..d9fcb9ed726f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
@@ -118,13 +118,41 @@ struct mlx5_fib_event_work {
};
};
+/* Return the netdev of the next nexthop in @fi that maps to a LAG port.
+ *
+ * With @current_dev == NULL the scan starts at nexthop 0. Otherwise every
+ * nexthop up to and including the first one whose device is @current_dev is
+ * skipped; if no nexthop uses @current_dev, nothing is left to scan.
+ * Returns NULL when none of the remaining nexthop devices has a
+ * non-negative index from mlx5_lag_dev_get_netdev_idx().
+ */
+static struct net_device*
+mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev,
+			  struct fib_info *fi,
+			  struct net_device *current_dev)
+{
+	int nhs = fib_info_num_path(fi);
+	int pos = 0;
+	int port;
+
+	if (current_dev) {
+		while (pos < nhs &&
+		       fib_info_nh(fi, pos)->fib_nh_dev != current_dev)
+			pos++;
+		/* Step past the matching nexthop itself. */
+		if (pos < nhs)
+			pos++;
+	}
+
+	while (pos < nhs) {
+		port = mlx5_lag_dev_get_netdev_idx(ldev,
+						   fib_info_nh(fi, pos)->fib_nh_dev);
+		if (port >= 0)
+			return ldev->pf[port].netdev;
+		pos++;
+	}
+
+	return NULL;
+}
+
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
struct fib_entry_notifier_info *fen_info)
{
+ struct net_device *nh_dev0, *nh_dev1;
struct fib_info *fi = fen_info->fi;
struct lag_mp *mp = &ldev->lag_mp;
- struct fib_nh *fib_nh0, *fib_nh1;
- unsigned int nhs;
/* Handle delete event */
if (event == FIB_EVENT_ENTRY_DEL) {
@@ -140,16 +168,25 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
fi->fib_priority >= mp->fib.priority)
return;
+ nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL);
+ nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0);
+
/* Handle add/replace event */
- nhs = fib_info_num_path(fi);
- if (nhs == 1) {
- if (__mlx5_lag_is_active(ldev)) {
- struct fib_nh *nh = fib_info_nh(fi, 0);
- struct net_device *nh_dev = nh->fib_nh_dev;
- int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
+ if (!nh_dev0) {
+ if (mp->fib.dst == fen_info->dst && mp->fib.dst_len == fen_info->dst_len)
+ mp->fib.mfi = NULL;
+ return;
+ }
- if (i < 0)
- return;
+ if (nh_dev0 == nh_dev1) {
+ mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
+ "Multipath offload doesn't support routes with multiple nexthops of the same device");
+ return;
+ }
+
+ if (!nh_dev1) {
+ if (__mlx5_lag_is_active(ldev)) {
+ int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev0);
i++;
mlx5_lag_set_port_affinity(ldev, i);
@@ -159,21 +196,6 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
return;
}
- if (nhs != 2)
- return;
-
- /* Verify next hops are ports of the same hca */
- fib_nh0 = fib_info_nh(fi, 0);
- fib_nh1 = fib_info_nh(fi, 1);
- if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
- fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
- !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
- fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
- mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
- "Multipath offload require two ports of the same HCA\n");
- return;
- }
-
/* First time we see multipath route */
if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
struct lag_tracker tracker;
@@ -268,7 +290,6 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
struct mlx5_fib_event_work *fib_work;
struct fib_entry_notifier_info *fen_info;
struct fib_nh_notifier_info *fnh_info;
- struct net_device *fib_dev;
struct fib_info *fi;
if (info->family != AF_INET)
@@ -285,11 +306,7 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
fi = fen_info->fi;
if (fi->nh)
return NOTIFY_DONE;
- fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
- if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
- fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
- return NOTIFY_DONE;
- }
+
fib_work = mlx5_lag_init_fib_work(ldev, event);
if (!fib_work)
return NOTIFY_DONE;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index a806e3de7b7c..029305a8b80a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -324,7 +324,10 @@ void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev);
int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery);
int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery);
-int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out);
+int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 function_id,
+ u16 opmod);
+/* Expands to mlx5_vport_get_other_func_cap() with the capability type fixed to MLX5_CAP_GENERAL. */
+#define mlx5_vport_get_other_func_general_cap(dev, fid, out) \
+ mlx5_vport_get_other_func_cap(dev, fid, out, MLX5_CAP_GENERAL)
void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work);
static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 662f1d55e30e..6bde18bcd42f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -4,6 +4,7 @@
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
#include "pci_irq.h"
@@ -101,7 +102,7 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
goto out;
}
- ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap);
+ ret = mlx5_vport_get_other_func_general_cap(dev, function_id, query_cap);
if (ret)
goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index b1dfad274a39..ee104cf04392 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -44,6 +44,7 @@ static const char * const action_type_to_str[] = {
[DR_ACTION_TYP_INSERT_HDR] = "DR_ACTION_TYP_INSERT_HDR",
[DR_ACTION_TYP_REMOVE_HDR] = "DR_ACTION_TYP_REMOVE_HDR",
[DR_ACTION_TYP_ASO_FLOW_METER] = "DR_ACTION_TYP_ASO_FLOW_METER",
+ [DR_ACTION_TYP_RANGE] = "DR_ACTION_TYP_RANGE",
[DR_ACTION_TYP_MAX] = "DR_ACTION_UNKNOWN",
};
@@ -61,6 +62,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
@@ -79,6 +81,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_TAG] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
@@ -94,6 +97,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_TAG] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
@@ -103,6 +107,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_HDR,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
@@ -116,6 +121,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_TAG] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
@@ -129,6 +135,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_PUSH_VLAN] = {
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_TAG] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
@@ -141,6 +148,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
@@ -159,6 +167,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
},
[DR_ACTION_STATE_TERM] = {
@@ -169,6 +178,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_NO_ACTION] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
@@ -183,6 +193,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_DECAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
@@ -190,6 +201,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_ENCAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO,
@@ -197,6 +209,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_MODIFY_HDR] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
@@ -207,6 +220,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
},
[DR_ACTION_STATE_POP_VLAN] = {
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
@@ -220,6 +234,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_PUSH_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
@@ -231,6 +246,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_NON_TERM] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
@@ -250,6 +266,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
},
[DR_ACTION_STATE_TERM] = {
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
@@ -259,6 +276,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_NO_ACTION] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
[DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP,
@@ -276,6 +294,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_DECAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
@@ -291,6 +310,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
@@ -299,6 +319,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_MODIFY_HDR] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
@@ -311,6 +332,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_POP_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
@@ -324,6 +346,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_PUSH_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
@@ -337,6 +360,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_NON_TERM] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
[DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP,
@@ -354,6 +378,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_ASO] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
},
@@ -365,6 +390,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_NO_ACTION] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
@@ -380,6 +406,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_DECAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
@@ -388,6 +415,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_ENCAP] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
@@ -396,6 +424,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_MODIFY_HDR] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
[DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP,
@@ -407,6 +436,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
},
[DR_ACTION_STATE_POP_VLAN] = {
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN,
[DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN,
@@ -421,6 +451,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_PUSH_VLAN] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN,
@@ -433,6 +464,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_STATE_NON_TERM] = {
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
[DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
@@ -452,6 +484,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
[DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN,
[DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
[DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO,
},
@@ -634,6 +667,83 @@ static void dr_action_print_sequence(struct mlx5dr_domain *dmn,
actions[i]->action_type);
}
+/* Resolve the ICM address of a FW-owned destination table.
+ *
+ * A zero rx_icm_addr is treated as "not queried yet": in that case the
+ * table's RX/TX root addresses are fetched with mlx5dr_cmd_query_flow_table()
+ * and stored in dest_tbl->fw_tbl, so later calls skip the query.
+ *
+ * Return: 0 with *final_icm_addr set to the RX or TX address (selected by
+ * is_rx_rule), or the error code returned by the query.
+ */
+static int dr_action_get_dest_fw_tbl_addr(struct mlx5dr_matcher *matcher,
+					  struct mlx5dr_action_dest_tbl *dest_tbl,
+					  bool is_rx_rule,
+					  u64 *final_icm_addr)
+{
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+
+	if (!dest_tbl->fw_tbl.rx_icm_addr) {
+		struct mlx5dr_cmd_query_flow_table_details details;
+		int err;
+
+		err = mlx5dr_cmd_query_flow_table(dmn->mdev,
+						  dest_tbl->fw_tbl.type,
+						  dest_tbl->fw_tbl.id,
+						  &details);
+		if (err) {
+			mlx5dr_err(dmn,
+				   "Failed mlx5_cmd_query_flow_table ret: %d\n",
+				   err);
+			return err;
+		}
+
+		/* rx_icm_addr doubles as the "already queried" marker,
+		 * so it is written after tx_icm_addr.
+		 */
+		dest_tbl->fw_tbl.tx_icm_addr = details.sw_owner_icm_root_1;
+		dest_tbl->fw_tbl.rx_icm_addr = details.sw_owner_icm_root_0;
+	}
+
+	if (is_rx_rule)
+		*final_icm_addr = dest_tbl->fw_tbl.rx_icm_addr;
+	else
+		*final_icm_addr = dest_tbl->fw_tbl.tx_icm_addr;
+
+	return 0;
+}
+
+/* Resolve the ICM address of a SW-steering destination table.
+ *
+ * The destination table must belong to the same domain as the matcher.
+ * Pointing at a table whose level is lower than or equal to the matcher's
+ * table level is allowed, but is reported through debug prints.
+ *
+ * Return: 0 with *final_icm_addr set to the ICM address of the RX or TX
+ * start-anchor chunk (selected by is_rx_rule), or -EINVAL when the
+ * destination table belongs to a different domain.
+ */
+static int dr_action_get_dest_sw_tbl_addr(struct mlx5dr_matcher *matcher,
+					  struct mlx5dr_action_dest_tbl *dest_tbl,
+					  bool is_rx_rule,
+					  u64 *final_icm_addr)
+{
+	struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+	struct mlx5dr_icm_chunk *anchor_chunk;
+
+	if (dest_tbl->tbl->dmn != dmn) {
+		mlx5dr_err(dmn,
+			   "Destination table belongs to a different domain\n");
+		return -EINVAL;
+	}
+
+	if (matcher->tbl->level >= dest_tbl->tbl->level) {
+		mlx5_core_dbg_once(dmn->mdev,
+				   "Connecting table to a lower/same level destination table\n");
+		mlx5dr_dbg(dmn,
+			   "Connecting table at level %d to a destination table at level %d\n",
+			   matcher->tbl->level,
+			   dest_tbl->tbl->level);
+	}
+
+	if (is_rx_rule)
+		anchor_chunk = dest_tbl->tbl->rx.s_anchor->chunk;
+	else
+		anchor_chunk = dest_tbl->tbl->tx.s_anchor->chunk;
+
+	*final_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(anchor_chunk);
+	return 0;
+}
+
+/* Resolve the ICM address of a destination table action, dispatching to
+ * the FW-table or SW-table variant according to dest_tbl->is_fw_tbl.
+ *
+ * Return: whatever the selected helper returns (0 on success).
+ */
+static int dr_action_get_dest_tbl_addr(struct mlx5dr_matcher *matcher,
+				       struct mlx5dr_action_dest_tbl *dest_tbl,
+				       bool is_rx_rule,
+				       u64 *final_icm_addr)
+{
+	int err;
+
+	if (dest_tbl->is_fw_tbl)
+		err = dr_action_get_dest_fw_tbl_addr(matcher, dest_tbl,
+						     is_rx_rule,
+						     final_icm_addr);
+	else
+		err = dr_action_get_dest_sw_tbl_addr(matcher, dest_tbl,
+						     is_rx_rule,
+						     final_icm_addr);
+
+	return err;
+}
+
#define WITH_VLAN_NUM_HW_ACTIONS 6
int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
@@ -661,8 +771,6 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->type);
for (i = 0; i < num_actions; i++) {
- struct mlx5dr_action_dest_tbl *dest_tbl;
- struct mlx5dr_icm_chunk *chunk;
struct mlx5dr_action *action;
int max_actions_type = 1;
u32 action_type;
@@ -676,50 +784,27 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
break;
case DR_ACTION_TYP_FT:
dest_action = action;
- dest_tbl = action->dest_tbl;
- if (!dest_tbl->is_fw_tbl) {
- if (dest_tbl->tbl->dmn != dmn) {
- mlx5dr_err(dmn,
- "Destination table belongs to a different domain\n");
- return -EINVAL;
- }
- if (dest_tbl->tbl->level <= matcher->tbl->level) {
- mlx5_core_dbg_once(dmn->mdev,
- "Connecting table to a lower/same level destination table\n");
- mlx5dr_dbg(dmn,
- "Connecting table at level %d to a destination table at level %d\n",
- matcher->tbl->level,
- dest_tbl->tbl->level);
- }
- chunk = rx_rule ? dest_tbl->tbl->rx.s_anchor->chunk :
- dest_tbl->tbl->tx.s_anchor->chunk;
- attr.final_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk);
- } else {
- struct mlx5dr_cmd_query_flow_table_details output;
- int ret;
-
- /* get the relevant addresses */
- if (!action->dest_tbl->fw_tbl.rx_icm_addr) {
- ret = mlx5dr_cmd_query_flow_table(dmn->mdev,
- dest_tbl->fw_tbl.type,
- dest_tbl->fw_tbl.id,
- &output);
- if (!ret) {
- dest_tbl->fw_tbl.tx_icm_addr =
- output.sw_owner_icm_root_1;
- dest_tbl->fw_tbl.rx_icm_addr =
- output.sw_owner_icm_root_0;
- } else {
- mlx5dr_err(dmn,
- "Failed mlx5_cmd_query_flow_table ret: %d\n",
- ret);
- return ret;
- }
- }
- attr.final_icm_addr = rx_rule ?
- dest_tbl->fw_tbl.rx_icm_addr :
- dest_tbl->fw_tbl.tx_icm_addr;
- }
+ ret = dr_action_get_dest_tbl_addr(matcher, action->dest_tbl,
+ rx_rule, &attr.final_icm_addr);
+ if (ret)
+ return ret;
+ break;
+ case DR_ACTION_TYP_RANGE:
+ ret = dr_action_get_dest_tbl_addr(matcher,
+ action->range->hit_tbl_action->dest_tbl,
+ rx_rule, &attr.final_icm_addr);
+ if (ret)
+ return ret;
+
+ ret = dr_action_get_dest_tbl_addr(matcher,
+ action->range->miss_tbl_action->dest_tbl,
+ rx_rule, &attr.range.miss_icm_addr);
+ if (ret)
+ return ret;
+
+ attr.range.definer_id = action->range->definer_id;
+ attr.range.min = action->range->min;
+ attr.range.max = action->range->max;
break;
case DR_ACTION_TYP_QP:
mlx5dr_info(dmn, "Domain doesn't support QP\n");
@@ -866,6 +951,7 @@ static unsigned int action_size[DR_ACTION_TYP_MAX] = {
[DR_ACTION_TYP_REMOVE_HDR] = sizeof(struct mlx5dr_action_reformat),
[DR_ACTION_TYP_SAMPLER] = sizeof(struct mlx5dr_action_sampler),
[DR_ACTION_TYP_ASO_FLOW_METER] = sizeof(struct mlx5dr_action_aso_flow_meter),
+ [DR_ACTION_TYP_RANGE] = sizeof(struct mlx5dr_action_range),
};
static struct mlx5dr_action *
@@ -933,6 +1019,123 @@ dec_ref:
return NULL;
}
+/* Fill the description of the definer used by the match-range action.
+ *
+ * The definer uses the SELECT format: DW selector 0 picks the dword that
+ * holds the outer packet length, every other DW/byte selector is marked
+ * unused, and the mask covers the upper 16 bits of match_dw_0.
+ *
+ * All four arguments are outputs; the caller supplies arrays of
+ * MLX5_IFC_DEFINER_DW_SELECTORS_NUM / MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM
+ * entries and a match_definer_match_mask-layout mask buffer.
+ */
+static void dr_action_range_definer_fill(u16 *format_id,
+					 u8 *dw_selectors,
+					 u8 *byte_selectors,
+					 u8 *match_mask)
+{
+	int idx;
+
+	*format_id = MLX5_IFC_DEFINER_FORMAT_ID_SELECT;
+
+	/* Start with every selector unused ... */
+	for (idx = 0; idx < MLX5_IFC_DEFINER_DW_SELECTORS_NUM; idx++)
+		dw_selectors[idx] = MLX5_IFC_DEFINER_FORMAT_OFFSET_UNUSED;
+
+	for (idx = 0; idx < MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM; idx++)
+		byte_selectors[idx] = MLX5_IFC_DEFINER_FORMAT_OFFSET_UNUSED;
+
+	/* ... then point DW selector 0 at the packet length dword
+	 * (the offset constant is in bytes, selectors are in dwords).
+	 */
+	dw_selectors[0] = MLX5_IFC_DEFINER_FORMAT_OFFSET_OUTER_ETH_PKT_LEN / 4;
+
+	MLX5_SET(match_definer_match_mask, match_mask,
+		 match_dw_0, 0xffffUL << 16);
+}
+
+/* Obtain (create or reuse) the packet-length definer for a match-range
+ * action and record its ID in action->range->definer_id.
+ *
+ * action->range->dmn must already be set by the caller.
+ *
+ * Return: 0 on success, or the error returned by mlx5dr_definer_get().
+ */
+static int dr_action_create_range_definer(struct mlx5dr_action *action)
+{
+	u8 match_mask[MLX5_FLD_SZ_BYTES(match_definer, match_mask)] = {};
+	u8 byte_selectors[MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM] = {};
+	u8 dw_selectors[MLX5_IFC_DEFINER_DW_SELECTORS_NUM] = {};
+	struct mlx5dr_domain *dmn = action->range->dmn;
+	u32 definer_id;
+	u16 format_id;
+	int err;
+
+	dr_action_range_definer_fill(&format_id, dw_selectors,
+				     byte_selectors, match_mask);
+
+	err = mlx5dr_definer_get(dmn, format_id, dw_selectors,
+				 byte_selectors, match_mask, &definer_id);
+	if (!err)
+		action->range->definer_id = definer_id;
+
+	return err;
+}
+
+/* Drop the match-range action's reference on its definer. */
+static void dr_action_destroy_range_definer(struct mlx5dr_action *action)
+{
+	struct mlx5dr_domain *dmn = action->range->dmn;
+
+	mlx5dr_definer_put(dmn, action->range->definer_id);
+}
+
+/* Create a destination action that forwards to hit_ft or miss_ft
+ * according to a [min, max] range applied to a packet field.
+ *
+ * @dmn:     domain the action is created in
+ * @field:   field to match on; only MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN
+ *           is accepted
+ * @hit_ft:  flow table for the range-hit destination
+ * @miss_ft: flow table for the range-miss destination
+ * @min:     range lower bound, must fit in 16 bits
+ * @max:     range upper bound, must fit in 16 bits
+ *
+ * Internally a destination-table action is created for each of hit_ft and
+ * miss_ft (FW-table or SW-table flavour, per mlx5dr_is_fw_table()), and a
+ * definer is obtained for the range match.
+ *
+ * Return: the new action, or NULL if match ranges are not supported by
+ * the device, a parameter is invalid, or any allocation step fails.
+ */
+struct mlx5dr_action *
+mlx5dr_action_create_dest_match_range(struct mlx5dr_domain *dmn,
+				      u32 field,
+				      struct mlx5_flow_table *hit_ft,
+				      struct mlx5_flow_table *miss_ft,
+				      u32 min,
+				      u32 max)
+{
+	struct mlx5dr_action *action;
+
+	if (!mlx5dr_supp_match_ranges(dmn->mdev)) {
+		mlx5dr_dbg(dmn, "SELECT definer support is needed for match range\n");
+		return NULL;
+	}
+
+	if (field != MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN ||
+	    min > 0xffff || max > 0xffff) {
+		mlx5dr_err(dmn, "Invalid match range parameters\n");
+		return NULL;
+	}
+
+	action = dr_action_create_generic(DR_ACTION_TYP_RANGE);
+	if (!action)
+		return NULL;
+
+	/* Destination taken when the field falls inside the range */
+	if (mlx5dr_is_fw_table(hit_ft))
+		action->range->hit_tbl_action =
+			mlx5dr_action_create_dest_flow_fw_table(dmn, hit_ft);
+	else
+		action->range->hit_tbl_action =
+			mlx5dr_action_create_dest_table(hit_ft->fs_dr_table.dr_table);
+
+	if (!action->range->hit_tbl_action)
+		goto free_action;
+
+	/* Destination taken when the field falls outside the range */
+	if (mlx5dr_is_fw_table(miss_ft))
+		action->range->miss_tbl_action =
+			mlx5dr_action_create_dest_flow_fw_table(dmn, miss_ft);
+	else
+		action->range->miss_tbl_action =
+			mlx5dr_action_create_dest_table(miss_ft->fs_dr_table.dr_table);
+
+	if (!action->range->miss_tbl_action)
+		goto free_hit_tbl_action;
+
+	action->range->min = min;
+	action->range->max = max;
+	action->range->dmn = dmn;
+
+	if (dr_action_create_range_definer(action))
+		goto free_miss_tbl_action;
+
+	/* No need to increase refcount on domain for this action,
+	 * the hit/miss table actions will do it internally.
+	 */
+
+	return action;
+
+	/* Unwind in reverse order of creation */
+free_miss_tbl_action:
+	mlx5dr_action_destroy(action->range->miss_tbl_action);
+free_hit_tbl_action:
+	mlx5dr_action_destroy(action->range->hit_tbl_action);
+free_action:
+	kfree(action);
+
+	return NULL;
+}
+
struct mlx5dr_action *
mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
struct mlx5dr_action_dest *dests,
@@ -1980,6 +2183,11 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action)
case DR_ACTION_TYP_ASO_FLOW_METER:
refcount_dec(&action->aso->dmn->refcount);
break;
+ case DR_ACTION_TYP_RANGE:
+ dr_action_destroy_range_definer(action);
+ mlx5dr_action_destroy(action->range->miss_tbl_action);
+ mlx5dr_action_destroy(action->range->hit_tbl_action);
+ break;
default:
break;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index b4739eafc180..07b6a6dcb92f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -564,6 +564,83 @@ void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev,
mlx5_cmd_exec_in(mdev, dealloc_packet_reformat_context, in);
}
+/* Write the DW and byte selectors into a match_definer object context.
+ *
+ * Selectors are only written for the SELECT format; for any other
+ * format_id the context is left untouched.
+ *
+ * @ptr:            match_definer context to fill
+ * @format_id:      definer format
+ * @dw_selectors:   nine DW selector values (indices 0..8 are read)
+ * @byte_selectors: eight byte selector values (indices 0..7 are read)
+ */
+static void dr_cmd_set_definer_format(void *ptr, u16 format_id,
+				      u8 *dw_selectors,
+				      u8 *byte_selectors)
+{
+	if (format_id == MLX5_IFC_DEFINER_FORMAT_ID_SELECT) {
+		/* DW selectors */
+		MLX5_SET(match_definer, ptr, format_select_dw0, dw_selectors[0]);
+		MLX5_SET(match_definer, ptr, format_select_dw1, dw_selectors[1]);
+		MLX5_SET(match_definer, ptr, format_select_dw2, dw_selectors[2]);
+		MLX5_SET(match_definer, ptr, format_select_dw3, dw_selectors[3]);
+		MLX5_SET(match_definer, ptr, format_select_dw4, dw_selectors[4]);
+		MLX5_SET(match_definer, ptr, format_select_dw5, dw_selectors[5]);
+		MLX5_SET(match_definer, ptr, format_select_dw6, dw_selectors[6]);
+		MLX5_SET(match_definer, ptr, format_select_dw7, dw_selectors[7]);
+		MLX5_SET(match_definer, ptr, format_select_dw8, dw_selectors[8]);
+
+		/* Byte selectors */
+		MLX5_SET(match_definer, ptr, format_select_byte0, byte_selectors[0]);
+		MLX5_SET(match_definer, ptr, format_select_byte1, byte_selectors[1]);
+		MLX5_SET(match_definer, ptr, format_select_byte2, byte_selectors[2]);
+		MLX5_SET(match_definer, ptr, format_select_byte3, byte_selectors[3]);
+		MLX5_SET(match_definer, ptr, format_select_byte4, byte_selectors[4]);
+		MLX5_SET(match_definer, ptr, format_select_byte5, byte_selectors[5]);
+		MLX5_SET(match_definer, ptr, format_select_byte6, byte_selectors[6]);
+		MLX5_SET(match_definer, ptr, format_select_byte7, byte_selectors[7]);
+	}
+}
+
+/* Create a MATCH_DEFINER general object on the device.
+ *
+ * @mdev:           device to issue the command on
+ * @format_id:      definer format
+ * @dw_selectors:   DW selectors (used only for the SELECT format)
+ * @byte_selectors: byte selectors (used only for the SELECT format)
+ * @match_mask:     mask, MLX5_FLD_SZ_BYTES(match_definer, match_mask)
+ *                  bytes are copied from this buffer
+ * @definer_id:     output, object ID of the created definer
+ *
+ * Return: 0 on success, or the error returned by mlx5_cmd_exec(), in
+ * which case *definer_id is not written.
+ */
+int mlx5dr_cmd_create_definer(struct mlx5_core_dev *mdev,
+			      u16 format_id,
+			      u8 *dw_selectors,
+			      u8 *byte_selectors,
+			      u8 *match_mask,
+			      u32 *definer_id)
+{
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+	u32 in[MLX5_ST_SZ_DW(create_match_definer_in)] = {};
+	void *cmd_hdr;
+	void *definer_ctx;
+	void *mask_dst;
+	int err;
+
+	/* General object command header */
+	cmd_hdr = MLX5_ADDR_OF(create_match_definer_in, in,
+			       general_obj_in_cmd_hdr);
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type,
+		 MLX5_OBJ_TYPE_MATCH_DEFINER);
+
+	/* Definer context: format, selectors and mask */
+	definer_ctx = MLX5_ADDR_OF(create_match_definer_in, in, obj_context);
+	MLX5_SET(match_definer, definer_ctx, format_id, format_id);
+
+	dr_cmd_set_definer_format(definer_ctx, format_id,
+				  dw_selectors, byte_selectors);
+
+	mask_dst = MLX5_ADDR_OF(match_definer, definer_ctx, match_mask);
+	memcpy(mask_dst, match_mask,
+	       MLX5_FLD_SZ_BYTES(match_definer, match_mask));
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	*definer_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+	return 0;
+}
+
+/* Destroy a MATCH_DEFINER general object on the device.
+ *
+ * @mdev:       device to issue the command on
+ * @definer_id: object ID returned by mlx5dr_cmd_create_definer()
+ *
+ * Best effort: the command status is intentionally not reported, as the
+ * callers are teardown paths that cannot act on a failure.
+ */
+void
+mlx5dr_cmd_destroy_definer(struct mlx5_core_dev *mdev, u32 definer_id)
+{
+	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+	/* Zero the output mailbox, as the create command does, so that no
+	 * stale stack contents are ever handed to the command layer.
+	 */
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_MATCH_DEFINER);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, definer_id);
+
+	mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
u16 index, struct mlx5dr_cmd_gid_attr *attr)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
index 7adcf0eec13b..db81d881d38e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
@@ -49,7 +49,8 @@ enum dr_dump_rec_type {
DR_DUMP_REC_TYPE_ACTION_POP_VLAN = 3413,
DR_DUMP_REC_TYPE_ACTION_SAMPLER = 3415,
DR_DUMP_REC_TYPE_ACTION_INSERT_HDR = 3420,
- DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR = 3421
+ DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR = 3421,
+ DR_DUMP_REC_TYPE_ACTION_MATCH_RANGE = 3425,
};
void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl)
@@ -107,6 +108,8 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
{
struct mlx5dr_action *action = action_mem->action;
const u64 action_id = DR_DBG_PTR_TO_ID(action);
+ u64 hit_tbl_ptr, miss_tbl_ptr;
+ u32 hit_tbl_id, miss_tbl_id;
switch (action->action_type) {
case DR_ACTION_TYP_DROP:
@@ -198,6 +201,30 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
action->sampler->rx_icm_addr,
action->sampler->tx_icm_addr);
break;
+ case DR_ACTION_TYP_RANGE:
+ if (action->range->hit_tbl_action->dest_tbl->is_fw_tbl) {
+ hit_tbl_id = action->range->hit_tbl_action->dest_tbl->fw_tbl.id;
+ hit_tbl_ptr = 0;
+ } else {
+ hit_tbl_id = action->range->hit_tbl_action->dest_tbl->tbl->table_id;
+ hit_tbl_ptr =
+ DR_DBG_PTR_TO_ID(action->range->hit_tbl_action->dest_tbl->tbl);
+ }
+
+ if (action->range->miss_tbl_action->dest_tbl->is_fw_tbl) {
+ miss_tbl_id = action->range->miss_tbl_action->dest_tbl->fw_tbl.id;
+ miss_tbl_ptr = 0;
+ } else {
+ miss_tbl_id = action->range->miss_tbl_action->dest_tbl->tbl->table_id;
+ miss_tbl_ptr =
+ DR_DBG_PTR_TO_ID(action->range->miss_tbl_action->dest_tbl->tbl);
+ }
+
+ seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%llx,0x%x,0x%llx,0x%x\n",
+ DR_DUMP_REC_TYPE_ACTION_MATCH_RANGE, action_id, rule_id,
+ hit_tbl_id, hit_tbl_ptr, miss_tbl_id, miss_tbl_ptr,
+ action->range->definer_id);
+ break;
default:
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_definer.c
new file mode 100644
index 000000000000..d5ea97751945
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_definer.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "dr_types.h"
+#include "dr_ste.h"
+
+struct dr_definer_object { /* one device definer tracked in dmn->definers_xa */
+ u32 id; /* object ID from mlx5dr_cmd_create_definer(); also the xarray index */
+ u16 format_id; /* copy of the creation format, used by dr_definer_compare() */
+ u8 dw_selectors[MLX5_IFC_DEFINER_DW_SELECTORS_NUM]; /* copy of the creation DW selectors */
+ u8 byte_selectors[MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM]; /* copy of the creation byte selectors */
+ u8 match_mask[DR_STE_SIZE_MATCH_TAG]; /* first DR_STE_SIZE_MATCH_TAG bytes of the creation mask */
+ refcount_t refcount; /* 1 on creation, +1 per reuse in mlx5dr_definer_get(), -1 per mlx5dr_definer_put() */
+};
+
+/* Check whether an existing definer object matches a requested layout.
+ *
+ * Return: true when the format ID, all DW selectors, all byte selectors
+ * and the first DR_STE_SIZE_MATCH_TAG bytes of the mask are equal.
+ *
+ * NOTE(review): only DR_STE_SIZE_MATCH_TAG bytes of match_mask are
+ * compared - confirm that callers' mask buffers are at least that large
+ * and carry nothing significant beyond that length.
+ */
+static bool dr_definer_compare(struct dr_definer_object *definer,
+			       u16 format_id, u8 *dw_selectors,
+			       u8 *byte_selectors, u8 *match_mask)
+{
+	return definer->format_id == format_id &&
+	       !memcmp(definer->dw_selectors, dw_selectors,
+		       MLX5_IFC_DEFINER_DW_SELECTORS_NUM) &&
+	       !memcmp(definer->byte_selectors, byte_selectors,
+		       MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM) &&
+	       !memcmp(definer->match_mask, match_mask,
+		       DR_STE_SIZE_MATCH_TAG);
+}
+
+/* Look through the domain's definers for one with the requested layout.
+ *
+ * Return: the first matching definer object in dmn->definers_xa, or NULL
+ * when none matches. No reference is taken here.
+ */
+static struct dr_definer_object *
+dr_definer_find_obj(struct mlx5dr_domain *dmn, u16 format_id,
+		    u8 *dw_selectors, u8 *byte_selectors, u8 *match_mask)
+{
+	struct dr_definer_object *candidate;
+	unsigned long idx;
+
+	xa_for_each(&dmn->definers_xa, idx, candidate) {
+		if (!dr_definer_compare(candidate, format_id,
+					dw_selectors, byte_selectors,
+					match_mask))
+			continue;
+
+		return candidate;
+	}
+
+	return NULL;
+}
+
+/* Create a new definer on the device and start tracking it in the domain.
+ *
+ * The object is created through mlx5dr_cmd_create_definer(), its layout is
+ * copied into the tracking structure (for later dr_definer_compare()
+ * lookups), its refcount is set to 1 and it is inserted into
+ * dmn->definers_xa indexed by the device-assigned ID.
+ *
+ * Return: the new definer object, or NULL on allocation failure, command
+ * failure, a definer ID above 0xff, or xarray insertion failure. On the
+ * last two the device object is destroyed again before returning.
+ */
+static struct dr_definer_object *
+dr_definer_create_obj(struct mlx5dr_domain *dmn, u16 format_id,
+		      u8 *dw_selectors, u8 *byte_selectors, u8 *match_mask)
+{
+	struct dr_definer_object *definer_obj;
+	int ret;
+
+	definer_obj = kzalloc(sizeof(*definer_obj), GFP_KERNEL);
+	if (!definer_obj)
+		return NULL;
+
+	ret = mlx5dr_cmd_create_definer(dmn->mdev,
+					format_id,
+					dw_selectors,
+					byte_selectors,
+					match_mask,
+					&definer_obj->id);
+	if (ret)
+		goto err_free_definer_obj;
+
+	/* Definer ID can have 32 bits, but STE format
+	 * supports only definers with 8 bit IDs.
+	 */
+	if (definer_obj->id > 0xff) {
+		/* id is u32: print it as unsigned */
+		mlx5dr_err(dmn, "Unsupported definer ID (%u)\n", definer_obj->id);
+		goto err_destroy_definer;
+	}
+
+	definer_obj->format_id = format_id;
+	memcpy(definer_obj->dw_selectors, dw_selectors, sizeof(definer_obj->dw_selectors));
+	memcpy(definer_obj->byte_selectors, byte_selectors, sizeof(definer_obj->byte_selectors));
+	memcpy(definer_obj->match_mask, match_mask, sizeof(definer_obj->match_mask));
+
+	refcount_set(&definer_obj->refcount, 1);
+
+	ret = xa_insert(&dmn->definers_xa, definer_obj->id, definer_obj, GFP_KERNEL);
+	if (ret) {
+		mlx5dr_dbg(dmn, "Couldn't insert new definer into xarray (%d)\n", ret);
+		goto err_destroy_definer;
+	}
+
+	return definer_obj;
+
+err_destroy_definer:
+	mlx5dr_cmd_destroy_definer(dmn->mdev, definer_obj->id);
+err_free_definer_obj:
+	kfree(definer_obj);
+
+	return NULL;
+}
+
+/* Tear down a definer: destroy the device object, remove the entry from
+ * dmn->definers_xa and free the tracking structure.
+ */
+static void dr_definer_destroy_obj(struct mlx5dr_domain *dmn,
+				   struct dr_definer_object *definer_obj)
+{
+	u32 id = definer_obj->id;
+
+	mlx5dr_cmd_destroy_definer(dmn->mdev, id);
+	xa_erase(&dmn->definers_xa, id);
+	kfree(definer_obj);
+}
+
+/* Get a reference on a definer with the requested layout.
+ *
+ * An existing definer of the domain with an identical layout is reused
+ * (its refcount is incremented); otherwise a new one is created.
+ *
+ * Return: 0 with *definer_id set to the definer's ID, or -ENOMEM when a
+ * new definer was needed and could not be created (whatever the cause).
+ */
+int mlx5dr_definer_get(struct mlx5dr_domain *dmn, u16 format_id,
+		       u8 *dw_selectors, u8 *byte_selectors,
+		       u8 *match_mask, u32 *definer_id)
+{
+	struct dr_definer_object *obj;
+
+	obj = dr_definer_find_obj(dmn, format_id, dw_selectors,
+				  byte_selectors, match_mask);
+	if (obj) {
+		refcount_inc(&obj->refcount);
+	} else {
+		obj = dr_definer_create_obj(dmn, format_id,
+					    dw_selectors, byte_selectors,
+					    match_mask);
+		if (!obj)
+			return -ENOMEM;
+	}
+
+	*definer_id = obj->id;
+
+	return 0;
+}
+
+/* Drop a reference on a definer obtained with mlx5dr_definer_get().
+ *
+ * The definer is looked up by ID in dmn->definers_xa; when the last
+ * reference is dropped it is destroyed on the device and freed. An
+ * unknown ID is reported and otherwise ignored.
+ */
+void mlx5dr_definer_put(struct mlx5dr_domain *dmn, u32 definer_id)
+{
+	struct dr_definer_object *definer_obj;
+
+	definer_obj = xa_load(&dmn->definers_xa, definer_id);
+	if (!definer_obj) {
+		/* definer_id is u32: print it as unsigned */
+		mlx5dr_err(dmn, "Definer ID %u not found\n", definer_id);
+		return;
+	}
+
+	if (refcount_dec_and_test(&definer_obj->refcount))
+		dr_definer_destroy_obj(dmn, definer_obj);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
index 9a9836218c8e..5b8bb2ca31e6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -425,10 +425,11 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
refcount_set(&dmn->refcount, 1);
mutex_init(&dmn->info.rx.mutex);
mutex_init(&dmn->info.tx.mutex);
+ xa_init(&dmn->definers_xa);
if (dr_domain_caps_init(mdev, dmn)) {
mlx5dr_err(dmn, "Failed init domain, no caps\n");
- goto free_domain;
+ goto def_xa_destroy;
}
dmn->info.max_log_action_icm_sz = DR_CHUNK_SIZE_4K;
@@ -453,7 +454,8 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
uninit_caps:
dr_domain_caps_uninit(dmn);
-free_domain:
+def_xa_destroy:
+ xa_destroy(&dmn->definers_xa);
kfree(dmn);
return NULL;
}
@@ -493,6 +495,7 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
dr_domain_uninit_csum_recalc_fts(dmn);
dr_domain_uninit_resources(dmn);
dr_domain_caps_uninit(dmn);
+ xa_destroy(&dmn->definers_xa);
mutex_destroy(&dmn->info.tx.mutex);
mutex_destroy(&dmn->info.rx.mutex);
kfree(dmn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
index 7879991048ce..74cbe53ee9db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -35,16 +35,28 @@ static int dr_rule_append_to_miss_list(struct mlx5dr_domain *dmn,
return 0;
}
+/* Point the miss address of hw_ste at the matcher's end anchor, unless
+ * the STE context reports that this STE already has its miss address set.
+ */
+static void dr_rule_set_last_ste_miss_addr(struct mlx5dr_matcher *matcher,
+					   struct mlx5dr_matcher_rx_tx *nic_matcher,
+					   u8 *hw_ste)
+{
+	struct mlx5dr_ste_ctx *ste_ctx = matcher->tbl->dmn->ste_ctx;
+
+	if (!mlx5dr_ste_is_miss_addr_set(ste_ctx, hw_ste)) {
+		u64 e_anchor_addr;
+
+		e_anchor_addr =
+			mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
+		mlx5dr_ste_set_miss_addr(ste_ctx, hw_ste, e_anchor_addr);
+	}
+}
+
static struct mlx5dr_ste *
dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher,
struct mlx5dr_matcher_rx_tx *nic_matcher,
u8 *hw_ste)
{
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
- struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
struct mlx5dr_ste_htbl *new_htbl;
struct mlx5dr_ste *ste;
- u64 icm_addr;
/* Create new table for miss entry */
new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
@@ -58,8 +70,7 @@ dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher,
/* One and only entry, never grows */
ste = new_htbl->chunk->ste_arr;
- icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
- mlx5dr_ste_set_miss_addr(ste_ctx, hw_ste, icm_addr);
+ dr_rule_set_last_ste_miss_addr(matcher, nic_matcher, hw_ste);
mlx5dr_htbl_get(new_htbl);
return ste;
@@ -241,7 +252,6 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher,
bool use_update_list = false;
u8 hw_ste[DR_STE_SIZE] = {};
struct mlx5dr_ste *new_ste;
- u64 icm_addr;
int new_idx;
u8 sb_idx;
@@ -250,9 +260,8 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher,
mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask);
/* Copy STE control and tag */
- icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
memcpy(hw_ste, mlx5dr_ste_get_hw_ste(cur_ste), DR_STE_SIZE_REDUCED);
- mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr);
+ dr_rule_set_last_ste_miss_addr(matcher, nic_matcher, hw_ste);
new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl);
new_ste = &new_htbl->chunk->ste_arr[new_idx];
@@ -773,7 +782,6 @@ static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher,
{
struct mlx5dr_domain *dmn = matcher->tbl->dmn;
struct mlx5dr_ste_send_info *ste_info;
- u64 icm_addr;
/* Take ref on table, only on first time this ste is used */
mlx5dr_htbl_get(cur_htbl);
@@ -781,8 +789,7 @@ static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher,
/* new entry -> new branch */
list_add_tail(&ste->miss_list_node, miss_list);
- icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk);
- mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr);
+ dr_rule_set_last_ste_miss_addr(matcher, nic_matcher, hw_ste);
ste->ste_chain_location = ste_location;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 9e19a8dc9022..1e15f605df6e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -90,6 +90,16 @@ static void dr_ste_set_always_miss(struct dr_hw_ste_format *hw_ste)
hw_ste->mask[0] = 0;
}
+/* Ask the STE context whether the miss address of this STE has already
+ * been set for its STE type.
+ *
+ * Return: the result of the context's is_miss_addr_set() callback, or
+ * false when the context does not provide one.
+ */
+bool mlx5dr_ste_is_miss_addr_set(struct mlx5dr_ste_ctx *ste_ctx,
+				 u8 *hw_ste_p)
+{
+	return ste_ctx->is_miss_addr_set &&
+	       ste_ctx->is_miss_addr_set(hw_ste_p);
+}
+
void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx,
u8 *hw_ste_p, u64 miss_addr)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
index 17513baff9b0..7075142bcfb6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
@@ -151,6 +151,7 @@ struct mlx5dr_ste_ctx {
bool is_rx, u16 gvmi);
void (*set_next_lu_type)(u8 *hw_ste_p, u16 lu_type);
u16 (*get_next_lu_type)(u8 *hw_ste_p);
+ bool (*is_miss_addr_set)(u8 *hw_ste_p);
void (*set_miss_addr)(u8 *hw_ste_p, u64 miss_addr);
u64 (*get_miss_addr)(u8 *hw_ste_p);
void (*set_hit_addr)(u8 *hw_ste_p, u64 icm_addr, u32 ht_size);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index ee677a5c76be..084145f18084 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -13,6 +13,7 @@ enum dr_ste_v1_entry_format {
DR_STE_V1_TYPE_BWC_BYTE = 0x0,
DR_STE_V1_TYPE_BWC_DW = 0x1,
DR_STE_V1_TYPE_MATCH = 0x2,
+ DR_STE_V1_TYPE_MATCH_RANGES = 0x7,
};
/* Lookup type is built from 2B: [ Definer mode 1B ][ Definer index 1B ] */
@@ -267,6 +268,16 @@ static void dr_ste_v1_set_entry_type(u8 *hw_ste_p, u8 entry_type)
MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, entry_type);
}
+/* Tell whether the miss address of this STE was already set at STE init.
+ *
+ * Unlike a MATCH STE, a MATCH_RANGES STE carries both its hit and miss
+ * addresses as part of the action, so both are set as part of STE init.
+ *
+ * Return: true only for an STE whose entry format is MATCH_RANGES.
+ */
+bool dr_ste_v1_is_miss_addr_set(u8 *hw_ste_p)
+{
+	const u8 format = MLX5_GET(ste_match_bwc_v1, hw_ste_p, entry_format);
+
+	if (format != DR_STE_V1_TYPE_MATCH_RANGES)
+		return false;
+
+	return true;
+}
+
void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr)
{
u64 index = miss_addr >> 6;
@@ -520,6 +531,27 @@ static void dr_ste_v1_set_aso_flow_meter(u8 *d_action,
init_color);
}
+/* Program a match-ranges STE for a packet-length range check.
+ *
+ * @hw_ste_p:   STE to fill
+ * @definer_id: definer selecting the packet-length dword
+ * @min:        range lower bound
+ * @max:        range upper bound
+ */
+static void dr_ste_v1_set_match_range_pkt_len(u8 *hw_ste_p, u32 definer_id,
+					      u32 min, u32 max)
+{
+	/* Pkt len is 2 bytes that are stored in the higher section of the DW. */
+	u32 min_dw = min << 16;
+	u32 max_dw = max << 16;
+
+	MLX5_SET(ste_match_ranges_v1, hw_ste_p, match_definer_ctx_idx, definer_id);
+
+	/* This STE is sent with the common utility functions, so its mask
+	 * and tag areas will be swapped in dr_ste_v1_prepare_for_postsend().
+	 * A match range STE has no mask and must not end up swapped, so the
+	 * values are placed where the swap will move them to the right
+	 * location when the STE is written to ICM.
+	 *
+	 * min/max_value_2 corresponds to match_dw_0 in the definer; to
+	 * allow for the swap, min/max_2 are written to min/max_0.
+	 */
+	MLX5_SET(ste_match_ranges_v1, hw_ste_p, min_value_0, min_dw);
+	MLX5_SET(ste_match_ranges_v1, hw_ste_p, max_value_0, max_dw);
+}
+
static void dr_ste_v1_arr_init_next_match(u8 **last_ste,
u32 *added_stes,
u16 gvmi)
@@ -535,6 +567,14 @@ static void dr_ste_v1_arr_init_next_match(u8 **last_ste,
memset(action, 0, MLX5_FLD_SZ_BYTES(ste_mask_and_match_v1, action));
}
+/* Advance to the next STE in the array and make it a MATCH_RANGES STE.
+ *
+ * The new STE is first initialised the same way as a regular match STE,
+ * then its entry type is overridden.
+ */
+static void
+dr_ste_v1_arr_init_next_match_range(u8 **last_ste, u32 *added_stes, u16 gvmi)
+{
+	/* Moves *last_ste forward and initialises the new entry */
+	dr_ste_v1_arr_init_next_match(last_ste, added_stes, gvmi);
+
+	/* *last_ste now refers to the freshly added STE */
+	dr_ste_v1_set_entry_type(*last_ste, DR_STE_V1_TYPE_MATCH_RANGES);
+}
+
void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
u8 *action_type_set,
u32 actions_caps,
@@ -670,6 +710,20 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
action += DR_STE_ACTION_DOUBLE_SZ;
}
+ if (action_type_set[DR_ACTION_TYP_RANGE]) {
+ /* match ranges requires a new STE of its own type */
+ dr_ste_v1_arr_init_next_match_range(&last_ste, added_stes, attr->gvmi);
+ dr_ste_v1_set_miss_addr(last_ste, attr->range.miss_icm_addr);
+
+ /* we do not support setting any action on the match ranges STE */
+ action_sz = 0;
+
+ dr_ste_v1_set_match_range_pkt_len(last_ste,
+ attr->range.definer_id,
+ attr->range.min,
+ attr->range.max);
+ }
+
dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1);
}
@@ -858,6 +912,20 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
action += DR_STE_ACTION_DOUBLE_SZ;
}
+ if (action_type_set[DR_ACTION_TYP_RANGE]) {
+ /* match ranges requires a new STE of its own type */
+ dr_ste_v1_arr_init_next_match_range(&last_ste, added_stes, attr->gvmi);
+ dr_ste_v1_set_miss_addr(last_ste, attr->range.miss_icm_addr);
+
+ /* we do not support setting any action on the match ranges STE */
+ action_sz = 0;
+
+ dr_ste_v1_set_match_range_pkt_len(last_ste,
+ attr->range.definer_id,
+ attr->range.min,
+ attr->range.max);
+ }
+
dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1);
}
@@ -2144,6 +2212,7 @@ static struct mlx5dr_ste_ctx ste_ctx_v1 = {
.ste_init = &dr_ste_v1_init,
.set_next_lu_type = &dr_ste_v1_set_next_lu_type,
.get_next_lu_type = &dr_ste_v1_get_next_lu_type,
+ .is_miss_addr_set = &dr_ste_v1_is_miss_addr_set,
.set_miss_addr = &dr_ste_v1_set_miss_addr,
.get_miss_addr = &dr_ste_v1_get_miss_addr,
.set_hit_addr = &dr_ste_v1_set_hit_addr,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
index 8a1d49790c6e..b5c0f0f8392f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h
@@ -7,6 +7,7 @@
#include "dr_types.h"
#include "dr_ste.h"
+bool dr_ste_v1_is_miss_addr_set(u8 *hw_ste_p);
void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr);
u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p);
void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
index c60fddd125d2..cf1a3c9a1cf4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c
@@ -202,6 +202,7 @@ static struct mlx5dr_ste_ctx ste_ctx_v2 = {
.ste_init = &dr_ste_v1_init,
.set_next_lu_type = &dr_ste_v1_set_next_lu_type,
.get_next_lu_type = &dr_ste_v1_get_next_lu_type,
+ .is_miss_addr_set = &dr_ste_v1_is_miss_addr_set,
.set_miss_addr = &dr_ste_v1_set_miss_addr,
.get_miss_addr = &dr_ste_v1_get_miss_addr,
.set_hit_addr = &dr_ste_v1_set_hit_addr,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 41a37b9ac98b..2b769dcbd453 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -81,6 +81,7 @@ mlx5dr_icm_next_higher_chunk(enum mlx5dr_icm_chunk_size chunk)
enum {
DR_STE_SIZE = 64,
DR_STE_SIZE_CTRL = 32,
+ DR_STE_SIZE_MATCH_TAG = 32,
DR_STE_SIZE_TAG = 16,
DR_STE_SIZE_MASK = 16,
DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK,
@@ -128,6 +129,7 @@ enum mlx5dr_action_type {
DR_ACTION_TYP_REMOVE_HDR,
DR_ACTION_TYP_SAMPLER,
DR_ACTION_TYP_ASO_FLOW_METER,
+ DR_ACTION_TYP_RANGE,
DR_ACTION_TYP_MAX,
};
@@ -237,6 +239,7 @@ static inline void mlx5dr_htbl_get(struct mlx5dr_ste_htbl *htbl)
/* STE utils */
u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl);
+bool mlx5dr_ste_is_miss_addr_set(struct mlx5dr_ste_ctx *ste_ctx, u8 *hw_ste_p);
void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx,
u8 *hw_ste, u64 miss_addr);
void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx,
@@ -281,6 +284,13 @@ struct mlx5dr_ste_actions_attr {
u8 dest_reg_id;
u8 init_color;
} aso_flow_meter;
+
+ struct {
+ u64 miss_icm_addr;
+ u32 definer_id;
+ u32 min;
+ u32 max;
+ } range;
};
void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx,
@@ -924,6 +934,7 @@ struct mlx5dr_domain {
struct mlx5dr_ste_ctx *ste_ctx;
struct list_head dbg_tbl_list;
struct mlx5dr_dbg_dump_info dump_info;
+ struct xarray definers_xa;
};
struct mlx5dr_table_rx_tx {
@@ -1026,6 +1037,15 @@ struct mlx5dr_action_dest_tbl {
};
};
+struct mlx5dr_action_range {
+ struct mlx5dr_domain *dmn;
+ struct mlx5dr_action *hit_tbl_action;
+ struct mlx5dr_action *miss_tbl_action;
+ u32 definer_id;
+ u32 min;
+ u32 max;
+};
+
struct mlx5dr_action_ctr {
u32 ctr_id;
u32 offset;
@@ -1072,6 +1092,7 @@ struct mlx5dr_action {
struct mlx5dr_action_push_vlan *push_vlan;
struct mlx5dr_action_flow_tag *flow_tag;
struct mlx5dr_action_aso_flow_meter *aso;
+ struct mlx5dr_action_range *range;
};
};
@@ -1295,6 +1316,14 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
u32 *reformat_id);
void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev,
u32 reformat_id);
+int mlx5dr_cmd_create_definer(struct mlx5_core_dev *mdev,
+ u16 format_id,
+ u8 *dw_selectors,
+ u8 *byte_selectors,
+ u8 *match_mask,
+ u32 *definer_id);
+void mlx5dr_cmd_destroy_definer(struct mlx5_core_dev *mdev,
+ u32 definer_id);
struct mlx5dr_cmd_gid_attr {
u8 gid[16];
@@ -1483,4 +1512,18 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
u32 flow_source);
void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id,
u32 group_id);
+
+static inline bool mlx5dr_is_fw_table(struct mlx5_flow_table *ft)
+{
+ return !ft->fs_dr_table.dr_table;
+}
+
+static inline bool mlx5dr_supp_match_ranges(struct mlx5_core_dev *dev)
+{
+ return (MLX5_CAP_GEN(dev, steering_format_version) >=
+ MLX5_STEERING_FORMAT_CONNECTX_6DX) &&
+ (MLX5_CAP_GEN_64(dev, match_definer_format_supported) &
+ (1ULL << MLX5_IFC_DEFINER_FORMAT_ID_SELECT));
+}
+
#endif /* _DR_TYPES_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 13b6d4721e17..984653756779 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -7,10 +7,11 @@
#include "fs_cmd.h"
#include "mlx5dr.h"
#include "fs_dr.h"
+#include "dr_types.h"
-static bool mlx5_dr_is_fw_table(u32 flags)
+static bool dr_is_fw_term_table(struct mlx5_flow_table *ft)
{
- if (flags & MLX5_FLOW_TABLE_TERMINATION)
+ if (ft->flags & MLX5_FLOW_TABLE_TERMINATION)
return true;
return false;
@@ -69,7 +70,7 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
u32 flags;
int err;
- if (mlx5_dr_is_fw_table(ft->flags))
+ if (dr_is_fw_term_table(ft))
return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft,
ft_attr,
next_ft);
@@ -109,7 +110,7 @@ static int mlx5_cmd_dr_destroy_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5dr_action *action = ft->fs_dr_table.miss_action;
int err;
- if (mlx5_dr_is_fw_table(ft->flags))
+ if (dr_is_fw_term_table(ft))
return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft);
err = mlx5dr_table_destroy(ft->fs_dr_table.dr_table);
@@ -134,7 +135,7 @@ static int mlx5_cmd_dr_modify_flow_table(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
struct mlx5_flow_table *next_ft)
{
- if (mlx5_dr_is_fw_table(ft->flags))
+ if (dr_is_fw_term_table(ft))
return mlx5_fs_cmd_get_fw_cmds()->modify_flow_table(ns, ft, next_ft);
return set_miss_action(ns, ft, next_ft);
@@ -153,7 +154,7 @@ static int mlx5_cmd_dr_create_flow_group(struct mlx5_flow_root_namespace *ns,
match_criteria_enable);
struct mlx5dr_match_parameters mask;
- if (mlx5_dr_is_fw_table(ft->flags))
+ if (dr_is_fw_term_table(ft))
return mlx5_fs_cmd_get_fw_cmds()->create_flow_group(ns, ft, in,
fg);
@@ -178,7 +179,7 @@ static int mlx5_cmd_dr_destroy_flow_group(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
struct mlx5_flow_group *fg)
{
- if (mlx5_dr_is_fw_table(ft->flags))
+ if (dr_is_fw_term_table(ft))
return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_group(ns, ft, fg);
return mlx5dr_matcher_destroy(fg->fs_dr_matcher.dr_matcher);
@@ -209,11 +210,22 @@ static struct mlx5dr_action *create_ft_action(struct mlx5dr_domain *domain,
{
struct mlx5_flow_table *dest_ft = dst->dest_attr.ft;
- if (mlx5_dr_is_fw_table(dest_ft->flags))
+ if (mlx5dr_is_fw_table(dest_ft))
return mlx5dr_action_create_dest_flow_fw_table(domain, dest_ft);
return mlx5dr_action_create_dest_table(dest_ft->fs_dr_table.dr_table);
}
+static struct mlx5dr_action *create_range_action(struct mlx5dr_domain *domain,
+ struct mlx5_flow_rule *dst)
+{
+ return mlx5dr_action_create_dest_match_range(domain,
+ dst->dest_attr.range.field,
+ dst->dest_attr.range.hit_ft,
+ dst->dest_attr.range.miss_ft,
+ dst->dest_attr.range.min,
+ dst->dest_attr.range.max);
+}
+
static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domain,
struct mlx5_fs_vlan *vlan)
{
@@ -260,7 +272,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
int err = 0;
int i;
- if (mlx5_dr_is_fw_table(ft->flags))
+ if (dr_is_fw_term_table(ft))
return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte);
actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*actions),
@@ -467,6 +479,15 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
fs_dr_actions[fs_dr_num_actions++] = tmp_action;
term_actions[num_term_actions++].dest = tmp_action;
break;
+ case MLX5_FLOW_DESTINATION_TYPE_RANGE:
+ tmp_action = create_range_action(domain, dst);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ term_actions[num_term_actions++].dest = tmp_action;
+ break;
default:
err = -EOPNOTSUPP;
goto free_actions;
@@ -702,7 +723,7 @@ static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns,
int err;
int i;
- if (mlx5_dr_is_fw_table(ft->flags))
+ if (dr_is_fw_term_table(ft))
return mlx5_fs_cmd_get_fw_cmds()->delete_fte(ns, ft, fte);
err = mlx5dr_rule_destroy(rule->dr_rule);
@@ -727,7 +748,7 @@ static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns,
struct fs_fte fte_tmp = {};
int ret;
- if (mlx5_dr_is_fw_table(ft->flags))
+ if (dr_is_fw_term_table(ft))
return mlx5_fs_cmd_get_fw_cmds()->update_fte(ns, ft, group, modify_mask, fte);
/* Backup current dr rule details */
@@ -780,11 +801,19 @@ static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns)
static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns,
enum fs_flow_table_type ft_type)
{
+ u32 steering_caps = 0;
+
if (ft_type != FS_FT_FDB ||
MLX5_CAP_GEN(ns->dev, steering_format_version) == MLX5_STEERING_FORMAT_CONNECTX_5)
return 0;
- return MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX | MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX;
+ steering_caps |= MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX;
+ steering_caps |= MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX;
+
+ if (mlx5dr_supp_match_ranges(ns->dev))
+ steering_caps |= MLX5_FLOW_STEERING_CAP_MATCH_RANGES;
+
+ return steering_caps;
}
bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h
index 34c2bd17a8b4..790a17d6207f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h
@@ -165,6 +165,41 @@ struct mlx5_ifc_ste_mask_and_match_v1_bits {
u8 action[0x60];
};
+struct mlx5_ifc_ste_match_ranges_v1_bits {
+ u8 entry_format[0x8];
+ u8 counter_id[0x18];
+
+ u8 miss_address_63_48[0x10];
+ u8 match_definer_ctx_idx[0x8];
+ u8 miss_address_39_32[0x8];
+
+ u8 miss_address_31_6[0x1a];
+ u8 reserved_at_5a[0x1];
+ u8 match_polarity[0x1];
+ u8 reparse[0x1];
+ u8 reserved_at_5d[0x3];
+
+ u8 next_table_base_63_48[0x10];
+ u8 hash_definer_ctx_idx[0x8];
+ u8 next_table_base_39_32_size[0x8];
+
+ u8 next_table_base_31_5_size[0x1b];
+ u8 hash_type[0x2];
+ u8 hash_after_actions[0x1];
+ u8 reserved_at_9e[0x2];
+
+ u8 action[0x60];
+
+ u8 max_value_0[0x20];
+ u8 min_value_0[0x20];
+ u8 max_value_1[0x20];
+ u8 min_value_1[0x20];
+ u8 max_value_2[0x20];
+ u8 min_value_2[0x20];
+ u8 max_value_3[0x20];
+ u8 min_value_3[0x20];
+};
+
struct mlx5_ifc_ste_eth_l2_src_v1_bits {
u8 reserved_at_0[0x1];
u8 sx_sniffer[0x1];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index 84ed77763b21..9afd268a2573 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -140,8 +140,21 @@ mlx5dr_action_create_aso(struct mlx5dr_domain *dmn,
u8 init_color,
u8 meter_id);
+struct mlx5dr_action *
+mlx5dr_action_create_dest_match_range(struct mlx5dr_domain *dmn,
+ u32 field,
+ struct mlx5_flow_table *hit_ft,
+ struct mlx5_flow_table *miss_ft,
+ u32 min,
+ u32 max);
+
int mlx5dr_action_destroy(struct mlx5dr_action *action);
+int mlx5dr_definer_get(struct mlx5dr_domain *dmn, u16 format_id,
+ u8 *dw_selectors, u8 *byte_selectors,
+ u8 *match_mask, u32 *definer_id);
+void mlx5dr_definer_put(struct mlx5dr_domain *dmn, u32 definer_id);
+
static inline bool
mlx5dr_is_supported(struct mlx5_core_dev *dev)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index d5c317325030..ba7e3df22413 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1160,14 +1160,40 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
-int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out)
+int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out,
+ u16 opmod)
{
- u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {};
+ opmod = (opmod << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
MLX5_SET(query_hca_cap_in, in, function_id, function_id);
MLX5_SET(query_hca_cap_in, in, other_function, true);
return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
}
+EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap);
+
+int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap,
+ u16 function_id, u16 opmod)
+{
+ int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ void *set_hca_cap;
+ void *set_ctx;
+ int ret;
+
+ set_ctx = kzalloc(set_sz, GFP_KERNEL);
+ if (!set_ctx)
+ return -ENOMEM;
+
+ MLX5_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+ MLX5_SET(set_hca_cap_in, set_ctx, op_mod, opmod << 1);
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+ memcpy(set_hca_cap, hca_cap, MLX5_ST_SZ_BYTES(cmd_hca_cap));
+ MLX5_SET(set_hca_cap_in, set_ctx, function_id, function_id);
+ MLX5_SET(set_hca_cap_in, set_ctx, other_function, true);
+ ret = mlx5_cmd_exec_in(dev, set_hca_cap, set_ctx);
+
+ kfree(set_ctx);
+ return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index a2ee695a3f17..3340b4a694c3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -363,93 +363,7 @@ static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = {
};
static struct mlxsw_sp_ipip_parms
-mlxsw_sp1_ipip_netdev_parms_init_gre6(const struct net_device *ol_dev)
-{
- struct mlxsw_sp_ipip_parms parms = {0};
-
- WARN_ON_ONCE(1);
- return parms;
-}
-
-static int
-mlxsw_sp1_ipip_nexthop_update_gre6(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
- struct mlxsw_sp_ipip_entry *ipip_entry,
- bool force, char *ratr_pl)
-{
- WARN_ON_ONCE(1);
- return -EINVAL;
-}
-
-static int
-mlxsw_sp1_ipip_decap_config_gre6(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_ipip_entry *ipip_entry,
- u32 tunnel_index)
-{
- WARN_ON_ONCE(1);
- return -EINVAL;
-}
-
-static bool mlxsw_sp1_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp,
- const struct net_device *ol_dev)
-{
- return false;
-}
-
-static struct mlxsw_sp_rif_ipip_lb_config
-mlxsw_sp1_ipip_ol_loopback_config_gre6(struct mlxsw_sp *mlxsw_sp,
- const struct net_device *ol_dev)
-{
- struct mlxsw_sp_rif_ipip_lb_config config = {0};
-
- WARN_ON_ONCE(1);
- return config;
-}
-
-static int
-mlxsw_sp1_ipip_ol_netdev_change_gre6(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_ipip_entry *ipip_entry,
- struct netlink_ext_ack *extack)
-{
- WARN_ON_ONCE(1);
- return -EINVAL;
-}
-
-static int
-mlxsw_sp1_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_ipip_entry *ipip_entry)
-{
- WARN_ON_ONCE(1);
- return -EINVAL;
-}
-
-static void
-mlxsw_sp1_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_sp_ipip_entry *ipip_entry)
-{
- WARN_ON_ONCE(1);
-}
-
-static const struct mlxsw_sp_ipip_ops mlxsw_sp1_ipip_gre6_ops = {
- .dev_type = ARPHRD_IP6GRE,
- .ul_proto = MLXSW_SP_L3_PROTO_IPV6,
- .inc_parsing_depth = true,
- .parms_init = mlxsw_sp1_ipip_netdev_parms_init_gre6,
- .nexthop_update = mlxsw_sp1_ipip_nexthop_update_gre6,
- .decap_config = mlxsw_sp1_ipip_decap_config_gre6,
- .can_offload = mlxsw_sp1_ipip_can_offload_gre6,
- .ol_loopback_config = mlxsw_sp1_ipip_ol_loopback_config_gre6,
- .ol_netdev_change = mlxsw_sp1_ipip_ol_netdev_change_gre6,
- .rem_ip_addr_set = mlxsw_sp1_ipip_rem_addr_set_gre6,
- .rem_ip_addr_unset = mlxsw_sp1_ipip_rem_addr_unset_gre6,
-};
-
-const struct mlxsw_sp_ipip_ops *mlxsw_sp1_ipip_ops_arr[] = {
- [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops,
- [MLXSW_SP_IPIP_TYPE_GRE6] = &mlxsw_sp1_ipip_gre6_ops,
-};
-
-static struct mlxsw_sp_ipip_parms
-mlxsw_sp2_ipip_netdev_parms_init_gre6(const struct net_device *ol_dev)
+mlxsw_sp_ipip_netdev_parms_init_gre6(const struct net_device *ol_dev)
{
struct __ip6_tnl_parm parms = mlxsw_sp_ipip_netdev_parms6(ol_dev);
@@ -464,9 +378,9 @@ mlxsw_sp2_ipip_netdev_parms_init_gre6(const struct net_device *ol_dev)
}
static int
-mlxsw_sp2_ipip_nexthop_update_gre6(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
- struct mlxsw_sp_ipip_entry *ipip_entry,
- bool force, char *ratr_pl)
+mlxsw_sp_ipip_nexthop_update_gre6(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ bool force, char *ratr_pl)
{
u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
enum mlxsw_reg_ratr_op op;
@@ -482,9 +396,9 @@ mlxsw_sp2_ipip_nexthop_update_gre6(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
}
static int
-mlxsw_sp2_ipip_decap_config_gre6(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_ipip_entry *ipip_entry,
- u32 tunnel_index)
+mlxsw_sp_ipip_decap_config_gre6(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ u32 tunnel_index)
{
u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
u16 ul_rif_id = mlxsw_sp_ipip_lb_ul_rif_id(ipip_entry->ol_lb);
@@ -519,8 +433,8 @@ mlxsw_sp2_ipip_decap_config_gre6(struct mlxsw_sp *mlxsw_sp,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
}
-static bool mlxsw_sp2_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp,
- const struct net_device *ol_dev)
+static bool mlxsw_sp_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev)
{
struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(ol_dev);
bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS;
@@ -534,8 +448,8 @@ static bool mlxsw_sp2_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp,
}
static struct mlxsw_sp_rif_ipip_lb_config
-mlxsw_sp2_ipip_ol_loopback_config_gre6(struct mlxsw_sp *mlxsw_sp,
- const struct net_device *ol_dev)
+mlxsw_sp_ipip_ol_loopback_config_gre6(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev)
{
struct __ip6_tnl_parm parms = mlxsw_sp_ipip_netdev_parms6(ol_dev);
enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
@@ -553,20 +467,20 @@ mlxsw_sp2_ipip_ol_loopback_config_gre6(struct mlxsw_sp *mlxsw_sp,
}
static int
-mlxsw_sp2_ipip_ol_netdev_change_gre6(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_ipip_entry *ipip_entry,
- struct netlink_ext_ack *extack)
+mlxsw_sp_ipip_ol_netdev_change_gre6(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp_ipip_parms new_parms;
- new_parms = mlxsw_sp2_ipip_netdev_parms_init_gre6(ipip_entry->ol_dev);
+ new_parms = mlxsw_sp_ipip_netdev_parms_init_gre6(ipip_entry->ol_dev);
return mlxsw_sp_ipip_ol_netdev_change_gre(mlxsw_sp, ipip_entry,
&new_parms, extack);
}
static int
-mlxsw_sp2_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_ipip_entry *ipip_entry)
+mlxsw_sp_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
{
return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp,
&ipip_entry->parms.daddr.addr6,
@@ -574,24 +488,44 @@ mlxsw_sp2_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp,
}
static void
-mlxsw_sp2_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_sp_ipip_entry *ipip_entry)
+mlxsw_sp_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_ipip_entry *ipip_entry)
{
mlxsw_sp_ipv6_addr_put(mlxsw_sp, &ipip_entry->parms.daddr.addr6);
}
+static const struct mlxsw_sp_ipip_ops mlxsw_sp1_ipip_gre6_ops = {
+ .dev_type = ARPHRD_IP6GRE,
+ .ul_proto = MLXSW_SP_L3_PROTO_IPV6,
+ .inc_parsing_depth = true,
+ .double_rif_entry = true,
+ .parms_init = mlxsw_sp_ipip_netdev_parms_init_gre6,
+ .nexthop_update = mlxsw_sp_ipip_nexthop_update_gre6,
+ .decap_config = mlxsw_sp_ipip_decap_config_gre6,
+ .can_offload = mlxsw_sp_ipip_can_offload_gre6,
+ .ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre6,
+ .ol_netdev_change = mlxsw_sp_ipip_ol_netdev_change_gre6,
+ .rem_ip_addr_set = mlxsw_sp_ipip_rem_addr_set_gre6,
+ .rem_ip_addr_unset = mlxsw_sp_ipip_rem_addr_unset_gre6,
+};
+
+const struct mlxsw_sp_ipip_ops *mlxsw_sp1_ipip_ops_arr[] = {
+ [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops,
+ [MLXSW_SP_IPIP_TYPE_GRE6] = &mlxsw_sp1_ipip_gre6_ops,
+};
+
static const struct mlxsw_sp_ipip_ops mlxsw_sp2_ipip_gre6_ops = {
.dev_type = ARPHRD_IP6GRE,
.ul_proto = MLXSW_SP_L3_PROTO_IPV6,
.inc_parsing_depth = true,
- .parms_init = mlxsw_sp2_ipip_netdev_parms_init_gre6,
- .nexthop_update = mlxsw_sp2_ipip_nexthop_update_gre6,
- .decap_config = mlxsw_sp2_ipip_decap_config_gre6,
- .can_offload = mlxsw_sp2_ipip_can_offload_gre6,
- .ol_loopback_config = mlxsw_sp2_ipip_ol_loopback_config_gre6,
- .ol_netdev_change = mlxsw_sp2_ipip_ol_netdev_change_gre6,
- .rem_ip_addr_set = mlxsw_sp2_ipip_rem_addr_set_gre6,
- .rem_ip_addr_unset = mlxsw_sp2_ipip_rem_addr_unset_gre6,
+ .parms_init = mlxsw_sp_ipip_netdev_parms_init_gre6,
+ .nexthop_update = mlxsw_sp_ipip_nexthop_update_gre6,
+ .decap_config = mlxsw_sp_ipip_decap_config_gre6,
+ .can_offload = mlxsw_sp_ipip_can_offload_gre6,
+ .ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre6,
+ .ol_netdev_change = mlxsw_sp_ipip_ol_netdev_change_gre6,
+ .rem_ip_addr_set = mlxsw_sp_ipip_rem_addr_set_gre6,
+ .rem_ip_addr_unset = mlxsw_sp_ipip_rem_addr_unset_gre6,
};
const struct mlxsw_sp_ipip_ops *mlxsw_sp2_ipip_ops_arr[] = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index 8cc259dcc8d0..a35f009da561 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -49,6 +49,7 @@ struct mlxsw_sp_ipip_ops {
int dev_type;
enum mlxsw_sp_l3proto ul_proto; /* Underlay. */
bool inc_parsing_depth;
+ bool double_rif_entry;
struct mlxsw_sp_ipip_parms
(*parms_init)(const struct net_device *ol_dev);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 48f1fa62a4fd..c22c3ac4e2a1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -18,6 +18,7 @@
#include <linux/jhash.h>
#include <linux/net_namespace.h>
#include <linux/mutex.h>
+#include <linux/genalloc.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
@@ -59,6 +60,7 @@ struct mlxsw_sp_rif {
int mtu;
u16 rif_index;
u8 mac_profile_id;
+ u8 rif_entries;
u16 vr_id;
const struct mlxsw_sp_rif_ops *ops;
struct mlxsw_sp *mlxsw_sp;
@@ -77,6 +79,7 @@ struct mlxsw_sp_rif_params {
};
u16 vid;
bool lag;
+ bool double_entry;
};
struct mlxsw_sp_rif_subport {
@@ -1068,6 +1071,7 @@ mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
.common.dev = ol_dev,
.common.lag = false,
+ .common.double_entry = ipip_ops->double_rif_entry,
.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
};
@@ -7826,18 +7830,26 @@ mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
}
-static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
+static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index,
+ u8 rif_entries)
{
- int i;
+ *p_rif_index = gen_pool_alloc(mlxsw_sp->router->rifs_table,
+ rif_entries);
+ if (*p_rif_index == 0)
+ return -ENOBUFS;
+ *p_rif_index -= MLXSW_SP_ROUTER_GENALLOC_OFFSET;
- for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
- if (!mlxsw_sp->router->rifs[i]) {
- *p_rif_index = i;
- return 0;
- }
- }
+ /* RIF indexes must be aligned to the allocation size. */
+ WARN_ON_ONCE(*p_rif_index % rif_entries);
- return -ENOBUFS;
+ return 0;
+}
+
+static void mlxsw_sp_rif_index_free(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
+ u8 rif_entries)
+{
+ gen_pool_free(mlxsw_sp->router->rifs_table,
+ MLXSW_SP_ROUTER_GENALLOC_OFFSET + rif_index, rif_entries);
}
static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
@@ -8081,6 +8093,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_rif_params *params,
struct netlink_ext_ack *extack)
{
+ u8 rif_entries = params->double_entry ? 2 : 1;
u32 tb_id = l3mdev_fib_table(params->dev);
const struct mlxsw_sp_rif_ops *ops;
struct mlxsw_sp_fid *fid = NULL;
@@ -8098,7 +8111,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
return ERR_CAST(vr);
vr->rif_count++;
- err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
+ err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
goto err_rif_index_alloc;
@@ -8113,6 +8126,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp->router->rifs[rif_index] = rif;
rif->mlxsw_sp = mlxsw_sp;
rif->ops = ops;
+ rif->rif_entries = rif_entries;
if (ops->fid_get) {
fid = ops->fid_get(rif, extack);
@@ -8146,7 +8160,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_rif_counters_alloc(rif);
}
- atomic_inc(&mlxsw_sp->router->rifs_count);
+ atomic_add(rif_entries, &mlxsw_sp->router->rifs_count);
return rif;
err_stats_enable:
@@ -8162,6 +8176,7 @@ err_fid_get:
dev_put(rif->dev);
kfree(rif);
err_rif_alloc:
+ mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
err_rif_index_alloc:
vr->rif_count--;
mlxsw_sp_vr_put(mlxsw_sp, vr);
@@ -8173,10 +8188,12 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
const struct mlxsw_sp_rif_ops *ops = rif->ops;
struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
struct mlxsw_sp_fid *fid = rif->fid;
+ u8 rif_entries = rif->rif_entries;
+ u16 rif_index = rif->rif_index;
struct mlxsw_sp_vr *vr;
int i;
- atomic_dec(&mlxsw_sp->router->rifs_count);
+ atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count);
mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
vr = &mlxsw_sp->router->vrs[rif->vr_id];
@@ -8198,6 +8215,7 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
mlxsw_sp->router->rifs[rif->rif_index] = NULL;
dev_put(rif->dev);
kfree(rif);
+ mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
vr->rif_count--;
mlxsw_sp_vr_put(mlxsw_sp, vr);
}
@@ -9771,42 +9789,51 @@ mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
struct netlink_ext_ack *extack)
{
struct mlxsw_sp_rif *ul_rif;
+ u8 rif_entries = 1;
u16 rif_index;
int err;
- err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
+ err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
return ERR_PTR(err);
}
ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
- if (!ul_rif)
- return ERR_PTR(-ENOMEM);
+ if (!ul_rif) {
+ err = -ENOMEM;
+ goto err_rif_alloc;
+ }
mlxsw_sp->router->rifs[rif_index] = ul_rif;
ul_rif->mlxsw_sp = mlxsw_sp;
+ ul_rif->rif_entries = rif_entries;
err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
if (err)
goto ul_rif_op_err;
- atomic_inc(&mlxsw_sp->router->rifs_count);
+ atomic_add(rif_entries, &mlxsw_sp->router->rifs_count);
return ul_rif;
ul_rif_op_err:
mlxsw_sp->router->rifs[rif_index] = NULL;
kfree(ul_rif);
+err_rif_alloc:
+ mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
return ERR_PTR(err);
}
static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
{
struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
+ u8 rif_entries = ul_rif->rif_entries;
+ u16 rif_index = ul_rif->rif_index;
- atomic_dec(&mlxsw_sp->router->rifs_count);
+ atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count);
mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
kfree(ul_rif);
+ mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
}
static struct mlxsw_sp_rif *
@@ -9940,11 +9967,43 @@ static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
[MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops,
};
+static int mlxsw_sp_rifs_table_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct gen_pool *rifs_table;
+ int err;
+
+ rifs_table = gen_pool_create(0, -1);
+ if (!rifs_table)
+ return -ENOMEM;
+
+ gen_pool_set_algo(rifs_table, gen_pool_first_fit_order_align,
+ NULL);
+
+ err = gen_pool_add(rifs_table, MLXSW_SP_ROUTER_GENALLOC_OFFSET,
+ MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS), -1);
+ if (err)
+ goto err_gen_pool_add;
+
+ mlxsw_sp->router->rifs_table = rifs_table;
+
+ return 0;
+
+err_gen_pool_add:
+ gen_pool_destroy(rifs_table);
+ return err;
+}
+
+static void mlxsw_sp_rifs_table_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ gen_pool_destroy(mlxsw_sp->router->rifs_table);
+}
+
static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
{
u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
struct mlxsw_core *core = mlxsw_sp->core;
+ int err;
if (!MLXSW_CORE_RES_VALID(core, MAX_RIF_MAC_PROFILES))
return -EIO;
@@ -9957,6 +10016,10 @@ static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
if (!mlxsw_sp->router->rifs)
return -ENOMEM;
+ err = mlxsw_sp_rifs_table_init(mlxsw_sp);
+ if (err)
+ goto err_rifs_table_init;
+
idr_init(&mlxsw_sp->router->rif_mac_profiles_idr);
atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0);
atomic_set(&mlxsw_sp->router->rifs_count, 0);
@@ -9970,6 +10033,10 @@ static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
mlxsw_sp);
return 0;
+
+err_rifs_table_init:
+ kfree(mlxsw_sp->router->rifs);
+ return err;
}
static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
@@ -9986,6 +10053,7 @@ static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
MLXSW_SP_RESOURCE_RIF_MAC_PROFILES);
WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr));
idr_destroy(&mlxsw_sp->router->rif_mac_profiles_idr);
+ mlxsw_sp_rifs_table_fini(mlxsw_sp);
kfree(mlxsw_sp->router->rifs);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index c5dfb972b433..37d6e4c80e6a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -15,8 +15,12 @@ struct mlxsw_sp_router_nve_decap {
u8 valid:1;
};
+/* gen_pool_alloc() returns 0 when allocation fails, so use an offset */
+#define MLXSW_SP_ROUTER_GENALLOC_OFFSET 0x100
+
struct mlxsw_sp_router {
struct mlxsw_sp *mlxsw_sp;
+ struct gen_pool *rifs_table;
struct mlxsw_sp_rif **rifs;
struct idr rif_mac_profiles_idr;
atomic_t rif_mac_profiles_count;
diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c
index 81a8ccca7e5e..5693784eec5b 100644
--- a/drivers/net/ethernet/microchip/encx24j600-regmap.c
+++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c
@@ -359,7 +359,7 @@ static int regmap_encx24j600_phy_reg_read(void *context, unsigned int reg,
goto err_out;
usleep_range(26, 100);
- while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) &&
+ while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) &&
(mistat & BUSY))
cpu_relax();
@@ -397,7 +397,7 @@ static int regmap_encx24j600_phy_reg_write(void *context, unsigned int reg,
goto err_out;
usleep_range(26, 100);
- while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) &&
+ while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) &&
(mistat & BUSY))
cpu_relax();
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index f6092983d028..cadde20505ba 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -443,11 +443,22 @@ static int lan966x_port_ioctl(struct net_device *dev, struct ifreq *ifr,
int cmd)
{
struct lan966x_port *port = netdev_priv(dev);
+ int err;
+
+ if (cmd == SIOCSHWTSTAMP) {
+ err = lan966x_ptp_setup_traps(port, ifr);
+ if (err)
+ return err;
+ }
if (!phy_has_hwtstamp(dev->phydev) && port->lan966x->ptp) {
switch (cmd) {
case SIOCSHWTSTAMP:
- return lan966x_ptp_hwtstamp_set(port, ifr);
+ err = lan966x_ptp_hwtstamp_set(port, ifr);
+ if (err)
+ lan966x_ptp_del_traps(port);
+
+ return err;
case SIOCGHWTSTAMP:
return lan966x_ptp_hwtstamp_get(port, ifr);
}
@@ -456,7 +467,11 @@ static int lan966x_port_ioctl(struct net_device *dev, struct ifreq *ifr,
if (!dev->phydev)
return -ENODEV;
- return phy_mii_ioctl(dev->phydev, ifr, cmd);
+ err = phy_mii_ioctl(dev->phydev, ifr, cmd);
+ if (err && cmd == SIOCSHWTSTAMP)
+ lan966x_ptp_del_traps(port);
+
+ return err;
}
static const struct net_device_ops lan966x_port_netdev_ops = {
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index f2e45da7ffd4..3491f1961835 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -88,6 +88,10 @@
#define SE_IDX_QUEUE 0 /* 0-79 : Queue scheduler elements */
#define SE_IDX_PORT 80 /* 80-89 : Port schedular elements */
+#define LAN966X_VCAP_CID_IS2_L0 VCAP_CID_INGRESS_STAGE2_L0 /* IS2 lookup 0 */
+#define LAN966X_VCAP_CID_IS2_L1 VCAP_CID_INGRESS_STAGE2_L1 /* IS2 lookup 1 */
+#define LAN966X_VCAP_CID_IS2_MAX (VCAP_CID_INGRESS_STAGE2_L2 - 1) /* IS2 Max */
+
/* MAC table entry types.
* ENTRYTYPE_NORMAL is subject to aging.
* ENTRYTYPE_LOCKED is not subject to aging.
@@ -116,6 +120,14 @@ enum lan966x_fdma_action {
FDMA_REDIRECT,
};
+/* Controls how PORT_MASK is applied */
+enum LAN966X_PORT_MASK_MODE {
+ LAN966X_PMM_NO_ACTION,
+ LAN966X_PMM_REPLACE,
+ LAN966X_PMM_FORWARDING,
+ LAN966X_PMM_REDIRECT,
+};
+
struct lan966x_port;
struct lan966x_db {
@@ -473,6 +485,8 @@ irqreturn_t lan966x_ptp_irq_handler(int irq, void *args);
irqreturn_t lan966x_ptp_ext_irq_handler(int irq, void *args);
u32 lan966x_ptp_get_period_ps(void);
int lan966x_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts);
+int lan966x_ptp_setup_traps(struct lan966x_port *port, struct ifreq *ifr);
+int lan966x_ptp_del_traps(struct lan966x_port *port);
int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev);
int lan966x_fdma_xmit_xdpf(struct lan966x_port *port,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c
index e5a2bbe064f8..300fe4005919 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c
@@ -3,6 +3,8 @@
#include <linux/ptp_classify.h>
#include "lan966x_main.h"
+#include "vcap_api.h"
+#include "vcap_api_client.h"
#define LAN966X_MAX_PTP_ID 512
@@ -18,6 +20,17 @@
#define TOD_ACC_PIN 0x7
+/* This represents the base rule ID for the PTP rules that are added in the
+ * VCAP to trap frames to CPU. This number needs to be bigger than the maximum
+ * number of entries that can exist in the VCAP.
+ */
+#define LAN966X_VCAP_PTP_RULE_ID 1000000
+#define LAN966X_VCAP_L2_PTP_TRAP (LAN966X_VCAP_PTP_RULE_ID + 0)
+#define LAN966X_VCAP_IPV4_EV_PTP_TRAP (LAN966X_VCAP_PTP_RULE_ID + 1)
+#define LAN966X_VCAP_IPV4_GEN_PTP_TRAP (LAN966X_VCAP_PTP_RULE_ID + 2)
+#define LAN966X_VCAP_IPV6_EV_PTP_TRAP (LAN966X_VCAP_PTP_RULE_ID + 3)
+#define LAN966X_VCAP_IPV6_GEN_PTP_TRAP (LAN966X_VCAP_PTP_RULE_ID + 4)
+
enum {
PTP_PIN_ACTION_IDLE = 0,
PTP_PIN_ACTION_LOAD,
@@ -35,19 +48,228 @@ static u64 lan966x_ptp_get_nominal_value(void)
return 0x304d4873ecade305;
}
+static int lan966x_ptp_add_trap(struct lan966x_port *port,
+ int (*add_ptp_key)(struct vcap_rule *vrule,
+ struct lan966x_port*),
+ u32 rule_id,
+ u16 proto)
+{
+ struct lan966x *lan966x = port->lan966x;
+ struct vcap_rule *vrule;
+ int err;
+
+ vrule = vcap_get_rule(lan966x->vcap_ctrl, rule_id);
+ if (vrule) {
+ u32 value, mask;
+
+ /* Just modify the ingress port mask and exit */
+ vcap_rule_get_key_u32(vrule, VCAP_KF_IF_IGR_PORT_MASK,
+ &value, &mask);
+ mask &= ~BIT(port->chip_port);
+ vcap_rule_mod_key_u32(vrule, VCAP_KF_IF_IGR_PORT_MASK,
+ value, mask);
+
+ err = vcap_mod_rule(vrule);
+ goto free_rule;
+ }
+
+ vrule = vcap_alloc_rule(lan966x->vcap_ctrl, port->dev,
+ LAN966X_VCAP_CID_IS2_L0,
+ VCAP_USER_PTP, 0, rule_id);
+ if (!vrule)
+ return -ENOMEM;
+ if (IS_ERR(vrule))
+ return PTR_ERR(vrule);
+
+ err = add_ptp_key(vrule, port);
+ if (err)
+ goto free_rule;
+
+ err = vcap_set_rule_set_actionset(vrule, VCAP_AFS_BASE_TYPE);
+ err |= vcap_rule_add_action_bit(vrule, VCAP_AF_CPU_COPY_ENA, VCAP_BIT_1);
+ err |= vcap_rule_add_action_u32(vrule, VCAP_AF_MASK_MODE, LAN966X_PMM_REPLACE);
+ err |= vcap_val_rule(vrule, proto);
+ if (err)
+ goto free_rule;
+
+ err = vcap_add_rule(vrule);
+
+free_rule:
+ /* Free the local copy of the rule */
+ vcap_free_rule(vrule);
+ return err;
+}
+
+static int lan966x_ptp_del_trap(struct lan966x_port *port,
+ u32 rule_id)
+{
+ struct lan966x *lan966x = port->lan966x;
+ struct vcap_rule *vrule;
+ u32 value, mask;
+ int err;
+
+ vrule = vcap_get_rule(lan966x->vcap_ctrl, rule_id);
+ if (!vrule)
+ return -EEXIST;
+
+ vcap_rule_get_key_u32(vrule, VCAP_KF_IF_IGR_PORT_MASK, &value, &mask);
+ mask |= BIT(port->chip_port);
+
+ /* No other port requires this trap, so it is safe to remove it */
+ if (mask == GENMASK(lan966x->num_phys_ports, 0)) {
+ err = vcap_del_rule(lan966x->vcap_ctrl, port->dev, rule_id);
+ goto free_rule;
+ }
+
+ vcap_rule_mod_key_u32(vrule, VCAP_KF_IF_IGR_PORT_MASK, value, mask);
+ err = vcap_mod_rule(vrule);
+
+free_rule:
+ vcap_free_rule(vrule);
+ return err;
+}
+
+static int lan966x_ptp_add_l2_key(struct vcap_rule *vrule,
+ struct lan966x_port *port)
+{
+ return vcap_rule_add_key_u32(vrule, VCAP_KF_ETYPE, ETH_P_1588, ~0);
+}
+
+static int lan966x_ptp_add_ip_event_key(struct vcap_rule *vrule,
+ struct lan966x_port *port)
+{
+ return vcap_rule_add_key_u32(vrule, VCAP_KF_L4_DPORT, PTP_EV_PORT, ~0) ||
+ vcap_rule_add_key_bit(vrule, VCAP_KF_TCP_IS, VCAP_BIT_0);
+}
+
+static int lan966x_ptp_add_ip_general_key(struct vcap_rule *vrule,
+ struct lan966x_port *port)
+{
+ return vcap_rule_add_key_u32(vrule, VCAP_KF_L4_DPORT, PTP_GEN_PORT, ~0) ||
+ vcap_rule_add_key_bit(vrule, VCAP_KF_TCP_IS, VCAP_BIT_0);
+}
+
+static int lan966x_ptp_add_l2_rule(struct lan966x_port *port)
+{
+ return lan966x_ptp_add_trap(port, lan966x_ptp_add_l2_key,
+ LAN966X_VCAP_L2_PTP_TRAP, ETH_P_ALL);
+}
+
+static int lan966x_ptp_add_ipv4_rules(struct lan966x_port *port)
+{
+ int err;
+
+ err = lan966x_ptp_add_trap(port, lan966x_ptp_add_ip_event_key,
+ LAN966X_VCAP_IPV4_EV_PTP_TRAP, ETH_P_IP);
+ if (err)
+ return err;
+
+ err = lan966x_ptp_add_trap(port, lan966x_ptp_add_ip_general_key,
+ LAN966X_VCAP_IPV4_GEN_PTP_TRAP, ETH_P_IP);
+ if (err)
+ lan966x_ptp_del_trap(port, LAN966X_VCAP_IPV4_EV_PTP_TRAP);
+
+ return err;
+}
+
+static int lan966x_ptp_add_ipv6_rules(struct lan966x_port *port)
+{
+ int err;
+
+ err = lan966x_ptp_add_trap(port, lan966x_ptp_add_ip_event_key,
+ LAN966X_VCAP_IPV6_EV_PTP_TRAP, ETH_P_IPV6);
+ if (err)
+ return err;
+
+ err = lan966x_ptp_add_trap(port, lan966x_ptp_add_ip_general_key,
+ LAN966X_VCAP_IPV6_GEN_PTP_TRAP, ETH_P_IPV6);
+ if (err)
+ lan966x_ptp_del_trap(port, LAN966X_VCAP_IPV6_EV_PTP_TRAP);
+
+ return err;
+}
+
+static int lan966x_ptp_del_l2_rule(struct lan966x_port *port)
+{
+ return lan966x_ptp_del_trap(port, LAN966X_VCAP_L2_PTP_TRAP);
+}
+
+static int lan966x_ptp_del_ipv4_rules(struct lan966x_port *port)
+{
+ int err;
+
+ err = lan966x_ptp_del_trap(port, LAN966X_VCAP_IPV4_EV_PTP_TRAP);
+ err |= lan966x_ptp_del_trap(port, LAN966X_VCAP_IPV4_GEN_PTP_TRAP);
+
+ return err;
+}
+
+static int lan966x_ptp_del_ipv6_rules(struct lan966x_port *port)
+{
+ int err;
+
+ err = lan966x_ptp_del_trap(port, LAN966X_VCAP_IPV6_EV_PTP_TRAP);
+ err |= lan966x_ptp_del_trap(port, LAN966X_VCAP_IPV6_GEN_PTP_TRAP);
+
+ return err;
+}
+
+static int lan966x_ptp_add_traps(struct lan966x_port *port)
+{
+ int err;
+
+ err = lan966x_ptp_add_l2_rule(port);
+ if (err)
+ goto err_l2;
+
+ err = lan966x_ptp_add_ipv4_rules(port);
+ if (err)
+ goto err_ipv4;
+
+ err = lan966x_ptp_add_ipv6_rules(port);
+ if (err)
+ goto err_ipv6;
+
+ return err;
+
+err_ipv6:
+ lan966x_ptp_del_ipv4_rules(port);
+err_ipv4:
+ lan966x_ptp_del_l2_rule(port);
+err_l2:
+ return err;
+}
+
+int lan966x_ptp_del_traps(struct lan966x_port *port)
+{
+ int err;
+
+ err = lan966x_ptp_del_l2_rule(port);
+ err |= lan966x_ptp_del_ipv4_rules(port);
+ err |= lan966x_ptp_del_ipv6_rules(port);
+
+ return err;
+}
+
+int lan966x_ptp_setup_traps(struct lan966x_port *port, struct ifreq *ifr)
+{
+ struct hwtstamp_config cfg;
+
+ if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+ return -EFAULT;
+
+ if (cfg.rx_filter == HWTSTAMP_FILTER_NONE)
+ return lan966x_ptp_del_traps(port);
+ else
+ return lan966x_ptp_add_traps(port);
+}
+
int lan966x_ptp_hwtstamp_set(struct lan966x_port *port, struct ifreq *ifr)
{
struct lan966x *lan966x = port->lan966x;
struct hwtstamp_config cfg;
struct lan966x_phc *phc;
- /* For now don't allow to run ptp on ports that are part of a bridge,
- * because in case of transparent clock the HW will still forward the
- * frames, so there would be duplicate frames
- */
- if (lan966x->bridge_mask & BIT(port->chip_port))
- return -EINVAL;
-
if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
return -EFAULT;
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c
index 04a2afd683cc..ba3fa917d6b7 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c
@@ -4,14 +4,6 @@
#include "vcap_api.h"
#include "vcap_api_client.h"
-/* Controls how PORT_MASK is applied */
-enum LAN966X_PORT_MASK_MODE {
- LAN966X_PMM_NO_ACTION,
- LAN966X_PMM_REPLACE,
- LAN966X_PMM_FORWARDING,
- LAN966X_PMM_REDIRECT,
-};
-
struct lan966x_tc_flower_parse_usage {
struct flow_cls_offload *f;
struct flow_rule *frule;
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c
index 44f40d914947..d8dc9fbb81e1 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c
@@ -5,10 +5,6 @@
#include "vcap_api.h"
#include "vcap_api_client.h"
-#define LAN966X_VCAP_CID_IS2_L0 VCAP_CID_INGRESS_STAGE2_L0 /* IS2 lookup 0 */
-#define LAN966X_VCAP_CID_IS2_L1 VCAP_CID_INGRESS_STAGE2_L1 /* IS2 lookup 1 */
-#define LAN966X_VCAP_CID_IS2_MAX (VCAP_CID_INGRESS_STAGE2_L2 - 1) /* IS2 Max */
-
#define STREAMSIZE (64 * 4)
#define LAN966X_IS2_LOOKUPS 2
@@ -219,9 +215,12 @@ static void lan966x_vcap_add_default_fields(struct net_device *dev,
struct vcap_rule *rule)
{
struct lan966x_port *port = netdev_priv(dev);
+ u32 value, mask;
- vcap_rule_add_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK, 0,
- ~BIT(port->chip_port));
+ if (vcap_rule_get_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK,
+ &value, &mask))
+ vcap_rule_add_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK, 0,
+ ~BIT(port->chip_port));
if (lan966x_vcap_is_first_chain(rule))
vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS,
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c
index 66360c8c5a38..141897dfe388 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c
@@ -317,7 +317,7 @@ int sparx5_fdma_xmit(struct sparx5 *sparx5, u32 *ifh, struct sk_buff *skb)
next_dcb_hw = sparx5_fdma_next_dcb(tx, tx->curr_entry);
db_hw = &next_dcb_hw->db[0];
if (!(db_hw->status & FDMA_DCB_STATUS_DONE))
- tx->dropped++;
+ return -EINVAL;
db = list_first_entry(&tx->db_list, struct sparx5_db, list);
list_move_tail(&db->list, &tx->db_list);
next_dcb_hw->nextptr = FDMA_DCB_INVALID_DATA;
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index f8382d3124e0..d25f4f09faa0 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -897,6 +897,8 @@ static int mchp_sparx5_probe(struct platform_device *pdev)
cleanup_ports:
sparx5_cleanup_ports(sparx5);
+ if (sparx5->mact_queue)
+ destroy_workqueue(sparx5->mact_queue);
cleanup_config:
kfree(configs);
cleanup_pnode:
@@ -923,6 +925,7 @@ static int mchp_sparx5_remove(struct platform_device *pdev)
sparx5_vcap_destroy(sparx5);
/* Unregister netdevs */
sparx5_unregister_notifier_blocks(sparx5);
+ destroy_workqueue(sparx5->mact_queue);
return 0;
}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
index 83c16ca5b30f..6db6ac6a3bbc 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
@@ -234,9 +234,8 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev)
sparx5_set_port_ifh(ifh, port->portno);
if (sparx5->ptp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
- ret = sparx5_ptp_txtstamp_request(port, skb);
- if (ret)
- return ret;
+ if (sparx5_ptp_txtstamp_request(port, skb) < 0)
+ return NETDEV_TX_BUSY;
sparx5_set_port_ifh_rew_op(ifh, SPARX5_SKB_CB(skb)->rew_op);
sparx5_set_port_ifh_pdu_type(ifh, SPARX5_SKB_CB(skb)->pdu_type);
@@ -250,23 +249,31 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev)
else
ret = sparx5_inject(sparx5, ifh, skb, dev);
- if (ret == NETDEV_TX_OK) {
- stats->tx_bytes += skb->len;
- stats->tx_packets++;
+ if (ret == -EBUSY)
+ goto busy;
+ if (ret < 0)
+ goto drop;
- if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
- SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
- return ret;
+ stats->tx_bytes += skb->len;
+ stats->tx_packets++;
+ sparx5->tx.packets++;
- dev_kfree_skb_any(skb);
- } else {
- stats->tx_dropped++;
+ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
+ SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
+ return NETDEV_TX_OK;
- if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
- SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
- sparx5_ptp_txtstamp_release(port, skb);
- }
- return ret;
+ dev_consume_skb_any(skb);
+ return NETDEV_TX_OK;
+drop:
+ stats->tx_dropped++;
+ sparx5->tx.dropped++;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+busy:
+ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
+ SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
+ sparx5_ptp_txtstamp_release(port, skb);
+ return NETDEV_TX_BUSY;
}
static enum hrtimer_restart sparx5_injection_timeout(struct hrtimer *tmr)
diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.c b/drivers/net/ethernet/microchip/vcap/vcap_api.c
index f2435d7ab515..664aae3e2acd 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api.c
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api.c
@@ -169,6 +169,227 @@ static void vcap_encode_typegroups(u32 *stream, int sw_width,
}
}
+static bool vcap_bitarray_zero(int width, u8 *value)
+{
+ int bytes = DIV_ROUND_UP(width, BITS_PER_BYTE);
+ u8 total = 0, bmask = 0xff;
+ int rwidth = width;
+ int idx;
+
+ for (idx = 0; idx < bytes; ++idx, rwidth -= BITS_PER_BYTE) {
+ if (rwidth && rwidth < BITS_PER_BYTE)
+ bmask = (1 << rwidth) - 1;
+ total += value[idx] & bmask;
+ }
+ return total == 0;
+}
+
+static bool vcap_get_bit(u32 *stream, struct vcap_stream_iter *itr)
+{
+ u32 mask = BIT(itr->reg_bitpos);
+ u32 *p = &stream[itr->reg_idx];
+
+ return !!(*p & mask);
+}
+
+static void vcap_decode_field(u32 *stream, struct vcap_stream_iter *itr,
+ int width, u8 *value)
+{
+ int idx;
+
+ /* Loop over the field value bits and get the field bits and
+ * set them in the output value byte array
+ */
+ for (idx = 0; idx < width; idx++) {
+ u8 bidx = idx & 0x7;
+
+ /* Decode one field value bit */
+ if (vcap_get_bit(stream, itr))
+ *value |= 1 << bidx;
+ vcap_iter_next(itr);
+ if (bidx == 7)
+ value++;
+ }
+}
+
+/* Verify that the type id in the stream matches the type id of the keyset */
+static bool vcap_verify_keystream_keyset(struct vcap_control *vctrl,
+ enum vcap_type vt,
+ u32 *keystream,
+ u32 *mskstream,
+ enum vcap_keyfield_set keyset)
+{
+ const struct vcap_info *vcap = &vctrl->vcaps[vt];
+ const struct vcap_field *typefld;
+ const struct vcap_typegroup *tgt;
+ const struct vcap_field *fields;
+ struct vcap_stream_iter iter;
+ const struct vcap_set *info;
+ u32 value = 0;
+ u32 mask = 0;
+
+ if (vcap_keyfield_count(vctrl, vt, keyset) == 0)
+ return false;
+
+ info = vcap_keyfieldset(vctrl, vt, keyset);
+ /* Check that the keyset is valid */
+ if (!info)
+ return false;
+
+ /* a type_id of value -1 means that there is no type field */
+ if (info->type_id == (u8)-1)
+ return true;
+
+ /* Get a valid typegroup for the specific keyset */
+ tgt = vcap_keyfield_typegroup(vctrl, vt, keyset);
+ if (!tgt)
+ return false;
+
+ fields = vcap_keyfields(vctrl, vt, keyset);
+ if (!fields)
+ return false;
+
+ typefld = &fields[VCAP_KF_TYPE];
+ vcap_iter_init(&iter, vcap->sw_width, tgt, typefld->offset);
+ vcap_decode_field(mskstream, &iter, typefld->width, (u8 *)&mask);
+ /* no type info if there are no mask bits */
+ if (vcap_bitarray_zero(typefld->width, (u8 *)&mask))
+ return false;
+
+ /* Get the value of the type field in the stream and compare to the
+ * one define in the vcap keyset
+ */
+ vcap_iter_init(&iter, vcap->sw_width, tgt, typefld->offset);
+ vcap_decode_field(keystream, &iter, typefld->width, (u8 *)&value);
+
+ return (value & mask) == (info->type_id & mask);
+}
+
+/* Verify that the typegroup bits have the correct values */
+static int vcap_verify_typegroups(u32 *stream, int sw_width,
+ const struct vcap_typegroup *tgt, bool mask,
+ int sw_max)
+{
+ struct vcap_stream_iter iter;
+ int sw_cnt, idx;
+
+ vcap_iter_set(&iter, sw_width, tgt, 0);
+ sw_cnt = 0;
+ while (iter.tg->width) {
+ u32 value = 0;
+ u32 tg_value = iter.tg->value;
+
+ if (mask)
+ tg_value = (1 << iter.tg->width) - 1;
+ /* Set position to current typegroup bit */
+ iter.offset = iter.tg->offset;
+ vcap_iter_update(&iter);
+ for (idx = 0; idx < iter.tg->width; idx++) {
+ /* Decode one typegroup bit */
+ if (vcap_get_bit(stream, &iter))
+ value |= 1 << idx;
+ iter.offset++;
+ vcap_iter_update(&iter);
+ }
+ if (value != tg_value)
+ return -EINVAL;
+ iter.tg++; /* next typegroup */
+ sw_cnt++;
+ /* Stop checking more typegroups */
+ if (sw_max && sw_cnt >= sw_max)
+ break;
+ }
+ return 0;
+}
+
+/* Find the subword width of the key typegroup that matches the stream data */
+static int vcap_find_keystream_typegroup_sw(struct vcap_control *vctrl,
+ enum vcap_type vt, u32 *stream,
+ bool mask, int sw_max)
+{
+ const struct vcap_typegroup **tgt;
+ int sw_idx, res;
+
+ tgt = vctrl->vcaps[vt].keyfield_set_typegroups;
+ /* Try the longest subword match first */
+ for (sw_idx = vctrl->vcaps[vt].sw_count; sw_idx >= 0; sw_idx--) {
+ if (!tgt[sw_idx])
+ continue;
+
+ res = vcap_verify_typegroups(stream, vctrl->vcaps[vt].sw_width,
+ tgt[sw_idx], mask, sw_max);
+ if (res == 0)
+ return sw_idx;
+ }
+ return -EINVAL;
+}
+
+/* Verify that the typegroup information, subword count, keyset and type id
+ * are in sync and correct, return the list of matchin keysets
+ */
+int
+vcap_find_keystream_keysets(struct vcap_control *vctrl,
+ enum vcap_type vt,
+ u32 *keystream,
+ u32 *mskstream,
+ bool mask, int sw_max,
+ struct vcap_keyset_list *kslist)
+{
+ const struct vcap_set *keyfield_set;
+ int sw_count, idx;
+
+ sw_count = vcap_find_keystream_typegroup_sw(vctrl, vt, keystream, mask,
+ sw_max);
+ if (sw_count < 0)
+ return sw_count;
+
+ keyfield_set = vctrl->vcaps[vt].keyfield_set;
+ for (idx = 0; idx < vctrl->vcaps[vt].keyfield_set_size; ++idx) {
+ if (keyfield_set[idx].sw_per_item != sw_count)
+ continue;
+
+ if (vcap_verify_keystream_keyset(vctrl, vt, keystream,
+ mskstream, idx))
+ vcap_keyset_list_add(kslist, idx);
+ }
+ if (kslist->cnt > 0)
+ return 0;
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(vcap_find_keystream_keysets);
+
+/* Read key data from a VCAP address and discover if there are any rule keysets
+ * here
+ */
+int vcap_addr_keysets(struct vcap_control *vctrl,
+ struct net_device *ndev,
+ struct vcap_admin *admin,
+ int addr,
+ struct vcap_keyset_list *kslist)
+{
+ enum vcap_type vt = admin->vtype;
+ int keyset_sw_regs, idx;
+ u32 key = 0, mask = 0;
+
+ /* Read the cache at the specified address */
+ keyset_sw_regs = DIV_ROUND_UP(vctrl->vcaps[vt].sw_width, 32);
+ vctrl->ops->update(ndev, admin, VCAP_CMD_READ, VCAP_SEL_ALL, addr);
+ vctrl->ops->cache_read(ndev, admin, VCAP_SEL_ENTRY, 0,
+ keyset_sw_regs);
+ /* Skip uninitialized key/mask entries */
+ for (idx = 0; idx < keyset_sw_regs; ++idx) {
+ key |= ~admin->cache.keystream[idx];
+ mask |= admin->cache.maskstream[idx];
+ }
+ if (key == 0 && mask == 0)
+ return -EINVAL;
+ /* Decode and locate the keysets */
+ return vcap_find_keystream_keysets(vctrl, vt, admin->cache.keystream,
+ admin->cache.maskstream, false, 0,
+ kslist);
+}
+EXPORT_SYMBOL_GPL(vcap_addr_keysets);
+
/* Return the list of keyfields for the keyset */
const struct vcap_field *vcap_keyfields(struct vcap_control *vctrl,
enum vcap_type vt,
@@ -618,6 +839,517 @@ struct vcap_rule_internal *vcap_dup_rule(struct vcap_rule_internal *ri)
return duprule;
}
+static void vcap_apply_width(u8 *dst, int width, int bytes)
+{
+ u8 bmask;
+ int idx;
+
+ for (idx = 0; idx < bytes; idx++) {
+ if (width > 0)
+ if (width < 8)
+ bmask = (1 << width) - 1;
+ else
+ bmask = ~0;
+ else
+ bmask = 0;
+ dst[idx] &= bmask;
+ width -= 8;
+ }
+}
+
+static void vcap_copy_from_w32be(u8 *dst, u8 *src, int size, int width)
+{
+ int idx, ridx, wstart, nidx;
+ int tail_bytes = (((size + 4) >> 2) << 2) - size;
+
+ for (idx = 0, ridx = size - 1; idx < size; ++idx, --ridx) {
+ wstart = (idx >> 2) << 2;
+ nidx = wstart + 3 - (idx & 0x3);
+ if (nidx >= size)
+ nidx -= tail_bytes;
+ dst[nidx] = src[ridx];
+ }
+
+ vcap_apply_width(dst, width, size);
+}
+
+static void vcap_copy_action_bit_field(struct vcap_u1_action *field, u8 *value)
+{
+ field->value = (*value) & 0x1;
+}
+
+static void vcap_copy_limited_actionfield(u8 *dstvalue, u8 *srcvalue,
+ int width, int bytes)
+{
+ memcpy(dstvalue, srcvalue, bytes);
+ vcap_apply_width(dstvalue, width, bytes);
+}
+
+static void vcap_copy_to_client_actionfield(struct vcap_rule_internal *ri,
+ struct vcap_client_actionfield *field,
+ u8 *value, u16 width)
+{
+ int field_size = actionfield_size_table[field->ctrl.type];
+
+ if (ri->admin->w32be) {
+ switch (field->ctrl.type) {
+ case VCAP_FIELD_BIT:
+ vcap_copy_action_bit_field(&field->data.u1, value);
+ break;
+ case VCAP_FIELD_U32:
+ vcap_copy_limited_actionfield((u8 *)&field->data.u32.value,
+ value,
+ width, field_size);
+ break;
+ case VCAP_FIELD_U48:
+ vcap_copy_from_w32be(field->data.u48.value, value,
+ field_size, width);
+ break;
+ case VCAP_FIELD_U56:
+ vcap_copy_from_w32be(field->data.u56.value, value,
+ field_size, width);
+ break;
+ case VCAP_FIELD_U64:
+ vcap_copy_from_w32be(field->data.u64.value, value,
+ field_size, width);
+ break;
+ case VCAP_FIELD_U72:
+ vcap_copy_from_w32be(field->data.u72.value, value,
+ field_size, width);
+ break;
+ case VCAP_FIELD_U112:
+ vcap_copy_from_w32be(field->data.u112.value, value,
+ field_size, width);
+ break;
+ case VCAP_FIELD_U128:
+ vcap_copy_from_w32be(field->data.u128.value, value,
+ field_size, width);
+ break;
+ };
+ } else {
+ switch (field->ctrl.type) {
+ case VCAP_FIELD_BIT:
+ vcap_copy_action_bit_field(&field->data.u1, value);
+ break;
+ case VCAP_FIELD_U32:
+ vcap_copy_limited_actionfield((u8 *)&field->data.u32.value,
+ value,
+ width, field_size);
+ break;
+ case VCAP_FIELD_U48:
+ vcap_copy_limited_actionfield(field->data.u48.value,
+ value,
+ width, field_size);
+ break;
+ case VCAP_FIELD_U56:
+ vcap_copy_limited_actionfield(field->data.u56.value,
+ value,
+ width, field_size);
+ break;
+ case VCAP_FIELD_U64:
+ vcap_copy_limited_actionfield(field->data.u64.value,
+ value,
+ width, field_size);
+ break;
+ case VCAP_FIELD_U72:
+ vcap_copy_limited_actionfield(field->data.u72.value,
+ value,
+ width, field_size);
+ break;
+ case VCAP_FIELD_U112:
+ vcap_copy_limited_actionfield(field->data.u112.value,
+ value,
+ width, field_size);
+ break;
+ case VCAP_FIELD_U128:
+ vcap_copy_limited_actionfield(field->data.u128.value,
+ value,
+ width, field_size);
+ break;
+ };
+ }
+}
+
+static void vcap_copy_key_bit_field(struct vcap_u1_key *field,
+ u8 *value, u8 *mask)
+{
+ field->value = (*value) & 0x1;
+ field->mask = (*mask) & 0x1;
+}
+
+static void vcap_copy_limited_keyfield(u8 *dstvalue, u8 *dstmask,
+ u8 *srcvalue, u8 *srcmask,
+ int width, int bytes)
+{
+ memcpy(dstvalue, srcvalue, bytes);
+ vcap_apply_width(dstvalue, width, bytes);
+ memcpy(dstmask, srcmask, bytes);
+ vcap_apply_width(dstmask, width, bytes);
+}
+
+static void vcap_copy_to_client_keyfield(struct vcap_rule_internal *ri,
+ struct vcap_client_keyfield *field,
+ u8 *value, u8 *mask, u16 width)
+{
+ int field_size = keyfield_size_table[field->ctrl.type] / 2;
+
+ if (ri->admin->w32be) {
+ switch (field->ctrl.type) {
+ case VCAP_FIELD_BIT:
+ vcap_copy_key_bit_field(&field->data.u1, value, mask);
+ break;
+ case VCAP_FIELD_U32:
+ vcap_copy_limited_keyfield((u8 *)&field->data.u32.value,
+ (u8 *)&field->data.u32.mask,
+ value, mask,
+ width, field_size);
+ break;
+ case VCAP_FIELD_U48:
+ vcap_copy_from_w32be(field->data.u48.value, value,
+ field_size, width);
+ vcap_copy_from_w32be(field->data.u48.mask, mask,
+ field_size, width);
+ break;
+ case VCAP_FIELD_U56:
+ vcap_copy_from_w32be(field->data.u56.value, value,
+ field_size, width);
+ vcap_copy_from_w32be(field->data.u56.mask, mask,
+ field_size, width);
+ break;
+ case VCAP_FIELD_U64:
+ vcap_copy_from_w32be(field->data.u64.value, value,
+ field_size, width);
+ vcap_copy_from_w32be(field->data.u64.mask, mask,
+ field_size, width);
+ break;
+ case VCAP_FIELD_U72:
+ vcap_copy_from_w32be(field->data.u72.value, value,
+ field_size, width);
+ vcap_copy_from_w32be(field->data.u72.mask, mask,
+ field_size, width);
+ break;
+ case VCAP_FIELD_U112:
+ vcap_copy_from_w32be(field->data.u112.value, value,
+ field_size, width);
+ vcap_copy_from_w32be(field->data.u112.mask, mask,
+ field_size, width);
+ break;
+ case VCAP_FIELD_U128:
+ vcap_copy_from_w32be(field->data.u128.value, value,
+ field_size, width);
+ vcap_copy_from_w32be(field->data.u128.mask, mask,
+ field_size, width);
+ break;
+ };
+ } else {
+ switch (field->ctrl.type) {
+ case VCAP_FIELD_BIT:
+ vcap_copy_key_bit_field(&field->data.u1, value, mask);
+ break;
+ case VCAP_FIELD_U32:
+ vcap_copy_limited_keyfield((u8 *)&field->data.u32.value,
+ (u8 *)&field->data.u32.mask,
+ value, mask,
+ width, field_size);
+ break;
+ case VCAP_FIELD_U48:
+ vcap_copy_limited_keyfield(field->data.u48.value,
+ field->data.u48.mask,
+ value, mask,
+ width, field_size);
+ break;
+ case VCAP_FIELD_U56:
+ vcap_copy_limited_keyfield(field->data.u56.value,
+ field->data.u56.mask,
+ value, mask,
+ width, field_size);
+ break;
+ case VCAP_FIELD_U64:
+ vcap_copy_limited_keyfield(field->data.u64.value,
+ field->data.u64.mask,
+ value, mask,
+ width, field_size);
+ break;
+ case VCAP_FIELD_U72:
+ vcap_copy_limited_keyfield(field->data.u72.value,
+ field->data.u72.mask,
+ value, mask,
+ width, field_size);
+ break;
+ case VCAP_FIELD_U112:
+ vcap_copy_limited_keyfield(field->data.u112.value,
+ field->data.u112.mask,
+ value, mask,
+ width, field_size);
+ break;
+ case VCAP_FIELD_U128:
+ vcap_copy_limited_keyfield(field->data.u128.value,
+ field->data.u128.mask,
+ value, mask,
+ width, field_size);
+ break;
+ };
+ }
+}
+
+static void vcap_rule_alloc_keyfield(struct vcap_rule_internal *ri,
+ const struct vcap_field *keyfield,
+ enum vcap_key_field key,
+ u8 *value, u8 *mask)
+{
+ struct vcap_client_keyfield *field;
+
+ field = kzalloc(sizeof(*field), GFP_KERNEL);
+ if (!field)
+ return;
+ INIT_LIST_HEAD(&field->ctrl.list);
+ field->ctrl.key = key;
+ field->ctrl.type = keyfield->type;
+ vcap_copy_to_client_keyfield(ri, field, value, mask, keyfield->width);
+ list_add_tail(&field->ctrl.list, &ri->data.keyfields);
+}
+
+/* Read key data from a VCAP address and discover if there is a rule keyset
+ * here
+ */
+static bool
+vcap_verify_actionstream_actionset(struct vcap_control *vctrl,
+ enum vcap_type vt,
+ u32 *actionstream,
+ enum vcap_actionfield_set actionset)
+{
+ const struct vcap_typegroup *tgt;
+ const struct vcap_field *fields;
+ const struct vcap_set *info;
+
+ if (vcap_actionfield_count(vctrl, vt, actionset) == 0)
+ return false;
+
+ info = vcap_actionfieldset(vctrl, vt, actionset);
+ /* Check that the actionset is valid */
+ if (!info)
+ return false;
+
+ /* a type_id of value -1 means that there is no type field */
+ if (info->type_id == (u8)-1)
+ return true;
+
+ /* Get a valid typegroup for the specific actionset */
+ tgt = vcap_actionfield_typegroup(vctrl, vt, actionset);
+ if (!tgt)
+ return false;
+
+ fields = vcap_actionfields(vctrl, vt, actionset);
+ if (!fields)
+ return false;
+
+ /* Later this will be expanded with a check of the type id */
+ return true;
+}
+
+/* Find the subword width of the action typegroup that matches the stream data
+ */
+static int vcap_find_actionstream_typegroup_sw(struct vcap_control *vctrl,
+ enum vcap_type vt, u32 *stream,
+ int sw_max)
+{
+ const struct vcap_typegroup **tgt;
+ int sw_idx, res;
+
+ tgt = vctrl->vcaps[vt].actionfield_set_typegroups;
+ /* Try the longest subword match first */
+ for (sw_idx = vctrl->vcaps[vt].sw_count; sw_idx >= 0; sw_idx--) {
+ if (!tgt[sw_idx])
+ continue;
+ res = vcap_verify_typegroups(stream, vctrl->vcaps[vt].act_width,
+ tgt[sw_idx], false, sw_max);
+ if (res == 0)
+ return sw_idx;
+ }
+ return -EINVAL;
+}
+
+/* Verify that the typegroup information, subword count, actionset and type id
+ * are in sync and correct, return the actionset
+ */
+static enum vcap_actionfield_set
+vcap_find_actionstream_actionset(struct vcap_control *vctrl,
+ enum vcap_type vt,
+ u32 *stream,
+ int sw_max)
+{
+ const struct vcap_set *actionfield_set;
+ int sw_count, idx;
+ bool res;
+
+ sw_count = vcap_find_actionstream_typegroup_sw(vctrl, vt, stream,
+ sw_max);
+ if (sw_count < 0)
+ return sw_count;
+
+ actionfield_set = vctrl->vcaps[vt].actionfield_set;
+ for (idx = 0; idx < vctrl->vcaps[vt].actionfield_set_size; ++idx) {
+ if (actionfield_set[idx].sw_per_item != sw_count)
+ continue;
+
+ res = vcap_verify_actionstream_actionset(vctrl, vt,
+ stream, idx);
+ if (res)
+ return idx;
+ }
+ return -EINVAL;
+}
+
+/* Store action value in an element in a list for the client */
+static void vcap_rule_alloc_actionfield(struct vcap_rule_internal *ri,
+ const struct vcap_field *actionfield,
+ enum vcap_action_field action,
+ u8 *value)
+{
+ struct vcap_client_actionfield *field;
+
+ field = kzalloc(sizeof(*field), GFP_KERNEL);
+ if (!field)
+ return;
+ INIT_LIST_HEAD(&field->ctrl.list);
+ field->ctrl.action = action;
+ field->ctrl.type = actionfield->type;
+ vcap_copy_to_client_actionfield(ri, field, value, actionfield->width);
+ list_add_tail(&field->ctrl.list, &ri->data.actionfields);
+}
+
+static int vcap_decode_actionset(struct vcap_rule_internal *ri)
+{
+ struct vcap_control *vctrl = ri->vctrl;
+ struct vcap_admin *admin = ri->admin;
+ const struct vcap_field *actionfield;
+ enum vcap_actionfield_set actionset;
+ enum vcap_type vt = admin->vtype;
+ const struct vcap_typegroup *tgt;
+ struct vcap_stream_iter iter;
+ int idx, res, actfield_count;
+ u32 *actstream;
+ u8 value[16];
+
+ actstream = admin->cache.actionstream;
+ res = vcap_find_actionstream_actionset(vctrl, vt, actstream, 0);
+ if (res < 0) {
+ pr_err("%s:%d: could not find valid actionset: %d\n",
+ __func__, __LINE__, res);
+ return -EINVAL;
+ }
+ actionset = res;
+ actfield_count = vcap_actionfield_count(vctrl, vt, actionset);
+ actionfield = vcap_actionfields(vctrl, vt, actionset);
+ tgt = vcap_actionfield_typegroup(vctrl, vt, actionset);
+ /* Start decoding the stream */
+ for (idx = 0; idx < actfield_count; ++idx) {
+ if (actionfield[idx].width <= 0)
+ continue;
+ /* Get the action */
+ memset(value, 0, DIV_ROUND_UP(actionfield[idx].width, 8));
+ vcap_iter_init(&iter, vctrl->vcaps[vt].act_width, tgt,
+ actionfield[idx].offset);
+ vcap_decode_field(actstream, &iter, actionfield[idx].width,
+ value);
+ /* Skip if no bits are set */
+ if (vcap_bitarray_zero(actionfield[idx].width, value))
+ continue;
+ vcap_rule_alloc_actionfield(ri, &actionfield[idx], idx, value);
+ /* Later the action id will also be checked */
+ }
+ return vcap_set_rule_set_actionset((struct vcap_rule *)ri, actionset);
+}
+
+static int vcap_decode_keyset(struct vcap_rule_internal *ri)
+{
+ struct vcap_control *vctrl = ri->vctrl;
+ struct vcap_stream_iter kiter, miter;
+ struct vcap_admin *admin = ri->admin;
+ enum vcap_keyfield_set keysets[10];
+ const struct vcap_field *keyfield;
+ enum vcap_type vt = admin->vtype;
+ const struct vcap_typegroup *tgt;
+ struct vcap_keyset_list matches;
+ enum vcap_keyfield_set keyset;
+ int idx, res, keyfield_count;
+ u32 *maskstream;
+ u32 *keystream;
+ u8 value[16];
+ u8 mask[16];
+
+ keystream = admin->cache.keystream;
+ maskstream = admin->cache.maskstream;
+ matches.keysets = keysets;
+ matches.cnt = 0;
+ matches.max = ARRAY_SIZE(keysets);
+ res = vcap_find_keystream_keysets(vctrl, vt, keystream, maskstream,
+ false, 0, &matches);
+ if (res < 0) {
+ pr_err("%s:%d: could not find valid keysets: %d\n",
+ __func__, __LINE__, res);
+ return -EINVAL;
+ }
+ keyset = matches.keysets[0];
+ keyfield_count = vcap_keyfield_count(vctrl, vt, keyset);
+ keyfield = vcap_keyfields(vctrl, vt, keyset);
+ tgt = vcap_keyfield_typegroup(vctrl, vt, keyset);
+ /* Start decoding the streams */
+ for (idx = 0; idx < keyfield_count; ++idx) {
+ if (keyfield[idx].width <= 0)
+ continue;
+ /* First get the mask */
+ memset(mask, 0, DIV_ROUND_UP(keyfield[idx].width, 8));
+ vcap_iter_init(&miter, vctrl->vcaps[vt].sw_width, tgt,
+ keyfield[idx].offset);
+ vcap_decode_field(maskstream, &miter, keyfield[idx].width,
+ mask);
+ /* Skip if no mask bits are set */
+ if (vcap_bitarray_zero(keyfield[idx].width, mask))
+ continue;
+ /* Get the key */
+ memset(value, 0, DIV_ROUND_UP(keyfield[idx].width, 8));
+ vcap_iter_init(&kiter, vctrl->vcaps[vt].sw_width, tgt,
+ keyfield[idx].offset);
+ vcap_decode_field(keystream, &kiter, keyfield[idx].width,
+ value);
+ vcap_rule_alloc_keyfield(ri, &keyfield[idx], idx, value, mask);
+ }
+ return vcap_set_rule_set_keyset((struct vcap_rule *)ri, keyset);
+}
+
+/* Read VCAP content into the VCAP cache */
+static int vcap_read_rule(struct vcap_rule_internal *ri)
+{
+ struct vcap_admin *admin = ri->admin;
+ int sw_idx, ent_idx = 0, act_idx = 0;
+ u32 addr = ri->addr;
+
+ if (!ri->size || !ri->keyset_sw_regs || !ri->actionset_sw_regs) {
+ pr_err("%s:%d: rule is empty\n", __func__, __LINE__);
+ return -EINVAL;
+ }
+ vcap_erase_cache(ri);
+ /* Use the values in the streams to read the VCAP cache */
+ for (sw_idx = 0; sw_idx < ri->size; sw_idx++, addr++) {
+ ri->vctrl->ops->update(ri->ndev, admin, VCAP_CMD_READ,
+ VCAP_SEL_ALL, addr);
+ ri->vctrl->ops->cache_read(ri->ndev, admin,
+ VCAP_SEL_ENTRY, ent_idx,
+ ri->keyset_sw_regs);
+ ri->vctrl->ops->cache_read(ri->ndev, admin,
+ VCAP_SEL_ACTION, act_idx,
+ ri->actionset_sw_regs);
+ if (sw_idx == 0)
+ ri->vctrl->ops->cache_read(ri->ndev, admin,
+ VCAP_SEL_COUNTER,
+ ri->counter_id, 0);
+ ent_idx += ri->keyset_sw_regs;
+ act_idx += ri->actionset_sw_regs;
+ }
+ return 0;
+}
+
/* Write VCAP cache content to the VCAP HW instance */
static int vcap_write_rule(struct vcap_rule_internal *ri)
{
@@ -1183,6 +1915,82 @@ void vcap_free_rule(struct vcap_rule *rule)
}
EXPORT_SYMBOL_GPL(vcap_free_rule);
+/* Get rule from a VCAP instance
+ *
+ * Looks up the rule with the given id, duplicates it, reads its content from
+ * the VCAP into the cache and decodes the keyset and actionset into the
+ * duplicate. All of this is done while holding the admin lock of the rule.
+ *
+ * Returns the duplicated rule, which the caller releases with
+ * vcap_free_rule(), NULL when no rule with this id exists, or an ERR_PTR()
+ * when the API check, the duplication, the read or the decoding fails.
+ */
+struct vcap_rule *vcap_get_rule(struct vcap_control *vctrl, u32 id)
+{
+	struct vcap_rule_internal *elem;
+	struct vcap_rule_internal *ri;
+	int err;
+
+	err = vcap_api_check(vctrl);
+	if (err)
+		return ERR_PTR(err);
+	elem = vcap_lookup_rule(vctrl, id);
+	if (!elem)
+		return NULL;
+	mutex_lock(&elem->admin->lock);
+	ri = vcap_dup_rule(elem);
+	if (IS_ERR(ri))
+		goto unlock;
+	/* Each step only runs when the previous one succeeded */
+	err = vcap_read_rule(ri);
+	if (!err)
+		err = vcap_decode_keyset(ri);
+	if (!err)
+		err = vcap_decode_actionset(ri);
+	if (err) {
+		/* Do not leak the duplicate (and any fields already decoded
+		 * into it) when the rule could not be fully retrieved
+		 */
+		vcap_free_rule((struct vcap_rule *)ri);
+		ri = ERR_PTR(err);
+	}
+
+unlock:
+	mutex_unlock(&elem->admin->lock);
+	return (struct vcap_rule *)ri;
+}
+EXPORT_SYMBOL_GPL(vcap_get_rule);
+
+/* Update existing rule
+ *
+ * The rule must already be known by its id. It is encoded again into an
+ * erased cache, written to the VCAP, and its counter is written as zero.
+ * Encoding and writing happen under the admin lock of the rule.
+ *
+ * Returns 0 on success, -ENOENT when the rule id is unknown, or the error
+ * from the API check, the encoding or one of the write steps.
+ */
+int vcap_mod_rule(struct vcap_rule *rule)
+{
+	struct vcap_rule_internal *ri = to_intrule(rule);
+	struct vcap_counter zero_ctr;
+	int ret;
+
+	ret = vcap_api_check(ri->vctrl);
+	if (ret)
+		return ret;
+
+	if (!vcap_lookup_rule(ri->vctrl, ri->data.id))
+		return -ENOENT;
+
+	mutex_lock(&ri->admin->lock);
+
+	/* Encode the bitstreams to the VCAP cache */
+	vcap_erase_cache(ri);
+	ret = vcap_encode_rule(ri);
+
+	/* Only write to the VCAP when the encoding succeeded */
+	if (!ret)
+		ret = vcap_write_rule(ri);
+
+	/* A modified rule gets a zeroed counter */
+	if (!ret) {
+		memset(&zero_ctr, 0, sizeof(zero_ctr));
+		ret = vcap_write_counter(ri, &zero_ctr);
+	}
+
+	mutex_unlock(&ri->admin->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vcap_mod_rule);
+
/* Return the alignment offset for a new rule address */
static int vcap_valid_rule_move(struct vcap_rule_internal *el, int offset)
{
@@ -1389,7 +2197,7 @@ static void vcap_copy_from_client_keyfield(struct vcap_rule *rule,
vcap_copy_to_w32be(field->data.u128.value, data->u128.value, size);
vcap_copy_to_w32be(field->data.u128.mask, data->u128.mask, size);
break;
- };
+ }
}
/* Check if the keyfield is already in the rule */
@@ -1530,6 +2338,22 @@ int vcap_rule_add_key_u128(struct vcap_rule *rule, enum vcap_key_field key,
}
EXPORT_SYMBOL_GPL(vcap_rule_add_key_u128);
+/* Get a 32 bit key field value and mask from the rule
+ *
+ * Returns 0 and fills in *value and *mask when the key is present in the
+ * rule, otherwise -ENOENT with both outputs left untouched.
+ */
+int vcap_rule_get_key_u32(struct vcap_rule *rule, enum vcap_key_field key,
+			  u32 *value, u32 *mask)
+{
+	struct vcap_client_keyfield *field = vcap_find_keyfield(rule, key);
+
+	if (field) {
+		*value = field->data.u32.value;
+		*mask = field->data.u32.mask;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(vcap_rule_get_key_u32);
+
/* Find a client action field in a rule */
static struct vcap_client_actionfield *
vcap_find_actionfield(struct vcap_rule *rule, enum vcap_action_field act)
@@ -1579,7 +2403,7 @@ static void vcap_copy_from_client_actionfield(struct vcap_rule *rule,
case VCAP_FIELD_U128:
vcap_copy_to_w32be(field->data.u128.value, data->u128.value, size);
break;
- };
+ }
}
/* Check if the actionfield is already in the rule */
diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h
index 93a0fcb12a81..0319866f9c94 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h
@@ -170,6 +170,10 @@ int vcap_add_rule(struct vcap_rule *rule);
int vcap_del_rule(struct vcap_control *vctrl, struct net_device *ndev, u32 id);
/* Make a full copy of an existing rule with a new rule id */
struct vcap_rule *vcap_copy_rule(struct vcap_rule *rule);
+/* Get rule from a VCAP instance */
+struct vcap_rule *vcap_get_rule(struct vcap_control *vctrl, u32 id);
+/* Update existing rule */
+int vcap_mod_rule(struct vcap_rule *rule);
/* Update the keyset for the rule */
int vcap_set_rule_set_keyset(struct vcap_rule *rule,
@@ -254,4 +258,8 @@ int vcap_rule_mod_action_u32(struct vcap_rule *rule,
enum vcap_action_field action,
u32 value);
+/* Get a 32 bit key field value and mask from the rule */
+int vcap_rule_get_key_u32(struct vcap_rule *rule, enum vcap_key_field key,
+ u32 *value, u32 *mask);
+
#endif /* __VCAP_API_CLIENT__ */
diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c
index 3b8d165dc832..895bfff550d2 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c
@@ -18,355 +18,15 @@ struct vcap_port_debugfs_info {
struct net_device *ndev;
};
-static bool vcap_bitarray_zero(int width, u8 *value)
-{
- int bytes = DIV_ROUND_UP(width, BITS_PER_BYTE);
- u8 total = 0, bmask = 0xff;
- int rwidth = width;
- int idx;
-
- for (idx = 0; idx < bytes; ++idx, rwidth -= BITS_PER_BYTE) {
- if (rwidth && rwidth < BITS_PER_BYTE)
- bmask = (1 << rwidth) - 1;
- total += value[idx] & bmask;
- }
- return total == 0;
-}
-
-static bool vcap_get_bit(u32 *stream, struct vcap_stream_iter *itr)
-{
- u32 mask = BIT(itr->reg_bitpos);
- u32 *p = &stream[itr->reg_idx];
-
- return !!(*p & mask);
-}
-
-static void vcap_decode_field(u32 *stream, struct vcap_stream_iter *itr,
- int width, u8 *value)
-{
- int idx;
-
- /* Loop over the field value bits and get the field bits and
- * set them in the output value byte array
- */
- for (idx = 0; idx < width; idx++) {
- u8 bidx = idx & 0x7;
-
- /* Decode one field value bit */
- if (vcap_get_bit(stream, itr))
- *value |= 1 << bidx;
- vcap_iter_next(itr);
- if (bidx == 7)
- value++;
- }
-}
-
-/* Verify that the typegroup bits have the correct values */
-static int vcap_verify_typegroups(u32 *stream, int sw_width,
- const struct vcap_typegroup *tgt, bool mask,
- int sw_max)
-{
- struct vcap_stream_iter iter;
- int sw_cnt, idx;
-
- vcap_iter_set(&iter, sw_width, tgt, 0);
- sw_cnt = 0;
- while (iter.tg->width) {
- u32 value = 0;
- u32 tg_value = iter.tg->value;
-
- if (mask)
- tg_value = (1 << iter.tg->width) - 1;
- /* Set position to current typegroup bit */
- iter.offset = iter.tg->offset;
- vcap_iter_update(&iter);
- for (idx = 0; idx < iter.tg->width; idx++) {
- /* Decode one typegroup bit */
- if (vcap_get_bit(stream, &iter))
- value |= 1 << idx;
- iter.offset++;
- vcap_iter_update(&iter);
- }
- if (value != tg_value)
- return -EINVAL;
- iter.tg++; /* next typegroup */
- sw_cnt++;
- /* Stop checking more typegroups */
- if (sw_max && sw_cnt >= sw_max)
- break;
- }
- return 0;
-}
-
-/* Find the subword width of the key typegroup that matches the stream data */
-static int vcap_find_keystream_typegroup_sw(struct vcap_control *vctrl,
- enum vcap_type vt, u32 *stream,
- bool mask, int sw_max)
-{
- const struct vcap_typegroup **tgt;
- int sw_idx, res;
-
- tgt = vctrl->vcaps[vt].keyfield_set_typegroups;
- /* Try the longest subword match first */
- for (sw_idx = vctrl->vcaps[vt].sw_count; sw_idx >= 0; sw_idx--) {
- if (!tgt[sw_idx])
- continue;
-
- res = vcap_verify_typegroups(stream, vctrl->vcaps[vt].sw_width,
- tgt[sw_idx], mask, sw_max);
- if (res == 0)
- return sw_idx;
- }
- return -EINVAL;
-}
-
-/* Find the subword width of the action typegroup that matches the stream data
- */
-static int vcap_find_actionstream_typegroup_sw(struct vcap_control *vctrl,
- enum vcap_type vt, u32 *stream,
- int sw_max)
-{
- const struct vcap_typegroup **tgt;
- int sw_idx, res;
-
- tgt = vctrl->vcaps[vt].actionfield_set_typegroups;
- /* Try the longest subword match first */
- for (sw_idx = vctrl->vcaps[vt].sw_count; sw_idx >= 0; sw_idx--) {
- if (!tgt[sw_idx])
- continue;
- res = vcap_verify_typegroups(stream, vctrl->vcaps[vt].act_width,
- tgt[sw_idx], false, sw_max);
- if (res == 0)
- return sw_idx;
- }
- return -EINVAL;
-}
-
-/* Verify that the type id in the stream matches the type id of the keyset */
-static bool vcap_verify_keystream_keyset(struct vcap_control *vctrl,
- enum vcap_type vt,
- u32 *keystream,
- u32 *mskstream,
- enum vcap_keyfield_set keyset)
-{
- const struct vcap_info *vcap = &vctrl->vcaps[vt];
- const struct vcap_field *typefld;
- const struct vcap_typegroup *tgt;
- const struct vcap_field *fields;
- struct vcap_stream_iter iter;
- const struct vcap_set *info;
- u32 value = 0;
- u32 mask = 0;
-
- if (vcap_keyfield_count(vctrl, vt, keyset) == 0)
- return false;
-
- info = vcap_keyfieldset(vctrl, vt, keyset);
- /* Check that the keyset is valid */
- if (!info)
- return false;
-
- /* a type_id of value -1 means that there is no type field */
- if (info->type_id == (u8)-1)
- return true;
-
- /* Get a valid typegroup for the specific keyset */
- tgt = vcap_keyfield_typegroup(vctrl, vt, keyset);
- if (!tgt)
- return false;
-
- fields = vcap_keyfields(vctrl, vt, keyset);
- if (!fields)
- return false;
-
- typefld = &fields[VCAP_KF_TYPE];
- vcap_iter_init(&iter, vcap->sw_width, tgt, typefld->offset);
- vcap_decode_field(mskstream, &iter, typefld->width, (u8 *)&mask);
- /* no type info if there are no mask bits */
- if (vcap_bitarray_zero(typefld->width, (u8 *)&mask))
- return false;
-
- /* Get the value of the type field in the stream and compare to the
- * one define in the vcap keyset
- */
- vcap_iter_init(&iter, vcap->sw_width, tgt, typefld->offset);
- vcap_decode_field(keystream, &iter, typefld->width, (u8 *)&value);
-
- return (value & mask) == (info->type_id & mask);
-}
-
-/* Verify that the typegroup information, subword count, keyset and type id
- * are in sync and correct, return the list of matching keysets
- */
-static int
-vcap_find_keystream_keysets(struct vcap_control *vctrl,
- enum vcap_type vt,
- u32 *keystream,
- u32 *mskstream,
- bool mask, int sw_max,
- struct vcap_keyset_list *kslist)
-{
- const struct vcap_set *keyfield_set;
- int sw_count, idx;
-
- sw_count = vcap_find_keystream_typegroup_sw(vctrl, vt, keystream, mask,
- sw_max);
- if (sw_count < 0)
- return sw_count;
-
- keyfield_set = vctrl->vcaps[vt].keyfield_set;
- for (idx = 0; idx < vctrl->vcaps[vt].keyfield_set_size; ++idx) {
- if (keyfield_set[idx].sw_per_item != sw_count)
- continue;
-
- if (vcap_verify_keystream_keyset(vctrl, vt, keystream,
- mskstream, idx))
- vcap_keyset_list_add(kslist, idx);
- }
- if (kslist->cnt > 0)
- return 0;
- return -EINVAL;
-}
-
-/* Read key data from a VCAP address and discover if there is a rule keyset
- * here
- */
-static bool
-vcap_verify_actionstream_actionset(struct vcap_control *vctrl,
- enum vcap_type vt,
- u32 *actionstream,
- enum vcap_actionfield_set actionset)
-{
- const struct vcap_typegroup *tgt;
- const struct vcap_field *fields;
- const struct vcap_set *info;
-
- if (vcap_actionfield_count(vctrl, vt, actionset) == 0)
- return false;
-
- info = vcap_actionfieldset(vctrl, vt, actionset);
- /* Check that the actionset is valid */
- if (!info)
- return false;
-
- /* a type_id of value -1 means that there is no type field */
- if (info->type_id == (u8)-1)
- return true;
-
- /* Get a valid typegroup for the specific actionset */
- tgt = vcap_actionfield_typegroup(vctrl, vt, actionset);
- if (!tgt)
- return false;
-
- fields = vcap_actionfields(vctrl, vt, actionset);
- if (!fields)
- return false;
-
- /* Later this will be expanded with a check of the type id */
- return true;
-}
-
-/* Verify that the typegroup information, subword count, actionset and type id
- * are in sync and correct, return the actionset
- */
-static enum vcap_actionfield_set
-vcap_find_actionstream_actionset(struct vcap_control *vctrl,
- enum vcap_type vt,
- u32 *stream,
- int sw_max)
-{
- const struct vcap_set *actionfield_set;
- int sw_count, idx;
- bool res;
-
- sw_count = vcap_find_actionstream_typegroup_sw(vctrl, vt, stream,
- sw_max);
- if (sw_count < 0)
- return sw_count;
-
- actionfield_set = vctrl->vcaps[vt].actionfield_set;
- for (idx = 0; idx < vctrl->vcaps[vt].actionfield_set_size; ++idx) {
- if (actionfield_set[idx].sw_per_item != sw_count)
- continue;
-
- res = vcap_verify_actionstream_actionset(vctrl, vt,
- stream, idx);
- if (res)
- return idx;
- }
- return -EINVAL;
-}
-
-/* Read key data from a VCAP address and discover if there are any rule keysets
- * here
- */
-static int vcap_addr_keysets(struct vcap_control *vctrl,
- struct net_device *ndev,
- struct vcap_admin *admin,
- int addr,
- struct vcap_keyset_list *kslist)
-{
- enum vcap_type vt = admin->vtype;
- int keyset_sw_regs, idx;
- u32 key = 0, mask = 0;
-
- /* Read the cache at the specified address */
- keyset_sw_regs = DIV_ROUND_UP(vctrl->vcaps[vt].sw_width, 32);
- vctrl->ops->update(ndev, admin, VCAP_CMD_READ, VCAP_SEL_ALL, addr);
- vctrl->ops->cache_read(ndev, admin, VCAP_SEL_ENTRY, 0,
- keyset_sw_regs);
- /* Skip uninitialized key/mask entries */
- for (idx = 0; idx < keyset_sw_regs; ++idx) {
- key |= ~admin->cache.keystream[idx];
- mask |= admin->cache.maskstream[idx];
- }
- if (key == 0 && mask == 0)
- return -EINVAL;
- /* Decode and locate the keysets */
- return vcap_find_keystream_keysets(vctrl, vt, admin->cache.keystream,
- admin->cache.maskstream, false, 0,
- kslist);
-}
-
-static int vcap_read_rule(struct vcap_rule_internal *ri)
-{
- struct vcap_admin *admin = ri->admin;
- int sw_idx, ent_idx = 0, act_idx = 0;
- u32 addr = ri->addr;
-
- if (!ri->size || !ri->keyset_sw_regs || !ri->actionset_sw_regs) {
- pr_err("%s:%d: rule is empty\n", __func__, __LINE__);
- return -EINVAL;
- }
- vcap_erase_cache(ri);
- /* Use the values in the streams to read the VCAP cache */
- for (sw_idx = 0; sw_idx < ri->size; sw_idx++, addr++) {
- ri->vctrl->ops->update(ri->ndev, admin, VCAP_CMD_READ,
- VCAP_SEL_ALL, addr);
- ri->vctrl->ops->cache_read(ri->ndev, admin,
- VCAP_SEL_ENTRY, ent_idx,
- ri->keyset_sw_regs);
- ri->vctrl->ops->cache_read(ri->ndev, admin,
- VCAP_SEL_ACTION, act_idx,
- ri->actionset_sw_regs);
- if (sw_idx == 0)
- ri->vctrl->ops->cache_read(ri->ndev, admin,
- VCAP_SEL_COUNTER,
- ri->counter_id, 0);
- ent_idx += ri->keyset_sw_regs;
- act_idx += ri->actionset_sw_regs;
- }
- return 0;
-}
-
/* Dump the keyfields value and mask values */
static void vcap_debugfs_show_rule_keyfield(struct vcap_control *vctrl,
struct vcap_output_print *out,
enum vcap_key_field key,
const struct vcap_field *keyfield,
- u8 *value, u8 *mask)
+ struct vcap_client_keyfield_data *data)
{
bool hex = false;
+ u8 *value, *mask;
int idx, bytes;
out->prf(out->dst, " %s: W%d: ", vcap_keyfield_name(vctrl, key),
@@ -374,40 +34,62 @@ static void vcap_debugfs_show_rule_keyfield(struct vcap_control *vctrl,
switch (keyfield[key].type) {
case VCAP_FIELD_BIT:
- out->prf(out->dst, "%d/%d", value[0], mask[0]);
+ out->prf(out->dst, "%d/%d", data->u1.value, data->u1.mask);
break;
case VCAP_FIELD_U32:
+ value = (u8 *)(&data->u32.value);
+ mask = (u8 *)(&data->u32.mask);
+
if (key == VCAP_KF_L3_IP4_SIP || key == VCAP_KF_L3_IP4_DIP) {
- out->prf(out->dst, "%pI4h/%pI4h", value, mask);
+ out->prf(out->dst, "%pI4h/%pI4h", &data->u32.value,
+ &data->u32.mask);
} else if (key == VCAP_KF_ETYPE ||
key == VCAP_KF_IF_IGR_PORT_MASK) {
hex = true;
} else {
u32 fmsk = (1 << keyfield[key].width) - 1;
- u32 val = *(u32 *)value;
- u32 msk = *(u32 *)mask;
- out->prf(out->dst, "%u/%u", val & fmsk, msk & fmsk);
+ out->prf(out->dst, "%u/%u", data->u32.value & fmsk,
+ data->u32.mask & fmsk);
}
break;
case VCAP_FIELD_U48:
+ value = data->u48.value;
+ mask = data->u48.mask;
if (key == VCAP_KF_L2_SMAC || key == VCAP_KF_L2_DMAC)
- out->prf(out->dst, "%pMR/%pMR", value, mask);
+ out->prf(out->dst, "%pMR/%pMR", data->u48.value,
+ data->u48.mask);
else
hex = true;
break;
case VCAP_FIELD_U56:
+ value = data->u56.value;
+ mask = data->u56.mask;
+ hex = true;
+ break;
case VCAP_FIELD_U64:
+ value = data->u64.value;
+ mask = data->u64.mask;
+ hex = true;
+ break;
case VCAP_FIELD_U72:
+ value = data->u72.value;
+ mask = data->u72.mask;
+ hex = true;
+ break;
case VCAP_FIELD_U112:
+ value = data->u112.value;
+ mask = data->u112.mask;
hex = true;
break;
case VCAP_FIELD_U128:
if (key == VCAP_KF_L3_IP6_SIP || key == VCAP_KF_L3_IP6_DIP) {
u8 nvalue[16], nmask[16];
- vcap_netbytes_copy(nvalue, value, sizeof(nvalue));
- vcap_netbytes_copy(nmask, mask, sizeof(nmask));
+ vcap_netbytes_copy(nvalue, data->u128.value,
+ sizeof(nvalue));
+ vcap_netbytes_copy(nmask, data->u128.mask,
+ sizeof(nmask));
out->prf(out->dst, "%pI6/%pI6", nvalue, nmask);
} else {
hex = true;
@@ -472,19 +154,15 @@ static int vcap_debugfs_show_rule_keyset(struct vcap_rule_internal *ri,
struct vcap_output_print *out)
{
struct vcap_control *vctrl = ri->vctrl;
- struct vcap_stream_iter kiter, miter;
struct vcap_admin *admin = ri->admin;
enum vcap_keyfield_set keysets[10];
const struct vcap_field *keyfield;
enum vcap_type vt = admin->vtype;
- const struct vcap_typegroup *tgt;
+ struct vcap_client_keyfield *ckf;
struct vcap_keyset_list matches;
- enum vcap_keyfield_set keyset;
- int idx, res, keyfield_count;
u32 *maskstream;
u32 *keystream;
- u8 value[16];
- u8 mask[16];
+ int res;
keystream = admin->cache.keystream;
maskstream = admin->cache.maskstream;
@@ -498,39 +176,20 @@ static int vcap_debugfs_show_rule_keyset(struct vcap_rule_internal *ri,
__func__, __LINE__, res);
return -EINVAL;
}
- keyset = matches.keysets[0];
out->prf(out->dst, " keysets:");
- for (idx = 0; idx < matches.cnt; ++idx)
+ for (int idx = 0; idx < matches.cnt; ++idx)
out->prf(out->dst, " %s",
vcap_keyset_name(vctrl, matches.keysets[idx]));
out->prf(out->dst, "\n");
out->prf(out->dst, " keyset_sw: %d\n", ri->keyset_sw);
out->prf(out->dst, " keyset_sw_regs: %d\n", ri->keyset_sw_regs);
- keyfield_count = vcap_keyfield_count(vctrl, vt, keyset);
- keyfield = vcap_keyfields(vctrl, vt, keyset);
- tgt = vcap_keyfield_typegroup(vctrl, vt, keyset);
- /* Start decoding the streams */
- for (idx = 0; idx < keyfield_count; ++idx) {
- if (keyfield[idx].width <= 0)
- continue;
- /* First get the mask */
- memset(mask, 0, DIV_ROUND_UP(keyfield[idx].width, 8));
- vcap_iter_init(&miter, vctrl->vcaps[vt].sw_width, tgt,
- keyfield[idx].offset);
- vcap_decode_field(maskstream, &miter, keyfield[idx].width,
- mask);
- /* Skip if no mask bits are set */
- if (vcap_bitarray_zero(keyfield[idx].width, mask))
- continue;
- /* Get the key */
- memset(value, 0, DIV_ROUND_UP(keyfield[idx].width, 8));
- vcap_iter_init(&kiter, vctrl->vcaps[vt].sw_width, tgt,
- keyfield[idx].offset);
- vcap_decode_field(keystream, &kiter, keyfield[idx].width,
- value);
- vcap_debugfs_show_rule_keyfield(vctrl, out, idx, keyfield,
- value, mask);
+
+ list_for_each_entry(ckf, &ri->data.keyfields, ctrl.list) {
+ keyfield = vcap_keyfields(vctrl, admin->vtype, ri->data.keyset);
+ vcap_debugfs_show_rule_keyfield(vctrl, out, ckf->ctrl.key,
+ keyfield, &ckf->data);
}
+
return 0;
}
@@ -540,48 +199,21 @@ static int vcap_debugfs_show_rule_actionset(struct vcap_rule_internal *ri,
struct vcap_control *vctrl = ri->vctrl;
struct vcap_admin *admin = ri->admin;
const struct vcap_field *actionfield;
- enum vcap_actionfield_set actionset;
- enum vcap_type vt = admin->vtype;
- const struct vcap_typegroup *tgt;
- struct vcap_stream_iter iter;
- int idx, res, actfield_count;
- u32 *actstream;
- u8 value[16];
- bool no_bits;
-
- actstream = admin->cache.actionstream;
- res = vcap_find_actionstream_actionset(vctrl, vt, actstream, 0);
- if (res < 0) {
- pr_err("%s:%d: could not find valid actionset: %d\n",
- __func__, __LINE__, res);
- return -EINVAL;
- }
- actionset = res;
+ struct vcap_client_actionfield *caf;
+
out->prf(out->dst, " actionset: %s\n",
vcap_actionset_name(vctrl, ri->data.actionset));
out->prf(out->dst, " actionset_sw: %d\n", ri->actionset_sw);
out->prf(out->dst, " actionset_sw_regs: %d\n", ri->actionset_sw_regs);
- actfield_count = vcap_actionfield_count(vctrl, vt, actionset);
- actionfield = vcap_actionfields(vctrl, vt, actionset);
- tgt = vcap_actionfield_typegroup(vctrl, vt, actionset);
- /* Start decoding the stream */
- for (idx = 0; idx < actfield_count; ++idx) {
- if (actionfield[idx].width <= 0)
- continue;
- /* Get the action */
- memset(value, 0, DIV_ROUND_UP(actionfield[idx].width, 8));
- vcap_iter_init(&iter, vctrl->vcaps[vt].act_width, tgt,
- actionfield[idx].offset);
- vcap_decode_field(actstream, &iter, actionfield[idx].width,
- value);
- /* Skip if no bits are set */
- no_bits = vcap_bitarray_zero(actionfield[idx].width, value);
- if (no_bits)
- continue;
- /* Later the action id will also be checked */
- vcap_debugfs_show_rule_actionfield(vctrl, out, idx, actionfield,
- value);
+
+ list_for_each_entry(caf, &ri->data.actionfields, ctrl.list) {
+ actionfield = vcap_actionfields(vctrl, admin->vtype,
+ ri->data.actionset);
+ vcap_debugfs_show_rule_actionfield(vctrl, out, caf->ctrl.action,
+ actionfield,
+ &caf->data.u1.value);
}
+
return 0;
}
@@ -632,32 +264,22 @@ static int vcap_show_admin(struct vcap_control *vctrl,
struct vcap_admin *admin,
struct vcap_output_print *out)
{
- struct vcap_rule_internal *elem, *ri;
+ struct vcap_rule_internal *elem;
+ struct vcap_rule *vrule;
int ret = 0;
vcap_show_admin_info(vctrl, admin, out);
- mutex_lock(&admin->lock);
list_for_each_entry(elem, &admin->rules, list) {
- ri = vcap_dup_rule(elem);
- if (IS_ERR(ri)) {
- ret = PTR_ERR(ri);
- goto err_unlock;
+ vrule = vcap_get_rule(vctrl, elem->data.id);
+ if (IS_ERR_OR_NULL(vrule)) {
+ ret = PTR_ERR(vrule);
+ break;
}
- /* Read data from VCAP */
- ret = vcap_read_rule(ri);
- if (ret)
- goto err_free_rule;
+
out->prf(out->dst, "\n");
- vcap_show_admin_rule(vctrl, admin, out, ri);
- vcap_free_rule((struct vcap_rule *)ri);
+ vcap_show_admin_rule(vctrl, admin, out, to_intrule(vrule));
+ vcap_free_rule(vrule);
}
- mutex_unlock(&admin->lock);
- return 0;
-
-err_free_rule:
- vcap_free_rule((struct vcap_rule *)ri);
-err_unlock:
- mutex_unlock(&admin->lock);
return ret;
}
diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_private.h b/drivers/net/ethernet/microchip/vcap/vcap_api_private.h
index 9ac1b1d55f22..4fd21da97679 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api_private.h
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api_private.h
@@ -96,4 +96,18 @@ const char *vcap_actionset_name(struct vcap_control *vctrl,
const char *vcap_actionfield_name(struct vcap_control *vctrl,
enum vcap_action_field action);
+/* Read key data from a VCAP address and discover if there are any rule keysets
+ * here
+ */
+int vcap_addr_keysets(struct vcap_control *vctrl, struct net_device *ndev,
+ struct vcap_admin *admin, int addr,
+ struct vcap_keyset_list *kslist);
+
+/* Verify that the typegroup information, subword count, keyset and type id
+ * are in sync and correct, return the list of matching keysets
+ */
+int vcap_find_keystream_keysets(struct vcap_control *vctrl, enum vcap_type vt,
+ u32 *keystream, u32 *mskstream, bool mask,
+ int sw_max, struct vcap_keyset_list *kslist);
+
#endif /* __VCAP_API_PRIVATE__ */
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index ad1277ac7f0d..2f6a048dee90 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1363,10 +1363,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
xdp_do_flush();
}
-static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
+static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
{
struct mana_cq *cq = context;
u8 arm_bit;
+ int w;
WARN_ON_ONCE(cq->gdma_cq != gdma_queue);
@@ -1375,26 +1376,31 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
else
mana_poll_tx_cq(cq);
- if (cq->work_done < cq->budget &&
- napi_complete_done(&cq->napi, cq->work_done)) {
+ w = cq->work_done;
+
+ if (w < cq->budget &&
+ napi_complete_done(&cq->napi, w)) {
arm_bit = SET_ARM_BIT;
} else {
arm_bit = 0;
}
mana_gd_ring_cq(gdma_queue, arm_bit);
+
+ return w;
}
static int mana_poll(struct napi_struct *napi, int budget)
{
struct mana_cq *cq = container_of(napi, struct mana_cq, napi);
+ int w;
cq->work_done = 0;
cq->budget = budget;
- mana_cq_handler(cq, cq->gdma_cq);
+ w = mana_cq_handler(cq, cq->gdma_cq);
- return min(cq->work_done, budget);
+ return min(w, budget);
}
static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue)
diff --git a/drivers/net/ethernet/netronome/nfp/ccm_mbox.c b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c
index 4247bca09807..aa8aba4ff7aa 100644
--- a/drivers/net/ethernet/netronome/nfp/ccm_mbox.c
+++ b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c
@@ -503,7 +503,7 @@ nfp_ccm_mbox_msg_prepare(struct nfp_net *nn, struct sk_buff *skb,
max_len = max(max_reply_size, round_up(skb->len, 4));
if (max_len > mbox_max) {
nn_dp_warn(&nn->dp,
- "message too big for tha mailbox: %u/%u vs %u\n",
+ "message too big for the mailbox: %u/%u vs %u\n",
skb->len, max_reply_size, mbox_max);
return -EMSGSIZE;
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
index 2b427d8ccb2f..ccacb6ab6c39 100644
--- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
@@ -282,7 +282,7 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
dma_len = skb_headlen(skb);
if (skb_is_gso(skb))
type = NFDK_DESC_TX_TYPE_TSO;
- else if (!nr_frags && dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
+ else if (!nr_frags && dma_len <= NFDK_TX_MAX_DATA_PER_HEAD)
type = NFDK_DESC_TX_TYPE_SIMPLE;
else
type = NFDK_DESC_TX_TYPE_GATHER;
@@ -927,7 +927,7 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
dma_len = pkt_len;
dma_addr = rxbuf->dma_addr + dma_off;
- if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
+ if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD)
type = NFDK_DESC_TX_TYPE_SIMPLE;
else
type = NFDK_DESC_TX_TYPE_GATHER;
@@ -1325,7 +1325,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
txbuf = &tx_ring->ktxbufs[wr_idx];
dma_len = skb_headlen(skb);
- if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
+ if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD)
type = NFDK_DESC_TX_TYPE_SIMPLE;
else
type = NFDK_DESC_TX_TYPE_GATHER;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 6c83e47d8b3d..da33f09facb9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -88,6 +88,9 @@
#define NFP_NET_FL_BATCH 16 /* Add freelist in this Batch size */
#define NFP_NET_XDP_MAX_COMPLETE 2048 /* XDP bufs to reclaim in NAPI poll */
+/* Multicast (MC) definitions */
+#define NFP_NET_CFG_MAC_MC_MAX 1024 /* Maximum number of MC addresses per port */
+
/* Offload definitions */
#define NFP_NET_N_VXLAN_PORTS (NFP_NET_CFG_VXLAN_SZ / sizeof(__be16))
@@ -476,6 +479,7 @@ struct nfp_stat_pair {
* @rx_dma_off: Offset at which DMA packets (for XDP headroom)
* @rx_offset: Offset in the RX buffers where packet data starts
* @ctrl: Local copy of the control register/word.
+ * @ctrl_w1: Local copy of the control register/word1.
* @fl_bufsz: Currently configured size of the freelist buffers
* @xdp_prog: Installed XDP program
* @tx_rings: Array of pre-allocated TX ring structures
@@ -508,6 +512,7 @@ struct nfp_net_dp {
u32 rx_dma_off;
u32 ctrl;
+ u32 ctrl_w1;
u32 fl_bufsz;
struct bpf_prog *xdp_prog;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 682a9198fb54..2314cf55e821 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1007,6 +1007,7 @@ static int nfp_net_set_config_and_enable(struct nfp_net *nn)
new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
+ nn_writel(nn, NFP_NET_CFG_CTRL_WORD1, nn->dp.ctrl_w1);
err = nfp_net_reconfig(nn, update);
if (err) {
nfp_net_clear_config_and_disable(nn);
@@ -1333,18 +1334,59 @@ err_unlock:
return err;
}
+/* Pass one multicast MAC address to the firmware through the mailbox.
+ * @netdev: net device the address belongs to
+ * @addr: 6 byte MAC address
+ * @cmd: mailbox command to issue for this address
+ *
+ * Returns the error from taking the mailbox lock, otherwise the result of
+ * nfp_net_mbox_reconfig_and_unlock().
+ */
+static int nfp_net_mc_cfg(struct net_device *netdev, const unsigned char *addr, const u32 cmd)
+{
+ struct nfp_net *nn = netdev_priv(netdev);
+ int ret;
+
+ ret = nfp_net_mbox_lock(nn, NFP_NET_CFG_MULTICAST_SZ);
+ if (ret)
+ return ret;
+
+ /* First four address bytes go to MAC_HI, the last two to MAC_LO */
+ nn_writel(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_MULTICAST_MAC_HI,
+ get_unaligned_be32(addr));
+ nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_MULTICAST_MAC_LO,
+ get_unaligned_be16(addr + 4));
+
+ /* NOTE(review): presumably releases the mailbox lock taken above, as
+ * the helper name suggests - confirm against its definition
+ */
+ return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
+}
+
+/* Sync callback for __dev_mc_sync(): add one multicast address.
+ *
+ * Rejects the request with -EINVAL when the device currently holds more than
+ * NFP_NET_CFG_MAC_MC_MAX multicast addresses, otherwise issues the multicast
+ * add mailbox command for @addr and returns its result.
+ */
+static int nfp_net_mc_sync(struct net_device *netdev, const unsigned char *addr)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	int mc_cnt = netdev_mc_count(netdev);
+
+	if (mc_cnt <= NFP_NET_CFG_MAC_MC_MAX)
+		return nfp_net_mc_cfg(netdev, addr,
+				      NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD);
+
+	nn_err(nn, "Requested number of MC addresses (%d) exceeds maximum (%d).\n",
+	       mc_cnt, NFP_NET_CFG_MAC_MC_MAX);
+	return -EINVAL;
+}
+
+/* Unsync callback for __dev_mc_sync(): issue the multicast delete mailbox
+ * command for @addr and return its result.
+ */
+static int nfp_net_mc_unsync(struct net_device *netdev, const unsigned char *addr)
+{
+ return nfp_net_mc_cfg(netdev, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL);
+}
+
static void nfp_net_set_rx_mode(struct net_device *netdev)
{
struct nfp_net *nn = netdev_priv(netdev);
- u32 new_ctrl;
+ u32 new_ctrl, new_ctrl_w1;
new_ctrl = nn->dp.ctrl;
+ new_ctrl_w1 = nn->dp.ctrl_w1;
if (!netdev_mc_empty(netdev) || netdev->flags & IFF_ALLMULTI)
new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_L2MC;
else
new_ctrl &= ~NFP_NET_CFG_CTRL_L2MC;
+ if (netdev->flags & IFF_ALLMULTI)
+ new_ctrl_w1 &= ~NFP_NET_CFG_CTRL_MCAST_FILTER;
+ else
+ new_ctrl_w1 |= nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER;
+
if (netdev->flags & IFF_PROMISC) {
if (nn->cap & NFP_NET_CFG_CTRL_PROMISC)
new_ctrl |= NFP_NET_CFG_CTRL_PROMISC;
@@ -1354,13 +1396,21 @@ static void nfp_net_set_rx_mode(struct net_device *netdev)
new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC;
}
- if (new_ctrl == nn->dp.ctrl)
+ if ((nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER) &&
+ __dev_mc_sync(netdev, nfp_net_mc_sync, nfp_net_mc_unsync))
+ netdev_err(netdev, "Sync mc address failed\n");
+
+ if (new_ctrl == nn->dp.ctrl && new_ctrl_w1 == nn->dp.ctrl_w1)
return;
- nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
+ if (new_ctrl != nn->dp.ctrl)
+ nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
+ if (new_ctrl_w1 != nn->dp.ctrl_w1)
+ nn_writel(nn, NFP_NET_CFG_CTRL_WORD1, new_ctrl_w1);
nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN);
nn->dp.ctrl = new_ctrl;
+ nn->dp.ctrl_w1 = new_ctrl_w1;
}
static void nfp_net_rss_init_itbl(struct nfp_net *nn)
@@ -2092,7 +2142,7 @@ void nfp_net_info(struct nfp_net *nn)
nn->fw_ver.extend, nn->fw_ver.class,
nn->fw_ver.major, nn->fw_ver.minor,
nn->max_mtu);
- nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
nn->cap,
nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "",
nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "",
@@ -2120,6 +2170,7 @@ void nfp_net_info(struct nfp_net *nn)
nn->cap & NFP_NET_CFG_CTRL_CSUM_COMPLETE ?
"RXCSUM_COMPLETE " : "",
nn->cap & NFP_NET_CFG_CTRL_LIVE_ADDR ? "LIVE_ADDR " : "",
+ nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER ? "MULTICAST_FILTER " : "",
nfp_app_extra_cap(nn->app, nn));
}
@@ -2548,6 +2599,9 @@ int nfp_net_init(struct nfp_net *nn)
if (nn->cap & NFP_NET_CFG_CTRL_TXRWB)
nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXRWB;
+ if (nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER)
+ nn->dp.ctrl_w1 |= NFP_NET_CFG_CTRL_MCAST_FILTER;
+
/* Stash the re-configuration queue away. First odd queue in TX Bar */
nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
@@ -2555,6 +2609,7 @@ int nfp_net_init(struct nfp_net *nn)
nn_writel(nn, NFP_NET_CFG_CTRL, 0);
nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
+ nn_writel(nn, NFP_NET_CFG_CTRL_WORD1, 0);
err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RING |
NFP_NET_CFG_UPDATE_GEN);
if (err)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index cc11b3dc1252..51124309ae1f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -267,6 +267,7 @@
#define NFP_NET_CFG_CTRL_WORD1 0x0098
#define NFP_NET_CFG_CTRL_PKT_TYPE (0x1 << 0) /* Pkttype offload */
#define NFP_NET_CFG_CTRL_IPSEC (0x1 << 1) /* IPsec offload */
+#define NFP_NET_CFG_CTRL_MCAST_FILTER (0x1 << 2) /* Multicast Filter */
#define NFP_NET_CFG_CAP_WORD1 0x00a4
@@ -413,6 +414,9 @@
#define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET 5
#define NFP_NET_CFG_MBOX_CMD_TLV_CMSG 6
+#define NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD 8
+#define NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL 9
+
/* VLAN filtering using general use mailbox
* %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox
* %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter
@@ -424,6 +428,17 @@
#define NFP_NET_CFG_VLAN_FILTER_PROTO (NFP_NET_CFG_VLAN_FILTER + 2)
#define NFP_NET_CFG_VLAN_FILTER_SZ 0x0004
+/* Multicast filtering using general use mailbox
+ * %NFP_NET_CFG_MULTICAST: Base address of Multicast filter mailbox
+ * %NFP_NET_CFG_MULTICAST_MAC_HI: High 32-bits of Multicast MAC address
+ * %NFP_NET_CFG_MULTICAST_MAC_LO: Low 16-bits of Multicast MAC address
+ * %NFP_NET_CFG_MULTICAST_SZ: Size of the Multicast filter mailbox in bytes
+ */
+#define NFP_NET_CFG_MULTICAST NFP_NET_CFG_MBOX_SIMPLE_VAL
+#define NFP_NET_CFG_MULTICAST_MAC_HI NFP_NET_CFG_MULTICAST
+#define NFP_NET_CFG_MULTICAST_MAC_LO (NFP_NET_CFG_MULTICAST + 6)
+#define NFP_NET_CFG_MULTICAST_SZ 0x0006
+
/* TLV capabilities
* %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV
* %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index ed274f033626..e5116a86cfbc 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -200,7 +200,7 @@ static void qed_ll2b_complete_rx_packet(void *cxt,
dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr,
cdev->ll2->rx_size, DMA_FROM_DEVICE);
- skb = build_skb(buffer->data, 0);
+ skb = slab_build_skb(buffer->data);
if (!skb) {
DP_INFO(cdev, "Failed to build SKB\n");
kfree(buffer->data);
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index ec157885da13..a9dcc98b6af1 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -5283,6 +5283,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dev->hw_features |= NETIF_F_RXALL;
dev->hw_features |= NETIF_F_RXFCS;
+ netdev_sw_irq_coalesce_default_on(dev);
+
/* configure chip for default features */
rtl8169_set_features(dev, dev->features);
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 6bc923326268..33f723a9f471 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -841,7 +841,7 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
napi_gro_receive(&priv->napi[q],
priv->rx_1st_skb);
stats->rx_packets++;
- stats->rx_bytes += priv->rx_1st_skb->len;
+ stats->rx_bytes += pkt_len;
break;
}
}
diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c
index c2224e41a694..cc30524c2fe4 100644
--- a/drivers/net/ethernet/sfc/efx_common.c
+++ b/drivers/net/ethernet/sfc/efx_common.c
@@ -1164,7 +1164,7 @@ static ssize_t mcdi_logging_show(struct device *dev,
struct efx_nic *efx = dev_get_drvdata(dev);
struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
- return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled);
+ return sysfs_emit(buf, "%d\n", mcdi->logging_enabled);
}
static ssize_t mcdi_logging_store(struct device *dev,
diff --git a/drivers/net/ethernet/sfc/siena/efx_common.c b/drivers/net/ethernet/sfc/siena/efx_common.c
index 1fd396b00bfb..e4b294b8e9ac 100644
--- a/drivers/net/ethernet/sfc/siena/efx_common.c
+++ b/drivers/net/ethernet/sfc/siena/efx_common.c
@@ -1178,7 +1178,7 @@ static ssize_t mcdi_logging_show(struct device *dev,
struct efx_nic *efx = dev_get_drvdata(dev);
struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
- return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled);
+ return sysfs_emit(buf, "%d\n", mcdi->logging_enabled);
}
static ssize_t mcdi_logging_store(struct device *dev,
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 31ff35174034..f77511fe4e87 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -235,6 +235,15 @@ config DWMAC_INTEL_PLAT
the stmmac device driver. This driver is used for the Intel Keem Bay
SoC.
+config DWMAC_TEGRA
+ tristate "NVIDIA Tegra MGBE support"
+ depends on ARCH_TEGRA || COMPILE_TEST
+ help
+ This selects the Multi-GigaBit Ethernet (MGBE) Controller that is
+ found on the NVIDIA Tegra SoC devices. This driver provides the glue
+ layer on top of the stmmac driver required for these NVIDIA Tegra SoC
+ devices.
+
config DWMAC_VISCONTI
tristate "Toshiba Visconti DWMAC support"
default ARCH_VISCONTI
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index d4e12e9ace4f..057e4bab5c08 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_DWMAC_DWC_QOS_ETH) += dwmac-dwc-qos-eth.o
obj-$(CONFIG_DWMAC_INTEL_PLAT) += dwmac-intel-plat.o
obj-$(CONFIG_DWMAC_GENERIC) += dwmac-generic.o
obj-$(CONFIG_DWMAC_IMX8) += dwmac-imx.o
+obj-$(CONFIG_DWMAC_TEGRA) += dwmac-tegra.o
obj-$(CONFIG_DWMAC_VISCONTI) += dwmac-visconti.o
stmmac-platform-objs:= stmmac_platform.o
dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
new file mode 100644
index 000000000000..bdf990cf2f31
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
+#include <linux/module.h>
+#include <linux/stmmac.h>
+#include <linux/clk.h>
+
+#include "stmmac_platform.h"
+
+/*
+ * Clock names looked up with devm_clk_bulk_get() in tegra_mgbe_probe(). The
+ * length of this array is also the count passed to every clk_bulk_*() call
+ * in this file.
+ * NOTE(review): "mac" appears twice (fifth and last entry) - confirm whether
+ * the duplicate is intentional.
+ */
+static const char *const mgbe_clks[] = {
+ "rx-pcs", "tx", "tx-pcs", "mac-divider", "mac", "mgbe", "ptp-ref", "mac"
+};
+
+/* Per-device glue state; tegra_mgbe_probe() stores it in plat->bsp_priv. */
+struct tegra_mgbe {
+ struct device *dev; /* &pdev->dev, used for dev_err() and devm clock lookup */
+
+ struct clk_bulk_data *clks; /* indexed by position in mgbe_clks[] */
+
+ struct reset_control *rst_mac; /* reset line named "mac" */
+ struct reset_control *rst_pcs; /* reset line named "pcs" */
+
+ void __iomem *hv; /* mapping of the "hypervisor" resource */
+ void __iomem *regs; /* mapping of the "mac" resource */
+ void __iomem *xpcs; /* mapping of the "xpcs" resource */
+
+ struct mii_bus *mii; /* NOTE(review): not referenced by the code in this file */
+};
+
+/* Register offsets and bit fields that this file accesses relative to mgbe->xpcs. */
+#define XPCS_WRAP_UPHY_RX_CONTROL 0x801c
+#define XPCS_WRAP_UPHY_RX_CONTROL_RX_SW_OVRD BIT(31)
+#define XPCS_WRAP_UPHY_RX_CONTROL_RX_PCS_PHY_RDY BIT(10)
+#define XPCS_WRAP_UPHY_RX_CONTROL_RX_CDR_RESET BIT(9)
+#define XPCS_WRAP_UPHY_RX_CONTROL_RX_CAL_EN BIT(8)
+#define XPCS_WRAP_UPHY_RX_CONTROL_RX_SLEEP (BIT(7) | BIT(6))
+#define XPCS_WRAP_UPHY_RX_CONTROL_AUX_RX_IDDQ BIT(5)
+#define XPCS_WRAP_UPHY_RX_CONTROL_RX_IDDQ BIT(4)
+#define XPCS_WRAP_UPHY_RX_CONTROL_RX_DATA_EN BIT(0)
+#define XPCS_WRAP_UPHY_HW_INIT_CTRL 0x8020
+#define XPCS_WRAP_UPHY_HW_INIT_CTRL_TX_EN BIT(0)
+#define XPCS_WRAP_UPHY_HW_INIT_CTRL_RX_EN BIT(2)
+#define XPCS_WRAP_UPHY_STATUS 0x8044
+#define XPCS_WRAP_UPHY_STATUS_TX_P_UP BIT(0)
+#define XPCS_WRAP_IRQ_STATUS 0x8050
+#define XPCS_WRAP_IRQ_STATUS_PCS_LINK_STS BIT(6)
+
+/* NOTE(review): the three values below are not used by the code in this file. */
+#define XPCS_REG_ADDR_SHIFT 10
+#define XPCS_REG_ADDR_MASK 0x1fff
+#define XPCS_ADDR 0x3fc
+
+/*
+ * MAC_SBD_INTR is written to MGBE_WRAP_COMMON_INTR_ENABLE through mgbe->regs;
+ * MGBE_SID is written to MGBE_WRAP_AXI_ASID0_CTRL through mgbe->hv.
+ */
+#define MGBE_WRAP_COMMON_INTR_ENABLE 0x8704
+#define MAC_SBD_INTR BIT(2)
+#define MGBE_WRAP_AXI_ASID0_CTRL 0x8400
+#define MGBE_SID 0x6
+
+/*
+ * System-sleep suspend hook.
+ *
+ * Suspends the stmmac core first; only if that succeeds are the bulk clocks
+ * disabled and the "mac" reset asserted. Returns the stmmac_suspend() error,
+ * or otherwise the result of asserting the reset.
+ */
+static int __maybe_unused tegra_mgbe_suspend(struct device *dev)
+{
+	struct tegra_mgbe *mgbe = get_stmmac_bsp_priv(dev);
+	int ret = stmmac_suspend(dev);
+
+	if (ret != 0)
+		return ret;
+
+	clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks);
+	return reset_control_assert(mgbe->rst_mac);
+}
+
+/*
+ * System-sleep resume hook.
+ *
+ * Re-enables the bulk clocks, releases the "mac" reset, reprograms the
+ * wrapper interrupt enable and SID registers, restarts the TX lane
+ * initialisation if the lane is not reported up, and finally resumes the
+ * stmmac core.
+ *
+ * Returns 0 on success or a negative errno. Every failure after the clocks
+ * were enabled goes through the same unwind label so the clocks are never
+ * left running on error (this includes a failing reset deassert).
+ */
+static int __maybe_unused tegra_mgbe_resume(struct device *dev)
+{
+	struct tegra_mgbe *mgbe = get_stmmac_bsp_priv(dev);
+	u32 value;
+	int err;
+
+	err = clk_bulk_prepare_enable(ARRAY_SIZE(mgbe_clks), mgbe->clks);
+	if (err < 0)
+		return err;
+
+	err = reset_control_deassert(mgbe->rst_mac);
+	if (err < 0)
+		goto disable_clks;
+
+	/* Enable common interrupt at wrapper level */
+	writel(MAC_SBD_INTR, mgbe->regs + MGBE_WRAP_COMMON_INTR_ENABLE);
+
+	/* Program SID */
+	writel(MGBE_SID, mgbe->hv + MGBE_WRAP_AXI_ASID0_CTRL);
+
+	/* Only request TX lane init when the status register says it is not up. */
+	value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_STATUS);
+	if ((value & XPCS_WRAP_UPHY_STATUS_TX_P_UP) == 0) {
+		value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_HW_INIT_CTRL);
+		value |= XPCS_WRAP_UPHY_HW_INIT_CTRL_TX_EN;
+		writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_HW_INIT_CTRL);
+	}
+
+	/* Wait until the TX_EN bit reads back as zero. */
+	err = readl_poll_timeout(mgbe->xpcs + XPCS_WRAP_UPHY_HW_INIT_CTRL, value,
+				 (value & XPCS_WRAP_UPHY_HW_INIT_CTRL_TX_EN) == 0,
+				 500, 500 * 2000);
+	if (err < 0) {
+		dev_err(mgbe->dev, "timeout waiting for TX lane to become enabled\n");
+		goto disable_clks;
+	}
+
+	err = stmmac_resume(dev);
+	if (err < 0)
+		goto disable_clks;
+
+	return err;
+
+disable_clks:
+	clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks);
+
+	return err;
+}
+
+/*
+ * serdes_powerup callback installed by tegra_mgbe_probe().
+ *
+ * Walks XPCS_WRAP_UPHY_RX_CONTROL through a fixed series of single-bit
+ * read-modify-write steps, waiting for RX_CAL_EN to read back as zero in the
+ * middle, then waits for PCS_LINK_STS in XPCS_WRAP_IRQ_STATUS and writes the
+ * value read back to that register.
+ *
+ * @ndev is unused; @mgbe_data is the struct tegra_mgbe stored in bsp_priv.
+ * Returns 0 on success or the negative error from a timed-out poll.
+ */
+static int mgbe_uphy_lane_bringup_serdes_up(struct net_device *ndev, void *mgbe_data)
+{
+	struct tegra_mgbe *mgbe = mgbe_data;
+	void __iomem *rx_ctrl = mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL;
+	void __iomem *irq_status = mgbe->xpcs + XPCS_WRAP_IRQ_STATUS;
+	u32 val;
+	int ret;
+
+	/* Every step below is a separate read-modify-write, in this order. */
+	writel(readl(rx_ctrl) | XPCS_WRAP_UPHY_RX_CONTROL_RX_SW_OVRD, rx_ctrl);
+	writel(readl(rx_ctrl) & ~XPCS_WRAP_UPHY_RX_CONTROL_RX_IDDQ, rx_ctrl);
+	writel(readl(rx_ctrl) & ~XPCS_WRAP_UPHY_RX_CONTROL_AUX_RX_IDDQ, rx_ctrl);
+	writel(readl(rx_ctrl) & ~XPCS_WRAP_UPHY_RX_CONTROL_RX_SLEEP, rx_ctrl);
+	writel(readl(rx_ctrl) | XPCS_WRAP_UPHY_RX_CONTROL_RX_CAL_EN, rx_ctrl);
+
+	/* Poll until the RX_CAL_EN bit just set reads back as zero. */
+	ret = readl_poll_timeout(rx_ctrl, val,
+				 !(val & XPCS_WRAP_UPHY_RX_CONTROL_RX_CAL_EN),
+				 1000, 1000 * 2000);
+	if (ret < 0) {
+		dev_err(mgbe->dev, "timeout waiting for RX calibration to become enabled\n");
+		return ret;
+	}
+
+	writel(readl(rx_ctrl) | XPCS_WRAP_UPHY_RX_CONTROL_RX_DATA_EN, rx_ctrl);
+
+	/* Set and then clear RX_CDR_RESET. */
+	writel(readl(rx_ctrl) | XPCS_WRAP_UPHY_RX_CONTROL_RX_CDR_RESET, rx_ctrl);
+	writel(readl(rx_ctrl) & ~XPCS_WRAP_UPHY_RX_CONTROL_RX_CDR_RESET, rx_ctrl);
+
+	writel(readl(rx_ctrl) | XPCS_WRAP_UPHY_RX_CONTROL_RX_PCS_PHY_RDY, rx_ctrl);
+
+	ret = readl_poll_timeout(irq_status, val,
+				 val & XPCS_WRAP_IRQ_STATUS_PCS_LINK_STS,
+				 500, 500 * 2000);
+	if (ret < 0) {
+		dev_err(mgbe->dev, "timeout waiting for link to become ready\n");
+		return ret;
+	}
+
+	/* clear status: write the value just read back to the same register */
+	writel(val, irq_status);
+
+	return 0;
+}
+
+/*
+ * serdes_powerdown callback installed by tegra_mgbe_probe().
+ *
+ * Sets RX_SW_OVRD, clears RX_DATA_EN, then sets RX_SLEEP, AUX_RX_IDDQ and
+ * RX_IDDQ in XPCS_WRAP_UPHY_RX_CONTROL, one read-modify-write per step.
+ *
+ * @ndev is unused; @mgbe_data is the struct tegra_mgbe stored in bsp_priv.
+ */
+static void mgbe_uphy_lane_bringup_serdes_down(struct net_device *ndev, void *mgbe_data)
+{
+	struct tegra_mgbe *mgbe = mgbe_data;
+	void __iomem *rx_ctrl = mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL;
+
+	/* Keep each bit change as its own register write, in this order. */
+	writel(readl(rx_ctrl) | XPCS_WRAP_UPHY_RX_CONTROL_RX_SW_OVRD, rx_ctrl);
+	writel(readl(rx_ctrl) & ~XPCS_WRAP_UPHY_RX_CONTROL_RX_DATA_EN, rx_ctrl);
+	writel(readl(rx_ctrl) | XPCS_WRAP_UPHY_RX_CONTROL_RX_SLEEP, rx_ctrl);
+	writel(readl(rx_ctrl) | XPCS_WRAP_UPHY_RX_CONTROL_AUX_RX_IDDQ, rx_ctrl);
+	writel(readl(rx_ctrl) | XPCS_WRAP_UPHY_RX_CONTROL_RX_IDDQ, rx_ctrl);
+}
+
+/*
+ * Probe one MGBE instance.
+ *
+ * Maps the "hypervisor", "mac" and "xpcs" resources, enables the clocks
+ * named in mgbe_clks[], pulses the "mac" and "pcs" resets, builds the stmmac
+ * platform data from the device tree, starts TX lane initialisation if the
+ * lane is not reported up, programs the wrapper interrupt enable and SID
+ * registers and finally calls stmmac_dvr_probe().
+ *
+ * Returns 0 on success or a negative errno. Failures after the clocks were
+ * enabled disable them again; failures after stmmac_probe_config_dt() also
+ * release the platform data.
+ */
+static int tegra_mgbe_probe(struct platform_device *pdev)
+{
+	struct plat_stmmacenet_data *plat;
+	struct stmmac_resources res;
+	struct tegra_mgbe *mgbe;
+	int irq, err, i;
+	u32 value;
+
+	mgbe = devm_kzalloc(&pdev->dev, sizeof(*mgbe), GFP_KERNEL);
+	if (!mgbe)
+		return -ENOMEM;
+
+	mgbe->dev = &pdev->dev;
+
+	memset(&res, 0, sizeof(res));
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	mgbe->hv = devm_platform_ioremap_resource_byname(pdev, "hypervisor");
+	if (IS_ERR(mgbe->hv))
+		return PTR_ERR(mgbe->hv);
+
+	mgbe->regs = devm_platform_ioremap_resource_byname(pdev, "mac");
+	if (IS_ERR(mgbe->regs))
+		return PTR_ERR(mgbe->regs);
+
+	mgbe->xpcs = devm_platform_ioremap_resource_byname(pdev, "xpcs");
+	if (IS_ERR(mgbe->xpcs))
+		return PTR_ERR(mgbe->xpcs);
+
+	res.addr = mgbe->regs;
+	res.irq = irq;
+
+	/*
+	 * One clk_bulk_data per clock name: the loop below and every
+	 * clk_bulk_*() call operate on ARRAY_SIZE(mgbe_clks) entries, so a
+	 * single-element allocation would be overrun.
+	 */
+	mgbe->clks = devm_kcalloc(&pdev->dev, ARRAY_SIZE(mgbe_clks),
+				  sizeof(*mgbe->clks), GFP_KERNEL);
+	if (!mgbe->clks)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(mgbe_clks); i++)
+		mgbe->clks[i].id = mgbe_clks[i];
+
+	err = devm_clk_bulk_get(mgbe->dev, ARRAY_SIZE(mgbe_clks), mgbe->clks);
+	if (err < 0)
+		return err;
+
+	err = clk_bulk_prepare_enable(ARRAY_SIZE(mgbe_clks), mgbe->clks);
+	if (err < 0)
+		return err;
+
+	/* Perform MAC reset */
+	mgbe->rst_mac = devm_reset_control_get(&pdev->dev, "mac");
+	if (IS_ERR(mgbe->rst_mac)) {
+		err = PTR_ERR(mgbe->rst_mac);
+		goto disable_clks;
+	}
+
+	err = reset_control_assert(mgbe->rst_mac);
+	if (err < 0)
+		goto disable_clks;
+
+	usleep_range(2000, 4000);
+
+	err = reset_control_deassert(mgbe->rst_mac);
+	if (err < 0)
+		goto disable_clks;
+
+	/* Perform PCS reset */
+	mgbe->rst_pcs = devm_reset_control_get(&pdev->dev, "pcs");
+	if (IS_ERR(mgbe->rst_pcs)) {
+		err = PTR_ERR(mgbe->rst_pcs);
+		goto disable_clks;
+	}
+
+	err = reset_control_assert(mgbe->rst_pcs);
+	if (err < 0)
+		goto disable_clks;
+
+	usleep_range(2000, 4000);
+
+	err = reset_control_deassert(mgbe->rst_pcs);
+	if (err < 0)
+		goto disable_clks;
+
+	plat = stmmac_probe_config_dt(pdev, res.mac);
+	if (IS_ERR(plat)) {
+		err = PTR_ERR(plat);
+		goto disable_clks;
+	}
+
+	plat->has_xgmac = 1;
+	plat->tso_en = 1;
+	plat->pmt = 1;
+	plat->bsp_priv = mgbe;
+
+	/* Fall back to a child node named "mdio" when none was set up. */
+	if (!plat->mdio_node)
+		plat->mdio_node = of_get_child_by_name(pdev->dev.of_node, "mdio");
+
+	if (!plat->mdio_bus_data) {
+		plat->mdio_bus_data = devm_kzalloc(&pdev->dev, sizeof(*plat->mdio_bus_data),
+						   GFP_KERNEL);
+		if (!plat->mdio_bus_data) {
+			err = -ENOMEM;
+			goto remove;
+		}
+	}
+
+	plat->mdio_bus_data->needs_reset = true;
+
+	/* Only request TX lane init when the status register says it is not up. */
+	value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_STATUS);
+	if ((value & XPCS_WRAP_UPHY_STATUS_TX_P_UP) == 0) {
+		value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_HW_INIT_CTRL);
+		value |= XPCS_WRAP_UPHY_HW_INIT_CTRL_TX_EN;
+		writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_HW_INIT_CTRL);
+	}
+
+	/* Wait until the TX_EN bit reads back as zero. */
+	err = readl_poll_timeout(mgbe->xpcs + XPCS_WRAP_UPHY_HW_INIT_CTRL, value,
+				 (value & XPCS_WRAP_UPHY_HW_INIT_CTRL_TX_EN) == 0,
+				 500, 500 * 2000);
+	if (err < 0) {
+		dev_err(mgbe->dev, "timeout waiting for TX lane to become enabled\n");
+		goto remove;
+	}
+
+	plat->serdes_powerup = mgbe_uphy_lane_bringup_serdes_up;
+	plat->serdes_powerdown = mgbe_uphy_lane_bringup_serdes_down;
+
+	/* Tx FIFO Size - 128KB */
+	plat->tx_fifo_size = 131072;
+	/* Rx FIFO Size - 192KB */
+	plat->rx_fifo_size = 196608;
+
+	/* Enable common interrupt at wrapper level */
+	writel(MAC_SBD_INTR, mgbe->regs + MGBE_WRAP_COMMON_INTR_ENABLE);
+
+	/* Program SID */
+	writel(MGBE_SID, mgbe->hv + MGBE_WRAP_AXI_ASID0_CTRL);
+
+	plat->serdes_up_after_phy_linkup = 1;
+
+	err = stmmac_dvr_probe(&pdev->dev, plat, &res);
+	if (err < 0)
+		goto remove;
+
+	return 0;
+
+remove:
+	stmmac_remove_config_dt(pdev, plat);
+disable_clks:
+	clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks);
+
+	return err;
+}
+
+/*
+ * Platform remove hook: disables the bulk clocks enabled at probe time, then
+ * hands over to stmmac_pltfr_remove(). Always returns 0.
+ */
+static int tegra_mgbe_remove(struct platform_device *pdev)
+{
+ /* Fetch the glue state before the stmmac teardown below runs. */
+ struct tegra_mgbe *mgbe = get_stmmac_bsp_priv(&pdev->dev);
+
+ /*
+  * NOTE(review): the clocks are turned off before stmmac_pltfr_remove();
+  * confirm the stmmac teardown does not need them running.
+  */
+ clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks);
+
+ stmmac_pltfr_remove(pdev);
+
+ return 0;
+}
+
+/* Device-tree compatibles handled by this driver; the empty entry ends the table. */
+static const struct of_device_id tegra_mgbe_match[] = {
+ { .compatible = "nvidia,tegra234-mgbe", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, tegra_mgbe_match);
+
+/* Wires tegra_mgbe_suspend()/tegra_mgbe_resume() into the driver's PM ops. */
+static SIMPLE_DEV_PM_OPS(tegra_mgbe_pm_ops, tegra_mgbe_suspend, tegra_mgbe_resume);
+
+/* Platform driver definition tying together probe/remove, PM ops and the match table. */
+static struct platform_driver tegra_mgbe_driver = {
+ .probe = tegra_mgbe_probe,
+ .remove = tegra_mgbe_remove,
+ .driver = {
+ .name = "tegra-mgbe",
+ .pm = &tegra_mgbe_pm_ops,
+ .of_match_table = tegra_mgbe_match,
+ },
+};
+module_platform_driver(tegra_mgbe_driver);
+
+MODULE_AUTHOR("Thierry Reding <[email protected]>");
+MODULE_DESCRIPTION("NVIDIA Tegra MGBE driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 18c7ca29da2c..f36590d0c830 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -988,6 +988,9 @@ static void stmmac_mac_link_up(struct phylink_config *config,
struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
u32 old_ctrl, ctrl;
+ if (priv->plat->serdes_up_after_phy_linkup && priv->plat->serdes_powerup)
+ priv->plat->serdes_powerup(priv->dev, priv->plat->bsp_priv);
+
old_ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
ctrl = old_ctrl & ~priv->hw->link.speed_mask;
@@ -3809,7 +3812,7 @@ static int __stmmac_open(struct net_device *dev,
stmmac_reset_queues_param(priv);
- if (priv->plat->serdes_powerup) {
+ if (!priv->plat->serdes_up_after_phy_linkup && priv->plat->serdes_powerup) {
ret = priv->plat->serdes_powerup(dev, priv->plat->bsp_priv);
if (ret < 0) {
netdev_err(priv->dev, "%s: Serdes powerup failed\n",
@@ -7518,7 +7521,7 @@ int stmmac_resume(struct device *dev)
stmmac_mdio_reset(priv->mii);
}
- if (priv->plat->serdes_powerup) {
+ if (!priv->plat->serdes_up_after_phy_linkup && priv->plat->serdes_powerup) {
ret = priv->plat->serdes_powerup(ndev,
priv->plat->bsp_priv);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 50f6b4a14be4..eb6d9cd8e93f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -108,10 +108,10 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
axi->axi_lpi_en = of_property_read_bool(np, "snps,lpi_en");
axi->axi_xit_frm = of_property_read_bool(np, "snps,xit_frm");
- axi->axi_kbbe = of_property_read_bool(np, "snps,axi_kbbe");
- axi->axi_fb = of_property_read_bool(np, "snps,axi_fb");
- axi->axi_mb = of_property_read_bool(np, "snps,axi_mb");
- axi->axi_rb = of_property_read_bool(np, "snps,axi_rb");
+ axi->axi_kbbe = of_property_read_bool(np, "snps,kbbe");
+ axi->axi_fb = of_property_read_bool(np, "snps,fb");
+ axi->axi_mb = of_property_read_bool(np, "snps,mb");
+ axi->axi_rb = of_property_read_bool(np, "snps,rb");
if (of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt))
axi->axi_wr_osr_lmt = 1;
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 51c37e99d086..de112ab3195c 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -133,11 +133,6 @@
NETIF_MSG_IFUP | NETIF_MSG_PROBE | NETIF_MSG_IFDOWN | \
NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
-static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common);
-static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common);
-static void am65_cpsw_nuss_free_tx_chns(struct am65_cpsw_common *common);
-static void am65_cpsw_nuss_free_rx_chns(struct am65_cpsw_common *common);
-
static void am65_cpsw_port_set_sl_mac(struct am65_cpsw_port *slave,
const u8 *dev_addr)
{
@@ -379,20 +374,6 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
if (common->usage_count)
return 0;
- /* init tx/rx channels */
- ret = am65_cpsw_nuss_init_tx_chns(common);
- if (ret) {
- dev_err(common->dev, "init_tx_chns failed\n");
- return ret;
- }
-
- ret = am65_cpsw_nuss_init_rx_chns(common);
- if (ret) {
- dev_err(common->dev, "init_rx_chns failed\n");
- am65_cpsw_nuss_free_tx_chns(common);
- return ret;
- }
-
/* Control register */
writel(AM65_CPSW_CTL_P0_ENABLE | AM65_CPSW_CTL_P0_TX_CRC_REMOVE |
AM65_CPSW_CTL_VLAN_AWARE | AM65_CPSW_CTL_P0_RX_PAD,
@@ -421,7 +402,6 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
/* disable priority elevation */
writel(0, common->cpsw_base + AM65_CPSW_REG_PTYPE);
- cpsw_ale_control_set(common->ale, 0, ALE_CLEAR, 1);
cpsw_ale_start(common->ale);
/* limit to one RX flow only */
@@ -453,8 +433,7 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
GFP_KERNEL);
if (!skb) {
dev_err(common->dev, "cannot allocate skb\n");
- ret = -ENOMEM;
- goto err;
+ return -ENOMEM;
}
ret = am65_cpsw_nuss_rx_push(common, skb);
@@ -463,7 +442,7 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
"cannot submit skb to channel rx, error %d\n",
ret);
kfree_skb(skb);
- goto err;
+ return ret;
}
kmemleak_not_leak(skb);
}
@@ -472,7 +451,7 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
for (i = 0; i < common->tx_ch_num; i++) {
ret = k3_udma_glue_enable_tx_chn(common->tx_chns[i].tx_chn);
if (ret)
- goto err;
+ return ret;
napi_enable(&common->tx_chns[i].napi_tx);
}
@@ -484,12 +463,6 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
dev_dbg(common->dev, "cpsw_nuss started\n");
return 0;
-
-err:
- am65_cpsw_nuss_free_tx_chns(common);
- am65_cpsw_nuss_free_rx_chns(common);
-
- return ret;
}
static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma);
@@ -543,9 +516,6 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common)
writel(0, common->cpsw_base + AM65_CPSW_REG_CTL);
writel(0, common->cpsw_base + AM65_CPSW_REG_STAT_PORT_EN);
- am65_cpsw_nuss_free_tx_chns(common);
- am65_cpsw_nuss_free_rx_chns(common);
-
dev_dbg(common->dev, "cpsw_nuss stopped\n");
return 0;
}
@@ -587,7 +557,6 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev)
struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
int ret, i;
u32 reg;
- int tmo;
ret = pm_runtime_resume_and_get(common->dev);
if (ret < 0)
@@ -595,19 +564,17 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev)
/* Idle MAC port */
cpsw_sl_ctl_set(port->slave.mac_sl, CPSW_SL_CTL_CMD_IDLE);
-
- tmo = cpsw_sl_wait_for_idle(port->slave.mac_sl, 100);
- dev_info(common->dev, "down msc_sl %08x tmo %d\n",
- cpsw_sl_reg_read(port->slave.mac_sl, CPSW_SL_MACSTATUS), tmo);
-
+ cpsw_sl_wait_for_idle(port->slave.mac_sl, 100);
cpsw_sl_ctl_reset(port->slave.mac_sl);
/* soft reset MAC */
cpsw_sl_reg_write(port->slave.mac_sl, CPSW_SL_SOFT_RESET, 1);
mdelay(1);
reg = cpsw_sl_reg_read(port->slave.mac_sl, CPSW_SL_SOFT_RESET);
- if (reg)
- dev_info(common->dev, "mac reset not yet done\n");
+ if (reg) {
+ dev_err(common->dev, "soft RESET didn't complete\n");
+ return -ETIMEDOUT;
+ }
/* Notify the stack of the actual queue counts. */
ret = netif_set_real_num_tx_queues(ndev, common->tx_ch_num);
@@ -1495,7 +1462,7 @@ static void am65_cpsw_nuss_mac_link_up(struct phylink_config *config, struct phy
if (speed == SPEED_1000)
mac_control |= CPSW_SL_CTL_GIG;
- if (speed == SPEED_10 && interface == PHY_INTERFACE_MODE_RGMII)
+ if (speed == SPEED_10 && phy_interface_mode_is_rgmii(interface))
/* Can be used with in band mode only */
mac_control |= CPSW_SL_CTL_EXT_EN;
if (speed == SPEED_100 && interface == PHY_INTERFACE_MODE_RMII)
@@ -1539,9 +1506,9 @@ static void am65_cpsw_nuss_slave_disable_unused(struct am65_cpsw_port *port)
cpsw_sl_ctl_reset(port->slave.mac_sl);
}
-static void am65_cpsw_nuss_free_tx_chns(struct am65_cpsw_common *common)
+static void am65_cpsw_nuss_free_tx_chns(void *data)
{
- struct device *dev = common->dev;
+ struct am65_cpsw_common *common = data;
int i;
for (i = 0; i < common->tx_ch_num; i++) {
@@ -1553,11 +1520,7 @@ static void am65_cpsw_nuss_free_tx_chns(struct am65_cpsw_common *common)
if (!IS_ERR_OR_NULL(tx_chn->tx_chn))
k3_udma_glue_release_tx_chn(tx_chn->tx_chn);
- /* Don't clear tx_chn memory as we need to preserve
- * data between suspend/resume
- */
- if (!(tx_chn->irq < 0))
- devm_free_irq(dev, tx_chn->irq, tx_chn);
+ memset(tx_chn, 0, sizeof(*tx_chn));
}
}
@@ -1566,10 +1529,12 @@ void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common)
struct device *dev = common->dev;
int i;
+ devm_remove_action(dev, am65_cpsw_nuss_free_tx_chns, common);
+
for (i = 0; i < common->tx_ch_num; i++) {
struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[i];
- if (!(tx_chn->irq < 0))
+ if (tx_chn->irq)
devm_free_irq(dev, tx_chn->irq, tx_chn);
netif_napi_del(&tx_chn->napi_tx);
@@ -1584,6 +1549,32 @@ void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common)
}
}
+static int am65_cpsw_nuss_ndev_add_tx_napi(struct am65_cpsw_common *common)
+{
+ struct device *dev = common->dev;
+ int i, ret = 0;
+
+ for (i = 0; i < common->tx_ch_num; i++) {
+ struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[i];
+
+ netif_napi_add_tx(common->dma_ndev, &tx_chn->napi_tx,
+ am65_cpsw_nuss_tx_poll);
+
+ ret = devm_request_irq(dev, tx_chn->irq,
+ am65_cpsw_nuss_tx_irq,
+ IRQF_TRIGGER_HIGH,
+ tx_chn->tx_chn_name, tx_chn);
+ if (ret) {
+ dev_err(dev, "failure requesting tx%u irq %u, %d\n",
+ tx_chn->id, tx_chn->irq, ret);
+ goto err;
+ }
+ }
+
+err:
+ return ret;
+}
+
static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common)
{
u32 max_desc_num = ALIGN(AM65_CPSW_MAX_TX_DESC, MAX_SKB_FRAGS);
@@ -1639,7 +1630,7 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common)
}
tx_chn->irq = k3_udma_glue_tx_get_irq(tx_chn->tx_chn);
- if (tx_chn->irq < 0) {
+ if (tx_chn->irq <= 0) {
dev_err(dev, "Failed to get tx dma irq %d\n",
tx_chn->irq);
goto err;
@@ -1648,41 +1639,59 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common)
snprintf(tx_chn->tx_chn_name,
sizeof(tx_chn->tx_chn_name), "%s-tx%d",
dev_name(dev), tx_chn->id);
-
- ret = devm_request_irq(dev, tx_chn->irq,
- am65_cpsw_nuss_tx_irq,
- IRQF_TRIGGER_HIGH,
- tx_chn->tx_chn_name, tx_chn);
- if (ret) {
- dev_err(dev, "failure requesting tx%u irq %u, %d\n",
- tx_chn->id, tx_chn->irq, ret);
- tx_chn->irq = -EINVAL;
- goto err;
- }
}
- return 0;
+ ret = am65_cpsw_nuss_ndev_add_tx_napi(common);
+ if (ret) {
+ dev_err(dev, "Failed to add tx NAPI %d\n", ret);
+ goto err;
+ }
err:
- am65_cpsw_nuss_free_tx_chns(common);
+ i = devm_add_action(dev, am65_cpsw_nuss_free_tx_chns, common);
+ if (i) {
+ dev_err(dev, "Failed to add free_tx_chns action %d\n", i);
+ return i;
+ }
return ret;
}
-static void am65_cpsw_nuss_free_rx_chns(struct am65_cpsw_common *common)
+static void am65_cpsw_nuss_free_rx_chns(void *data)
{
+ struct am65_cpsw_common *common = data;
struct am65_cpsw_rx_chn *rx_chn;
rx_chn = &common->rx_chns;
+ if (!IS_ERR_OR_NULL(rx_chn->desc_pool))
+ k3_cppi_desc_pool_destroy(rx_chn->desc_pool);
+
+ if (!IS_ERR_OR_NULL(rx_chn->rx_chn))
+ k3_udma_glue_release_rx_chn(rx_chn->rx_chn);
+}
+
+static void am65_cpsw_nuss_remove_rx_chns(void *data)
+{
+ struct am65_cpsw_common *common = data;
+ struct am65_cpsw_rx_chn *rx_chn;
+ struct device *dev = common->dev;
+
+ rx_chn = &common->rx_chns;
+ devm_remove_action(dev, am65_cpsw_nuss_free_rx_chns, common);
+
if (!(rx_chn->irq < 0))
- devm_free_irq(common->dev, rx_chn->irq, common);
+ devm_free_irq(dev, rx_chn->irq, common);
+
+ netif_napi_del(&common->napi_rx);
if (!IS_ERR_OR_NULL(rx_chn->desc_pool))
k3_cppi_desc_pool_destroy(rx_chn->desc_pool);
if (!IS_ERR_OR_NULL(rx_chn->rx_chn))
k3_udma_glue_release_rx_chn(rx_chn->rx_chn);
+
+ common->rx_flow_id_base = -1;
}
static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
@@ -1700,7 +1709,7 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
rx_cfg.swdata_size = AM65_CPSW_NAV_SW_DATA_SIZE;
rx_cfg.flow_id_num = AM65_CPSW_MAX_RX_FLOWS;
- rx_cfg.flow_id_base = -1;
+ rx_cfg.flow_id_base = common->rx_flow_id_base;
/* init all flows */
rx_chn->dev = dev;
@@ -1772,20 +1781,24 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
}
}
+ netif_napi_add(common->dma_ndev, &common->napi_rx,
+ am65_cpsw_nuss_rx_poll);
+
ret = devm_request_irq(dev, rx_chn->irq,
am65_cpsw_nuss_rx_irq,
IRQF_TRIGGER_HIGH, dev_name(dev), common);
if (ret) {
dev_err(dev, "failure requesting rx irq %u, %d\n",
rx_chn->irq, ret);
- rx_chn->irq = -EINVAL;
goto err;
}
- return 0;
-
err:
- am65_cpsw_nuss_free_rx_chns(common);
+ i = devm_add_action(dev, am65_cpsw_nuss_free_rx_chns, common);
+ if (i) {
+ dev_err(dev, "Failed to add free_rx_chns action %d\n", i);
+ return i;
+ }
return ret;
}
@@ -2105,26 +2118,9 @@ static int am65_cpsw_nuss_init_ndevs(struct am65_cpsw_common *common)
return ret;
}
- netif_napi_add(common->dma_ndev, &common->napi_rx,
- am65_cpsw_nuss_rx_poll);
-
return ret;
}
-static int am65_cpsw_nuss_ndev_add_tx_napi(struct am65_cpsw_common *common)
-{
- int i;
-
- for (i = 0; i < common->tx_ch_num; i++) {
- struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[i];
-
- netif_napi_add_tx(common->dma_ndev, &tx_chn->napi_tx,
- am65_cpsw_nuss_tx_poll);
- }
-
- return 0;
-}
-
static void am65_cpsw_nuss_cleanup_ndev(struct am65_cpsw_common *common)
{
struct am65_cpsw_port *port;
@@ -2587,7 +2583,11 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
struct am65_cpsw_port *port;
int ret = 0, i;
- ret = am65_cpsw_nuss_ndev_add_tx_napi(common);
+ /* init tx channels */
+ ret = am65_cpsw_nuss_init_tx_chns(common);
+ if (ret)
+ return ret;
+ ret = am65_cpsw_nuss_init_rx_chns(common);
if (ret)
return ret;
@@ -2634,10 +2634,8 @@ int am65_cpsw_nuss_update_tx_chns(struct am65_cpsw_common *common, int num_tx)
common->tx_ch_num = num_tx;
ret = am65_cpsw_nuss_init_tx_chns(common);
- if (ret)
- return ret;
- return am65_cpsw_nuss_ndev_add_tx_napi(common);
+ return ret;
}
struct am65_cpsw_soc_pdata {
@@ -2745,6 +2743,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev)
if (common->port_num < 1 || common->port_num > AM65_CPSW_MAX_PORTS)
return -ENOENT;
+ common->rx_flow_id_base = -1;
init_completion(&common->tdown_complete);
common->tx_ch_num = 1;
common->pf_p0_rx_ptype_rrobin = false;
@@ -2878,10 +2877,10 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev)
static int am65_cpsw_nuss_suspend(struct device *dev)
{
struct am65_cpsw_common *common = dev_get_drvdata(dev);
+ struct am65_cpsw_host *host_p = am65_common_get_host(common);
struct am65_cpsw_port *port;
struct net_device *ndev;
int i, ret;
- struct am65_cpsw_host *host_p = am65_common_get_host(common);
cpsw_ale_dump(common->ale, common->ale_context);
host_p->vid_context = readl(host_p->port_base + AM65_CPSW_PORT_VLAN_REG_OFFSET);
@@ -2907,6 +2906,9 @@ static int am65_cpsw_nuss_suspend(struct device *dev)
am65_cpts_suspend(common->cpts);
+ am65_cpsw_nuss_remove_rx_chns(common);
+ am65_cpsw_nuss_remove_tx_chns(common);
+
return 0;
}
@@ -2918,6 +2920,17 @@ static int am65_cpsw_nuss_resume(struct device *dev)
int i, ret;
struct am65_cpsw_host *host_p = am65_common_get_host(common);
+ ret = am65_cpsw_nuss_init_tx_chns(common);
+ if (ret)
+ return ret;
+ ret = am65_cpsw_nuss_init_rx_chns(common);
+ if (ret)
+ return ret;
+
+ /* If RX IRQ was disabled before suspend, keep it disabled */
+ if (common->rx_irq_disabled)
+ disable_irq(common->rx_chns.irq);
+
am65_cpts_resume(common->cpts);
for (i = 0; i < common->port_num; i++) {
diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index 450b16ad40a4..e1a569b99e4a 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -885,7 +885,7 @@ static int ca8210_spi_transfer(
dev_dbg(&spi->dev, "%s called\n", __func__);
- cas_ctl = kmalloc(sizeof(*cas_ctl), GFP_ATOMIC);
+ cas_ctl = kzalloc(sizeof(*cas_ctl), GFP_ATOMIC);
if (!cas_ctl)
return -ENOMEM;
diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c
index c69b87d3837d..edc769daad07 100644
--- a/drivers/net/ieee802154/cc2520.c
+++ b/drivers/net/ieee802154/cc2520.c
@@ -970,7 +970,7 @@ static int cc2520_hw_init(struct cc2520_private *priv)
if (timeout-- <= 0) {
dev_err(&priv->spi->dev, "oscillator start failed!\n");
- return ret;
+ return -ETIMEDOUT;
}
udelay(1);
} while (!(status & CC2520_STATUS_XOSC32M_STABLE));
diff --git a/drivers/net/ipa/ipa_sysfs.c b/drivers/net/ipa/ipa_sysfs.c
index 5cbc15a971f9..14bd2f903045 100644
--- a/drivers/net/ipa/ipa_sysfs.c
+++ b/drivers/net/ipa/ipa_sysfs.c
@@ -46,7 +46,7 @@ version_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct ipa *ipa = dev_get_drvdata(dev);
- return scnprintf(buf, PAGE_SIZE, "%s\n", ipa_version_string(ipa));
+ return sysfs_emit(buf, "%s\n", ipa_version_string(ipa));
}
static DEVICE_ATTR_RO(version);
@@ -70,7 +70,7 @@ static ssize_t rx_offload_show(struct device *dev,
{
struct ipa *ipa = dev_get_drvdata(dev);
- return scnprintf(buf, PAGE_SIZE, "%s\n", ipa_offload_string(ipa));
+ return sysfs_emit(buf, "%s\n", ipa_offload_string(ipa));
}
static DEVICE_ATTR_RO(rx_offload);
@@ -80,7 +80,7 @@ static ssize_t tx_offload_show(struct device *dev,
{
struct ipa *ipa = dev_get_drvdata(dev);
- return scnprintf(buf, PAGE_SIZE, "%s\n", ipa_offload_string(ipa));
+ return sysfs_emit(buf, "%s\n", ipa_offload_string(ipa));
}
static DEVICE_ATTR_RO(tx_offload);
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index d73b9d535b7a..937f5b1f04ff 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -3698,6 +3698,7 @@ static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = {
[IFLA_MACSEC_SCB] = { .type = NLA_U8 },
[IFLA_MACSEC_REPLAY_PROTECT] = { .type = NLA_U8 },
[IFLA_MACSEC_VALIDATION] = { .type = NLA_U8 },
+ [IFLA_MACSEC_OFFLOAD] = { .type = NLA_U8 },
};
static void macsec_free_netdev(struct net_device *dev)
diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c
index eb344f6d4a7b..b782c35c4ac1 100644
--- a/drivers/net/mdio/fwnode_mdio.c
+++ b/drivers/net/mdio/fwnode_mdio.c
@@ -98,6 +98,7 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
*/
rc = phy_device_register(phy);
if (rc) {
+ device_set_node(&phy->mdio.dev, NULL);
fwnode_handle_put(child);
return rc;
}
@@ -153,7 +154,8 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus,
/* All data is now stored in the phy struct, so register it */
rc = phy_device_register(phy);
if (rc) {
- fwnode_handle_put(phy->mdio.dev.fwnode);
+ phy->mdio.dev.fwnode = NULL;
+ fwnode_handle_put(child);
goto clean_phy;
}
} else if (is_of_node(child)) {
diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index 796e9c7857d0..510822d6d0d9 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -68,8 +68,9 @@ static int of_mdiobus_register_device(struct mii_bus *mdio,
/* All data is now stored in the mdiodev struct; register it. */
rc = mdio_device_register(mdiodev);
if (rc) {
+ device_set_node(&mdiodev->dev, NULL);
+ fwnode_handle_put(fwnode);
mdio_device_free(mdiodev);
- of_node_put(child);
return rc;
}
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index af00cf44cd97..1327290decab 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -47,7 +47,6 @@ config LED_TRIGGER_PHY
config FIXED_PHY
tristate "MDIO Bus/PHY emulation with fixed speed/link PHYs"
- depends on PHYLIB
select SWPHY
help
Adds the platform "fixed" MDIO Bus to cover the boards that use
@@ -112,7 +111,6 @@ config BROADCOM_PHY
config BCM54140_PHY
tristate "Broadcom BCM54140 PHY"
- depends on PHYLIB
depends on HWMON || HWMON=n
select BCM_NET_PHYLIB
help
@@ -137,7 +135,6 @@ config BCM7XXX_PHY
config BCM84881_PHY
tristate "Broadcom BCM84881 PHY"
- depends on PHYLIB
help
Support the Broadcom BCM84881 PHY.
diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c
index 250742ffdfd9..044828d081d2 100644
--- a/drivers/net/phy/mdio_device.c
+++ b/drivers/net/phy/mdio_device.c
@@ -21,6 +21,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/unistd.h>
+#include <linux/property.h>
void mdio_device_free(struct mdio_device *mdiodev)
{
@@ -30,6 +31,7 @@ EXPORT_SYMBOL(mdio_device_free);
static void mdio_device_release(struct device *dev)
{
+ fwnode_handle_put(dev->fwnode);
kfree(to_mdio_device(dev));
}
diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c
index 27c0f161623e..147d7a5a9b35 100644
--- a/drivers/net/phy/mxl-gpy.c
+++ b/drivers/net/phy/mxl-gpy.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/bitfield.h>
#include <linux/hwmon.h>
+#include <linux/mutex.h>
#include <linux/phy.h>
#include <linux/polynomial.h>
#include <linux/netdevice.h>
@@ -78,8 +79,16 @@
VSPEC1_SGMII_CTRL_ANRS)
/* Temperature sensor */
-#define VPSPEC1_TEMP_STA 0x0E
-#define VPSPEC1_TEMP_STA_DATA GENMASK(9, 0)
+#define VSPEC1_TEMP_STA 0x0E
+#define VSPEC1_TEMP_STA_DATA GENMASK(9, 0)
+
+/* Mailbox */
+#define VSPEC1_MBOX_DATA 0x5
+#define VSPEC1_MBOX_ADDRLO 0x6
+#define VSPEC1_MBOX_CMD 0x7
+#define VSPEC1_MBOX_CMD_ADDRHI GENMASK(7, 0)
+#define VSPEC1_MBOX_CMD_RD (0 << 8)
+#define VSPEC1_MBOX_CMD_READY BIT(15)
/* WoL */
#define VPSPEC2_WOL_CTL 0x0E06
@@ -88,7 +97,13 @@
#define VPSPEC2_WOL_AD45 0x0E0A
#define WOL_EN BIT(0)
+/* Internal registers, access via mbox */
+#define REG_GPIO0_OUT 0xd3ce00
+
struct gpy_priv {
+ /* serialize mailbox accesses */
+ struct mutex mbox_lock;
+
u8 fw_major;
u8 fw_minor;
};
@@ -140,14 +155,14 @@ static int gpy_hwmon_read(struct device *dev,
struct phy_device *phydev = dev_get_drvdata(dev);
int ret;
- ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VPSPEC1_TEMP_STA);
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_TEMP_STA);
if (ret < 0)
return ret;
if (!ret)
return -ENODATA;
*value = polynomial_calc(&poly_N_to_temp,
- FIELD_GET(VPSPEC1_TEMP_STA_DATA, ret));
+ FIELD_GET(VSPEC1_TEMP_STA_DATA, ret));
return 0;
}
@@ -198,6 +213,45 @@ static int gpy_hwmon_register(struct phy_device *phydev)
}
#endif
+static int gpy_mbox_read(struct phy_device *phydev, u32 addr)
+{
+ struct gpy_priv *priv = phydev->priv;
+ int val, ret;
+ u16 cmd;
+
+ mutex_lock(&priv->mbox_lock);
+
+ ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_ADDRLO,
+ addr);
+ if (ret)
+ goto out;
+
+ cmd = VSPEC1_MBOX_CMD_RD;
+ cmd |= FIELD_PREP(VSPEC1_MBOX_CMD_ADDRHI, addr >> 16);
+
+ ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_CMD, cmd);
+ if (ret)
+ goto out;
+
+ /* The mbox read is used in the interrupt workaround. It was observed
+ * that a read might take up to 2.5ms. This is also the time for which
+ * the interrupt line is stuck low. To be on the safe side, poll the
+ * ready bit for 10ms.
+ */
+ ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1,
+ VSPEC1_MBOX_CMD, val,
+ (val & VSPEC1_MBOX_CMD_READY),
+ 500, 10000, false);
+ if (ret)
+ goto out;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_DATA);
+
+out:
+ mutex_unlock(&priv->mbox_lock);
+ return ret;
+}
+
static int gpy_config_init(struct phy_device *phydev)
{
int ret;
@@ -212,6 +266,13 @@ static int gpy_config_init(struct phy_device *phydev)
return ret < 0 ? ret : 0;
}
+static bool gpy_has_broken_mdint(struct phy_device *phydev)
+{
+ /* At least these PHYs are known to have broken interrupt handling */
+ return phydev->drv->phy_id == PHY_ID_GPY215B ||
+ phydev->drv->phy_id == PHY_ID_GPY215C;
+}
+
static int gpy_probe(struct phy_device *phydev)
{
struct device *dev = &phydev->mdio.dev;
@@ -229,6 +290,7 @@ static int gpy_probe(struct phy_device *phydev)
if (!priv)
return -ENOMEM;
phydev->priv = priv;
+ mutex_init(&priv->mbox_lock);
fw_version = phy_read(phydev, PHY_FWV);
if (fw_version < 0)
@@ -574,6 +636,29 @@ static irqreturn_t gpy_handle_interrupt(struct phy_device *phydev)
if (!(reg & PHY_IMASK_MASK))
return IRQ_NONE;
+ /* The PHY might leave the interrupt line asserted even after PHY_ISTAT
+ * is read. To avoid interrupt storms, delay the interrupt handling as
+ * long as the PHY drives the interrupt line. An internal bus read will
+ * stall as long as the interrupt line is asserted, thus just read a
+ * random register here.
+ * Because we cannot access the internal bus at all while the interrupt
+ * is driven by the PHY, there is no way to make the interrupt line
+ * unstuck (e.g. by changing the pinmux to GPIO input) during that time
+ * frame. Therefore, polling is the best we can do and won't do any more
+ * harm.
+ * It was observed that this bug happens on link state and link speed
+ * changes on a GPY215B and GPY215C independent of the firmware version
+ * (which doesn't mean that this list is exhaustive).
+ */
+ if (gpy_has_broken_mdint(phydev) &&
+ (reg & (PHY_IMASK_LSTC | PHY_IMASK_LSPC))) {
+ reg = gpy_mbox_read(phydev, REG_GPIO0_OUT);
+ if (reg < 0) {
+ phy_error(phydev);
+ return IRQ_NONE;
+ }
+ }
+
phy_trigger_machine(phydev);
return IRQ_HANDLED;
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 39fd1811375c..83b99d95b278 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -2642,10 +2642,46 @@ static void sfp_cleanup(void *data)
kfree(sfp);
}
+static int sfp_i2c_get(struct sfp *sfp)
+{
+ struct acpi_handle *acpi_handle;
+ struct fwnode_handle *h;
+ struct i2c_adapter *i2c;
+ struct device_node *np;
+ int err;
+
+ h = fwnode_find_reference(dev_fwnode(sfp->dev), "i2c-bus", 0);
+ if (IS_ERR(h)) {
+ dev_err(sfp->dev, "missing 'i2c-bus' property\n");
+ return -ENODEV;
+ }
+
+ if (is_acpi_device_node(h)) {
+ acpi_handle = ACPI_HANDLE_FWNODE(h);
+ i2c = i2c_acpi_find_adapter_by_handle(acpi_handle);
+ } else if ((np = to_of_node(h)) != NULL) {
+ i2c = of_find_i2c_adapter_by_node(np);
+ } else {
+ err = -EINVAL;
+ goto put;
+ }
+
+ if (!i2c) {
+ err = -EPROBE_DEFER;
+ goto put;
+ }
+
+ err = sfp_i2c_configure(sfp, i2c);
+ if (err)
+ i2c_put_adapter(i2c);
+put:
+ fwnode_handle_put(h);
+ return err;
+}
+
static int sfp_probe(struct platform_device *pdev)
{
const struct sff_data *sff;
- struct i2c_adapter *i2c;
char *sfp_irq_name;
struct sfp *sfp;
int err, i;
@@ -2663,51 +2699,20 @@ static int sfp_probe(struct platform_device *pdev)
sff = sfp->type = &sfp_data;
if (pdev->dev.of_node) {
- struct device_node *node = pdev->dev.of_node;
const struct of_device_id *id;
- struct device_node *np;
- id = of_match_node(sfp_of_match, node);
+ id = of_match_node(sfp_of_match, pdev->dev.of_node);
if (WARN_ON(!id))
return -EINVAL;
sff = sfp->type = id->data;
-
- np = of_parse_phandle(node, "i2c-bus", 0);
- if (!np) {
- dev_err(sfp->dev, "missing 'i2c-bus' property\n");
- return -ENODEV;
- }
-
- i2c = of_find_i2c_adapter_by_node(np);
- of_node_put(np);
- } else if (has_acpi_companion(&pdev->dev)) {
- struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
- struct fwnode_handle *fw = acpi_fwnode_handle(adev);
- struct fwnode_reference_args args;
- struct acpi_handle *acpi_handle;
- int ret;
-
- ret = acpi_node_get_property_reference(fw, "i2c-bus", 0, &args);
- if (ret || !is_acpi_device_node(args.fwnode)) {
- dev_err(&pdev->dev, "missing 'i2c-bus' property\n");
- return -ENODEV;
- }
-
- acpi_handle = ACPI_HANDLE_FWNODE(args.fwnode);
- i2c = i2c_acpi_find_adapter_by_handle(acpi_handle);
- } else {
+ } else if (!has_acpi_companion(&pdev->dev)) {
return -EINVAL;
}
- if (!i2c)
- return -EPROBE_DEFER;
-
- err = sfp_i2c_configure(sfp, i2c);
- if (err < 0) {
- i2c_put_adapter(i2c);
+ err = sfp_i2c_get(sfp);
+ if (err)
return err;
- }
for (i = 0; i < GPIO_MAX; i++)
if (sff->gpios & BIT(i)) {
diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c
index c8791e9b451d..40ce8abe6999 100644
--- a/drivers/net/plip/plip.c
+++ b/drivers/net/plip/plip.c
@@ -450,12 +450,12 @@ plip_bh_timeout_error(struct net_device *dev, struct net_local *nl,
}
rcv->state = PLIP_PK_DONE;
if (rcv->skb) {
- kfree_skb(rcv->skb);
+ dev_kfree_skb_irq(rcv->skb);
rcv->skb = NULL;
}
snd->state = PLIP_PK_DONE;
if (snd->skb) {
- dev_kfree_skb(snd->skb);
+ dev_consume_skb_irq(snd->skb);
snd->skb = NULL;
}
spin_unlock_irq(&nl->lock);
diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c
index 4ed7f5b547e3..990484776f2d 100644
--- a/drivers/net/thunderbolt.c
+++ b/drivers/net/thunderbolt.c
@@ -914,6 +914,7 @@ static int tbnet_open(struct net_device *dev)
eof_mask, tbnet_start_poll, net);
if (!ring) {
netdev_err(dev, "failed to allocate Rx ring\n");
+ tb_xdomain_release_out_hopid(xd, hopid);
tb_ring_free(net->tx_ring.ring);
net->tx_ring.ring = NULL;
return -ENOMEM;
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 0fe3773c5bca..743cbf5d662c 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -1350,6 +1350,20 @@ static const struct driver_info ax88772b_info = {
.data = FLAG_EEPROM_MAC,
};
+static const struct driver_info lxausb_t1l_info = {
+ .description = "Linux Automation GmbH USB 10Base-T1L",
+ .bind = ax88772_bind,
+ .unbind = ax88772_unbind,
+ .status = asix_status,
+ .reset = ax88772_reset,
+ .stop = ax88772_stop,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR |
+ FLAG_MULTI_PACKET,
+ .rx_fixup = asix_rx_fixup_common,
+ .tx_fixup = asix_tx_fixup,
+ .data = FLAG_EEPROM_MAC,
+};
+
static const struct driver_info ax88178_info = {
.description = "ASIX AX88178 USB 2.0 Ethernet",
.bind = ax88178_bind,
@@ -1538,6 +1552,10 @@ static const struct usb_device_id products [] = {
*/
USB_DEVICE(0x066b, 0x20f9),
.driver_info = (unsigned long) &hg20f9_info,
+}, {
+ // Linux Automation GmbH USB 10Base-T1L
+ USB_DEVICE(0x33f7, 0x0004),
+ .driver_info = (unsigned long) &lxausb_t1l_info,
},
{ }, // END
};
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index d3e7b27eb933..6f1e560fb15c 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -75,8 +75,14 @@ vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
for (i = 0; i < adapter->intr.num_intrs; i++)
vmxnet3_enable_intr(adapter, i);
- adapter->shared->devRead.intrConf.intrCtrl &=
+ if (!VMXNET3_VERSION_GE_6(adapter) ||
+ !adapter->queuesExtEnabled) {
+ adapter->shared->devRead.intrConf.intrCtrl &=
+ cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
+ } else {
+ adapter->shared->devReadExt.intrConfExt.intrCtrl &=
cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
+ }
}
@@ -85,8 +91,14 @@ vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
{
int i;
- adapter->shared->devRead.intrConf.intrCtrl |=
+ if (!VMXNET3_VERSION_GE_6(adapter) ||
+ !adapter->queuesExtEnabled) {
+ adapter->shared->devRead.intrConf.intrCtrl |=
+ cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
+ } else {
+ adapter->shared->devReadExt.intrConfExt.intrCtrl |=
cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
+ }
for (i = 0; i < adapter->intr.num_intrs; i++)
vmxnet3_disable_intr(adapter, i);
}
@@ -1396,6 +1408,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
};
u32 num_pkts = 0;
bool skip_page_frags = false;
+ bool encap_lro = false;
struct Vmxnet3_RxCompDesc *rcd;
struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
u16 segCnt = 0, mss = 0;
@@ -1556,13 +1569,18 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
if (VMXNET3_VERSION_GE_2(adapter) &&
rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
struct Vmxnet3_RxCompDescExt *rcdlro;
+ union Vmxnet3_GenericDesc *gdesc;
+
rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd;
+ gdesc = (union Vmxnet3_GenericDesc *)rcd;
segCnt = rcdlro->segCnt;
WARN_ON_ONCE(segCnt == 0);
mss = rcdlro->mss;
if (unlikely(segCnt <= 1))
segCnt = 0;
+ encap_lro = (le32_to_cpu(gdesc->dword[0]) &
+ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT));
} else {
segCnt = 0;
}
@@ -1630,7 +1648,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
vmxnet3_rx_csum(adapter, skb,
(union Vmxnet3_GenericDesc *)rcd);
skb->protocol = eth_type_trans(skb, adapter->netdev);
- if (!rcd->tcp ||
+ if ((!rcd->tcp && !encap_lro) ||
!(adapter->netdev->features & NETIF_F_LRO))
goto not_lro;
@@ -1639,7 +1657,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
skb_shinfo(skb)->gso_size = mss;
skb_shinfo(skb)->gso_segs = segCnt;
- } else if (segCnt != 0 || skb->len > mtu) {
+ } else if ((segCnt != 0 || skb->len > mtu) && !encap_lro) {
u32 hlen;
hlen = vmxnet3_get_hdr_len(adapter, skb,
@@ -1668,6 +1686,7 @@ not_lro:
napi_gro_receive(&rq->napi, skb);
ctx->skb = NULL;
+ encap_lro = false;
num_pkts++;
}
diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 3f8c0845fcca..f795548562f5 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -648,7 +648,7 @@ mt76_dma_wed_setup(struct mt76_dev *dev, struct mt76_queue *q)
q->wed_regs = wed->txfree_ring.reg_base;
break;
case MT76_WED_Q_RX:
- ret = mtk_wed_device_rx_ring_setup(wed, ring, q->regs);
+ ret = mtk_wed_device_rx_ring_setup(wed, ring, q->regs, false);
if (!ret)
q->wed_regs = wed->rx_ring[ring].reg_base;
break;
diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux.c b/drivers/net/wwan/iosm/iosm_ipc_mux.c
index 9c7a9a2a1f25..fc928b298a98 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_mux.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux.c
@@ -332,6 +332,7 @@ struct iosm_mux *ipc_mux_init(struct ipc_mux_config *mux_cfg,
if (!ipc_mux->ul_adb.pp_qlt[i]) {
for (j = i - 1; j >= 0; j--)
kfree(ipc_mux->ul_adb.pp_qlt[j]);
+ kfree(ipc_mux);
return NULL;
}
}
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 1545cbee77a4..3dbfc8a6924e 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -386,7 +386,7 @@ int xenvif_dealloc_kthread(void *data);
irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread);
-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
+bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
void xenvif_carrier_on(struct xenvif *vif);
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 650fa180220f..f3f2c07423a6 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -254,14 +254,16 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE)
skb_clear_hash(skb);
- xenvif_rx_queue_tail(queue, skb);
+ if (!xenvif_rx_queue_tail(queue, skb))
+ goto drop;
+
xenvif_kick_thread(queue);
return NETDEV_TX_OK;
drop:
vif->dev->stats.tx_dropped++;
- dev_kfree_skb(skb);
+ dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 3d2081bbbc86..bf627af723bf 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -332,10 +332,13 @@ static int xenvif_count_requests(struct xenvif_queue *queue,
struct xenvif_tx_cb {
- u16 pending_idx;
+ u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1];
+ u8 copy_count;
};
#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
+#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i])
+#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count)
static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
u16 pending_idx,
@@ -370,31 +373,93 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
return skb;
}
-static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
- struct sk_buff *skb,
- struct xen_netif_tx_request *txp,
- struct gnttab_map_grant_ref *gop,
- unsigned int frag_overflow,
- struct sk_buff *nskb)
+static void xenvif_get_requests(struct xenvif_queue *queue,
+ struct sk_buff *skb,
+ struct xen_netif_tx_request *first,
+ struct xen_netif_tx_request *txfrags,
+ unsigned *copy_ops,
+ unsigned *map_ops,
+ unsigned int frag_overflow,
+ struct sk_buff *nskb,
+ unsigned int extra_count,
+ unsigned int data_len)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frags = shinfo->frags;
- u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
- int start;
+ u16 pending_idx;
pending_ring_idx_t index;
unsigned int nr_slots;
+ struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops;
+ struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops;
+ struct xen_netif_tx_request *txp = first;
+
+ nr_slots = shinfo->nr_frags + 1;
+
+ copy_count(skb) = 0;
+
+ /* Create copy ops for exactly data_len bytes into the skb head. */
+ __skb_put(skb, data_len);
+ while (data_len > 0) {
+ int amount = data_len > txp->size ? txp->size : data_len;
+
+ cop->source.u.ref = txp->gref;
+ cop->source.domid = queue->vif->domid;
+ cop->source.offset = txp->offset;
+
+ cop->dest.domid = DOMID_SELF;
+ cop->dest.offset = (offset_in_page(skb->data +
+ skb_headlen(skb) -
+ data_len)) & ~XEN_PAGE_MASK;
+ cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb)
+ - data_len);
+
+ cop->len = amount;
+ cop->flags = GNTCOPY_source_gref;
- nr_slots = shinfo->nr_frags;
+ index = pending_index(queue->pending_cons);
+ pending_idx = queue->pending_ring[index];
+ callback_param(queue, pending_idx).ctx = NULL;
+ copy_pending_idx(skb, copy_count(skb)) = pending_idx;
+ copy_count(skb)++;
+
+ cop++;
+ data_len -= amount;
- /* Skip first skb fragment if it is on same page as header fragment. */
- start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
+ if (amount == txp->size) {
+ /* The copy op covered the full tx_request */
+
+ memcpy(&queue->pending_tx_info[pending_idx].req,
+ txp, sizeof(*txp));
+ queue->pending_tx_info[pending_idx].extra_count =
+ (txp == first) ? extra_count : 0;
+
+ if (txp == first)
+ txp = txfrags;
+ else
+ txp++;
+ queue->pending_cons++;
+ nr_slots--;
+ } else {
+ /* The copy op partially covered the tx_request.
+ * The remainder will be mapped.
+ */
+ txp->offset += amount;
+ txp->size -= amount;
+ }
+ }
- for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
- shinfo->nr_frags++, txp++, gop++) {
+ for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
+ shinfo->nr_frags++, gop++) {
index = pending_index(queue->pending_cons++);
pending_idx = queue->pending_ring[index];
- xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop);
+ xenvif_tx_create_map_op(queue, pending_idx, txp,
+ txp == first ? extra_count : 0, gop);
frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
+
+ if (txp == first)
+ txp = txfrags;
+ else
+ txp++;
}
if (frag_overflow) {
@@ -415,7 +480,8 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que
skb_shinfo(skb)->frag_list = nskb;
}
- return gop;
+ (*copy_ops) = cop - queue->tx_copy_ops;
+ (*map_ops) = gop - queue->tx_map_ops;
}
static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
@@ -451,7 +517,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
struct gnttab_copy **gopp_copy)
{
struct gnttab_map_grant_ref *gop_map = *gopp_map;
- u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+ u16 pending_idx;
/* This always points to the shinfo of the skb being checked, which
* could be either the first or the one on the frag_list
*/
@@ -462,24 +528,37 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
struct skb_shared_info *first_shinfo = NULL;
int nr_frags = shinfo->nr_frags;
const bool sharedslot = nr_frags &&
- frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
- int i, err;
+ frag_get_pending_idx(&shinfo->frags[0]) ==
+ copy_pending_idx(skb, copy_count(skb) - 1);
+ int i, err = 0;
- /* Check status of header. */
- err = (*gopp_copy)->status;
- if (unlikely(err)) {
- if (net_ratelimit())
- netdev_dbg(queue->vif->dev,
- "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
- (*gopp_copy)->status,
- pending_idx,
- (*gopp_copy)->source.u.ref);
- /* The first frag might still have this slot mapped */
- if (!sharedslot)
- xenvif_idx_release(queue, pending_idx,
- XEN_NETIF_RSP_ERROR);
+ for (i = 0; i < copy_count(skb); i++) {
+ int newerr;
+
+ /* Check status of header. */
+ pending_idx = copy_pending_idx(skb, i);
+
+ newerr = (*gopp_copy)->status;
+ if (likely(!newerr)) {
+ /* The first frag might still have this slot mapped */
+ if (i < copy_count(skb) - 1 || !sharedslot)
+ xenvif_idx_release(queue, pending_idx,
+ XEN_NETIF_RSP_OKAY);
+ } else {
+ err = newerr;
+ if (net_ratelimit())
+ netdev_dbg(queue->vif->dev,
+ "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
+ (*gopp_copy)->status,
+ pending_idx,
+ (*gopp_copy)->source.u.ref);
+ /* The first frag might still have this slot mapped */
+ if (i < copy_count(skb) - 1 || !sharedslot)
+ xenvif_idx_release(queue, pending_idx,
+ XEN_NETIF_RSP_ERROR);
+ }
+ (*gopp_copy)++;
}
- (*gopp_copy)++;
check_frags:
for (i = 0; i < nr_frags; i++, gop_map++) {
@@ -526,14 +605,6 @@ check_frags:
if (err)
continue;
- /* First error: if the header haven't shared a slot with the
- * first frag, release it as well.
- */
- if (!sharedslot)
- xenvif_idx_release(queue,
- XENVIF_TX_CB(skb)->pending_idx,
- XEN_NETIF_RSP_OKAY);
-
/* Invalidate preceding fragments of this skb. */
for (j = 0; j < i; j++) {
pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
@@ -803,7 +874,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
unsigned *copy_ops,
unsigned *map_ops)
{
- struct gnttab_map_grant_ref *gop = queue->tx_map_ops;
struct sk_buff *skb, *nskb;
int ret;
unsigned int frag_overflow;
@@ -885,8 +955,12 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
continue;
}
+ data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ?
+ XEN_NETBACK_TX_COPY_LEN : txreq.size;
+
ret = xenvif_count_requests(queue, &txreq, extra_count,
txfrags, work_to_do);
+
if (unlikely(ret < 0))
break;
@@ -912,9 +986,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
index = pending_index(queue->pending_cons);
pending_idx = queue->pending_ring[index];
- data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN &&
- ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
- XEN_NETBACK_TX_COPY_LEN : txreq.size;
+ if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size)
+ data_len = txreq.size;
skb = xenvif_alloc_skb(data_len);
if (unlikely(skb == NULL)) {
@@ -925,8 +998,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
}
skb_shinfo(skb)->nr_frags = ret;
- if (data_len < txreq.size)
- skb_shinfo(skb)->nr_frags++;
/* At this point shinfo->nr_frags is in fact the number of
* slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
*/
@@ -988,54 +1059,19 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
type);
}
- XENVIF_TX_CB(skb)->pending_idx = pending_idx;
-
- __skb_put(skb, data_len);
- queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
- queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
- queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
-
- queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
- virt_to_gfn(skb->data);
- queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
- queue->tx_copy_ops[*copy_ops].dest.offset =
- offset_in_page(skb->data) & ~XEN_PAGE_MASK;
-
- queue->tx_copy_ops[*copy_ops].len = data_len;
- queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
-
- (*copy_ops)++;
-
- if (data_len < txreq.size) {
- frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
- pending_idx);
- xenvif_tx_create_map_op(queue, pending_idx, &txreq,
- extra_count, gop);
- gop++;
- } else {
- frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
- INVALID_PENDING_IDX);
- memcpy(&queue->pending_tx_info[pending_idx].req,
- &txreq, sizeof(txreq));
- queue->pending_tx_info[pending_idx].extra_count =
- extra_count;
- }
-
- queue->pending_cons++;
-
- gop = xenvif_get_requests(queue, skb, txfrags, gop,
- frag_overflow, nskb);
+ xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops,
+ map_ops, frag_overflow, nskb, extra_count,
+ data_len);
__skb_queue_tail(&queue->tx_queue, skb);
queue->tx.req_cons = idx;
- if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
+ if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) ||
(*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
break;
}
- (*map_ops) = gop - queue->tx_map_ops;
return;
}
@@ -1114,9 +1150,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
struct xen_netif_tx_request *txp;
u16 pending_idx;
- unsigned data_len;
- pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+ pending_idx = copy_pending_idx(skb, 0);
txp = &queue->pending_tx_info[pending_idx].req;
/* Check the remap error code. */
@@ -1135,18 +1170,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
continue;
}
- data_len = skb->len;
- callback_param(queue, pending_idx).ctx = NULL;
- if (data_len < txp->size) {
- /* Append the packet payload as a fragment. */
- txp->offset += data_len;
- txp->size -= data_len;
- } else {
- /* Schedule a response immediately. */
- xenvif_idx_release(queue, pending_idx,
- XEN_NETIF_RSP_OKAY);
- }
-
if (txp->flags & XEN_NETTXF_csum_blank)
skb->ip_summed = CHECKSUM_PARTIAL;
else if (txp->flags & XEN_NETTXF_data_validated)
@@ -1332,7 +1355,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif_queue *queue, int budget)
{
- unsigned nr_mops, nr_cops = 0;
+ unsigned nr_mops = 0, nr_cops = 0;
int work_done, ret;
if (unlikely(!tx_work_todo(queue)))
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index 932762177110..0ba754ebc5ba 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -82,9 +82,10 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
return false;
}
-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
{
unsigned long flags;
+ bool ret = true;
spin_lock_irqsave(&queue->rx_queue.lock, flags);
@@ -92,8 +93,7 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
struct net_device *dev = queue->vif->dev;
netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
- kfree_skb(skb);
- queue->vif->dev->stats.rx_dropped++;
+ ret = false;
} else {
if (skb_queue_empty(&queue->rx_queue))
xenvif_update_needed_slots(queue, skb);
@@ -104,6 +104,8 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
}
spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
+
+ return ret;
}
static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index ef4e53bf5604..14aec417fa06 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1862,6 +1862,12 @@ static int netfront_resume(struct xenbus_device *dev)
netif_tx_unlock_bh(info->netdev);
xennet_disconnect_backend(info);
+
+ rtnl_lock();
+ if (info->queues)
+ xennet_destroy_queues(info);
+ rtnl_unlock();
+
return 0;
}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index da55ce45ac70..69e333922bea 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4304,7 +4304,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
mutex_unlock(&ns->ctrl->subsys->lock);
/* guarantee not available in head->list */
- synchronize_rcu();
+ synchronize_srcu(&ns->head->srcu);
if (!nvme_ns_head_multipath(ns->head))
nvme_cdev_del(&ns->cdev, &ns->cdev_device);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 93e2138a8b42..7e025b8948cb 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -174,11 +174,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
struct nvme_ns_head *head = ns->head;
sector_t capacity = get_capacity(head->disk);
int node;
+ int srcu_idx;
+ srcu_idx = srcu_read_lock(&head->srcu);
list_for_each_entry_rcu(ns, &head->list, siblings) {
if (capacity != get_capacity(ns->disk))
clear_bit(NVME_NS_READY, &ns->flags);
}
+ srcu_read_unlock(&head->srcu, srcu_idx);
for_each_node(node)
rcu_assign_pointer(head->current_path[node], NULL);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f4335519399d..488ad7dabeb8 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -797,6 +797,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma);
if (bv->bv_len > first_prp_len)
cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len);
+ else
+ cmnd->dptr.prp2 = 0;
return BLK_STS_OK;
}
diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index 52ecd66ce357..047a8374b4fd 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -436,9 +436,14 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input)
writel(value, padcfg0);
}
+static int __intel_gpio_get_gpio_mode(u32 value)
+{
+ return (value & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT;
+}
+
static int intel_gpio_get_gpio_mode(void __iomem *padcfg0)
{
- return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT;
+ return __intel_gpio_get_gpio_mode(readl(padcfg0));
}
static void intel_gpio_set_gpio_mode(void __iomem *padcfg0)
@@ -1674,6 +1679,7 @@ EXPORT_SYMBOL_GPL(intel_pinctrl_get_soc_data);
static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int pin)
{
const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin);
+ u32 value;
if (!pd || !intel_pad_usable(pctrl, pin))
return false;
@@ -1688,6 +1694,25 @@ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int
gpiochip_line_is_irq(&pctrl->chip, intel_pin_to_gpio(pctrl, pin)))
return true;
+ /*
+ * The firmware on some systems may configure GPIO pins to be
+ * an interrupt source in so called "direct IRQ" mode. In such
+ * cases the GPIO controller driver has no idea if those pins
+ * are being used or not. At the same time, there is a known bug
+ * in the firmwares that don't restore the pin settings correctly
+ * after suspend, i.e. by an unknown reason the Rx value becomes
+ * inverted.
+ *
+ * Hence, let's save and restore the pins that are configured
+ * as GPIOs in the input mode with GPIROUTIOXAPIC bit set.
+ *
+ * See https://bugzilla.kernel.org/show_bug.cgi?id=214749.
+ */
+ value = readl(intel_get_padcfg(pctrl, pin, PADCFG0));
+ if ((value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) &&
+ (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO))
+ return true;
+
return false;
}
diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c
index 65d312967619..27f0a54e12bf 100644
--- a/drivers/pinctrl/mediatek/mtk-eint.c
+++ b/drivers/pinctrl/mediatek/mtk-eint.c
@@ -303,12 +303,15 @@ static struct irq_chip mtk_eint_irq_chip = {
static unsigned int mtk_eint_hw_init(struct mtk_eint *eint)
{
- void __iomem *reg = eint->base + eint->regs->dom_en;
+ void __iomem *dom_en = eint->base + eint->regs->dom_en;
+ void __iomem *mask_set = eint->base + eint->regs->mask_set;
unsigned int i;
for (i = 0; i < eint->hw->ap_num; i += 32) {
- writel(0xffffffff, reg);
- reg += 4;
+ writel(0xffffffff, dom_en);
+ writel(0xffffffff, mask_set);
+ dom_en += 4;
+ mask_set += 4;
}
return 0;
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index 67bec7ea0f8b..414ee6bb8ac9 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -727,7 +727,7 @@ static int pcs_allocate_pin_table(struct pcs_device *pcs)
mux_bytes = pcs->width / BITS_PER_BYTE;
- if (pcs->bits_per_mux) {
+ if (pcs->bits_per_mux && pcs->fmask) {
pcs->bits_per_pin = fls(pcs->fmask);
nr_pins = (pcs->size * BITS_PER_BYTE) / pcs->bits_per_pin;
} else {
diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c
index ef4ae977b8e0..439d282aafd1 100644
--- a/drivers/platform/x86/amd/pmc.c
+++ b/drivers/platform/x86/amd/pmc.c
@@ -739,8 +739,14 @@ static void amd_pmc_s2idle_prepare(void)
static void amd_pmc_s2idle_check(void)
{
struct amd_pmc_dev *pdev = &pmc;
+ struct smu_metrics table;
int rc;
+ /* CZN: Ensure that future s0i3 entry attempts at least 10ms passed */
+ if (pdev->cpu_id == AMD_CPU_ID_CZN && !get_metrics_table(pdev, &table) &&
+ table.s0i3_last_entry_status)
+ usleep_range(10000, 20000);
+
/* Dump the IdleMask before we add to the STB */
amd_pmc_idlemask_read(pdev, pdev->dev, NULL);
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 9dc935886e9f..c6ded3fdd715 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -758,7 +758,6 @@ static void qeth_l2_br2dev_worker(struct work_struct *work)
struct list_head *iter;
int err = 0;
- kfree(br2dev_event_work);
QETH_CARD_TEXT_(card, 4, "b2dw%04lx", event);
QETH_CARD_TEXT_(card, 4, "ma%012llx", ether_addr_to_u64(addr));
@@ -815,6 +814,7 @@ unlock:
dev_put(brdev);
dev_put(lsyncdev);
dev_put(dstdev);
+ kfree(br2dev_event_work);
}
static int qeth_l2_br2dev_queue_work(struct net_device *brdev,
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 4981baf97835..b5237206eac3 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -406,7 +406,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server,
if (!server)
return;
- a = atomic_inc_return(&server->active);
+ a = atomic_read(&server->active);
zero = __refcount_dec_and_test(&server->ref, &r);
trace_afs_server(debug_id, r - 1, a, reason);
if (unlikely(zero))
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 451d8a077e12..bce2492186d0 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -605,6 +605,14 @@ again:
set_bit(FSCACHE_COOKIE_DO_PREP_TO_WRITE, &cookie->flags);
queue = true;
}
+ /*
+ * We could race with cookie_lru which may set LRU_DISCARD bit
+ * but has yet to run the cookie state machine. If this happens
+ * and another thread tries to use the cookie, clear LRU_DISCARD
+ * so we don't end up withdrawing the cookie while in use.
+ */
+ if (test_and_clear_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags))
+ fscache_see_cookie(cookie, fscache_cookie_see_lru_discard_clear);
break;
case FSCACHE_COOKIE_STATE_FAILED:
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 3b55e239705f..9930fa901039 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat,
kunmap_atomic(kaddr);
nilfs_dat_commit_entry(dat, req);
+
+ if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) {
+ nilfs_error(dat->i_sb,
+ "state inconsistency probably due to duplicate use of vblocknr = %llu",
+ (unsigned long long)req->pr_entry_nr);
+ return;
+ }
nilfs_palloc_commit_free_entry(dat, req);
}
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 492dce43236e..cab7cfebf40b 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -222,12 +222,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
#define tlb_needs_table_invalidate() (true)
#endif
+void tlb_remove_table_sync_one(void);
+
#else
#ifdef tlb_needs_table_invalidate
#error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
#endif
+static inline void tlb_remove_table_sync_one(void) { }
+
#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 528bd44b59e2..2b7d077de7ef 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -68,6 +68,7 @@ struct css_task_iter {
struct list_head iters_node; /* css_set->task_iters */
};
+extern struct file_system_type cgroup_fs_type;
extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index ef4aea3b356e..65a78773dcca 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -210,6 +210,20 @@ alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct p
return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array);
}
+static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask)
+{
+ gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN);
+
+ if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN))
+ return;
+
+ if (node_online(this_node))
+ return;
+
+ pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node);
+ dump_stack();
+}
+
/*
* Allocate pages, preferring the node given as nid. The node must be valid and
* online. For more general interface, see alloc_pages_node().
@@ -218,7 +232,7 @@ static inline struct page *
__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
{
VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
- VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid));
+ warn_if_node_offline(nid, gfp_mask);
return __alloc_pages(gfp_mask, order, nid, NULL);
}
@@ -227,7 +241,7 @@ static inline
struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
{
VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
- VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid));
+ warn_if_node_offline(nid, gfp);
return __folio_alloc(gfp, order, nid, NULL);
}
diff --git a/include/linux/license.h b/include/linux/license.h
index ad937f57f2cb..7cce390f120b 100644
--- a/include/linux/license.h
+++ b/include/linux/license.h
@@ -2,8 +2,6 @@
#ifndef __LICENSE_H
#define __LICENSE_H
-#include <linux/string.h>
-
static inline int license_is_gpl_compatible(const char *license)
{
return (strcmp(license, "GPL") == 0
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index c7a91981cd5a..ba6958b49a8e 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -50,6 +50,7 @@ enum mlx5_flow_destination_type {
MLX5_FLOW_DESTINATION_TYPE_PORT,
MLX5_FLOW_DESTINATION_TYPE_COUNTER,
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM,
+ MLX5_FLOW_DESTINATION_TYPE_RANGE,
};
enum {
@@ -143,6 +144,10 @@ enum {
MLX5_FLOW_DEST_VPORT_REFORMAT_ID = BIT(1),
};
+enum mlx5_flow_dest_range_field {
+ MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN = 0,
+};
+
struct mlx5_flow_destination {
enum mlx5_flow_destination_type type;
union {
@@ -156,6 +161,13 @@ struct mlx5_flow_destination {
struct mlx5_pkt_reformat *pkt_reformat;
u8 flags;
} vport;
+ struct {
+ struct mlx5_flow_table *hit_ft;
+ struct mlx5_flow_table *miss_ft;
+ enum mlx5_flow_dest_range_field field;
+ u32 min;
+ u32 max;
+ } range;
u32 sampler_id;
};
};
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 300b56ea5ff4..152d2d7f8743 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -68,6 +68,7 @@ enum {
MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2,
MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3,
MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2 = 0x20,
MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION = 0x25,
};
@@ -1880,7 +1881,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
};
struct mlx5_ifc_cmd_hca_cap_2_bits {
- u8 reserved_at_0[0xa0];
+ u8 reserved_at_0[0x80];
+
+ u8 migratable[0x1];
+ u8 reserved_at_81[0x1f];
u8 max_reformat_insert_size[0x8];
u8 max_reformat_insert_offset[0x8];
@@ -6109,6 +6113,38 @@ struct mlx5_ifc_match_definer_format_32_bits {
u8 inner_dmac_15_0[0x10];
};
+enum {
+ MLX5_IFC_DEFINER_FORMAT_ID_SELECT = 61,
+};
+
+#define MLX5_IFC_DEFINER_FORMAT_OFFSET_UNUSED 0x0
+#define MLX5_IFC_DEFINER_FORMAT_OFFSET_OUTER_ETH_PKT_LEN 0x48
+#define MLX5_IFC_DEFINER_DW_SELECTORS_NUM 9
+#define MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM 8
+
+struct mlx5_ifc_match_definer_match_mask_bits {
+ u8 reserved_at_1c0[5][0x20];
+ u8 match_dw_8[0x20];
+ u8 match_dw_7[0x20];
+ u8 match_dw_6[0x20];
+ u8 match_dw_5[0x20];
+ u8 match_dw_4[0x20];
+ u8 match_dw_3[0x20];
+ u8 match_dw_2[0x20];
+ u8 match_dw_1[0x20];
+ u8 match_dw_0[0x20];
+
+ u8 match_byte_7[0x8];
+ u8 match_byte_6[0x8];
+ u8 match_byte_5[0x8];
+ u8 match_byte_4[0x8];
+
+ u8 match_byte_3[0x8];
+ u8 match_byte_2[0x8];
+ u8 match_byte_1[0x8];
+ u8 match_byte_0[0x8];
+};
+
struct mlx5_ifc_match_definer_bits {
u8 modify_field_select[0x40];
@@ -6117,9 +6153,41 @@ struct mlx5_ifc_match_definer_bits {
u8 reserved_at_80[0x10];
u8 format_id[0x10];
- u8 reserved_at_a0[0x160];
+ u8 reserved_at_a0[0x60];
- u8 match_mask[16][0x20];
+ u8 format_select_dw3[0x8];
+ u8 format_select_dw2[0x8];
+ u8 format_select_dw1[0x8];
+ u8 format_select_dw0[0x8];
+
+ u8 format_select_dw7[0x8];
+ u8 format_select_dw6[0x8];
+ u8 format_select_dw5[0x8];
+ u8 format_select_dw4[0x8];
+
+ u8 reserved_at_100[0x18];
+ u8 format_select_dw8[0x8];
+
+ u8 reserved_at_120[0x20];
+
+ u8 format_select_byte3[0x8];
+ u8 format_select_byte2[0x8];
+ u8 format_select_byte1[0x8];
+ u8 format_select_byte0[0x8];
+
+ u8 format_select_byte7[0x8];
+ u8 format_select_byte6[0x8];
+ u8 format_select_byte5[0x8];
+ u8 format_select_byte4[0x8];
+
+ u8 reserved_at_180[0x40];
+
+ union {
+ struct {
+ u8 match_mask[16][0x20];
+ };
+ struct mlx5_ifc_match_definer_match_mask_bits match_mask_format;
+ };
};
struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index aad53cb72f17..7f31432f44c2 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -132,4 +132,6 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev);
u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev);
+int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out,
+ u16 opmod);
#endif /* __MLX5_VPORT_H__ */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8bbcccbc5565..974ccca609d2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1852,6 +1852,25 @@ static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodem
__show_free_areas(flags, nodemask, MAX_NR_ZONES - 1);
}
+/*
+ * Parameter block passed down to zap_pte_range in exceptional cases.
+ */
+struct zap_details {
+ struct folio *single_folio; /* Locked folio to be unmapped */
+ bool even_cows; /* Zap COWed private pages too? */
+ zap_flags_t zap_flags; /* Extra flags for zapping */
+};
+
+/*
+ * Whether to drop the pte markers, for example, the uffd-wp information for
+ * file-backed memory. This should only be specified when we will completely
+ * drop the page in the mm, either by truncation or unmapping of the vma. By
+ * default, the flag is not set.
+ */
+#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
+/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */
+#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1))
+
#ifdef CONFIG_MMU
extern bool can_do_mlock(void);
#else
@@ -1869,6 +1888,8 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
void zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
+void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
+ unsigned long size, struct zap_details *details);
void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
struct vm_area_struct *start_vma, unsigned long start,
unsigned long end);
@@ -3467,12 +3488,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
}
#endif
-/*
- * Whether to drop the pte markers, for example, the uffd-wp information for
- * file-backed memory. This should only be specified when we will completely
- * drop the page in the mm, either by truncation or unmapping of the vma. By
- * default, the flag is not set.
- */
-#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
-
#endif /* _LINUX_MM_H */
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 9c50bc40f8ff..6f7993803ee7 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -451,7 +451,7 @@ static inline bool mmc_ready_for_data(u32 status)
#define MMC_SECURE_TRIM1_ARG 0x80000001
#define MMC_SECURE_TRIM2_ARG 0x80008000
#define MMC_SECURE_ARGS 0x80000000
-#define MMC_TRIM_ARGS 0x00008001
+#define MMC_TRIM_OR_DISCARD_ARGS 0x00008003
#define mmc_driver_type_mask(n) (1 << (n))
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 29ae964e3b89..2287cb8eb9e4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -78,6 +78,7 @@ struct xdp_buff;
void synchronize_net(void);
void netdev_set_default_ethtool_ops(struct net_device *dev,
const struct ethtool_ops *ops);
+void netdev_sw_irq_coalesce_default_on(struct net_device *dev);
/* Backlog congestion levels */
#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index a108b60a6962..5f0d7d0b9471 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -165,6 +165,13 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr)
return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr);
}
+#ifndef pmd_young
+static inline int pmd_young(pmd_t pmd)
+{
+ return 0;
+}
+#endif
+
#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
@@ -260,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
+#ifndef arch_has_hw_nonleaf_pmd_young
+/*
+ * Return whether the accessed bit in non-leaf PMD entries is supported on the
+ * local CPU.
+ */
+static inline bool arch_has_hw_nonleaf_pmd_young(void)
+{
+ return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG);
+}
+#endif
+
#ifndef arch_has_hw_pte_young
/*
* Return whether the accessed bit is supported on the local CPU.
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 68dab3e08aad..5b5357c0bd8c 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -323,29 +323,36 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert(
* When we write to a bucket without unlocking, we use rht_assign_locked().
*/
-static inline void rht_lock(struct bucket_table *tbl,
- struct rhash_lock_head __rcu **bkt)
+static inline unsigned long rht_lock(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bkt)
{
- local_bh_disable();
+ unsigned long flags;
+
+ local_irq_save(flags);
bit_spin_lock(0, (unsigned long *)bkt);
lock_map_acquire(&tbl->dep_map);
+ return flags;
}
-static inline void rht_lock_nested(struct bucket_table *tbl,
- struct rhash_lock_head __rcu **bucket,
- unsigned int subclass)
+static inline unsigned long rht_lock_nested(struct bucket_table *tbl,
+ struct rhash_lock_head __rcu **bucket,
+ unsigned int subclass)
{
- local_bh_disable();
+ unsigned long flags;
+
+ local_irq_save(flags);
bit_spin_lock(0, (unsigned long *)bucket);
lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_);
+ return flags;
}
static inline void rht_unlock(struct bucket_table *tbl,
- struct rhash_lock_head __rcu **bkt)
+ struct rhash_lock_head __rcu **bkt,
+ unsigned long flags)
{
lock_map_release(&tbl->dep_map);
bit_spin_unlock(0, (unsigned long *)bkt);
- local_bh_enable();
+ local_irq_restore(flags);
}
static inline struct rhash_head *__rht_ptr(
@@ -393,7 +400,8 @@ static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
static inline void rht_assign_unlock(struct bucket_table *tbl,
struct rhash_lock_head __rcu **bkt,
- struct rhash_head *obj)
+ struct rhash_head *obj,
+ unsigned long flags)
{
if (rht_is_a_nulls(obj))
obj = NULL;
@@ -401,7 +409,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl,
rcu_assign_pointer(*bkt, (void *)obj);
preempt_enable();
__release(bitlock);
- local_bh_enable();
+ local_irq_restore(flags);
}
/**
@@ -706,6 +714,7 @@ static inline void *__rhashtable_insert_fast(
struct rhash_head __rcu **pprev;
struct bucket_table *tbl;
struct rhash_head *head;
+ unsigned long flags;
unsigned int hash;
int elasticity;
void *data;
@@ -720,11 +729,11 @@ static inline void *__rhashtable_insert_fast(
if (!bkt)
goto out;
pprev = NULL;
- rht_lock(tbl, bkt);
+ flags = rht_lock(tbl, bkt);
if (unlikely(rcu_access_pointer(tbl->future_tbl))) {
slow_path:
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
rcu_read_unlock();
return rhashtable_insert_slow(ht, key, obj);
}
@@ -756,9 +765,9 @@ slow_path:
RCU_INIT_POINTER(list->rhead.next, head);
if (pprev) {
rcu_assign_pointer(*pprev, obj);
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
} else
- rht_assign_unlock(tbl, bkt, obj);
+ rht_assign_unlock(tbl, bkt, obj, flags);
data = NULL;
goto out;
}
@@ -785,7 +794,7 @@ slow_path:
}
atomic_inc(&ht->nelems);
- rht_assign_unlock(tbl, bkt, obj);
+ rht_assign_unlock(tbl, bkt, obj, flags);
if (rht_grow_above_75(ht, tbl))
schedule_work(&ht->run_work);
@@ -797,7 +806,7 @@ out:
return data;
out_unlock:
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
goto out;
}
@@ -991,6 +1000,7 @@ static inline int __rhashtable_remove_fast_one(
struct rhash_lock_head __rcu **bkt;
struct rhash_head __rcu **pprev;
struct rhash_head *he;
+ unsigned long flags;
unsigned int hash;
int err = -ENOENT;
@@ -999,7 +1009,7 @@ static inline int __rhashtable_remove_fast_one(
if (!bkt)
return -ENOENT;
pprev = NULL;
- rht_lock(tbl, bkt);
+ flags = rht_lock(tbl, bkt);
rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
struct rhlist_head *list;
@@ -1043,14 +1053,14 @@ static inline int __rhashtable_remove_fast_one(
if (pprev) {
rcu_assign_pointer(*pprev, obj);
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
} else {
- rht_assign_unlock(tbl, bkt, obj);
+ rht_assign_unlock(tbl, bkt, obj, flags);
}
goto unlocked;
}
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
unlocked:
if (err > 0) {
atomic_dec(&ht->nelems);
@@ -1143,6 +1153,7 @@ static inline int __rhashtable_replace_fast(
struct rhash_lock_head __rcu **bkt;
struct rhash_head __rcu **pprev;
struct rhash_head *he;
+ unsigned long flags;
unsigned int hash;
int err = -ENOENT;
@@ -1158,7 +1169,7 @@ static inline int __rhashtable_replace_fast(
return -ENOENT;
pprev = NULL;
- rht_lock(tbl, bkt);
+ flags = rht_lock(tbl, bkt);
rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
if (he != obj_old) {
@@ -1169,15 +1180,15 @@ static inline int __rhashtable_replace_fast(
rcu_assign_pointer(obj_new->next, obj_old->next);
if (pprev) {
rcu_assign_pointer(*pprev, obj_new);
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
} else {
- rht_assign_unlock(tbl, bkt, obj_new);
+ rht_assign_unlock(tbl, bkt, obj_new, flags);
}
err = 0;
goto unlocked;
}
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
unlocked:
return err;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4e464a27adaf..4c8492401a10 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1255,6 +1255,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb,
void skb_attempt_defer_free(struct sk_buff *skb);
struct sk_buff *napi_build_skb(void *data, unsigned int frag_size);
+struct sk_buff *slab_build_skb(void *data);
/**
* alloc_skb - allocate a network buffer
diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h
index beb190449704..a0746d4aec20 100644
--- a/include/linux/soc/mediatek/mtk_wed.h
+++ b/include/linux/soc/mediatek/mtk_wed.h
@@ -160,7 +160,7 @@ struct mtk_wed_ops {
int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring,
void __iomem *regs, bool reset);
int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring,
- void __iomem *regs);
+ void __iomem *regs, bool reset);
int (*txfree_ring_setup)(struct mtk_wed_device *dev,
void __iomem *regs);
int (*msg_update)(struct mtk_wed_device *dev, int cmd_id,
@@ -228,8 +228,8 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev)
(_dev)->ops->irq_get(_dev, _mask)
#define mtk_wed_device_irq_set_mask(_dev, _mask) \
(_dev)->ops->irq_set_mask(_dev, _mask)
-#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs) \
- (_dev)->ops->rx_ring_setup(_dev, _ring, _regs)
+#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs, _reset) \
+ (_dev)->ops->rx_ring_setup(_dev, _ring, _regs, _reset)
#define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) \
(_dev)->ops->ppe_check(_dev, _skb, _reason, _hash)
#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) \
@@ -249,7 +249,7 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
#define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0)
#define mtk_wed_device_irq_get(_dev, _mask) 0
#define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0)
-#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs) -ENODEV
+#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs, _reset) -ENODEV
#define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) do {} while (0)
#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) -ENODEV
#define mtk_wed_device_stop(_dev) do {} while (0)
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index fb2e88614f5d..83ca2e8eb6b5 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -271,5 +271,6 @@ struct plat_stmmacenet_data {
int msi_tx_base_vec;
bool use_phy_wol;
bool sph_disable;
+ bool serdes_up_after_phy_linkup;
};
#endif
diff --git a/include/net/act_api.h b/include/net/act_api.h
index c94ea1a306e0..2a6f443f0ef6 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -101,11 +101,6 @@ static inline enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
return hw_stats;
}
-#ifdef CONFIG_NET_CLS_ACT
-
-#define ACT_P_CREATED 1
-#define ACT_P_DELETED 1
-
typedef void (*tc_action_priv_destructor)(void *priv);
struct tc_action_ops {
@@ -140,6 +135,11 @@ struct tc_action_ops {
struct netlink_ext_ack *extack);
};
+#ifdef CONFIG_NET_CLS_ACT
+
+#define ACT_P_CREATED 1
+#define ACT_P_DELETED 1
+
struct tc_action_net {
struct tcf_idrinfo *idrinfo;
const struct tc_action_ops *ops;
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index b69ca695935c..d5a5ae926380 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -66,10 +66,10 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
-bool rxrpc_kernel_call_is_complete(struct rxrpc_call *);
void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *,
unsigned long);
int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val);
+int rxrpc_sock_set_security_keyring(struct sock *, struct key *);
#endif /* _NET_RXRPC_H */
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index e004ba04a9ae..684f1cd28730 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -228,6 +228,17 @@ enum {
*/
HCI_QUIRK_VALID_LE_STATES,
+ /* When this quirk is set, then erroneous data reporting
+ * is ignored. This is mainly due to the fact that the HCI
+ * Read Default Erroneous Data Reporting command is advertised,
+ * but not supported; these controllers often reply with unknown
+ * command and tend to lock up randomly. Needing a hard reset.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_BROKEN_ERR_DATA_REPORTING,
+
/*
* When this quirk is set, then the hci_suspend_notifier is not
* registered. This is intended for devices which drop completely
@@ -1424,7 +1435,6 @@ struct hci_std_codecs_v2 {
} __packed;
struct hci_vnd_codec_v2 {
- __u8 id;
__le16 cid;
__le16 vid;
__u8 transport;
diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index e1481f9cf049..d09c393d229f 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -260,6 +260,24 @@ struct ieee802154_addr {
};
};
+/**
+ * struct ieee802154_coord_desc - Coordinator descriptor
+ * @addr: PAN ID and coordinator address
+ * @page: page this coordinator is using
+ * @channel: channel this coordinator is using
+ * @superframe_spec: SuperFrame specification as received
+ * @link_quality: link quality indicator at which the beacon was received
+ * @gts_permit: the coordinator accepts GTS requests
+ */
+struct ieee802154_coord_desc {
+ struct ieee802154_addr addr;
+ u8 page;
+ u8 channel;
+ u16 superframe_spec;
+ u8 link_quality;
+ bool gts_permit;
+};
+
struct ieee802154_llsec_key_id {
u8 mode;
u8 id;
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 5f6eca5e4a40..0f376a28b9c4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1452,6 +1452,45 @@ struct devlink_ops {
const u8 *hw_addr, int hw_addr_len,
struct netlink_ext_ack *extack);
/**
+ * @port_fn_roce_get: Port function's roce get function.
+ *
+ * Query RoCE state of a function managed by the devlink port.
+ * Return -EOPNOTSUPP if port function RoCE handling is not supported.
+ */
+ int (*port_fn_roce_get)(struct devlink_port *devlink_port,
+ bool *is_enable,
+ struct netlink_ext_ack *extack);
+ /**
+ * @port_fn_roce_set: Port function's roce set function.
+ *
+ * Enable/Disable the RoCE state of a function managed by the devlink
+ * port.
+ * Return -EOPNOTSUPP if port function RoCE handling is not supported.
+ */
+ int (*port_fn_roce_set)(struct devlink_port *devlink_port,
+ bool enable, struct netlink_ext_ack *extack);
+ /**
+ * @port_fn_migratable_get: Port function's migratable get function.
+ *
+ * Query migratable state of a function managed by the devlink port.
+ * Return -EOPNOTSUPP if port function migratable handling is not
+ * supported.
+ */
+ int (*port_fn_migratable_get)(struct devlink_port *devlink_port,
+ bool *is_enable,
+ struct netlink_ext_ack *extack);
+ /**
+ * @port_fn_migratable_set: Port function's migratable set function.
+ *
+ * Enable/Disable migratable state of a function managed by the devlink
+ * port.
+ * Return -EOPNOTSUPP if port function migratable handling is not
+ * supported.
+ */
+ int (*port_fn_migratable_set)(struct devlink_port *devlink_port,
+ bool enable,
+ struct netlink_ext_ack *extack);
+ /**
* port_new() - Add a new port function of a specified flavor
* @devlink: Devlink instance
* @attrs: attributes of the new port
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 28d0687bf7da..d80c78506f19 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -522,7 +522,14 @@ enum {
#define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0)
-#define GDMA_DRV_CAP_FLAGS1 GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT
+/* Advertise to the NIC firmware: the NAPI work_done variable race is fixed,
+ * so the driver is able to reliably support features like busy_poll.
+ */
+#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
+
+#define GDMA_DRV_CAP_FLAGS1 \
+ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
+ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX)
#define GDMA_DRV_CAP_FLAGS2 0
diff --git a/include/net/nl802154.h b/include/net/nl802154.h
index f5850b569c52..b79a89d5207c 100644
--- a/include/net/nl802154.h
+++ b/include/net/nl802154.h
@@ -72,6 +72,8 @@ enum nl802154_commands {
NL802154_CMD_NEW_SEC_LEVEL,
NL802154_CMD_DEL_SEC_LEVEL,
+ NL802154_CMD_SCAN_EVENT,
+
/* add new commands above here */
/* used to define NL802154_CMD_MAX below */
@@ -131,6 +133,8 @@ enum nl802154_attrs {
NL802154_ATTR_PID,
NL802154_ATTR_NETNS_FD,
+ NL802154_ATTR_COORDINATOR,
+
/* add attributes here, update the policy in nl802154.c */
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
@@ -217,6 +221,45 @@ enum nl802154_wpan_phy_capability_attr {
};
/**
+ * enum nl802154_coord - Netlink attributes for a coord
+ *
+ * @__NL802154_COORD_INVALID: invalid
+ * @NL802154_COORD_PANID: PANID of the coordinator (2 bytes)
+ * @NL802154_COORD_ADDR: coordinator address, (8 bytes or 2 bytes)
+ * @NL802154_COORD_CHANNEL: channel number, related to @NL802154_COORD_PAGE (u8)
+ * @NL802154_COORD_PAGE: channel page, related to @NL802154_COORD_CHANNEL (u8)
+ * @NL802154_COORD_PREAMBLE_CODE: Preamble code used when the beacon was received,
+ * this is PHY dependent and optional (u8)
+ * @NL802154_COORD_MEAN_PRF: Mean PRF used when the beacon was received,
+ * this is PHY dependent and optional (u8)
+ * @NL802154_COORD_SUPERFRAME_SPEC: superframe specification of the PAN (u16)
+ * @NL802154_COORD_LINK_QUALITY: signal quality of beacon in unspecified units,
+ * scaled to 0..255 (u8)
+ * @NL802154_COORD_GTS_PERMIT: set to true if GTS is permitted on this PAN
+ * @NL802154_COORD_PAYLOAD_DATA: binary data containing the raw data from the
+ * frame payload, (only if beacon or probe response had data)
+ * @NL802154_COORD_PAD: attribute used for padding for 64-bit alignment
+ * @NL802154_COORD_MAX: highest coordinator attribute
+ */
+enum nl802154_coord {
+ __NL802154_COORD_INVALID,
+ NL802154_COORD_PANID,
+ NL802154_COORD_ADDR,
+ NL802154_COORD_CHANNEL,
+ NL802154_COORD_PAGE,
+ NL802154_COORD_PREAMBLE_CODE,
+ NL802154_COORD_MEAN_PRF,
+ NL802154_COORD_SUPERFRAME_SPEC,
+ NL802154_COORD_LINK_QUALITY,
+ NL802154_COORD_GTS_PERMIT,
+ NL802154_COORD_PAYLOAD_DATA,
+ NL802154_COORD_PAD,
+
+ /* keep last */
+ NL802154_COORD_MAX,
+};
+
+/**
* enum nl802154_cca_modes - cca modes
*
* @__NL802154_CCA_INVALID: cca mode number 0 is reserved
diff --git a/include/net/ping.h b/include/net/ping.h
index e4ff3911cbf5..9233ad3de0ad 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -16,9 +16,6 @@
#define PING_HTABLE_SIZE 64
#define PING_HTABLE_MASK (PING_HTABLE_SIZE-1)
-#define ping_portaddr_for_each_entry(__sk, node, list) \
- hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
-
/*
* gid_t is either uint or ushort. We want to pass it to
* proc_dointvec_minmax(), so it must not be larger than MAX_INT
diff --git a/include/net/sock.h b/include/net/sock.h
index 6d207e7c4ad0..ecea3dcc2217 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -503,10 +503,10 @@ struct sock {
#if BITS_PER_LONG==32
seqlock_t sk_stamp_seq;
#endif
- u16 sk_tsflags;
- u8 sk_shutdown;
atomic_t sk_tskey;
atomic_t sk_zckey;
+ u32 sk_tsflags;
+ u8 sk_shutdown;
u8 sk_clockid;
u8 sk_txtime_deadline_mode : 1,
@@ -1899,7 +1899,7 @@ static inline void sock_replace_proto(struct sock *sk, struct proto *proto)
struct sockcm_cookie {
u64 transmit_time;
u32 mark;
- u16 tsflags;
+ u32 tsflags;
};
static inline void sockcm_init(struct sockcm_cookie *sockc,
diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
new file mode 100644
index 000000000000..ceed2fc089ff
--- /dev/null
+++ b/include/net/tc_wrapper.h
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_TC_WRAPPER_H
+#define __NET_TC_WRAPPER_H
+
+#include <net/pkt_cls.h>
+
+#if IS_ENABLED(CONFIG_RETPOLINE)
+
+#include <linux/cpufeature.h>
+#include <linux/static_key.h>
+#include <linux/indirect_call_wrapper.h>
+
+#define TC_INDIRECT_SCOPE
+
+extern struct static_key_false tc_skip_wrapper;
+
+/* TC Actions */
+#ifdef CONFIG_NET_CLS_ACT
+
+#define TC_INDIRECT_ACTION_DECLARE(fname) \
+ INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb, \
+ const struct tc_action *a, \
+ struct tcf_result *res))
+
+TC_INDIRECT_ACTION_DECLARE(tcf_bpf_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_connmark_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_csum_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_ct_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_gact_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_gate_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_ife_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_mirred_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_mpls_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_nat_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_pedit_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_police_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_sample_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_simp_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_skbedit_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_skbmod_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_vlan_act);
+TC_INDIRECT_ACTION_DECLARE(tunnel_key_act);
+
+static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
+{
+ if (static_branch_likely(&tc_skip_wrapper))
+ goto skip;
+
+#if IS_BUILTIN(CONFIG_NET_ACT_GACT)
+ if (a->ops->act == tcf_gact_act)
+ return tcf_gact_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_MIRRED)
+ if (a->ops->act == tcf_mirred_act)
+ return tcf_mirred_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_PEDIT)
+ if (a->ops->act == tcf_pedit_act)
+ return tcf_pedit_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_SKBEDIT)
+ if (a->ops->act == tcf_skbedit_act)
+ return tcf_skbedit_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_SKBMOD)
+ if (a->ops->act == tcf_skbmod_act)
+ return tcf_skbmod_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_POLICE)
+ if (a->ops->act == tcf_police_act)
+ return tcf_police_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_BPF)
+ if (a->ops->act == tcf_bpf_act)
+ return tcf_bpf_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_CONNMARK)
+ if (a->ops->act == tcf_connmark_act)
+ return tcf_connmark_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_CSUM)
+ if (a->ops->act == tcf_csum_act)
+ return tcf_csum_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_CT)
+ if (a->ops->act == tcf_ct_act)
+ return tcf_ct_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_CTINFO)
+ if (a->ops->act == tcf_ctinfo_act)
+ return tcf_ctinfo_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_GATE)
+ if (a->ops->act == tcf_gate_act)
+ return tcf_gate_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_MPLS)
+ if (a->ops->act == tcf_mpls_act)
+ return tcf_mpls_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_NAT)
+ if (a->ops->act == tcf_nat_act)
+ return tcf_nat_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_TUNNEL_KEY)
+ if (a->ops->act == tunnel_key_act)
+ return tunnel_key_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_VLAN)
+ if (a->ops->act == tcf_vlan_act)
+ return tcf_vlan_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_IFE)
+ if (a->ops->act == tcf_ife_act)
+ return tcf_ife_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_IPT)
+ if (a->ops->act == tcf_ipt_act)
+ return tcf_ipt_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_SIMP)
+ if (a->ops->act == tcf_simp_act)
+ return tcf_simp_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_SAMPLE)
+ if (a->ops->act == tcf_sample_act)
+ return tcf_sample_act(skb, a, res);
+#endif
+
+skip:
+ return a->ops->act(skb, a, res);
+}
+
+#endif /* CONFIG_NET_CLS_ACT */
+
+/* TC Filters */
+#ifdef CONFIG_NET_CLS
+
+#define TC_INDIRECT_FILTER_DECLARE(fname) \
+ INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb, \
+ const struct tcf_proto *tp, \
+ struct tcf_result *res))
+
+TC_INDIRECT_FILTER_DECLARE(basic_classify);
+TC_INDIRECT_FILTER_DECLARE(cls_bpf_classify);
+TC_INDIRECT_FILTER_DECLARE(cls_cgroup_classify);
+TC_INDIRECT_FILTER_DECLARE(fl_classify);
+TC_INDIRECT_FILTER_DECLARE(flow_classify);
+TC_INDIRECT_FILTER_DECLARE(fw_classify);
+TC_INDIRECT_FILTER_DECLARE(mall_classify);
+TC_INDIRECT_FILTER_DECLARE(route4_classify);
+TC_INDIRECT_FILTER_DECLARE(rsvp_classify);
+TC_INDIRECT_FILTER_DECLARE(rsvp6_classify);
+TC_INDIRECT_FILTER_DECLARE(tcindex_classify);
+TC_INDIRECT_FILTER_DECLARE(u32_classify);
+
+static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ if (static_branch_likely(&tc_skip_wrapper))
+ goto skip;
+
+#if IS_BUILTIN(CONFIG_NET_CLS_BPF)
+ if (tp->classify == cls_bpf_classify)
+ return cls_bpf_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_U32)
+ if (tp->classify == u32_classify)
+ return u32_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_FLOWER)
+ if (tp->classify == fl_classify)
+ return fl_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_FW)
+ if (tp->classify == fw_classify)
+ return fw_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_MATCHALL)
+ if (tp->classify == mall_classify)
+ return mall_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_BASIC)
+ if (tp->classify == basic_classify)
+ return basic_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_CGROUP)
+ if (tp->classify == cls_cgroup_classify)
+ return cls_cgroup_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_FLOW)
+ if (tp->classify == flow_classify)
+ return flow_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_ROUTE4)
+ if (tp->classify == route4_classify)
+ return route4_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_RSVP)
+ if (tp->classify == rsvp_classify)
+ return rsvp_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_RSVP6)
+ if (tp->classify == rsvp6_classify)
+ return rsvp6_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_TCINDEX)
+ if (tp->classify == tcindex_classify)
+ return tcindex_classify(skb, tp, res);
+#endif
+
+skip:
+ return tp->classify(skb, tp, res);
+}
+
+static inline void tc_wrapper_init(void)
+{
+#ifdef CONFIG_X86
+ if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE))
+ static_branch_enable(&tc_skip_wrapper);
+#endif
+}
+
+#endif /* CONFIG_NET_CLS */
+
+#else
+
+#define TC_INDIRECT_SCOPE static
+
+static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
+{
+ return a->ops->act(skb, a, res);
+}
+
+static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ return tp->classify(skb, tp, res);
+}
+
+static inline void tc_wrapper_init(void)
+{
+}
+
+#endif
+
+#endif /* __NET_TC_WRAPPER_H */
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index c078c48a8e6d..a6190aa1b406 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -66,6 +66,7 @@ enum fscache_cookie_trace {
fscache_cookie_put_work,
fscache_cookie_see_active,
fscache_cookie_see_lru_discard,
+ fscache_cookie_see_lru_discard_clear,
fscache_cookie_see_lru_do_one,
fscache_cookie_see_relinquish,
fscache_cookie_see_withdraw,
@@ -149,6 +150,7 @@ enum fscache_access_trace {
EM(fscache_cookie_put_work, "PQ work ") \
EM(fscache_cookie_see_active, "- activ") \
EM(fscache_cookie_see_lru_discard, "- x-lru") \
+ EM(fscache_cookie_see_lru_discard_clear,"- lrudc") \
EM(fscache_cookie_see_lru_do_one, "- lrudo") \
EM(fscache_cookie_see_relinquish, "- x-rlq") \
EM(fscache_cookie_see_withdraw, "- x-wth") \
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index b9886d1df825..049b52e7aa6a 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -16,44 +16,121 @@
/*
* Declare tracing information enums and their string mappings for display.
*/
+#define rxrpc_call_poke_traces \
+ EM(rxrpc_call_poke_error, "Error") \
+ EM(rxrpc_call_poke_idle, "Idle") \
+ EM(rxrpc_call_poke_start, "Start") \
+ EM(rxrpc_call_poke_timer, "Timer") \
+ E_(rxrpc_call_poke_timer_now, "Timer-now")
+
#define rxrpc_skb_traces \
- EM(rxrpc_skb_ack, "ACK") \
- EM(rxrpc_skb_cleaned, "CLN") \
- EM(rxrpc_skb_cloned_jumbo, "CLJ") \
- EM(rxrpc_skb_freed, "FRE") \
- EM(rxrpc_skb_got, "GOT") \
- EM(rxrpc_skb_lost, "*L*") \
- EM(rxrpc_skb_new, "NEW") \
- EM(rxrpc_skb_purged, "PUR") \
- EM(rxrpc_skb_received, "RCV") \
- EM(rxrpc_skb_rotated, "ROT") \
- EM(rxrpc_skb_seen, "SEE") \
- EM(rxrpc_skb_unshared, "UNS") \
- E_(rxrpc_skb_unshared_nomem, "US0")
+ EM(rxrpc_skb_eaten_by_unshare, "ETN unshare ") \
+ EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \
+ EM(rxrpc_skb_get_conn_work, "GET conn-work") \
+ EM(rxrpc_skb_get_local_work, "GET locl-work") \
+ EM(rxrpc_skb_get_reject_work, "GET rej-work ") \
+ EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \
+ EM(rxrpc_skb_get_to_recvmsg_oos, "GET to-recv-o") \
+ EM(rxrpc_skb_new_encap_rcv, "NEW encap-rcv") \
+ EM(rxrpc_skb_new_error_report, "NEW error-rpt") \
+ EM(rxrpc_skb_new_jumbo_subpacket, "NEW jumbo-sub") \
+ EM(rxrpc_skb_new_unshared, "NEW unshared ") \
+ EM(rxrpc_skb_put_conn_work, "PUT conn-work") \
+ EM(rxrpc_skb_put_error_report, "PUT error-rep") \
+ EM(rxrpc_skb_put_input, "PUT input ") \
+ EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \
+ EM(rxrpc_skb_put_purge, "PUT purge ") \
+ EM(rxrpc_skb_put_rotate, "PUT rotate ") \
+ EM(rxrpc_skb_put_unknown, "PUT unknown ") \
+ EM(rxrpc_skb_see_conn_work, "SEE conn-work") \
+ EM(rxrpc_skb_see_recvmsg, "SEE recvmsg ") \
+ EM(rxrpc_skb_see_reject, "SEE reject ") \
+ EM(rxrpc_skb_see_rotate, "SEE rotate ") \
+ E_(rxrpc_skb_see_version, "SEE version ")
#define rxrpc_local_traces \
- EM(rxrpc_local_got, "GOT") \
- EM(rxrpc_local_new, "NEW") \
- EM(rxrpc_local_processing, "PRO") \
- EM(rxrpc_local_put, "PUT") \
- EM(rxrpc_local_queued, "QUE") \
- E_(rxrpc_local_tx_ack, "TAK")
+ EM(rxrpc_local_free, "FREE ") \
+ EM(rxrpc_local_get_call, "GET call ") \
+ EM(rxrpc_local_get_client_conn, "GET conn-cln") \
+ EM(rxrpc_local_get_for_use, "GET for-use ") \
+ EM(rxrpc_local_get_peer, "GET peer ") \
+ EM(rxrpc_local_get_prealloc_conn, "GET conn-pre") \
+ EM(rxrpc_local_new, "NEW ") \
+ EM(rxrpc_local_put_bind, "PUT bind ") \
+ EM(rxrpc_local_put_call, "PUT call ") \
+ EM(rxrpc_local_put_for_use, "PUT for-use ") \
+ EM(rxrpc_local_put_kill_conn, "PUT conn-kil") \
+ EM(rxrpc_local_put_peer, "PUT peer ") \
+ EM(rxrpc_local_put_prealloc_conn, "PUT conn-pre") \
+ EM(rxrpc_local_put_release_sock, "PUT rel-sock") \
+ EM(rxrpc_local_stop, "STOP ") \
+ EM(rxrpc_local_stopped, "STOPPED ") \
+ EM(rxrpc_local_unuse_bind, "UNU bind ") \
+ EM(rxrpc_local_unuse_conn_work, "UNU conn-wrk") \
+ EM(rxrpc_local_unuse_peer_keepalive, "UNU peer-kpa") \
+ EM(rxrpc_local_unuse_release_sock, "UNU rel-sock") \
+ EM(rxrpc_local_use_conn_work, "USE conn-wrk") \
+ EM(rxrpc_local_use_lookup, "USE lookup ") \
+ E_(rxrpc_local_use_peer_keepalive, "USE peer-kpa")
#define rxrpc_peer_traces \
- EM(rxrpc_peer_got, "GOT") \
- EM(rxrpc_peer_new, "NEW") \
- EM(rxrpc_peer_processing, "PRO") \
- E_(rxrpc_peer_put, "PUT")
+ EM(rxrpc_peer_free, "FREE ") \
+ EM(rxrpc_peer_get_accept, "GET accept ") \
+ EM(rxrpc_peer_get_activate_call, "GET act-call") \
+ EM(rxrpc_peer_get_bundle, "GET bundle ") \
+ EM(rxrpc_peer_get_client_conn, "GET cln-conn") \
+ EM(rxrpc_peer_get_input, "GET input ") \
+ EM(rxrpc_peer_get_input_error, "GET inpt-err") \
+ EM(rxrpc_peer_get_keepalive, "GET keepaliv") \
+ EM(rxrpc_peer_get_lookup_client, "GET look-cln") \
+ EM(rxrpc_peer_get_service_conn, "GET srv-conn") \
+ EM(rxrpc_peer_new_client, "NEW client ") \
+ EM(rxrpc_peer_new_prealloc, "NEW prealloc") \
+ EM(rxrpc_peer_put_bundle, "PUT bundle ") \
+ EM(rxrpc_peer_put_call, "PUT call ") \
+ EM(rxrpc_peer_put_conn, "PUT conn ") \
+ EM(rxrpc_peer_put_discard_tmp, "PUT disc-tmp") \
+ EM(rxrpc_peer_put_input, "PUT input ") \
+ EM(rxrpc_peer_put_input_error, "PUT inpt-err") \
+ E_(rxrpc_peer_put_keepalive, "PUT keepaliv")
+
+#define rxrpc_bundle_traces \
+ EM(rxrpc_bundle_free, "FREE ") \
+ EM(rxrpc_bundle_get_client_call, "GET clt-call") \
+ EM(rxrpc_bundle_get_client_conn, "GET clt-conn") \
+ EM(rxrpc_bundle_get_service_conn, "GET svc-conn") \
+ EM(rxrpc_bundle_put_conn, "PUT conn ") \
+ EM(rxrpc_bundle_put_discard, "PUT discard ") \
+ E_(rxrpc_bundle_new, "NEW ")
#define rxrpc_conn_traces \
- EM(rxrpc_conn_got, "GOT") \
- EM(rxrpc_conn_new_client, "NWc") \
- EM(rxrpc_conn_new_service, "NWs") \
- EM(rxrpc_conn_put_client, "PTc") \
- EM(rxrpc_conn_put_service, "PTs") \
- EM(rxrpc_conn_queued, "QUE") \
- EM(rxrpc_conn_reap_service, "RPs") \
- E_(rxrpc_conn_seen, "SEE")
+ EM(rxrpc_conn_free, "FREE ") \
+ EM(rxrpc_conn_get_activate_call, "GET act-call") \
+ EM(rxrpc_conn_get_call_input, "GET inp-call") \
+ EM(rxrpc_conn_get_conn_input, "GET inp-conn") \
+ EM(rxrpc_conn_get_idle, "GET idle ") \
+ EM(rxrpc_conn_get_poke, "GET poke ") \
+ EM(rxrpc_conn_get_service_conn, "GET svc-conn") \
+ EM(rxrpc_conn_new_client, "NEW client ") \
+ EM(rxrpc_conn_new_service, "NEW service ") \
+ EM(rxrpc_conn_put_call, "PUT call ") \
+ EM(rxrpc_conn_put_call_input, "PUT inp-call") \
+ EM(rxrpc_conn_put_conn_input, "PUT inp-conn") \
+ EM(rxrpc_conn_put_discard, "PUT discard ") \
+ EM(rxrpc_conn_put_discard_idle, "PUT disc-idl") \
+ EM(rxrpc_conn_put_local_dead, "PUT loc-dead") \
+ EM(rxrpc_conn_put_noreuse, "PUT noreuse ") \
+ EM(rxrpc_conn_put_poke, "PUT poke ") \
+ EM(rxrpc_conn_put_service_reaped, "PUT svc-reap") \
+ EM(rxrpc_conn_put_unbundle, "PUT unbundle") \
+ EM(rxrpc_conn_put_unidle, "PUT unidle ") \
+ EM(rxrpc_conn_queue_challenge, "QUE chall ") \
+ EM(rxrpc_conn_queue_retry_work, "QUE retry-wk") \
+ EM(rxrpc_conn_queue_rx_work, "QUE rx-work ") \
+ EM(rxrpc_conn_queue_timer, "QUE timer ") \
+ EM(rxrpc_conn_see_new_service_conn, "SEE new-svc ") \
+ EM(rxrpc_conn_see_reap_service, "SEE reap-svc") \
+ E_(rxrpc_conn_see_work, "SEE work ")
#define rxrpc_client_traces \
EM(rxrpc_client_activate_chans, "Activa") \
@@ -71,26 +148,36 @@
E_(rxrpc_client_to_idle, "->Idle")
#define rxrpc_call_traces \
- EM(rxrpc_call_connected, "CON") \
- EM(rxrpc_call_error, "*E*") \
- EM(rxrpc_call_got, "GOT") \
- EM(rxrpc_call_got_kernel, "Gke") \
- EM(rxrpc_call_got_timer, "GTM") \
- EM(rxrpc_call_got_tx, "Gtx") \
- EM(rxrpc_call_got_userid, "Gus") \
- EM(rxrpc_call_new_client, "NWc") \
- EM(rxrpc_call_new_service, "NWs") \
- EM(rxrpc_call_put, "PUT") \
- EM(rxrpc_call_put_kernel, "Pke") \
- EM(rxrpc_call_put_noqueue, "PnQ") \
- EM(rxrpc_call_put_notimer, "PnT") \
- EM(rxrpc_call_put_timer, "PTM") \
- EM(rxrpc_call_put_tx, "Ptx") \
- EM(rxrpc_call_put_userid, "Pus") \
- EM(rxrpc_call_queued, "QUE") \
- EM(rxrpc_call_queued_ref, "QUR") \
- EM(rxrpc_call_release, "RLS") \
- E_(rxrpc_call_seen, "SEE")
+ EM(rxrpc_call_get_input, "GET input ") \
+ EM(rxrpc_call_get_kernel_service, "GET krnl-srv") \
+ EM(rxrpc_call_get_notify_socket, "GET notify ") \
+ EM(rxrpc_call_get_poke, "GET poke ") \
+ EM(rxrpc_call_get_recvmsg, "GET recvmsg ") \
+ EM(rxrpc_call_get_release_sock, "GET rel-sock") \
+ EM(rxrpc_call_get_sendmsg, "GET sendmsg ") \
+ EM(rxrpc_call_get_userid, "GET user-id ") \
+ EM(rxrpc_call_new_client, "NEW client ") \
+ EM(rxrpc_call_new_prealloc_service, "NEW prealloc") \
+ EM(rxrpc_call_put_discard_prealloc, "PUT disc-pre") \
+ EM(rxrpc_call_put_discard_error, "PUT disc-err") \
+ EM(rxrpc_call_put_input, "PUT input ") \
+ EM(rxrpc_call_put_kernel, "PUT kernel ") \
+ EM(rxrpc_call_put_poke, "PUT poke ") \
+ EM(rxrpc_call_put_recvmsg, "PUT recvmsg ") \
+ EM(rxrpc_call_put_release_sock, "PUT rls-sock") \
+ EM(rxrpc_call_put_release_sock_tba, "PUT rls-sk-a") \
+ EM(rxrpc_call_put_sendmsg, "PUT sendmsg ") \
+ EM(rxrpc_call_put_unnotify, "PUT unnotify") \
+ EM(rxrpc_call_put_userid_exists, "PUT u-exists") \
+ EM(rxrpc_call_see_accept, "SEE accept ") \
+ EM(rxrpc_call_see_activate_client, "SEE act-clnt") \
+ EM(rxrpc_call_see_connect_failed, "SEE con-fail") \
+ EM(rxrpc_call_see_connected, "SEE connect ") \
+ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \
+ EM(rxrpc_call_see_input, "SEE input ") \
+ EM(rxrpc_call_see_release, "SEE release ") \
+ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \
+ E_(rxrpc_call_see_zap, "SEE zap ")
#define rxrpc_txqueue_traces \
EM(rxrpc_txqueue_await_reply, "AWR") \
@@ -179,6 +266,7 @@
EM(rxrpc_propose_ack_respond_to_ping, "Rsp2Png") \
EM(rxrpc_propose_ack_retry_tx, "RetryTx") \
EM(rxrpc_propose_ack_rotate_rx, "RxAck ") \
+ EM(rxrpc_propose_ack_rx_idle, "RxIdle ") \
E_(rxrpc_propose_ack_terminal_ack, "ClTerm ")
#define rxrpc_congest_modes \
@@ -273,6 +361,7 @@
EM(rxrpc_txbuf_put_rotated, "PUT ROTATED") \
EM(rxrpc_txbuf_put_send_aborted, "PUT SEND-X ") \
EM(rxrpc_txbuf_put_trans, "PUT TRANS ") \
+ EM(rxrpc_txbuf_see_out_of_step, "OUT-OF-STEP") \
EM(rxrpc_txbuf_see_send_more, "SEE SEND+ ") \
E_(rxrpc_txbuf_see_unacked, "SEE UNACKED")
@@ -287,6 +376,8 @@
#define EM(a, b) a,
#define E_(a, b) a
+enum rxrpc_bundle_trace { rxrpc_bundle_traces } __mode(byte);
+enum rxrpc_call_poke_trace { rxrpc_call_poke_traces } __mode(byte);
enum rxrpc_call_trace { rxrpc_call_traces } __mode(byte);
enum rxrpc_client_trace { rxrpc_client_traces } __mode(byte);
enum rxrpc_congest_change { rxrpc_congest_changes } __mode(byte);
@@ -316,6 +407,8 @@ enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte);
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define E_(a, b) TRACE_DEFINE_ENUM(a);
+rxrpc_bundle_traces;
+rxrpc_call_poke_traces;
rxrpc_call_traces;
rxrpc_client_traces;
rxrpc_congest_changes;
@@ -345,83 +438,98 @@ rxrpc_txqueue_traces;
TRACE_EVENT(rxrpc_local,
TP_PROTO(unsigned int local_debug_id, enum rxrpc_local_trace op,
- int usage, const void *where),
+ int ref, int usage),
- TP_ARGS(local_debug_id, op, usage, where),
+ TP_ARGS(local_debug_id, op, ref, usage),
TP_STRUCT__entry(
__field(unsigned int, local )
__field(int, op )
+ __field(int, ref )
__field(int, usage )
- __field(const void *, where )
),
TP_fast_assign(
__entry->local = local_debug_id;
__entry->op = op;
+ __entry->ref = ref;
__entry->usage = usage;
- __entry->where = where;
),
- TP_printk("L=%08x %s u=%d sp=%pSR",
+ TP_printk("L=%08x %s r=%d u=%d",
__entry->local,
__print_symbolic(__entry->op, rxrpc_local_traces),
- __entry->usage,
- __entry->where)
+ __entry->ref,
+ __entry->usage)
);
TRACE_EVENT(rxrpc_peer,
- TP_PROTO(unsigned int peer_debug_id, enum rxrpc_peer_trace op,
- int usage, const void *where),
+ TP_PROTO(unsigned int peer_debug_id, int ref, enum rxrpc_peer_trace why),
- TP_ARGS(peer_debug_id, op, usage, where),
+ TP_ARGS(peer_debug_id, ref, why),
TP_STRUCT__entry(
__field(unsigned int, peer )
- __field(int, op )
- __field(int, usage )
- __field(const void *, where )
+ __field(int, ref )
+ __field(int, why )
),
TP_fast_assign(
__entry->peer = peer_debug_id;
- __entry->op = op;
- __entry->usage = usage;
- __entry->where = where;
+ __entry->ref = ref;
+ __entry->why = why;
),
- TP_printk("P=%08x %s u=%d sp=%pSR",
+ TP_printk("P=%08x %s r=%d",
__entry->peer,
- __print_symbolic(__entry->op, rxrpc_peer_traces),
- __entry->usage,
- __entry->where)
+ __print_symbolic(__entry->why, rxrpc_peer_traces),
+ __entry->ref)
+ );
+
+TRACE_EVENT(rxrpc_bundle,
+ TP_PROTO(unsigned int bundle_debug_id, int ref, enum rxrpc_bundle_trace why),
+
+ TP_ARGS(bundle_debug_id, ref, why),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, bundle )
+ __field(int, ref )
+ __field(int, why )
+ ),
+
+ TP_fast_assign(
+ __entry->bundle = bundle_debug_id;
+ __entry->ref = ref;
+ __entry->why = why;
+ ),
+
+ TP_printk("CB=%08x %s r=%d",
+ __entry->bundle,
+ __print_symbolic(__entry->why, rxrpc_bundle_traces),
+ __entry->ref)
);
TRACE_EVENT(rxrpc_conn,
- TP_PROTO(unsigned int conn_debug_id, enum rxrpc_conn_trace op,
- int usage, const void *where),
+ TP_PROTO(unsigned int conn_debug_id, int ref, enum rxrpc_conn_trace why),
- TP_ARGS(conn_debug_id, op, usage, where),
+ TP_ARGS(conn_debug_id, ref, why),
TP_STRUCT__entry(
__field(unsigned int, conn )
- __field(int, op )
- __field(int, usage )
- __field(const void *, where )
+ __field(int, ref )
+ __field(int, why )
),
TP_fast_assign(
__entry->conn = conn_debug_id;
- __entry->op = op;
- __entry->usage = usage;
- __entry->where = where;
+ __entry->ref = ref;
+ __entry->why = why;
),
- TP_printk("C=%08x %s u=%d sp=%pSR",
+ TP_printk("C=%08x %s r=%d",
__entry->conn,
- __print_symbolic(__entry->op, rxrpc_conn_traces),
- __entry->usage,
- __entry->where)
+ __print_symbolic(__entry->why, rxrpc_conn_traces),
+ __entry->ref)
);
TRACE_EVENT(rxrpc_client,
@@ -455,63 +563,57 @@ TRACE_EVENT(rxrpc_client,
);
TRACE_EVENT(rxrpc_call,
- TP_PROTO(unsigned int call_debug_id, enum rxrpc_call_trace op,
- int usage, const void *where, const void *aux),
+ TP_PROTO(unsigned int call_debug_id, int ref, unsigned long aux,
+ enum rxrpc_call_trace why),
- TP_ARGS(call_debug_id, op, usage, where, aux),
+ TP_ARGS(call_debug_id, ref, aux, why),
TP_STRUCT__entry(
__field(unsigned int, call )
- __field(int, op )
- __field(int, usage )
- __field(const void *, where )
- __field(const void *, aux )
+ __field(int, ref )
+ __field(int, why )
+ __field(unsigned long, aux )
),
TP_fast_assign(
__entry->call = call_debug_id;
- __entry->op = op;
- __entry->usage = usage;
- __entry->where = where;
+ __entry->ref = ref;
+ __entry->why = why;
__entry->aux = aux;
),
- TP_printk("c=%08x %s u=%d sp=%pSR a=%p",
+ TP_printk("c=%08x %s r=%d a=%lx",
__entry->call,
- __print_symbolic(__entry->op, rxrpc_call_traces),
- __entry->usage,
- __entry->where,
+ __print_symbolic(__entry->why, rxrpc_call_traces),
+ __entry->ref,
__entry->aux)
);
TRACE_EVENT(rxrpc_skb,
- TP_PROTO(struct sk_buff *skb, enum rxrpc_skb_trace op,
- int usage, int mod_count, const void *where),
+ TP_PROTO(struct sk_buff *skb, int usage, int mod_count,
+ enum rxrpc_skb_trace why),
- TP_ARGS(skb, op, usage, mod_count, where),
+ TP_ARGS(skb, usage, mod_count, why),
TP_STRUCT__entry(
__field(struct sk_buff *, skb )
- __field(enum rxrpc_skb_trace, op )
__field(int, usage )
__field(int, mod_count )
- __field(const void *, where )
+ __field(enum rxrpc_skb_trace, why )
),
TP_fast_assign(
__entry->skb = skb;
- __entry->op = op;
__entry->usage = usage;
__entry->mod_count = mod_count;
- __entry->where = where;
+ __entry->why = why;
),
- TP_printk("s=%p Rx %s u=%d m=%d p=%pSR",
+ TP_printk("s=%p Rx %s u=%d m=%d",
__entry->skb,
- __print_symbolic(__entry->op, rxrpc_skb_traces),
+ __print_symbolic(__entry->why, rxrpc_skb_traces),
__entry->usage,
- __entry->mod_count,
- __entry->where)
+ __entry->mod_count)
);
TRACE_EVENT(rxrpc_rx_packet,
@@ -623,6 +725,7 @@ TRACE_EVENT(rxrpc_txqueue,
__field(rxrpc_seq_t, acks_hard_ack )
__field(rxrpc_seq_t, tx_bottom )
__field(rxrpc_seq_t, tx_top )
+ __field(rxrpc_seq_t, tx_prepared )
__field(int, tx_winsize )
),
@@ -632,16 +735,18 @@ TRACE_EVENT(rxrpc_txqueue,
__entry->acks_hard_ack = call->acks_hard_ack;
__entry->tx_bottom = call->tx_bottom;
__entry->tx_top = call->tx_top;
+ __entry->tx_prepared = call->tx_prepared;
__entry->tx_winsize = call->tx_winsize;
),
- TP_printk("c=%08x %s f=%08x h=%08x n=%u/%u/%u",
+ TP_printk("c=%08x %s f=%08x h=%08x n=%u/%u/%u/%u",
__entry->call,
__print_symbolic(__entry->why, rxrpc_txqueue_traces),
__entry->tx_bottom,
__entry->acks_hard_ack,
__entry->tx_top - __entry->tx_bottom,
__entry->tx_top - __entry->acks_hard_ack,
+ __entry->tx_prepared - __entry->tx_bottom,
__entry->tx_winsize)
);
@@ -733,6 +838,66 @@ TRACE_EVENT(rxrpc_rx_abort,
__entry->abort_code)
);
+TRACE_EVENT(rxrpc_rx_challenge,
+ TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial,
+ u32 version, u32 nonce, u32 min_level),
+
+ TP_ARGS(conn, serial, version, nonce, min_level),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, conn )
+ __field(rxrpc_serial_t, serial )
+ __field(u32, version )
+ __field(u32, nonce )
+ __field(u32, min_level )
+ ),
+
+ TP_fast_assign(
+ __entry->conn = conn->debug_id;
+ __entry->serial = serial;
+ __entry->version = version;
+ __entry->nonce = nonce;
+ __entry->min_level = min_level;
+ ),
+
+ TP_printk("C=%08x CHALLENGE %08x v=%x n=%x ml=%x",
+ __entry->conn,
+ __entry->serial,
+ __entry->version,
+ __entry->nonce,
+ __entry->min_level)
+ );
+
+TRACE_EVENT(rxrpc_rx_response,
+ TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial,
+ u32 version, u32 kvno, u32 ticket_len),
+
+ TP_ARGS(conn, serial, version, kvno, ticket_len),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, conn )
+ __field(rxrpc_serial_t, serial )
+ __field(u32, version )
+ __field(u32, kvno )
+ __field(u32, ticket_len )
+ ),
+
+ TP_fast_assign(
+ __entry->conn = conn->debug_id;
+ __entry->serial = serial;
+ __entry->version = version;
+ __entry->kvno = kvno;
+ __entry->ticket_len = ticket_len;
+ ),
+
+ TP_printk("C=%08x RESPONSE %08x v=%x kvno=%x tl=%x",
+ __entry->conn,
+ __entry->serial,
+ __entry->version,
+ __entry->kvno,
+ __entry->ticket_len)
+ );
+
TRACE_EVENT(rxrpc_rx_rwind_change,
TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial,
u32 rwind, bool wake),
@@ -1278,6 +1443,44 @@ TRACE_EVENT(rxrpc_congest,
__entry->sum.retrans_timeo ? " rTxTo" : "")
);
+TRACE_EVENT(rxrpc_reset_cwnd,
+ TP_PROTO(struct rxrpc_call *call, ktime_t now),
+
+ TP_ARGS(call, now),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call )
+ __field(enum rxrpc_congest_mode, mode )
+ __field(unsigned short, cwnd )
+ __field(unsigned short, extra )
+ __field(rxrpc_seq_t, hard_ack )
+ __field(rxrpc_seq_t, prepared )
+ __field(ktime_t, since_last_tx )
+ __field(bool, has_data )
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->mode = call->cong_mode;
+ __entry->cwnd = call->cong_cwnd;
+ __entry->extra = call->cong_extra;
+ __entry->hard_ack = call->acks_hard_ack;
+ __entry->prepared = call->tx_prepared - call->tx_bottom;
+ __entry->since_last_tx = ktime_sub(now, call->tx_last_sent);
+ __entry->has_data = !list_empty(&call->tx_sendmsg);
+ ),
+
+ TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu d=%u",
+ __entry->call,
+ __entry->hard_ack,
+ __print_symbolic(__entry->mode, rxrpc_congest_modes),
+ __entry->cwnd,
+ __entry->extra,
+ __entry->prepared,
+ ktime_to_ns(__entry->since_last_tx),
+ __entry->has_data)
+ );
+
TRACE_EVENT(rxrpc_disconnect_call,
TP_PROTO(struct rxrpc_call *call),
@@ -1352,6 +1555,7 @@ TRACE_EVENT(rxrpc_connect_call,
__field(unsigned long, user_call_ID )
__field(u32, cid )
__field(u32, call_id )
+ __field_struct(struct sockaddr_rxrpc, srx )
),
TP_fast_assign(
@@ -1359,33 +1563,42 @@ TRACE_EVENT(rxrpc_connect_call,
__entry->user_call_ID = call->user_call_ID;
__entry->cid = call->cid;
__entry->call_id = call->call_id;
+ __entry->srx = call->dest_srx;
),
- TP_printk("c=%08x u=%p %08x:%08x",
+ TP_printk("c=%08x u=%p %08x:%08x dst=%pISp",
__entry->call,
(void *)__entry->user_call_ID,
__entry->cid,
- __entry->call_id)
+ __entry->call_id,
+ &__entry->srx.transport)
);
TRACE_EVENT(rxrpc_resend,
- TP_PROTO(struct rxrpc_call *call),
+ TP_PROTO(struct rxrpc_call *call, struct sk_buff *ack),
- TP_ARGS(call),
+ TP_ARGS(call, ack),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(rxrpc_seq_t, seq )
+ __field(rxrpc_seq_t, transmitted )
+ __field(rxrpc_serial_t, ack_serial )
),
TP_fast_assign(
+ struct rxrpc_skb_priv *sp = ack ? rxrpc_skb(ack) : NULL;
__entry->call = call->debug_id;
__entry->seq = call->acks_hard_ack;
+ __entry->transmitted = call->tx_transmitted;
+ __entry->ack_serial = sp ? sp->hdr.serial : 0;
),
- TP_printk("c=%08x q=%x",
+ TP_printk("c=%08x r=%x q=%x tq=%x",
__entry->call,
- __entry->seq)
+ __entry->ack_serial,
+ __entry->seq,
+ __entry->transmitted)
);
TRACE_EVENT(rxrpc_rx_icmp,
@@ -1586,6 +1799,47 @@ TRACE_EVENT(rxrpc_txbuf,
__entry->ref)
);
+TRACE_EVENT(rxrpc_poke_call,
+ TP_PROTO(struct rxrpc_call *call, bool busy,
+ enum rxrpc_call_poke_trace what),
+
+ TP_ARGS(call, busy, what),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call_debug_id )
+ __field(bool, busy )
+ __field(enum rxrpc_call_poke_trace, what )
+ ),
+
+ TP_fast_assign(
+ __entry->call_debug_id = call->debug_id;
+ __entry->busy = busy;
+ __entry->what = what;
+ ),
+
+ TP_printk("c=%08x %s%s",
+ __entry->call_debug_id,
+ __print_symbolic(__entry->what, rxrpc_call_poke_traces),
+ __entry->busy ? "!" : "")
+ );
+
+TRACE_EVENT(rxrpc_call_poked,
+ TP_PROTO(struct rxrpc_call *call),
+
+ TP_ARGS(call),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call_debug_id )
+ ),
+
+ TP_fast_assign(
+ __entry->call_debug_id = call->debug_id;
+ ),
+
+ TP_printk("c=%08x",
+ __entry->call_debug_id)
+ );
+
#undef EM
#undef E_
#endif /* _TRACE_RXRPC_H */
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 70191d96af89..3782d4219ac9 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -658,11 +658,24 @@ enum devlink_resource_unit {
DEVLINK_RESOURCE_UNIT_ENTRY,
};
+enum devlink_port_fn_attr_cap {
+ DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT,
+ DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT,
+
+ /* Add new caps above */
+ __DEVLINK_PORT_FN_ATTR_CAPS_MAX,
+};
+
+#define DEVLINK_PORT_FN_CAP_ROCE _BITUL(DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT)
+#define DEVLINK_PORT_FN_CAP_MIGRATABLE \
+ _BITUL(DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT)
+
enum devlink_port_function_attr {
DEVLINK_PORT_FUNCTION_ATTR_UNSPEC,
DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */
DEVLINK_PORT_FN_ATTR_STATE, /* u8 */
DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */
+ DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */
__DEVLINK_PORT_FUNCTION_ATTR_MAX,
DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index aaf7c6963d61..5799a9db034e 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -51,6 +51,7 @@ enum {
ETHTOOL_MSG_MODULE_SET,
ETHTOOL_MSG_PSE_GET,
ETHTOOL_MSG_PSE_SET,
+ ETHTOOL_MSG_RSS_GET,
/* add new constants above here */
__ETHTOOL_MSG_USER_CNT,
@@ -97,6 +98,7 @@ enum {
ETHTOOL_MSG_MODULE_GET_REPLY,
ETHTOOL_MSG_MODULE_NTF,
ETHTOOL_MSG_PSE_GET_REPLY,
+ ETHTOOL_MSG_RSS_GET_REPLY,
/* add new constants above here */
__ETHTOOL_MSG_KERNEL_CNT,
@@ -880,6 +882,18 @@ enum {
ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1)
};
+enum {
+ ETHTOOL_A_RSS_UNSPEC,
+ ETHTOOL_A_RSS_HEADER,
+ ETHTOOL_A_RSS_CONTEXT, /* u32 */
+ ETHTOOL_A_RSS_HFUNC, /* u32 */
+ ETHTOOL_A_RSS_INDIR, /* binary */
+ ETHTOOL_A_RSS_HKEY, /* binary */
+
+ __ETHTOOL_A_RSS_CNT,
+ ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1),
+};
+
/* generic netlink info */
#define ETHTOOL_GENL_NAME "ethtool"
#define ETHTOOL_GENL_VERSION 1
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 55501e5e7ac8..a2c66b3d7f0f 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -31,8 +31,9 @@ enum {
SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13),
SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14),
SOF_TIMESTAMPING_BIND_PHC = (1 << 15),
+ SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16),
- SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_BIND_PHC,
+ SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
SOF_TIMESTAMPING_LAST
};
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 94066f87e9ee..c5d62ee82567 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -277,11 +277,25 @@ enum ovs_vport_attr {
OVS_VPORT_ATTR_PAD,
OVS_VPORT_ATTR_IFINDEX,
OVS_VPORT_ATTR_NETNSID,
+ OVS_VPORT_ATTR_UPCALL_STATS,
__OVS_VPORT_ATTR_MAX
};
#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
+/**
+ * enum ovs_vport_upcall_attr - attributes for %OVS_VPORT_UPCALL* commands
+ * @OVS_VPORT_UPCALL_ATTR_SUCCESS: 64-bit upcall success packets.
+ * @OVS_VPORT_UPCALL_ATTR_FAIL: 64-bit upcall fail packets.
+ */
+enum ovs_vport_upcall_attr {
+ OVS_VPORT_UPCALL_ATTR_SUCCESS,
+ OVS_VPORT_UPCALL_ATTR_FAIL,
+ __OVS_VPORT_UPCALL_ATTR_MAX
+};
+
+#define OVS_VPORT_UPCALL_ATTR_MAX (__OVS_VPORT_UPCALL_ATTR_MAX - 1)
+
enum {
OVS_VXLAN_EXT_UNSPEC,
OVS_VXLAN_EXT_GBP, /* Flag or __u32 */
diff --git a/ipc/sem.c b/ipc/sem.c
index c8496f98b139..00f88aa01ac5 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -2179,14 +2179,15 @@ long __do_semtimedop(int semid, struct sembuf *sops,
* scenarios where we were awakened externally, during the
* window between wake_q_add() and wake_up_q().
*/
+ rcu_read_lock();
error = READ_ONCE(queue.status);
if (error != -EINTR) {
/* see SEM_BARRIER_2 for purpose/pairing */
smp_acquire__after_ctrl_dep();
+ rcu_read_unlock();
goto out;
}
- rcu_read_lock();
locknum = sem_lock(sma, sops, nsops);
if (!ipc_valid_object(&sma->sem_perm))
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index fd4020835ec6..367b0a42ada9 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -167,7 +167,6 @@ struct cgroup_mgctx {
extern spinlock_t css_set_lock;
extern struct cgroup_subsys *cgroup_subsys[];
extern struct list_head cgroup_roots;
-extern struct file_system_type cgroup_fs_type;
/* iterate across the hierarchies */
#define for_each_root(root) \
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7091bbf88ee7..7f04f995c975 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2291,6 +2291,7 @@ event_sched_out(struct perf_event *event,
!event->pending_work) {
event->pending_work = 1;
dec = false;
+ WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
task_work_add(current, &event->pending_task, TWA_RESUME);
}
if (dec)
@@ -2336,6 +2337,7 @@ group_sched_out(struct perf_event *group_event,
#define DETACH_GROUP 0x01UL
#define DETACH_CHILD 0x02UL
+#define DETACH_DEAD 0x04UL
/*
* Cross CPU call to remove a performance event
@@ -2356,12 +2358,20 @@ __perf_remove_from_context(struct perf_event *event,
update_cgrp_time_from_cpuctx(cpuctx, false);
}
+ /*
+ * Ensure event_sched_out() switches to OFF, at the very least
+ * this avoids raising perf_pending_task() at this time.
+ */
+ if (flags & DETACH_DEAD)
+ event->pending_disable = 1;
event_sched_out(event, cpuctx, ctx);
if (flags & DETACH_GROUP)
perf_group_detach(event);
if (flags & DETACH_CHILD)
perf_child_detach(event);
list_del_event(event, ctx);
+ if (flags & DETACH_DEAD)
+ event->state = PERF_EVENT_STATE_DEAD;
if (!ctx->nr_events && ctx->is_active) {
if (ctx == &cpuctx->ctx)
@@ -5121,9 +5131,7 @@ int perf_event_release_kernel(struct perf_event *event)
ctx = perf_event_ctx_lock(event);
WARN_ON_ONCE(ctx->parent_ctx);
- perf_remove_from_context(event, DETACH_GROUP);
- raw_spin_lock_irq(&ctx->lock);
/*
* Mark this event as STATE_DEAD, there is no external reference to it
* anymore.
@@ -5135,8 +5143,7 @@ int perf_event_release_kernel(struct perf_event *event)
* Thus this guarantees that we will in fact observe and kill _ALL_
* child events.
*/
- event->state = PERF_EVENT_STATE_DEAD;
- raw_spin_unlock_irq(&ctx->lock);
+ perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD);
perf_event_ctx_unlock(event, ctx);
@@ -6577,6 +6584,8 @@ static void perf_pending_task(struct callback_head *head)
if (rctx >= 0)
perf_swevent_put_recursion_context(rctx);
preempt_enable_notrace();
+
+ put_event(event);
}
#ifdef CONFIG_GUEST_PERF_EVENTS
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 188c305aeb8b..c6d9dec11b74 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -267,13 +267,14 @@ int proc_dostring(struct ctl_table *table, int write,
ppos);
}
-static size_t proc_skip_spaces(char **buf)
+static void proc_skip_spaces(char **buf, size_t *size)
{
- size_t ret;
- char *tmp = skip_spaces(*buf);
- ret = tmp - *buf;
- *buf = tmp;
- return ret;
+ while (*size) {
+ if (!isspace(**buf))
+ break;
+ (*size)--;
+ (*buf)++;
+ }
}
static void proc_skip_char(char **buf, size_t *size, const char v)
@@ -342,13 +343,12 @@ static int proc_get_long(char **buf, size_t *size,
unsigned long *val, bool *neg,
const char *perm_tr, unsigned perm_tr_len, char *tr)
{
- int len;
char *p, tmp[TMPBUFLEN];
+ ssize_t len = *size;
- if (!*size)
+ if (len <= 0)
return -EINVAL;
- len = *size;
if (len > TMPBUFLEN - 1)
len = TMPBUFLEN - 1;
@@ -521,7 +521,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
bool neg;
if (write) {
- left -= proc_skip_spaces(&p);
+ proc_skip_spaces(&p, &left);
if (!left)
break;
@@ -548,7 +548,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
if (!write && !first && left && !err)
proc_put_char(&buffer, &left, '\n');
if (write && !err && left)
- left -= proc_skip_spaces(&p);
+ proc_skip_spaces(&p, &left);
if (write && first)
return err ? : -EINVAL;
*lenp -= left;
@@ -590,7 +590,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data,
if (left > PAGE_SIZE - 1)
left = PAGE_SIZE - 1;
- left -= proc_skip_spaces(&p);
+ proc_skip_spaces(&p, &left);
if (!left) {
err = -EINVAL;
goto out_free;
@@ -610,7 +610,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data,
}
if (!err && left)
- left -= proc_skip_spaces(&p);
+ proc_skip_spaces(&p, &left);
out_free:
if (err)
@@ -1075,7 +1075,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
if (write) {
bool neg;
- left -= proc_skip_spaces(&p);
+ proc_skip_spaces(&p, &left);
if (!left)
break;
@@ -1104,7 +1104,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
if (!write && !first && left && !err)
proc_put_char(&buffer, &left, '\n');
if (write && !err)
- left -= proc_skip_spaces(&p);
+ proc_skip_spaces(&p, &left);
if (write && first)
return err ? : -EINVAL;
*lenp -= left;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a1005415f0f4..3638b3424be5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -399,6 +399,7 @@ config FRAME_WARN
default 2048 if GCC_PLUGIN_LATENT_ENTROPY
default 2048 if PARISC
default 1536 if (!64BIT && XTENSA)
+ default 1280 if KASAN && !64BIT
default 1024 if !64BIT
default 2048 if 64BIT
help
@@ -1874,8 +1875,14 @@ config NETDEV_NOTIFIER_ERROR_INJECT
If unsure, say N.
config FUNCTION_ERROR_INJECTION
- def_bool y
+ bool "Fault-injections of functions"
depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
+ help
+ Add fault injections into various functions that are annotated with
+ ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return
+ value of these functions. This is useful to test error paths of code.
+
+ If unsure, say N.
config FAULT_INJECTION
bool "Fault-injection framework"
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index e12bbfb240b8..6ae2ba8e06a2 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -231,6 +231,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht,
struct rhash_head *head, *next, *entry;
struct rhash_head __rcu **pprev = NULL;
unsigned int new_hash;
+ unsigned long flags;
if (new_tbl->nest)
goto out;
@@ -253,13 +254,14 @@ static int rhashtable_rehash_one(struct rhashtable *ht,
new_hash = head_hashfn(ht, new_tbl, entry);
- rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING);
+ flags = rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash],
+ SINGLE_DEPTH_NESTING);
head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);
RCU_INIT_POINTER(entry->next, head);
- rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry);
+ rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry, flags);
if (pprev)
rcu_assign_pointer(*pprev, next);
@@ -276,18 +278,19 @@ static int rhashtable_rehash_chain(struct rhashtable *ht,
{
struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash);
+ unsigned long flags;
int err;
if (!bkt)
return 0;
- rht_lock(old_tbl, bkt);
+ flags = rht_lock(old_tbl, bkt);
while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
;
if (err == -ENOENT)
err = 0;
- rht_unlock(old_tbl, bkt);
+ rht_unlock(old_tbl, bkt, flags);
return err;
}
@@ -590,6 +593,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
struct bucket_table *new_tbl;
struct bucket_table *tbl;
struct rhash_lock_head __rcu **bkt;
+ unsigned long flags;
unsigned int hash;
void *data;
@@ -607,7 +611,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
data = ERR_PTR(-EAGAIN);
} else {
- rht_lock(tbl, bkt);
+ flags = rht_lock(tbl, bkt);
data = rhashtable_lookup_one(ht, bkt, tbl,
hash, key, obj);
new_tbl = rhashtable_insert_one(ht, bkt, tbl,
@@ -615,7 +619,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
if (PTR_ERR(new_tbl) != -EEXIST)
data = ERR_CAST(new_tbl);
- rht_unlock(tbl, bkt);
+ rht_unlock(tbl, bkt, flags);
}
} while (!IS_ERR_OR_NULL(new_tbl));
diff --git a/mm/compaction.c b/mm/compaction.c
index c51f7f545afe..1f6da31dd9a5 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -985,28 +985,28 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
}
/*
+ * Be careful not to clear PageLRU until after we're
+ * sure the page is not being freed elsewhere -- the
+ * page release code relies on it.
+ */
+ if (unlikely(!get_page_unless_zero(page)))
+ goto isolate_fail;
+
+ /*
* Migration will fail if an anonymous page is pinned in memory,
* so avoid taking lru_lock and isolating it unnecessarily in an
* admittedly racy check.
*/
mapping = page_mapping(page);
- if (!mapping && page_count(page) > page_mapcount(page))
- goto isolate_fail;
+ if (!mapping && (page_count(page) - 1) > total_mapcount(page))
+ goto isolate_fail_put;
/*
* Only allow to migrate anonymous pages in GFP_NOFS context
* because those do not depend on fs locks.
*/
if (!(cc->gfp_mask & __GFP_FS) && mapping)
- goto isolate_fail;
-
- /*
- * Be careful not to clear PageLRU until after we're
- * sure the page is not being freed elsewhere -- the
- * page release code relies on it.
- */
- if (unlikely(!get_page_unless_zero(page)))
- goto isolate_fail;
+ goto isolate_fail_put;
/* Only take pages on LRU: a check now makes later tests safe */
if (!PageLRU(page))
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 5ce403378c20..07e5f1bdf025 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -2283,12 +2283,54 @@ static struct damos *damon_sysfs_mk_scheme(
&wmarks);
}
+static void damon_sysfs_update_scheme(struct damos *scheme,
+ struct damon_sysfs_scheme *sysfs_scheme)
+{
+ struct damon_sysfs_access_pattern *access_pattern =
+ sysfs_scheme->access_pattern;
+ struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas;
+ struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights;
+ struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks;
+
+ scheme->pattern.min_sz_region = access_pattern->sz->min;
+ scheme->pattern.max_sz_region = access_pattern->sz->max;
+ scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min;
+ scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max;
+ scheme->pattern.min_age_region = access_pattern->age->min;
+ scheme->pattern.max_age_region = access_pattern->age->max;
+
+ scheme->action = sysfs_scheme->action;
+
+ scheme->quota.ms = sysfs_quotas->ms;
+ scheme->quota.sz = sysfs_quotas->sz;
+ scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms;
+ scheme->quota.weight_sz = sysfs_weights->sz;
+ scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses;
+ scheme->quota.weight_age = sysfs_weights->age;
+
+ scheme->wmarks.metric = sysfs_wmarks->metric;
+ scheme->wmarks.interval = sysfs_wmarks->interval_us;
+ scheme->wmarks.high = sysfs_wmarks->high;
+ scheme->wmarks.mid = sysfs_wmarks->mid;
+ scheme->wmarks.low = sysfs_wmarks->low;
+}
+
static int damon_sysfs_set_schemes(struct damon_ctx *ctx,
struct damon_sysfs_schemes *sysfs_schemes)
{
- int i;
+ struct damos *scheme, *next;
+ int i = 0;
+
+ damon_for_each_scheme_safe(scheme, next, ctx) {
+ if (i < sysfs_schemes->nr)
+ damon_sysfs_update_scheme(scheme,
+ sysfs_schemes->schemes_arr[i]);
+ else
+ damon_destroy_scheme(scheme);
+ i++;
+ }
- for (i = 0; i < sysfs_schemes->nr; i++) {
+ for (; i < sysfs_schemes->nr; i++) {
struct damos *scheme, *next;
scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f1385c3b6c96..e36ca75311a5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5206,17 +5206,22 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
__unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
- /*
- * Unlock and free the vma lock before releasing i_mmap_rwsem. When
- * the vma_lock is freed, this makes the vma ineligible for pmd
- * sharing. And, i_mmap_rwsem is required to set up pmd sharing.
- * This is important as page tables for this unmapped range will
- * be asynchrously deleted. If the page tables are shared, there
- * will be issues when accessed by someone else.
- */
- __hugetlb_vma_unlock_write_free(vma);
-
- i_mmap_unlock_write(vma->vm_file->f_mapping);
+ if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */
+ /*
+ * Unlock and free the vma lock before releasing i_mmap_rwsem.
+ * When the vma_lock is freed, this makes the vma ineligible
+ * for pmd sharing. And, i_mmap_rwsem is required to set up
+ * pmd sharing. This is important as page tables for this
+ * unmapped range will be asynchronously deleted. If the page
+ * tables are shared, there will be issues when accessed by
+ * someone else.
+ */
+ __hugetlb_vma_unlock_write_free(vma);
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ } else {
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ hugetlb_vma_unlock_write(vma);
+ }
}
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index a8d5ef2a77d2..3703a56571c1 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1051,6 +1051,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
_pmd = pmdp_collapse_flush(vma, address, pmd);
spin_unlock(pmd_ptl);
mmu_notifier_invalidate_range_end(&range);
+ tlb_remove_table_sync_one();
spin_lock(pte_ptl);
result = __collapse_huge_page_isolate(vma, address, pte, cc,
@@ -1379,16 +1380,43 @@ static int set_huge_pmd(struct vm_area_struct *vma, unsigned long addr,
return SCAN_SUCCEED;
}
+/*
+ * A note about locking:
+ * Trying to take the page table spinlocks would be useless here because those
+ * are only used to synchronize:
+ *
+ * - modifying terminal entries (ones that point to a data page, not to another
+ * page table)
+ * - installing *new* non-terminal entries
+ *
+ * Instead, we need roughly the same kind of protection as free_pgtables() or
+ * mm_take_all_locks() (but only for a single VMA):
+ * The mmap lock together with this VMA's rmap locks covers all paths towards
+ * the page table entries we're messing with here, except for hardware page
+ * table walks and lockless_pages_from_mm().
+ */
static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
- spinlock_t *ptl;
pmd_t pmd;
+ struct mmu_notifier_range range;
mmap_assert_write_locked(mm);
- ptl = pmd_lock(vma->vm_mm, pmdp);
+ if (vma->vm_file)
+ lockdep_assert_held_write(&vma->vm_file->f_mapping->i_mmap_rwsem);
+ /*
+ * All anon_vmas attached to the VMA have the same root and are
+ * therefore locked by the same lock.
+ */
+ if (vma->anon_vma)
+ lockdep_assert_held_write(&vma->anon_vma->root->rwsem);
+
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, addr,
+ addr + HPAGE_PMD_SIZE);
+ mmu_notifier_invalidate_range_start(&range);
pmd = pmdp_collapse_flush(vma, addr, pmdp);
- spin_unlock(ptl);
+ tlb_remove_table_sync_one();
+ mmu_notifier_invalidate_range_end(&range);
mm_dec_nr_ptes(mm);
page_table_check_pte_clear_range(mm, addr, pmd);
pte_free(mm, pmd_pgtable(pmd));
@@ -1439,6 +1467,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false))
return SCAN_VMA_CHECK;
+ /*
+ * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings
+ * that got written to. Without this, we'd have to also lock the
+ * anon_vma if one exists.
+ */
+ if (vma->anon_vma)
+ return SCAN_VMA_CHECK;
+
/* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */
if (userfaultfd_wp(vma))
return SCAN_PTE_UFFD_WP;
@@ -1472,6 +1508,20 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
goto drop_hpage;
}
+ /*
+ * We need to lock the mapping so that from here on, only GUP-fast and
+ * hardware page walks can access the parts of the page tables that
+ * we're operating on.
+ * See collapse_and_free_pmd().
+ */
+ i_mmap_lock_write(vma->vm_file->f_mapping);
+
+ /*
+ * This spinlock should be unnecessary: Nobody else should be accessing
+ * the page tables under spinlock protection here, only
+ * lockless_pages_from_mm() and the hardware page walker can access page
+ * tables while all the high-level locks are held in write mode.
+ */
start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
result = SCAN_FAIL;
@@ -1526,6 +1576,8 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
/* step 4: remove pte entries */
collapse_and_free_pmd(mm, vma, haddr, pmd);
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+
maybe_install_pmd:
/* step 5: install pmd entry */
result = install_pmd
@@ -1539,6 +1591,7 @@ drop_hpage:
abort:
pte_unmap_unlock(start_pte, ptl);
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
goto drop_hpage;
}
@@ -1595,7 +1648,8 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
* An alternative would be drop the check, but check that page
* table is clear before calling pmdp_collapse_flush() under
* ptl. It has higher chance to recover THP for the VMA, but
- * has higher cost too.
+ * has higher cost too. It would also probably require locking
+ * the anon_vma.
*/
if (vma->anon_vma) {
result = SCAN_PAGE_ANON;
diff --git a/mm/madvise.c b/mm/madvise.c
index c7105ec6d08c..b913ba6efc10 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -772,8 +772,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
* Application no longer needs these pages. If the pages are dirty,
* it's OK to just throw them away. The app will be more careful about
* data it wants to keep. Be sure to free swap resources too. The
- * zap_page_range call sets things up for shrink_active_list to actually free
- * these pages later if no one else has touched them in the meantime,
+ * zap_page_range_single call sets things up for shrink_active_list to actually
+ * free these pages later if no one else has touched them in the meantime,
* although we could add these pages to a global reuse list for
* shrink_active_list to pick up before reclaiming other pages.
*
@@ -790,7 +790,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- zap_page_range(vma, start, end - start);
+ zap_page_range_single(vma, start, end - start, NULL);
return 0;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a1a35c12635e..266a1ab05434 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4832,6 +4832,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
unsigned int efd, cfd;
struct fd efile;
struct fd cfile;
+ struct dentry *cdentry;
const char *name;
char *endp;
int ret;
@@ -4886,6 +4887,16 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
goto out_put_cfile;
/*
+ * The control file must be a regular cgroup1 file. As a regular cgroup
+ * file can't be renamed, it's safe to access its name afterwards.
+ */
+ cdentry = cfile.file->f_path.dentry;
+ if (cdentry->d_sb->s_type != &cgroup_fs_type || !d_is_reg(cdentry)) {
+ ret = -EINVAL;
+ goto out_put_cfile;
+ }
+
+ /*
* Determine the event callbacks and set them in @event. This used
* to be done via struct cftype but cgroup core no longer knows
* about these events. The following is crude but the whole thing
@@ -4893,7 +4904,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
*
* DO NOT ADD NEW FILES.
*/
- name = cfile.file->f_path.dentry->d_name.name;
+ name = cdentry->d_name.name;
if (!strcmp(name, "memory.usage_in_bytes")) {
event->register_event = mem_cgroup_usage_register_event;
@@ -4917,7 +4928,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
* automatically removed on cgroup destruction but the removal is
* asynchronous, so take an extra ref on @css.
*/
- cfile_css = css_tryget_online_from_dir(cfile.file->f_path.dentry->d_parent,
+ cfile_css = css_tryget_online_from_dir(cdentry->d_parent,
&memory_cgrp_subsys);
ret = -EINVAL;
if (IS_ERR(cfile_css))
diff --git a/mm/memory.c b/mm/memory.c
index 8a6d5c823f91..8c8420934d60 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1341,15 +1341,6 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
return ret;
}
-/*
- * Parameter block passed down to zap_pte_range in exceptional cases.
- */
-struct zap_details {
- struct folio *single_folio; /* Locked folio to be unmapped */
- bool even_cows; /* Zap COWed private pages too? */
- zap_flags_t zap_flags; /* Extra flags for zapping */
-};
-
/* Whether we should zap all COWed (private) pages too */
static inline bool should_zap_cows(struct zap_details *details)
{
@@ -1720,7 +1711,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
{
struct mmu_notifier_range range;
struct zap_details details = {
- .zap_flags = ZAP_FLAG_DROP_MARKER,
+ .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP,
/* Careful - we need to zap private pages too! */
.even_cows = true,
};
@@ -1774,19 +1765,27 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
*
* The range must fit into one VMA.
*/
-static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
+void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *details)
{
+ const unsigned long end = address + size;
struct mmu_notifier_range range;
struct mmu_gather tlb;
lru_add_drain();
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
- address, address + size);
+ address, end);
+ if (is_vm_hugetlb_page(vma))
+ adjust_range_if_pmd_sharing_possible(vma, &range.start,
+ &range.end);
tlb_gather_mmu(&tlb, vma->vm_mm);
update_hiwater_rss(vma->vm_mm);
mmu_notifier_invalidate_range_start(&range);
- unmap_single_vma(&tlb, vma, address, range.end, details);
+ /*
+ * unmap 'address-end' not 'range.start-range.end' as range
+ * could have been expanded for hugetlb pmd sharing.
+ */
+ unmap_single_vma(&tlb, vma, address, end, details);
mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 74a84eb33b90..a5eb2f175da0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1779,9 +1779,6 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
*/
pgoff = 0;
get_area = shmem_get_unmapped_area;
- } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
- /* Ensures that larger anonymous mappings are THP aligned. */
- get_area = thp_get_unmapped_area;
}
addr = get_area(file, addr, len, pgoff, flags);
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index add4244e5790..3a2c3f8cad2f 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -153,7 +153,7 @@ static void tlb_remove_table_smp_sync(void *arg)
/* Simply deliver the interrupt */
}
-static void tlb_remove_table_sync_one(void)
+void tlb_remove_table_sync_one(void)
{
/*
* This isn't an RCU grace period and hence the page-tables cannot be
@@ -177,8 +177,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch)
#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */
-static void tlb_remove_table_sync_one(void) { }
-
static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
__tlb_remove_table_free(batch);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 026199c047e0..8fcc5fa768c0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3987,7 +3987,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
goto next;
if (!pmd_trans_huge(pmd[i])) {
- if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) &&
+ if (arch_has_hw_nonleaf_pmd_young() &&
get_cap(LRU_GEN_NONLEAF_YOUNG))
pmdp_test_and_clear_young(vma, addr, pmd + i);
goto next;
@@ -4085,14 +4085,14 @@ restart:
#endif
walk->mm_stats[MM_NONLEAF_TOTAL]++;
-#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
- if (get_cap(LRU_GEN_NONLEAF_YOUNG)) {
+ if (arch_has_hw_nonleaf_pmd_young() &&
+ get_cap(LRU_GEN_NONLEAF_YOUNG)) {
if (!pmd_young(val))
continue;
walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
}
-#endif
+
if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
continue;
@@ -5392,7 +5392,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c
if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
caps |= BIT(LRU_GEN_MM_WALK);
- if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG))
+ if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 215af9b3b589..c57d643afb10 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -972,6 +972,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
hci_dev_lock(hdev);
hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type);
hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
if (!hcon)
return -ENOENT;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index dc65974f5adb..1c3c7ff5c3c6 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -737,7 +737,7 @@ static int __init bt_init(void)
err = bt_sysfs_init();
if (err < 0)
- return err;
+ goto cleanup_led;
err = sock_register(&bt_sock_family_ops);
if (err)
@@ -773,6 +773,8 @@ unregister_socket:
sock_unregister(PF_BLUETOOTH);
cleanup_sysfs:
bt_sysfs_cleanup();
+cleanup_led:
+ bt_leds_cleanup();
return err;
}
diff --git a/net/bluetooth/hci_codec.c b/net/bluetooth/hci_codec.c
index 38201532f58e..3cc135bb1d30 100644
--- a/net/bluetooth/hci_codec.c
+++ b/net/bluetooth/hci_codec.c
@@ -72,9 +72,8 @@ static void hci_read_codec_capabilities(struct hci_dev *hdev, __u8 transport,
continue;
}
- skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS,
- sizeof(*cmd), cmd,
- HCI_CMD_TIMEOUT);
+ skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS,
+ sizeof(*cmd), cmd, 0, HCI_CMD_TIMEOUT, NULL);
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Failed to read codec capabilities (%ld)",
PTR_ERR(skb));
@@ -127,8 +126,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev)
struct hci_op_read_local_codec_caps caps;
__u8 i;
- skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL,
- HCI_CMD_TIMEOUT);
+ skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL,
+ 0, HCI_CMD_TIMEOUT, NULL);
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Failed to read local supported codecs (%ld)",
@@ -158,7 +157,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev)
for (i = 0; i < std_codecs->num; i++) {
caps.id = std_codecs->codec[i];
caps.direction = 0x00;
- hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps);
+ hci_read_codec_capabilities(hdev,
+ LOCAL_CODEC_ACL_MASK | LOCAL_CODEC_SCO_MASK, &caps);
}
skb_pull(skb, flex_array_size(std_codecs, codec, std_codecs->num)
@@ -178,7 +178,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev)
caps.cid = vnd_codecs->codec[i].cid;
caps.vid = vnd_codecs->codec[i].vid;
caps.direction = 0x00;
- hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps);
+ hci_read_codec_capabilities(hdev,
+ LOCAL_CODEC_ACL_MASK | LOCAL_CODEC_SCO_MASK, &caps);
}
error:
@@ -194,8 +195,8 @@ void hci_read_supported_codecs_v2(struct hci_dev *hdev)
struct hci_op_read_local_codec_caps caps;
__u8 i;
- skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL,
- HCI_CMD_TIMEOUT);
+ skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL,
+ 0, HCI_CMD_TIMEOUT, NULL);
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Failed to read local supported codecs (%ld)",
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 0540555b3704..d97fac4f7130 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2764,7 +2764,8 @@ int hci_register_suspend_notifier(struct hci_dev *hdev)
{
int ret = 0;
- if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) {
+ if (!hdev->suspend_notifier.notifier_call &&
+ !test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) {
hdev->suspend_notifier.notifier_call = hci_suspend_notifier;
ret = register_pm_notifier(&hdev->suspend_notifier);
}
@@ -2776,8 +2777,11 @@ int hci_unregister_suspend_notifier(struct hci_dev *hdev)
{
int ret = 0;
- if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks))
+ if (hdev->suspend_notifier.notifier_call) {
ret = unregister_pm_notifier(&hdev->suspend_notifier);
+ if (!ret)
+ hdev->suspend_notifier.notifier_call = NULL;
+ }
return ret;
}
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 5a0296a4352e..f7e006a36382 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -269,7 +269,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
const void *param)
{
- bt_dev_err(req->hdev, "HCI_REQ-0x%4.4x", opcode);
+ bt_dev_dbg(req->hdev, "HCI_REQ-0x%4.4x", opcode);
hci_req_add_ev(req, opcode, plen, param, 0);
}
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 76c3107c9f91..1fc693122a47 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -12,6 +12,7 @@
#include <net/bluetooth/mgmt.h>
#include "hci_request.h"
+#include "hci_codec.h"
#include "hci_debugfs.h"
#include "smp.h"
#include "eir.h"
@@ -3780,7 +3781,8 @@ static int hci_read_page_scan_activity_sync(struct hci_dev *hdev)
static int hci_read_def_err_data_reporting_sync(struct hci_dev *hdev)
{
if (!(hdev->commands[18] & 0x04) ||
- !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING))
+ !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) ||
+ test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_READ_DEF_ERR_DATA_REPORTING,
@@ -4238,11 +4240,12 @@ static int hci_set_event_mask_page_2_sync(struct hci_dev *hdev)
/* Read local codec list if the HCI command is supported */
static int hci_read_local_codecs_sync(struct hci_dev *hdev)
{
- if (!(hdev->commands[29] & 0x20))
- return 0;
+ if (hdev->commands[45] & 0x04)
+ hci_read_supported_codecs_v2(hdev);
+ else if (hdev->commands[29] & 0x20)
+ hci_read_supported_codecs(hdev);
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL,
- HCI_CMD_TIMEOUT);
+ return 0;
}
/* Read local pairing options if the HCI command is supported */
@@ -4298,7 +4301,8 @@ static int hci_set_err_data_report_sync(struct hci_dev *hdev)
bool enabled = hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED);
if (!(hdev->commands[18] & 0x08) ||
- !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING))
+ !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) ||
+ test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks))
return 0;
if (enabled == hdev->err_data_reporting)
@@ -4457,6 +4461,9 @@ static const struct {
HCI_QUIRK_BROKEN(STORED_LINK_KEY,
"HCI Delete Stored Link Key command is advertised, "
"but not supported."),
+ HCI_QUIRK_BROKEN(ERR_DATA_REPORTING,
+ "HCI Read Default Erroneous Data Reporting command is "
+ "advertised, but not supported."),
HCI_QUIRK_BROKEN(READ_TRANSMIT_POWER,
"HCI Read Transmit Power Level command is advertised, "
"but not supported."),
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index f825857db6d0..26db929b97c4 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -879,6 +879,7 @@ static int iso_listen_bis(struct sock *sk)
iso_pi(sk)->bc_sid);
hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
return err;
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 9c24947aa41e..9fdede5fe71c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4453,7 +4453,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
chan->ident = cmd->ident;
l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp);
- chan->num_conf_rsp++;
+ if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP)
+ chan->num_conf_rsp++;
/* Reset config buffer. */
chan->conf_len = 0;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 6094ef7cffcd..c9bfd263dcef 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -1128,7 +1128,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
}
sock_init_data(NULL, sk);
- skb = build_skb(data, 0);
+ skb = slab_build_skb(data);
if (!skb) {
kfree(data);
kfree(ctx);
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 321be94c445a..ae7d93c08880 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -754,73 +754,6 @@ static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = {
sizeof(struct in6_addr)),
};
-static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct net_device **pdev, struct br_mdb_entry **pentry,
- struct nlattr **mdb_attrs, struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(skb->sk);
- struct br_mdb_entry *entry;
- struct br_port_msg *bpm;
- struct nlattr *tb[MDBA_SET_ENTRY_MAX+1];
- struct net_device *dev;
- int err;
-
- err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
- MDBA_SET_ENTRY_MAX, NULL, NULL);
- if (err < 0)
- return err;
-
- bpm = nlmsg_data(nlh);
- if (bpm->ifindex == 0) {
- NL_SET_ERR_MSG_MOD(extack, "Invalid bridge ifindex");
- return -EINVAL;
- }
-
- dev = __dev_get_by_index(net, bpm->ifindex);
- if (dev == NULL) {
- NL_SET_ERR_MSG_MOD(extack, "Bridge device doesn't exist");
- return -ENODEV;
- }
-
- if (!netif_is_bridge_master(dev)) {
- NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge");
- return -EOPNOTSUPP;
- }
-
- *pdev = dev;
-
- if (!tb[MDBA_SET_ENTRY]) {
- NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY attribute");
- return -EINVAL;
- }
- if (nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) {
- NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length");
- return -EINVAL;
- }
-
- entry = nla_data(tb[MDBA_SET_ENTRY]);
- if (!is_valid_mdb_entry(entry, extack))
- return -EINVAL;
- *pentry = entry;
-
- if (tb[MDBA_SET_ENTRY_ATTRS]) {
- err = nla_parse_nested(mdb_attrs, MDBE_ATTR_MAX,
- tb[MDBA_SET_ENTRY_ATTRS],
- br_mdbe_attrs_pol, extack);
- if (err)
- return err;
- if (mdb_attrs[MDBE_ATTR_SOURCE] &&
- !is_valid_mdb_source(mdb_attrs[MDBE_ATTR_SOURCE],
- entry->addr.proto, extack))
- return -EINVAL;
- } else {
- memset(mdb_attrs, 0,
- sizeof(struct nlattr *) * (MDBE_ATTR_MAX + 1));
- }
-
- return 0;
-}
-
static struct net_bridge_mcast *
__br_mdb_choose_context(struct net_bridge *br,
const struct br_mdb_entry *entry,
@@ -853,44 +786,26 @@ out:
return brmctx;
}
-static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
- struct br_mdb_entry *entry,
- struct nlattr **mdb_attrs,
+static int br_mdb_add_group(const struct br_mdb_config *cfg,
struct netlink_ext_ack *extack)
{
struct net_bridge_mdb_entry *mp, *star_mp;
struct net_bridge_port_group __rcu **pp;
+ struct br_mdb_entry *entry = cfg->entry;
+ struct net_bridge_port *port = cfg->p;
+ struct net_bridge *br = cfg->br;
struct net_bridge_port_group *p;
struct net_bridge_mcast *brmctx;
- struct br_ip group, star_group;
+ struct br_ip group = cfg->group;
unsigned long now = jiffies;
unsigned char flags = 0;
+ struct br_ip star_group;
u8 filter_mode;
- __mdb_entry_to_br_ip(entry, &group, mdb_attrs);
-
brmctx = __br_mdb_choose_context(br, entry, extack);
if (!brmctx)
return -EINVAL;
- /* host join errors which can happen before creating the group */
- if (!port && !br_group_is_l2(&group)) {
- /* don't allow any flags for host-joined IP groups */
- if (entry->state) {
- NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups");
- return -EINVAL;
- }
- if (!br_multicast_is_star_g(&group)) {
- NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined");
- return -EINVAL;
- }
- }
-
- if (br_group_is_l2(&group) && entry->state != MDB_PERMANENT) {
- NL_SET_ERR_MSG_MOD(extack, "Only permanent L2 entries allowed");
- return -EINVAL;
- }
-
mp = br_multicast_new_group(br, &group);
if (IS_ERR(mp))
return PTR_ERR(mp);
@@ -959,107 +874,197 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
return 0;
}
-static int __br_mdb_add(struct net *net, struct net_bridge *br,
- struct net_bridge_port *p,
- struct br_mdb_entry *entry,
- struct nlattr **mdb_attrs,
+static int __br_mdb_add(const struct br_mdb_config *cfg,
struct netlink_ext_ack *extack)
{
int ret;
- spin_lock_bh(&br->multicast_lock);
- ret = br_mdb_add_group(br, p, entry, mdb_attrs, extack);
- spin_unlock_bh(&br->multicast_lock);
+ spin_lock_bh(&cfg->br->multicast_lock);
+ ret = br_mdb_add_group(cfg, extack);
+ spin_unlock_bh(&cfg->br->multicast_lock);
return ret;
}
-static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+static int br_mdb_config_attrs_init(struct nlattr *set_attrs,
+ struct br_mdb_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1];
- struct net *net = sock_net(skb->sk);
- struct net_bridge_vlan_group *vg;
- struct net_bridge_port *p = NULL;
- struct net_device *dev, *pdev;
- struct br_mdb_entry *entry;
- struct net_bridge_vlan *v;
- struct net_bridge *br;
int err;
- err = br_mdb_parse(skb, nlh, &dev, &entry, mdb_attrs, extack);
- if (err < 0)
+ err = nla_parse_nested(mdb_attrs, MDBE_ATTR_MAX, set_attrs,
+ br_mdbe_attrs_pol, extack);
+ if (err)
return err;
- br = netdev_priv(dev);
+ if (mdb_attrs[MDBE_ATTR_SOURCE] &&
+ !is_valid_mdb_source(mdb_attrs[MDBE_ATTR_SOURCE],
+ cfg->entry->addr.proto, extack))
+ return -EINVAL;
+
+ __mdb_entry_to_br_ip(cfg->entry, &cfg->group, mdb_attrs);
- if (!netif_running(br->dev)) {
+ return 0;
+}
+
+static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh,
+ struct br_mdb_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1];
+ struct br_port_msg *bpm;
+ struct net_device *dev;
+ int err;
+
+ err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
+ MDBA_SET_ENTRY_MAX, NULL, extack);
+ if (err)
+ return err;
+
+ memset(cfg, 0, sizeof(*cfg));
+
+ bpm = nlmsg_data(nlh);
+ if (!bpm->ifindex) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid bridge ifindex");
+ return -EINVAL;
+ }
+
+ dev = __dev_get_by_index(net, bpm->ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Bridge device doesn't exist");
+ return -ENODEV;
+ }
+
+ if (!netif_is_bridge_master(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge");
+ return -EOPNOTSUPP;
+ }
+
+ cfg->br = netdev_priv(dev);
+
+ if (!netif_running(cfg->br->dev)) {
NL_SET_ERR_MSG_MOD(extack, "Bridge device is not running");
return -EINVAL;
}
- if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) {
+ if (!br_opt_get(cfg->br, BROPT_MULTICAST_ENABLED)) {
NL_SET_ERR_MSG_MOD(extack, "Bridge's multicast processing is disabled");
return -EINVAL;
}
- if (entry->ifindex != br->dev->ifindex) {
- pdev = __dev_get_by_index(net, entry->ifindex);
+ if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY attribute");
+ return -EINVAL;
+ }
+ if (nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length");
+ return -EINVAL;
+ }
+
+ cfg->entry = nla_data(tb[MDBA_SET_ENTRY]);
+ if (!is_valid_mdb_entry(cfg->entry, extack))
+ return -EINVAL;
+
+ if (cfg->entry->ifindex != cfg->br->dev->ifindex) {
+ struct net_device *pdev;
+
+ pdev = __dev_get_by_index(net, cfg->entry->ifindex);
if (!pdev) {
NL_SET_ERR_MSG_MOD(extack, "Port net device doesn't exist");
return -ENODEV;
}
- p = br_port_get_rtnl(pdev);
- if (!p) {
+ cfg->p = br_port_get_rtnl(pdev);
+ if (!cfg->p) {
NL_SET_ERR_MSG_MOD(extack, "Net device is not a bridge port");
return -EINVAL;
}
- if (p->br != br) {
+ if (cfg->p->br != cfg->br) {
NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
return -EINVAL;
}
- if (p->state == BR_STATE_DISABLED && entry->state != MDB_PERMANENT) {
+ }
+
+ if (tb[MDBA_SET_ENTRY_ATTRS])
+ return br_mdb_config_attrs_init(tb[MDBA_SET_ENTRY_ATTRS], cfg,
+ extack);
+ else
+ __mdb_entry_to_br_ip(cfg->entry, &cfg->group, NULL);
+
+ return 0;
+}
+
+static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v;
+ struct br_mdb_config cfg;
+ int err;
+
+ err = br_mdb_config_init(net, nlh, &cfg, extack);
+ if (err)
+ return err;
+
+ /* host join errors which can happen before creating the group */
+ if (!cfg.p && !br_group_is_l2(&cfg.group)) {
+ /* don't allow any flags for host-joined IP groups */
+ if (cfg.entry->state) {
+ NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups");
+ return -EINVAL;
+ }
+ if (!br_multicast_is_star_g(&cfg.group)) {
+ NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined");
+ return -EINVAL;
+ }
+ }
+
+ if (br_group_is_l2(&cfg.group) && cfg.entry->state != MDB_PERMANENT) {
+ NL_SET_ERR_MSG_MOD(extack, "Only permanent L2 entries allowed");
+ return -EINVAL;
+ }
+
+ if (cfg.p) {
+ if (cfg.p->state == BR_STATE_DISABLED && cfg.entry->state != MDB_PERMANENT) {
NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state and entry is not permanent");
return -EINVAL;
}
- vg = nbp_vlan_group(p);
+ vg = nbp_vlan_group(cfg.p);
} else {
- vg = br_vlan_group(br);
+ vg = br_vlan_group(cfg.br);
}
/* If vlan filtering is enabled and VLAN is not specified
* install mdb entry on all vlans configured on the port.
*/
- if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
+ if (br_vlan_enabled(cfg.br->dev) && vg && cfg.entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
- entry->vid = v->vid;
- err = __br_mdb_add(net, br, p, entry, mdb_attrs, extack);
+ cfg.entry->vid = v->vid;
+ cfg.group.vid = v->vid;
+ err = __br_mdb_add(&cfg, extack);
if (err)
break;
}
} else {
- err = __br_mdb_add(net, br, p, entry, mdb_attrs, extack);
+ err = __br_mdb_add(&cfg, extack);
}
return err;
}
-static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry,
- struct nlattr **mdb_attrs)
+static int __br_mdb_del(const struct br_mdb_config *cfg)
{
+ struct br_mdb_entry *entry = cfg->entry;
+ struct net_bridge *br = cfg->br;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
- struct br_ip ip;
+ struct br_ip ip = cfg->group;
int err = -EINVAL;
- if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
- return -EINVAL;
-
- __mdb_entry_to_br_ip(entry, &ip, mdb_attrs);
-
spin_lock_bh(&br->multicast_lock);
mp = br_mdb_ip_get(br, &ip);
if (!mp)
@@ -1094,51 +1099,32 @@ unlock:
static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
- struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1];
struct net *net = sock_net(skb->sk);
struct net_bridge_vlan_group *vg;
- struct net_bridge_port *p = NULL;
- struct net_device *dev, *pdev;
- struct br_mdb_entry *entry;
struct net_bridge_vlan *v;
- struct net_bridge *br;
+ struct br_mdb_config cfg;
int err;
- err = br_mdb_parse(skb, nlh, &dev, &entry, mdb_attrs, extack);
- if (err < 0)
+ err = br_mdb_config_init(net, nlh, &cfg, extack);
+ if (err)
return err;
- br = netdev_priv(dev);
-
- if (entry->ifindex != br->dev->ifindex) {
- pdev = __dev_get_by_index(net, entry->ifindex);
- if (!pdev)
- return -ENODEV;
-
- p = br_port_get_rtnl(pdev);
- if (!p) {
- NL_SET_ERR_MSG_MOD(extack, "Net device is not a bridge port");
- return -EINVAL;
- }
- if (p->br != br) {
- NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
- return -EINVAL;
- }
- vg = nbp_vlan_group(p);
- } else {
- vg = br_vlan_group(br);
- }
+ if (cfg.p)
+ vg = nbp_vlan_group(cfg.p);
+ else
+ vg = br_vlan_group(cfg.br);
/* If vlan filtering is enabled and VLAN is not specified
* delete mdb entry on all vlans configured on the port.
*/
- if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
+ if (br_vlan_enabled(cfg.br->dev) && vg && cfg.entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
- entry->vid = v->vid;
- err = __br_mdb_del(br, entry, mdb_attrs);
+ cfg.entry->vid = v->vid;
+ cfg.group.vid = v->vid;
+ err = __br_mdb_del(&cfg);
}
} else {
- err = __br_mdb_del(br, entry, mdb_attrs);
+ err = __br_mdb_del(&cfg);
}
return err;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 5e988f0ed2c0..db4c3900ae95 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1273,7 +1273,7 @@ br_multicast_new_group_src(struct net_bridge_port_group *pg, struct br_ip *src_i
struct net_bridge_port_group *br_multicast_new_port_group(
struct net_bridge_port *port,
- struct br_ip *group,
+ const struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char flags,
const unsigned char *src,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 4c4fda930068..3997e16c15fc 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -92,6 +92,13 @@ struct bridge_mcast_stats {
struct br_mcast_stats mstats;
struct u64_stats_sync syncp;
};
+
+struct br_mdb_config {
+ struct net_bridge *br;
+ struct net_bridge_port *p;
+ struct br_mdb_entry *entry;
+ struct br_ip group;
+};
#endif
/* net_bridge_mcast_port must be always defined due to forwarding stubs */
@@ -934,7 +941,8 @@ br_mdb_ip_get(struct net_bridge *br, struct br_ip *dst);
struct net_bridge_mdb_entry *
br_multicast_new_group(struct net_bridge *br, struct br_ip *group);
struct net_bridge_port_group *
-br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
+br_multicast_new_port_group(struct net_bridge_port *port,
+ const struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char flags, const unsigned char *src,
u8 filter_mode, u8 rt_protocol);
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 27dcdcc0b808..c69168f11e44 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -677,7 +677,7 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
static int can_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- if (unlikely(dev->type != ARPHRD_CAN || (!can_is_can_skb(skb)))) {
+ if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_can_skb(skb))) {
pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n",
dev->type, skb->len);
@@ -692,7 +692,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canfd_skb(skb)))) {
+ if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_canfd_skb(skb))) {
pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n",
dev->type, skb->len);
@@ -707,7 +707,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
static int canxl_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canxl_skb(skb)))) {
+ if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_canxl_skb(skb))) {
pr_warn_once("PF_CAN: dropped non conform CAN XL skbuff: dev type %d, len %d\n",
dev->type, skb->len);
diff --git a/net/core/dev.c b/net/core/dev.c
index 7627c475d991..b76fb37b381e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10517,6 +10517,22 @@ void netdev_set_default_ethtool_ops(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
+/**
+ * netdev_sw_irq_coalesce_default_on() - enable SW IRQ coalescing by default
+ * @dev: netdev to enable the IRQ coalescing on
+ *
+ * Sets a conservative default for SW IRQ coalescing. Users can use
+ * sysfs attributes to override the default values.
+ */
+void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
+{
+ WARN_ON(dev->reg_state == NETREG_REGISTERED);
+
+ dev->gro_flush_timeout = 20000;
+ dev->napi_defer_hard_irqs = 1;
+}
+EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
+
void netdev_freemem(struct net_device *dev)
{
char *addr = (char *)dev - dev->padded;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 907df7124157..6004bd0ccee4 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -195,11 +195,16 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
+#define DEVLINK_PORT_FN_CAPS_VALID_MASK \
+ (_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1)
+
static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
[DEVLINK_PORT_FN_ATTR_STATE] =
NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
DEVLINK_PORT_FN_STATE_ACTIVE),
+ [DEVLINK_PORT_FN_ATTR_CAPS] =
+ NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
};
static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
@@ -680,6 +685,87 @@ devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
return 0;
}
+static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
+ u32 cap, bool is_enable)
+{
+ caps->selector |= cap;
+ if (is_enable)
+ caps->value |= cap;
+}
+
+static int devlink_port_fn_roce_fill(const struct devlink_ops *ops,
+ struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!ops->port_fn_roce_get)
+ return 0;
+
+ err = ops->port_fn_roce_get(devlink_port, &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_migratable_fill(const struct devlink_ops *ops,
+ struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!ops->port_fn_migratable_get ||
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+ return 0;
+
+ err = ops->port_fn_migratable_get(devlink_port, &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_MIGRATABLE, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_caps_fill(const struct devlink_ops *ops,
+ struct devlink_port *devlink_port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
+{
+ struct nla_bitfield32 caps = {};
+ int err;
+
+ err = devlink_port_fn_roce_fill(ops, devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ err = devlink_port_fn_migratable_fill(ops, devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ if (!caps.selector)
+ return 0;
+ err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
+ caps.selector);
+ if (err)
+ return err;
+
+ *msg_updated = true;
+ return 0;
+}
+
static int
devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
struct genl_info *info,
@@ -1264,6 +1350,51 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
}
static int
+devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ return ops->port_fn_migratable_set(devlink_port, enable, extack);
+}
+
+static int
+devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ return ops->port_fn_roce_set(devlink_port, enable, extack);
+}
+
+static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nla_bitfield32 caps;
+ u32 caps_value;
+ int err;
+
+ caps = nla_get_bitfield32(attr);
+ caps_value = caps.value & caps.selector;
+ if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) {
+ err = devlink_port_fn_roce_set(devlink_port,
+ caps_value & DEVLINK_PORT_FN_CAP_ROCE,
+ extack);
+ if (err)
+ return err;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
+ err = devlink_port_fn_mig_set(devlink_port, caps_value &
+ DEVLINK_PORT_FN_CAP_MIGRATABLE,
+ extack);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int
devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
struct netlink_ext_ack *extack)
{
@@ -1281,6 +1412,10 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
&msg_updated);
if (err)
goto out;
+ err = devlink_port_fn_caps_fill(ops, port, msg, extack,
+ &msg_updated);
+ if (err)
+ goto out;
err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
out:
if (err || !msg_updated)
@@ -1632,11 +1767,6 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port,
}
}
- if (!ops->port_function_hw_addr_set) {
- NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes");
- return -EOPNOTSUPP;
- }
-
return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len,
extack);
}
@@ -1650,12 +1780,52 @@ static int devlink_port_fn_state_set(struct devlink_port *port,
state = nla_get_u8(attr);
ops = port->devlink->ops;
- if (!ops->port_fn_state_set) {
- NL_SET_ERR_MSG_MOD(extack,
- "Function does not support state setting");
+ return ops->port_fn_state_set(port, state, extack);
+}
+
+static int devlink_port_function_validate(struct devlink_port *devlink_port,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+ struct nlattr *attr;
+
+ if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
+ !ops->port_function_hw_addr_set) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+ "Port doesn't support function attributes");
return -EOPNOTSUPP;
}
- return ops->port_fn_state_set(port, state, extack);
+ if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+ "Function does not support state setting");
+ return -EOPNOTSUPP;
+ }
+ attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
+ if (attr) {
+ struct nla_bitfield32 caps;
+
+ caps = nla_get_bitfield32(attr);
+ if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE &&
+ !ops->port_fn_roce_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support RoCE function attribute");
+ return -EOPNOTSUPP;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
+ if (!ops->port_fn_migratable_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support migratable function attribute");
+ return -EOPNOTSUPP;
+ }
+ if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "migratable function attribute supported for VFs only");
+ return -EOPNOTSUPP;
+ }
+ }
+ }
+ return 0;
}
static int devlink_port_function_set(struct devlink_port *port,
@@ -1672,12 +1842,24 @@ static int devlink_port_function_set(struct devlink_port *port,
return err;
}
+ err = devlink_port_function_validate(port, tb, extack);
+ if (err)
+ return err;
+
attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
if (attr) {
err = devlink_port_function_hw_addr_set(port, attr, extack);
if (err)
return err;
}
+
+ attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
+ if (attr) {
+ err = devlink_port_fn_caps_set(port, attr, extack);
+ if (err)
+ return err;
+ }
+
/* Keep this as the last function attribute set, so that when
* multiple port function attributes are set along with state,
* Those can be applied first before activating the state.
@@ -4259,9 +4441,10 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id,
DEVLINK_ATTR_PAD))
goto nla_put_failure;
- if (resource->size != resource->size_new)
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
- resource->size_new, DEVLINK_ATTR_PAD);
+ if (resource->size != resource->size_new &&
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
+ resource->size_new, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
if (devlink_resource_occ_put(resource, skb))
goto nla_put_failure;
if (devlink_resource_size_params_put(resource, skb))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4bf95e36ed16..3cbba7099c0f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -270,12 +270,10 @@ static struct sk_buff *napi_skb_cache_get(void)
return skb;
}
-/* Caller must provide SKB that is memset cleared */
-static void __build_skb_around(struct sk_buff *skb, void *data,
- unsigned int frag_size)
+static inline void __finalize_skb_around(struct sk_buff *skb, void *data,
+ unsigned int size)
{
struct skb_shared_info *shinfo;
- unsigned int size = frag_size ? : ksize(data);
size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -297,15 +295,71 @@ static void __build_skb_around(struct sk_buff *skb, void *data,
skb_set_kcov_handle(skb, kcov_common_handle());
}
+static inline void *__slab_build_skb(struct sk_buff *skb, void *data,
+ unsigned int *size)
+{
+ void *resized;
+
+ /* Must find the allocation size (and grow it to match). */
+ *size = ksize(data);
+ /* krealloc() will immediately return "data" when
+ * "ksize(data)" is requested: it is the existing upper
+ * bounds. As a result, GFP_ATOMIC will be ignored. Note
+ * that this "new" pointer needs to be passed back to the
+ * caller for use so the __alloc_size hinting will be
+ * tracked correctly.
+ */
+ resized = krealloc(data, *size, GFP_ATOMIC);
+ WARN_ON_ONCE(resized != data);
+ return resized;
+}
+
+/* build_skb() variant which can operate on slab buffers.
+ * Note that this should be used sparingly as slab buffers
+ * cannot be combined efficiently by GRO!
+ */
+struct sk_buff *slab_build_skb(void *data)
+{
+ struct sk_buff *skb;
+ unsigned int size;
+
+ skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return NULL;
+
+ memset(skb, 0, offsetof(struct sk_buff, tail));
+ data = __slab_build_skb(skb, data, &size);
+ __finalize_skb_around(skb, data, size);
+
+ return skb;
+}
+EXPORT_SYMBOL(slab_build_skb);
+
+/* Caller must provide SKB that is memset cleared */
+static void __build_skb_around(struct sk_buff *skb, void *data,
+ unsigned int frag_size)
+{
+ unsigned int size = frag_size;
+
+ /* frag_size == 0 is considered deprecated now. Callers
+ * using slab buffer should use slab_build_skb() instead.
+ */
+ if (WARN_ONCE(size == 0, "Use slab_build_skb() instead"))
+ data = __slab_build_skb(skb, data, &size);
+
+ __finalize_skb_around(skb, data, size);
+}
+
/**
* __build_skb - build a network buffer
* @data: data buffer provided by caller
- * @frag_size: size of data, or 0 if head was kmalloced
+ * @frag_size: size of data (must not be 0)
*
* Allocate a new &sk_buff. Caller provides space holding head and
- * skb_shared_info. @data must have been allocated by kmalloc() only if
- * @frag_size is 0, otherwise data should come from the page allocator
- * or vmalloc()
+ * skb_shared_info. @data must have been allocated from the page
+ * allocator or vmalloc(). (A @frag_size of 0 to indicate a kmalloc()
+ * allocation is deprecated, and callers should use slab_build_skb()
+ * instead.)
* The return is the new skb buffer.
* On a failure the return is %NULL, and @data is not freed.
* Notes :
diff --git a/net/core/sock.c b/net/core/sock.c
index 4571914a4aa8..b0ab841e0aed 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -901,13 +901,20 @@ int sock_set_timestamping(struct sock *sk, int optname,
if (val & ~SOF_TIMESTAMPING_MASK)
return -EINVAL;
+ if (val & SOF_TIMESTAMPING_OPT_ID_TCP &&
+ !(val & SOF_TIMESTAMPING_OPT_ID))
+ return -EINVAL;
+
if (val & SOF_TIMESTAMPING_OPT_ID &&
!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
if (sk_is_tcp(sk)) {
if ((1 << sk->sk_state) &
(TCPF_CLOSE | TCPF_LISTEN))
return -EINVAL;
- atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
+ if (val & SOF_TIMESTAMPING_OPT_ID_TCP)
+ atomic_set(&sk->sk_tskey, tcp_sk(sk)->write_seq);
+ else
+ atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
} else {
atomic_set(&sk->sk_tskey, 0);
}
diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c
index 71884296fc70..03a1fb9c87a9 100644
--- a/net/dsa/tag_hellcreek.c
+++ b/net/dsa/tag_hellcreek.c
@@ -51,7 +51,8 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
return NULL;
}
- pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN);
+ if (pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN))
+ return NULL;
dsa_default_offload_fwd_mark(skb);
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 0f6ae143afc9..080e5c369f5b 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -27,7 +27,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
if (!skb->dev)
return NULL;
- pskb_trim_rcsum(skb, skb->len - len);
+ if (pskb_trim_rcsum(skb, skb->len - len))
+ return NULL;
dsa_default_offload_fwd_mark(skb);
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index f14f51b41491..1c2ceba4771b 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -670,7 +670,8 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
* padding and trailer we need to account for the fact that
* skb->data points to skb_mac_header(skb) + ETH_HLEN.
*/
- pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN);
+ if (pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN))
+ return NULL;
/* Trap-to-host frame, no timestamp trailer */
} else {
*source_port = SJA1110_RX_HEADER_SRC_PORT(rx_header);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index e02daa74e833..2edc8b796a4e 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -398,7 +398,7 @@ EXPORT_SYMBOL(alloc_etherdev_mqs);
ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
{
- return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr);
+ return sysfs_emit(buf, "%*phC\n", len, addr);
}
EXPORT_SYMBOL(sysfs_format_mac);
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 72ab0944262a..228f13df2e18 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -4,7 +4,7 @@ obj-y += ioctl.o common.o
obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o
-ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
+ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \
linkstate.o debug.o wol.o features.o privflags.o rings.o \
channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
tunnels.o fec.o eeprom.o stats.o phc_vclocks.o module.o \
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 21cfe8557205..6f399afc2ff2 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -417,6 +417,7 @@ const char sof_timestamping_names[][ETH_GSTRING_LEN] = {
[const_ilog2(SOF_TIMESTAMPING_OPT_PKTINFO)] = "option-pktinfo",
[const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw",
[const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc",
+ [const_ilog2(SOF_TIMESTAMPING_OPT_ID_TCP)] = "option-id-tcp",
};
static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT);
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 1a4c11356c96..aee98be6237f 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -287,6 +287,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
[ETHTOOL_MSG_PHC_VCLOCKS_GET] = &ethnl_phc_vclocks_request_ops,
[ETHTOOL_MSG_MODULE_GET] = &ethnl_module_request_ops,
[ETHTOOL_MSG_PSE_GET] = &ethnl_pse_request_ops,
+ [ETHTOOL_MSG_RSS_GET] = &ethnl_rss_request_ops,
};
static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -1040,6 +1041,12 @@ static const struct genl_ops ethtool_genl_ops[] = {
.policy = ethnl_pse_set_policy,
.maxattr = ARRAY_SIZE(ethnl_pse_set_policy) - 1,
},
+ {
+ .cmd = ETHTOOL_MSG_RSS_GET,
+ .doit = ethnl_default_doit,
+ .policy = ethnl_rss_get_policy,
+ .maxattr = ARRAY_SIZE(ethnl_rss_get_policy) - 1,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 1bfd374f9718..3753787ba233 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -346,6 +346,7 @@ extern const struct ethnl_request_ops ethnl_stats_request_ops;
extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops;
extern const struct ethnl_request_ops ethnl_module_request_ops;
extern const struct ethnl_request_ops ethnl_pse_request_ops;
+extern const struct ethnl_request_ops ethnl_rss_request_ops;
extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -386,6 +387,7 @@ extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER +
extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1];
extern const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1];
extern const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1];
+extern const struct nla_policy ethnl_rss_get_policy[ETHTOOL_A_RSS_CONTEXT + 1];
int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
new file mode 100644
index 000000000000..ebe6145aed3f
--- /dev/null
+++ b/net/ethtool/rss.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "netlink.h"
+#include "common.h"
+
+struct rss_req_info {
+ struct ethnl_req_info base;
+ u32 rss_context;
+};
+
+struct rss_reply_data {
+ struct ethnl_reply_data base;
+ u32 indir_size;
+ u32 hkey_size;
+ u32 hfunc;
+ u32 *indir_table;
+ u8 *hkey;
+};
+
+#define RSS_REQINFO(__req_base) \
+ container_of(__req_base, struct rss_req_info, base)
+
+#define RSS_REPDATA(__reply_base) \
+ container_of(__reply_base, struct rss_reply_data, base)
+
+const struct nla_policy ethnl_rss_get_policy[] = {
+ [ETHTOOL_A_RSS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_RSS_CONTEXT] = { .type = NLA_U32 },
+};
+
+static int
+rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ struct rss_req_info *request = RSS_REQINFO(req_info);
+
+ if (tb[ETHTOOL_A_RSS_CONTEXT])
+ request->rss_context = nla_get_u32(tb[ETHTOOL_A_RSS_CONTEXT]);
+
+ return 0;
+}
+
+static int
+rss_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base, struct genl_info *info)
+{
+ struct rss_reply_data *data = RSS_REPDATA(reply_base);
+ struct rss_req_info *request = RSS_REQINFO(req_base);
+ struct net_device *dev = reply_base->dev;
+ const struct ethtool_ops *ops;
+ u32 total_size, indir_bytes;
+ u8 dev_hfunc = 0;
+ u8 *rss_config;
+ int ret;
+
+ ops = dev->ethtool_ops;
+ if (!ops->get_rxfh)
+ return -EOPNOTSUPP;
+
+ /* Some drivers don't handle rss_context */
+ if (request->rss_context && !ops->get_rxfh_context)
+ return -EOPNOTSUPP;
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ return ret;
+
+ data->indir_size = 0;
+ data->hkey_size = 0;
+ if (ops->get_rxfh_indir_size)
+ data->indir_size = ops->get_rxfh_indir_size(dev);
+ if (ops->get_rxfh_key_size)
+ data->hkey_size = ops->get_rxfh_key_size(dev);
+
+ indir_bytes = data->indir_size * sizeof(u32);
+ total_size = indir_bytes + data->hkey_size;
+ rss_config = kzalloc(total_size, GFP_KERNEL);
+ if (!rss_config) {
+ ret = -ENOMEM;
+ goto out_ops;
+ }
+
+ if (data->indir_size)
+ data->indir_table = (u32 *)rss_config;
+
+ if (data->hkey_size)
+ data->hkey = rss_config + indir_bytes;
+
+ if (request->rss_context)
+ ret = ops->get_rxfh_context(dev, data->indir_table, data->hkey,
+ &dev_hfunc, request->rss_context);
+ else
+ ret = ops->get_rxfh(dev, data->indir_table, data->hkey,
+ &dev_hfunc);
+
+ if (ret)
+ goto out_ops;
+
+ data->hfunc = dev_hfunc;
+out_ops:
+ ethnl_ops_complete(dev);
+ return ret;
+}
+
+static int
+rss_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct rss_reply_data *data = RSS_REPDATA(reply_base);
+ int len;
+
+ len = nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */
+ nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */
+ nla_total_size(data->hkey_size); /* _RSS_HKEY */
+
+ return len;
+}
+
+static int
+rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct rss_reply_data *data = RSS_REPDATA(reply_base);
+
+ if (nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc) ||
+ nla_put(skb, ETHTOOL_A_RSS_INDIR,
+ sizeof(u32) * data->indir_size, data->indir_table) ||
+ nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static void rss_cleanup_data(struct ethnl_reply_data *reply_base)
+{
+ const struct rss_reply_data *data = RSS_REPDATA(reply_base);
+
+ kfree(data->indir_table);
+}
+
+const struct ethnl_request_ops ethnl_rss_request_ops = {
+ .request_cmd = ETHTOOL_MSG_RSS_GET,
+ .reply_cmd = ETHTOOL_MSG_RSS_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_RSS_HEADER,
+ .req_info_size = sizeof(struct rss_req_info),
+ .reply_data_size = sizeof(struct rss_reply_data),
+
+ .parse_request = rss_parse_request,
+ .prepare_data = rss_prepare_data,
+ .reply_size = rss_reply_size,
+ .fill_reply = rss_fill_reply,
+ .cleanup_data = rss_cleanup_data,
+};
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index b33d1b5eda87..248ad5e46969 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -26,10 +26,12 @@ static struct genl_family nl802154_fam;
/* multicast groups */
enum nl802154_multicast_groups {
NL802154_MCGRP_CONFIG,
+ NL802154_MCGRP_SCAN,
};
static const struct genl_multicast_group nl802154_mcgrps[] = {
[NL802154_MCGRP_CONFIG] = { .name = "config", },
+ [NL802154_MCGRP_SCAN] = { .name = "scan", },
};
/* returns ERR_PTR values */
@@ -216,6 +218,9 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
[NL802154_ATTR_PID] = { .type = NLA_U32 },
[NL802154_ATTR_NETNS_FD] = { .type = NLA_U32 },
+
+ [NL802154_ATTR_COORDINATOR] = { .type = NLA_NESTED },
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
[NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, },
[NL802154_ATTR_SEC_OUT_LEVEL] = { .type = NLA_U32, },
@@ -1281,6 +1286,104 @@ static int nl802154_wpan_phy_netns(struct sk_buff *skb, struct genl_info *info)
return err;
}
+static int nl802154_prep_scan_event_msg(struct sk_buff *msg,
+ struct cfg802154_registered_device *rdev,
+ struct wpan_dev *wpan_dev,
+ u32 portid, u32 seq, int flags, u8 cmd,
+ struct ieee802154_coord_desc *desc)
+{
+ struct nlattr *nla;
+ void *hdr;
+
+ hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put_u32(msg, NL802154_ATTR_WPAN_PHY, rdev->wpan_phy_idx))
+ goto nla_put_failure;
+
+ if (wpan_dev->netdev &&
+ nla_put_u32(msg, NL802154_ATTR_IFINDEX, wpan_dev->netdev->ifindex))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, NL802154_ATTR_WPAN_DEV,
+ wpan_dev_id(wpan_dev), NL802154_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla = nla_nest_start_noflag(msg, NL802154_ATTR_COORDINATOR);
+ if (!nla)
+ goto nla_put_failure;
+
+ if (nla_put(msg, NL802154_COORD_PANID, IEEE802154_PAN_ID_LEN,
+ &desc->addr.pan_id))
+ goto nla_put_failure;
+
+ if (desc->addr.mode == IEEE802154_ADDR_SHORT) {
+ if (nla_put(msg, NL802154_COORD_ADDR,
+ IEEE802154_SHORT_ADDR_LEN,
+ &desc->addr.short_addr))
+ goto nla_put_failure;
+ } else {
+ if (nla_put(msg, NL802154_COORD_ADDR,
+ IEEE802154_EXTENDED_ADDR_LEN,
+ &desc->addr.extended_addr))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u8(msg, NL802154_COORD_CHANNEL, desc->channel))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL802154_COORD_PAGE, desc->page))
+ goto nla_put_failure;
+
+ if (nla_put_u16(msg, NL802154_COORD_SUPERFRAME_SPEC,
+ desc->superframe_spec))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL802154_COORD_LINK_QUALITY, desc->link_quality))
+ goto nla_put_failure;
+
+ if (desc->gts_permit && nla_put_flag(msg, NL802154_COORD_GTS_PERMIT))
+ goto nla_put_failure;
+
+ /* TODO: NL802154_COORD_PAYLOAD_DATA if any */
+
+ nla_nest_end(msg, nla);
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+
+ return -EMSGSIZE;
+}
+
+int nl802154_scan_event(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ struct ieee802154_coord_desc *desc)
+{
+ struct cfg802154_registered_device *rdev = wpan_phy_to_rdev(wpan_phy);
+ struct sk_buff *msg;
+ int ret;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!msg)
+ return -ENOMEM;
+
+ ret = nl802154_prep_scan_event_msg(msg, rdev, wpan_dev, 0, 0, 0,
+ NL802154_CMD_SCAN_EVENT,
+ desc);
+ if (ret < 0) {
+ nlmsg_free(msg);
+ return ret;
+ }
+
+ return genlmsg_multicast_netns(&nl802154_fam, wpan_phy_net(wpan_phy),
+ msg, 0, NL802154_MCGRP_SCAN, GFP_ATOMIC);
+}
+EXPORT_SYMBOL_GPL(nl802154_scan_event);
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
static const struct nla_policy nl802154_dev_addr_policy[NL802154_DEV_ADDR_ATTR_MAX + 1] = {
[NL802154_DEV_ADDR_ATTR_PAN_ID] = { .type = NLA_U16 },
diff --git a/net/ieee802154/nl802154.h b/net/ieee802154/nl802154.h
index 8c4b6d08954c..89b805500032 100644
--- a/net/ieee802154/nl802154.h
+++ b/net/ieee802154/nl802154.h
@@ -4,5 +4,7 @@
int nl802154_init(void);
void nl802154_exit(void);
+int nl802154_scan_event(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ struct ieee802154_coord_desc *desc);
#endif /* __IEEE802154_NL802154_H */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f361d3d56be2..b5736ef16ed2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -841,6 +841,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
return -EINVAL;
}
+ if (!cfg->fc_table)
+ cfg->fc_table = RT_TABLE_MAIN;
+
return 0;
errout:
return err;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 19a662003eef..ce9ff3c62e84 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -423,6 +423,7 @@ static struct fib_info *fib_find_info(struct fib_info *nfi)
nfi->fib_prefsrc == fi->fib_prefsrc &&
nfi->fib_priority == fi->fib_priority &&
nfi->fib_type == fi->fib_type &&
+ nfi->fib_tb_id == fi->fib_tb_id &&
memcmp(nfi->fib_metrics, fi->fib_metrics,
sizeof(u32) * RTAX_MAX) == 0 &&
!((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a4ccef3e6935..ffff46cdcb58 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1492,24 +1492,6 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
struct ip_tunnel_parm *p = &t->parms;
__be16 o_flags = p->o_flags;
- if (t->erspan_ver <= 2) {
- if (t->erspan_ver != 0 && !t->collect_md)
- o_flags |= TUNNEL_KEY;
-
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
- goto nla_put_failure;
-
- if (t->erspan_ver == 1) {
- if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
- goto nla_put_failure;
- } else if (t->erspan_ver == 2) {
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
- goto nla_put_failure;
- if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
- goto nla_put_failure;
- }
- }
-
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
nla_put_be16(skb, IFLA_GRE_IFLAGS,
gre_tnl_flags_to_gre_flags(p->i_flags)) ||
@@ -1550,6 +1532,34 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ if (t->erspan_ver <= 2) {
+ if (t->erspan_ver != 0 && !t->collect_md)
+ t->parms.o_flags |= TUNNEL_KEY;
+
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
+ goto nla_put_failure;
+
+ if (t->erspan_ver == 1) {
+ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
+ goto nla_put_failure;
+ } else if (t->erspan_ver == 2) {
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
+ goto nla_put_failure;
+ if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
+ goto nla_put_failure;
+ }
+ }
+
+ return ipgre_fill_info(skb, dev);
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static void erspan_setup(struct net_device *dev)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1628,7 +1638,7 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = {
.changelink = erspan_changelink,
.dellink = ip_tunnel_dellink,
.get_size = ipgre_get_size,
- .fill_info = ipgre_fill_info,
+ .fill_info = erspan_fill_info,
.get_link_net = ip_tunnel_get_link_net,
};
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index bb9854c2b7a1..409ec2a1f95b 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -49,6 +49,11 @@
#include <net/transp_v6.h>
#endif
+#define ping_portaddr_for_each_entry(__sk, node, list) \
+ hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
+#define ping_portaddr_for_each_entry_rcu(__sk, node, list) \
+ hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node)
+
struct ping_table {
struct hlist_nulls_head hash[PING_HTABLE_SIZE];
spinlock_t lock;
@@ -192,7 +197,7 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
return NULL;
}
- ping_portaddr_for_each_entry(sk, hnode, hslot) {
+ ping_portaddr_for_each_entry_rcu(sk, hnode, hslot) {
isk = inet_sk(sk);
pr_debug("iterate\n");
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e19507614f64..60fd91bb5171 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -920,6 +920,9 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (err < 0)
goto fail;
+ /* We prevent @rt from being freed. */
+ rcu_read_lock();
+
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
@@ -943,6 +946,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (err == 0) {
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGOKS);
+ rcu_read_unlock();
return 0;
}
@@ -950,6 +954,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
+ rcu_read_unlock();
return err;
slow_path_clean:
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index d9b50884d34e..ac0b28025fb0 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -254,7 +254,6 @@ ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata,
enum nl802154_iftype iftype)
{
struct ieee802154_local *local = sdata->local;
- struct wpan_dev *wpan_dev = &sdata->wpan_dev;
struct ieee802154_sub_if_data *nsdata;
/* we hold the RTNL here so can safely walk the list */
@@ -262,13 +261,13 @@ ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata,
if (nsdata != sdata && ieee802154_sdata_running(nsdata)) {
int ret;
- /* TODO currently we don't support multiple node types
- * we need to run skb_clone at rx path. Check if there
- * exist really an use case if we need to support
- * multiple node types at the same time.
+ /* TODO currently we don't support multiple node/coord
+ * types we need to run skb_clone at rx path. Check if
+ * there exist really an use case if we need to support
+ * multiple node/coord types at the same time.
*/
- if (wpan_dev->iftype == NL802154_IFTYPE_NODE &&
- nsdata->wpan_dev.iftype == NL802154_IFTYPE_NODE)
+ if (sdata->wpan_dev.iftype != NL802154_IFTYPE_MONITOR &&
+ nsdata->wpan_dev.iftype != NL802154_IFTYPE_MONITOR)
return -EBUSY;
/* check all phy mac sublayer settings are the same.
@@ -565,6 +564,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
wpan_dev->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
switch (type) {
+ case NL802154_IFTYPE_COORD:
case NL802154_IFTYPE_NODE:
ieee802154_be64_to_le64(&wpan_dev->extended_addr,
sdata->dev->dev_addr);
@@ -624,6 +624,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
ieee802154_le64_to_be64(ndev->perm_addr,
&local->hw.phy->perm_extended_addr);
switch (type) {
+ case NL802154_IFTYPE_COORD:
case NL802154_IFTYPE_NODE:
ndev->type = ARPHRD_IEEE802154;
if (ieee802154_is_valid_extended_unicast_addr(extended_addr)) {
@@ -650,6 +651,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
sdata->dev = ndev;
sdata->wpan_dev.wpan_phy = local->hw.phy;
sdata->local = local;
+ INIT_LIST_HEAD(&sdata->wpan_dev.list);
/* setup type-dependent data */
ret = ieee802154_setup_sdata(sdata, type);
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 40fab08df24b..3ed31daf7b9c 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -107,7 +107,7 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops)
phy->supported.lbt = NL802154_SUPPORTED_BOOL_FALSE;
/* always supported */
- phy->supported.iftypes = BIT(NL802154_IFTYPE_NODE);
+ phy->supported.iftypes = BIT(NL802154_IFTYPE_NODE) | BIT(NL802154_IFTYPE_COORD);
return &local->hw;
}
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index 0724aac8f48c..c2aae2a6d6a6 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -208,6 +208,7 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local,
int ret;
struct ieee802154_sub_if_data *sdata;
struct ieee802154_hdr hdr;
+ struct sk_buff *skb2;
ret = ieee802154_parse_frame_start(skb, &hdr);
if (ret) {
@@ -217,7 +218,7 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local,
}
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- if (sdata->wpan_dev.iftype != NL802154_IFTYPE_NODE)
+ if (sdata->wpan_dev.iftype == NL802154_IFTYPE_MONITOR)
continue;
if (!ieee802154_sdata_running(sdata))
@@ -230,12 +231,12 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local,
sdata->required_filtering == IEEE802154_FILTERING_4_FRAME_FIELDS)
continue;
- ieee802154_subif_frame(sdata, skb, &hdr);
- skb = NULL;
- break;
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2) {
+ skb2->dev = sdata->dev;
+ ieee802154_subif_frame(sdata, skb2, &hdr);
+ }
}
-
- kfree_skb(skb);
}
static void
@@ -274,7 +275,7 @@ void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb)
WARN_ON_ONCE(softirq_count() == 0);
if (local->suspended)
- goto drop;
+ goto free_skb;
/* TODO: When a transceiver omits the checksum here, we
* add an own calculated one. This is currently an ugly
@@ -292,20 +293,17 @@ void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb)
/* Level 1 filtering: Check the FCS by software when relevant */
if (local->hw.phy->filtering == IEEE802154_FILTERING_NONE) {
crc = crc_ccitt(0, skb->data, skb->len);
- if (crc) {
- rcu_read_unlock();
+ if (crc)
goto drop;
- }
}
/* remove crc */
skb_trim(skb, skb->len - 2);
__ieee802154_rx_handle_packet(local, skb);
- rcu_read_unlock();
-
- return;
drop:
+ rcu_read_unlock();
+free_skb:
kfree_skb(skb);
}
diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h
index df855c33daf2..689396d6c76a 100644
--- a/net/mac802154/trace.h
+++ b/net/mac802154/trace.h
@@ -264,6 +264,31 @@ TRACE_EVENT(802154_drv_set_promiscuous_mode,
BOOL_TO_STR(__entry->on))
);
+TRACE_EVENT(802154_new_scan_event,
+ TP_PROTO(struct ieee802154_coord_desc *desc),
+ TP_ARGS(desc),
+ TP_STRUCT__entry(
+ __field(__le16, pan_id)
+ __field(__le64, addr)
+ __field(u8, channel)
+ __field(u8, page)
+ ),
+ TP_fast_assign(
+ __entry->page = desc->page;
+ __entry->channel = desc->channel;
+ __entry->pan_id = desc->addr.pan_id;
+ __entry->addr = desc->addr.extended_addr;
+ ),
+ TP_printk("panid: %u, coord_addr: 0x%llx, page: %u, channel: %u",
+ __le16_to_cpu(__entry->pan_id), __le64_to_cpu(__entry->addr),
+ __entry->page, __entry->channel)
+);
+
+DEFINE_EVENT(802154_new_scan_event, 802154_scan_event,
+ TP_PROTO(struct ieee802154_coord_desc *desc),
+ TP_ARGS(desc)
+);
+
#endif /* !__MAC802154_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index eef69d0e44ec..2ea7eae43bdb 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1190,7 +1190,7 @@ static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[],
if (!tb[MPTCP_PM_ADDR_ATTR_FAMILY]) {
if (!require_family)
- return err;
+ return 0;
NL_SET_ERR_MSG_ATTR(info->extack, attr,
"missing family");
@@ -1224,7 +1224,7 @@ static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[],
if (tb[MPTCP_PM_ADDR_ATTR_PORT])
addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
- return err;
+ return 0;
}
int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
@@ -2094,7 +2094,7 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id)
return;
nla_put_failure:
- kfree_skb(skb);
+ nlmsg_free(skb);
}
void mptcp_event_addr_announced(const struct sock *ssk,
@@ -2151,7 +2151,7 @@ void mptcp_event_addr_announced(const struct sock *ssk,
return;
nla_put_failure:
- kfree_skb(skb);
+ nlmsg_free(skb);
}
void mptcp_event_pm_listener(const struct sock *ssk,
@@ -2203,7 +2203,7 @@ void mptcp_event_pm_listener(const struct sock *ssk,
return;
nla_put_failure:
- kfree_skb(skb);
+ nlmsg_free(skb);
}
void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
@@ -2261,7 +2261,7 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
return;
nla_put_failure:
- kfree_skb(skb);
+ nlmsg_free(skb);
}
static const struct genl_small_ops mptcp_pm_ops[] = {
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index a47423ebb33a..d4b1e6ec1b36 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -740,7 +740,7 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
}
release_sock(sk);
- return err;
+ return 0;
}
static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index dda8b76b7798..fd2236ee9a79 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -228,7 +228,8 @@ static int ncsi_cmd_handler_oem(struct sk_buff *skb,
len += max(payload, padding_bytes);
cmd = skb_put_zero(skb, len);
- memcpy(&cmd->mfr_id, nca->data, nca->payload);
+ unsafe_memcpy(&cmd->mfr_id, nca->data, nca->payload,
+ /* skb allocated with enough to load the payload */);
ncsi_cmd_build_header(&cmd->cmd.common, nca);
return 0;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b96338b4bf36..5c3cf0834af0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -887,7 +887,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
zone = nf_ct_zone(ct);
if (!nf_ct_ext_valid_pre(ct->ext)) {
- NF_CT_STAT_INC(net, insert_failed);
+ NF_CT_STAT_INC_ATOMIC(net, insert_failed);
return -ETIMEDOUT;
}
@@ -934,7 +934,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
if (!nf_ct_ext_valid_post(ct->ext)) {
nf_ct_kill(ct);
- NF_CT_STAT_INC(net, drop);
+ NF_CT_STAT_INC_ATOMIC(net, drop);
return -ETIMEDOUT;
}
@@ -1271,7 +1271,7 @@ chaintoolong:
*/
if (!nf_ct_ext_valid_post(ct->ext)) {
nf_ct_kill(ct);
- NF_CT_STAT_INC(net, drop);
+ NF_CT_STAT_INC_ATOMIC(net, drop);
return NF_DROP;
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d71150a40fb0..1286ae7d4609 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -328,8 +328,13 @@ nla_put_failure:
}
#ifdef CONFIG_NF_CONNTRACK_MARK
-static int ctnetlink_dump_mark(struct sk_buff *skb, u32 mark)
+static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
{
+ u32 mark = READ_ONCE(ct->mark);
+
+ if (!mark)
+ return 0;
+
if (nla_put_be32(skb, CTA_MARK, htonl(mark)))
goto nla_put_failure;
return 0;
@@ -543,7 +548,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb,
static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
{
if (ctnetlink_dump_status(skb, ct) < 0 ||
- ctnetlink_dump_mark(skb, READ_ONCE(ct->mark)) < 0 ||
+ ctnetlink_dump_mark(skb, ct) < 0 ||
ctnetlink_dump_secctx(skb, ct) < 0 ||
ctnetlink_dump_id(skb, ct) < 0 ||
ctnetlink_dump_use(skb, ct) < 0 ||
@@ -722,7 +727,6 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
struct sk_buff *skb;
unsigned int type;
unsigned int flags = 0, group;
- u32 mark;
int err;
if (events & (1 << IPCT_DESTROY)) {
@@ -827,9 +831,8 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
}
#ifdef CONFIG_NF_CONNTRACK_MARK
- mark = READ_ONCE(ct->mark);
- if ((events & (1 << IPCT_MARK) || mark) &&
- ctnetlink_dump_mark(skb, mark) < 0)
+ if (events & (1 << IPCT_MARK) &&
+ ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
nlmsg_end(skb, nlh);
@@ -2671,7 +2674,6 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
{
const struct nf_conntrack_zone *zone;
struct nlattr *nest_parms;
- u32 mark;
zone = nf_ct_zone(ct);
@@ -2733,8 +2735,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
goto nla_put_failure;
#ifdef CONFIG_NF_CONNTRACK_MARK
- mark = READ_ONCE(ct->mark);
- if (mark && ctnetlink_dump_mark(skb, mark) < 0)
+ if (ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
if (ctnetlink_dump_labels(skb, ct) < 0)
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 00b522890d77..0fdcdb2c9ae4 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -997,13 +997,13 @@ static void flow_offload_queue_work(struct flow_offload_work *offload)
struct net *net = read_pnet(&offload->flowtable->net);
if (offload->cmd == FLOW_CLS_REPLACE) {
- NF_FLOW_TABLE_STAT_INC(net, count_wq_add);
+ NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_add);
queue_work(nf_flow_offload_add_wq, &offload->work);
} else if (offload->cmd == FLOW_CLS_DESTROY) {
- NF_FLOW_TABLE_STAT_INC(net, count_wq_del);
+ NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_del);
queue_work(nf_flow_offload_del_wq, &offload->work);
} else {
- NF_FLOW_TABLE_STAT_INC(net, count_wq_stats);
+ NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_stats);
queue_work(nf_flow_offload_stats_wq, &offload->work);
}
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 4f9299b9dcdd..06d46d182634 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1162,6 +1162,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
struct nft_pipapo_match *m = priv->clone;
u8 genmask = nft_genmask_next(net);
struct nft_pipapo_field *f;
+ const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
@@ -1202,9 +1203,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
}
/* Validate */
+ start_p = start;
+ end_p = end;
nft_pipapo_for_each_field(f, i, m) {
- const u8 *start_p = start, *end_p = end;
-
if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX)
return -ENOSPC;
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 282c51051dcc..994a0a1efb58 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -240,6 +240,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
target->sens_res = nfca_poll->sens_res;
target->sel_res = nfca_poll->sel_res;
target->nfcid1_len = nfca_poll->nfcid1_len;
+ if (target->nfcid1_len > ARRAY_SIZE(target->nfcid1))
+ return -EPROTO;
if (target->nfcid1_len > 0) {
memcpy(target->nfcid1, nfca_poll->nfcid1,
target->nfcid1_len);
@@ -248,6 +250,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
nfcb_poll = (struct rf_tech_specific_params_nfcb_poll *)params;
target->sensb_res_len = nfcb_poll->sensb_res_len;
+ if (target->sensb_res_len > ARRAY_SIZE(target->sensb_res))
+ return -EPROTO;
if (target->sensb_res_len > 0) {
memcpy(target->sensb_res, nfcb_poll->sensb_res,
target->sensb_res_len);
@@ -256,6 +260,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
nfcf_poll = (struct rf_tech_specific_params_nfcf_poll *)params;
target->sensf_res_len = nfcf_poll->sensf_res_len;
+ if (target->sensf_res_len > ARRAY_SIZE(target->sensf_res))
+ return -EPROTO;
if (target->sensf_res_len > 0) {
memcpy(target->sensf_res, nfcf_poll->sensf_res,
target->sensf_res_len);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 861dfb8daf4a..932bcf766d63 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -209,6 +209,26 @@ static struct vport *new_vport(const struct vport_parms *parms)
return vport;
}
+static void ovs_vport_update_upcall_stats(struct sk_buff *skb,
+ const struct dp_upcall_info *upcall_info,
+ bool upcall_result)
+{
+ struct vport *p = OVS_CB(skb)->input_vport;
+ struct vport_upcall_stats_percpu *stats;
+
+ if (upcall_info->cmd != OVS_PACKET_CMD_MISS &&
+ upcall_info->cmd != OVS_PACKET_CMD_ACTION)
+ return;
+
+ stats = this_cpu_ptr(p->upcall_stats);
+ u64_stats_update_begin(&stats->syncp);
+ if (upcall_result)
+ u64_stats_inc(&stats->n_success);
+ else
+ u64_stats_inc(&stats->n_fail);
+ u64_stats_update_end(&stats->syncp);
+}
+
void ovs_dp_detach_port(struct vport *p)
{
ASSERT_OVSL();
@@ -216,6 +236,9 @@ void ovs_dp_detach_port(struct vport *p)
/* First drop references to device. */
hlist_del_rcu(&p->dp_hash_node);
+ /* Free percpu memory */
+ free_percpu(p->upcall_stats);
+
/* Then destroy it. */
ovs_vport_del(p);
}
@@ -305,6 +328,8 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
else
err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
+
+ ovs_vport_update_upcall_stats(skb, upcall_info, !err);
if (err)
goto err;
@@ -1826,6 +1851,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_destroy_portids;
}
+ vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
+ if (!vport->upcall_stats) {
+ err = -ENOMEM;
+ goto err_destroy_portids;
+ }
+
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_DP_CMD_NEW);
BUG_ON(err < 0);
@@ -2098,6 +2129,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
OVS_VPORT_ATTR_PAD))
goto nla_put_failure;
+ if (ovs_vport_get_upcall_stats(vport, skb))
+ goto nla_put_failure;
+
if (ovs_vport_get_upcall_portids(vport, skb))
goto nla_put_failure;
@@ -2279,6 +2313,12 @@ restart:
goto exit_unlock_free;
}
+ vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
+ if (!vport->upcall_stats) {
+ err = -ENOMEM;
+ goto exit_unlock_free;
+ }
+
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
OVS_VPORT_CMD_NEW, GFP_KERNEL);
@@ -2508,6 +2548,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
[OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
+ [OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
};
static const struct genl_small_ops dp_vport_genl_ops[] = {
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 82a74f998966..7e0f5c45b512 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -285,6 +285,67 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
}
/**
+ * ovs_vport_get_upcall_stats - retrieve upcall stats
+ *
+ * @vport: vport from which to retrieve the stats.
+ * @skb: sk_buff where upcall stats should be appended.
+ *
+ * Sums the per-CPU success and failure upcall counters of @vport and appends
+ * them to @skb as a nested OVS_VPORT_ATTR_UPCALL_STATS attribute.
+ *
+ * Must be called with ovs_mutex or rcu_read_lock.
+ *
+ * Return: 0 on success, -EMSGSIZE if the nest or either counter attribute
+ * could not be added to @skb.
+ */
+int ovs_vport_get_upcall_stats(struct vport *vport, struct sk_buff *skb)
+{
+	struct nlattr *nla;
+	__u64 tx_success = 0;
+	__u64 tx_fail = 0;
+	int i;
+
+	for_each_possible_cpu(i) {
+		const struct vport_upcall_stats_percpu *stats;
+		u64 n_success, n_fail;
+		unsigned int start;
+
+		stats = per_cpu_ptr(vport->upcall_stats, i);
+		/*
+		 * Snapshot into locals and accumulate only once the read is
+		 * known to be consistent: the loop body reruns whenever
+		 * u64_stats_fetch_retry() asks for a retry, so adding to the
+		 * totals inside it would count this CPU more than once.
+		 */
+		do {
+			start = u64_stats_fetch_begin(&stats->syncp);
+			n_success = u64_stats_read(&stats->n_success);
+			n_fail = u64_stats_read(&stats->n_fail);
+		} while (u64_stats_fetch_retry(&stats->syncp, start));
+
+		tx_success += n_success;
+		tx_fail += n_fail;
+	}
+
+	nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_UPCALL_STATS);
+	if (!nla)
+		return -EMSGSIZE;
+
+	/* On any put failure, roll back the partially built nest. */
+	if (nla_put_u64_64bit(skb, OVS_VPORT_UPCALL_ATTR_SUCCESS, tx_success,
+			      OVS_VPORT_ATTR_PAD)) {
+		nla_nest_cancel(skb, nla);
+		return -EMSGSIZE;
+	}
+
+	if (nla_put_u64_64bit(skb, OVS_VPORT_UPCALL_ATTR_FAIL, tx_fail,
+			      OVS_VPORT_ATTR_PAD)) {
+		nla_nest_cancel(skb, nla);
+		return -EMSGSIZE;
+	}
+	nla_nest_end(skb, nla);
+
+	return 0;
+}
+
+/**
* ovs_vport_get_options - retrieve device options
*
* @vport: vport from which to retrieve the options.
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 6ff45e8a0868..3e71ca8ad8a7 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -32,6 +32,8 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name);
void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
+int ovs_vport_get_upcall_stats(struct vport *vport, struct sk_buff *skb);
+
int ovs_vport_set_options(struct vport *, struct nlattr *options);
int ovs_vport_get_options(const struct vport *, struct sk_buff *);
@@ -65,6 +67,7 @@ struct vport_portids {
* @hash_node: Element in @dev_table hash table in vport.c.
* @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
* @ops: Class structure.
+ * @upcall_stats: Per-CPU upcall statistics of this vport.
* @detach_list: list used for detaching vport in net-exit call.
* @rcu: RCU callback head for deferred destruction.
*/
@@ -78,6 +81,7 @@ struct vport {
struct hlist_node hash_node;
struct hlist_node dp_hash_node;
const struct vport_ops *ops;
+ struct vport_upcall_stats_percpu __percpu *upcall_stats;
struct list_head detach_list;
struct rcu_head rcu;
@@ -137,6 +141,18 @@ struct vport_ops {
struct list_head list;
};
+/**
+ * struct vport_upcall_stats_percpu - per-cpu packet upcall statistics for
+ * a given vport.
+ * @n_success: Number of packets whose upcall to userspace succeeded.
+ * @n_fail: Number of packets whose upcall to userspace failed.
+ */
+struct vport_upcall_stats_percpu {
+ struct u64_stats_sync syncp;
+ u64_stats_t n_success;
+ u64_stats_t n_fail;
+};
+
struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
const struct vport_parms *);
void ovs_vport_free(struct vport *);
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index accd35c05577..7ae023b37a83 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -58,4 +58,11 @@ config RXKAD
See Documentation/networking/rxrpc.rst.
+config RXPERF
+ tristate "RxRPC test service"
+ help
+ Provide an rxperf service tester. This listens on UDP port 7009 for
+ incoming calls from the rxperf program (an example of which can be
+ found in OpenAFS).
+
endif
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index fdeba488fc6e..e76d3459d78e 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -16,6 +16,7 @@ rxrpc-y := \
conn_service.o \
input.o \
insecure.o \
+ io_thread.o \
key.o \
local_event.o \
local_object.o \
@@ -36,3 +37,6 @@ rxrpc-y := \
rxrpc-$(CONFIG_PROC_FS) += proc.o
rxrpc-$(CONFIG_RXKAD) += rxkad.o
rxrpc-$(CONFIG_SYSCTL) += sysctl.o
+
+
+obj-$(CONFIG_RXPERF) += rxperf.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index aacdd96a9886..7ea576f6ba4b 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -194,8 +194,8 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
service_in_use:
write_unlock(&local->services_lock);
- rxrpc_unuse_local(local);
- rxrpc_put_local(local);
+ rxrpc_unuse_local(local, rxrpc_local_unuse_bind);
+ rxrpc_put_local(local, rxrpc_local_put_bind);
ret = -EADDRINUSE;
error_unlock:
release_sock(&rx->sk);
@@ -328,7 +328,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
mutex_unlock(&call->user_mutex);
}
- rxrpc_put_peer(cp.peer);
+ rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp);
_leave(" = %p", call);
return call;
}
@@ -359,9 +359,9 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
/* Make sure we're not going to call back into a kernel service */
if (call->notify_rx) {
- spin_lock_bh(&call->notify_lock);
+ spin_lock(&call->notify_lock);
call->notify_rx = rxrpc_dummy_notify_rx;
- spin_unlock_bh(&call->notify_lock);
+ spin_unlock(&call->notify_lock);
}
mutex_unlock(&call->user_mutex);
@@ -812,14 +812,12 @@ static int rxrpc_shutdown(struct socket *sock, int flags)
lock_sock(sk);
- spin_lock_bh(&sk->sk_receive_queue.lock);
if (sk->sk_state < RXRPC_CLOSE) {
sk->sk_state = RXRPC_CLOSE;
sk->sk_shutdown = SHUTDOWN_MASK;
} else {
ret = -ESHUTDOWN;
}
- spin_unlock_bh(&sk->sk_receive_queue.lock);
rxrpc_discard_prealloc(rx);
@@ -872,9 +870,7 @@ static int rxrpc_release_sock(struct sock *sk)
break;
}
- spin_lock_bh(&sk->sk_receive_queue.lock);
sk->sk_state = RXRPC_CLOSE;
- spin_unlock_bh(&sk->sk_receive_queue.lock);
if (rx->local && rcu_access_pointer(rx->local->service) == rx) {
write_lock(&rx->local->services_lock);
@@ -888,8 +884,8 @@ static int rxrpc_release_sock(struct sock *sk)
flush_workqueue(rxrpc_workqueue);
rxrpc_purge_queue(&sk->sk_receive_queue);
- rxrpc_unuse_local(rx->local);
- rxrpc_put_local(rx->local);
+ rxrpc_unuse_local(rx->local, rxrpc_local_unuse_release_sock);
+ rxrpc_put_local(rx->local, rxrpc_local_put_release_sock);
rx->local = NULL;
key_put(rx->key);
rx->key = NULL;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index f5c538ce3e23..e7dccab7b741 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -36,6 +36,8 @@ struct rxrpc_txbuf;
* to pass supplementary information.
*/
enum rxrpc_skb_mark {
+ RXRPC_SKB_MARK_PACKET, /* Received packet */
+ RXRPC_SKB_MARK_ERROR, /* Error notification */
RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */
RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */
};
@@ -76,7 +78,7 @@ struct rxrpc_net {
bool kill_all_client_conns;
atomic_t nr_client_conns;
spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */
- spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */
+ struct mutex client_conn_discard_lock; /* Prevent multiple discarders */
struct list_head idle_client_conns;
struct work_struct client_conn_reaper;
struct timer_list client_conn_reap_timer;
@@ -99,6 +101,9 @@ struct rxrpc_net {
atomic_t stat_tx_data_retrans;
atomic_t stat_tx_data_send;
atomic_t stat_tx_data_send_frag;
+ atomic_t stat_tx_data_send_fail;
+ atomic_t stat_tx_data_underflow;
+ atomic_t stat_tx_data_cwnd_reset;
atomic_t stat_rx_data;
atomic_t stat_rx_data_reqack;
atomic_t stat_rx_data_jumbo;
@@ -110,6 +115,8 @@ struct rxrpc_net {
atomic_t stat_rx_acks[256];
atomic_t stat_why_req_ack[8];
+
+ atomic_t stat_io_loop;
};
/*
@@ -279,13 +286,11 @@ struct rxrpc_local {
struct rxrpc_net *rxnet; /* The network ns in which this resides */
struct hlist_node link;
struct socket *socket; /* my UDP socket */
- struct work_struct processor;
- struct list_head ack_tx_queue; /* List of ACKs that need sending */
- spinlock_t ack_tx_lock; /* ACK list lock */
+ struct task_struct *io_thread;
struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */
struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
- struct sk_buff_head reject_queue; /* packets awaiting rejection */
- struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */
+ struct sk_buff_head rx_queue; /* Received packets */
+ struct list_head call_attend_q; /* Calls requiring immediate attention */
struct rb_root client_bundles; /* Client connection bundles by socket params */
spinlock_t client_bundles_lock; /* Lock for client_bundles */
spinlock_t lock; /* access lock */
@@ -403,12 +408,18 @@ enum rxrpc_conn_proto_state {
* RxRPC client connection bundle.
*/
struct rxrpc_bundle {
- struct rxrpc_conn_parameters params;
+ struct rxrpc_local *local; /* Representation of local endpoint */
+ struct rxrpc_peer *peer; /* Remote endpoint */
+ struct key *key; /* Security details */
refcount_t ref;
atomic_t active; /* Number of active users */
unsigned int debug_id;
+ u32 security_level; /* Security level selected */
+ u16 service_id; /* Service ID for this connection */
bool try_upgrade; /* True if the bundle is attempting upgrade */
bool alloc_conn; /* True if someone's getting a conn */
+ bool exclusive; /* T if conn is exclusive */
+ bool upgrade; /* T if service ID can be upgraded */
short alloc_error; /* Error from last conn allocation */
spinlock_t channel_lock;
struct rb_node local_node; /* Node in local->client_conns */
@@ -424,9 +435,13 @@ struct rxrpc_bundle {
*/
struct rxrpc_connection {
struct rxrpc_conn_proto proto;
- struct rxrpc_conn_parameters params;
+ struct rxrpc_local *local; /* Representation of local endpoint */
+ struct rxrpc_peer *peer; /* Remote endpoint */
+ struct rxrpc_net *rxnet; /* Network namespace to which call belongs */
+ struct key *key; /* Security details */
refcount_t ref;
+ atomic_t active; /* Active count for service conns */
struct rcu_head rcu;
struct list_head cache_link;
@@ -447,6 +462,7 @@ struct rxrpc_connection {
struct timer_list timer; /* Conn event timer */
struct work_struct processor; /* connection event processor */
+ struct work_struct destructor; /* In-process-context destroyer */
struct rxrpc_bundle *bundle; /* Client connection bundle */
struct rb_node service_node; /* Node in peer->service_conns */
struct list_head proc_link; /* link in procfs list */
@@ -471,9 +487,13 @@ struct rxrpc_connection {
atomic_t serial; /* packet serial number counter */
unsigned int hi_serial; /* highest serial number received */
u32 service_id; /* Service ID, possibly upgraded */
+ u32 security_level; /* Security level selected */
u8 security_ix; /* security type */
u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */
u8 bundle_shift; /* Index into bundle->avail_chans */
+ bool exclusive; /* T if conn is exclusive */
+ bool upgrade; /* T if service ID can be upgraded */
+ u16 orig_service_id; /* Originally requested service ID */
short error; /* Local error code */
};
@@ -502,22 +522,19 @@ enum rxrpc_call_flag {
RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */
RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */
RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */
- RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */
RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */
RXRPC_CALL_KERNEL, /* The call was made by the kernel */
RXRPC_CALL_UPGRADE, /* Service upgrade was requested for the call */
- RXRPC_CALL_DELAY_ACK_PENDING, /* DELAY ACK generation is pending */
- RXRPC_CALL_IDLE_ACK_PENDING, /* IDLE ACK generation is pending */
+ RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */
+ RXRPC_CALL_RX_IS_IDLE, /* Reception is idle - send an ACK */
};
/*
* Events that can be raised on a call.
*/
enum rxrpc_call_event {
- RXRPC_CALL_EV_ABORT, /* need to generate abort */
- RXRPC_CALL_EV_RESEND, /* Tx resend required */
- RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */
RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */
+ RXRPC_CALL_EV_INITIAL_PING, /* Send initial ping for a new service call */
};
/*
@@ -570,10 +587,13 @@ struct rxrpc_call {
struct rcu_head rcu;
struct rxrpc_connection *conn; /* connection carrying call */
struct rxrpc_peer *peer; /* Peer record for remote address */
+ struct rxrpc_local *local; /* Representation of local endpoint */
struct rxrpc_sock __rcu *socket; /* socket responsible */
struct rxrpc_net *rxnet; /* Network namespace to which call belongs */
+ struct key *key; /* Security details */
const struct rxrpc_security *security; /* applied security module */
struct mutex user_mutex; /* User access mutex */
+ struct sockaddr_rxrpc dest_srx; /* Destination address */
unsigned long delay_ack_at; /* When DELAY ACK needs to happen */
unsigned long ack_lost_at; /* When ACK is figured as lost */
unsigned long resend_at; /* When next resend needs to happen */
@@ -585,7 +605,7 @@ struct rxrpc_call {
u32 next_rx_timo; /* Timeout for next Rx packet (jif) */
u32 next_req_timo; /* Timeout for next Rx request packet (jif) */
struct timer_list timer; /* Combined event timer */
- struct work_struct processor; /* Event processor */
+ struct work_struct destroyer; /* In-process-context destroyer */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
struct list_head link; /* link in master call list */
struct list_head chan_wait_link; /* Link in conn->bundle->waiting_calls */
@@ -594,6 +614,7 @@ struct rxrpc_call {
struct list_head recvmsg_link; /* Link in rx->recvmsg_q */
struct list_head sock_link; /* Link in rx->sock_calls */
struct rb_node sock_node; /* Node in rx->calls */
+ struct list_head attend_link; /* Link in local->call_attend_q */
struct rxrpc_txbuf *tx_pending; /* Tx buffer being filled */
wait_queue_head_t waitq; /* Wait queue for channel or Tx */
s64 tx_total_len; /* Total length left to be transmitted (or -1) */
@@ -607,20 +628,22 @@ struct rxrpc_call {
enum rxrpc_call_state state; /* current state of call */
enum rxrpc_call_completion completion; /* Call completion condition */
refcount_t ref;
- u16 service_id; /* service ID */
u8 security_ix; /* Security type */
enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */
u32 call_id; /* call ID on connection */
u32 cid; /* connection ID plus channel index */
+ u32 security_level; /* Security level selected */
int debug_id; /* debug ID for printks */
unsigned short rx_pkt_offset; /* Current recvmsg packet offset */
unsigned short rx_pkt_len; /* Current recvmsg packet len */
/* Transmitted data tracking. */
spinlock_t tx_lock; /* Transmit queue lock */
+ struct list_head tx_sendmsg; /* Sendmsg prepared packets */
struct list_head tx_buffer; /* Buffer of transmissible packets */
rxrpc_seq_t tx_bottom; /* First packet in buffer */
rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */
+ rxrpc_seq_t tx_prepared; /* Highest Tx slot prepared. */
rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */
u16 tx_backoff; /* Delay to insert due to Tx failure */
u8 tx_winsize; /* Maximum size of Tx window */
@@ -635,13 +658,13 @@ struct rxrpc_call {
rxrpc_seq_t rx_consumed; /* Highest packet consumed */
rxrpc_serial_t rx_serial; /* Highest serial received for this call */
u8 rx_winsize; /* Size of Rx window */
- spinlock_t input_lock; /* Lock for packet input to this call */
/* TCP-style slow-start congestion control [RFC5681]. Since the SMSS
* is fixed, we keep these numbers in terms of segments (ie. DATA
* packets) rather than bytes.
*/
#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN
+#define RXRPC_MIN_CWND (RXRPC_TX_SMSS > 2190 ? 2 : RXRPC_TX_SMSS > 1095 ? 3 : 4)
u8 cong_cwnd; /* Congestion window size */
u8 cong_extra; /* Extra to send for congestion management */
u8 cong_ssthresh; /* Slow-start threshold */
@@ -676,11 +699,7 @@ struct rxrpc_call {
rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */
rxrpc_seq_t acks_hard_ack; /* Latest hard-ack point */
rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
- rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */
- rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */
rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */
- struct sk_buff *acks_soft_tbl; /* The last ACK packet with NAKs in it */
- spinlock_t acks_ack_lock; /* Access to ->acks_last_ack */
};
/*
@@ -739,9 +758,8 @@ struct rxrpc_send_params {
*/
struct rxrpc_txbuf {
struct rcu_head rcu;
- struct list_head call_link; /* Link in call->tx_queue */
+ struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */
struct list_head tx_link; /* Link in live Enc queue or Tx queue */
- struct rxrpc_call *call; /* Call to which belongs */
ktime_t last_sent; /* Time at which last transmitted */
refcount_t ref;
rxrpc_seq_t seq; /* Sequence number of this packet */
@@ -793,9 +811,9 @@ extern struct workqueue_struct *rxrpc_workqueue;
*/
int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t);
void rxrpc_discard_prealloc(struct rxrpc_sock *);
-struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *,
- struct rxrpc_sock *,
- struct sk_buff *);
+bool rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_peer *,
+ struct rxrpc_connection *, struct sockaddr_rxrpc *,
+ struct sk_buff *);
void rxrpc_accept_incoming_calls(struct rxrpc_local *);
int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
@@ -808,14 +826,14 @@ void rxrpc_send_ACK(struct rxrpc_call *, u8, rxrpc_serial_t, enum rxrpc_propose_
void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t,
enum rxrpc_propose_ack_trace);
void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *);
-void rxrpc_process_call(struct work_struct *);
+void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb);
void rxrpc_reduce_call_timer(struct rxrpc_call *call,
unsigned long expire_at,
unsigned long now,
enum rxrpc_timer_trace why);
-void rxrpc_delete_call_timer(struct rxrpc_call *call);
+void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
/*
* call_object.c
@@ -824,6 +842,7 @@ extern const char *const rxrpc_call_states[];
extern const char *const rxrpc_call_completions[];
extern struct kmem_cache *rxrpc_call_jar;
+void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what);
struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int);
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
@@ -835,10 +854,8 @@ void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
struct sk_buff *);
void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
-bool __rxrpc_queue_call(struct rxrpc_call *);
-bool rxrpc_queue_call(struct rxrpc_call *);
-void rxrpc_see_call(struct rxrpc_call *);
-bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op);
+void rxrpc_see_call(struct rxrpc_call *, enum rxrpc_call_trace);
+struct rxrpc_call *rxrpc_try_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace);
void rxrpc_cleanup_call(struct rxrpc_call *);
@@ -863,14 +880,14 @@ extern unsigned long rxrpc_conn_idle_client_fast_expiry;
extern struct idr rxrpc_client_conn_ids;
void rxrpc_destroy_client_conn_ids(void);
-struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *);
-void rxrpc_put_bundle(struct rxrpc_bundle *);
+struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
+void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *,
struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *,
gfp_t);
void rxrpc_expose_client_call(struct rxrpc_call *);
void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *);
-void rxrpc_put_client_conn(struct rxrpc_connection *);
+void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace);
void rxrpc_discard_expired_client_conns(struct work_struct *);
void rxrpc_destroy_all_client_connections(struct rxrpc_net *);
void rxrpc_clean_up_local_conns(struct rxrpc_local *);
@@ -880,6 +897,7 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *);
*/
void rxrpc_process_connection(struct work_struct *);
void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool);
+int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
/*
* conn_object.c
@@ -887,18 +905,20 @@ void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool);
extern unsigned int rxrpc_connection_expiry;
extern unsigned int rxrpc_closed_conn_expiry;
-struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
-struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
- struct sk_buff *,
- struct rxrpc_peer **);
+struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *, gfp_t);
+struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *,
+ struct sockaddr_rxrpc *,
+ struct sk_buff *);
void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *);
void rxrpc_disconnect_call(struct rxrpc_call *);
-void rxrpc_kill_connection(struct rxrpc_connection *);
-bool rxrpc_queue_conn(struct rxrpc_connection *);
-void rxrpc_see_connection(struct rxrpc_connection *);
-struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *);
-struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *);
-void rxrpc_put_service_conn(struct rxrpc_connection *);
+void rxrpc_kill_client_conn(struct rxrpc_connection *);
+void rxrpc_queue_conn(struct rxrpc_connection *, enum rxrpc_conn_trace);
+void rxrpc_see_connection(struct rxrpc_connection *, enum rxrpc_conn_trace);
+struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *,
+ enum rxrpc_conn_trace);
+struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *,
+ enum rxrpc_conn_trace);
+void rxrpc_put_connection(struct rxrpc_connection *, enum rxrpc_conn_trace);
void rxrpc_service_connection_reaper(struct work_struct *);
void rxrpc_destroy_all_connections(struct rxrpc_net *);
@@ -912,17 +932,6 @@ static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn)
return !rxrpc_conn_is_client(conn);
}
-static inline void rxrpc_put_connection(struct rxrpc_connection *conn)
-{
- if (!conn)
- return;
-
- if (rxrpc_conn_is_client(conn))
- rxrpc_put_client_conn(conn);
- else
- rxrpc_put_service_conn(conn);
-}
-
static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn,
unsigned long expire_at)
{
@@ -942,7 +951,20 @@ void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
/*
* input.c
*/
-int rxrpc_input_packet(struct sock *, struct sk_buff *);
+void rxrpc_congestion_degrade(struct rxrpc_call *);
+void rxrpc_input_call_packet(struct rxrpc_call *, struct sk_buff *);
+void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *);
+
+/*
+ * io_thread.c
+ */
+int rxrpc_encap_rcv(struct sock *, struct sk_buff *);
+void rxrpc_error_report(struct sock *);
+int rxrpc_io_thread(void *data);
+static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local)
+{
+ wake_up_process(local->io_thread);
+}
/*
* insecure.c
@@ -961,28 +983,41 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t,
/*
* local_event.c
*/
-extern void rxrpc_process_local_events(struct rxrpc_local *);
+void rxrpc_send_version_request(struct rxrpc_local *local,
+ struct rxrpc_host_header *hdr,
+ struct sk_buff *skb);
/*
* local_object.c
*/
struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *);
-struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *);
-struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *);
-void rxrpc_put_local(struct rxrpc_local *);
-struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *);
-void rxrpc_unuse_local(struct rxrpc_local *);
-void rxrpc_queue_local(struct rxrpc_local *);
+struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *, enum rxrpc_local_trace);
+struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *, enum rxrpc_local_trace);
+void rxrpc_put_local(struct rxrpc_local *, enum rxrpc_local_trace);
+struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *, enum rxrpc_local_trace);
+void rxrpc_unuse_local(struct rxrpc_local *, enum rxrpc_local_trace);
+void rxrpc_destroy_local(struct rxrpc_local *local);
void rxrpc_destroy_all_locals(struct rxrpc_net *);
-static inline bool __rxrpc_unuse_local(struct rxrpc_local *local)
+static inline bool __rxrpc_use_local(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
{
- return atomic_dec_return(&local->active_users) == 0;
+ int r, u;
+
+ r = refcount_read(&local->ref);
+ u = atomic_fetch_add_unless(&local->active_users, 1, 0);
+ trace_rxrpc_local(local->debug_id, why, r, u);
+ return u != 0;
}
-static inline bool __rxrpc_use_local(struct rxrpc_local *local)
+static inline void rxrpc_see_local(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
{
- return atomic_fetch_add_unless(&local->active_users, 1, 0) != 0;
+ int r, u;
+
+ r = refcount_read(&local->ref);
+ u = atomic_read(&local->active_users);
+ trace_rxrpc_local(local->debug_id, why, r, u);
}
/*
@@ -1009,16 +1044,17 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
/*
* output.c
*/
-void rxrpc_transmit_ack_packets(struct rxrpc_local *);
+int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
int rxrpc_send_abort_packet(struct rxrpc_call *);
int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *);
-void rxrpc_reject_packets(struct rxrpc_local *);
+void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb);
void rxrpc_send_keepalive(struct rxrpc_peer *);
+void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
/*
* peer_event.c
*/
-void rxrpc_error_report(struct sock *);
+void rxrpc_input_error(struct rxrpc_local *, struct sk_buff *);
void rxrpc_peer_keepalive_worker(struct work_struct *);
/*
@@ -1028,14 +1064,15 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *,
const struct sockaddr_rxrpc *);
struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *,
struct sockaddr_rxrpc *, gfp_t);
-struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t);
+struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t,
+ enum rxrpc_peer_trace);
void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *,
struct rxrpc_peer *);
void rxrpc_destroy_all_peers(struct rxrpc_net *);
-struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *);
-struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *);
-void rxrpc_put_peer(struct rxrpc_peer *);
-void rxrpc_put_peer_locked(struct rxrpc_peer *);
+struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *, enum rxrpc_peer_trace);
+struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *, enum rxrpc_peer_trace);
+void rxrpc_put_peer(struct rxrpc_peer *, enum rxrpc_peer_trace);
+void rxrpc_put_peer_locked(struct rxrpc_peer *, enum rxrpc_peer_trace);
/*
* proc.c
@@ -1097,6 +1134,7 @@ extern const struct rxrpc_security rxkad;
int __init rxrpc_init_security(void);
const struct rxrpc_security *rxrpc_security_lookup(u8);
void rxrpc_exit_security(void);
+int rxrpc_init_client_call_security(struct rxrpc_call *);
int rxrpc_init_client_conn_security(struct rxrpc_connection *);
const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *,
struct sk_buff *);
@@ -1119,7 +1157,6 @@ int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int);
* skbuff.c
*/
void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *);
-void rxrpc_packet_destructor(struct sk_buff *);
void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace);
@@ -1190,23 +1227,17 @@ extern unsigned int rxrpc_debug;
#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
-#define kproto(FMT,...) dbgprintk("### "FMT ,##__VA_ARGS__)
-#define knet(FMT,...) dbgprintk("@@@ "FMT ,##__VA_ARGS__)
#if defined(__KDEBUG)
#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__)
#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__)
#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__)
-#define _proto(FMT,...) kproto(FMT,##__VA_ARGS__)
-#define _net(FMT,...) knet(FMT,##__VA_ARGS__)
#elif defined(CONFIG_AF_RXRPC_DEBUG)
#define RXRPC_DEBUG_KENTER 0x01
#define RXRPC_DEBUG_KLEAVE 0x02
#define RXRPC_DEBUG_KDEBUG 0x04
-#define RXRPC_DEBUG_KPROTO 0x08
-#define RXRPC_DEBUG_KNET 0x10
#define _enter(FMT,...) \
do { \
@@ -1226,24 +1257,10 @@ do { \
kdebug(FMT,##__VA_ARGS__); \
} while (0)
-#define _proto(FMT,...) \
-do { \
- if (unlikely(rxrpc_debug & RXRPC_DEBUG_KPROTO)) \
- kproto(FMT,##__VA_ARGS__); \
-} while (0)
-
-#define _net(FMT,...) \
-do { \
- if (unlikely(rxrpc_debug & RXRPC_DEBUG_KNET)) \
- knet(FMT,##__VA_ARGS__); \
-} while (0)
-
#else
#define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
#define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
#define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__)
-#define _proto(FMT,...) no_printk("### "FMT ,##__VA_ARGS__)
-#define _net(FMT,...) no_printk("@@@ "FMT ,##__VA_ARGS__)
#endif
/*
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 48790ee77019..d1850863507f 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -38,7 +38,6 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
unsigned long user_call_ID, gfp_t gfp,
unsigned int debug_id)
{
- const void *here = __builtin_return_address(0);
struct rxrpc_call *call, *xcall;
struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
struct rb_node *parent, **pp;
@@ -70,7 +69,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
head = b->peer_backlog_head;
tail = READ_ONCE(b->peer_backlog_tail);
if (CIRC_CNT(head, tail, size) < max) {
- struct rxrpc_peer *peer = rxrpc_alloc_peer(rx->local, gfp);
+ struct rxrpc_peer *peer;
+
+ peer = rxrpc_alloc_peer(rx->local, gfp, rxrpc_peer_new_prealloc);
if (!peer)
return -ENOMEM;
b->peer_backlog[head] = peer;
@@ -89,9 +90,6 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
b->conn_backlog[head] = conn;
smp_store_release(&b->conn_backlog_head,
(head + 1) & (size - 1));
-
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
- refcount_read(&conn->ref), here);
}
/* Now it gets complicated, because calls get registered with the
@@ -102,10 +100,10 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
return -ENOMEM;
call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
call->state = RXRPC_CALL_SERVER_PREALLOC;
+ __set_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events);
- trace_rxrpc_call(call->debug_id, rxrpc_call_new_service,
- refcount_read(&call->ref),
- here, (const void *)user_call_ID);
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
+ user_call_ID, rxrpc_call_new_prealloc_service);
write_lock(&rx->call_lock);
@@ -126,11 +124,11 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
call->user_call_ID = user_call_ID;
call->notify_rx = notify_rx;
if (user_attach_call) {
- rxrpc_get_call(call, rxrpc_call_got_kernel);
+ rxrpc_get_call(call, rxrpc_call_get_kernel_service);
user_attach_call(call, user_call_ID);
}
- rxrpc_get_call(call, rxrpc_call_got_userid);
+ rxrpc_get_call(call, rxrpc_call_get_userid);
rb_link_node(&call->sock_node, parent, pp);
rb_insert_color(&call->sock_node, &rx->calls);
set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
@@ -140,9 +138,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
write_unlock(&rx->call_lock);
rxnet = call->rxnet;
- spin_lock_bh(&rxnet->call_lock);
+ spin_lock(&rxnet->call_lock);
list_add_tail_rcu(&call->link, &rxnet->calls);
- spin_unlock_bh(&rxnet->call_lock);
+ spin_unlock(&rxnet->call_lock);
b->call_backlog[call_head] = call;
smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1));
@@ -190,14 +188,14 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
/* Make sure that there aren't any incoming calls in progress before we
* clear the preallocation buffers.
*/
- spin_lock_bh(&rx->incoming_lock);
- spin_unlock_bh(&rx->incoming_lock);
+ spin_lock(&rx->incoming_lock);
+ spin_unlock(&rx->incoming_lock);
head = b->peer_backlog_head;
tail = b->peer_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_peer *peer = b->peer_backlog[tail];
- rxrpc_put_local(peer->local);
+ rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_conn);
kfree(peer);
tail = (tail + 1) & (size - 1);
}
@@ -230,7 +228,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
}
rxrpc_call_completed(call);
rxrpc_release_call(rx, call);
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_discard_prealloc);
tail = (tail + 1) & (size - 1);
}
@@ -238,21 +236,6 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
}
/*
- * Ping the other end to fill our RTT cache and to retrieve the rwind
- * and MTU parameters.
- */
-static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb)
-{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- ktime_t now = skb->tstamp;
-
- if (call->peer->rtt_count < 3 ||
- ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
- rxrpc_send_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
- rxrpc_propose_ack_ping_for_params);
-}
-
-/*
* Allocate a new incoming call from the prealloc pool, along with a connection
* and a peer as necessary.
*/
@@ -261,6 +244,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
struct rxrpc_peer *peer,
struct rxrpc_connection *conn,
const struct rxrpc_security *sec,
+ struct sockaddr_rxrpc *peer_srx,
struct sk_buff *skb)
{
struct rxrpc_backlog *b = rx->backlog;
@@ -286,12 +270,11 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
return NULL;
if (!conn) {
- if (peer && !rxrpc_get_peer_maybe(peer))
+ if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_service_conn))
peer = NULL;
if (!peer) {
peer = b->peer_backlog[peer_tail];
- if (rxrpc_extract_addr_from_skb(&peer->srx, skb) < 0)
- return NULL;
+ peer->srx = *peer_srx;
b->peer_backlog[peer_tail] = NULL;
smp_store_release(&b->peer_backlog_tail,
(peer_tail + 1) &
@@ -305,12 +288,13 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
b->conn_backlog[conn_tail] = NULL;
smp_store_release(&b->conn_backlog_tail,
(conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
- conn->params.local = rxrpc_get_local(local);
- conn->params.peer = peer;
- rxrpc_see_connection(conn);
+ conn->local = rxrpc_get_local(local, rxrpc_local_get_prealloc_conn);
+ conn->peer = peer;
+ rxrpc_see_connection(conn, rxrpc_conn_see_new_service_conn);
rxrpc_new_incoming_connection(rx, conn, sec, skb);
} else {
- rxrpc_get_connection(conn);
+ rxrpc_get_connection(conn, rxrpc_conn_get_service_conn);
+ atomic_inc(&conn->active);
}
/* And now we can allocate and set up a new call */
@@ -319,43 +303,69 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
smp_store_release(&b->call_backlog_tail,
(call_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
- rxrpc_see_call(call);
+ rxrpc_see_call(call, rxrpc_call_see_accept);
+ call->local = rxrpc_get_local(conn->local, rxrpc_local_get_call);
call->conn = conn;
call->security = conn->security;
call->security_ix = conn->security_ix;
- call->peer = rxrpc_get_peer(conn->params.peer);
+ call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_accept);
+ call->dest_srx = peer->srx;
call->cong_ssthresh = call->peer->cong_ssthresh;
call->tx_last_sent = ktime_get_real();
return call;
}
/*
- * Set up a new incoming call. Called in BH context with the RCU read lock
- * held.
+ * Set up a new incoming call. Called from the I/O thread.
*
* If this is for a kernel service, when we allocate the call, it will have
* three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the
* retainer ref obtained from the backlog buffer. Prealloc calls for userspace
- * services only have the ref from the backlog buffer. We want to pass this
- * ref to non-BH context to dispose of.
+ * services only have the ref from the backlog buffer.
*
* If we want to report an error, we mark the skb with the packet type and
- * abort code and return NULL.
- *
- * The call is returned with the user access mutex held.
+ * abort code and return false.
*/
-struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
- struct rxrpc_sock *rx,
- struct sk_buff *skb)
+bool rxrpc_new_incoming_call(struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
const struct rxrpc_security *sec = NULL;
- struct rxrpc_connection *conn;
- struct rxrpc_peer *peer = NULL;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_call *call = NULL;
+ struct rxrpc_sock *rx;
_enter("");
+ /* Don't set up a call for anything other than the first DATA packet. */
+ if (sp->hdr.seq != 1 ||
+ sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
+ return true; /* Just discard */
+
+ rcu_read_lock();
+
+ /* Weed out packets to services we're not offering. Packets that would
+ * begin a call are explicitly rejected and the rest are just
+ * discarded.
+ */
+ rx = rcu_dereference(local->service);
+ if (!rx || (sp->hdr.serviceId != rx->srx.srx_service &&
+ sp->hdr.serviceId != rx->second_service)
+ ) {
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
+ sp->hdr.seq == 1)
+ goto unsupported_service;
+ goto discard;
+ }
+
+ if (!conn) {
+ sec = rxrpc_get_incoming_security(rx, skb);
+ if (!sec)
+ goto reject;
+ }
+
spin_lock(&rx->incoming_lock);
if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED ||
rx->sk.sk_state == RXRPC_CLOSE) {
@@ -366,20 +376,8 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
goto no_call;
}
- /* The peer, connection and call may all have sprung into existence due
- * to a duplicate packet being handled on another CPU in parallel, so
- * we have to recheck the routing. However, we're now holding
- * rx->incoming_lock, so the values should remain stable.
- */
- conn = rxrpc_find_connection_rcu(local, skb, &peer);
-
- if (!conn) {
- sec = rxrpc_get_incoming_security(rx, skb);
- if (!sec)
- goto no_call;
- }
-
- call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, skb);
+ call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, peer_srx,
+ skb);
if (!call) {
skb->mark = RXRPC_SKB_MARK_REJECT_BUSY;
goto no_call;
@@ -396,50 +394,41 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
rx->notify_new_call(&rx->sk, call, call->user_call_ID);
spin_lock(&conn->state_lock);
- switch (conn->state) {
- case RXRPC_CONN_SERVICE_UNSECURED:
+ if (conn->state == RXRPC_CONN_SERVICE_UNSECURED) {
conn->state = RXRPC_CONN_SERVICE_CHALLENGING;
set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events);
- rxrpc_queue_conn(call->conn);
- break;
-
- case RXRPC_CONN_SERVICE:
- write_lock(&call->state_lock);
- if (call->state < RXRPC_CALL_COMPLETE)
- call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
- write_unlock(&call->state_lock);
- break;
-
- case RXRPC_CONN_REMOTELY_ABORTED:
- rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- conn->abort_code, conn->error);
- break;
- case RXRPC_CONN_LOCALLY_ABORTED:
- rxrpc_abort_call("CON", call, sp->hdr.seq,
- conn->abort_code, conn->error);
- break;
- default:
- BUG();
+ rxrpc_queue_conn(call->conn, rxrpc_conn_queue_challenge);
}
spin_unlock(&conn->state_lock);
- spin_unlock(&rx->incoming_lock);
- rxrpc_send_ping(call, skb);
+ spin_unlock(&rx->incoming_lock);
+ rcu_read_unlock();
- /* We have to discard the prealloc queue's ref here and rely on a
- * combination of the RCU read lock and refs held either by the socket
- * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel
- * service to prevent the call from being deallocated too early.
- */
- rxrpc_put_call(call, rxrpc_call_put);
+ if (hlist_unhashed(&call->error_link)) {
+ spin_lock(&call->peer->lock);
+ hlist_add_head(&call->error_link, &call->peer->error_targets);
+ spin_unlock(&call->peer->lock);
+ }
_leave(" = %p{%d}", call, call->debug_id);
- return call;
-
+ rxrpc_input_call_event(call, skb);
+ rxrpc_put_call(call, rxrpc_call_put_input);
+ return true;
+
+unsupported_service:
+ trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RX_INVALID_OPERATION, EOPNOTSUPP);
+ skb->priority = RX_INVALID_OPERATION;
+ goto reject;
no_call:
spin_unlock(&rx->incoming_lock);
- _leave(" = NULL [%u]", skb->mark);
- return NULL;
+reject:
+ rcu_read_unlock();
+ _leave(" = f [%u]", skb->mark);
+ return false;
+discard:
+ rcu_read_unlock();
+ return true;
}
/*
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 1e21a708390e..b2cf448fb02c 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -69,21 +69,15 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
{
- struct rxrpc_local *local = call->conn->params.local;
struct rxrpc_txbuf *txb;
if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
return;
- if (ack_reason == RXRPC_ACK_DELAY &&
- test_and_set_bit(RXRPC_CALL_DELAY_ACK_PENDING, &call->flags)) {
- trace_rxrpc_drop_ack(call, why, ack_reason, serial, false);
- return;
- }
rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);
txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_ACK,
- in_softirq() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS);
+ rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS);
if (!txb) {
kleave(" = -ENOMEM");
return;
@@ -101,22 +95,9 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
txb->ack.reason = ack_reason;
txb->ack.nAcks = 0;
- if (!rxrpc_try_get_call(call, rxrpc_call_got)) {
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_nomem);
- return;
- }
-
- spin_lock_bh(&local->ack_tx_lock);
- list_add_tail(&txb->tx_link, &local->ack_tx_queue);
- spin_unlock_bh(&local->ack_tx_lock);
trace_rxrpc_send_ack(call, why, ack_reason, serial);
-
- if (in_task()) {
- rxrpc_transmit_ack_packets(call->peer->local);
- } else {
- rxrpc_get_local(local);
- rxrpc_queue_local(local);
- }
+ rxrpc_send_ack_packet(call, txb);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx);
}
/*
@@ -130,11 +111,10 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
/*
* Perform retransmission of NAK'd and unack'd packets.
*/
-static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
+void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
{
struct rxrpc_ackpacket *ack = NULL;
struct rxrpc_txbuf *txb;
- struct sk_buff *ack_skb = NULL;
unsigned long resend_at;
rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted);
ktime_t now, max_age, oldest, ack_ts;
@@ -148,32 +128,21 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j));
oldest = now;
- /* See if there's an ACK saved with a soft-ACK table in it. */
- if (call->acks_soft_tbl) {
- spin_lock_bh(&call->acks_ack_lock);
- ack_skb = call->acks_soft_tbl;
- if (ack_skb) {
- rxrpc_get_skb(ack_skb, rxrpc_skb_ack);
- ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
- }
- spin_unlock_bh(&call->acks_ack_lock);
- }
-
if (list_empty(&call->tx_buffer))
goto no_resend;
- spin_lock(&call->tx_lock);
-
if (list_empty(&call->tx_buffer))
goto no_further_resend;
- trace_rxrpc_resend(call);
+ trace_rxrpc_resend(call, ack_skb);
txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link);
/* Scan the soft ACK table without dropping the lock and resend any
* explicitly NAK'd packets.
*/
- if (ack) {
+ if (ack_skb) {
+ ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
+
for (i = 0; i < ack->nAcks; i++) {
rxrpc_seq_t seq;
@@ -197,8 +166,6 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked);
if (list_empty(&txb->tx_link)) {
- rxrpc_get_txbuf(txb, rxrpc_txbuf_get_retrans);
- rxrpc_get_call(call, rxrpc_call_got_tx);
list_add_tail(&txb->tx_link, &retrans_queue);
set_bit(RXRPC_TXBUF_RESENT, &txb->flags);
}
@@ -242,7 +209,6 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
do_resend:
unacked = true;
if (list_empty(&txb->tx_link)) {
- rxrpc_get_txbuf(txb, rxrpc_txbuf_get_retrans);
list_add_tail(&txb->tx_link, &retrans_queue);
set_bit(RXRPC_TXBUF_RESENT, &txb->flags);
rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);
@@ -250,10 +216,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
}
no_further_resend:
- spin_unlock(&call->tx_lock);
no_resend:
- rxrpc_free_skb(ack_skb, rxrpc_skb_freed);
-
resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
resend_at += jiffies + rxrpc_get_rto_backoff(call->peer,
!list_empty(&retrans_queue));
@@ -267,7 +230,7 @@ no_resend:
* retransmitting data.
*/
if (list_empty(&retrans_queue)) {
- rxrpc_reduce_call_timer(call, resend_at, now_j,
+ rxrpc_reduce_call_timer(call, resend_at, jiffies,
rxrpc_timer_set_for_resend);
ack_ts = ktime_sub(now, call->acks_latest_ts);
if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3))
@@ -277,76 +240,134 @@ no_resend:
goto out;
}
+ /* Retransmit the queue */
while ((txb = list_first_entry_or_null(&retrans_queue,
struct rxrpc_txbuf, tx_link))) {
list_del_init(&txb->tx_link);
- rxrpc_send_data_packet(call, txb);
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_trans);
-
- trace_rxrpc_retransmit(call, txb->seq,
- ktime_to_ns(ktime_sub(txb->last_sent,
- max_age)));
+ rxrpc_transmit_one(call, txb);
}
out:
_leave("");
}
+static bool rxrpc_tx_window_has_space(struct rxrpc_call *call)
+{
+ unsigned int winsize = min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra);
+ rxrpc_seq_t window = call->acks_hard_ack, wtop = window + winsize;
+ rxrpc_seq_t tx_top = call->tx_top;
+ int space;
+
+ space = wtop - tx_top;
+ return space > 0;
+}
+
+/*
+ * Decant some of the sendmsg prepared queue into the transmission buffer.
+ */
+static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
+{
+ struct rxrpc_txbuf *txb;
+
+ if (rxrpc_is_client_call(call) &&
+ !test_bit(RXRPC_CALL_EXPOSED, &call->flags))
+ rxrpc_expose_client_call(call);
+
+ while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
+ struct rxrpc_txbuf, call_link))) {
+ spin_lock(&call->tx_lock);
+ list_del(&txb->call_link);
+ spin_unlock(&call->tx_lock);
+
+ call->tx_top = txb->seq;
+ list_add_tail(&txb->call_link, &call->tx_buffer);
+
+ rxrpc_transmit_one(call, txb);
+
+ if (!rxrpc_tx_window_has_space(call))
+ break;
+ }
+}
+
+static void rxrpc_transmit_some_data(struct rxrpc_call *call)
+{
+ switch (call->state) {
+ case RXRPC_CALL_SERVER_ACK_REQUEST:
+ if (list_empty(&call->tx_sendmsg))
+ return;
+ fallthrough;
+
+ case RXRPC_CALL_SERVER_SEND_REPLY:
+ case RXRPC_CALL_SERVER_AWAIT_ACK:
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ if (!rxrpc_tx_window_has_space(call))
+ return;
+ if (list_empty(&call->tx_sendmsg)) {
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow);
+ return;
+ }
+ rxrpc_decant_prepared_tx(call);
+ break;
+ default:
+ return;
+ }
+}
+
+/*
+ * Ping the other end to fill our RTT cache and to retrieve the rwind
+ * and MTU parameters.
+ */
+static void rxrpc_send_initial_ping(struct rxrpc_call *call)
+{
+ if (call->peer->rtt_count < 3 ||
+ ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+ ktime_get_real()))
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_params);
+}
+
/*
* Handle retransmission and deferred ACK/abort generation.
*/
-void rxrpc_process_call(struct work_struct *work)
+void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
{
- struct rxrpc_call *call =
- container_of(work, struct rxrpc_call, processor);
unsigned long now, next, t;
- unsigned int iterations = 0;
rxrpc_serial_t ackr_serial;
+ bool resend = false, expired = false;
- rxrpc_see_call(call);
+ rxrpc_see_call(call, rxrpc_call_see_input);
//printk("\n--------------------\n");
_enter("{%d,%s,%lx}",
call->debug_id, rxrpc_call_states[call->state], call->events);
-recheck_state:
- /* Limit the number of times we do this before returning to the manager */
- iterations++;
- if (iterations > 5)
- goto requeue;
-
- if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
- rxrpc_send_abort_packet(call);
- goto recheck_state;
- }
-
- if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom)
- rxrpc_shrink_call_tx_buffer(call);
+ if (call->state == RXRPC_CALL_COMPLETE)
+ goto out;
- if (call->state == RXRPC_CALL_COMPLETE) {
- rxrpc_delete_call_timer(call);
- goto out_put;
- }
+ if (skb && skb->mark == RXRPC_SKB_MARK_ERROR)
+ goto out;
- /* Work out if any timeouts tripped */
+ /* If we see our async-event poke, check for timeout trippage. */
now = jiffies;
t = READ_ONCE(call->expect_rx_by);
if (time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now);
- set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+ expired = true;
}
t = READ_ONCE(call->expect_req_by);
if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST &&
time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now);
- set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+ expired = true;
}
t = READ_ONCE(call->expect_term_by);
if (time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now);
- set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
+ expired = true;
}
t = READ_ONCE(call->delay_ack_at);
@@ -385,11 +406,26 @@ recheck_state:
if (time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now);
cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET);
- set_bit(RXRPC_CALL_EV_RESEND, &call->events);
+ resend = true;
}
+ if (skb)
+ rxrpc_input_call_packet(call, skb);
+
+ rxrpc_transmit_some_data(call);
+
+ if (skb) {
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK)
+ rxrpc_congestion_degrade(call);
+ }
+
+ if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events))
+ rxrpc_send_initial_ping(call);
+
/* Process events */
- if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) {
+ if (expired) {
if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
(int)call->conn->hi_serial - (int)call->rx_serial > 0) {
trace_rxrpc_call_reset(call);
@@ -397,52 +433,50 @@ recheck_state:
} else {
rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
}
- set_bit(RXRPC_CALL_EV_ABORT, &call->events);
- goto recheck_state;
+ rxrpc_send_abort_packet(call);
+ goto out;
}
- if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) {
- call->acks_lost_top = call->tx_top;
+ if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_lost_ack);
- }
- if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) &&
- call->state != RXRPC_CALL_CLIENT_RECV_REPLY) {
- rxrpc_resend(call, now);
- goto recheck_state;
- }
+ if (resend && call->state != RXRPC_CALL_CLIENT_RECV_REPLY)
+ rxrpc_resend(call, NULL);
+
+ if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
+ rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
+ rxrpc_propose_ack_rx_idle);
+
+ if (atomic_read(&call->ackr_nr_unacked) > 2)
+ rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
+ rxrpc_propose_ack_input_data);
/* Make sure the timer is restarted */
- next = call->expect_rx_by;
+ if (call->state != RXRPC_CALL_COMPLETE) {
+ next = call->expect_rx_by;
#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
- set(call->expect_req_by);
- set(call->expect_term_by);
- set(call->delay_ack_at);
- set(call->ack_lost_at);
- set(call->resend_at);
- set(call->keepalive_at);
- set(call->ping_at);
-
- now = jiffies;
- if (time_after_eq(now, next))
- goto recheck_state;
+ set(call->expect_req_by);
+ set(call->expect_term_by);
+ set(call->delay_ack_at);
+ set(call->ack_lost_at);
+ set(call->resend_at);
+ set(call->keepalive_at);
+ set(call->ping_at);
- rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
+ now = jiffies;
+ if (time_after_eq(now, next))
+ rxrpc_poke_call(call, rxrpc_call_poke_timer_now);
- /* other events may have been raised since we started checking */
- if (call->events && call->state < RXRPC_CALL_COMPLETE)
- goto requeue;
+ rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
+ }
-out_put:
- rxrpc_put_call(call, rxrpc_call_put);
out:
+ if (call->state == RXRPC_CALL_COMPLETE)
+ del_timer_sync(&call->timer);
+ if (call->acks_hard_ack != call->tx_bottom)
+ rxrpc_shrink_call_tx_buffer(call);
_leave("");
- return;
-
-requeue:
- __rxrpc_queue_call(call);
- goto out;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 1befe22cd301..be5eb8cdf549 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -45,6 +45,24 @@ static struct semaphore rxrpc_call_limiter =
static struct semaphore rxrpc_kernel_call_limiter =
__SEMAPHORE_INITIALIZER(rxrpc_kernel_call_limiter, 1000);
+void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what)
+{
+ struct rxrpc_local *local = call->local;
+ bool busy;
+
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ spin_lock_bh(&local->lock);
+ busy = !list_empty(&call->attend_link);
+ trace_rxrpc_poke_call(call, busy, what);
+ if (!busy) {
+ rxrpc_get_call(call, rxrpc_call_get_poke);
+ list_add_tail(&call->attend_link, &local->call_attend_q);
+ }
+ spin_unlock_bh(&local->lock);
+ rxrpc_wake_up_io_thread(local);
+ }
+}
+
static void rxrpc_call_timer_expired(struct timer_list *t)
{
struct rxrpc_call *call = from_timer(call, t, timer);
@@ -53,9 +71,7 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
if (call->state < RXRPC_CALL_COMPLETE) {
trace_rxrpc_timer_expired(call, jiffies);
- __rxrpc_queue_call(call);
- } else {
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_poke_call(call, rxrpc_call_poke_timer);
}
}
@@ -64,21 +80,14 @@ void rxrpc_reduce_call_timer(struct rxrpc_call *call,
unsigned long now,
enum rxrpc_timer_trace why)
{
- if (rxrpc_try_get_call(call, rxrpc_call_got_timer)) {
- trace_rxrpc_timer(call, why, now);
- if (timer_reduce(&call->timer, expire_at))
- rxrpc_put_call(call, rxrpc_call_put_notimer);
- }
-}
-
-void rxrpc_delete_call_timer(struct rxrpc_call *call)
-{
- if (del_timer_sync(&call->timer))
- rxrpc_put_call(call, rxrpc_call_put_timer);
+ trace_rxrpc_timer(call, why, now);
+ timer_reduce(&call->timer, expire_at);
}
static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
+static void rxrpc_destroy_call(struct work_struct *);
+
/*
* find an extant server call
* - called in process context with IRQs enabled
@@ -110,7 +119,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx,
return NULL;
found_extant_call:
- rxrpc_get_call(call, rxrpc_call_got);
+ rxrpc_get_call(call, rxrpc_call_get_sendmsg);
read_unlock(&rx->call_lock);
_leave(" = %p [%d]", call, refcount_read(&call->ref));
return call;
@@ -139,20 +148,20 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
&rxrpc_call_user_mutex_lock_class_key);
timer_setup(&call->timer, rxrpc_call_timer_expired, 0);
- INIT_WORK(&call->processor, &rxrpc_process_call);
+ INIT_WORK(&call->destroyer, rxrpc_destroy_call);
INIT_LIST_HEAD(&call->link);
INIT_LIST_HEAD(&call->chan_wait_link);
INIT_LIST_HEAD(&call->accept_link);
INIT_LIST_HEAD(&call->recvmsg_link);
INIT_LIST_HEAD(&call->sock_link);
+ INIT_LIST_HEAD(&call->attend_link);
+ INIT_LIST_HEAD(&call->tx_sendmsg);
INIT_LIST_HEAD(&call->tx_buffer);
skb_queue_head_init(&call->recvmsg_queue);
skb_queue_head_init(&call->rx_oos_queue);
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->notify_lock);
spin_lock_init(&call->tx_lock);
- spin_lock_init(&call->input_lock);
- spin_lock_init(&call->acks_ack_lock);
rwlock_init(&call->state_lock);
refcount_set(&call->ref, 1);
call->debug_id = debug_id;
@@ -185,22 +194,45 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
*/
static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
struct sockaddr_rxrpc *srx,
+ struct rxrpc_conn_parameters *cp,
+ struct rxrpc_call_params *p,
gfp_t gfp,
unsigned int debug_id)
{
struct rxrpc_call *call;
ktime_t now;
+ int ret;
_enter("");
call = rxrpc_alloc_call(rx, gfp, debug_id);
if (!call)
return ERR_PTR(-ENOMEM);
- call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
- call->service_id = srx->srx_service;
now = ktime_get_real();
- call->acks_latest_ts = now;
- call->cong_tstamp = now;
+ call->acks_latest_ts = now;
+ call->cong_tstamp = now;
+ call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
+ call->dest_srx = *srx;
+ call->interruptibility = p->interruptibility;
+ call->tx_total_len = p->tx_total_len;
+ call->key = key_get(cp->key);
+ call->local = rxrpc_get_local(cp->local, rxrpc_local_get_call);
+ if (p->kernel)
+ __set_bit(RXRPC_CALL_KERNEL, &call->flags);
+ if (cp->upgrade)
+ __set_bit(RXRPC_CALL_UPGRADE, &call->flags);
+ if (cp->exclusive)
+ __set_bit(RXRPC_CALL_EXCLUSIVE, &call->flags);
+
+ ret = rxrpc_init_client_call_security(call);
+ if (ret < 0) {
+ __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
+ rxrpc_put_call(call, rxrpc_call_put_discard_error);
+ return ERR_PTR(ret);
+ }
+
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
+ p->user_call_ID, rxrpc_call_new_client);
_leave(" = %p", call);
return call;
@@ -218,6 +250,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call)
call->ack_lost_at = j;
call->resend_at = j;
call->ping_at = j;
+ call->keepalive_at = j;
call->expect_rx_by = j;
call->expect_req_by = j;
call->expect_term_by = j;
@@ -270,7 +303,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
struct rxrpc_net *rxnet;
struct semaphore *limiter;
struct rb_node *parent, **pp;
- const void *here = __builtin_return_address(0);
int ret;
_enter("%p,%lx", rx, p->user_call_ID);
@@ -281,7 +313,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
return ERR_PTR(-ERESTARTSYS);
}
- call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id);
+ call = rxrpc_alloc_client_call(rx, srx, cp, p, gfp, debug_id);
if (IS_ERR(call)) {
release_sock(&rx->sk);
up(limiter);
@@ -289,14 +321,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
return call;
}
- call->interruptibility = p->interruptibility;
- call->tx_total_len = p->tx_total_len;
- trace_rxrpc_call(call->debug_id, rxrpc_call_new_client,
- refcount_read(&call->ref),
- here, (const void *)p->user_call_ID);
- if (p->kernel)
- __set_bit(RXRPC_CALL_KERNEL, &call->flags);
-
/* We need to protect a partially set up call against the user as we
* will be acting outside the socket lock.
*/
@@ -322,7 +346,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
rcu_assign_pointer(call->socket, rx);
call->user_call_ID = p->user_call_ID;
__set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
- rxrpc_get_call(call, rxrpc_call_got_userid);
+ rxrpc_get_call(call, rxrpc_call_get_userid);
rb_link_node(&call->sock_node, parent, pp);
rb_insert_color(&call->sock_node, &rx->calls);
list_add(&call->sock_link, &rx->sock_calls);
@@ -330,9 +354,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
write_unlock(&rx->call_lock);
rxnet = call->rxnet;
- spin_lock_bh(&rxnet->call_lock);
+ spin_lock(&rxnet->call_lock);
list_add_tail_rcu(&call->link, &rxnet->calls);
- spin_unlock_bh(&rxnet->call_lock);
+ spin_unlock(&rxnet->call_lock);
/* From this point on, the call is protected by its own lock. */
release_sock(&rx->sk);
@@ -344,13 +368,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
if (ret < 0)
goto error_attached_to_socket;
- trace_rxrpc_call(call->debug_id, rxrpc_call_connected,
- refcount_read(&call->ref), here, NULL);
+ rxrpc_see_call(call, rxrpc_call_see_connected);
rxrpc_start_call_timer(call);
- _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
-
_leave(" = %p [new]", call);
return call;
@@ -364,11 +385,11 @@ error_dup_user_ID:
release_sock(&rx->sk);
__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
RX_CALL_DEAD, -EEXIST);
- trace_rxrpc_call(call->debug_id, rxrpc_call_error,
- refcount_read(&call->ref), here, ERR_PTR(-EEXIST));
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0,
+ rxrpc_call_see_userid_exists);
rxrpc_release_call(rx, call);
mutex_unlock(&call->user_mutex);
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_userid_exists);
_leave(" = -EEXIST");
return ERR_PTR(-EEXIST);
@@ -378,8 +399,8 @@ error_dup_user_ID:
* leave the error to recvmsg() to deal with.
*/
error_attached_to_socket:
- trace_rxrpc_call(call->debug_id, rxrpc_call_error,
- refcount_read(&call->ref), here, ERR_PTR(ret));
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret,
+ rxrpc_call_see_connect_failed);
set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
RX_CALL_DEAD, ret);
@@ -403,11 +424,34 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
rcu_assign_pointer(call->socket, rx);
call->call_id = sp->hdr.callNumber;
- call->service_id = sp->hdr.serviceId;
+ call->dest_srx.srx_service = sp->hdr.serviceId;
call->cid = sp->hdr.cid;
call->state = RXRPC_CALL_SERVER_SECURING;
call->cong_tstamp = skb->tstamp;
+ spin_lock(&conn->state_lock);
+
+ switch (conn->state) {
+ case RXRPC_CONN_SERVICE_UNSECURED:
+ case RXRPC_CONN_SERVICE_CHALLENGING:
+ call->state = RXRPC_CALL_SERVER_SECURING;
+ break;
+ case RXRPC_CONN_SERVICE:
+ call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
+ break;
+
+ case RXRPC_CONN_REMOTELY_ABORTED:
+ __rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+ conn->abort_code, conn->error);
+ break;
+ case RXRPC_CONN_LOCALLY_ABORTED:
+ __rxrpc_abort_call("CON", call, 1,
+ conn->abort_code, conn->error);
+ break;
+ default:
+ BUG();
+ }
+
/* Set the channel for this call. We don't get channel_lock as we're
* only defending against the data_ready handler (which we're called
* from) and the RESPONSE packet parser (which is only really
@@ -418,86 +462,48 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
conn->channels[chan].call_counter = call->call_id;
conn->channels[chan].call_id = call->call_id;
rcu_assign_pointer(conn->channels[chan].call, call);
+ spin_unlock(&conn->state_lock);
- spin_lock(&conn->params.peer->lock);
- hlist_add_head_rcu(&call->error_link, &conn->params.peer->error_targets);
- spin_unlock(&conn->params.peer->lock);
-
- _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
+ spin_lock(&conn->peer->lock);
+ hlist_add_head(&call->error_link, &conn->peer->error_targets);
+ spin_unlock(&conn->peer->lock);
rxrpc_start_call_timer(call);
_leave("");
}
/*
- * Queue a call's work processor, getting a ref to pass to the work queue.
- */
-bool rxrpc_queue_call(struct rxrpc_call *call)
-{
- const void *here = __builtin_return_address(0);
- int n;
-
- if (!__refcount_inc_not_zero(&call->ref, &n))
- return false;
- if (rxrpc_queue_work(&call->processor))
- trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1,
- here, NULL);
- else
- rxrpc_put_call(call, rxrpc_call_put_noqueue);
- return true;
-}
-
-/*
- * Queue a call's work processor, passing the callers ref to the work queue.
- */
-bool __rxrpc_queue_call(struct rxrpc_call *call)
-{
- const void *here = __builtin_return_address(0);
- int n = refcount_read(&call->ref);
- ASSERTCMP(n, >=, 1);
- if (rxrpc_queue_work(&call->processor))
- trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n,
- here, NULL);
- else
- rxrpc_put_call(call, rxrpc_call_put_noqueue);
- return true;
-}
-
-/*
* Note the re-emergence of a call.
*/
-void rxrpc_see_call(struct rxrpc_call *call)
+void rxrpc_see_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
{
- const void *here = __builtin_return_address(0);
if (call) {
- int n = refcount_read(&call->ref);
+ int r = refcount_read(&call->ref);
- trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n,
- here, NULL);
+ trace_rxrpc_call(call->debug_id, r, 0, why);
}
}
-bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
+struct rxrpc_call *rxrpc_try_get_call(struct rxrpc_call *call,
+ enum rxrpc_call_trace why)
{
- const void *here = __builtin_return_address(0);
- int n;
+ int r;
- if (!__refcount_inc_not_zero(&call->ref, &n))
- return false;
- trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL);
- return true;
+ if (!call || !__refcount_inc_not_zero(&call->ref, &r))
+ return NULL;
+ trace_rxrpc_call(call->debug_id, r + 1, 0, why);
+ return call;
}
/*
* Note the addition of a ref on a call.
*/
-void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
+void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
{
- const void *here = __builtin_return_address(0);
- int n;
+ int r;
- __refcount_inc(&call->ref, &n);
- trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL);
+ __refcount_inc(&call->ref, &r);
+ trace_rxrpc_call(call->debug_id, r + 1, 0, why);
}
/*
@@ -514,15 +520,13 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call)
*/
void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
{
- const void *here = __builtin_return_address(0);
struct rxrpc_connection *conn = call->conn;
bool put = false;
_enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
- trace_rxrpc_call(call->debug_id, rxrpc_call_release,
- refcount_read(&call->ref),
- here, (const void *)call->flags);
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
+ call->flags, rxrpc_call_see_release);
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
@@ -530,10 +534,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
BUG();
rxrpc_put_call_slot(call);
- rxrpc_delete_call_timer(call);
+ del_timer_sync(&call->timer);
/* Make sure we don't get any more notifications */
- write_lock_bh(&rx->recvmsg_lock);
+ write_lock(&rx->recvmsg_lock);
if (!list_empty(&call->recvmsg_link)) {
_debug("unlinking once-pending call %p { e=%lx f=%lx }",
@@ -546,16 +550,16 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
call->recvmsg_link.next = NULL;
call->recvmsg_link.prev = NULL;
- write_unlock_bh(&rx->recvmsg_lock);
+ write_unlock(&rx->recvmsg_lock);
if (put)
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_unnotify);
write_lock(&rx->call_lock);
if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
rb_erase(&call->sock_node, &rx->calls);
memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
- rxrpc_put_call(call, rxrpc_call_put_userid);
+ rxrpc_put_call(call, rxrpc_call_put_userid_exists);
}
list_del(&call->sock_link);
@@ -584,17 +588,17 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
struct rxrpc_call, accept_link);
list_del(&call->accept_link);
rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET);
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_release_sock_tba);
}
while (!list_empty(&rx->sock_calls)) {
call = list_entry(rx->sock_calls.next,
struct rxrpc_call, sock_link);
- rxrpc_get_call(call, rxrpc_call_got);
+ rxrpc_get_call(call, rxrpc_call_get_release_sock);
rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET);
rxrpc_send_abort_packet(call);
rxrpc_release_call(rx, call);
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_release_sock);
}
_leave("");
@@ -603,26 +607,24 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
/*
* release a call
*/
-void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
+void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
{
struct rxrpc_net *rxnet = call->rxnet;
- const void *here = __builtin_return_address(0);
unsigned int debug_id = call->debug_id;
bool dead;
- int n;
+ int r;
ASSERT(call != NULL);
- dead = __refcount_dec_and_test(&call->ref, &n);
- trace_rxrpc_call(debug_id, op, n, here, NULL);
+ dead = __refcount_dec_and_test(&call->ref, &r);
+ trace_rxrpc_call(debug_id, r - 1, 0, why);
if (dead) {
- _debug("call %d dead", call->debug_id);
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
if (!list_empty(&call->link)) {
- spin_lock_bh(&rxnet->call_lock);
+ spin_lock(&rxnet->call_lock);
list_del_init(&call->link);
- spin_unlock_bh(&rxnet->call_lock);
+ spin_unlock(&rxnet->call_lock);
}
rxrpc_cleanup_call(call);
@@ -630,36 +632,45 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
}
/*
- * Final call destruction - but must be done in process context.
+ * Free up the call under RCU.
*/
-static void rxrpc_destroy_call(struct work_struct *work)
+static void rxrpc_rcu_free_call(struct rcu_head *rcu)
{
- struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor);
- struct rxrpc_net *rxnet = call->rxnet;
-
- rxrpc_delete_call_timer(call);
+ struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
+ struct rxrpc_net *rxnet = READ_ONCE(call->rxnet);
- rxrpc_put_connection(call->conn);
- rxrpc_put_peer(call->peer);
kmem_cache_free(rxrpc_call_jar, call);
if (atomic_dec_and_test(&rxnet->nr_calls))
wake_up_var(&rxnet->nr_calls);
}
/*
- * Final call destruction under RCU.
+ * Final call destruction - but must be done in process context.
*/
-static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
+static void rxrpc_destroy_call(struct work_struct *work)
{
- struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
+ struct rxrpc_call *call = container_of(work, struct rxrpc_call, destroyer);
+ struct rxrpc_txbuf *txb;
- if (in_softirq()) {
- INIT_WORK(&call->processor, rxrpc_destroy_call);
- if (!rxrpc_queue_work(&call->processor))
- BUG();
- } else {
- rxrpc_destroy_call(&call->processor);
+ del_timer_sync(&call->timer);
+
+ rxrpc_cleanup_ring(call);
+ while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
+ struct rxrpc_txbuf, call_link))) {
+ list_del(&txb->call_link);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned);
}
+ while ((txb = list_first_entry_or_null(&call->tx_buffer,
+ struct rxrpc_txbuf, call_link))) {
+ list_del(&txb->call_link);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned);
+ }
+
+ rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned);
+ rxrpc_put_connection(call->conn, rxrpc_conn_put_call);
+ rxrpc_put_peer(call->peer, rxrpc_peer_put_call);
+ rxrpc_put_local(call->local, rxrpc_local_put_call);
+ call_rcu(&call->rcu, rxrpc_rcu_free_call);
}
/*
@@ -667,25 +678,20 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
*/
void rxrpc_cleanup_call(struct rxrpc_call *call)
{
- struct rxrpc_txbuf *txb;
-
- _net("DESTROY CALL %d", call->debug_id);
-
memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
- rxrpc_cleanup_ring(call);
- while ((txb = list_first_entry_or_null(&call->tx_buffer,
- struct rxrpc_txbuf, call_link))) {
- list_del(&txb->call_link);
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned);
- }
- rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned);
- rxrpc_free_skb(call->acks_soft_tbl, rxrpc_skb_cleaned);
+ del_timer(&call->timer);
- call_rcu(&call->rcu, rxrpc_rcu_destroy_call);
+ if (rcu_read_lock_held())
+ /* Can't use the rxrpc workqueue as we need to cancel/flush
+ * something that may be running/waiting there.
+ */
+ schedule_work(&call->destroyer);
+ else
+ rxrpc_destroy_call(&call->destroyer);
}
/*
@@ -700,14 +706,14 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
_enter("");
if (!list_empty(&rxnet->calls)) {
- spin_lock_bh(&rxnet->call_lock);
+ spin_lock(&rxnet->call_lock);
while (!list_empty(&rxnet->calls)) {
call = list_entry(rxnet->calls.next,
struct rxrpc_call, link);
_debug("Zapping call %p", call);
- rxrpc_see_call(call);
+ rxrpc_see_call(call, rxrpc_call_see_zap);
list_del_init(&call->link);
pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
@@ -715,12 +721,12 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
rxrpc_call_states[call->state],
call->flags, call->events);
- spin_unlock_bh(&rxnet->call_lock);
+ spin_unlock(&rxnet->call_lock);
cond_resched();
- spin_lock_bh(&rxnet->call_lock);
+ spin_lock(&rxnet->call_lock);
}
- spin_unlock_bh(&rxnet->call_lock);
+ spin_unlock(&rxnet->call_lock);
}
atomic_dec(&rxnet->nr_calls);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index f11c97e28d2a..a08e33c9e54b 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -51,7 +51,7 @@ static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
gfp_t gfp)
{
- struct rxrpc_net *rxnet = conn->params.local->rxnet;
+ struct rxrpc_net *rxnet = conn->rxnet;
int id;
_enter("");
@@ -122,37 +122,47 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp,
bundle = kzalloc(sizeof(*bundle), gfp);
if (bundle) {
- bundle->params = *cp;
- rxrpc_get_peer(bundle->params.peer);
+ bundle->local = cp->local;
+ bundle->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_bundle);
+ bundle->key = cp->key;
+ bundle->exclusive = cp->exclusive;
+ bundle->upgrade = cp->upgrade;
+ bundle->service_id = cp->service_id;
+ bundle->security_level = cp->security_level;
refcount_set(&bundle->ref, 1);
atomic_set(&bundle->active, 1);
spin_lock_init(&bundle->channel_lock);
INIT_LIST_HEAD(&bundle->waiting_calls);
+ trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new);
}
return bundle;
}
-struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle)
+struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle,
+ enum rxrpc_bundle_trace why)
{
- refcount_inc(&bundle->ref);
+ int r;
+
+ __refcount_inc(&bundle->ref, &r);
+ trace_rxrpc_bundle(bundle->debug_id, r + 1, why);
return bundle;
}
static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
{
- rxrpc_put_peer(bundle->params.peer);
+ trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free);
+ rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle);
kfree(bundle);
}
-void rxrpc_put_bundle(struct rxrpc_bundle *bundle)
+void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why)
{
- unsigned int d = bundle->debug_id;
+ unsigned int id = bundle->debug_id;
bool dead;
int r;
dead = __refcount_dec_and_test(&bundle->ref, &r);
-
- _debug("PUT B=%x %d", d, r - 1);
+ trace_rxrpc_bundle(id, r - 1, why);
if (dead)
rxrpc_free_bundle(bundle);
}
@@ -164,12 +174,12 @@ static struct rxrpc_connection *
rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
{
struct rxrpc_connection *conn;
- struct rxrpc_net *rxnet = bundle->params.local->rxnet;
+ struct rxrpc_net *rxnet = bundle->local->rxnet;
int ret;
_enter("");
- conn = rxrpc_alloc_connection(gfp);
+ conn = rxrpc_alloc_connection(rxnet, gfp);
if (!conn) {
_leave(" = -ENOMEM");
return ERR_PTR(-ENOMEM);
@@ -177,10 +187,16 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
refcount_set(&conn->ref, 1);
conn->bundle = bundle;
- conn->params = bundle->params;
+ conn->local = bundle->local;
+ conn->peer = bundle->peer;
+ conn->key = bundle->key;
+ conn->exclusive = bundle->exclusive;
+ conn->upgrade = bundle->upgrade;
+ conn->orig_service_id = bundle->service_id;
+ conn->security_level = bundle->security_level;
conn->out_clientflag = RXRPC_CLIENT_INITIATED;
conn->state = RXRPC_CONN_CLIENT;
- conn->service_id = conn->params.service_id;
+ conn->service_id = conn->orig_service_id;
ret = rxrpc_get_client_connection_id(conn, gfp);
if (ret < 0)
@@ -195,14 +211,13 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
write_unlock(&rxnet->conn_lock);
- rxrpc_get_bundle(bundle);
- rxrpc_get_peer(conn->params.peer);
- rxrpc_get_local(conn->params.local);
- key_get(conn->params.key);
+ rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn);
+ rxrpc_get_peer(conn->peer, rxrpc_peer_get_client_conn);
+ rxrpc_get_local(conn->local, rxrpc_local_get_client_conn);
+ key_get(conn->key);
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client,
- refcount_read(&conn->ref),
- __builtin_return_address(0));
+ trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref),
+ rxrpc_conn_new_client);
atomic_inc(&rxnet->nr_client_conns);
trace_rxrpc_client(conn, -1, rxrpc_client_alloc);
@@ -228,7 +243,7 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
if (!conn)
goto dont_reuse;
- rxnet = conn->params.local->rxnet;
+ rxnet = conn->rxnet;
if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags))
goto dont_reuse;
@@ -285,7 +300,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
while (p) {
bundle = rb_entry(p, struct rxrpc_bundle, local_node);
-#define cmp(X) ((long)bundle->params.X - (long)cp->X)
+#define cmp(X) ((long)bundle->X - (long)cp->X)
diff = (cmp(peer) ?:
cmp(key) ?:
cmp(security_level) ?:
@@ -314,7 +329,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
parent = *pp;
bundle = rb_entry(parent, struct rxrpc_bundle, local_node);
-#define cmp(X) ((long)bundle->params.X - (long)cp->X)
+#define cmp(X) ((long)bundle->X - (long)cp->X)
diff = (cmp(peer) ?:
cmp(key) ?:
cmp(security_level) ?:
@@ -332,7 +347,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id);
rb_link_node(&candidate->local_node, parent, pp);
rb_insert_color(&candidate->local_node, &local->client_bundles);
- rxrpc_get_bundle(candidate);
+ rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
spin_unlock(&local->client_bundles_lock);
_leave(" = %u [new]", candidate->debug_id);
return candidate;
@@ -340,7 +355,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
found_bundle_free:
rxrpc_free_bundle(candidate);
found_bundle:
- rxrpc_get_bundle(bundle);
+ rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call);
atomic_inc(&bundle->active);
spin_unlock(&local->client_bundles_lock);
_leave(" = %u [found]", bundle->debug_id);
@@ -456,10 +471,10 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp)
if (candidate) {
_debug("discard C=%x", candidate->debug_id);
trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate);
- rxrpc_put_connection(candidate);
+ rxrpc_put_connection(candidate, rxrpc_conn_put_discard);
}
- rxrpc_put_connection(old);
+ rxrpc_put_connection(old, rxrpc_conn_put_noreuse);
_leave("");
}
@@ -530,23 +545,21 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
clear_bit(conn->bundle_shift + channel, &bundle->avail_chans);
- rxrpc_see_call(call);
+ rxrpc_see_call(call, rxrpc_call_see_activate_client);
list_del_init(&call->chan_wait_link);
- call->peer = rxrpc_get_peer(conn->params.peer);
- call->conn = rxrpc_get_connection(conn);
+ call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_activate_call);
+ call->conn = rxrpc_get_connection(conn, rxrpc_conn_get_activate_call);
call->cid = conn->proto.cid | channel;
call->call_id = call_id;
call->security = conn->security;
call->security_ix = conn->security_ix;
- call->service_id = conn->service_id;
+ call->dest_srx.srx_service = conn->service_id;
trace_rxrpc_connect_call(call);
- _net("CONNECT call %08x:%08x as call %d on conn %d",
- call->cid, call->call_id, call->debug_id, conn->debug_id);
- write_lock_bh(&call->state_lock);
+ write_lock(&call->state_lock);
call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
/* Paired with the read barrier in rxrpc_connect_call(). This orders
* cid and epoch in the connection wrt to call_id without the need to
@@ -571,7 +584,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
*/
static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connection *conn)
{
- struct rxrpc_net *rxnet = bundle->params.local->rxnet;
+ struct rxrpc_net *rxnet = bundle->local->rxnet;
bool drop_ref;
if (!list_empty(&conn->cache_link)) {
@@ -583,7 +596,7 @@ static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connecti
}
spin_unlock(&rxnet->client_conn_cache_lock);
if (drop_ref)
- rxrpc_put_connection(conn);
+ rxrpc_put_connection(conn, rxrpc_conn_put_unidle);
}
}
@@ -732,7 +745,7 @@ granted_channel:
out_put_bundle:
rxrpc_deactivate_bundle(bundle);
- rxrpc_put_bundle(bundle);
+ rxrpc_put_bundle(bundle, rxrpc_bundle_get_client_call);
out:
_leave(" = %d", ret);
return ret;
@@ -773,6 +786,10 @@ void rxrpc_expose_client_call(struct rxrpc_call *call)
if (chan->call_counter >= INT_MAX)
set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
trace_rxrpc_client(conn, channel, rxrpc_client_exposed);
+
+ spin_lock(&call->peer->lock);
+ hlist_add_head(&call->error_link, &call->peer->error_targets);
+ spin_unlock(&call->peer->lock);
}
}
@@ -797,7 +814,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
{
struct rxrpc_connection *conn;
struct rxrpc_channel *chan = NULL;
- struct rxrpc_net *rxnet = bundle->params.local->rxnet;
+ struct rxrpc_net *rxnet = bundle->local->rxnet;
unsigned int channel;
bool may_reuse;
u32 cid;
@@ -887,7 +904,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
trace_rxrpc_client(conn, channel, rxrpc_client_to_idle);
conn->idle_timestamp = jiffies;
- rxrpc_get_connection(conn);
+ rxrpc_get_connection(conn, rxrpc_conn_get_idle);
spin_lock(&rxnet->client_conn_cache_lock);
list_move_tail(&conn->cache_link, &rxnet->idle_client_conns);
spin_unlock(&rxnet->client_conn_cache_lock);
@@ -929,7 +946,7 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
if (need_drop) {
rxrpc_deactivate_bundle(bundle);
- rxrpc_put_connection(conn);
+ rxrpc_put_connection(conn, rxrpc_conn_put_unbundle);
}
}
@@ -938,11 +955,11 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
*/
static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
{
- struct rxrpc_local *local = bundle->params.local;
+ struct rxrpc_local *local = bundle->local;
bool need_put = false;
if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) {
- if (!bundle->params.exclusive) {
+ if (!bundle->exclusive) {
_debug("erase bundle");
rb_erase(&bundle->local_node, &local->client_bundles);
need_put = true;
@@ -950,16 +967,16 @@ static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
spin_unlock(&local->client_bundles_lock);
if (need_put)
- rxrpc_put_bundle(bundle);
+ rxrpc_put_bundle(bundle, rxrpc_bundle_put_discard);
}
}
/*
* Clean up a dead client connection.
*/
-static void rxrpc_kill_client_conn(struct rxrpc_connection *conn)
+void rxrpc_kill_client_conn(struct rxrpc_connection *conn)
{
- struct rxrpc_local *local = conn->params.local;
+ struct rxrpc_local *local = conn->local;
struct rxrpc_net *rxnet = local->rxnet;
_enter("C=%x", conn->debug_id);
@@ -968,23 +985,6 @@ static void rxrpc_kill_client_conn(struct rxrpc_connection *conn)
atomic_dec(&rxnet->nr_client_conns);
rxrpc_put_client_connection_id(conn);
- rxrpc_kill_connection(conn);
-}
-
-/*
- * Clean up a dead client connections.
- */
-void rxrpc_put_client_conn(struct rxrpc_connection *conn)
-{
- const void *here = __builtin_return_address(0);
- unsigned int debug_id = conn->debug_id;
- bool dead;
- int r;
-
- dead = __refcount_dec_and_test(&conn->ref, &r);
- trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, r - 1, here);
- if (dead)
- rxrpc_kill_client_conn(conn);
}
/*
@@ -1010,7 +1010,7 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work)
}
/* Don't double up on the discarding */
- if (!spin_trylock(&rxnet->client_conn_discard_lock)) {
+ if (!mutex_trylock(&rxnet->client_conn_discard_lock)) {
_leave(" [already]");
return;
}
@@ -1038,7 +1038,7 @@ next:
expiry = rxrpc_conn_idle_client_expiry;
if (nr_conns > rxrpc_reap_client_connections)
expiry = rxrpc_conn_idle_client_fast_expiry;
- if (conn->params.local->service_closed)
+ if (conn->local->service_closed)
expiry = rxrpc_closed_conn_expiry * HZ;
conn_expires_at = conn->idle_timestamp + expiry;
@@ -1048,13 +1048,15 @@ next:
goto not_yet_expired;
}
+ atomic_dec(&conn->active);
trace_rxrpc_client(conn, -1, rxrpc_client_discard);
list_del_init(&conn->cache_link);
spin_unlock(&rxnet->client_conn_cache_lock);
rxrpc_unbundle_conn(conn);
- rxrpc_put_connection(conn); /* Drop the ->cache_link ref */
+ /* Drop the ->cache_link ref */
+ rxrpc_put_connection(conn, rxrpc_conn_put_discard_idle);
nr_conns--;
goto next;
@@ -1073,7 +1075,7 @@ not_yet_expired:
out:
spin_unlock(&rxnet->client_conn_cache_lock);
- spin_unlock(&rxnet->client_conn_discard_lock);
+ mutex_unlock(&rxnet->client_conn_discard_lock);
_leave("");
}
@@ -1112,7 +1114,8 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *local)
list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns,
cache_link) {
- if (conn->params.local == local) {
+ if (conn->local == local) {
+ atomic_dec(&conn->active);
trace_rxrpc_client(conn, -1, rxrpc_client_discard);
list_move(&conn->cache_link, &graveyard);
}
@@ -1125,7 +1128,7 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *local)
struct rxrpc_connection, cache_link);
list_del_init(&conn->cache_link);
rxrpc_unbundle_conn(conn);
- rxrpc_put_connection(conn);
+ rxrpc_put_connection(conn, rxrpc_conn_put_local_dead);
}
_leave(" [culled]");
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index aab069701398..480364bcbf85 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -52,8 +52,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
if (skb && call_id != sp->hdr.callNumber)
return;
- msg.msg_name = &conn->params.peer->srx.transport;
- msg.msg_namelen = conn->params.peer->srx.transport_len;
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -86,8 +86,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
break;
case RXRPC_PACKET_TYPE_ACK:
- mtu = conn->params.peer->if_mtu;
- mtu -= conn->params.peer->hdrsize;
+ mtu = conn->peer->if_mtu;
+ mtu -= conn->peer->hdrsize;
pkt.ack.bufferSpace = 0;
pkt.ack.maxSkew = htons(skb ? skb->priority : 0);
pkt.ack.firstPacket = htonl(chan->last_seq + 1);
@@ -122,19 +122,17 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
switch (chan->last_type) {
case RXRPC_PACKET_TYPE_ABORT:
- _proto("Tx ABORT %%%u { %d } [re]", serial, conn->abort_code);
break;
case RXRPC_PACKET_TYPE_ACK:
trace_rxrpc_tx_ack(chan->call_debug_id, serial,
ntohl(pkt.ack.firstPacket),
ntohl(pkt.ack.serial),
pkt.ack.reason, 0);
- _proto("Tx ACK %%%u [re]", serial);
break;
}
- ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ ret = kernel_sendmsg(conn->local->socket, &msg, iov, ioc, len);
+ conn->peer->last_tx_at = ktime_get_seconds();
if (ret < 0)
trace_rxrpc_tx_fail(chan->call_debug_id, serial, ret,
rxrpc_tx_point_call_final_resend);
@@ -200,9 +198,9 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
_enter("%d,,%u,%u", conn->debug_id, error, abort_code);
/* generate a connection-level abort */
- spin_lock_bh(&conn->state_lock);
+ spin_lock(&conn->state_lock);
if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
- spin_unlock_bh(&conn->state_lock);
+ spin_unlock(&conn->state_lock);
_leave(" = 0 [already dead]");
return 0;
}
@@ -211,10 +209,10 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
conn->abort_code = abort_code;
conn->state = RXRPC_CONN_LOCALLY_ABORTED;
set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
- spin_unlock_bh(&conn->state_lock);
+ spin_unlock(&conn->state_lock);
- msg.msg_name = &conn->params.peer->srx.transport;
- msg.msg_namelen = conn->params.peer->srx.transport_len;
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -242,9 +240,8 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
serial = atomic_inc_return(&conn->serial);
rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial);
whdr.serial = htonl(serial);
- _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code);
- ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
+ ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
if (ret < 0) {
trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
rxrpc_tx_point_conn_abort);
@@ -254,7 +251,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ conn->peer->last_tx_at = ktime_get_seconds();
_leave(" = 0");
return 0;
@@ -268,12 +265,12 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call)
{
_enter("%p", call);
if (call) {
- write_lock_bh(&call->state_lock);
+ write_lock(&call->state_lock);
if (call->state == RXRPC_CALL_SERVER_SECURING) {
call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
rxrpc_notify_socket(call);
}
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
}
}
@@ -285,8 +282,6 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
u32 *_abort_code)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- __be32 wtmp;
- u32 abort_code;
int loop, ret;
if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
@@ -308,17 +303,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
return 0;
case RXRPC_PACKET_TYPE_ABORT:
- if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- &wtmp, sizeof(wtmp)) < 0) {
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("bad_abort"));
- return -EPROTO;
- }
- abort_code = ntohl(wtmp);
- _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code);
-
conn->error = -ECONNABORTED;
- conn->abort_code = abort_code;
+ conn->abort_code = skb->priority;
conn->state = RXRPC_CONN_REMOTELY_ABORTED;
set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
@@ -334,23 +320,23 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
return ret;
ret = conn->security->init_connection_security(
- conn, conn->params.key->payload.data[0]);
+ conn, conn->key->payload.data[0]);
if (ret < 0)
return ret;
spin_lock(&conn->bundle->channel_lock);
- spin_lock_bh(&conn->state_lock);
+ spin_lock(&conn->state_lock);
if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
conn->state = RXRPC_CONN_SERVICE;
- spin_unlock_bh(&conn->state_lock);
+ spin_unlock(&conn->state_lock);
for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
rxrpc_call_is_secure(
rcu_dereference_protected(
conn->channels[loop].call,
lockdep_is_held(&conn->bundle->channel_lock)));
} else {
- spin_unlock_bh(&conn->state_lock);
+ spin_unlock(&conn->state_lock);
}
spin_unlock(&conn->bundle->channel_lock);
@@ -451,7 +437,7 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
/* go through the conn-level event packets, releasing the ref on this
* connection that each one has when we've finished with it */
while ((skb = skb_dequeue(&conn->rx_queue))) {
- rxrpc_see_skb(skb, rxrpc_skb_seen);
+ rxrpc_see_skb(skb, rxrpc_skb_see_conn_work);
ret = rxrpc_process_event(conn, skb, &abort_code);
switch (ret) {
case -EPROTO:
@@ -463,7 +449,7 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
goto requeue_and_leave;
case -ECONNABORTED:
default:
- rxrpc_free_skb(skb, rxrpc_skb_freed);
+ rxrpc_free_skb(skb, rxrpc_skb_put_conn_work);
break;
}
}
@@ -477,7 +463,7 @@ requeue_and_leave:
protocol_error:
if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
goto requeue_and_leave;
- rxrpc_free_skb(skb, rxrpc_skb_freed);
+ rxrpc_free_skb(skb, rxrpc_skb_put_conn_work);
return;
}
@@ -486,14 +472,70 @@ void rxrpc_process_connection(struct work_struct *work)
struct rxrpc_connection *conn =
container_of(work, struct rxrpc_connection, processor);
- rxrpc_see_connection(conn);
+ rxrpc_see_connection(conn, rxrpc_conn_see_work);
- if (__rxrpc_use_local(conn->params.local)) {
+ if (__rxrpc_use_local(conn->local, rxrpc_local_use_conn_work)) {
rxrpc_do_process_connection(conn);
- rxrpc_unuse_local(conn->params.local);
+ rxrpc_unuse_local(conn->local, rxrpc_local_unuse_conn_work);
}
+}
- rxrpc_put_connection(conn);
- _leave("");
- return;
+/*
+ * post connection-level events to the connection
+ * - this includes challenges, responses, some aborts and call terminal packet
+ * retransmission.
+ */
+static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ _enter("%p,%p", conn, skb);
+
+ rxrpc_get_skb(skb, rxrpc_skb_get_conn_work);
+ skb_queue_tail(&conn->rx_queue, skb);
+ rxrpc_queue_conn(conn, rxrpc_conn_queue_rx_work);
+}
+
+/*
+ * Input a connection-level packet.
+ */
+int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
+ _leave(" = -ECONNABORTED [%u]", conn->state);
+ return -ECONNABORTED;
+ }
+
+ _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
+
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_DATA:
+ case RXRPC_PACKET_TYPE_ACK:
+ rxrpc_conn_retransmit_call(conn, skb,
+ sp->hdr.cid & RXRPC_CHANNELMASK);
+ return 0;
+
+ case RXRPC_PACKET_TYPE_BUSY:
+ /* Just ignore BUSY packets for now. */
+ return 0;
+
+ case RXRPC_PACKET_TYPE_ABORT:
+ conn->error = -ECONNABORTED;
+ conn->abort_code = skb->priority;
+ conn->state = RXRPC_CONN_REMOTELY_ABORTED;
+ set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+ rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
+ return -ECONNABORTED;
+
+ case RXRPC_PACKET_TYPE_CHALLENGE:
+ case RXRPC_PACKET_TYPE_RESPONSE:
+ rxrpc_post_packet_to_conn(conn, skb);
+ return 0;
+
+ default:
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("bad_conn_pkt"));
+ return -EPROTO;
+ }
}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 156bd26daf74..3c8f83dacb2b 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -19,20 +19,23 @@
unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60;
unsigned int __read_mostly rxrpc_closed_conn_expiry = 10;
-static void rxrpc_destroy_connection(struct rcu_head *);
+static void rxrpc_clean_up_connection(struct work_struct *work);
+static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
+ unsigned long reap_at);
static void rxrpc_connection_timer(struct timer_list *timer)
{
struct rxrpc_connection *conn =
container_of(timer, struct rxrpc_connection, timer);
- rxrpc_queue_conn(conn);
+ rxrpc_queue_conn(conn, rxrpc_conn_queue_timer);
}
/*
* allocate a new connection
*/
-struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
+struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
+ gfp_t gfp)
{
struct rxrpc_connection *conn;
@@ -42,10 +45,12 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
if (conn) {
INIT_LIST_HEAD(&conn->cache_link);
timer_setup(&conn->timer, &rxrpc_connection_timer, 0);
- INIT_WORK(&conn->processor, &rxrpc_process_connection);
+ INIT_WORK(&conn->processor, rxrpc_process_connection);
+ INIT_WORK(&conn->destructor, rxrpc_clean_up_connection);
INIT_LIST_HEAD(&conn->proc_link);
INIT_LIST_HEAD(&conn->link);
skb_queue_head_init(&conn->rx_queue);
+ conn->rxnet = rxnet;
conn->security = &rxrpc_no_security;
spin_lock_init(&conn->state_lock);
conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
@@ -67,89 +72,55 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
*
* The caller must be holding the RCU read lock.
*/
-struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
- struct sk_buff *skb,
- struct rxrpc_peer **_peer)
+struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx,
+ struct sk_buff *skb)
{
struct rxrpc_connection *conn;
- struct rxrpc_conn_proto k;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct sockaddr_rxrpc srx;
struct rxrpc_peer *peer;
_enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
- if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
- goto not_found;
-
- if (srx.transport.family != local->srx.transport.family &&
- (srx.transport.family == AF_INET &&
- local->srx.transport.family != AF_INET6)) {
- pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
- srx.transport.family,
- local->srx.transport.family);
+ /* Look up client connections by connection ID alone as their IDs are
+ * unique for this machine.
+ */
+ conn = idr_find(&rxrpc_client_conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT);
+ if (!conn || refcount_read(&conn->ref) == 0) {
+ _debug("no conn");
goto not_found;
}
- k.epoch = sp->hdr.epoch;
- k.cid = sp->hdr.cid & RXRPC_CIDMASK;
-
- if (rxrpc_to_server(sp)) {
- /* We need to look up service connections by the full protocol
- * parameter set. We look up the peer first as an intermediate
- * step and then the connection from the peer's tree.
- */
- peer = rxrpc_lookup_peer_rcu(local, &srx);
- if (!peer)
- goto not_found;
- *_peer = peer;
- conn = rxrpc_find_service_conn_rcu(peer, skb);
- if (!conn || refcount_read(&conn->ref) == 0)
- goto not_found;
- _leave(" = %p", conn);
- return conn;
- } else {
- /* Look up client connections by connection ID alone as their
- * IDs are unique for this machine.
- */
- conn = idr_find(&rxrpc_client_conn_ids,
- sp->hdr.cid >> RXRPC_CIDSHIFT);
- if (!conn || refcount_read(&conn->ref) == 0) {
- _debug("no conn");
- goto not_found;
- }
+ if (conn->proto.epoch != sp->hdr.epoch ||
+ conn->local != local)
+ goto not_found;
- if (conn->proto.epoch != k.epoch ||
- conn->params.local != local)
+ peer = conn->peer;
+ switch (srx->transport.family) {
+ case AF_INET:
+ if (peer->srx.transport.sin.sin_port !=
+ srx->transport.sin.sin_port ||
+ peer->srx.transport.sin.sin_addr.s_addr !=
+ srx->transport.sin.sin_addr.s_addr)
goto not_found;
-
- peer = conn->params.peer;
- switch (srx.transport.family) {
- case AF_INET:
- if (peer->srx.transport.sin.sin_port !=
- srx.transport.sin.sin_port ||
- peer->srx.transport.sin.sin_addr.s_addr !=
- srx.transport.sin.sin_addr.s_addr)
- goto not_found;
- break;
+ break;
#ifdef CONFIG_AF_RXRPC_IPV6
- case AF_INET6:
- if (peer->srx.transport.sin6.sin6_port !=
- srx.transport.sin6.sin6_port ||
- memcmp(&peer->srx.transport.sin6.sin6_addr,
- &srx.transport.sin6.sin6_addr,
- sizeof(struct in6_addr)) != 0)
- goto not_found;
- break;
+ case AF_INET6:
+ if (peer->srx.transport.sin6.sin6_port !=
+ srx->transport.sin6.sin6_port ||
+ memcmp(&peer->srx.transport.sin6.sin6_addr,
+ &srx->transport.sin6.sin6_addr,
+ sizeof(struct in6_addr)) != 0)
+ goto not_found;
+ break;
#endif
- default:
- BUG();
- }
-
- _leave(" = %p", conn);
- return conn;
+ default:
+ BUG();
}
+ _leave(" = %p", conn);
+ return conn;
+
not_found:
_leave(" = NULL");
return NULL;
@@ -210,9 +181,9 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
call->peer->cong_ssthresh = call->cong_ssthresh;
if (!hlist_unhashed(&call->error_link)) {
- spin_lock_bh(&call->peer->lock);
- hlist_del_rcu(&call->error_link);
- spin_unlock_bh(&call->peer->lock);
+ spin_lock(&call->peer->lock);
+ hlist_del_init(&call->error_link);
+ spin_unlock(&call->peer->lock);
}
if (rxrpc_is_client_call(call))
@@ -224,79 +195,45 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
conn->idle_timestamp = jiffies;
-}
-
-/*
- * Kill off a connection.
- */
-void rxrpc_kill_connection(struct rxrpc_connection *conn)
-{
- struct rxrpc_net *rxnet = conn->params.local->rxnet;
-
- ASSERT(!rcu_access_pointer(conn->channels[0].call) &&
- !rcu_access_pointer(conn->channels[1].call) &&
- !rcu_access_pointer(conn->channels[2].call) &&
- !rcu_access_pointer(conn->channels[3].call));
- ASSERT(list_empty(&conn->cache_link));
-
- write_lock(&rxnet->conn_lock);
- list_del_init(&conn->proc_link);
- write_unlock(&rxnet->conn_lock);
-
- /* Drain the Rx queue. Note that even though we've unpublished, an
- * incoming packet could still be being added to our Rx queue, so we
- * will need to drain it again in the RCU cleanup handler.
- */
- rxrpc_purge_queue(&conn->rx_queue);
-
- /* Leave final destruction to RCU. The connection processor work item
- * must carry a ref on the connection to prevent us getting here whilst
- * it is queued or running.
- */
- call_rcu(&conn->rcu, rxrpc_destroy_connection);
+ if (atomic_dec_and_test(&conn->active))
+ rxrpc_set_service_reap_timer(conn->rxnet,
+ jiffies + rxrpc_connection_expiry);
}
/*
* Queue a connection's work processor, getting a ref to pass to the work
* queue.
*/
-bool rxrpc_queue_conn(struct rxrpc_connection *conn)
+void rxrpc_queue_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why)
{
- const void *here = __builtin_return_address(0);
- int r;
-
- if (!__refcount_inc_not_zero(&conn->ref, &r))
- return false;
- if (rxrpc_queue_work(&conn->processor))
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, r + 1, here);
- else
- rxrpc_put_connection(conn);
- return true;
+ if (atomic_read(&conn->active) >= 0 &&
+ rxrpc_queue_work(&conn->processor))
+ rxrpc_see_connection(conn, why);
}
/*
* Note the re-emergence of a connection.
*/
-void rxrpc_see_connection(struct rxrpc_connection *conn)
+void rxrpc_see_connection(struct rxrpc_connection *conn,
+ enum rxrpc_conn_trace why)
{
- const void *here = __builtin_return_address(0);
if (conn) {
- int n = refcount_read(&conn->ref);
+ int r = refcount_read(&conn->ref);
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here);
+ trace_rxrpc_conn(conn->debug_id, r, why);
}
}
/*
* Get a ref on a connection.
*/
-struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn)
+struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn,
+ enum rxrpc_conn_trace why)
{
- const void *here = __builtin_return_address(0);
int r;
__refcount_inc(&conn->ref, &r);
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r, here);
+ trace_rxrpc_conn(conn->debug_id, r + 1, why);
return conn;
}
@@ -304,14 +241,14 @@ struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn)
* Try to get a ref on a connection.
*/
struct rxrpc_connection *
-rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
+rxrpc_get_connection_maybe(struct rxrpc_connection *conn,
+ enum rxrpc_conn_trace why)
{
- const void *here = __builtin_return_address(0);
int r;
if (conn) {
if (__refcount_inc_not_zero(&conn->ref, &r))
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r + 1, here);
+ trace_rxrpc_conn(conn->debug_id, r + 1, why);
else
conn = NULL;
}
@@ -329,49 +266,95 @@ static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
}
/*
- * Release a service connection
+ * destroy a virtual connection
*/
-void rxrpc_put_service_conn(struct rxrpc_connection *conn)
+static void rxrpc_rcu_free_connection(struct rcu_head *rcu)
{
- const void *here = __builtin_return_address(0);
- unsigned int debug_id = conn->debug_id;
- int r;
+ struct rxrpc_connection *conn =
+ container_of(rcu, struct rxrpc_connection, rcu);
+ struct rxrpc_net *rxnet = conn->rxnet;
- __refcount_dec(&conn->ref, &r);
- trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, r - 1, here);
- if (r - 1 == 1)
- rxrpc_set_service_reap_timer(conn->params.local->rxnet,
- jiffies + rxrpc_connection_expiry);
+ _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref));
+
+ trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref),
+ rxrpc_conn_free);
+ kfree(conn);
+
+ if (atomic_dec_and_test(&rxnet->nr_conns))
+ wake_up_var(&rxnet->nr_conns);
}
/*
- * destroy a virtual connection
+ * Clean up a dead connection.
*/
-static void rxrpc_destroy_connection(struct rcu_head *rcu)
+static void rxrpc_clean_up_connection(struct work_struct *work)
{
struct rxrpc_connection *conn =
- container_of(rcu, struct rxrpc_connection, rcu);
+ container_of(work, struct rxrpc_connection, destructor);
+ struct rxrpc_net *rxnet = conn->rxnet;
- _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref));
+ ASSERT(!rcu_access_pointer(conn->channels[0].call) &&
+ !rcu_access_pointer(conn->channels[1].call) &&
+ !rcu_access_pointer(conn->channels[2].call) &&
+ !rcu_access_pointer(conn->channels[3].call));
+ ASSERT(list_empty(&conn->cache_link));
- ASSERTCMP(refcount_read(&conn->ref), ==, 0);
+ del_timer_sync(&conn->timer);
+ cancel_work_sync(&conn->processor); /* Processing may restart the timer */
+ del_timer_sync(&conn->timer);
- _net("DESTROY CONN %d", conn->debug_id);
+ write_lock(&rxnet->conn_lock);
+ list_del_init(&conn->proc_link);
+ write_unlock(&rxnet->conn_lock);
- del_timer_sync(&conn->timer);
rxrpc_purge_queue(&conn->rx_queue);
+ rxrpc_kill_client_conn(conn);
+
conn->security->clear(conn);
- key_put(conn->params.key);
- rxrpc_put_bundle(conn->bundle);
- rxrpc_put_peer(conn->params.peer);
+ key_put(conn->key);
+ rxrpc_put_bundle(conn->bundle, rxrpc_bundle_put_conn);
+ rxrpc_put_peer(conn->peer, rxrpc_peer_put_conn);
+ rxrpc_put_local(conn->local, rxrpc_local_put_kill_conn);
+
+ /* Drain the Rx queue. Note that even though we've unpublished, an
+ * incoming packet could still be being added to our Rx queue, so we
+ * will need to drain it again in the RCU cleanup handler.
+ */
+ rxrpc_purge_queue(&conn->rx_queue);
- if (atomic_dec_and_test(&conn->params.local->rxnet->nr_conns))
- wake_up_var(&conn->params.local->rxnet->nr_conns);
- rxrpc_put_local(conn->params.local);
+ call_rcu(&conn->rcu, rxrpc_rcu_free_connection);
+}
- kfree(conn);
- _leave("");
+/*
+ * Drop a ref on a connection.
+ */
+void rxrpc_put_connection(struct rxrpc_connection *conn,
+ enum rxrpc_conn_trace why)
+{
+ unsigned int debug_id;
+ bool dead;
+ int r;
+
+ if (!conn)
+ return;
+
+ debug_id = conn->debug_id;
+ dead = __refcount_dec_and_test(&conn->ref, &r);
+ trace_rxrpc_conn(debug_id, r - 1, why);
+ if (dead) {
+ del_timer(&conn->timer);
+ cancel_work(&conn->processor);
+
+ if (in_softirq() || work_busy(&conn->processor) ||
+ timer_pending(&conn->timer))
+ /* Can't use the rxrpc workqueue as we need to cancel/flush
+ * something that may be running/waiting there.
+ */
+ schedule_work(&conn->destructor);
+ else
+ rxrpc_clean_up_connection(&conn->destructor);
+ }
}
/*
@@ -383,6 +366,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
struct rxrpc_net *rxnet =
container_of(work, struct rxrpc_net, service_conn_reaper);
unsigned long expire_at, earliest, idle_timestamp, now;
+ int active;
LIST_HEAD(graveyard);
@@ -393,20 +377,20 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
write_lock(&rxnet->conn_lock);
list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
- ASSERTCMP(refcount_read(&conn->ref), >, 0);
- if (likely(refcount_read(&conn->ref) > 1))
+ ASSERTCMP(atomic_read(&conn->active), >=, 0);
+ if (likely(atomic_read(&conn->active) > 0))
continue;
if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
continue;
- if (rxnet->live && !conn->params.local->dead) {
+ if (rxnet->live && !conn->local->dead) {
idle_timestamp = READ_ONCE(conn->idle_timestamp);
expire_at = idle_timestamp + rxrpc_connection_expiry * HZ;
- if (conn->params.local->service_closed)
+ if (conn->local->service_closed)
expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ;
- _debug("reap CONN %d { u=%d,t=%ld }",
- conn->debug_id, refcount_read(&conn->ref),
+ _debug("reap CONN %d { a=%d,t=%ld }",
+ conn->debug_id, atomic_read(&conn->active),
(long)expire_at - (long)now);
if (time_before(now, expire_at)) {
@@ -416,12 +400,13 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
}
}
- /* The usage count sits at 1 whilst the object is unused on the
- * list; we reduce that to 0 to make the object unavailable.
+ /* The activity count sits at 0 whilst the conn is unused on
+ * the list; we reduce that to -1 to make the conn unavailable.
*/
- if (!refcount_dec_if_one(&conn->ref))
+ active = 0;
+ if (!atomic_try_cmpxchg(&conn->active, &active, -1))
continue;
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL);
+ rxrpc_see_connection(conn, rxrpc_conn_see_reap_service);
if (rxrpc_conn_is_client(conn))
BUG();
@@ -443,8 +428,8 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
link);
list_del_init(&conn->link);
- ASSERTCMP(refcount_read(&conn->ref), ==, 0);
- rxrpc_kill_connection(conn);
+ ASSERTCMP(atomic_read(&conn->active), ==, -1);
+ rxrpc_put_connection(conn, rxrpc_conn_put_service_reaped);
}
_leave("");
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index 6e6aa02c6f9e..2a55a88b2a5b 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -73,7 +73,7 @@ static void rxrpc_publish_service_conn(struct rxrpc_peer *peer,
struct rxrpc_conn_proto k = conn->proto;
struct rb_node **pp, *parent;
- write_seqlock_bh(&peer->service_conn_lock);
+ write_seqlock(&peer->service_conn_lock);
pp = &peer->service_conns.rb_node;
parent = NULL;
@@ -94,14 +94,14 @@ static void rxrpc_publish_service_conn(struct rxrpc_peer *peer,
rb_insert_color(&conn->service_node, &peer->service_conns);
conn_published:
set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags);
- write_sequnlock_bh(&peer->service_conn_lock);
+ write_sequnlock(&peer->service_conn_lock);
_leave(" = %d [new]", conn->debug_id);
return;
found_extant_conn:
if (refcount_read(&cursor->ref) == 0)
goto replace_old_connection;
- write_sequnlock_bh(&peer->service_conn_lock);
+ write_sequnlock(&peer->service_conn_lock);
/* We should not be able to get here. rxrpc_incoming_connection() is
* called in a non-reentrant context, so there can't be a race to
* insert a new connection.
@@ -125,7 +125,7 @@ replace_old_connection:
struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxnet,
gfp_t gfp)
{
- struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp);
+ struct rxrpc_connection *conn = rxrpc_alloc_connection(rxnet, gfp);
if (conn) {
/* We maintain an extra ref on the connection whilst it is on
@@ -133,7 +133,8 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
*/
conn->state = RXRPC_CONN_SERVICE_PREALLOC;
refcount_set(&conn->ref, 2);
- conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle);
+ conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle,
+ rxrpc_bundle_get_service_conn);
atomic_inc(&rxnet->nr_conns);
write_lock(&rxnet->conn_lock);
@@ -141,9 +142,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
write_unlock(&rxnet->conn_lock);
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
- refcount_read(&conn->ref),
- __builtin_return_address(0));
+ rxrpc_see_connection(conn, rxrpc_conn_new_service);
}
return conn;
@@ -164,7 +163,7 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
conn->proto.epoch = sp->hdr.epoch;
conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK;
- conn->params.service_id = sp->hdr.serviceId;
+ conn->orig_service_id = sp->hdr.serviceId;
conn->service_id = sp->hdr.serviceId;
conn->security_ix = sp->hdr.securityIndex;
conn->out_clientflag = 0;
@@ -182,10 +181,10 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
conn->service_id == rx->service_upgrade.from)
conn->service_id = rx->service_upgrade.to;
- /* Make the connection a target for incoming packets. */
- rxrpc_publish_service_conn(conn->params.peer, conn);
+ atomic_set(&conn->active, 1);
- _net("CONNECTION new %d {%x}", conn->debug_id, conn->proto.cid);
+ /* Make the connection a target for incoming packets. */
+ rxrpc_publish_service_conn(conn->peer, conn);
}
/*
@@ -194,10 +193,10 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
*/
void rxrpc_unpublish_service_conn(struct rxrpc_connection *conn)
{
- struct rxrpc_peer *peer = conn->params.peer;
+ struct rxrpc_peer *peer = conn->peer;
- write_seqlock_bh(&peer->service_conn_lock);
+ write_seqlock(&peer->service_conn_lock);
if (test_and_clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags))
rb_erase(&conn->service_node, &peer->service_conns);
- write_sequnlock_bh(&peer->service_conn_lock);
+ write_sequnlock(&peer->service_conn_lock);
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index bdf70b81addc..d0e20e946e48 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/* RxRPC packet reception
+/* Processing of received RxRPC packets
*
- * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
* Written by David Howells ([email protected])
*/
@@ -12,10 +12,8 @@
static void rxrpc_proto_abort(const char *why,
struct rxrpc_call *call, rxrpc_seq_t seq)
{
- if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) {
- set_bit(RXRPC_CALL_EV_ABORT, &call->events);
- rxrpc_queue_call(call);
- }
+ if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG))
+ rxrpc_send_abort_packet(call);
}
/*
@@ -58,25 +56,6 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
summary->cumulative_acks = cumulative_acks;
summary->dup_acks = call->cong_dup_acks;
- /* If we haven't transmitted anything for >1RTT, we should reset the
- * congestion management state.
- */
- if ((call->cong_mode == RXRPC_CALL_SLOW_START ||
- call->cong_mode == RXRPC_CALL_CONGEST_AVOIDANCE) &&
- ktime_before(ktime_add_us(call->tx_last_sent,
- call->peer->srtt_us >> 3),
- ktime_get_real())
- ) {
- change = rxrpc_cong_idle_reset;
- summary->mode = RXRPC_CALL_SLOW_START;
- if (RXRPC_TX_SMSS > 2190)
- summary->cwnd = 2;
- else if (RXRPC_TX_SMSS > 1095)
- summary->cwnd = 3;
- else
- summary->cwnd = 4;
- }
-
switch (call->cong_mode) {
case RXRPC_CALL_SLOW_START:
if (summary->saw_nacks)
@@ -174,8 +153,8 @@ out_no_clear_ca:
call->cong_cwnd = cwnd;
call->cong_cumul_acks = cumulative_acks;
trace_rxrpc_congest(call, summary, acked_serial, change);
- if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
- rxrpc_queue_call(call);
+ if (resend)
+ rxrpc_resend(call, skb);
return;
packet_loss_detected:
@@ -197,6 +176,33 @@ send_extra_data:
}
/*
+ * Degrade the congestion window if we haven't transmitted a packet for >1RTT.
+ */
+void rxrpc_congestion_degrade(struct rxrpc_call *call)
+{
+ ktime_t rtt, now;
+
+ if (call->cong_mode != RXRPC_CALL_SLOW_START &&
+ call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE)
+ return;
+ if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
+ return;
+
+ rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8));
+ now = ktime_get_real();
+ if (!ktime_before(ktime_add(call->tx_last_sent, rtt), now))
+ return;
+
+ trace_rxrpc_reset_cwnd(call, now);
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset);
+ call->tx_last_sent = now;
+ call->cong_mode = RXRPC_CALL_SLOW_START;
+ call->cong_ssthresh = max_t(unsigned int, call->cong_ssthresh,
+ call->cong_cwnd * 3 / 4);
+ call->cong_cwnd = max_t(unsigned int, call->cong_cwnd / 2, RXRPC_MIN_CWND);
+}
+
+/*
* Apply a hard ACK by advancing the Tx window.
*/
static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
@@ -338,7 +344,8 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb,
/*
* Process a DATA packet.
*/
-static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb)
+static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
+ bool *_notify)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct sk_buff *oos;
@@ -361,7 +368,7 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb)
if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
seq + 1 != wtop) {
rxrpc_proto_abort("LSN", call, seq);
- goto err_free;
+ return;
}
} else {
if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
@@ -369,7 +376,7 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb)
pr_warn("Packet beyond last: c=%x q=%x window=%x-%x wlimit=%x\n",
call->debug_id, seq, window, wtop, wlimit);
rxrpc_proto_abort("LSA", call, seq);
- goto err_free;
+ return;
}
}
@@ -397,14 +404,18 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb)
/* Send an immediate ACK if we fill in a hole */
else if (!skb_queue_empty(&call->rx_oos_queue))
ack_reason = RXRPC_ACK_DELAY;
+ else
+ atomic_inc_return(&call->ackr_nr_unacked);
window++;
if (after(window, wtop))
wtop = window;
+ rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg);
+
spin_lock(&call->recvmsg_queue.lock);
rxrpc_input_queue_data(call, skb, window, wtop, rxrpc_receive_queue);
- skb = NULL;
+ *_notify = true;
while ((oos = skb_peek(&call->rx_oos_queue))) {
struct rxrpc_skb_priv *osp = rxrpc_skb(oos);
@@ -456,36 +467,26 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_skb_priv *osp = rxrpc_skb(oos);
if (after(osp->hdr.seq, seq)) {
+ rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg_oos);
__skb_queue_before(&call->rx_oos_queue, oos, skb);
goto oos_queued;
}
}
+ rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg_oos);
__skb_queue_tail(&call->rx_oos_queue, skb);
oos_queued:
trace_rxrpc_receive(call, last ? rxrpc_receive_oos_last : rxrpc_receive_oos,
sp->hdr.serial, sp->hdr.seq);
- skb = NULL;
}
send_ack:
- if (ack_reason < 0 &&
- atomic_inc_return(&call->ackr_nr_unacked) > 2 &&
- test_and_set_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags)) {
- ack_reason = RXRPC_ACK_IDLE;
- } else if (ack_reason >= 0) {
- set_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags);
- }
-
if (ack_reason >= 0)
rxrpc_send_ACK(call, ack_reason, serial,
rxrpc_propose_ack_input_data);
else
rxrpc_propose_delay_ACK(call, serial,
rxrpc_propose_ack_input_data);
-
-err_free:
- rxrpc_free_skb(skb, rxrpc_skb_freed);
}
/*
@@ -498,6 +499,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
struct sk_buff *jskb;
unsigned int offset = sizeof(struct rxrpc_wire_header);
unsigned int len = skb->len - offset;
+ bool notify = false;
while (sp->hdr.flags & RXRPC_JUMBO_PACKET) {
if (len < RXRPC_JUMBO_SUBPKTLEN)
@@ -508,16 +510,17 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
&jhdr, sizeof(jhdr)) < 0)
goto protocol_error;
- jskb = skb_clone(skb, GFP_ATOMIC);
+ jskb = skb_clone(skb, GFP_NOFS);
if (!jskb) {
kdebug("couldn't clone");
return false;
}
- rxrpc_new_skb(jskb, rxrpc_skb_cloned_jumbo);
+ rxrpc_new_skb(jskb, rxrpc_skb_new_jumbo_subpacket);
jsp = rxrpc_skb(jskb);
jsp->offset = offset;
jsp->len = RXRPC_JUMBO_DATALEN;
- rxrpc_input_data_one(call, jskb);
+ rxrpc_input_data_one(call, jskb, &notify);
+ rxrpc_free_skb(jskb, rxrpc_skb_put_jumbo_subpacket);
sp->hdr.flags = jhdr.flags;
sp->hdr._rsvd = ntohs(jhdr._rsvd);
@@ -529,7 +532,11 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
sp->offset = offset;
sp->len = len;
- rxrpc_input_data_one(call, skb);
+ rxrpc_input_data_one(call, skb, &notify);
+ if (notify) {
+ trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial);
+ rxrpc_notify_socket(call);
+ }
return true;
protocol_error:
@@ -551,32 +558,9 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
atomic64_read(&call->ackr_window), call->rx_highest_seq,
skb->len, seq0);
- _proto("Rx DATA %%%u { #%u f=%02x }",
- sp->hdr.serial, seq0, sp->hdr.flags);
-
state = READ_ONCE(call->state);
- if (state >= RXRPC_CALL_COMPLETE) {
- rxrpc_free_skb(skb, rxrpc_skb_freed);
+ if (state >= RXRPC_CALL_COMPLETE)
return;
- }
-
- /* Unshare the packet so that it can be modified for in-place
- * decryption.
- */
- if (sp->hdr.securityIndex != 0) {
- struct sk_buff *nskb = skb_unshare(skb, GFP_ATOMIC);
- if (!nskb) {
- rxrpc_eaten_skb(skb, rxrpc_skb_unshared_nomem);
- return;
- }
-
- if (nskb != skb) {
- rxrpc_eaten_skb(skb, rxrpc_skb_received);
- skb = nskb;
- rxrpc_new_skb(skb, rxrpc_skb_unshared);
- sp = rxrpc_skb(skb);
- }
- }
if (state == RXRPC_CALL_SERVER_RECV_REQUEST) {
unsigned long timo = READ_ONCE(call->next_req_timo);
@@ -591,28 +575,23 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
}
}
- spin_lock(&call->input_lock);
-
/* Received data implicitly ACKs all of the request packets we sent
* when we're acting as a client.
*/
if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
!rxrpc_receiving_reply(call))
- goto out;
+ goto out_notify;
if (!rxrpc_input_split_jumbo(call, skb)) {
rxrpc_proto_abort("VLD", call, sp->hdr.seq);
- goto out;
+ goto out_notify;
}
skb = NULL;
-out:
+out_notify:
trace_rxrpc_notify_socket(call->debug_id, serial);
rxrpc_notify_socket(call);
-
- spin_unlock(&call->input_lock);
- rxrpc_free_skb(skb, rxrpc_skb_freed);
_leave(" [queued]");
}
@@ -671,32 +650,6 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call,
}
/*
- * Process the response to a ping that we sent to find out if we lost an ACK.
- *
- * If we got back a ping response that indicates a lower tx_top than what we
- * had at the time of the ping transmission, we adjudge all the DATA packets
- * sent between the response tx_top and the ping-time tx_top to have been lost.
- */
-static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call)
-{
- if (after(call->acks_lost_top, call->acks_prev_seq) &&
- !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
- rxrpc_queue_call(call);
-}
-
-/*
- * Process a ping response.
- */
-static void rxrpc_input_ping_response(struct rxrpc_call *call,
- ktime_t resp_time,
- rxrpc_serial_t acked_serial,
- rxrpc_serial_t ack_serial)
-{
- if (acked_serial == call->acks_lost_ping)
- rxrpc_input_check_for_lost_ack(call);
-}
-
-/*
* Process the extra information that may be appended to an ACK packet
*/
static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
@@ -708,11 +661,6 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
bool wake = false;
u32 rwind = ntohl(ackinfo->rwind);
- _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
- sp->hdr.serial,
- ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
- rwind, ntohl(ackinfo->jumbo_max));
-
if (rwind > RXRPC_TX_MAX_WINDOW)
rwind = RXRPC_TX_MAX_WINDOW;
if (call->tx_winsize != rwind) {
@@ -729,11 +677,10 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
peer = call->peer;
if (mtu < peer->maxdata) {
- spin_lock_bh(&peer->lock);
+ spin_lock(&peer->lock);
peer->maxdata = mtu;
peer->mtu = mtu + peer->hdrsize;
- spin_unlock_bh(&peer->lock);
- _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
+ spin_unlock(&peer->lock);
}
if (wake)
@@ -810,7 +757,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_ackpacket ack;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_ackinfo info;
- struct sk_buff *skb_old = NULL, *skb_put = skb;
rxrpc_serial_t ack_serial, acked_serial;
rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
int nr_acks, offset, ioffset;
@@ -818,10 +764,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
_enter("");
offset = sizeof(struct rxrpc_wire_header);
- if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0) {
- rxrpc_proto_abort("XAK", call, 0);
- goto out_not_locked;
- }
+ if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0)
+ return rxrpc_proto_abort("XAK", call, 0);
offset += sizeof(ack);
ack_serial = sp->hdr.serial;
@@ -855,7 +799,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
}
if (ack.reason == RXRPC_ACK_PING) {
- _proto("Rx ACK %%%u PING Request", ack_serial);
rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial,
rxrpc_propose_ack_respond_to_ping);
} else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
@@ -895,41 +838,25 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
first_soft_ack, call->acks_first_seq,
prev_pkt, call->acks_prev_seq);
- goto out_not_locked;
+ return;
}
info.rxMTU = 0;
ioffset = offset + nr_acks + 3;
if (skb->len >= ioffset + sizeof(info) &&
- skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0) {
- rxrpc_proto_abort("XAI", call, 0);
- goto out_not_locked;
- }
+ skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0)
+ return rxrpc_proto_abort("XAI", call, 0);
if (nr_acks > 0)
skb_condense(skb);
- spin_lock(&call->input_lock);
-
- /* Discard any out-of-order or duplicate ACKs (inside lock). */
- if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
- trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
- first_soft_ack, call->acks_first_seq,
- prev_pkt, call->acks_prev_seq);
- goto out;
- }
call->acks_latest_ts = skb->tstamp;
-
call->acks_first_seq = first_soft_ack;
call->acks_prev_seq = prev_pkt;
switch (ack.reason) {
case RXRPC_ACK_PING:
break;
- case RXRPC_ACK_PING_RESPONSE:
- rxrpc_input_ping_response(call, skb->tstamp, acked_serial,
- ack_serial);
- fallthrough;
default:
if (after(acked_serial, call->acks_highest_serial))
call->acks_highest_serial = acked_serial;
@@ -940,10 +867,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (info.rxMTU)
rxrpc_input_ackinfo(call, skb, &info);
- if (first_soft_ack == 0) {
- rxrpc_proto_abort("AK0", call, 0);
- goto out;
- }
+ if (first_soft_ack == 0)
+ return rxrpc_proto_abort("AK0", call, 0);
/* Ignore ACKs unless we are or have just been transmitting. */
switch (READ_ONCE(call->state)) {
@@ -953,45 +878,27 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
case RXRPC_CALL_SERVER_AWAIT_ACK:
break;
default:
- goto out;
+ return;
}
if (before(hard_ack, call->acks_hard_ack) ||
- after(hard_ack, call->tx_top)) {
- rxrpc_proto_abort("AKW", call, 0);
- goto out;
- }
- if (nr_acks > call->tx_top - hard_ack) {
- rxrpc_proto_abort("AKN", call, 0);
- goto out;
- }
+ after(hard_ack, call->tx_top))
+ return rxrpc_proto_abort("AKW", call, 0);
+ if (nr_acks > call->tx_top - hard_ack)
+ return rxrpc_proto_abort("AKN", call, 0);
if (after(hard_ack, call->acks_hard_ack)) {
if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
rxrpc_end_tx_phase(call, false, "ETA");
- goto out;
+ return;
}
}
if (nr_acks > 0) {
- if (offset > (int)skb->len - nr_acks) {
- rxrpc_proto_abort("XSA", call, 0);
- goto out;
- }
-
- spin_lock(&call->acks_ack_lock);
- skb_old = call->acks_soft_tbl;
- call->acks_soft_tbl = skb;
- spin_unlock(&call->acks_ack_lock);
-
+ if (offset > (int)skb->len - nr_acks)
+ return rxrpc_proto_abort("XSA", call, 0);
rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack,
nr_acks, &summary);
- skb_put = NULL;
- } else if (call->acks_soft_tbl) {
- spin_lock(&call->acks_ack_lock);
- skb_old = call->acks_soft_tbl;
- call->acks_soft_tbl = NULL;
- spin_unlock(&call->acks_ack_lock);
}
if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
@@ -1001,11 +908,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_propose_ack_ping_for_lost_reply);
rxrpc_congestion_management(call, skb, &summary, acked_serial);
-out:
- spin_unlock(&call->input_lock);
-out_not_locked:
- rxrpc_free_skb(skb_put, rxrpc_skb_freed);
- rxrpc_free_skb(skb_old, rxrpc_skb_freed);
}
/*
@@ -1014,16 +916,9 @@ out_not_locked:
static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_ack_summary summary = { 0 };
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- _proto("Rx ACKALL %%%u", sp->hdr.serial);
-
- spin_lock(&call->input_lock);
if (rxrpc_rotate_tx_window(call, call->tx_top, &summary))
rxrpc_end_tx_phase(call, false, "ETL");
-
- spin_unlock(&call->input_lock);
}
/*
@@ -1032,35 +927,30 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- __be32 wtmp;
- u32 abort_code = RX_CALL_DEAD;
-
- _enter("");
-
- if (skb->len >= 4 &&
- skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- &wtmp, sizeof(wtmp)) >= 0)
- abort_code = ntohl(wtmp);
- trace_rxrpc_rx_abort(call, sp->hdr.serial, abort_code);
-
- _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
+ trace_rxrpc_rx_abort(call, sp->hdr.serial, skb->priority);
rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- abort_code, -ECONNABORTED);
+ skb->priority, -ECONNABORTED);
}
/*
* Process an incoming call packet.
*/
-static void rxrpc_input_call_packet(struct rxrpc_call *call,
- struct sk_buff *skb)
+void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
unsigned long timo;
_enter("%p,%p", call, skb);
+ if (sp->hdr.serviceId != call->dest_srx.srx_service)
+ call->dest_srx.srx_service = sp->hdr.serviceId;
+ if ((int)sp->hdr.serial - (int)call->rx_serial > 0)
+ call->rx_serial = sp->hdr.serial;
+ if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags))
+ set_bit(RXRPC_CALL_RX_HEARD, &call->flags);
+
timo = READ_ONCE(call->next_rx_timo);
if (timo) {
unsigned long now = jiffies, expect_rx_by;
@@ -1074,15 +964,13 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_DATA:
rxrpc_input_data(call, skb);
- goto no_free;
+ break;
case RXRPC_PACKET_TYPE_ACK:
rxrpc_input_ack(call, skb);
- goto no_free;
+ break;
case RXRPC_PACKET_TYPE_BUSY:
- _proto("Rx BUSY %%%u", sp->hdr.serial);
-
/* Just ignore BUSY packets from the server; the retry and
* lifespan timers will take care of business. BUSY packets
* from the client don't make sense.
@@ -1100,10 +988,6 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
default:
break;
}
-
- rxrpc_free_skb(skb, rxrpc_skb_freed);
-no_free:
- _leave("");
}
/*
@@ -1112,10 +996,10 @@ no_free:
*
* TODO: If callNumber > call_id + 1, renegotiate security.
*/
-static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx,
- struct rxrpc_connection *conn,
- struct rxrpc_call *call)
+void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb)
{
+ struct rxrpc_connection *conn = call->conn;
+
switch (READ_ONCE(call->state)) {
case RXRPC_CALL_SERVER_AWAIT_ACK:
rxrpc_call_completed(call);
@@ -1123,360 +1007,15 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx,
case RXRPC_CALL_COMPLETE:
break;
default:
- if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) {
- set_bit(RXRPC_CALL_EV_ABORT, &call->events);
- rxrpc_queue_call(call);
- }
+ if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN))
+ rxrpc_send_abort_packet(call);
trace_rxrpc_improper_term(call);
break;
}
- spin_lock(&rx->incoming_lock);
- __rxrpc_disconnect_call(conn, call);
- spin_unlock(&rx->incoming_lock);
-}
-
-/*
- * post connection-level events to the connection
- * - this includes challenges, responses, some aborts and call terminal packet
- * retransmission.
- */
-static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
- struct sk_buff *skb)
-{
- _enter("%p,%p", conn, skb);
-
- skb_queue_tail(&conn->rx_queue, skb);
- rxrpc_queue_conn(conn);
-}
-
-/*
- * post endpoint-level events to the local endpoint
- * - this includes debug and version messages
- */
-static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
- struct sk_buff *skb)
-{
- _enter("%p,%p", local, skb);
-
- if (rxrpc_get_local_maybe(local)) {
- skb_queue_tail(&local->event_queue, skb);
- rxrpc_queue_local(local);
- } else {
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- }
-}
-
-/*
- * put a packet up for transport-level abort
- */
-static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
-{
- if (rxrpc_get_local_maybe(local)) {
- skb_queue_tail(&local->reject_queue, skb);
- rxrpc_queue_local(local);
- } else {
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- }
-}
-
-/*
- * Extract the wire header from a packet and translate the byte order.
- */
-static noinline
-int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
-{
- struct rxrpc_wire_header whdr;
-
- /* dig out the RxRPC connection details */
- if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) {
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("bad_hdr"));
- return -EBADMSG;
- }
-
- memset(sp, 0, sizeof(*sp));
- sp->hdr.epoch = ntohl(whdr.epoch);
- sp->hdr.cid = ntohl(whdr.cid);
- sp->hdr.callNumber = ntohl(whdr.callNumber);
- sp->hdr.seq = ntohl(whdr.seq);
- sp->hdr.serial = ntohl(whdr.serial);
- sp->hdr.flags = whdr.flags;
- sp->hdr.type = whdr.type;
- sp->hdr.userStatus = whdr.userStatus;
- sp->hdr.securityIndex = whdr.securityIndex;
- sp->hdr._rsvd = ntohs(whdr._rsvd);
- sp->hdr.serviceId = ntohs(whdr.serviceId);
- return 0;
-}
-
-/*
- * handle data received on the local endpoint
- * - may be called in interrupt context
- *
- * [!] Note that as this is called from the encap_rcv hook, the socket is not
- * held locked by the caller and nothing prevents sk_user_data on the UDP from
- * being cleared in the middle of processing this function.
- *
- * Called with the RCU read lock held from the IP layer via UDP.
- */
-int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
-{
- struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
- struct rxrpc_connection *conn;
- struct rxrpc_channel *chan;
- struct rxrpc_call *call = NULL;
- struct rxrpc_skb_priv *sp;
- struct rxrpc_peer *peer = NULL;
- struct rxrpc_sock *rx = NULL;
- unsigned int channel;
-
- _enter("%p", udp_sk);
-
- if (unlikely(!local)) {
- kfree_skb(skb);
- return 0;
- }
- if (skb->tstamp == 0)
- skb->tstamp = ktime_get_real();
-
- rxrpc_new_skb(skb, rxrpc_skb_received);
-
- skb_pull(skb, sizeof(struct udphdr));
-
- /* The UDP protocol already released all skb resources;
- * we are free to add our own data there.
- */
- sp = rxrpc_skb(skb);
-
- /* dig out the RxRPC connection details */
- if (rxrpc_extract_header(sp, skb) < 0)
- goto bad_message;
-
- if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
- static int lose;
- if ((lose++ & 7) == 7) {
- trace_rxrpc_rx_lose(sp);
- rxrpc_free_skb(skb, rxrpc_skb_lost);
- return 0;
- }
- }
-
- if (skb->tstamp == 0)
- skb->tstamp = ktime_get_real();
- trace_rxrpc_rx_packet(sp);
-
- switch (sp->hdr.type) {
- case RXRPC_PACKET_TYPE_VERSION:
- if (rxrpc_to_client(sp))
- goto discard;
- rxrpc_post_packet_to_local(local, skb);
- goto out;
-
- case RXRPC_PACKET_TYPE_BUSY:
- if (rxrpc_to_server(sp))
- goto discard;
- fallthrough;
- case RXRPC_PACKET_TYPE_ACK:
- case RXRPC_PACKET_TYPE_ACKALL:
- if (sp->hdr.callNumber == 0)
- goto bad_message;
- fallthrough;
- case RXRPC_PACKET_TYPE_ABORT:
- break;
-
- case RXRPC_PACKET_TYPE_DATA:
- if (sp->hdr.callNumber == 0 ||
- sp->hdr.seq == 0)
- goto bad_message;
-
- /* Unshare the packet so that it can be modified for in-place
- * decryption.
- */
- if (sp->hdr.securityIndex != 0) {
- struct sk_buff *nskb = skb_unshare(skb, GFP_ATOMIC);
- if (!nskb) {
- rxrpc_eaten_skb(skb, rxrpc_skb_unshared_nomem);
- goto out;
- }
-
- if (nskb != skb) {
- rxrpc_eaten_skb(skb, rxrpc_skb_received);
- skb = nskb;
- rxrpc_new_skb(skb, rxrpc_skb_unshared);
- sp = rxrpc_skb(skb);
- }
- }
- break;
-
- case RXRPC_PACKET_TYPE_CHALLENGE:
- if (rxrpc_to_server(sp))
- goto discard;
- break;
- case RXRPC_PACKET_TYPE_RESPONSE:
- if (rxrpc_to_client(sp))
- goto discard;
- break;
-
- /* Packet types 9-11 should just be ignored. */
- case RXRPC_PACKET_TYPE_PARAMS:
- case RXRPC_PACKET_TYPE_10:
- case RXRPC_PACKET_TYPE_11:
- goto discard;
-
- default:
- _proto("Rx Bad Packet Type %u", sp->hdr.type);
- goto bad_message;
- }
-
- if (sp->hdr.serviceId == 0)
- goto bad_message;
-
- if (rxrpc_to_server(sp)) {
- /* Weed out packets to services we're not offering. Packets
- * that would begin a call are explicitly rejected and the rest
- * are just discarded.
- */
- rx = rcu_dereference(local->service);
- if (!rx || (sp->hdr.serviceId != rx->srx.srx_service &&
- sp->hdr.serviceId != rx->second_service)) {
- if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
- sp->hdr.seq == 1)
- goto unsupported_service;
- goto discard;
- }
- }
-
- conn = rxrpc_find_connection_rcu(local, skb, &peer);
- if (conn) {
- if (sp->hdr.securityIndex != conn->security_ix)
- goto wrong_security;
-
- if (sp->hdr.serviceId != conn->service_id) {
- int old_id;
-
- if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags))
- goto reupgrade;
- old_id = cmpxchg(&conn->service_id, conn->params.service_id,
- sp->hdr.serviceId);
-
- if (old_id != conn->params.service_id &&
- old_id != sp->hdr.serviceId)
- goto reupgrade;
- }
-
- if (sp->hdr.callNumber == 0) {
- /* Connection-level packet */
- _debug("CONN %p {%d}", conn, conn->debug_id);
- rxrpc_post_packet_to_conn(conn, skb);
- goto out;
- }
-
- if ((int)sp->hdr.serial - (int)conn->hi_serial > 0)
- conn->hi_serial = sp->hdr.serial;
-
- /* Call-bound packets are routed by connection channel. */
- channel = sp->hdr.cid & RXRPC_CHANNELMASK;
- chan = &conn->channels[channel];
-
- /* Ignore really old calls */
- if (sp->hdr.callNumber < chan->last_call)
- goto discard;
-
- if (sp->hdr.callNumber == chan->last_call) {
- if (chan->call ||
- sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
- goto discard;
-
- /* For the previous service call, if completed
- * successfully, we discard all further packets.
- */
- if (rxrpc_conn_is_service(conn) &&
- chan->last_type == RXRPC_PACKET_TYPE_ACK)
- goto discard;
-
- /* But otherwise we need to retransmit the final packet
- * from data cached in the connection record.
- */
- if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
- trace_rxrpc_rx_data(chan->call_debug_id,
- sp->hdr.seq,
- sp->hdr.serial,
- sp->hdr.flags);
- rxrpc_post_packet_to_conn(conn, skb);
- goto out;
- }
-
- call = rcu_dereference(chan->call);
-
- if (sp->hdr.callNumber > chan->call_id) {
- if (rxrpc_to_client(sp))
- goto reject_packet;
- if (call)
- rxrpc_input_implicit_end_call(rx, conn, call);
- call = NULL;
- }
-
- if (call) {
- if (sp->hdr.serviceId != call->service_id)
- call->service_id = sp->hdr.serviceId;
- if ((int)sp->hdr.serial - (int)call->rx_serial > 0)
- call->rx_serial = sp->hdr.serial;
- if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags))
- set_bit(RXRPC_CALL_RX_HEARD, &call->flags);
- }
- }
-
- if (!call || refcount_read(&call->ref) == 0) {
- if (rxrpc_to_client(sp) ||
- sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
- goto bad_message;
- if (sp->hdr.seq != 1)
- goto discard;
- call = rxrpc_new_incoming_call(local, rx, skb);
- if (!call)
- goto reject_packet;
- }
-
- /* Process a call packet; this either discards or passes on the ref
- * elsewhere.
- */
- rxrpc_input_call_packet(call, skb);
- goto out;
+ rxrpc_input_call_event(call, skb);
-discard:
- rxrpc_free_skb(skb, rxrpc_skb_freed);
-out:
- trace_rxrpc_rx_done(0, 0);
- return 0;
-
-wrong_security:
- trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RXKADINCONSISTENCY, EBADMSG);
- skb->priority = RXKADINCONSISTENCY;
- goto post_abort;
-
-unsupported_service:
- trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_INVALID_OPERATION, EOPNOTSUPP);
- skb->priority = RX_INVALID_OPERATION;
- goto post_abort;
-
-reupgrade:
- trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_PROTOCOL_ERROR, EBADMSG);
- goto protocol_error;
-
-bad_message:
- trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_PROTOCOL_ERROR, EBADMSG);
-protocol_error:
- skb->priority = RX_PROTOCOL_ERROR;
-post_abort:
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
-reject_packet:
- trace_rxrpc_rx_done(skb->mark, skb->priority);
- rxrpc_reject_packet(local, skb);
- _leave(" [badmsg]");
- return 0;
+ spin_lock(&conn->bundle->channel_lock);
+ __rxrpc_disconnect_call(conn, call);
+ spin_unlock(&conn->bundle->channel_lock);
}
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
new file mode 100644
index 000000000000..d83ae3193032
--- /dev/null
+++ b/net/rxrpc/io_thread.c
@@ -0,0 +1,496 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RxRPC packet reception
+ *
+ * Copyright (C) 2007, 2016, 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells ([email protected])
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "ar-internal.h"
+
+static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb);
+
+/*
+ * Handle data received on the local endpoint.
+ * - may be called in interrupt context
+ *
+ * This hook does no protocol processing itself: it timestamps the packet if
+ * necessary, tags it as a received packet and queues it on local->rx_queue
+ * for the I/O thread, which it then wakes.
+ *
+ * [!] Note that as this is called from the encap_rcv hook, the socket is not
+ * held locked by the caller and nothing prevents sk_user_data on the UDP from
+ * being cleared in the middle of processing this function.
+ *
+ * Called with the RCU read lock held from the IP layer via UDP.
+ *
+ * Always returns 0; the skb is either freed here or queued.
+ */
+int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb)
+{
+ struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
+
+ /* No local endpoint attached to the socket: drop the packet. */
+ if (unlikely(!local)) {
+ kfree_skb(skb);
+ return 0;
+ }
+ if (skb->tstamp == 0)
+ skb->tstamp = ktime_get_real();
+
+ /* The I/O thread dispatches on skb->mark when it dequeues the packet. */
+ skb->mark = RXRPC_SKB_MARK_PACKET;
+ rxrpc_new_skb(skb, rxrpc_skb_new_encap_rcv);
+ skb_queue_tail(&local->rx_queue, skb);
+ rxrpc_wake_up_io_thread(local);
+ return 0;
+}
+
+/*
+ * Handle an error received on the local endpoint.
+ *
+ * Every skb waiting on the socket's error queue is tagged as an error report
+ * and moved onto local->rx_queue; the I/O thread is then woken to process
+ * them.  Nothing is done if the socket no longer has a local endpoint.
+ */
+void rxrpc_error_report(struct sock *sk)
+{
+ struct rxrpc_local *local;
+ struct sk_buff *err_skb;
+
+ rcu_read_lock();
+ local = rcu_dereference_sk_user_data(sk);
+ if (likely(local)) {
+ for (;;) {
+ err_skb = skb_dequeue(&sk->sk_error_queue);
+ if (!err_skb)
+ break;
+ /* The I/O thread dispatches on skb->mark. */
+ err_skb->mark = RXRPC_SKB_MARK_ERROR;
+ rxrpc_new_skb(err_skb, rxrpc_skb_new_error_report);
+ skb_queue_tail(&local->rx_queue, err_skb);
+ }
+
+ rxrpc_wake_up_io_thread(local);
+ }
+ rcu_read_unlock();
+}
+
+/*
+ * Process a VERSION packet targeted at a local endpoint.
+ *
+ * The first byte following the wire header is examined; if it can be read
+ * and is zero, a version reply is sent back via
+ * rxrpc_send_version_request().  Anything else is silently ignored.
+ */
+static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ char first;
+
+ _enter("");
+
+ rxrpc_see_skb(skb, rxrpc_skb_see_version);
+
+ /* Packet too short to carry the payload byte: nothing to do. */
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &first, 1) < 0)
+ return;
+ if (first != 0)
+ return;
+
+ rxrpc_send_version_request(local, &sp->hdr, skb);
+}
+
+/*
+ * Extract the wire header from a packet and translate the byte order.
+ *
+ * On success the skb private area is cleared and sp->hdr is filled in with
+ * the host-order header fields, and 0 is returned.  If the packet is too
+ * short to hold a wire header, an eproto trace is emitted and -EBADMSG is
+ * returned with *sp left untouched.
+ */
+static noinline
+int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
+{
+ struct rxrpc_wire_header wire;
+
+ /* dig out the RxRPC connection details */
+ if (skb_copy_bits(skb, 0, &wire, sizeof(wire)) >= 0) {
+ memset(sp, 0, sizeof(*sp));
+
+ /* 32-bit fields */
+ sp->hdr.epoch = ntohl(wire.epoch);
+ sp->hdr.cid = ntohl(wire.cid);
+ sp->hdr.callNumber = ntohl(wire.callNumber);
+ sp->hdr.seq = ntohl(wire.seq);
+ sp->hdr.serial = ntohl(wire.serial);
+
+ /* 16-bit fields */
+ sp->hdr._rsvd = ntohs(wire._rsvd);
+ sp->hdr.serviceId = ntohs(wire.serviceId);
+
+ /* Single-byte fields need no conversion */
+ sp->hdr.flags = wire.flags;
+ sp->hdr.type = wire.type;
+ sp->hdr.userStatus = wire.userStatus;
+ sp->hdr.securityIndex = wire.securityIndex;
+ return 0;
+ }
+
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("bad_hdr"));
+ return -EBADMSG;
+}
+
+/*
+ * Extract the abort code from an ABORT packet and stash it in skb->priority.
+ *
+ * Returns true if the 32-bit code following the wire header could be read
+ * (skb->priority then holds it in host order), false if the packet is too
+ * short, in which case skb->priority is left unchanged.
+ */
+static bool rxrpc_extract_abort(struct sk_buff *skb)
+{
+ __be32 wire_code;
+ bool ok;
+
+ ok = skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &wire_code, sizeof(wire_code)) >= 0;
+ if (ok)
+ skb->priority = ntohl(wire_code);
+ return ok;
+}
+
+/*
+ * Process packets received on the local endpoint
+ *
+ * The UDP header is pulled off, the wire header is decoded into the skb's
+ * private area and the packet is then vetted by type.  VERSION packets are
+ * answered here; everything else is either passed to
+ * rxrpc_input_packet_on_conn() if a matching connection is found, or
+ * offered to rxrpc_new_incoming_call().
+ *
+ * @_skb points at the caller's packet pointer.  It is replaced if the packet
+ * had to be unshared into a new buffer, and set to NULL if unsharing failed
+ * (the original having been consumed), so the caller must re-read *_skb after
+ * this returns.
+ *
+ * All "just discard" paths return 0; otherwise the result from the
+ * connection or new-call handler is passed back.  This function does not
+ * free the packet itself.
+ */
+static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
+{
+ struct rxrpc_connection *conn;
+ struct sockaddr_rxrpc peer_srx;
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_peer *peer = NULL;
+ struct sk_buff *skb = *_skb;
+ int ret = 0;
+
+ skb_pull(skb, sizeof(struct udphdr));
+
+ sp = rxrpc_skb(skb);
+
+ /* dig out the RxRPC connection details */
+ if (rxrpc_extract_header(sp, skb) < 0)
+ goto bad_message;
+
+ /* Optionally drop one packet in every eight to exercise recovery. */
+ if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
+ static int lose;
+ if ((lose++ & 7) == 7) {
+ trace_rxrpc_rx_lose(sp);
+ return 0;
+ }
+ }
+
+ trace_rxrpc_rx_packet(sp);
+
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_VERSION:
+ if (rxrpc_to_client(sp))
+ return 0;
+ rxrpc_input_version(local, skb);
+ return 0;
+
+ case RXRPC_PACKET_TYPE_BUSY:
+ if (rxrpc_to_server(sp))
+ return 0;
+ fallthrough;
+ case RXRPC_PACKET_TYPE_ACK:
+ case RXRPC_PACKET_TYPE_ACKALL:
+ if (sp->hdr.callNumber == 0)
+ goto bad_message;
+ break;
+ case RXRPC_PACKET_TYPE_ABORT:
+ if (!rxrpc_extract_abort(skb))
+ return 0; /* Just discard if malformed */
+ break;
+
+ case RXRPC_PACKET_TYPE_DATA:
+ if (sp->hdr.callNumber == 0 ||
+ sp->hdr.seq == 0)
+ goto bad_message;
+
+ /* Unshare the packet so that it can be modified for in-place
+ * decryption.
+ */
+ if (sp->hdr.securityIndex != 0) {
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (!skb) {
+ /* The original was consumed; tell the caller
+ * there is nothing left to free.
+ */
+ rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem);
+ *_skb = NULL;
+ return 0;
+ }
+
+ if (skb != *_skb) {
+ rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare);
+ *_skb = skb;
+ rxrpc_new_skb(skb, rxrpc_skb_new_unshared);
+ sp = rxrpc_skb(skb);
+ }
+ }
+ break;
+
+ case RXRPC_PACKET_TYPE_CHALLENGE:
+ if (rxrpc_to_server(sp))
+ return 0;
+ break;
+ case RXRPC_PACKET_TYPE_RESPONSE:
+ if (rxrpc_to_client(sp))
+ return 0;
+ break;
+
+ /* Packet types 9-11 should just be ignored. */
+ case RXRPC_PACKET_TYPE_PARAMS:
+ case RXRPC_PACKET_TYPE_10:
+ case RXRPC_PACKET_TYPE_11:
+ return 0;
+
+ default:
+ goto bad_message;
+ }
+
+ if (sp->hdr.serviceId == 0)
+ goto bad_message;
+
+ /* This function returns int with 0 meaning "discard", so return 0
+ * here like every other discard path rather than a boolean.
+ */
+ if (WARN_ON_ONCE(rxrpc_extract_addr_from_skb(&peer_srx, skb) < 0))
+ return 0; /* Unsupported address type - discard. */
+
+ if (peer_srx.transport.family != local->srx.transport.family &&
+ (peer_srx.transport.family == AF_INET &&
+ local->srx.transport.family != AF_INET6)) {
+ pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
+ peer_srx.transport.family,
+ local->srx.transport.family);
+ return 0; /* Wrong address type - discard. */
+ }
+
+ if (rxrpc_to_client(sp)) {
+ /* Take a ref on the connection under RCU so that it can be
+ * used after the read lock is dropped.
+ */
+ rcu_read_lock();
+ conn = rxrpc_find_client_connection_rcu(local, &peer_srx, skb);
+ conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input);
+ rcu_read_unlock();
+ if (!conn) {
+ trace_rxrpc_abort(0, "NCC", sp->hdr.cid,
+ sp->hdr.callNumber, sp->hdr.seq,
+ RXKADINCONSISTENCY, EBADMSG);
+ goto protocol_error;
+ }
+
+ ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb);
+ rxrpc_put_connection(conn, rxrpc_conn_put_call_input);
+ return ret;
+ }
+
+ /* We need to look up service connections by the full protocol
+ * parameter set. We look up the peer first as an intermediate step
+ * and then the connection from the peer's tree.
+ */
+ rcu_read_lock();
+
+ peer = rxrpc_lookup_peer_rcu(local, &peer_srx);
+ if (!peer) {
+ rcu_read_unlock();
+ /* NOTE(review): a failure here is returned without rejecting
+ * the packet, unlike the known-peer path below - confirm
+ * that this is intended.
+ */
+ return rxrpc_new_incoming_call(local, NULL, NULL, &peer_srx, skb);
+ }
+
+ conn = rxrpc_find_service_conn_rcu(peer, skb);
+ conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input);
+ if (conn) {
+ rcu_read_unlock();
+ ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb);
+ rxrpc_put_connection(conn, rxrpc_conn_put_call_input);
+ return ret;
+ }
+
+ /* No connection yet: pin the peer for the new-call attempt. */
+ peer = rxrpc_get_peer_maybe(peer, rxrpc_peer_get_input);
+ rcu_read_unlock();
+
+ ret = rxrpc_new_incoming_call(local, peer, NULL, &peer_srx, skb);
+ rxrpc_put_peer(peer, rxrpc_peer_put_input);
+ if (ret < 0)
+ goto reject_packet;
+ return 0;
+
+bad_message:
+ trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RX_PROTOCOL_ERROR, EBADMSG);
+protocol_error:
+ skb->priority = RX_PROTOCOL_ERROR;
+ skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+reject_packet:
+ rxrpc_reject_packet(local, skb);
+ return ret;
+}
+
+/*
+ * Deal with a packet that's associated with an extant connection.
+ *
+ * The packet's security index and service ID are checked against the
+ * connection, then the packet is routed: call number 0 goes to
+ * rxrpc_input_conn_packet(); packets for the channel's previous call are
+ * discarded or passed to rxrpc_input_conn_packet(); packets for the current
+ * call go to rxrpc_input_call_event(); and anything with a newer call number
+ * is offered to rxrpc_new_incoming_call().
+ *
+ * Returns the result of rxrpc_input_conn_packet() for connection-level
+ * packets and 0 on every other path, including those where the packet is
+ * handed to rxrpc_reject_packet().
+ */
+static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_channel *chan;
+ struct rxrpc_call *call = NULL;
+ unsigned int channel;
+
+ if (sp->hdr.securityIndex != conn->security_ix)
+ goto wrong_security;
+
+ if (sp->hdr.serviceId != conn->service_id) {
+ int old_id;
+
+ if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags))
+ goto reupgrade;
+ /* Switch service_id from the original ID to the one in the
+ * packet, but only if it still holds the original ID.
+ */
+ old_id = cmpxchg(&conn->service_id, conn->orig_service_id,
+ sp->hdr.serviceId);
+
+ /* Fail unless the swap happened or the ID already matched. */
+ if (old_id != conn->orig_service_id &&
+ old_id != sp->hdr.serviceId)
+ goto reupgrade;
+ }
+
+ /* Track the highest serial number seen on this connection. */
+ if (after(sp->hdr.serial, conn->hi_serial))
+ conn->hi_serial = sp->hdr.serial;
+
+ /* It's a connection-level packet if the call number is 0. */
+ if (sp->hdr.callNumber == 0)
+ return rxrpc_input_conn_packet(conn, skb);
+
+ /* Call-bound packets are routed by connection channel. */
+ channel = sp->hdr.cid & RXRPC_CHANNELMASK;
+ chan = &conn->channels[channel];
+
+ /* Ignore really old calls */
+ if (sp->hdr.callNumber < chan->last_call)
+ return 0;
+
+ if (sp->hdr.callNumber == chan->last_call) {
+ if (chan->call ||
+ sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
+ return 0;
+
+ /* For the previous service call, if completed successfully, we
+ * discard all further packets.
+ */
+ if (rxrpc_conn_is_service(conn) &&
+ chan->last_type == RXRPC_PACKET_TYPE_ACK)
+ return 0;
+
+ /* But otherwise we need to retransmit the final packet from
+ * data cached in the connection record.
+ */
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
+ trace_rxrpc_rx_data(chan->call_debug_id,
+ sp->hdr.seq,
+ sp->hdr.serial,
+ sp->hdr.flags);
+ rxrpc_input_conn_packet(conn, skb);
+ return 0;
+ }
+
+ /* Try to take a ref on the channel's current call under RCU. */
+ rcu_read_lock();
+ call = rxrpc_try_get_call(rcu_dereference(chan->call),
+ rxrpc_call_get_input);
+ rcu_read_unlock();
+
+ /* A call number beyond the channel's current call ID. */
+ if (sp->hdr.callNumber > chan->call_id) {
+ if (rxrpc_to_client(sp)) {
+ /* NOTE(review): call may be NULL here; presumably
+ * rxrpc_put_call() tolerates that - confirm.
+ */
+ rxrpc_put_call(call, rxrpc_call_put_input);
+ goto reject_packet;
+ }
+
+ /* End the current call before a new one is set up below. */
+ if (call) {
+ rxrpc_implicit_end_call(call, skb);
+ rxrpc_put_call(call, rxrpc_call_put_input);
+ call = NULL;
+ }
+ }
+
+ if (!call) {
+ if (rxrpc_to_client(sp))
+ goto bad_message;
+ /* NOTE(review): a non-zero return is treated as success here,
+ * whereas rxrpc_input_packet() rejects only on a negative
+ * return from the same function - confirm which convention
+ * rxrpc_new_incoming_call() actually follows.
+ */
+ if (rxrpc_new_incoming_call(conn->local, conn->peer, conn,
+ peer_srx, skb))
+ return 0;
+ goto reject_packet;
+ }
+
+ rxrpc_input_call_event(call, skb);
+ rxrpc_put_call(call, rxrpc_call_put_input);
+ return 0;
+
+ /* Error exits: each sets the abort code in skb->priority (where one
+ * applies) and falls through to hand the packet to
+ * rxrpc_reject_packet().
+ */
+wrong_security:
+ trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RXKADINCONSISTENCY, EBADMSG);
+ skb->priority = RXKADINCONSISTENCY;
+ goto post_abort;
+
+reupgrade:
+ trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RX_PROTOCOL_ERROR, EBADMSG);
+ goto protocol_error;
+
+bad_message:
+ trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RX_PROTOCOL_ERROR, EBADMSG);
+protocol_error:
+ skb->priority = RX_PROTOCOL_ERROR;
+post_abort:
+ skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+reject_packet:
+ rxrpc_reject_packet(conn->local, skb);
+ return 0;
+}
+
+/*
+ * I/O and event handling thread.
+ *
+ * @data is the rxrpc_local endpoint this thread serves.  Each pass of the
+ * loop does one unit of work, in priority order:
+ *  1. service the first call on local->call_attend_q;
+ *  2. process one skb from the thread-private rx_queue, dispatching on
+ *     skb->mark (received packet or error report);
+ *  3. refill the private queue by splicing over local->rx_queue;
+ *  4. otherwise sleep until woken, or exit if asked to stop.
+ *
+ * On exit the local endpoint is destroyed.  Always returns 0.
+ */
+int rxrpc_io_thread(void *data)
+{
+ struct sk_buff_head rx_queue;
+ struct rxrpc_local *local = data;
+ struct rxrpc_call *call;
+ struct sk_buff *skb;
+
+ skb_queue_head_init(&rx_queue);
+
+ set_user_nice(current, MIN_NICE);
+
+ for (;;) {
+ rxrpc_inc_stat(local->rxnet, stat_io_loop);
+
+ /* Deal with calls that want immediate attention. */
+ if ((call = list_first_entry_or_null(&local->call_attend_q,
+ struct rxrpc_call,
+ attend_link))) {
+ spin_lock_bh(&local->lock);
+ list_del_init(&call->attend_link);
+ spin_unlock_bh(&local->lock);
+
+ trace_rxrpc_call_poked(call);
+ rxrpc_input_call_event(call, NULL);
+ rxrpc_put_call(call, rxrpc_call_put_poke);
+ continue;
+ }
+
+ /* Process received packets and errors. */
+ if ((skb = __skb_dequeue(&rx_queue))) {
+ switch (skb->mark) {
+ case RXRPC_SKB_MARK_PACKET:
+ skb->priority = 0;
+ rxrpc_input_packet(local, &skb);
+ /* rxrpc_input_packet() clears skb if the packet
+ * was consumed by a failed unshare, so there may
+ * be nothing left to trace or free.
+ */
+ if (skb) {
+ trace_rxrpc_rx_done(skb->mark, skb->priority);
+ rxrpc_free_skb(skb, rxrpc_skb_put_input);
+ }
+ break;
+ case RXRPC_SKB_MARK_ERROR:
+ rxrpc_input_error(local, skb);
+ rxrpc_free_skb(skb, rxrpc_skb_put_error_report);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ rxrpc_free_skb(skb, rxrpc_skb_put_unknown);
+ break;
+ }
+ continue;
+ }
+
+ /* Private queue drained: move everything queued on the shared
+ * queue across in one go so that its lock is taken once per
+ * batch rather than once per packet.
+ */
+ if (!skb_queue_empty(&local->rx_queue)) {
+ spin_lock_irq(&local->rx_queue.lock);
+ skb_queue_splice_tail_init(&local->rx_queue, &rx_queue);
+ spin_unlock_irq(&local->rx_queue.lock);
+ continue;
+ }
+
+ /* Mark ourselves as sleeping before rechecking for work so
+ * that a wakeup arriving in between is not lost.
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!skb_queue_empty(&local->rx_queue) ||
+ !list_empty(&local->call_attend_q)) {
+ __set_current_state(TASK_RUNNING);
+ continue;
+ }
+
+ if (kthread_should_stop())
+ break;
+ schedule();
+ }
+
+ __set_current_state(TASK_RUNNING);
+ rxrpc_see_local(local, rxrpc_local_stop);
+ rxrpc_destroy_local(local);
+ local->io_thread = NULL;
+ rxrpc_see_local(local, rxrpc_local_stopped);
+ return 0;
+}
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 8d2073e0e3da..8d53aded09c4 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -513,7 +513,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
if (ret < 0)
goto error;
- conn->params.key = key;
+ conn->key = key;
_leave(" = 0 [%d]", key_serial(key));
return 0;
@@ -602,7 +602,8 @@ static long rxrpc_read(const struct key *key,
}
_debug("token[%u]: toksize=%u", ntoks, toksize);
- ASSERTCMP(toksize, <=, AFSTOKEN_LENGTH_MAX);
+ if (WARN_ON(toksize > AFSTOKEN_LENGTH_MAX))
+ return -EIO;
toksizes[ntoks++] = toksize;
size += toksize + 4; /* each token has a length word */
@@ -679,8 +680,9 @@ static long rxrpc_read(const struct key *key,
return -ENOPKG;
}
- ASSERTCMP((unsigned long)xdr - (unsigned long)oldxdr, ==,
- toksize);
+ /* The token must have encoded to exactly the size computed for
+ * it; warn and fail only if the two differ.
+ */
+ if (WARN_ON((unsigned long)xdr - (unsigned long)oldxdr !=
+ toksize))
+ return -EIO;
}
#undef ENCODE_STR
@@ -688,8 +690,10 @@ static long rxrpc_read(const struct key *key,
#undef ENCODE64
#undef ENCODE
- ASSERTCMP(tok, ==, ntoks);
- ASSERTCMP((char __user *) xdr - buffer, ==, size);
+ if (WARN_ON(tok != ntoks))
+ return -EIO;
+ if (WARN_ON((unsigned long)xdr - (unsigned long)buffer != size))
+ return -EIO;
_leave(" = %zu", size);
return size;
}
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 19e929c7c38b..5e69ea6b233d 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -21,9 +21,9 @@ static const char rxrpc_version_string[65] = "linux-" UTS_RELEASE " AF_RXRPC";
/*
* Reply to a version request
*/
-static void rxrpc_send_version_request(struct rxrpc_local *local,
- struct rxrpc_host_header *hdr,
- struct sk_buff *skb)
+void rxrpc_send_version_request(struct rxrpc_local *local,
+ struct rxrpc_host_header *hdr,
+ struct sk_buff *skb)
{
struct rxrpc_wire_header whdr;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -63,8 +63,6 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
len = iov[0].iov_len + iov[1].iov_len;
- _proto("Tx VERSION (reply)");
-
ret = kernel_sendmsg(local->socket, &msg, iov, 2, len);
if (ret < 0)
trace_rxrpc_tx_fail(local->debug_id, 0, ret,
@@ -75,41 +73,3 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
_leave("");
}
-
-/*
- * Process event packets targeted at a local endpoint.
- */
-void rxrpc_process_local_events(struct rxrpc_local *local)
-{
- struct sk_buff *skb;
- char v;
-
- _enter("");
-
- skb = skb_dequeue(&local->event_queue);
- if (skb) {
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- rxrpc_see_skb(skb, rxrpc_skb_seen);
- _debug("{%d},{%u}", local->debug_id, sp->hdr.type);
-
- switch (sp->hdr.type) {
- case RXRPC_PACKET_TYPE_VERSION:
- if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- &v, 1) < 0)
- return;
- _proto("Rx VERSION { %02x }", v);
- if (v == 0)
- rxrpc_send_version_request(local, &sp->hdr, skb);
- break;
-
- default:
- /* Just ignore anything we don't understand */
- break;
- }
-
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- }
-
- _leave("");
-}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index a943fdf91e24..44222923c0d1 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -20,7 +20,6 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-static void rxrpc_local_processor(struct work_struct *);
static void rxrpc_local_rcu(struct rcu_head *);
/*
@@ -97,12 +96,9 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
atomic_set(&local->active_users, 1);
local->rxnet = rxnet;
INIT_HLIST_NODE(&local->link);
- INIT_WORK(&local->processor, rxrpc_local_processor);
- INIT_LIST_HEAD(&local->ack_tx_queue);
- spin_lock_init(&local->ack_tx_lock);
init_rwsem(&local->defrag_sem);
- skb_queue_head_init(&local->reject_queue);
- skb_queue_head_init(&local->event_queue);
+ skb_queue_head_init(&local->rx_queue);
+ INIT_LIST_HEAD(&local->call_attend_q);
local->client_bundles = RB_ROOT;
spin_lock_init(&local->client_bundles_lock);
spin_lock_init(&local->lock);
@@ -110,7 +106,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
local->debug_id = atomic_inc_return(&rxrpc_debug_id);
memcpy(&local->srx, srx, sizeof(*srx));
local->srx.srx_service = 0;
- trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, NULL);
+ trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, 1);
}
_leave(" = %p", local);
@@ -126,6 +122,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
struct udp_tunnel_sock_cfg tuncfg = {NULL};
struct sockaddr_rxrpc *srx = &local->srx;
struct udp_port_cfg udp_conf = {0};
+ struct task_struct *io_thread;
struct sock *usk;
int ret;
@@ -152,7 +149,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
}
tuncfg.encap_type = UDP_ENCAP_RXRPC;
- tuncfg.encap_rcv = rxrpc_input_packet;
+ tuncfg.encap_rcv = rxrpc_encap_rcv;
tuncfg.encap_err_rcv = rxrpc_encap_err_rcv;
tuncfg.sk_user_data = local;
setup_udp_tunnel_sock(net, local->socket, &tuncfg);
@@ -185,8 +182,23 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
BUG();
}
+ io_thread = kthread_run(rxrpc_io_thread, local,
+ "krxrpcio/%u", ntohs(udp_conf.local_udp_port));
+ if (IS_ERR(io_thread)) {
+ ret = PTR_ERR(io_thread);
+ goto error_sock;
+ }
+
+ local->io_thread = io_thread;
_leave(" = 0");
return 0;
+
+error_sock:
+ kernel_sock_shutdown(local->socket, SHUT_RDWR);
+ local->socket->sk->sk_user_data = NULL;
+ sock_release(local->socket);
+ local->socket = NULL;
+ return ret;
}
/*
@@ -198,7 +210,6 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
struct rxrpc_local *local;
struct rxrpc_net *rxnet = rxrpc_net(net);
struct hlist_node *cursor;
- const char *age;
long diff;
int ret;
@@ -229,10 +240,9 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
* we're attempting to use a local address that the dying
* object is still using.
*/
- if (!rxrpc_use_local(local))
+ if (!rxrpc_use_local(local, rxrpc_local_use_lookup))
break;
- age = "old";
goto found;
}
@@ -250,14 +260,9 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
} else {
hlist_add_head_rcu(&local->link, &rxnet->local_endpoints);
}
- age = "new";
found:
mutex_unlock(&rxnet->local_mutex);
-
- _net("LOCAL %s %d {%pISp}",
- age, local->debug_id, &local->srx.transport);
-
_leave(" = %p", local);
return local;
@@ -279,64 +284,49 @@ addr_in_use:
/*
* Get a ref on a local endpoint.
*/
-struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
+struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
{
- const void *here = __builtin_return_address(0);
- int r;
+ int r, u;
+ u = atomic_read(&local->active_users);
__refcount_inc(&local->ref, &r);
- trace_rxrpc_local(local->debug_id, rxrpc_local_got, r + 1, here);
+ trace_rxrpc_local(local->debug_id, why, r + 1, u);
return local;
}
/*
* Get a ref on a local endpoint unless its usage has already reached 0.
*/
-struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
+struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
{
- const void *here = __builtin_return_address(0);
- int r;
+ int r, u;
- if (local) {
- if (__refcount_inc_not_zero(&local->ref, &r))
- trace_rxrpc_local(local->debug_id, rxrpc_local_got,
- r + 1, here);
- else
- local = NULL;
+ if (local && __refcount_inc_not_zero(&local->ref, &r)) {
+ u = atomic_read(&local->active_users);
+ trace_rxrpc_local(local->debug_id, why, r + 1, u);
+ return local;
}
- return local;
-}
-/*
- * Queue a local endpoint and pass the caller's reference to the work item.
- */
-void rxrpc_queue_local(struct rxrpc_local *local)
-{
- const void *here = __builtin_return_address(0);
- unsigned int debug_id = local->debug_id;
- int r = refcount_read(&local->ref);
-
- if (rxrpc_queue_work(&local->processor))
- trace_rxrpc_local(debug_id, rxrpc_local_queued, r + 1, here);
- else
- rxrpc_put_local(local);
+ return NULL;
}
/*
* Drop a ref on a local endpoint.
*/
-void rxrpc_put_local(struct rxrpc_local *local)
+void rxrpc_put_local(struct rxrpc_local *local, enum rxrpc_local_trace why)
{
- const void *here = __builtin_return_address(0);
unsigned int debug_id;
bool dead;
- int r;
+ int r, u;
if (local) {
debug_id = local->debug_id;
+ u = atomic_read(&local->active_users);
dead = __refcount_dec_and_test(&local->ref, &r);
- trace_rxrpc_local(debug_id, rxrpc_local_put, r, here);
+ trace_rxrpc_local(debug_id, why, r, u);
if (dead)
call_rcu(&local->rcu, rxrpc_local_rcu);
@@ -346,14 +336,15 @@ void rxrpc_put_local(struct rxrpc_local *local)
/*
* Start using a local endpoint.
*/
-struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local)
+struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
{
- local = rxrpc_get_local_maybe(local);
+ local = rxrpc_get_local_maybe(local, rxrpc_local_get_for_use);
if (!local)
return NULL;
- if (!__rxrpc_use_local(local)) {
- rxrpc_put_local(local);
+ if (!__rxrpc_use_local(local, why)) {
+ rxrpc_put_local(local, rxrpc_local_put_for_use);
return NULL;
}
@@ -362,15 +353,19 @@ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local)
/*
* Cease using a local endpoint. Once the number of active users reaches 0, we
- * start the closure of the transport in the work processor.
+ * start the closure of the transport in the I/O thread.
*/
-void rxrpc_unuse_local(struct rxrpc_local *local)
+void rxrpc_unuse_local(struct rxrpc_local *local, enum rxrpc_local_trace why)
{
+ unsigned int debug_id = local ? local->debug_id : 0; /* local may be NULL */
+ int r, u;
+
if (local) {
- if (__rxrpc_unuse_local(local)) {
- rxrpc_get_local(local);
- rxrpc_queue_local(local);
- }
+ r = refcount_read(&local->ref);
+ u = atomic_dec_return(&local->active_users);
+ trace_rxrpc_local(debug_id, why, r, u);
+ if (u == 0)
+ kthread_stop(local->io_thread);
}
}
@@ -381,7 +376,7 @@ void rxrpc_unuse_local(struct rxrpc_local *local)
* Closing the socket cannot be done from bottom half context or RCU callback
* context because it might sleep.
*/
-static void rxrpc_local_destroyer(struct rxrpc_local *local)
+void rxrpc_destroy_local(struct rxrpc_local *local)
{
struct socket *socket = local->socket;
struct rxrpc_net *rxnet = local->rxnet;
@@ -408,52 +403,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
/* At this point, there should be no more packets coming in to the
* local endpoint.
*/
- rxrpc_purge_queue(&local->reject_queue);
- rxrpc_purge_queue(&local->event_queue);
-}
-
-/*
- * Process events on an endpoint. The work item carries a ref which
- * we must release.
- */
-static void rxrpc_local_processor(struct work_struct *work)
-{
- struct rxrpc_local *local =
- container_of(work, struct rxrpc_local, processor);
- bool again;
-
- if (local->dead)
- return;
-
- trace_rxrpc_local(local->debug_id, rxrpc_local_processing,
- refcount_read(&local->ref), NULL);
-
- do {
- again = false;
- if (!__rxrpc_use_local(local)) {
- rxrpc_local_destroyer(local);
- break;
- }
-
- if (!list_empty(&local->ack_tx_queue)) {
- rxrpc_transmit_ack_packets(local);
- again = true;
- }
-
- if (!skb_queue_empty(&local->reject_queue)) {
- rxrpc_reject_packets(local);
- again = true;
- }
-
- if (!skb_queue_empty(&local->event_queue)) {
- rxrpc_process_local_events(local);
- again = true;
- }
-
- __rxrpc_unuse_local(local);
- } while (again);
-
- rxrpc_put_local(local);
+ rxrpc_purge_queue(&local->rx_queue);
}
/*
@@ -463,13 +413,8 @@ static void rxrpc_local_rcu(struct rcu_head *rcu)
{
struct rxrpc_local *local = container_of(rcu, struct rxrpc_local, rcu);
- _enter("%d", local->debug_id);
-
- ASSERT(!work_pending(&local->processor));
-
- _net("DESTROY LOCAL %d", local->debug_id);
+ rxrpc_see_local(local, rxrpc_local_free);
kfree(local);
- _leave("");
}
/*
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 84242c0e467c..5905530e2f33 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -65,7 +65,7 @@ static __net_init int rxrpc_init_net(struct net *net)
atomic_set(&rxnet->nr_client_conns, 0);
rxnet->kill_all_client_conns = false;
spin_lock_init(&rxnet->client_conn_cache_lock);
- spin_lock_init(&rxnet->client_conn_discard_lock);
+ mutex_init(&rxnet->client_conn_discard_lock);
INIT_LIST_HEAD(&rxnet->idle_client_conns);
INIT_WORK(&rxnet->client_conn_reaper,
rxrpc_discard_expired_client_conns);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index c5eed0e83e47..3d8c9f830ee0 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -142,8 +142,8 @@ retry:
txb->ack.reason = RXRPC_ACK_IDLE;
}
- mtu = conn->params.peer->if_mtu;
- mtu -= conn->params.peer->hdrsize;
+ mtu = conn->peer->if_mtu;
+ mtu -= conn->peer->hdrsize;
jmax = rxrpc_rx_jumbo_max;
qsize = (window - 1) - call->rx_consumed;
rsize = max_t(int, call->rx_winsize - qsize, 0);
@@ -203,12 +203,11 @@ static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call,
}
/*
- * Send an ACK call packet.
+ * Transmit an ACK packet.
*/
-static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf *txb)
+int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
{
struct rxrpc_connection *conn;
- struct rxrpc_call *call = txb->call;
struct msghdr msg;
struct kvec iov[1];
rxrpc_serial_t serial;
@@ -229,11 +228,6 @@ static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf *
if (txb->ack.reason == RXRPC_ACK_PING)
txb->wire.flags |= RXRPC_REQUEST_ACK;
- if (txb->ack.reason == RXRPC_ACK_DELAY)
- clear_bit(RXRPC_CALL_DELAY_ACK_PENDING, &call->flags);
- if (txb->ack.reason == RXRPC_ACK_IDLE)
- clear_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags);
-
n = rxrpc_fill_out_ack(conn, call, txb);
if (n == 0)
return 0;
@@ -247,8 +241,6 @@ static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf *
trace_rxrpc_tx_ack(call->debug_id, serial,
ntohl(txb->ack.firstPacket),
ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks);
- if (txb->ack_why == rxrpc_propose_ack_ping_for_lost_ack)
- call->acks_lost_ping = serial;
if (txb->ack.reason == RXRPC_ACK_PING)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping);
@@ -259,7 +251,7 @@ static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf *
txb->ack.previousPacket = htonl(call->rx_highest_seq);
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
- ret = do_udp_sendmsg(conn->params.local->socket, &msg, len);
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
call->peer->last_tx_at = ktime_get_seconds();
if (ret < 0)
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
@@ -279,44 +271,6 @@ static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf *
}
/*
- * ACK transmitter for a local endpoint. The UDP socket locks around each
- * transmission, so we can only transmit one packet at a time, ACK, DATA or
- * otherwise.
- */
-void rxrpc_transmit_ack_packets(struct rxrpc_local *local)
-{
- LIST_HEAD(queue);
- int ret;
-
- trace_rxrpc_local(local->debug_id, rxrpc_local_tx_ack,
- refcount_read(&local->ref), NULL);
-
- if (list_empty(&local->ack_tx_queue))
- return;
-
- spin_lock_bh(&local->ack_tx_lock);
- list_splice_tail_init(&local->ack_tx_queue, &queue);
- spin_unlock_bh(&local->ack_tx_lock);
-
- while (!list_empty(&queue)) {
- struct rxrpc_txbuf *txb =
- list_entry(queue.next, struct rxrpc_txbuf, tx_link);
-
- ret = rxrpc_send_ack_packet(local, txb);
- if (ret < 0 && ret != -ECONNRESET) {
- spin_lock_bh(&local->ack_tx_lock);
- list_splice_init(&queue, &local->ack_tx_queue);
- spin_unlock_bh(&local->ack_tx_lock);
- break;
- }
-
- list_del_init(&txb->tx_link);
- rxrpc_put_call(txb->call, rxrpc_call_put);
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx);
- }
-}
-
-/*
* Send an ABORT call packet.
*/
int rxrpc_send_abort_packet(struct rxrpc_call *call)
@@ -358,7 +312,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
pkt.whdr.userStatus = 0;
pkt.whdr.securityIndex = call->security_ix;
pkt.whdr._rsvd = 0;
- pkt.whdr.serviceId = htons(call->service_id);
+ pkt.whdr.serviceId = htons(call->dest_srx.srx_service);
pkt.abort_code = htonl(call->abort_code);
iov[0].iov_base = &pkt;
@@ -368,8 +322,8 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
pkt.whdr.serial = htonl(serial);
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt));
- ret = do_udp_sendmsg(conn->params.local->socket, &msg, sizeof(pkt));
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ ret = do_udp_sendmsg(conn->local->socket, &msg, sizeof(pkt));
+ conn->peer->last_tx_at = ktime_get_seconds();
if (ret < 0)
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
rxrpc_tx_point_call_abort);
@@ -395,12 +349,6 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
_enter("%x,{%d}", txb->seq, txb->len);
- if (hlist_unhashed(&call->error_link)) {
- spin_lock_bh(&call->peer->lock);
- hlist_add_head_rcu(&call->error_link, &call->peer->error_targets);
- spin_unlock_bh(&call->peer->lock);
- }
-
/* Each transmission of a Tx packet needs a new serial number */
serial = atomic_inc_return(&conn->serial);
txb->wire.serial = htonl(serial);
@@ -466,6 +414,14 @@ dont_set_request_ack:
trace_rxrpc_tx_data(call, txb->seq, serial, txb->wire.flags,
test_bit(RXRPC_TXBUF_RESENT, &txb->flags), false);
+
+ /* Track what we've attempted to transmit at least once so that the
+ * retransmission algorithm doesn't try to resend what we haven't sent
+ * yet. However, this can race as we can receive an ACK before we get
+ * to this point. But, OTOH, we won't get an ACK mentioning this
+ * packet unless the far side received it (though it could have
+ * discarded it anyway and NAK'd it).
+ */
cmpxchg(&call->tx_transmitted, txb->seq - 1, txb->seq);
/* send the packet with the don't fragment bit set if we currently
@@ -473,7 +429,7 @@ dont_set_request_ack:
if (txb->len >= call->peer->maxdata)
goto send_fragmentable;
- down_read(&conn->params.local->defrag_sem);
+ down_read(&conn->local->defrag_sem);
txb->last_sent = ktime_get_real();
if (txb->wire.flags & RXRPC_REQUEST_ACK)
@@ -486,11 +442,12 @@ dont_set_request_ack:
* message and update the peer record
*/
rxrpc_inc_stat(call->rxnet, stat_tx_data_send);
- ret = do_udp_sendmsg(conn->params.local->socket, &msg, len);
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ conn->peer->last_tx_at = ktime_get_seconds();
- up_read(&conn->params.local->defrag_sem);
+ up_read(&conn->local->defrag_sem);
if (ret < 0) {
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
rxrpc_tx_point_call_data_nofrag);
@@ -549,22 +506,22 @@ send_fragmentable:
/* attempt to send this message with fragmentation enabled */
_debug("send fragment");
- down_write(&conn->params.local->defrag_sem);
+ down_write(&conn->local->defrag_sem);
txb->last_sent = ktime_get_real();
if (txb->wire.flags & RXRPC_REQUEST_ACK)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
- switch (conn->params.local->srx.transport.family) {
+ switch (conn->local->srx.transport.family) {
case AF_INET6:
case AF_INET:
- ip_sock_set_mtu_discover(conn->params.local->socket->sk,
+ ip_sock_set_mtu_discover(conn->local->socket->sk,
IP_PMTUDISC_DONT);
rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag);
- ret = do_udp_sendmsg(conn->params.local->socket, &msg, len);
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ conn->peer->last_tx_at = ktime_get_seconds();
- ip_sock_set_mtu_discover(conn->params.local->socket->sk,
+ ip_sock_set_mtu_discover(conn->local->socket->sk,
IP_PMTUDISC_DO);
break;
@@ -573,6 +530,7 @@ send_fragmentable:
}
if (ret < 0) {
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
rxrpc_tx_point_call_data_frag);
@@ -582,26 +540,25 @@ send_fragmentable:
}
rxrpc_tx_backoff(call, ret);
- up_write(&conn->params.local->defrag_sem);
+ up_write(&conn->local->defrag_sem);
goto done;
}
/*
- * reject packets through the local endpoint
+ * Reject a packet through the local endpoint.
*/
-void rxrpc_reject_packets(struct rxrpc_local *local)
+void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
{
- struct sockaddr_rxrpc srx;
- struct rxrpc_skb_priv *sp;
struct rxrpc_wire_header whdr;
- struct sk_buff *skb;
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct msghdr msg;
struct kvec iov[2];
size_t size;
__be32 code;
int ret, ioc;
- _enter("%d", local->debug_id);
+ rxrpc_see_skb(skb, rxrpc_skb_see_reject);
iov[0].iov_base = &whdr;
iov[0].iov_len = sizeof(whdr);
@@ -615,52 +572,42 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
memset(&whdr, 0, sizeof(whdr));
- while ((skb = skb_dequeue(&local->reject_queue))) {
- rxrpc_see_skb(skb, rxrpc_skb_seen);
- sp = rxrpc_skb(skb);
+ switch (skb->mark) {
+ case RXRPC_SKB_MARK_REJECT_BUSY:
+ whdr.type = RXRPC_PACKET_TYPE_BUSY;
+ size = sizeof(whdr);
+ ioc = 1;
+ break;
+ case RXRPC_SKB_MARK_REJECT_ABORT:
+ whdr.type = RXRPC_PACKET_TYPE_ABORT;
+ code = htonl(skb->priority);
+ size = sizeof(whdr) + sizeof(code);
+ ioc = 2;
+ break;
+ default:
+ return;
+ }
- switch (skb->mark) {
- case RXRPC_SKB_MARK_REJECT_BUSY:
- whdr.type = RXRPC_PACKET_TYPE_BUSY;
- size = sizeof(whdr);
- ioc = 1;
- break;
- case RXRPC_SKB_MARK_REJECT_ABORT:
- whdr.type = RXRPC_PACKET_TYPE_ABORT;
- code = htonl(skb->priority);
- size = sizeof(whdr) + sizeof(code);
- ioc = 2;
- break;
- default:
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- continue;
- }
+ if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
+ msg.msg_namelen = srx.transport_len;
- if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
- msg.msg_namelen = srx.transport_len;
-
- whdr.epoch = htonl(sp->hdr.epoch);
- whdr.cid = htonl(sp->hdr.cid);
- whdr.callNumber = htonl(sp->hdr.callNumber);
- whdr.serviceId = htons(sp->hdr.serviceId);
- whdr.flags = sp->hdr.flags;
- whdr.flags ^= RXRPC_CLIENT_INITIATED;
- whdr.flags &= RXRPC_CLIENT_INITIATED;
-
- iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size);
- ret = do_udp_sendmsg(local->socket, &msg, size);
- if (ret < 0)
- trace_rxrpc_tx_fail(local->debug_id, 0, ret,
- rxrpc_tx_point_reject);
- else
- trace_rxrpc_tx_packet(local->debug_id, &whdr,
- rxrpc_tx_point_reject);
- }
+ whdr.epoch = htonl(sp->hdr.epoch);
+ whdr.cid = htonl(sp->hdr.cid);
+ whdr.callNumber = htonl(sp->hdr.callNumber);
+ whdr.serviceId = htons(sp->hdr.serviceId);
+ whdr.flags = sp->hdr.flags;
+ whdr.flags ^= RXRPC_CLIENT_INITIATED;
+ whdr.flags &= RXRPC_CLIENT_INITIATED;
- rxrpc_free_skb(skb, rxrpc_skb_freed);
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size);
+ ret = do_udp_sendmsg(local->socket, &msg, size);
+ if (ret < 0)
+ trace_rxrpc_tx_fail(local->debug_id, 0, ret,
+ rxrpc_tx_point_reject);
+ else
+ trace_rxrpc_tx_packet(local->debug_id, &whdr,
+ rxrpc_tx_point_reject);
}
-
- _leave("");
}
/*
@@ -701,8 +648,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
len = iov[0].iov_len + iov[1].iov_len;
- _proto("Tx VERSION (keepalive)");
-
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
ret = do_udp_sendmsg(peer->local->socket, &msg, len);
if (ret < 0)
@@ -715,3 +660,43 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
peer->last_tx_at = ktime_get_seconds();
_leave("");
}
+
+/*
+ * Schedule an instant Tx resend.
+ */
+static inline void rxrpc_instant_resend(struct rxrpc_call *call,
+ struct rxrpc_txbuf *txb)
+{
+ if (call->state < RXRPC_CALL_COMPLETE)
+ kdebug("resend");
+}
+
+/*
+ * Transmit one packet.
+ */
+void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ int ret;
+
+ ret = rxrpc_send_data_packet(call, txb);
+ if (ret < 0) {
+ switch (ret) {
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+ 0, ret);
+ break;
+ default:
+ _debug("need instant resend %d", ret);
+ rxrpc_instant_resend(call, txb);
+ }
+ } else {
+ unsigned long now = jiffies;
+ unsigned long resend_at = now + call->peer->rto_j;
+
+ WRITE_ONCE(call->resend_at, resend_at);
+ rxrpc_reduce_call_timer(call, resend_at, now,
+ rxrpc_timer_set_for_send);
+ }
+}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index cda3890657a9..6685bf917aa6 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -18,9 +18,9 @@
#include <net/ip.h>
#include "ar-internal.h"
-static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);
-static void rxrpc_distribute_error(struct rxrpc_peer *, int,
- enum rxrpc_call_completion);
+static void rxrpc_store_error(struct rxrpc_peer *, struct sk_buff *);
+static void rxrpc_distribute_error(struct rxrpc_peer *, struct sk_buff *,
+ enum rxrpc_call_completion, int);
/*
* Find the peer associated with a local error.
@@ -48,13 +48,11 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
srx->transport.sin.sin_port = serr->port;
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP:
- _net("Rx ICMP");
memcpy(&srx->transport.sin.sin_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in_addr));
break;
case SO_EE_ORIGIN_ICMP6:
- _net("Rx ICMP6 on v4 sock");
memcpy(&srx->transport.sin.sin_addr,
skb_network_header(skb) + serr->addr_offset + 12,
sizeof(struct in_addr));
@@ -70,14 +68,12 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
case AF_INET6:
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP6:
- _net("Rx ICMP6");
srx->transport.sin6.sin6_port = serr->port;
memcpy(&srx->transport.sin6.sin6_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in6_addr));
break;
case SO_EE_ORIGIN_ICMP:
- _net("Rx ICMP on v6 sock");
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.family = AF_INET;
srx->transport.sin.sin_port = serr->port;
@@ -106,13 +102,9 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
*/
static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
{
- _net("Rx ICMP Fragmentation Needed (%d)", mtu);
-
/* wind down the local interface MTU */
- if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
+ if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu)
peer->if_mtu = mtu;
- _net("I/F MTU %u", mtu);
- }
if (mtu == 0) {
/* they didn't give us a size, estimate one */
@@ -129,63 +121,36 @@ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
}
if (mtu < peer->mtu) {
- spin_lock_bh(&peer->lock);
+ spin_lock(&peer->lock);
peer->mtu = mtu;
peer->maxdata = peer->mtu - peer->hdrsize;
- spin_unlock_bh(&peer->lock);
- _net("Net MTU %u (maxdata %u)",
- peer->mtu, peer->maxdata);
+ spin_unlock(&peer->lock);
}
}
/*
* Handle an error received on the local endpoint.
*/
-void rxrpc_error_report(struct sock *sk)
+void rxrpc_input_error(struct rxrpc_local *local, struct sk_buff *skb)
{
- struct sock_exterr_skb *serr;
+ struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
struct sockaddr_rxrpc srx;
- struct rxrpc_local *local;
struct rxrpc_peer *peer = NULL;
- struct sk_buff *skb;
- rcu_read_lock();
- local = rcu_dereference_sk_user_data(sk);
- if (unlikely(!local)) {
- rcu_read_unlock();
- return;
- }
- _enter("%p{%d}", sk, local->debug_id);
-
- /* Clear the outstanding error value on the socket so that it doesn't
- * cause kernel_sendmsg() to return it later.
- */
- sock_error(sk);
+ _enter("L=%x", local->debug_id);
- skb = sock_dequeue_err_skb(sk);
- if (!skb) {
- rcu_read_unlock();
- _leave("UDP socket errqueue empty");
- return;
- }
- rxrpc_new_skb(skb, rxrpc_skb_received);
- serr = SKB_EXT_ERR(skb);
if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
_leave("UDP empty message");
- rcu_read_unlock();
- rxrpc_free_skb(skb, rxrpc_skb_freed);
return;
}
+ rcu_read_lock();
peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx);
- if (peer && !rxrpc_get_peer_maybe(peer))
+ if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_input_error))
peer = NULL;
- if (!peer) {
- rcu_read_unlock();
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- _leave(" [no peer]");
+ rcu_read_unlock();
+ if (!peer)
return;
- }
trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
@@ -196,72 +161,26 @@ void rxrpc_error_report(struct sock *sk)
goto out;
}
- rxrpc_store_error(peer, serr);
+ rxrpc_store_error(peer, skb);
out:
- rcu_read_unlock();
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- rxrpc_put_peer(peer);
-
- _leave("");
+ rxrpc_put_peer(peer, rxrpc_peer_put_input_error);
}
/*
* Map an error report to error codes on the peer record.
*/
-static void rxrpc_store_error(struct rxrpc_peer *peer,
- struct sock_exterr_skb *serr)
+static void rxrpc_store_error(struct rxrpc_peer *peer, struct sk_buff *skb)
{
enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR;
- struct sock_extended_err *ee;
- int err;
+ struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+ struct sock_extended_err *ee = &serr->ee;
+ int err = ee->ee_errno;
_enter("");
- ee = &serr->ee;
-
- err = ee->ee_errno;
-
switch (ee->ee_origin) {
- case SO_EE_ORIGIN_ICMP:
- switch (ee->ee_type) {
- case ICMP_DEST_UNREACH:
- switch (ee->ee_code) {
- case ICMP_NET_UNREACH:
- _net("Rx Received ICMP Network Unreachable");
- break;
- case ICMP_HOST_UNREACH:
- _net("Rx Received ICMP Host Unreachable");
- break;
- case ICMP_PORT_UNREACH:
- _net("Rx Received ICMP Port Unreachable");
- break;
- case ICMP_NET_UNKNOWN:
- _net("Rx Received ICMP Unknown Network");
- break;
- case ICMP_HOST_UNKNOWN:
- _net("Rx Received ICMP Unknown Host");
- break;
- default:
- _net("Rx Received ICMP DestUnreach code=%u",
- ee->ee_code);
- break;
- }
- break;
-
- case ICMP_TIME_EXCEEDED:
- _net("Rx Received ICMP TTL Exceeded");
- break;
-
- default:
- _proto("Rx Received ICMP error { type=%u code=%u }",
- ee->ee_type, ee->ee_code);
- break;
- }
- break;
-
case SO_EE_ORIGIN_NONE:
case SO_EE_ORIGIN_LOCAL:
- _proto("Rx Received local error { error=%d }", err);
compl = RXRPC_CALL_LOCAL_ERROR;
break;
@@ -269,26 +188,40 @@ static void rxrpc_store_error(struct rxrpc_peer *peer,
if (err == EACCES)
err = EHOSTUNREACH;
fallthrough;
+ case SO_EE_ORIGIN_ICMP:
default:
- _proto("Rx Received error report { orig=%u }", ee->ee_origin);
break;
}
- rxrpc_distribute_error(peer, err, compl);
+ rxrpc_distribute_error(peer, skb, compl, err);
}
/*
* Distribute an error that occurred on a peer.
*/
-static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error,
- enum rxrpc_call_completion compl)
+static void rxrpc_distribute_error(struct rxrpc_peer *peer, struct sk_buff *skb,
+ enum rxrpc_call_completion compl, int err)
{
struct rxrpc_call *call;
+ HLIST_HEAD(error_targets);
- hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) {
- rxrpc_see_call(call);
- rxrpc_set_call_completion(call, compl, 0, -error);
+ spin_lock(&peer->lock);
+ hlist_move_list(&peer->error_targets, &error_targets);
+
+ while (!hlist_empty(&error_targets)) {
+ call = hlist_entry(error_targets.first,
+ struct rxrpc_call, error_link);
+ hlist_del_init(&call->error_link);
+ spin_unlock(&peer->lock);
+
+ rxrpc_see_call(call, rxrpc_call_see_distribute_error);
+ rxrpc_set_call_completion(call, compl, 0, -err);
+ rxrpc_input_call_event(call, skb);
+
+ spin_lock(&peer->lock);
}
+
+ spin_unlock(&peer->lock);
}
/*
@@ -304,18 +237,18 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
time64_t keepalive_at;
int slot;
- spin_lock_bh(&rxnet->peer_hash_lock);
+ spin_lock(&rxnet->peer_hash_lock);
while (!list_empty(collector)) {
peer = list_entry(collector->next,
struct rxrpc_peer, keepalive_link);
list_del_init(&peer->keepalive_link);
- if (!rxrpc_get_peer_maybe(peer))
+ if (!rxrpc_get_peer_maybe(peer, rxrpc_peer_get_keepalive))
continue;
- if (__rxrpc_use_local(peer->local)) {
- spin_unlock_bh(&rxnet->peer_hash_lock);
+ if (__rxrpc_use_local(peer->local, rxrpc_local_use_peer_keepalive)) {
+ spin_unlock(&rxnet->peer_hash_lock);
keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
slot = keepalive_at - base;
@@ -334,15 +267,15 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
*/
slot += cursor;
slot &= mask;
- spin_lock_bh(&rxnet->peer_hash_lock);
+ spin_lock(&rxnet->peer_hash_lock);
list_add_tail(&peer->keepalive_link,
&rxnet->peer_keepalive[slot & mask]);
- rxrpc_unuse_local(peer->local);
+ rxrpc_unuse_local(peer->local, rxrpc_local_unuse_peer_keepalive);
}
- rxrpc_put_peer_locked(peer);
+ rxrpc_put_peer_locked(peer, rxrpc_peer_put_keepalive);
}
- spin_unlock_bh(&rxnet->peer_hash_lock);
+ spin_unlock(&rxnet->peer_hash_lock);
}
/*
@@ -372,7 +305,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work)
* second; the bucket at cursor + 1 goes at now + 1s and so
* on...
*/
- spin_lock_bh(&rxnet->peer_hash_lock);
+ spin_lock(&rxnet->peer_hash_lock);
list_splice_init(&rxnet->peer_keepalive_new, &collector);
stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
@@ -384,7 +317,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work)
}
base = now;
- spin_unlock_bh(&rxnet->peer_hash_lock);
+ spin_unlock(&rxnet->peer_hash_lock);
rxnet->peer_keepalive_base = base;
rxnet->peer_keepalive_cursor = cursor;
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 041a51225c5f..608946dcc505 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -138,10 +138,8 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
unsigned long hash_key = rxrpc_peer_hash_key(local, srx);
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
- if (peer) {
- _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
+ if (peer)
_leave(" = %p {u=%d}", peer, refcount_read(&peer->ref));
- }
return peer;
}
@@ -207,9 +205,9 @@ static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx,
/*
* Allocate a peer.
*/
-struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
+struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp,
+ enum rxrpc_peer_trace why)
{
- const void *here = __builtin_return_address(0);
struct rxrpc_peer *peer;
_enter("");
@@ -217,7 +215,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
if (peer) {
refcount_set(&peer->ref, 1);
- peer->local = rxrpc_get_local(local);
+ peer->local = rxrpc_get_local(local, rxrpc_local_get_peer);
INIT_HLIST_HEAD(&peer->error_targets);
peer->service_conns = RB_ROOT;
seqlock_init(&peer->service_conn_lock);
@@ -228,7 +226,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
rxrpc_peer_init_rtt(peer);
peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_new, 1, here);
+ trace_rxrpc_peer(peer->debug_id, 1, why);
}
_leave(" = %p", peer);
@@ -284,7 +282,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx,
_enter("");
- peer = rxrpc_alloc_peer(local, gfp);
+ peer = rxrpc_alloc_peer(local, gfp, rxrpc_peer_new_client);
if (peer) {
memcpy(&peer->srx, srx, sizeof(*srx));
rxrpc_init_peer(rx, peer, hash_key);
@@ -296,7 +294,8 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx,
static void rxrpc_free_peer(struct rxrpc_peer *peer)
{
- rxrpc_put_local(peer->local);
+ trace_rxrpc_peer(peer->debug_id, 0, rxrpc_peer_free);
+ rxrpc_put_local(peer->local, rxrpc_local_put_peer);
kfree_rcu(peer, rcu);
}
@@ -336,7 +335,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
/* search the peer list first */
rcu_read_lock();
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
- if (peer && !rxrpc_get_peer_maybe(peer))
+ if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_lookup_client))
peer = NULL;
rcu_read_unlock();
@@ -350,11 +349,11 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
return NULL;
}
- spin_lock_bh(&rxnet->peer_hash_lock);
+ spin_lock(&rxnet->peer_hash_lock);
/* Need to check that we aren't racing with someone else */
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
- if (peer && !rxrpc_get_peer_maybe(peer))
+ if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_lookup_client))
peer = NULL;
if (!peer) {
hash_add_rcu(rxnet->peer_hash,
@@ -363,7 +362,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
&rxnet->peer_keepalive_new);
}
- spin_unlock_bh(&rxnet->peer_hash_lock);
+ spin_unlock(&rxnet->peer_hash_lock);
if (peer)
rxrpc_free_peer(candidate);
@@ -371,8 +370,6 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
peer = candidate;
}
- _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
-
_leave(" = %p {u=%d}", peer, refcount_read(&peer->ref));
return peer;
}
@@ -380,27 +377,26 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
/*
* Get a ref on a peer record.
*/
-struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
+struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer, enum rxrpc_peer_trace why)
{
- const void *here = __builtin_return_address(0);
int r;
__refcount_inc(&peer->ref, &r);
- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here);
+ trace_rxrpc_peer(peer->debug_id, why, r + 1);
return peer;
}
/*
* Get a ref on a peer record unless its usage has already reached 0.
*/
-struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
+struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer,
+ enum rxrpc_peer_trace why)
{
- const void *here = __builtin_return_address(0);
int r;
if (peer) {
if (__refcount_inc_not_zero(&peer->ref, &r))
- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here);
+ trace_rxrpc_peer(peer->debug_id, r + 1, why);
else
peer = NULL;
}
@@ -416,10 +412,10 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
ASSERT(hlist_empty(&peer->error_targets));
- spin_lock_bh(&rxnet->peer_hash_lock);
+ spin_lock(&rxnet->peer_hash_lock);
hash_del_rcu(&peer->hash_link);
list_del_init(&peer->keepalive_link);
- spin_unlock_bh(&rxnet->peer_hash_lock);
+ spin_unlock(&rxnet->peer_hash_lock);
rxrpc_free_peer(peer);
}
@@ -427,9 +423,8 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
/*
* Drop a ref on a peer record.
*/
-void rxrpc_put_peer(struct rxrpc_peer *peer)
+void rxrpc_put_peer(struct rxrpc_peer *peer, enum rxrpc_peer_trace why)
{
- const void *here = __builtin_return_address(0);
unsigned int debug_id;
bool dead;
int r;
@@ -437,7 +432,7 @@ void rxrpc_put_peer(struct rxrpc_peer *peer)
if (peer) {
debug_id = peer->debug_id;
dead = __refcount_dec_and_test(&peer->ref, &r);
- trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here);
+ trace_rxrpc_peer(debug_id, r - 1, why);
if (dead)
__rxrpc_put_peer(peer);
}
@@ -447,15 +442,14 @@ void rxrpc_put_peer(struct rxrpc_peer *peer)
* Drop a ref on a peer record where the caller already holds the
* peer_hash_lock.
*/
-void rxrpc_put_peer_locked(struct rxrpc_peer *peer)
+void rxrpc_put_peer_locked(struct rxrpc_peer *peer, enum rxrpc_peer_trace why)
{
- const void *here = __builtin_return_address(0);
unsigned int debug_id = peer->debug_id;
bool dead;
int r;
dead = __refcount_dec_and_test(&peer->ref, &r);
- trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here);
+ trace_rxrpc_peer(debug_id, r - 1, why);
if (dead) {
hash_del_rcu(&peer->hash_link);
list_del_init(&peer->keepalive_link);
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index fae22a8b38d6..3a59591ec061 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -49,8 +49,6 @@ static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
{
struct rxrpc_local *local;
- struct rxrpc_sock *rx;
- struct rxrpc_peer *peer;
struct rxrpc_call *call;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
unsigned long timeout = 0;
@@ -63,28 +61,19 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
"Proto Local "
" Remote "
" SvID ConnID CallID End Use State Abort "
- " DebugId TxSeq TW RxSeq RW RxSerial RxTimo\n");
+ " DebugId TxSeq TW RxSeq RW RxSerial CW RxTimo\n");
return 0;
}
call = list_entry(v, struct rxrpc_call, link);
- rx = rcu_dereference(call->socket);
- if (rx) {
- local = READ_ONCE(rx->local);
- if (local)
- sprintf(lbuff, "%pISpc", &local->srx.transport);
- else
- strcpy(lbuff, "no_local");
- } else {
- strcpy(lbuff, "no_socket");
- }
-
- peer = call->peer;
- if (peer)
- sprintf(rbuff, "%pISpc", &peer->srx.transport);
+ local = call->local;
+ if (local)
+ sprintf(lbuff, "%pISpc", &local->srx.transport);
else
- strcpy(rbuff, "no_connection");
+ strcpy(lbuff, "no_local");
+
+ sprintf(rbuff, "%pISpc", &call->dest_srx.transport);
if (call->state != RXRPC_CALL_SERVER_PREALLOC) {
timeout = READ_ONCE(call->expect_rx_by);
@@ -95,10 +84,10 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
wtmp = atomic64_read_acquire(&call->ackr_window);
seq_printf(seq,
"UDP %-47.47s %-47.47s %4x %08x %08x %s %3u"
- " %-8.8s %08x %08x %08x %02x %08x %02x %08x %06lx\n",
+ " %-8.8s %08x %08x %08x %02x %08x %02x %08x %02x %06lx\n",
lbuff,
rbuff,
- call->service_id,
+ call->dest_srx.srx_service,
call->cid,
call->call_id,
rxrpc_is_service_call(call) ? "Svc" : "Clt",
@@ -109,6 +98,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack,
lower_32_bits(wtmp), upper_32_bits(wtmp) - lower_32_bits(wtmp),
call->rx_serial,
+ call->cong_cwnd,
timeout);
return 0;
@@ -159,7 +149,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
seq_puts(seq,
"Proto Local "
" Remote "
- " SvID ConnID End Use State Key "
+ " SvID ConnID End Ref Act State Key "
" Serial ISerial CallId0 CallId1 CallId2 CallId3\n"
);
return 0;
@@ -172,12 +162,12 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
goto print;
}
- sprintf(lbuff, "%pISpc", &conn->params.local->srx.transport);
+ sprintf(lbuff, "%pISpc", &conn->local->srx.transport);
- sprintf(rbuff, "%pISpc", &conn->params.peer->srx.transport);
+ sprintf(rbuff, "%pISpc", &conn->peer->srx.transport);
print:
seq_printf(seq,
- "UDP %-47.47s %-47.47s %4x %08x %s %3u"
+ "UDP %-47.47s %-47.47s %4x %08x %s %3u %3d"
" %s %08x %08x %08x %08x %08x %08x %08x\n",
lbuff,
rbuff,
@@ -185,8 +175,9 @@ print:
conn->proto.cid,
rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
refcount_read(&conn->ref),
+ atomic_read(&conn->active),
rxrpc_conn_states[conn->state],
- key_serial(conn->params.key),
+ key_serial(conn->key),
atomic_read(&conn->serial),
conn->hi_serial,
conn->channels[0].call_id,
@@ -341,7 +332,7 @@ static int rxrpc_local_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"Proto Local "
- " Use Act\n");
+ " Use Act RxQ\n");
return 0;
}
@@ -350,10 +341,11 @@ static int rxrpc_local_seq_show(struct seq_file *seq, void *v)
sprintf(lbuff, "%pISpc", &local->srx.transport);
seq_printf(seq,
- "UDP %-47.47s %3u %3u\n",
+ "UDP %-47.47s %3u %3u %3u\n",
lbuff,
refcount_read(&local->ref),
- atomic_read(&local->active_users));
+ atomic_read(&local->active_users),
+ local->rx_queue.qlen);
return 0;
}
@@ -407,13 +399,16 @@ int rxrpc_stats_show(struct seq_file *seq, void *v)
struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(seq));
seq_printf(seq,
- "Data : send=%u sendf=%u\n",
+ "Data : send=%u sendf=%u fail=%u\n",
atomic_read(&rxnet->stat_tx_data_send),
- atomic_read(&rxnet->stat_tx_data_send_frag));
+ atomic_read(&rxnet->stat_tx_data_send_frag),
+ atomic_read(&rxnet->stat_tx_data_send_fail));
seq_printf(seq,
- "Data-Tx : nr=%u retrans=%u\n",
+ "Data-Tx : nr=%u retrans=%u uf=%u cwr=%u\n",
atomic_read(&rxnet->stat_tx_data),
- atomic_read(&rxnet->stat_tx_data_retrans));
+ atomic_read(&rxnet->stat_tx_data_retrans),
+ atomic_read(&rxnet->stat_tx_data_underflow),
+ atomic_read(&rxnet->stat_tx_data_cwnd_reset));
seq_printf(seq,
"Data-Rx : nr=%u reqack=%u jumbo=%u\n",
atomic_read(&rxnet->stat_rx_data),
@@ -462,6 +457,9 @@ int rxrpc_stats_show(struct seq_file *seq, void *v)
"Buffers : txb=%u rxb=%u\n",
atomic_read(&rxrpc_nr_txbuf),
atomic_read(&rxrpc_n_rx_skbs));
+ seq_printf(seq,
+ "IO-thread: loops=%u\n",
+ atomic_read(&rxnet->stat_io_loop));
return 0;
}
@@ -478,8 +476,11 @@ int rxrpc_stats_clear(struct file *file, char *buf, size_t size)
atomic_set(&rxnet->stat_tx_data, 0);
atomic_set(&rxnet->stat_tx_data_retrans, 0);
+ atomic_set(&rxnet->stat_tx_data_underflow, 0);
+ atomic_set(&rxnet->stat_tx_data_cwnd_reset, 0);
atomic_set(&rxnet->stat_tx_data_send, 0);
atomic_set(&rxnet->stat_tx_data_send_frag, 0);
+ atomic_set(&rxnet->stat_tx_data_send_fail, 0);
atomic_set(&rxnet->stat_rx_data, 0);
atomic_set(&rxnet->stat_rx_data_reqack, 0);
atomic_set(&rxnet->stat_rx_data_jumbo, 0);
@@ -491,5 +492,7 @@ int rxrpc_stats_clear(struct file *file, char *buf, size_t size)
memset(&rxnet->stat_rx_acks, 0, sizeof(rxnet->stat_rx_acks));
memset(&rxnet->stat_why_req_ack, 0, sizeof(rxnet->stat_why_req_ack));
+
+ atomic_set(&rxnet->stat_io_loop, 0);
return size;
}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index efb85f983657..36b25d003cf0 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -36,16 +36,16 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
sk = &rx->sk;
if (rx && sk->sk_state < RXRPC_CLOSE) {
if (call->notify_rx) {
- spin_lock_bh(&call->notify_lock);
+ spin_lock(&call->notify_lock);
call->notify_rx(sk, call, call->user_call_ID);
- spin_unlock_bh(&call->notify_lock);
+ spin_unlock(&call->notify_lock);
} else {
- write_lock_bh(&rx->recvmsg_lock);
+ write_lock(&rx->recvmsg_lock);
if (list_empty(&call->recvmsg_link)) {
- rxrpc_get_call(call, rxrpc_call_got);
+ rxrpc_get_call(call, rxrpc_call_get_notify_socket);
list_add_tail(&call->recvmsg_link, &rx->recvmsg_q);
}
- write_unlock_bh(&rx->recvmsg_lock);
+ write_unlock(&rx->recvmsg_lock);
if (!sock_flag(sk, SOCK_DEAD)) {
_debug("call %ps", sk->sk_data_ready);
@@ -87,9 +87,9 @@ bool rxrpc_set_call_completion(struct rxrpc_call *call,
bool ret = false;
if (call->state < RXRPC_CALL_COMPLETE) {
- write_lock_bh(&call->state_lock);
+ write_lock(&call->state_lock);
ret = __rxrpc_set_call_completion(call, compl, abort_code, error);
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
}
return ret;
}
@@ -107,9 +107,9 @@ bool rxrpc_call_completed(struct rxrpc_call *call)
bool ret = false;
if (call->state < RXRPC_CALL_COMPLETE) {
- write_lock_bh(&call->state_lock);
+ write_lock(&call->state_lock);
ret = __rxrpc_call_completed(call);
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
}
return ret;
}
@@ -131,9 +131,9 @@ bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
{
bool ret;
- write_lock_bh(&call->state_lock);
+ write_lock(&call->state_lock);
ret = __rxrpc_abort_call(why, call, seq, abort_code, error);
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
return ret;
}
@@ -193,23 +193,23 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY)
rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack);
- write_lock_bh(&call->state_lock);
+ write_lock(&call->state_lock);
switch (call->state) {
case RXRPC_CALL_CLIENT_RECV_REPLY:
__rxrpc_call_completed(call);
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
break;
case RXRPC_CALL_SERVER_RECV_REQUEST:
call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
rxrpc_propose_delay_ACK(call, serial,
rxrpc_propose_ack_processing_op);
break;
default:
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
break;
}
}
@@ -228,9 +228,8 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
_enter("%d", call->debug_id);
-further_rotation:
skb = skb_dequeue(&call->recvmsg_queue);
- rxrpc_see_skb(skb, rxrpc_skb_rotated);
+ rxrpc_see_skb(skb, rxrpc_skb_see_rotate);
sp = rxrpc_skb(skb);
tseq = sp->hdr.seq;
@@ -241,7 +240,7 @@ further_rotation:
if (after(tseq, call->rx_consumed))
smp_store_release(&call->rx_consumed, tseq);
- rxrpc_free_skb(skb, rxrpc_skb_freed);
+ rxrpc_free_skb(skb, rxrpc_skb_put_rotate);
trace_rxrpc_receive(call, last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate,
serial, call->rx_consumed);
@@ -250,26 +249,12 @@ further_rotation:
return;
}
- /* The next packet on the queue might entirely overlap with the one we
- * just consumed; if so, rotate that away also.
- */
- skb = skb_peek(&call->recvmsg_queue);
- if (skb) {
- sp = rxrpc_skb(skb);
- if (sp->hdr.seq != call->rx_consumed &&
- after_eq(call->rx_consumed, sp->hdr.seq))
- goto further_rotation;
- }
-
/* Check to see if there's an ACK that needs sending. */
acked = atomic_add_return(call->rx_consumed - old_consumed,
&call->ackr_nr_consumed);
if (acked > 2 &&
- !test_and_set_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags)) {
- rxrpc_send_ACK(call, RXRPC_ACK_IDLE, serial,
- rxrpc_propose_ack_rotate_rx);
- rxrpc_transmit_ack_packets(call->peer->local);
- }
+ !test_and_set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
+ rxrpc_poke_call(call, rxrpc_call_poke_idle);
}
/*
@@ -314,15 +299,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
*/
skb = skb_peek(&call->recvmsg_queue);
while (skb) {
- rxrpc_see_skb(skb, rxrpc_skb_seen);
+ rxrpc_see_skb(skb, rxrpc_skb_see_recvmsg);
sp = rxrpc_skb(skb);
seq = sp->hdr.seq;
- if (after_eq(call->rx_consumed, seq)) {
- kdebug("obsolete %x %x", call->rx_consumed, seq);
- goto skip_obsolete;
- }
-
if (!(flags & MSG_PEEK))
trace_rxrpc_receive(call, rxrpc_receive_front,
sp->hdr.serial, seq);
@@ -340,7 +320,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
ret = ret2;
goto out;
}
- rxrpc_transmit_ack_packets(call->peer->local);
} else {
trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq,
rx_pkt_offset, rx_pkt_len, 0);
@@ -373,7 +352,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
break;
}
- skip_obsolete:
/* The whole packet has been transferred. */
if (sp->hdr.flags & RXRPC_LAST_PACKET)
ret = 1;
@@ -395,7 +373,7 @@ done:
trace_rxrpc_recvdata(call, rxrpc_recvmsg_data_return, seq,
rx_pkt_offset, rx_pkt_len, ret);
if (ret == -EAGAIN)
- set_bit(RXRPC_CALL_RX_UNDERRUN, &call->flags);
+ set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
return ret;
}
@@ -463,14 +441,14 @@ try_again:
/* Find the next call and dequeue it if we're not just peeking. If we
* do dequeue it, that comes with a ref that we will need to release.
*/
- write_lock_bh(&rx->recvmsg_lock);
+ write_lock(&rx->recvmsg_lock);
l = rx->recvmsg_q.next;
call = list_entry(l, struct rxrpc_call, recvmsg_link);
if (!(flags & MSG_PEEK))
list_del_init(&call->recvmsg_link);
else
- rxrpc_get_call(call, rxrpc_call_got);
- write_unlock_bh(&rx->recvmsg_lock);
+ rxrpc_get_call(call, rxrpc_call_get_recvmsg);
+ write_unlock(&rx->recvmsg_lock);
trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0);
@@ -508,11 +486,9 @@ try_again:
}
if (msg->msg_name && call->peer) {
- struct sockaddr_rxrpc *srx = msg->msg_name;
- size_t len = sizeof(call->peer->srx);
+ size_t len = sizeof(call->dest_srx);
- memcpy(msg->msg_name, &call->peer->srx, len);
- srx->srx_service = call->service_id;
+ memcpy(msg->msg_name, &call->dest_srx, len);
msg->msg_namelen = len;
}
@@ -525,7 +501,6 @@ try_again:
if (ret == -EAGAIN)
ret = 0;
- rxrpc_transmit_ack_packets(call->peer->local);
if (!skb_queue_empty(&call->recvmsg_queue))
rxrpc_notify_socket(call);
break;
@@ -555,18 +530,18 @@ try_again:
error_unlock_call:
mutex_unlock(&call->user_mutex);
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_recvmsg);
trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret);
return ret;
error_requeue_call:
if (!(flags & MSG_PEEK)) {
- write_lock_bh(&rx->recvmsg_lock);
+ write_lock(&rx->recvmsg_lock);
list_add(&call->recvmsg_link, &rx->recvmsg_q);
- write_unlock_bh(&rx->recvmsg_lock);
+ write_unlock(&rx->recvmsg_lock);
trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0);
} else {
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_recvmsg);
}
error_no_call:
release_sock(&rx->sk);
@@ -655,9 +630,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
read_phase_complete:
ret = 1;
out:
- rxrpc_transmit_ack_packets(call->peer->local);
if (_service)
- *_service = call->service_id;
+ *_service = call->dest_srx.srx_service;
mutex_unlock(&call->user_mutex);
_leave(" = %d [%zu,%d]", ret, iov_iter_count(iter), *_abort);
return ret;
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 110a5550c0a6..d1233720e05f 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -103,7 +103,7 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn,
struct crypto_sync_skcipher *ci;
int ret;
- _enter("{%d},{%x}", conn->debug_id, key_serial(conn->params.key));
+ _enter("{%d},{%x}", conn->debug_id, key_serial(conn->key));
conn->security_ix = token->security_index;
@@ -118,7 +118,7 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn,
sizeof(token->kad->session_key)) < 0)
BUG();
- switch (conn->params.security_level) {
+ switch (conn->security_level) {
case RXRPC_SECURITY_PLAIN:
case RXRPC_SECURITY_AUTH:
case RXRPC_SECURITY_ENCRYPT:
@@ -150,7 +150,7 @@ static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain,
{
size_t shdr, buf_size, chunk;
- switch (call->conn->params.security_level) {
+ switch (call->conn->security_level) {
default:
buf_size = chunk = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
shdr = 0;
@@ -192,7 +192,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn,
_enter("");
- if (!conn->params.key)
+ if (!conn->key)
return 0;
tmpbuf = kmalloc(tmpsize, GFP_KERNEL);
@@ -205,7 +205,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn,
return -ENOMEM;
}
- token = conn->params.key->payload.data[0];
+ token = conn->key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
tmpbuf[0] = htonl(conn->proto.epoch);
@@ -317,7 +317,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
}
/* encrypt from the session key */
- token = call->conn->params.key->payload.data[0];
+ token = call->conn->key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
sg_init_one(&sg, txb->data, txb->len);
@@ -344,13 +344,13 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
int ret;
_enter("{%d{%x}},{#%u},%u,",
- call->debug_id, key_serial(call->conn->params.key),
+ call->debug_id, key_serial(call->conn->key),
txb->seq, txb->len);
if (!call->conn->rxkad.cipher)
return 0;
- ret = key_validate(call->conn->params.key);
+ ret = key_validate(call->conn->key);
if (ret < 0)
return ret;
@@ -380,7 +380,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
y = 1; /* zero checksums are not permitted */
txb->wire.cksum = htons(y);
- switch (call->conn->params.security_level) {
+ switch (call->conn->security_level) {
case RXRPC_SECURITY_PLAIN:
ret = 0;
break;
@@ -525,7 +525,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
}
/* decrypt from the session key */
- token = call->conn->params.key->payload.data[0];
+ token = call->conn->key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
@@ -596,7 +596,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
u32 x, y;
_enter("{%d{%x}},{#%u}",
- call->debug_id, key_serial(call->conn->params.key), seq);
+ call->debug_id, key_serial(call->conn->key), seq);
if (!call->conn->rxkad.cipher)
return 0;
@@ -632,7 +632,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
goto protocol_error;
}
- switch (call->conn->params.security_level) {
+ switch (call->conn->security_level) {
case RXRPC_SECURITY_PLAIN:
ret = 0;
break;
@@ -678,8 +678,8 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
challenge.min_level = htonl(0);
challenge.__padding = 0;
- msg.msg_name = &conn->params.peer->srx.transport;
- msg.msg_namelen = conn->params.peer->srx.transport_len;
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -704,16 +704,15 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
serial = atomic_inc_return(&conn->serial);
whdr.serial = htonl(serial);
- _proto("Tx CHALLENGE %%%u", serial);
- ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
+ ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
if (ret < 0) {
trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
rxrpc_tx_point_rxkad_challenge);
return -EAGAIN;
}
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ conn->peer->last_tx_at = ktime_get_seconds();
trace_rxrpc_tx_packet(conn->debug_id, &whdr,
rxrpc_tx_point_rxkad_challenge);
_leave(" = 0");
@@ -737,8 +736,8 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
_enter("");
- msg.msg_name = &conn->params.peer->srx.transport;
- msg.msg_namelen = conn->params.peer->srx.transport_len;
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -762,16 +761,15 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
serial = atomic_inc_return(&conn->serial);
whdr.serial = htonl(serial);
- _proto("Tx RESPONSE %%%u", serial);
- ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 3, len);
+ ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len);
if (ret < 0) {
trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
rxrpc_tx_point_rxkad_response);
return -EAGAIN;
}
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ conn->peer->last_tx_at = ktime_get_seconds();
_leave(" = 0");
return 0;
}
@@ -834,15 +832,15 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
u32 version, nonce, min_level, abort_code;
int ret;
- _enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key));
+ _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
eproto = tracepoint_string("chall_no_key");
abort_code = RX_PROTOCOL_ERROR;
- if (!conn->params.key)
+ if (!conn->key)
goto protocol_error;
abort_code = RXKADEXPIRED;
- ret = key_validate(conn->params.key);
+ ret = key_validate(conn->key);
if (ret < 0)
goto other_error;
@@ -856,8 +854,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
nonce = ntohl(challenge.nonce);
min_level = ntohl(challenge.min_level);
- _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }",
- sp->hdr.serial, version, nonce, min_level);
+ trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, nonce, min_level);
eproto = tracepoint_string("chall_ver");
abort_code = RXKADINCONSISTENCY;
@@ -866,10 +863,10 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
abort_code = RXKADLEVELFAIL;
ret = -EACCES;
- if (conn->params.security_level < min_level)
+ if (conn->security_level < min_level)
goto other_error;
- token = conn->params.key->payload.data[0];
+ token = conn->key->payload.data[0];
/* build the response packet */
resp = kzalloc(sizeof(struct rxkad_response), GFP_NOFS);
@@ -881,7 +878,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
resp->encrypted.cid = htonl(conn->proto.cid);
resp->encrypted.securityIndex = htonl(conn->security_ix);
resp->encrypted.inc_nonce = htonl(nonce + 1);
- resp->encrypted.level = htonl(conn->params.security_level);
+ resp->encrypted.level = htonl(conn->security_level);
resp->kvno = htonl(token->kad->kvno);
resp->ticket_len = htonl(token->kad->ticket_len);
resp->encrypted.call_id[0] = htonl(conn->channels[0].call_counter);
@@ -1139,8 +1136,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
version = ntohl(response->version);
ticket_len = ntohl(response->ticket_len);
kvno = ntohl(response->kvno);
- _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
- sp->hdr.serial, version, kvno, ticket_len);
+
+ trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len);
eproto = tracepoint_string("rxkad_rsp_ver");
abort_code = RXKADINCONSISTENCY;
@@ -1229,7 +1226,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
level = ntohl(response->encrypted.level);
if (level > RXRPC_SECURITY_ENCRYPT)
goto protocol_error_free;
- conn->params.security_level = level;
+ conn->security_level = level;
/* create a key to hold the security data and expiration time - after
* this the connection security can be handled in exactly the same way
diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c
new file mode 100644
index 000000000000..66f5eea291ff
--- /dev/null
+++ b/net/rxrpc/rxperf.c
@@ -0,0 +1,619 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* In-kernel rxperf server for testing purposes.
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells ([email protected])
+ */
+
+#define pr_fmt(fmt) "rxperf: " fmt
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+
+MODULE_DESCRIPTION("rxperf test server (afs)");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+#define RXPERF_PORT 7009
+#define RX_PERF_SERVICE 147
+#define RX_PERF_VERSION 3
+#define RX_PERF_SEND 0
+#define RX_PERF_RECV 1
+#define RX_PERF_RPC 3
+#define RX_PERF_FILE 4
+#define RX_PERF_MAGIC_COOKIE 0x4711
+
+struct rxperf_proto_params {
+ __be32 version;
+ __be32 type;
+ __be32 rsize;
+ __be32 wsize;
+} __packed;
+
+static const u8 rxperf_magic_cookie[] = { 0x00, 0x00, 0x47, 0x11 };
+static const u8 secret[8] = { 0xa7, 0x83, 0x8a, 0xcb, 0xc7, 0x83, 0xec, 0x94 };
+
+enum rxperf_call_state {
+ RXPERF_CALL_SV_AWAIT_PARAMS, /* Server: Awaiting parameter block */
+ RXPERF_CALL_SV_AWAIT_REQUEST, /* Server: Awaiting request data */
+ RXPERF_CALL_SV_REPLYING, /* Server: Replying */
+ RXPERF_CALL_SV_AWAIT_ACK, /* Server: Awaiting final ACK */
+ RXPERF_CALL_COMPLETE, /* Completed or failed */
+};
+
+struct rxperf_call {
+ struct rxrpc_call *rxcall;
+ struct iov_iter iter;
+ struct kvec kvec[1];
+ struct work_struct work;
+ const char *type;
+ size_t iov_len;
+ size_t req_len; /* Size of request blob */
+ size_t reply_len; /* Size of reply blob */
+ unsigned int debug_id;
+ unsigned int operation_id;
+ struct rxperf_proto_params params;
+ __be32 tmp[2];
+ s32 abort_code;
+ enum rxperf_call_state state;
+ short error;
+ unsigned short unmarshal;
+ u16 service_id;
+ int (*deliver)(struct rxperf_call *call);
+ void (*processor)(struct work_struct *work);
+};
+
+static struct socket *rxperf_socket;
+static struct key *rxperf_sec_keyring; /* Ring of security/crypto keys */
+static struct workqueue_struct *rxperf_workqueue;
+
+static void rxperf_deliver_to_call(struct work_struct *work);
+static int rxperf_deliver_param_block(struct rxperf_call *call);
+static int rxperf_deliver_request(struct rxperf_call *call);
+static int rxperf_process_call(struct rxperf_call *call);
+static void rxperf_charge_preallocation(struct work_struct *work);
+
+static DECLARE_WORK(rxperf_charge_preallocation_work,
+ rxperf_charge_preallocation);
+
+static inline void rxperf_set_call_state(struct rxperf_call *call,
+ enum rxperf_call_state to)
+{
+ call->state = to;
+}
+
+static inline void rxperf_set_call_complete(struct rxperf_call *call,
+ int error, s32 remote_abort)
+{
+ if (call->state != RXPERF_CALL_COMPLETE) {
+ call->abort_code = remote_abort;
+ call->error = error;
+ call->state = RXPERF_CALL_COMPLETE;
+ }
+}
+
+static void rxperf_rx_discard_new_call(struct rxrpc_call *rxcall,
+ unsigned long user_call_ID)
+{
+ kfree((struct rxperf_call *)user_call_ID);
+}
+
+static void rxperf_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long user_call_ID)
+{
+ queue_work(rxperf_workqueue, &rxperf_charge_preallocation_work);
+}
+
+static void rxperf_queue_call_work(struct rxperf_call *call)
+{
+ queue_work(rxperf_workqueue, &call->work);
+}
+
+static void rxperf_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
+{
+ struct rxperf_call *call = (struct rxperf_call *)call_user_ID;
+
+ if (call->state != RXPERF_CALL_COMPLETE)
+ rxperf_queue_call_work(call);
+}
+
+static void rxperf_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID)
+{
+ struct rxperf_call *call = (struct rxperf_call *)user_call_ID;
+
+ call->rxcall = rxcall;
+}
+
+static void rxperf_notify_end_reply_tx(struct sock *sock,
+ struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
+{
+ rxperf_set_call_state((struct rxperf_call *)call_user_ID,
+ RXPERF_CALL_SV_AWAIT_ACK);
+}
+
+/*
+ * Charge the incoming call preallocation.
+ */
+static void rxperf_charge_preallocation(struct work_struct *work)
+{
+ struct rxperf_call *call;
+
+ for (;;) {
+ call = kzalloc(sizeof(*call), GFP_KERNEL);
+ if (!call)
+ break;
+
+ call->type = "unset";
+ call->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ call->deliver = rxperf_deliver_param_block;
+ call->state = RXPERF_CALL_SV_AWAIT_PARAMS;
+ call->service_id = RX_PERF_SERVICE;
+ call->iov_len = sizeof(call->params);
+ call->kvec[0].iov_len = sizeof(call->params);
+ call->kvec[0].iov_base = &call->params;
+ iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len);
+ INIT_WORK(&call->work, rxperf_deliver_to_call);
+
+ if (rxrpc_kernel_charge_accept(rxperf_socket,
+ rxperf_notify_rx,
+ rxperf_rx_attach,
+ (unsigned long)call,
+ GFP_KERNEL,
+ call->debug_id) < 0)
+ break;
+ call = NULL;
+ }
+
+ kfree(call);
+}
+
+/*
+ * Open an rxrpc socket and bind it to be a server for callback notifications
+ * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
+ */
+static int rxperf_open_socket(void)
+{
+ struct sockaddr_rxrpc srx;
+ struct socket *socket;
+ int ret;
+
+ ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET6,
+ &socket);
+ if (ret < 0)
+ goto error_1;
+
+ socket->sk->sk_allocation = GFP_NOFS;
+
+ /* bind the callback manager's address to make this a server socket */
+ memset(&srx, 0, sizeof(srx));
+ srx.srx_family = AF_RXRPC;
+ srx.srx_service = RX_PERF_SERVICE;
+ srx.transport_type = SOCK_DGRAM;
+ srx.transport_len = sizeof(srx.transport.sin6);
+ srx.transport.sin6.sin6_family = AF_INET6;
+ srx.transport.sin6.sin6_port = htons(RXPERF_PORT);
+
+ ret = rxrpc_sock_set_min_security_level(socket->sk,
+ RXRPC_SECURITY_ENCRYPT);
+ if (ret < 0)
+ goto error_2;
+
+ ret = rxrpc_sock_set_security_keyring(socket->sk, rxperf_sec_keyring);
+
+ ret = kernel_bind(socket, (struct sockaddr *)&srx, sizeof(srx));
+ if (ret < 0)
+ goto error_2;
+
+ rxrpc_kernel_new_call_notification(socket, rxperf_rx_new_call,
+ rxperf_rx_discard_new_call);
+
+ ret = kernel_listen(socket, INT_MAX);
+ if (ret < 0)
+ goto error_2;
+
+ rxperf_socket = socket;
+ rxperf_charge_preallocation(&rxperf_charge_preallocation_work);
+ return 0;
+
+error_2:
+ sock_release(socket);
+error_1:
+ pr_err("Can't set up rxperf socket: %d\n", ret);
+ return ret;
+}
+
+/*
+ * close the rxrpc socket rxperf was using
+ */
+static void rxperf_close_socket(void)
+{
+ kernel_listen(rxperf_socket, 0);
+ kernel_sock_shutdown(rxperf_socket, SHUT_RDWR);
+ flush_workqueue(rxperf_workqueue);
+ sock_release(rxperf_socket);
+}
+
+/*
+ * Report a remote abort code that suggests the peer hit a protocol-level
+ * problem (marshalling, decoding, short data).  Abort codes not listed here
+ * are ignored, and no more than three reports are ever printed.
+ */
+static void rxperf_log_error(struct rxperf_call *call, s32 remote_abort)
+{
+	static int nr_reported;
+	const char *what;
+	int seen;
+
+	switch (remote_abort) {
+	case RX_EOF:
+		what = "unexpected EOF";
+		break;
+	case RXGEN_CC_MARSHAL:
+		what = "client marshalling";
+		break;
+	case RXGEN_CC_UNMARSHAL:
+		what = "client unmarshalling";
+		break;
+	case RXGEN_SS_MARSHAL:
+		what = "server marshalling";
+		break;
+	case RXGEN_SS_UNMARSHAL:
+		what = "server unmarshalling";
+		break;
+	case RXGEN_DECODE:
+		what = "opcode decode";
+		break;
+	case RXGEN_SS_XDRFREE:
+		what = "server XDR cleanup";
+		break;
+	case RXGEN_CC_XDRFREE:
+		what = "client XDR cleanup";
+		break;
+	case -32:
+		what = "insufficient data";
+		break;
+	default:
+		return;
+	}
+
+	/* Stop once three messages have been emitted. */
+	seen = nr_reported;
+	if (seen >= 3)
+		return;
+	nr_reported = seen + 1;
+	pr_info("Peer reported %s failure on %s\n", what, call->type);
+}
+
+/*
+ * Deliver messages to a call.
+ *
+ * Work item handler: while the call is waiting for parameters or request
+ * data, run the call's deliver function and, once that reports success,
+ * generate the reply.  Errors are turned into the matching abort on the rxrpc
+ * call.  When the call is finished (successfully or not) it is marked
+ * complete, ended, its work item cancelled and the call record freed.
+ */
+static void rxperf_deliver_to_call(struct work_struct *work)
+{
+	struct rxperf_call *call = container_of(work, struct rxperf_call, work);
+	enum rxperf_call_state state;
+	u32 remote_abort = 0;
+	/* Must start at 0: call_complete is reachable without ret ever being
+	 * assigned (life check failure while awaiting the final ACK, or the
+	 * loop condition being false on entry).
+	 */
+	int ret = 0;
+
+	if (call->state == RXPERF_CALL_COMPLETE)
+		return;
+
+	while (state = call->state,
+	       state == RXPERF_CALL_SV_AWAIT_PARAMS ||
+	       state == RXPERF_CALL_SV_AWAIT_REQUEST ||
+	       state == RXPERF_CALL_SV_AWAIT_ACK
+	       ) {
+		if (state == RXPERF_CALL_SV_AWAIT_ACK) {
+			/* Nothing more to do but wait, unless the call died */
+			if (!rxrpc_kernel_check_life(rxperf_socket, call->rxcall))
+				goto call_complete;
+			return;
+		}
+
+		ret = call->deliver(call);
+		if (ret == 0)
+			ret = rxperf_process_call(call);
+
+		switch (ret) {
+		case 0:
+			continue;
+		case -EINPROGRESS:
+		case -EAGAIN:
+			/* Leave the call alive; this handler returns early */
+			return;
+		case -ECONNABORTED:
+			rxperf_log_error(call, call->abort_code);
+			goto call_complete;
+		case -EOPNOTSUPP:
+			rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+						RXGEN_OPCODE, ret, "GOP");
+			goto call_complete;
+		case -ENOTSUPP:
+			rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+						RX_USER_ABORT, ret, "GUA");
+			goto call_complete;
+		case -EIO:
+			pr_err("Call %u in bad state %u\n",
+			       call->debug_id, call->state);
+			fallthrough;
+		case -ENODATA:
+		case -EBADMSG:
+		case -EMSGSIZE:
+		case -ENOMEM:
+		case -EFAULT:
+			rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+						RXGEN_SS_UNMARSHAL, ret, "GUM");
+			goto call_complete;
+		default:
+			rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+						RX_CALL_DEAD, ret, "GER");
+			goto call_complete;
+		}
+	}
+
+call_complete:
+	rxperf_set_call_complete(call, ret, remote_abort);
+	/* The call may have been requeued */
+	rxrpc_kernel_end_call(rxperf_socket, call->rxcall);
+	cancel_work(&call->work);
+	kfree(call);
+}
+
+/*
+ * Pull the next piece of received data for a call into call->iter.
+ *
+ * A result of 0 or -EAGAIN from rxrpc_kernel_recv_data() is handed straight
+ * back.  A result of 1 is mapped to 0, moving a call that was awaiting its
+ * request into the replying state; if the call has already completed,
+ * call->error is returned instead.  Any other result marks the call complete
+ * with that error and the remote abort code, and is returned.
+ */
+static int rxperf_extract_data(struct rxperf_call *call, bool want_more)
+{
+	u32 remote_abort = 0;
+	int ret;
+
+	ret = rxrpc_kernel_recv_data(rxperf_socket, call->rxcall, &call->iter,
+				     &call->iov_len, want_more, &remote_abort,
+				     &call->service_id);
+	pr_debug("Extract i=%zu l=%zu m=%u ret=%d\n",
+		 iov_iter_count(&call->iter), call->iov_len, want_more, ret);
+
+	switch (ret) {
+	case 0:
+	case -EAGAIN:
+		return ret;
+	case 1:
+		break;
+	default:
+		rxperf_set_call_complete(call, ret, remote_abort);
+		return ret;
+	}
+
+	/* recv_data returned 1: advance the call state if appropriate */
+	switch (call->state) {
+	case RXPERF_CALL_SV_AWAIT_REQUEST:
+		rxperf_set_call_state(call, RXPERF_CALL_SV_REPLYING);
+		return 0;
+	case RXPERF_CALL_COMPLETE:
+		pr_debug("premature completion %d", call->error);
+		return call->error;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Decode the parameter block at the start of an incoming call: check the
+ * protocol version, record the operation ID and set up how many bytes of
+ * size information the request stage should expect, then hand over to the
+ * request delivery function.
+ *
+ * Returns -ENOTSUPP on a version mismatch and -EOPNOTSUPP for the "file"
+ * operation or an unknown operation ID; otherwise returns whatever the next
+ * delivery stage returns.
+ */
+static int rxperf_deliver_param_block(struct rxperf_call *call)
+{
+	u32 vers;
+	int err;
+
+	/* Pull in the parameter block */
+	err = rxperf_extract_data(call, true);
+	if (err < 0)
+		return err;
+
+	vers = ntohl(call->params.version);
+	call->operation_id = ntohl(call->params.type);
+	call->deliver = rxperf_deliver_request;
+
+	if (vers != RX_PERF_VERSION) {
+		pr_info("Version mismatch %x\n", vers);
+		return -ENOTSUPP;
+	}
+
+	if (call->operation_id == RX_PERF_SEND) {
+		call->type = "send";
+		call->reply_len = 0;
+		call->iov_len = 4;	/* Expect req size */
+	} else if (call->operation_id == RX_PERF_RECV) {
+		call->type = "recv";
+		call->req_len = 0;
+		call->iov_len = 4;	/* Expect reply size */
+	} else if (call->operation_id == RX_PERF_RPC) {
+		call->type = "rpc";
+		call->iov_len = 8;	/* Expect req size and reply size */
+	} else if (call->operation_id == RX_PERF_FILE) {
+		call->type = "file";
+		return -EOPNOTSUPP;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	rxperf_set_call_state(call, RXPERF_CALL_SV_AWAIT_REQUEST);
+	return call->deliver(call);
+}
+
+/*
+ * Deliver the request data.
+ *
+ * This is a staged decoder keyed on call->unmarshal; each stage falls through
+ * to the next once its data has been extracted.  A negative return from
+ * rxperf_extract_data() is passed straight back to the caller with
+ * call->unmarshal left at the current stage:
+ *  0: point call->iter at call->tmp for call->iov_len bytes
+ *  1: extract the size word(s) and decode req_len/reply_len per operation
+ *  2: extract req_len bytes of request payload through a discard iterator
+ * Returns 0 once all stages are done, -EIO if the operation ID is not one of
+ * send/recv/rpc, or a negative error from the extraction.
+ */
+static int rxperf_deliver_request(struct rxperf_call *call)
+{
+ int ret;
+
+ switch (call->unmarshal) {
+ case 0:
+ /* Set up a one-element kvec iterator over the scratch buffer */
+ call->kvec[0].iov_len = call->iov_len;
+ call->kvec[0].iov_base = call->tmp;
+ iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len);
+ call->unmarshal++;
+ fallthrough;
+ case 1:
+ ret = rxperf_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* The size words in call->tmp are converted with ntohl() */
+ switch (call->operation_id) {
+ case RX_PERF_SEND:
+ call->type = "send";
+ call->req_len = ntohl(call->tmp[0]);
+ call->reply_len = 0;
+ break;
+ case RX_PERF_RECV:
+ call->type = "recv";
+ call->req_len = 0;
+ call->reply_len = ntohl(call->tmp[0]);
+ break;
+ case RX_PERF_RPC:
+ call->type = "rpc";
+ call->req_len = ntohl(call->tmp[0]);
+ call->reply_len = ntohl(call->tmp[1]);
+ break;
+ default:
+ pr_info("Can't parse extra params\n");
+ return -EIO;
+ }
+
+ pr_debug("CALL op=%s rq=%zx rp=%zx\n",
+ call->type, call->req_len, call->reply_len);
+
+ /* The request payload itself is not kept: read it into a discard iterator */
+ call->iov_len = call->req_len;
+ iov_iter_discard(&call->iter, READ, call->req_len);
+ call->unmarshal++;
+ fallthrough;
+ case 2:
+ /* want_more is false here, unlike the two earlier extractions */
+ ret = rxperf_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+ call->unmarshal++;
+ fallthrough;
+ default:
+ return 0;
+ }
+}
+
+/*
+ * Generate the reply for a call whose request has been fully received.
+ *
+ * The reply consists of call->reply_len bytes taken from the zero page, sent
+ * in chunks of up to PAGE_SIZE with MSG_MORE set, followed by the magic
+ * cookie sent without MSG_MORE.  Returns 0 on success or a negative error;
+ * -EIO is returned if a data send makes no progress.
+ */
+static int rxperf_process_call(struct rxperf_call *call)
+{
+	struct msghdr msg = {};
+	struct bio_vec zero_bv;
+	struct kvec cookie_kv;
+	ssize_t n;
+	size_t remaining = call->reply_len, chunk;
+
+	rxrpc_kernel_set_tx_length(rxperf_socket, call->rxcall,
+				   remaining + sizeof(rxperf_magic_cookie));
+
+	/* Bulk reply data */
+	while (remaining > 0) {
+		chunk = min_t(size_t, remaining, PAGE_SIZE);
+		zero_bv.bv_page		= ZERO_PAGE(0);
+		zero_bv.bv_offset	= 0;
+		zero_bv.bv_len		= chunk;
+		iov_iter_bvec(&msg.msg_iter, WRITE, &zero_bv, 1, chunk);
+		msg.msg_flags = MSG_MORE;
+		n = rxrpc_kernel_send_data(rxperf_socket, call->rxcall, &msg,
+					   chunk, rxperf_notify_end_reply_tx);
+		if (n < 0)
+			return n;
+		if (n == 0)
+			return -EIO;
+		remaining -= n;
+	}
+
+	/* Trailing magic cookie, which also ends the transmission */
+	chunk = sizeof(rxperf_magic_cookie);
+	cookie_kv.iov_base = (void *)rxperf_magic_cookie;
+	cookie_kv.iov_len = chunk;
+	iov_iter_kvec(&msg.msg_iter, WRITE, &cookie_kv, 1, chunk);
+	msg.msg_flags = 0;
+	n = rxrpc_kernel_send_data(rxperf_socket, call->rxcall, &msg, chunk,
+				   rxperf_notify_end_reply_tx);
+	if (n < 0) {
+		if (n == -ENOMEM)
+			rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+						RXGEN_SS_MARSHAL, -ENOMEM, "GOM");
+		return n;
+	}
+
+	return 0; /* Success */
+}
+
+/*
+ * Create (or update) the server's "rxrpc_s" key, described as
+ * "<RX_PERF_SERVICE>:2", from the built-in secret and link it into the given
+ * keyring.  Returns 0 on success or a negative error code.
+ */
+static int rxperf_add_key(struct key *keyring)
+{
+	key_ref_t kref;
+	long err;
+	int ret;
+
+	kref = key_create_or_update(make_key_ref(keyring, true),
+				    "rxrpc_s",
+				    __stringify(RX_PERF_SERVICE) ":2",
+				    secret, sizeof(secret),
+				    KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH |
+				    KEY_USR_VIEW,
+				    KEY_ALLOC_NOT_IN_QUOTA);
+	if (IS_ERR(kref)) {
+		err = PTR_ERR(kref);
+		pr_err("Can't allocate rxperf server key: %ld\n", err);
+		return err;
+	}
+
+	ret = key_link(keyring, key_ref_to_ptr(kref));
+	if (ret < 0)
+		pr_err("Can't link rxperf server key: %d\n", ret);
+
+	/* The local reference is dropped whether or not the link worked */
+	key_ref_put(kref);
+	return ret;
+}
+
+/*
+ * Initialise the rxperf server.
+ *
+ * Allocates the workqueue, creates the server keyring and adds the server key
+ * to it, then opens the listening socket.  On failure everything set up so
+ * far is torn down again and the error that caused the failure is returned.
+ */
+static int __init rxperf_init(void)
+{
+	struct key *keyring;
+	int ret = -ENOMEM;
+
+	pr_info("Server registering\n");
+
+	rxperf_workqueue = alloc_workqueue("rxperf", 0, 0);
+	if (!rxperf_workqueue)
+		goto error_workqueue;
+
+	keyring = keyring_alloc("rxperf_server",
+				GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
+				KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH |
+				KEY_POS_WRITE |
+				KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH |
+				KEY_USR_WRITE |
+				KEY_OTH_VIEW | KEY_OTH_READ | KEY_OTH_SEARCH,
+				KEY_ALLOC_NOT_IN_QUOTA,
+				NULL, NULL);
+	if (IS_ERR(keyring)) {
+		/* Report the real reason rather than the -ENOMEM default */
+		ret = PTR_ERR(keyring);
+		pr_err("Can't allocate rxperf server keyring: %d\n", ret);
+		goto error_keyring;
+	}
+	rxperf_sec_keyring = keyring;
+	ret = rxperf_add_key(keyring);
+	if (ret < 0)
+		goto error_key;
+
+	ret = rxperf_open_socket();
+	if (ret < 0)
+		goto error_socket;
+	return 0;
+
+error_socket:
+error_key:
+	key_put(rxperf_sec_keyring);
+error_keyring:
+	destroy_workqueue(rxperf_workqueue);
+	rcu_barrier();
+error_workqueue:
+	pr_err("Failed to register: %d\n", ret);
+	return ret;
+}
+late_initcall(rxperf_init); /* Must be called after net/ to create socket */
+
+/*
+ * Tear down the rxperf server on module exit: close the socket, drop the
+ * reference on the security keyring, destroy the workqueue and then wait in
+ * rcu_barrier().
+ */
+static void __exit rxperf_exit(void)
+{
+ pr_info("Server unregistering.\n");
+
+ rxperf_close_socket();
+ key_put(rxperf_sec_keyring);
+ destroy_workqueue(rxperf_workqueue);
+ rcu_barrier();
+}
+module_exit(rxperf_exit);
+
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index 50cb5f1ee0c0..209f2c25a0da 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -63,13 +63,43 @@ const struct rxrpc_security *rxrpc_security_lookup(u8 security_index)
}
/*
+ * Initialise the security on a client call.
+ */
+int rxrpc_init_client_call_security(struct rxrpc_call *call)
+{
+	const struct rxrpc_security *sec;
+	struct rxrpc_key_token *token;
+	struct key *key = call->key;
+	int ret;
+
+	/* A call without a key has no security to set up */
+	if (!key)
+		return 0;
+
+	ret = key_validate(key);
+	if (ret < 0)
+		return ret;
+
+	/* Use the first token for which a security class can be looked up */
+	for (token = key->payload.data[0]; token; token = token->next) {
+		sec = rxrpc_security_lookup(token->security_index);
+		if (!sec)
+			continue;
+		call->security = sec;
+		_leave(" = 0");
+		return 0;
+	}
+
+	return -EKEYREJECTED;
+}
+
+/*
* initialise the security on a client connection
*/
int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
{
const struct rxrpc_security *sec;
struct rxrpc_key_token *token;
- struct key *key = conn->params.key;
+ struct key *key = conn->key;
int ret;
_enter("{%d},{%x}", conn->debug_id, key_serial(key));
@@ -163,7 +193,7 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn,
rcu_read_lock();
- rx = rcu_dereference(conn->params.local->service);
+ rx = rcu_dereference(conn->local->service);
if (!rx)
goto out;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index e5fd8a95bf71..9fa7e37f7155 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -22,30 +22,9 @@
*/
static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win)
{
- unsigned int win_size;
- rxrpc_seq_t tx_win = smp_load_acquire(&call->acks_hard_ack);
-
- /* If we haven't transmitted anything for >1RTT, we should reset the
- * congestion management state.
- */
- if (ktime_before(ktime_add_us(call->tx_last_sent,
- call->peer->srtt_us >> 3),
- ktime_get_real())) {
- if (RXRPC_TX_SMSS > 2190)
- win_size = 2;
- else if (RXRPC_TX_SMSS > 1095)
- win_size = 3;
- else
- win_size = 4;
- win_size += call->cong_extra;
- } else {
- win_size = min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra);
- }
-
if (_tx_win)
- *_tx_win = tx_win;
- return call->tx_top - tx_win < win_size;
+ *_tx_win = call->tx_bottom;
+ return call->tx_prepared - call->tx_bottom < 256;
}
/*
@@ -66,11 +45,6 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
if (signal_pending(current))
return sock_intr_errno(*timeo);
- if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) {
- rxrpc_shrink_call_tx_buffer(call);
- continue;
- }
-
trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
*timeo = schedule_timeout(*timeo);
}
@@ -107,11 +81,6 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
tx_win == tx_start && signal_pending(current))
return -EINTR;
- if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) {
- rxrpc_shrink_call_tx_buffer(call);
- continue;
- }
-
if (tx_win != tx_start) {
timeout = rtt;
tx_start = tx_win;
@@ -137,11 +106,6 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
if (call->state >= RXRPC_CALL_COMPLETE)
return call->error;
- if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) {
- rxrpc_shrink_call_tx_buffer(call);
- continue;
- }
-
trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
*timeo = schedule_timeout(*timeo);
}
@@ -206,33 +170,32 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
{
unsigned long now;
rxrpc_seq_t seq = txb->seq;
- bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags);
- int ret;
+ bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke;
rxrpc_inc_stat(call->rxnet, stat_tx_data);
- ASSERTCMP(seq, ==, call->tx_top + 1);
+ ASSERTCMP(txb->seq, ==, call->tx_prepared + 1);
/* We have to set the timestamp before queueing as the retransmit
* algorithm can see the packet as soon as we queue it.
*/
txb->last_sent = ktime_get_real();
- /* Add the packet to the call's output buffer */
- rxrpc_get_txbuf(txb, rxrpc_txbuf_get_buffer);
- spin_lock(&call->tx_lock);
- list_add_tail(&txb->call_link, &call->tx_buffer);
- call->tx_top = seq;
- spin_unlock(&call->tx_lock);
-
if (last)
trace_rxrpc_txqueue(call, rxrpc_txqueue_queue_last);
else
trace_rxrpc_txqueue(call, rxrpc_txqueue_queue);
+ /* Add the packet to the call's output buffer */
+ spin_lock(&call->tx_lock);
+ poke = list_empty(&call->tx_sendmsg);
+ list_add_tail(&txb->call_link, &call->tx_sendmsg);
+ call->tx_prepared = seq;
+ spin_unlock(&call->tx_lock);
+
if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
_debug("________awaiting reply/ACK__________");
- write_lock_bh(&call->state_lock);
+ write_lock(&call->state_lock);
switch (call->state) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
@@ -255,33 +218,11 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
default:
break;
}
- write_unlock_bh(&call->state_lock);
+ write_unlock(&call->state_lock);
}
- if (seq == 1 && rxrpc_is_client_call(call))
- rxrpc_expose_client_call(call);
-
- ret = rxrpc_send_data_packet(call, txb);
- if (ret < 0) {
- switch (ret) {
- case -ENETUNREACH:
- case -EHOSTUNREACH:
- case -ECONNREFUSED:
- rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
- 0, ret);
- goto out;
- }
- } else {
- unsigned long now = jiffies;
- unsigned long resend_at = now + call->peer->rto_j;
-
- WRITE_ONCE(call->resend_at, resend_at);
- rxrpc_reduce_call_timer(call, resend_at, now,
- rxrpc_timer_set_for_send);
- }
-
-out:
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_trans);
+ if (poke)
+ rxrpc_poke_call(call, rxrpc_call_poke_start);
}
/*
@@ -335,8 +276,6 @@ reload:
rxrpc_see_txbuf(txb, rxrpc_txbuf_see_send_more);
do {
- rxrpc_transmit_ack_packets(call->peer->local);
-
if (!txb) {
size_t remain, bufsize, chunk, offset;
@@ -416,10 +355,10 @@ reload:
success:
ret = copied;
if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) {
- read_lock_bh(&call->state_lock);
+ read_lock(&call->state_lock);
if (call->error < 0)
ret = call->error;
- read_unlock_bh(&call->state_lock);
+ read_unlock(&call->state_lock);
}
out:
call->tx_pending = txb;
@@ -604,7 +543,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
atomic_inc_return(&rxrpc_debug_id));
/* The socket is now unlocked */
- rxrpc_put_peer(cp.peer);
+ rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp);
_leave(" = %p\n", call);
return call;
}
@@ -667,7 +606,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
case RXRPC_CALL_CLIENT_AWAIT_CONN:
case RXRPC_CALL_SERVER_PREALLOC:
case RXRPC_CALL_SERVER_SECURING:
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_sendmsg);
ret = -EBUSY;
goto error_release_sock;
default:
@@ -737,7 +676,7 @@ out_put_unlock:
if (!dropped_lock)
mutex_unlock(&call->user_mutex);
error_put:
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_sendmsg);
_leave(" = %d", ret);
return ret;
@@ -784,9 +723,9 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
notify_end_tx, &dropped_lock);
break;
case RXRPC_CALL_COMPLETE:
- read_lock_bh(&call->state_lock);
+ read_lock(&call->state_lock);
ret = call->error;
- read_unlock_bh(&call->state_lock);
+ read_unlock(&call->state_lock);
break;
default:
/* Request phase complete for this client call */
diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c
index ee269e0e6ee8..e51940589ee5 100644
--- a/net/rxrpc/server_key.c
+++ b/net/rxrpc/server_key.c
@@ -144,3 +144,28 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen)
_leave(" = 0 [key %x]", key->serial);
return 0;
}
+
+/**
+ * rxrpc_sock_set_security_keyring - Set the security keyring for a kernel service
+ * @sk: The socket to set the keyring on
+ * @keyring: The keyring to set
+ *
+ * Set the server security keyring on an rxrpc socket.  This is used to provide
+ * the encryption keys for a kernel service.  A reference is taken on the
+ * keyring with key_get() when it is installed.
+ *
+ * Return: 0 on success, -EINVAL if the socket already has a keyring set or
+ * -EISCONN if the socket is no longer in the unbound state.
+ */
+int rxrpc_sock_set_security_keyring(struct sock *sk, struct key *keyring)
+{
+	struct rxrpc_sock *rx = rxrpc_sk(sk);
+	int ret;
+
+	lock_sock(sk);
+	if (rx->securities) {
+		ret = -EINVAL;
+	} else if (rx->sk.sk_state != RXRPC_UNBOUND) {
+		ret = -EISCONN;
+	} else {
+		rx->securities = key_get(keyring);
+		ret = 0;
+	}
+	release_sock(sk);
+	return ret;
+}
+EXPORT_SYMBOL(rxrpc_sock_set_security_keyring);
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 0c827d5bb2b8..ebe0c75e7b07 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/* ar-skbuff.c: socket buffer destruction handling
+/* Socket buffer accounting
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells ([email protected])
@@ -19,56 +19,50 @@
/*
* Note the allocation or reception of a socket buffer.
*/
-void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
{
- const void *here = __builtin_return_address(0);
int n = atomic_inc_return(select_skb_count(skb));
- trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
}
/*
* Note the re-emergence of a socket buffer from a queue or buffer.
*/
-void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
{
- const void *here = __builtin_return_address(0);
if (skb) {
int n = atomic_read(select_skb_count(skb));
- trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
}
}
/*
* Note the addition of a ref on a socket buffer.
*/
-void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
{
- const void *here = __builtin_return_address(0);
int n = atomic_inc_return(select_skb_count(skb));
- trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
skb_get(skb);
}
/*
* Note the dropping of a ref on a socket buffer by the core.
*/
-void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
{
- const void *here = __builtin_return_address(0);
int n = atomic_inc_return(&rxrpc_n_rx_skbs);
- trace_rxrpc_skb(skb, op, 0, n, here);
+ trace_rxrpc_skb(skb, 0, n, why);
}
/*
* Note the destruction of a socket buffer.
*/
-void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
{
- const void *here = __builtin_return_address(0);
if (skb) {
- int n;
- n = atomic_dec_return(select_skb_count(skb));
- trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+ int n = atomic_dec_return(select_skb_count(skb));
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
kfree_skb(skb);
}
}
@@ -78,12 +72,12 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
*/
void rxrpc_purge_queue(struct sk_buff_head *list)
{
- const void *here = __builtin_return_address(0);
struct sk_buff *skb;
+
while ((skb = skb_dequeue((list))) != NULL) {
int n = atomic_dec_return(select_skb_count(skb));
- trace_rxrpc_skb(skb, rxrpc_skb_purged,
- refcount_read(&skb->users), n, here);
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n,
+ rxrpc_skb_put_purge);
kfree_skb(skb);
}
}
diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c
index 96bfee89927b..d2cf2aac3adb 100644
--- a/net/rxrpc/txbuf.c
+++ b/net/rxrpc/txbuf.c
@@ -26,7 +26,6 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
INIT_LIST_HEAD(&txb->call_link);
INIT_LIST_HEAD(&txb->tx_link);
refcount_set(&txb->ref, 1);
- txb->call = call;
txb->call_debug_id = call->debug_id;
txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
txb->space = sizeof(txb->data);
@@ -34,7 +33,7 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
txb->offset = 0;
txb->flags = 0;
txb->ack_why = 0;
- txb->seq = call->tx_top + 1;
+ txb->seq = call->tx_prepared + 1;
txb->wire.epoch = htonl(call->conn->proto.epoch);
txb->wire.cid = htonl(call->cid);
txb->wire.callNumber = htonl(call->call_id);
@@ -44,7 +43,7 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
txb->wire.userStatus = 0;
txb->wire.securityIndex = call->security_ix;
txb->wire._rsvd = 0;
- txb->wire.serviceId = htons(call->service_id);
+ txb->wire.serviceId = htons(call->dest_srx.srx_service);
trace_rxrpc_txbuf(txb->debug_id,
txb->call_debug_id, txb->seq, 1,
@@ -107,6 +106,7 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
{
struct rxrpc_txbuf *txb;
rxrpc_seq_t hard_ack = smp_load_acquire(&call->acks_hard_ack);
+ bool wake = false;
_enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top);
@@ -120,8 +120,10 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
if (before(hard_ack, txb->seq))
break;
+ if (txb->seq != call->tx_bottom + 1)
+ rxrpc_see_txbuf(txb, rxrpc_txbuf_see_out_of_step);
ASSERTCMP(txb->seq, ==, call->tx_bottom + 1);
- call->tx_bottom++;
+ smp_store_release(&call->tx_bottom, call->tx_bottom + 1);
list_del_rcu(&txb->call_link);
trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue);
@@ -129,7 +131,12 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
spin_unlock(&call->tx_lock);
rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated);
+ if (after(call->acks_hard_ack, call->tx_bottom + 128))
+ wake = true;
}
spin_unlock(&call->tx_lock);
+
+ if (wake)
+ wake_up(&call->waitq);
}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9b31a10cc639..5b3c0ac495be 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -23,6 +23,7 @@
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/flow_offload.h>
+#include <net/tc_wrapper.h>
#ifdef CONFIG_INET
DEFINE_STATIC_KEY_FALSE(tcf_frag_xmit_count);
@@ -1080,7 +1081,7 @@ restart_act_graph:
repeat_ttl = 32;
repeat:
- ret = a->ops->act(skb, a, res);
+ ret = tc_act(skb, a, res);
if (unlikely(ret == TC_ACT_REPEAT)) {
if (--repeat_ttl != 0)
goto repeat;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index b79eee44e24e..b0455fda7d0b 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -18,6 +18,7 @@
#include <linux/tc_act/tc_bpf.h>
#include <net/tc_act/tc_bpf.h>
+#include <net/tc_wrapper.h>
#define ACT_BPF_NAME_LEN 256
@@ -31,8 +32,9 @@ struct tcf_bpf_cfg {
static struct tc_action_ops act_bpf_ops;
-static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb,
+ const struct tc_action *act,
+ struct tcf_result *res)
{
bool at_ingress = skb_at_tc_ingress(skb);
struct tcf_bpf *prog = to_bpf(act);
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index d41002e4613f..7e63ff7e3ed7 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -20,6 +20,7 @@
#include <net/pkt_cls.h>
#include <uapi/linux/tc_act/tc_connmark.h>
#include <net/tc_act/tc_connmark.h>
+#include <net/tc_wrapper.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -27,8 +28,9 @@
static struct tc_action_ops act_connmark_ops;
-static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
const struct nf_conntrack_tuple_hash *thash;
struct nf_conntrack_tuple tuple;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 1366adf9b909..95e9304024b7 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -32,6 +32,7 @@
#include <linux/tc_act/tc_csum.h>
#include <net/tc_act/tc_csum.h>
+#include <net/tc_wrapper.h>
static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
@@ -563,8 +564,9 @@ fail:
return 0;
}
-static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_csum_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_csum *p = to_tcf_csum(a);
bool orig_vlan_tag_present = false;
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index dd5ae7551956..f6df0168c91f 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -24,6 +24,7 @@
#include <net/ipv6_frag.h>
#include <uapi/linux/tc_act/tc_ct.h>
#include <net/tc_act/tc_ct.h>
+#include <net/tc_wrapper.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
@@ -1038,8 +1039,8 @@ static int tcf_ct_act_nat(struct sk_buff *skb,
#endif
}
-static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
{
struct net *net = dev_net(skb->dev);
enum ip_conntrack_info ctinfo;
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index eaa02f098d1c..4b1b59da5c0b 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -18,6 +18,7 @@
#include <net/pkt_cls.h>
#include <uapi/linux/tc_act/tc_ctinfo.h>
#include <net/tc_act/tc_ctinfo.h>
+#include <net/tc_wrapper.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -75,8 +76,9 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask;
}
-static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_ctinfo_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
const struct nf_conntrack_tuple_hash *thash = NULL;
struct tcf_ctinfo *ca = to_ctinfo(a);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 62d682b96b88..54f1b13b2360 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -18,6 +18,7 @@
#include <net/pkt_cls.h>
#include <linux/tc_act/tc_gact.h>
#include <net/tc_act/tc_gact.h>
+#include <net/tc_wrapper.h>
static struct tc_action_ops act_gact_ops;
@@ -145,8 +146,9 @@ release_idr:
return err;
}
-static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_gact_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_gact *gact = to_gact(a);
int action = READ_ONCE(gact->tcf_action);
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 3049878e7315..9b8def0be41e 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -14,6 +14,7 @@
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gate.h>
+#include <net/tc_wrapper.h>
static struct tc_action_ops act_gate_ops;
@@ -113,8 +114,9 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
return HRTIMER_RESTART;
}
-static int tcf_gate_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_gate_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_gate *gact = to_gate(a);
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 41d63b33461d..bc7611b0744c 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -29,6 +29,7 @@
#include <net/tc_act/tc_ife.h>
#include <linux/etherdevice.h>
#include <net/ife.h>
+#include <net/tc_wrapper.h>
static int max_metacnt = IFE_META_MAX + 1;
static struct tc_action_ops act_ife_ops;
@@ -861,8 +862,9 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
return action;
}
-static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_ife_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_ife_info *ife = to_ife(a);
struct tcf_ife_params *p;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 1625e1037416..5d96ffebd40f 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -20,6 +20,7 @@
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_ipt.h>
#include <net/tc_act/tc_ipt.h>
+#include <net/tc_wrapper.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -216,8 +217,9 @@ static int tcf_xt_init(struct net *net, struct nlattr *nla,
a, &act_xt_ops, tp, flags);
}
-static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
int ret = 0, result = 0;
struct tcf_ipt *ipt = to_ipt(a);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index b8ad6ae282c0..7284bcea7b0b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -24,6 +24,7 @@
#include <net/pkt_cls.h>
#include <linux/tc_act/tc_mirred.h>
#include <net/tc_act/tc_mirred.h>
+#include <net/tc_wrapper.h>
static LIST_HEAD(mirred_list);
static DEFINE_SPINLOCK(mirred_list_lock);
@@ -217,8 +218,9 @@ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
return err;
}
-static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_mirred *m = to_mirred(a);
struct sk_buff *skb2 = skb;
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 8ad25cc8ccd5..ff47ce4d3968 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -14,6 +14,7 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_mpls.h>
+#include <net/tc_wrapper.h>
static struct tc_action_ops act_mpls_ops;
@@ -49,8 +50,9 @@ static __be32 tcf_mpls_get_lse(struct mpls_shim_hdr *lse,
return cpu_to_be32(new_lse);
}
-static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_mpls_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_mpls *m = to_mpls(a);
struct tcf_mpls_params *p;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 9265145f1040..74c74be33048 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -24,7 +24,7 @@
#include <net/tc_act/tc_nat.h>
#include <net/tcp.h>
#include <net/udp.h>
-
+#include <net/tc_wrapper.h>
static struct tc_action_ops act_nat_ops;
@@ -98,8 +98,9 @@ release_idr:
return err;
}
-static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_nat *p = to_tcf_nat(a);
struct iphdr *iph;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 94ed5857ce67..a0378e9f0121 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -20,6 +20,7 @@
#include <net/tc_act/tc_pedit.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
static struct tc_action_ops act_pedit_ops;
@@ -319,8 +320,9 @@ static int pedit_skb_hdr_offset(struct sk_buff *skb,
return ret;
}
-static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_pedit *p = to_pedit(a);
u32 max_offset;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0adb26e366a7..227cba58ce9f 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -19,6 +19,7 @@
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_police.h>
+#include <net/tc_wrapper.h>
/* Each policer is serialized by its individual spinlock */
@@ -242,8 +243,9 @@ static bool tcf_police_mtu_check(struct sk_buff *skb, u32 limit)
return len <= limit;
}
-static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_police_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_police *police = to_police(a);
s64 now, toks, ppstoks = 0, ptoks = 0;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 7a25477f5d99..98dea08c1764 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -20,6 +20,7 @@
#include <net/tc_act/tc_sample.h>
#include <net/psample.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#include <linux/if_arp.h>
@@ -153,8 +154,9 @@ static bool tcf_sample_dev_ok_push(struct net_device *dev)
}
}
-static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_sample *s = to_sample(a);
struct psample_group *psample_group;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 18d376135461..4b84514534f3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -14,6 +14,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#include <linux/tc_act/tc_defact.h>
#include <net/tc_act/tc_defact.h>
@@ -21,8 +22,9 @@
static struct tc_action_ops act_simp_ops;
#define SIMP_MAX_DATA 32
-static int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_simp_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_defact *d = to_defact(a);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 1710780c908a..ce7008cf291c 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -16,6 +16,7 @@
#include <net/ipv6.h>
#include <net/dsfield.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#include <linux/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_skbedit.h>
@@ -36,8 +37,9 @@ static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params,
return netdev_cap_txqueue(skb->dev, queue_mapping);
}
-static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_skbedit_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_skbedit *d = to_skbedit(a);
struct tcf_skbedit_params *params;
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index d98758a63934..dffa990a9629 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -15,14 +15,16 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#include <linux/tc_act/tc_skbmod.h>
#include <net/tc_act/tc_skbmod.h>
static struct tc_action_ops act_skbmod_ops;
-static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_skbmod_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_skbmod *d = to_skbmod(a);
int action, max_edit_len, err;
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 2691a3d8e451..2d12d2626415 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -16,14 +16,16 @@
#include <net/pkt_sched.h>
#include <net/dst.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#include <linux/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_tunnel_key.h>
static struct tc_action_ops act_tunnel_key_ops;
-static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tunnel_key_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_tunnel_key *t = to_tunnel_key(a);
struct tcf_tunnel_key_params *params;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 7b24e898a3e6..0251442f5f29 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -12,14 +12,16 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#include <linux/tc_act/tc_vlan.h>
#include <net/tc_act/tc_vlan.h>
static struct tc_action_ops act_vlan_ops;
-static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcf_vlan_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
{
struct tcf_vlan *v = to_vlan(a);
struct tcf_vlan_params *p;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 23d1cfa4f58c..668130f08903 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -40,6 +40,7 @@
#include <net/tc_act/tc_mpls.h>
#include <net/tc_act/tc_gate.h>
#include <net/flow_offload.h>
+#include <net/tc_wrapper.h>
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
@@ -1564,7 +1565,7 @@ reclassify:
tp->protocol != htons(ETH_P_ALL))
continue;
- err = tp->classify(skb, tp, res);
+ err = tc_classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
first_tp = orig_tp;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index d229ce99e554..1b92c33b5f81 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -18,6 +18,7 @@
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
struct basic_head {
struct list_head flist;
@@ -36,8 +37,9 @@ struct basic_filter {
struct rcu_work rwork;
};
-static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int basic_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
int r;
struct basic_head *head = rcu_dereference_bh(tp->root);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index bc317b3eac12..466c26df853a 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -19,6 +19,7 @@
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>
+#include <net/tc_wrapper.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <[email protected]>");
@@ -77,8 +78,9 @@ static int cls_bpf_exec_opcode(int code)
}
}
-static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
bool at_ingress = skb_at_tc_ingress(skb);
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index ed00001b528a..bd9322d71910 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -13,6 +13,7 @@
#include <net/pkt_cls.h>
#include <net/sock.h>
#include <net/cls_cgroup.h>
+#include <net/tc_wrapper.h>
struct cls_cgroup_head {
u32 handle;
@@ -22,8 +23,9 @@ struct cls_cgroup_head {
struct rcu_work rwork;
};
-static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int cls_cgroup_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct cls_cgroup_head *head = rcu_dereference_bh(tp->root);
u32 classid = task_get_classid(skb);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 014cd3de7b5d..535668e1f748 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -24,6 +24,7 @@
#include <net/ip.h>
#include <net/route.h>
#include <net/flow_dissector.h>
+#include <net/tc_wrapper.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
@@ -292,8 +293,9 @@ static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
(1 << FLOW_KEY_NFCT_PROTO_SRC) | \
(1 << FLOW_KEY_NFCT_PROTO_DST))
-static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int flow_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct flow_head *head = rcu_dereference_bh(tp->root);
struct flow_filter *f;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 25bc57ee6ea1..0b15698b3531 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -27,6 +27,7 @@
#include <net/vxlan.h>
#include <net/erspan.h>
#include <net/gtp.h>
+#include <net/tc_wrapper.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
@@ -305,8 +306,9 @@ static u16 fl_ct_info_to_flower_map[] = {
TCA_FLOWER_KEY_CT_FLAGS_NEW,
};
-static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int fl_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
bool post_ct = tc_skb_cb(skb)->post_ct;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index a32351da968c..ae9439a6c56c 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -21,6 +21,7 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>
+#include <net/tc_wrapper.h>
#define HTSIZE 256
@@ -47,8 +48,9 @@ static u32 fw_hash(u32 handle)
return handle % HTSIZE;
}
-static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int fw_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct fw_head *head = rcu_dereference_bh(tp->root);
struct fw_filter *f;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 39a5d9c170de..705f63da2c21 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -12,6 +12,7 @@
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
struct cls_mall_head {
struct tcf_exts exts;
@@ -24,8 +25,9 @@ struct cls_mall_head {
bool deleting;
};
-static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int mall_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct cls_mall_head *head = rcu_dereference_bh(tp->root);
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 9e43b929d4ca..d0c53724d3e8 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -17,6 +17,7 @@
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
/*
* 1. For now we assume that route tags < 256.
@@ -121,8 +122,9 @@ static inline int route4_hash_wild(void)
return 0; \
}
-static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int route4_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct route4_head *head = rcu_dereference_bh(tp->root);
struct dst_entry *dst;
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
index de1c1d4da597..03d8619bd9c6 100644
--- a/net/sched/cls_rsvp.c
+++ b/net/sched/cls_rsvp.c
@@ -15,10 +15,12 @@
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#define RSVP_DST_LEN 1
#define RSVP_ID "rsvp"
#define RSVP_OPS cls_rsvp_ops
+#define RSVP_CLS rsvp_classify
#include "cls_rsvp.h"
MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index b00a7dbd0587..869efba9f834 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -124,8 +124,8 @@ static inline unsigned int hash_src(__be32 *src)
return r; \
}
-static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int RSVP_CLS(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct rsvp_head *head = rcu_dereference_bh(tp->root);
struct rsvp_session *s;
@@ -738,7 +738,7 @@ static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
static struct tcf_proto_ops RSVP_OPS __read_mostly = {
.kind = RSVP_ID,
- .classify = rsvp_classify,
+ .classify = RSVP_CLS,
.init = rsvp_init,
.destroy = rsvp_destroy,
.get = rsvp_get,
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
index 64078846000e..e627cc32d633 100644
--- a/net/sched/cls_rsvp6.c
+++ b/net/sched/cls_rsvp6.c
@@ -15,10 +15,12 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <net/netlink.h>
+#include <net/tc_wrapper.h>
#define RSVP_DST_LEN 4
#define RSVP_ID "rsvp6"
#define RSVP_OPS cls_rsvp6_ops
+#define RSVP_CLS rsvp6_classify
#include "cls_rsvp.h"
MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 1c9eeb98d826..eb0e9458e722 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -16,6 +16,7 @@
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>
+#include <net/tc_wrapper.h>
/*
* Passing parameters to the root seems to be done more awkwardly than really
@@ -98,9 +99,9 @@ static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
return NULL;
}
-
-static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int tcindex_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct tcindex_data *p = rcu_dereference_bh(tp->root);
struct tcindex_filter_result *f;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 34d25f7a0687..4e2e269f121f 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -39,6 +39,7 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <linux/idr.h>
+#include <net/tc_wrapper.h>
struct tc_u_knode {
struct tc_u_knode __rcu *next;
@@ -100,8 +101,9 @@ static inline unsigned int u32_hash_fold(__be32 key,
return h;
}
-static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+TC_INDIRECT_SCOPE int u32_classify(struct sk_buff *skb,
+ const struct tcf_proto *tp,
+ struct tcf_result *res)
{
struct {
struct tc_u_knode *knode;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 4a27dfb1ba0f..2317db02c764 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -31,6 +31,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tc_wrapper.h>
#include <trace/events/qdisc.h>
@@ -2273,6 +2274,8 @@ static struct pernet_operations psched_net_ops = {
.exit = psched_net_exit,
};
+DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
+
static int __init pktsched_init(void)
{
int err;
@@ -2300,6 +2303,8 @@ static int __init pktsched_init(void)
rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
0);
+ tc_wrapper_init();
+
return 0;
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index e260c0d557f5..b3ce24823f50 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2224,7 +2224,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (tipc_own_addr(l->net) > msg_prevnode(hdr))
l->net_plane = msg_net_plane(hdr);
- skb_linearize(skb);
+ if (skb_linearize(skb))
+ goto exit;
+
hdr = buf_msg(skb);
data = msg_data(hdr);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index b48d97cbbe29..49ddc484c4fe 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1689,6 +1689,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
struct tipc_node *n;
struct sk_buff_head xmitq;
bool node_up = false;
+ struct net *peer_net;
int bearer_id;
int rc;
@@ -1705,18 +1706,23 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
return -EHOSTUNREACH;
}
+ rcu_read_lock();
tipc_node_read_lock(n);
node_up = node_is_up(n);
- if (node_up && n->peer_net && check_net(n->peer_net)) {
+ peer_net = n->peer_net;
+ tipc_node_read_unlock(n);
+ if (node_up && peer_net && check_net(peer_net)) {
/* xmit inner linux container */
- tipc_lxc_xmit(n->peer_net, list);
+ tipc_lxc_xmit(peer_net, list);
if (likely(skb_queue_empty(list))) {
- tipc_node_read_unlock(n);
+ rcu_read_unlock();
tipc_node_put(n);
return 0;
}
}
+ rcu_read_unlock();
+ tipc_node_read_lock(n);
bearer_id = n->active_links[selector & 1];
if (unlikely(bearer_id == INVALID_BEARER_ID)) {
tipc_node_read_unlock(n);
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 105f522a89fe..616b55c5b890 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -114,14 +114,16 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql);
}
-static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb)
+static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb,
+ struct user_namespace *user_ns)
{
- uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk));
+ uid_t uid = from_kuid_munged(user_ns, sock_i_uid(sk));
return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid);
}
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
- u32 portid, u32 seq, u32 flags, int sk_ino)
+ struct user_namespace *user_ns,
+ u32 portid, u32 seq, u32 flags, int sk_ino)
{
struct nlmsghdr *nlh;
struct unix_diag_msg *rep;
@@ -167,7 +169,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
goto out_nlmsg_trim;
if ((req->udiag_show & UDIAG_SHOW_UID) &&
- sk_diag_dump_uid(sk, skb))
+ sk_diag_dump_uid(sk, skb, user_ns))
goto out_nlmsg_trim;
nlmsg_end(skb, nlh);
@@ -179,7 +181,8 @@ out_nlmsg_trim:
}
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
- u32 portid, u32 seq, u32 flags)
+ struct user_namespace *user_ns,
+ u32 portid, u32 seq, u32 flags)
{
int sk_ino;
@@ -190,7 +193,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
if (!sk_ino)
return 0;
- return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino);
+ return sk_diag_fill(sk, skb, req, user_ns, portid, seq, flags, sk_ino);
}
static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -214,7 +217,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
goto next;
if (!(req->udiag_states & (1 << sk->sk_state)))
goto next;
- if (sk_diag_dump(sk, skb, req,
+ if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI) < 0) {
@@ -282,7 +285,8 @@ again:
if (!rep)
goto out;
- err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid,
+ err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk),
+ NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0, req->udiag_ino);
if (err < 0) {
nlmsg_free(rep);
diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c
index f99e00083141..4c677c8546c7 100644
--- a/sound/firewire/dice/dice-stream.c
+++ b/sound/firewire/dice/dice-stream.c
@@ -59,7 +59,7 @@ int snd_dice_stream_get_rate_mode(struct snd_dice *dice, unsigned int rate,
static int select_clock(struct snd_dice *dice, unsigned int rate)
{
- __be32 reg;
+ __be32 reg, new;
u32 data;
int i;
int err;
@@ -83,15 +83,17 @@ static int select_clock(struct snd_dice *dice, unsigned int rate)
if (completion_done(&dice->clock_accepted))
reinit_completion(&dice->clock_accepted);
- reg = cpu_to_be32(data);
+ new = cpu_to_be32(data);
err = snd_dice_transaction_write_global(dice, GLOBAL_CLOCK_SELECT,
- &reg, sizeof(reg));
+ &new, sizeof(new));
if (err < 0)
return err;
if (wait_for_completion_timeout(&dice->clock_accepted,
- msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0)
- return -ETIMEDOUT;
+ msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) {
+ if (reg != new)
+ return -ETIMEDOUT;
+ }
return 0;
}
diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c
index 51721edd8f53..e88d9ff95cdf 100644
--- a/sound/soc/codecs/cs42l51.c
+++ b/sound/soc/codecs/cs42l51.c
@@ -143,7 +143,7 @@ static const struct snd_kcontrol_new cs42l51_snd_controls[] = {
0, 0xA0, 96, adc_att_tlv),
SOC_DOUBLE_R_SX_TLV("PGA Volume",
CS42L51_ALC_PGA_CTL, CS42L51_ALC_PGB_CTL,
- 0, 0x19, 30, pga_tlv),
+ 0, 0x1A, 30, pga_tlv),
SOC_SINGLE("Playback Deemphasis Switch", CS42L51_DAC_CTL, 3, 1, 0),
SOC_SINGLE("Auto-Mute Switch", CS42L51_DAC_CTL, 2, 1, 0),
SOC_SINGLE("Soft Ramp Switch", CS42L51_DAC_CTL, 1, 1, 0),
diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c
index a969547708d4..52bb55724724 100644
--- a/sound/soc/codecs/tlv320adc3xxx.c
+++ b/sound/soc/codecs/tlv320adc3xxx.c
@@ -14,6 +14,7 @@
#include <dt-bindings/sound/tlv320adc3xxx.h>
#include <linux/clk.h>
+#include <linux/gpio/consumer.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/io.h>
@@ -1025,7 +1026,9 @@ static const struct gpio_chip adc3xxx_gpio_chip = {
static void adc3xxx_free_gpio(struct adc3xxx *adc3xxx)
{
+#ifdef CONFIG_GPIOLIB
gpiochip_remove(&adc3xxx->gpio_chip);
+#endif
}
static void adc3xxx_init_gpio(struct adc3xxx *adc3xxx)
diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c
index 79ef4e269bc9..4b86ef82fd93 100644
--- a/sound/soc/fsl/fsl_micfil.c
+++ b/sound/soc/fsl/fsl_micfil.c
@@ -194,6 +194,25 @@ static int fsl_micfil_reset(struct device *dev)
if (ret)
return ret;
+ /*
+ * SRES is a self-clearing bit, but REG_MICFIL_CTRL1 is defined
+ * as a non-volatile register, so SRES stays set in the regmap
+ * cache after it is written and every later update of
+ * REG_MICFIL_CTRL1 triggers another software reset. Clear it explicitly.
+ */
+ ret = regmap_clear_bits(micfil->regmap, REG_MICFIL_CTRL1,
+ MICFIL_CTRL1_SRES);
+ if (ret)
+ return ret;
+
+ /*
+ * Setting SRES should clear the CHnF flags, but even with a delay added
+ * here CHnF is sometimes not cleared, so clear CHnF explicitly.
+ */
+ ret = regmap_write_bits(micfil->regmap, REG_MICFIL_STAT, 0xFF, 0xFF);
+ if (ret)
+ return ret;
+
return 0;
}
diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
index bd88de056358..55b009d3c681 100644
--- a/sound/soc/soc-ops.c
+++ b/sound/soc/soc-ops.c
@@ -452,7 +452,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol,
val = ucontrol->value.integer.value[0];
if (mc->platform_max && val > mc->platform_max)
return -EINVAL;
- if (val > max - min)
+ if (val > max)
return -EINVAL;
val_mask = mask << shift;
val = (val + min) & mask;
@@ -464,10 +464,15 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol,
ret = err;
if (snd_soc_volsw_is_stereo(mc)) {
- unsigned int val2;
+ unsigned int val2 = ucontrol->value.integer.value[1];
+
+ if (mc->platform_max && val2 > mc->platform_max)
+ return -EINVAL;
+ if (val2 > max)
+ return -EINVAL;
val_mask = mask << rshift;
- val2 = (ucontrol->value.integer.value[1] + min) & mask;
+ val2 = (val2 + min) & mask;
val2 = val2 << rshift;
err = snd_soc_component_update_bits(component, reg2, val_mask,
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh
index f62ce479c266..878125041fc3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh
@@ -31,7 +31,7 @@
# | 2001:db8:10::2/64 |
# +-------------------------+
-lib_dir=$(dirname $0)/../../../../net/forwarding
+lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="
decap_error_test
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index ee38ca888244..9cc84114741d 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -2,6 +2,7 @@
bind_bhash
csum
cmsg_sender
+diag_uid
fin_ack_lat
gro
hwtstamp_config
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 969620ae9928..1e4b397cece6 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,3 +1,3 @@
-TEST_GEN_PROGS := test_unix_oob unix_connect
+TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c
new file mode 100644
index 000000000000..5b88f7129fea
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/diag_uid.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <unistd.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sock_diag.h>
+#include <linux/unix_diag.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(diag_uid)
+{
+ int netlink_fd;
+ int unix_fd;
+ __u32 inode;
+ __u64 cookie;
+};
+
+FIXTURE_VARIANT(diag_uid)
+{
+ int unshare;
+ int udiag_show;
+};
+
+FIXTURE_VARIANT_ADD(diag_uid, uid)
+{
+ .unshare = 0,
+ .udiag_show = UDIAG_SHOW_UID
+};
+
+FIXTURE_VARIANT_ADD(diag_uid, uid_unshare)
+{
+ .unshare = CLONE_NEWUSER,
+ .udiag_show = UDIAG_SHOW_UID
+};
+
+FIXTURE_SETUP(diag_uid)
+{
+ struct stat file_stat;
+ socklen_t optlen;
+ int ret;
+
+ if (variant->unshare)
+ ASSERT_EQ(unshare(variant->unshare), 0);
+
+ self->netlink_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+ ASSERT_NE(self->netlink_fd, -1);
+
+ self->unix_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_NE(self->unix_fd, -1);
+
+ ret = fstat(self->unix_fd, &file_stat);
+ ASSERT_EQ(ret, 0);
+
+ self->inode = file_stat.st_ino;
+
+ optlen = sizeof(self->cookie);
+ ret = getsockopt(self->unix_fd, SOL_SOCKET, SO_COOKIE, &self->cookie, &optlen);
+ ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(diag_uid)
+{
+ close(self->netlink_fd);
+ close(self->unix_fd);
+}
+
+int send_request(struct __test_metadata *_metadata,
+ FIXTURE_DATA(diag_uid) *self,
+ const FIXTURE_VARIANT(diag_uid) *variant)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct unix_diag_req udr;
+ } req = {
+ .nlh = {
+ .nlmsg_len = sizeof(req),
+ .nlmsg_type = SOCK_DIAG_BY_FAMILY,
+ .nlmsg_flags = NLM_F_REQUEST
+ },
+ .udr = {
+ .sdiag_family = AF_UNIX,
+ .udiag_ino = self->inode,
+ .udiag_cookie = {
+ (__u32)self->cookie,
+ (__u32)(self->cookie >> 32)
+ },
+ .udiag_show = variant->udiag_show
+ }
+ };
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK
+ };
+ struct iovec iov = {
+ .iov_base = &req,
+ .iov_len = sizeof(req)
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1
+ };
+
+ return sendmsg(self->netlink_fd, &msg, 0);
+}
+
+/* Check that the reply carries a UNIX_DIAG_UID attribute equal to getuid(). */
+void render_response(struct __test_metadata *_metadata,
+		     struct unix_diag_req *udr, __u32 len)
+{
+	/* The reply payload is a struct unix_diag_msg, not the request layout. */
+	struct unix_diag_msg *udm = (struct unix_diag_msg *)udr;
+	int rta_len = len - NLMSG_LENGTH(sizeof(*udm));
+	struct rtattr *attr = (struct rtattr *)(udm + 1);
+
+	ASSERT_GT(len, NLMSG_LENGTH(sizeof(*udm)));
+	ASSERT_EQ(udm->udiag_family, AF_UNIX);
+	/* Skip unrelated attributes (e.g. UNIX_DIAG_SHUTDOWN) ahead of the UID. */
+	while (RTA_OK(attr, rta_len) && attr->rta_type != UNIX_DIAG_UID)
+		attr = RTA_NEXT(attr, rta_len);
+	ASSERT_NE(RTA_OK(attr, rta_len), 0);
+	ASSERT_EQ(*(uid_t *)RTA_DATA(attr), getuid());
+}
+
+void receive_response(struct __test_metadata *_metadata,
+ FIXTURE_DATA(diag_uid) *self)
+{
+ long buf[8192 / sizeof(long)];
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK
+ };
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = sizeof(buf)
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1
+ };
+ struct unix_diag_req *udr;
+ struct nlmsghdr *nlh;
+ int ret;
+
+ ret = recvmsg(self->netlink_fd, &msg, 0);
+ ASSERT_GT(ret, 0);
+
+ nlh = (struct nlmsghdr *)buf;
+ ASSERT_NE(NLMSG_OK(nlh, ret), 0);
+ ASSERT_EQ(nlh->nlmsg_type, SOCK_DIAG_BY_FAMILY);
+
+ render_response(_metadata, NLMSG_DATA(nlh), nlh->nlmsg_len);
+
+ nlh = NLMSG_NEXT(nlh, ret);
+ ASSERT_EQ(NLMSG_OK(nlh, ret), 0);
+}
+
+TEST_F(diag_uid, 1)
+{
+ int ret;
+
+ ret = send_request(_metadata, self, variant);
+ ASSERT_GT(ret, 0);
+
+ receive_response(_metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile
index a26cb94354f6..4abaf16d2077 100644
--- a/tools/testing/selftests/net/bpf/Makefile
+++ b/tools/testing/selftests/net/bpf/Makefile
@@ -12,7 +12,7 @@ CCINCLUDE += -I$(SCRATCH_DIR)/include
BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
-MAKE_DIRS := $(BUILD_DIR)/libbpf
+MAKE_DIRS := $(BUILD_DIR)/libbpf $(OUTPUT)/bpf
$(MAKE_DIRS):
mkdir -p $@
@@ -37,8 +37,8 @@ endif
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
-$(TEST_CUSTOM_PROGS): $(BPFOBJ)
- $(CLANG) -O2 -target bpf -c $(@:.o=.c) $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
+$(TEST_CUSTOM_PROGS): $(OUTPUT)/%.o: %.c $(BPFOBJ) | $(MAKE_DIRS)
+ $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
$(APIDIR)/linux/bpf.h \
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index ead7963b9bf0..bd89198cd817 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -43,5 +43,5 @@ CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_ACT_MIRRED=m
CONFIG_BAREUDP=m
CONFIG_IPV6_IOAM6_LWTUNNEL=y
-CONFIG_CRYPTO_SM4=y
+CONFIG_CRYPTO_SM4_GENERIC=y
CONFIG_AMT=m
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 2271a8727f62..5637b5dadabd 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -1711,13 +1711,21 @@ ipv4_del_addr_test()
$IP addr add dev dummy1 172.16.104.1/24
$IP addr add dev dummy1 172.16.104.11/24
+ $IP addr add dev dummy1 172.16.104.12/24
+ $IP addr add dev dummy1 172.16.104.13/24
$IP addr add dev dummy2 172.16.104.1/24
$IP addr add dev dummy2 172.16.104.11/24
+ $IP addr add dev dummy2 172.16.104.12/24
$IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11
+ $IP route add 172.16.106.0/24 dev lo src 172.16.104.12
+ $IP route add table 0 172.16.107.0/24 via 172.16.104.2 src 172.16.104.13
$IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11
+ $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12
set +e
# removing address from device in vrf should only remove route from vrf table
+ echo " Regular FIB info"
+
$IP addr del dev dummy2 172.16.104.11/24
$IP ro ls vrf red | grep -q 172.16.105.0/24
log_test $? 1 "Route removed from VRF when source address deleted"
@@ -1735,6 +1743,35 @@ ipv4_del_addr_test()
$IP ro ls vrf red | grep -q 172.16.105.0/24
log_test $? 0 "Route in VRF is not removed by address delete"
+ # removing address from device in vrf should only remove route from vrf
+ # table even when the associated fib info only differs in table ID
+ echo " Identical FIB info with different table ID"
+
+ $IP addr del dev dummy2 172.16.104.12/24
+ $IP ro ls vrf red | grep -q 172.16.106.0/24
+ log_test $? 1 "Route removed from VRF when source address deleted"
+
+ $IP ro ls | grep -q 172.16.106.0/24
+ log_test $? 0 "Route in default VRF not removed"
+
+ $IP addr add dev dummy2 172.16.104.12/24
+ $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12
+
+ $IP addr del dev dummy1 172.16.104.12/24
+ $IP ro ls | grep -q 172.16.106.0/24
+ log_test $? 1 "Route removed in default VRF when source address deleted"
+
+ $IP ro ls vrf red | grep -q 172.16.106.0/24
+ log_test $? 0 "Route in VRF is not removed by address delete"
+
+ # removing address from device in default vrf should remove route from
+ # the default vrf even when route was inserted with a table ID of 0.
+ echo " Table ID 0"
+
+ $IP addr del dev dummy1 172.16.104.13/24
+ $IP ro ls | grep -q 172.16.107.0/24
+ log_test $? 1 "Route removed in default VRF when source address deleted"
+
$IP li del dummy1
$IP li del dummy2
cleanup
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 0900c5438fbb..275491be3da2 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -782,7 +782,7 @@ kci_test_ipsec_offload()
tmpl proto esp src $srcip dst $dstip spi 9 \
mode transport reqid 42
check_err $?
- ip x p add dir out src $dstip/24 dst $srcip/24 \
+ ip x p add dir in src $dstip/24 dst $srcip/24 \
tmpl proto esp src $dstip dst $srcip spi 9 \
mode transport reqid 42
check_err $?
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
index 0a49907cd4fe..da5bfd834eff 100755
--- a/tools/testing/selftests/net/toeplitz.sh
+++ b/tools/testing/selftests/net/toeplitz.sh
@@ -32,7 +32,7 @@ DEV="eth0"
# This is determined by reading the RSS indirection table using ethtool.
get_rss_cfg_num_rxqs() {
echo $(ethtool -x "${DEV}" |
- egrep [[:space:]]+[0-9]+:[[:space:]]+ |
+ grep -E [[:space:]]+[0-9]+:[[:space:]]+ |
cut -d: -f2- |
awk '{$1=$1};1' |
tr ' ' '\n' |
diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh
index 26e193ffd2a2..873a892147e5 100644
--- a/tools/vm/slabinfo-gnuplot.sh
+++ b/tools/vm/slabinfo-gnuplot.sh
@@ -150,7 +150,7 @@ do_preprocess()
let lines=3
out=`basename "$in"`"-slabs-by-loss"
`cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\
- egrep -iv '\-\-|Name|Slabs'\
+ grep -E -iv '\-\-|Name|Slabs'\
| awk '{print $1" "$4+$2*$3" "$4}' > "$out"`
if [ $? -eq 0 ]; then
do_slabs_plotting "$out"
@@ -159,7 +159,7 @@ do_preprocess()
let lines=3
out=`basename "$in"`"-slabs-by-size"
`cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\
- egrep -iv '\-\-|Name|Slabs'\
+ grep -E -iv '\-\-|Name|Slabs'\
| awk '{print $1" "$4" "$4-$2*$3}' > "$out"`
if [ $? -eq 0 ]; then
do_slabs_plotting "$out"