aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mvebu.txt6
-rw-r--r--Documentation/devicetree/bindings/mfd/stm32-timers.txt2
-rw-r--r--Documentation/devicetree/bindings/net/macb.txt1
-rw-r--r--Documentation/devicetree/bindings/net/nfc/trf7970a.txt10
-rw-r--r--Documentation/networking/ipvlan.txt4
-rw-r--r--Documentation/networking/policy-routing.txt150
-rw-r--r--MAINTAINERS23
-rw-r--r--Makefile4
-rw-r--r--arch/arc/include/asm/processor.h2
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/boot/compressed/efi-header.S3
-rw-r--r--arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts8
-rw-r--r--arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts7
-rw-r--r--arch/arm/boot/dts/sun8i-h3-orangepi-2.dts8
-rw-r--r--arch/arm/boot/dts/sun8i-h3-orangepi-one.dts8
-rw-r--r--arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts5
-rw-r--r--arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts8
-rw-r--r--arch/arm/boot/dts/sunxi-h3-h5.dtsi40
-rw-r--r--arch/arm/configs/multi_v7_defconfig1
-rw-r--r--arch/arm/configs/sunxi_defconfig1
-rw-r--r--arch/arm/kernel/setup.c2
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts15
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts17
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts16
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi41
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/arm64/kernel/vdso.c5
-rw-r--r--arch/arm64/kernel/vdso/gettimeofday.S1
-rw-r--r--arch/blackfin/include/asm/processor.h5
-rw-r--r--arch/c6x/include/asm/processor.h5
-rw-r--r--arch/cris/arch-v10/kernel/process.c8
-rw-r--r--arch/cris/arch-v32/kernel/process.c8
-rw-r--r--arch/cris/include/asm/processor.h2
-rw-r--r--arch/frv/include/asm/processor.h5
-rw-r--r--arch/frv/kernel/process.c9
-rw-r--r--arch/h8300/include/asm/processor.h4
-rw-r--r--arch/h8300/kernel/process.c5
-rw-r--r--arch/hexagon/include/asm/processor.h3
-rw-r--r--arch/hexagon/kernel/process.c8
-rw-r--r--arch/ia64/include/asm/processor.h17
-rw-r--r--arch/m32r/include/asm/processor.h2
-rw-r--r--arch/m32r/kernel/process.c8
-rw-r--r--arch/m68k/include/asm/processor.h2
-rw-r--r--arch/m68k/kernel/process.c14
-rw-r--r--arch/microblaze/include/asm/processor.h6
-rw-r--r--arch/microblaze/kernel/process.c17
-rw-r--r--arch/mips/kvm/tlb.c6
-rw-r--r--arch/mn10300/include/asm/processor.h5
-rw-r--r--arch/mn10300/kernel/process.c8
-rw-r--r--arch/nios2/include/asm/processor.h3
-rw-r--r--arch/openrisc/include/asm/processor.h5
-rw-r--r--arch/openrisc/kernel/process.c5
-rw-r--r--arch/parisc/include/asm/processor.h5
-rw-r--r--arch/parisc/kernel/process.c5
-rw-r--r--arch/powerpc/include/asm/kprobes.h1
-rw-r--r--arch/powerpc/include/asm/processor.h6
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S11
-rw-r--r--arch/powerpc/kernel/kprobes.c17
-rw-r--r--arch/powerpc/kernel/setup_64.c31
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S59
-rw-r--r--arch/powerpc/kvm/book3s_hv.c51
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S12
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S75
-rw-r--r--arch/powerpc/perf/perf_regs.c3
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c94
-rw-r--r--arch/s390/include/asm/processor.h5
-rw-r--r--arch/s390/kernel/ipl.c7
-rw-r--r--arch/s390/kernel/process.c25
-rw-r--r--arch/s390/kvm/gaccess.c15
-rw-r--r--arch/score/include/asm/processor.h1
-rw-r--r--arch/score/kernel/process.c5
-rw-r--r--arch/sparc/include/asm/processor_32.h3
-rw-r--r--arch/sparc/include/asm/processor_64.h2
-rw-r--r--arch/sparc/kernel/process_32.c8
-rw-r--r--arch/sparc/kernel/process_64.c19
-rw-r--r--arch/tile/include/asm/processor.h7
-rw-r--r--arch/um/include/asm/processor-generic.h2
-rw-r--r--arch/um/kernel/um_arch.c6
-rw-r--r--arch/x86/events/intel/core.c4
-rw-r--r--arch/x86/include/asm/kvm_emulate.h1
-rw-r--r--arch/x86/include/asm/mshyperv.h3
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/kernel/process.c11
-rw-r--r--arch/x86/kvm/emulate.c1
-rw-r--r--arch/x86/kvm/x86.c62
-rw-r--r--arch/xtensa/include/asm/processor.h2
-rw-r--r--block/bio.c12
-rw-r--r--block/blk-mq-sched.c58
-rw-r--r--block/blk-mq-sched.h9
-rw-r--r--block/blk-mq.c16
-rw-r--r--crypto/algif_aead.c2
-rw-r--r--drivers/acpi/scan.c67
-rw-r--r--drivers/atm/fore200e.c12
-rw-r--r--drivers/atm/he.c2
-rw-r--r--drivers/atm/idt77252.c4
-rw-r--r--drivers/block/xen-blkback/blkback.c26
-rw-r--r--drivers/block/xen-blkback/common.h26
-rw-r--r--drivers/block/xen-blkback/xenbus.c15
-rw-r--r--drivers/bluetooth/btbcm.c1
-rw-r--r--drivers/bluetooth/hci_bcm.c12
-rw-r--r--drivers/bluetooth/hci_serdev.c4
-rw-r--r--drivers/char/random.c12
-rw-r--r--drivers/clocksource/arm_arch_timer.c4
-rw-r--r--drivers/clocksource/cadence_ttc_timer.c1
-rw-r--r--drivers/clocksource/timer-sun5i.c1
-rw-r--r--drivers/gpio/gpio-mvebu.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_crtc.c4
-rw-r--r--drivers/gpu/drm/drm_connector.c38
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem.h3
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c2
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c6
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c63
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c17
-rw-r--r--drivers/gpu/drm/i915/i915_gem_request.c2
-rw-r--r--drivers/gpu/drm/i915/i915_guc_submission.c4
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c5
-rw-r--r--drivers/gpu/drm/i915/intel_display.c30
-rw-r--r--drivers/gpu/drm/i915/intel_dp_aux_backlight.c2
-rw-r--r--drivers/gpu/drm/i915/intel_lrc.c6
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c41
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.h19
-rw-r--r--drivers/gpu/drm/radeon/radeon_combios.c7
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c1
-rw-r--r--drivers/i2c/busses/i2c-imx.c8
-rw-r--r--drivers/infiniband/hw/mlx5/main.c119
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c4
-rw-r--r--drivers/input/misc/soc_button_array.c20
-rw-r--r--drivers/input/rmi4/rmi_f54.c17
-rw-r--r--drivers/input/serio/i8042-x86ia64io.h7
-rw-r--r--drivers/irqchip/irq-mips-gic.c6
-rw-r--r--drivers/isdn/mISDN/socket.c2
-rw-r--r--drivers/md/dm-integrity.c12
-rw-r--r--drivers/md/dm-io.c4
-rw-r--r--drivers/md/dm-raid.c17
-rw-r--r--drivers/md/dm-raid1.c21
-rw-r--r--drivers/md/dm-thin.c26
-rw-r--r--drivers/mfd/arizona-core.c3
-rw-r--r--drivers/net/arcnet/arcdevice.h4
-rw-r--r--drivers/net/arcnet/arcnet.c81
-rw-r--r--drivers/net/arcnet/capmode.c2
-rw-r--r--drivers/net/arcnet/com20020-pci.c64
-rw-r--r--drivers/net/arcnet/com20020.c2
-rw-r--r--drivers/net/bonding/bond_options.c132
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-common.h53
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-desc.c94
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-dev.c244
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-drv.c245
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c2
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-i2c.c30
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-main.c14
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-mdio.c33
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-pci.c14
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c240
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-platform.c10
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-ptp.c2
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe.h56
-rw-r--r--drivers/net/ethernet/atheros/atl1c/atl1c_main.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c61
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h6
-rw-r--r--drivers/net/ethernet/cadence/Kconfig10
-rw-r--r--drivers/net/ethernet/cadence/Makefile5
-rw-r--r--drivers/net/ethernet/cadence/macb.h158
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c (renamed from drivers/net/ethernet/cadence/macb.c)203
-rwxr-xr-xdrivers/net/ethernet/cadence/macb_ptp.c518
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c33
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.c8
-rw-r--r--drivers/net/ethernet/freescale/fman/Kconfig1
-rw-r--r--drivers/net/ethernet/freescale/gianfar.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hnae.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_ethtool.c105
-rw-r--r--drivers/net/ethernet/ibm/ibmveth.c2
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cmd.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_main.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c60
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_resources.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c78
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h138
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c461
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h140
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c378
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h55
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c133
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c174
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c1042
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h96
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c87
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c376
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h94
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c164
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h204
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c154
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.c46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c3
-rw-r--r--drivers/net/ethernet/netronome/Kconfig10
-rw-r--r--drivers/net/ethernet/netronome/nfp/Makefile10
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c211
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.c16
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.h203
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.c124
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.h159
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/match.c292
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/metadata.c438
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c400
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_app.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_app.h30
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_main.c57
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_main.h4
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c17
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_main.c119
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_repr.c77
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_repr.h24
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_port.c92
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_port.h26
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h9
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c56
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c40
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h4
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c27
-rw-r--r--drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c3
-rw-r--r--drivers/net/ethernet/qlogic/qed/Makefile2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed.h30
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_cxt.c21
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev.c36
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_hsi.h1
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iwarp.c2409
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iwarp.h189
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_l2.c16
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ll2.c42
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_main.c17
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_rdma.c139
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_rdma.h5
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_roce.c20
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sp.h7
-rw-r--r--drivers/net/ethernet/rocker/rocker_ofdpa.c2
-rw-r--r--drivers/net/ethernet/sfc/ef10.c15
-rw-r--r--drivers/net/ethernet/sfc/mcdi.c7
-rw-r--r--drivers/net/ethernet/smsc/smc91x.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c10
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c2
-rw-r--r--drivers/net/ethernet/ti/cpsw-common.c2
-rw-r--r--drivers/net/ethernet/ti/cpsw.c6
-rw-r--r--drivers/net/ethernet/ti/cpts.h16
-rw-r--r--drivers/net/ethernet/ti/netcp_ethss.c18
-rw-r--r--drivers/net/geneve.c48
-rw-r--r--drivers/net/hyperv/netvsc_drv.c4
-rw-r--r--drivers/net/macvlan.c88
-rw-r--r--drivers/net/phy/dp83640.c2
-rw-r--r--drivers/net/phy/marvell.c1
-rw-r--r--drivers/net/phy/micrel.c2
-rw-r--r--drivers/net/phy/phy_device.c51
-rw-r--r--drivers/net/rionet.c2
-rw-r--r--drivers/net/usb/ax88179_178a.c16
-rw-r--r--drivers/net/usb/cdc_mbim.c7
-rw-r--r--drivers/net/usb/cdc_ncm.c54
-rw-r--r--drivers/net/veth.c4
-rw-r--r--drivers/net/virtio_net.c1
-rw-r--r--drivers/net/vxlan.c43
-rw-r--r--drivers/net/xen-netback/common.h1
-rw-r--r--drivers/net/xen-netback/interface.c6
-rw-r--r--drivers/net/xen-netback/netback.c6
-rw-r--r--drivers/nfc/Kconfig2
-rw-r--r--drivers/nfc/fdp/fdp.c15
-rw-r--r--drivers/nfc/fdp/i2c.c38
-rw-r--r--drivers/nfc/nfcmrvl/fw_dnld.c7
-rw-r--r--drivers/nfc/nfcmrvl/main.c40
-rw-r--r--drivers/nfc/nfcmrvl/uart.c11
-rw-r--r--drivers/nfc/nfcmrvl/usb.c4
-rw-r--r--drivers/nfc/nfcsim.c6
-rw-r--r--drivers/nfc/pn544/i2c.c3
-rw-r--r--drivers/nfc/st-nci/i2c.c164
-rw-r--r--drivers/nfc/st-nci/spi.c162
-rw-r--r--drivers/nfc/st21nfca/i2c.c62
-rw-r--r--drivers/nfc/trf7970a.c391
-rw-r--r--drivers/nvme/host/pci.c3
-rw-r--r--drivers/s390/net/ctcm_fsms.c12
-rw-r--r--drivers/s390/net/ctcm_main.c26
-rw-r--r--drivers/s390/net/netiucv.c10
-rw-r--r--drivers/s390/net/qeth_core_main.c4
-rw-r--r--drivers/scsi/qedi/qedi_fw.c1
-rw-r--r--drivers/scsi/qedi/qedi_main.c4
-rw-r--r--drivers/target/iscsi/iscsi_target.c22
-rw-r--r--drivers/target/target_core_internal.h2
-rw-r--r--drivers/target/target_core_tmr.c16
-rw-r--r--drivers/target/target_core_transport.c9
-rw-r--r--fs/autofs4/dev-ioctl.c2
-rw-r--r--fs/block_dev.c5
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/cifs/smb1ops.c9
-rw-r--r--fs/cifs/smb2ops.c8
-rw-r--r--fs/cifs/xattr.c2
-rw-r--r--fs/dax.c1
-rw-r--r--fs/exec.c28
-rw-r--r--fs/nfs/callback_xdr.c1
-rw-r--r--fs/nfs/dir.c51
-rw-r--r--fs/nfs/nfs4proc.c5
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/ocfs2/dlmglue.c4
-rw-r--r--fs/ocfs2/xattr.c23
-rw-r--r--fs/xfs/xfs_aops.c7
-rw-r--r--include/acpi/acpi_bus.h3
-rw-r--r--include/linux/atmdev.h2
-rw-r--r--include/linux/bio.h1
-rw-r--r--include/linux/blkdev.h2
-rw-r--r--include/linux/bpf-cgroup.h18
-rw-r--r--include/linux/bpf.h12
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/filter.h56
-rw-r--r--include/linux/igmp.h3
-rw-r--r--include/linux/inetdevice.h11
-rw-r--r--include/linux/mlx5/device.h3
-rw-r--r--include/linux/mlx5/driver.h20
-rw-r--r--include/linux/mlx5/mlx5_ifc.h14
-rw-r--r--include/linux/mlx5/mlx5_ifc_fpga.h288
-rw-r--r--include/linux/mlx5/qp.h14
-rw-r--r--include/linux/netfilter/nfnetlink.h10
-rw-r--r--include/linux/netfilter_bridge/ebtables.h2
-rw-r--r--include/linux/netpoll.h3
-rw-r--r--include/linux/phy.h5
-rw-r--r--include/linux/platform_data/nfcmrvl.h2
-rw-r--r--include/linux/platform_data/st-nci.h31
-rw-r--r--include/linux/qed/common_hsi.h4
-rw-r--r--include/linux/qed/iwarp_common.h53
-rw-r--r--include/linux/qed/qed_ll2_if.h3
-rw-r--r--include/linux/qed/qed_rdma_if.h114
-rw-r--r--include/linux/sctp.h118
-rw-r--r--include/linux/skbuff.h25
-rw-r--r--include/linux/slub_def.h1
-rw-r--r--include/linux/timekeeper_internal.h5
-rw-r--r--include/linux/usb/cdc_ncm.h3
-rw-r--r--include/net/af_unix.h3
-rw-r--r--include/net/arp.h2
-rw-r--r--include/net/fib_rules.h7
-rw-r--r--include/net/inet_frag.h4
-rw-r--r--include/net/inet_hashtables.h5
-rw-r--r--include/net/inetpeer.h4
-rw-r--r--include/net/ndisc.h2
-rw-r--r--include/net/neighbour.h15
-rw-r--r--include/net/net_namespace.h6
-rw-r--r--include/net/netfilter/br_netfilter.h2
-rw-r--r--include/net/netfilter/nf_conntrack.h10
-rw-r--r--include/net/netfilter/nf_conntrack_l3proto.h4
-rw-r--r--include/net/netfilter/nf_tables.h29
-rw-r--r--include/net/netlabel.h8
-rw-r--r--include/net/request_sock.h9
-rw-r--r--include/net/sctp/auth.h6
-rw-r--r--include/net/sctp/command.h4
-rw-r--r--include/net/sctp/constants.h6
-rw-r--r--include/net/sctp/sctp.h4
-rw-r--r--include/net/sctp/sm.h16
-rw-r--r--include/net/sctp/structs.h9
-rw-r--r--include/net/sock.h25
-rw-r--r--include/net/switchdev.h4
-rw-r--r--include/net/tcp.h66
-rw-r--r--include/net/udp.h61
-rw-r--r--include/net/vxlan.h10
-rw-r--r--include/net/xfrm.h7
-rw-r--r--include/uapi/linux/bpf.h82
-rw-r--r--include/uapi/linux/sctp.h6
-rw-r--r--kernel/bpf/arraymap.c27
-rw-r--r--kernel/bpf/cgroup.c37
-rw-r--r--kernel/bpf/core.c4
-rw-r--r--kernel/bpf/hashtab.c21
-rw-r--r--kernel/bpf/map_in_map.c5
-rw-r--r--kernel/bpf/map_in_map.h1
-rw-r--r--kernel/bpf/syscall.c28
-rw-r--r--kernel/bpf/verifier.c145
-rw-r--r--kernel/events/ring_buffer.c2
-rw-r--r--kernel/signal.c20
-rw-r--r--kernel/time/timekeeping.c71
-rw-r--r--kernel/trace/bpf_trace.c38
-rw-r--r--lib/cmdline.c6
-rw-r--r--mm/khugepaged.c1
-rw-r--r--mm/slub.c40
-rw-r--r--mm/vmalloc.c15
-rw-r--r--net/atm/br2684.c2
-rw-r--r--net/atm/clip.c8
-rw-r--r--net/atm/common.c10
-rw-r--r--net/atm/lec.c4
-rw-r--r--net/atm/mpc.c4
-rw-r--r--net/atm/pppoatm.c2
-rw-r--r--net/atm/proc.c2
-rw-r--r--net/atm/raw.c2
-rw-r--r--net/atm/signaling.c2
-rw-r--r--net/bluetooth/af_bluetooth.c2
-rw-r--r--net/bluetooth/bnep/core.c11
-rw-r--r--net/bluetooth/cmtp/core.c17
-rw-r--r--net/bluetooth/hci_core.c7
-rw-r--r--net/bluetooth/hidp/core.c33
-rw-r--r--net/bluetooth/l2cap_sock.c5
-rw-r--r--net/bluetooth/rfcomm/sock.c5
-rw-r--r--net/bluetooth/sco.c6
-rw-r--r--net/bridge/br_netfilter_hooks.c4
-rw-r--r--net/bridge/br_sysfs_br.c2
-rw-r--r--net/bridge/netfilter/ebt_dnat.c2
-rw-r--r--net/bridge/netfilter/ebt_mark.c2
-rw-r--r--net/bridge/netfilter/ebt_redirect.c2
-rw-r--r--net/bridge/netfilter/ebt_snat.c2
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/core/datagram.c6
-rw-r--r--net/core/dev.c42
-rw-r--r--net/core/fib_rules.c6
-rw-r--r--net/core/filter.c650
-rw-r--r--net/core/neighbour.c22
-rw-r--r--net/core/net-sysfs.c8
-rw-r--r--net/core/net_namespace.c21
-rw-r--r--net/core/netpoll.c10
-rw-r--r--net/core/pktgen.c16
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/skbuff.c26
-rw-r--r--net/core/sock.c32
-rw-r--r--net/dccp/ipv6.c2
-rw-r--r--net/decnet/dn_neigh.c2
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/cipso_ipv4.c4
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/igmp.c10
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_fragment.c14
-rw-r--r--net/ipv4/inet_hashtables.c6
-rw-r--r--net/ipv4/inet_timewait_sock.c8
-rw-r--r--net/ipv4/inetpeer.c18
-rw-r--r--net/ipv4/ip_fragment.c2
-rw-r--r--net/ipv4/ip_output.c9
-rw-r--r--net/ipv4/ipmr.c63
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c115
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c4
-rw-r--r--net/ipv4/ping.c4
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/tcp.c13
-rw-r--r--net/ipv4/tcp_cong.c32
-rw-r--r--net/ipv4/tcp_fastopen.c3
-rw-r--r--net/ipv4/tcp_input.c9
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_minisocks.c9
-rw-r--r--net/ipv4/tcp_offload.c2
-rw-r--r--net/ipv4/tcp_output.c33
-rw-r--r--net/ipv4/udp.c66
-rw-r--r--net/ipv4/udp_diag.c4
-rw-r--r--net/ipv6/addrconf.c23
-rw-r--r--net/ipv6/calipso.c4
-rw-r--r--net/ipv6/datagram.c10
-rw-r--r--net/ipv6/esp6.c2
-rw-r--r--net/ipv6/esp6_offload.c25
-rw-r--r--net/ipv6/inet6_hashtables.c4
-rw-r--r--net/ipv6/ip6_output.c6
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c10
-rw-r--r--net/ipv6/route.c6
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/tcp_ipv6.c6
-rw-r--r--net/ipv6/udp.c21
-rw-r--r--net/ipv6/xfrm6_input.c2
-rw-r--r--net/iucv/af_iucv.c2
-rw-r--r--net/kcm/kcmproc.c2
-rw-r--r--net/key/af_key.c27
-rw-r--r--net/l2tp/l2tp_debugfs.c3
-rw-r--r--net/llc/llc_conn.c8
-rw-r--r--net/llc/llc_sap.c2
-rw-r--r--net/netfilter/Makefile7
-rw-r--r--net/netfilter/ipset/ip_set_core.c39
-rw-r--r--net/netfilter/ipset/ip_set_getport.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c21
-rw-r--r--net/netfilter/nf_conntrack_amanda.c12
-rw-r--r--net/netfilter/nf_conntrack_core.c149
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c63
-rw-r--r--net/netfilter/nf_conntrack_helper.c50
-rw-r--r--net/netfilter/nf_conntrack_netlink.c47
-rw-r--r--net/netfilter/nf_conntrack_proto.c44
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c33
-rw-r--r--net/netfilter/nf_dup_netdev.c1
-rw-r--r--net/netfilter/nf_nat_core.c37
-rw-r--r--net/netfilter/nf_nat_proto_sctp.c2
-rw-r--r--net/netfilter/nf_tables_api.c137
-rw-r--r--net/netfilter/nfnetlink.c21
-rw-r--r--net/netfilter/nfnetlink_acct.c9
-rw-r--r--net/netfilter/nfnetlink_cthelper.c9
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c54
-rw-r--r--net/netfilter/nfnetlink_log.c6
-rw-r--r--net/netfilter/nfnetlink_queue.c12
-rw-r--r--net/netfilter/nft_compat.c3
-rw-r--r--net/netfilter/nft_rt.c16
-rw-r--r--net/netfilter/nft_set_bitmap.c13
-rw-r--r--net/netfilter/nft_set_hash.c471
-rw-r--r--net/netfilter/nft_set_rbtree.c21
-rw-r--r--net/netfilter/xt_TPROXY.c4
-rw-r--r--net/netfilter/xt_osf.c6
-rw-r--r--net/netfilter/xt_sctp.c20
-rw-r--r--net/netlink/af_netlink.c14
-rw-r--r--net/nfc/core.c31
-rw-r--r--net/nfc/digital_core.c12
-rw-r--r--net/nfc/digital_dep.c2
-rw-r--r--net/nfc/digital_technology.c3
-rw-r--r--net/nfc/llcp_sock.c9
-rw-r--r--net/nfc/nci/core.c12
-rw-r--r--net/nfc/netlink.c4
-rw-r--r--net/openvswitch/datapath.c81
-rw-r--r--net/packet/af_packet.c14
-rw-r--r--net/packet/internal.h4
-rw-r--r--net/phonet/socket.c4
-rw-r--r--net/rds/tcp_send.c2
-rw-r--r--net/rxrpc/af_rxrpc.c6
-rw-r--r--net/rxrpc/skbuff.c12
-rw-r--r--net/sched/em_meta.c2
-rw-r--r--net/sched/sch_api.c3
-rw-r--r--net/sched/sch_atm.c2
-rw-r--r--net/sctp/associola.c6
-rw-r--r--net/sctp/auth.c28
-rw-r--r--net/sctp/endpointola.c21
-rw-r--r--net/sctp/input.c24
-rw-r--r--net/sctp/inqueue.c15
-rw-r--r--net/sctp/output.c6
-rw-r--r--net/sctp/outqueue.c2
-rw-r--r--net/sctp/proc.c2
-rw-r--r--net/sctp/sm_make_chunk.c80
-rw-r--r--net/sctp/sm_sideeffect.c7
-rw-r--r--net/sctp/sm_statefuns.c132
-rw-r--r--net/sctp/sm_statetable.c4
-rw-r--r--net/sctp/socket.c100
-rw-r--r--net/sctp/stream.c4
-rw-r--r--net/sctp/ulpevent.c2
-rw-r--r--net/sctp/ulpqueue.c2
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/unix/af_unix.c16
-rw-r--r--net/xfrm/Makefile3
-rw-r--r--net/xfrm/xfrm_device.c2
-rw-r--r--net/xfrm/xfrm_policy.c4
-rw-r--r--net/xfrm/xfrm_user.c1
-rw-r--r--samples/bpf/Makefile9
-rw-r--r--samples/bpf/bpf_helpers.h3
-rw-r--r--samples/bpf/bpf_load.c13
-rw-r--r--samples/bpf/load_sock_ops.c97
-rw-r--r--samples/bpf/sockex3_user.c15
-rw-r--r--samples/bpf/tcp_bufs_kern.c86
-rw-r--r--samples/bpf/tcp_clamp_kern.c102
-rw-r--r--samples/bpf/tcp_cong_kern.c83
-rw-r--r--samples/bpf/tcp_iw_kern.c88
-rw-r--r--samples/bpf/tcp_rwnd_kern.c69
-rw-r--r--samples/bpf/tcp_synrto_kern.c69
-rw-r--r--samples/bpf/test_map_in_map_user.c17
-rw-r--r--scripts/Makefile.headersinst10
-rw-r--r--scripts/genksyms/genksyms.h2
-rw-r--r--scripts/kconfig/Makefile2
-rw-r--r--scripts/kconfig/nconf.c12
-rw-r--r--scripts/kconfig/nconf.gui.c4
-rwxr-xr-xscripts/tags.sh1
-rw-r--r--sound/core/pcm_lib.c4
-rw-r--r--sound/firewire/amdtp-stream.c8
-rw-r--r--sound/firewire/amdtp-stream.h2
-rw-r--r--sound/pci/hda/hda_intel.c11
-rw-r--r--tools/include/uapi/linux/bpf.h80
-rw-r--r--tools/perf/util/probe-event.c2
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c231
585 files changed, 17293 insertions, 4689 deletions
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
index 42c3bb2d53e8..01e331a5f3e7 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
@@ -41,9 +41,9 @@ Required properties:
Optional properties:
In order to use the GPIO lines in PWM mode, some additional optional
-properties are required. Only Armada 370 and XP support these properties.
+properties are required.
-- compatible: Must contain "marvell,armada-370-xp-gpio"
+- compatible: Must contain "marvell,armada-370-gpio"
- reg: an additional register set is needed, for the GPIO Blink
Counter on/off registers.
@@ -71,7 +71,7 @@ Example:
};
gpio1: gpio@18140 {
- compatible = "marvell,armada-370-xp-gpio";
+ compatible = "marvell,armada-370-gpio";
reg = <0x18140 0x40>, <0x181c8 0x08>;
reg-names = "gpio", "pwm";
ngpios = <17>;
diff --git a/Documentation/devicetree/bindings/mfd/stm32-timers.txt b/Documentation/devicetree/bindings/mfd/stm32-timers.txt
index bbd083f5600a..1db6e0057a63 100644
--- a/Documentation/devicetree/bindings/mfd/stm32-timers.txt
+++ b/Documentation/devicetree/bindings/mfd/stm32-timers.txt
@@ -31,7 +31,7 @@ Example:
compatible = "st,stm32-timers";
reg = <0x40010000 0x400>;
clocks = <&rcc 0 160>;
- clock-names = "clk_int";
+ clock-names = "int";
pwm {
compatible = "st,stm32-pwm";
diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt
index 1506e948610c..27966ae741e0 100644
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -22,6 +22,7 @@ Required properties:
Required elements: 'pclk', 'hclk'
Optional elements: 'tx_clk'
Optional elements: 'rx_clk' applies to cdns,zynqmp-gem
+ Optional elements: 'tsu_clk'
- clocks: Phandles to input clocks.
Optional properties for PHY child node:
diff --git a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
index c627bbb3009e..60c833d62181 100644
--- a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
+++ b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
@@ -13,14 +13,10 @@ Optional SoC Specific Properties:
- pinctrl-names: Contains only one value - "default".
- pintctrl-0: Specifies the pin control groups used for this controller.
- autosuspend-delay: Specify autosuspend delay in milliseconds.
-- vin-voltage-override: Specify voltage of VIN pin in microvolts.
- irq-status-read-quirk: Specify that the trf7970a being used has the
"IRQ Status Read" erratum.
- en2-rf-quirk: Specify that the trf7970a being used has the "EN2 RF"
erratum.
-- t5t-rmb-extra-byte-quirk: Specify that the trf7970a has the erratum
- where an extra byte is returned by Read Multiple Block commands issued
- to Type 5 tags.
- vdd-io-supply: Regulator specifying voltage for vdd-io
- clock-frequency: Set to specify that the input frequency to the trf7970a is 13560000Hz or 27120000Hz
@@ -37,15 +33,13 @@ Example (for ARM-based BeagleBone with TRF7970A on SPI1):
spi-max-frequency = <2000000>;
interrupt-parent = <&gpio2>;
interrupts = <14 0>;
- ti,enable-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>,
- <&gpio2 5 GPIO_ACTIVE_LOW>;
+ ti,enable-gpios = <&gpio2 2 GPIO_ACTIVE_HIGH>,
+ <&gpio2 5 GPIO_ACTIVE_HIGH>;
vin-supply = <&ldo3_reg>;
- vin-voltage-override = <5000000>;
vdd-io-supply = <&ldo2_reg>;
autosuspend-delay = <30000>;
irq-status-read-quirk;
en2-rf-quirk;
- t5t-rmb-extra-byte-quirk;
clock-frequency = <27120000>;
status = "okay";
};
diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt
index 24196cef7c91..1fe42a874aae 100644
--- a/Documentation/networking/ipvlan.txt
+++ b/Documentation/networking/ipvlan.txt
@@ -22,9 +22,9 @@ The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module
There are no module parameters for this driver and it can be configured
using IProute2/ip utility.
- ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | l3 | l3s }
+ ip link add link <master-dev> name <slave-dev> type ipvlan mode { l2 | l3 | l3s }
- e.g. ip link add link ipvl0 eth0 type ipvlan mode l2
+ e.g. ip link add link eth0 name ipvl0 type ipvlan mode l2
4. Operating modes:
diff --git a/Documentation/networking/policy-routing.txt b/Documentation/networking/policy-routing.txt
deleted file mode 100644
index 36f6936d7f21..000000000000
--- a/Documentation/networking/policy-routing.txt
+++ /dev/null
@@ -1,150 +0,0 @@
-Classes
--------
-
- "Class" is a complete routing table in common sense.
- I.e. it is tree of nodes (destination prefix, tos, metric)
- with attached information: gateway, device etc.
- This tree is looked up as specified in RFC1812 5.2.4.3
- 1. Basic match
- 2. Longest match
- 3. Weak TOS.
- 4. Metric. (should not be in kernel space, but they are)
- 5. Additional pruning rules. (not in kernel space).
-
- We have two special type of nodes:
- REJECT - abort route lookup and return an error value.
- THROW - abort route lookup in this class.
-
-
- Currently the number of classes is limited to 255
- (0 is reserved for "not specified class")
-
- Three classes are builtin:
-
- RT_CLASS_LOCAL=255 - local interface addresses,
- broadcasts, nat addresses.
-
- RT_CLASS_MAIN=254 - all normal routes are put there
- by default.
-
- RT_CLASS_DEFAULT=253 - if ip_fib_model==1, then
- normal default routes are put there, if ip_fib_model==2
- all gateway routes are put there.
-
-
-Rules
------
- Rule is a record of (src prefix, src interface, tos, dst prefix)
- with attached information.
-
- Rule types:
- RTP_ROUTE - lookup in attached class
- RTP_NAT - lookup in attached class and if a match is found,
- translate packet source address.
- RTP_MASQUERADE - lookup in attached class and if a match is found,
- masquerade packet as sourced by us.
- RTP_DROP - silently drop the packet.
- RTP_REJECT - drop the packet and send ICMP NET UNREACHABLE.
- RTP_PROHIBIT - drop the packet and send ICMP COMM. ADM. PROHIBITED.
-
- Rule flags:
- RTRF_LOG - log route creations.
- RTRF_VALVE - One way route (used with masquerading)
-
-Default setup:
-
-root@amber:/pub/ip-routing # iproute -r
-Kernel routing policy rules
-Pref Source Destination TOS Iface Cl
- 0 default default 00 * 255
- 254 default default 00 * 254
- 255 default default 00 * 253
-
-
-Lookup algorithm
-----------------
-
- We scan rules list, and if a rule is matched, apply it.
- If a route is found, return it.
- If it is not found or a THROW node was matched, continue
- to scan rules.
-
-Applications
-------------
-
-1. Just ignore classes. All the routes are put into MAIN class
- (and/or into DEFAULT class).
-
- HOWTO: iproute add PREFIX [ tos TOS ] [ gw GW ] [ dev DEV ]
- [ metric METRIC ] [ reject ] ... (look at iproute utility)
-
- or use route utility from current net-tools.
-
-2. Opposite case. Just forget all that you know about routing
- tables. Every rule is supplied with its own gateway, device
- info. record. This approach is not appropriate for automated
- route maintenance, but it is ideal for manual configuration.
-
- HOWTO: iproute addrule [ from PREFIX ] [ to PREFIX ] [ tos TOS ]
- [ dev INPUTDEV] [ pref PREFERENCE ] route [ gw GATEWAY ]
- [ dev OUTDEV ] .....
-
- Warning: As of now the size of the routing table in this
- approach is limited to 256. If someone likes this model, I'll
- relax this limitation.
-
-3. OSPF classes (see RFC1583, RFC1812 E.3.3)
- Very clean, stable and robust algorithm for OSPF routing
- domains. Unfortunately, it is not widely used in the Internet.
-
- Proposed setup:
- 255 local addresses
- 254 interface routes
- 253 ASE routes with external metric
- 252 ASE routes with internal metric
- 251 inter-area routes
- 250 intra-area routes for 1st area
- 249 intra-area routes for 2nd area
- etc.
-
- Rules:
- iproute addrule class 253
- iproute addrule class 252
- iproute addrule class 251
- iproute addrule to a-prefix-for-1st-area class 250
- iproute addrule to another-prefix-for-1st-area class 250
- ...
- iproute addrule to a-prefix-for-2nd-area class 249
- ...
-
- Area classes must be terminated with reject record.
- iproute add default reject class 250
- iproute add default reject class 249
- ...
-
-4. The Variant Router Requirements Algorithm (RFC1812 E.3.2)
- Create 16 classes for different TOS values.
- It is a funny, but pretty useless algorithm.
- I listed it just to show the power of new routing code.
-
-5. All the variety of combinations......
-
-
-GATED
------
-
- Gated does not understand classes, but it will work
- happily in MAIN+DEFAULT. All policy routes can be set
- and maintained manually.
-
-IMPORTANT NOTE
---------------
- route.c has a compilation time switch CONFIG_IP_LOCAL_RT_POLICY.
- If it is set, locally originated packets are routed
- using all the policy list. This is not very convenient and
- pretty ambiguous when used with NAT and masquerading.
- I set it to FALSE by default.
-
-
-Alexey Kuznetov
diff --git a/MAINTAINERS b/MAINTAINERS
index f81e1b765353..5bebe20811c4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2967,7 +2967,7 @@ F: sound/pci/oxygen/
C6X ARCHITECTURE
M: Mark Salter <[email protected]>
-M: Aurelien Jacquiot <[email protected]>
+M: Aurelien Jacquiot <[email protected]>
W: http://www.linux-c6x.org/wiki/index.php/Main_Page
S: Maintained
@@ -8330,6 +8330,16 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/
F: drivers/net/ethernet/mellanox/mlx5/core/fpga/*
F: include/linux/mlx5/mlx5_ifc_fpga.h
+MELLANOX ETHERNET INNOVA IPSEC DRIVER
+M: Ilan Tayari <[email protected]>
+R: Boris Pismenny <[email protected]>
+S: Supported
+W: http://www.mellanox.com
+Q: http://patchwork.ozlabs.org/project/netdev/list/
+F: drivers/net/ethernet/mellanox/mlx5/core/en_ipsec/*
+F: drivers/net/ethernet/mellanox/mlx5/core/ipsec*
+
MELLANOX ETHERNET SWITCH DRIVERS
M: Jiri Pirko <[email protected]>
M: Ido Schimmel <[email protected]>
@@ -9072,9 +9082,6 @@ F: include/uapi/linux/nfc.h
F: drivers/nfc/
F: include/linux/platform_data/nfcmrvl.h
F: include/linux/platform_data/nxp-nci.h
-F: include/linux/platform_data/pn544.h
-F: include/linux/platform_data/st21nfca.h
-F: include/linux/platform_data/st-nci.h
F: Documentation/devicetree/bindings/net/nfc/
NFS, SUNRPC, AND LOCKD CLIENTS
@@ -11409,6 +11416,14 @@ F: kernel/time/alarmtimer.c
F: kernel/time/ntp.c
F: tools/testing/selftests/timers/
+TI TRF7970A NFC DRIVER
+M: Mark Greer <[email protected]>
+L: [email protected] (moderated for non-subscribers)
+S: Supported
+F: drivers/nfc/trf7970a.c
+F: Documentation/devicetree/bindings/net/nfc/trf7970a.txt
+
SC1200 WDT DRIVER
M: Zwane Mwaikambo <[email protected]>
S: Maintained
diff --git a/Makefile b/Makefile
index e40c471abe29..6d8a984ed9c9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 4
PATCHLEVEL = 12
SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
NAME = Fearless Coyote
# *DOCUMENTATION*
@@ -1437,7 +1437,7 @@ help:
@echo ' make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build'
@echo ' make V=2 [targets] 2 => give reason for rebuild of target'
@echo ' make O=dir [targets] Locate all output files in "dir", including .config'
- @echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)'
+ @echo ' make C=1 [targets] Check re-compiled c source with $$CHECK (sparse by default)'
@echo ' make C=2 [targets] Force check of all c source with $$CHECK'
@echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
@echo ' make W=n [targets] Enable extra gcc checks, n=1,2,3 where'
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 6e1242da0159..4104a0839214 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -86,8 +86,6 @@ struct task_struct;
#define TSK_K_BLINK(tsk) TSK_K_REG(tsk, 4)
#define TSK_K_FP(tsk) TSK_K_REG(tsk, 0)
-#define thread_saved_pc(tsk) TSK_K_BLINK(tsk)
-
extern void start_thread(struct pt_regs * regs, unsigned long pc,
unsigned long usp);
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4c1a35f15838..c0fcab6a5504 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1416,6 +1416,7 @@ choice
config VMSPLIT_3G
bool "3G/1G user/kernel split"
config VMSPLIT_3G_OPT
+ depends on !ARM_LPAE
bool "3G/1G user/kernel split (for full 1G low memory)"
config VMSPLIT_2G
bool "2G/2G user/kernel split"
diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S
index 3f7d1b74c5e0..a17ca8d78656 100644
--- a/arch/arm/boot/compressed/efi-header.S
+++ b/arch/arm/boot/compressed/efi-header.S
@@ -17,7 +17,8 @@
@ there.
.inst 'M' | ('Z' << 8) | (0x1310 << 16) @ tstne r0, #0x4d000
#else
- W(mov) r0, r0
+ AR_CLASS( mov r0, r0 )
+ M_CLASS( nop.w )
#endif
.endm
diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
index dd3525a0f06a..9e8b082c134f 100644
--- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
+++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
@@ -57,7 +57,6 @@
aliases {
serial0 = &uart0;
/* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */
- ethernet0 = &emac;
ethernet1 = &xr819;
};
@@ -104,13 +103,6 @@
status = "okay";
};
-&emac {
- phy-handle = <&int_mii_phy>;
- phy-mode = "mii";
- allwinner,leds-active-low;
- status = "okay";
-};
-
&mmc0 {
pinctrl-names = "default";
pinctrl-0 = <&mmc0_pins_a>;
diff --git a/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts b/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts
index 78f6c24952dd..8d2cc6e9a03f 100644
--- a/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts
+++ b/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts
@@ -46,10 +46,3 @@
model = "FriendlyARM NanoPi NEO";
compatible = "friendlyarm,nanopi-neo", "allwinner,sun8i-h3";
};
-
-&emac {
- phy-handle = <&int_mii_phy>;
- phy-mode = "mii";
- allwinner,leds-active-low;
- status = "okay";
-};
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts
index cedd326b6089..5b6d14555b7c 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts
@@ -54,7 +54,6 @@
aliases {
serial0 = &uart0;
/* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */
- ethernet0 = &emac;
ethernet1 = &rtl8189;
};
@@ -109,13 +108,6 @@
status = "okay";
};
-&emac {
- phy-handle = <&int_mii_phy>;
- phy-mode = "mii";
- allwinner,leds-active-low;
- status = "okay";
-};
-
&ir {
pinctrl-names = "default";
pinctrl-0 = <&ir_pins_a>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
index 6880268e8b87..5fea430e0eb1 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
@@ -52,7 +52,6 @@
compatible = "xunlong,orangepi-one", "allwinner,sun8i-h3";
aliases {
- ethernet0 = &emac;
serial0 = &uart0;
};
@@ -98,13 +97,6 @@
status = "okay";
};
-&emac {
- phy-handle = <&int_mii_phy>;
- phy-mode = "mii";
- allwinner,leds-active-low;
- status = "okay";
-};
-
&mmc0 {
pinctrl-names = "default";
pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
index a10281b455f5..8b93f5c781a7 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
@@ -53,11 +53,6 @@
};
};
-&emac {
- /* LEDs changed to active high on the plus */
- /delete-property/ allwinner,leds-active-low;
-};
-
&mmc1 {
pinctrl-names = "default";
pinctrl-0 = <&mmc1_pins_a>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
index 52e65755c51a..f148111c326d 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
@@ -52,7 +52,6 @@
compatible = "xunlong,orangepi-pc", "allwinner,sun8i-h3";
aliases {
- ethernet0 = &emac;
serial0 = &uart0;
};
@@ -110,13 +109,6 @@
status = "okay";
};
-&emac {
- phy-handle = <&int_mii_phy>;
- phy-mode = "mii";
- allwinner,leds-active-low;
- status = "okay";
-};
-
&ir {
pinctrl-names = "default";
pinctrl-0 = <&ir_pins_a>;
diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
index a6d4fda544e1..d4f600dbb7eb 100644
--- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi
+++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
@@ -83,12 +83,6 @@
#size-cells = <1>;
ranges;
- syscon: syscon@1c00000 {
- compatible = "allwinner,sun8i-h3-system-controller",
- "syscon";
- reg = <0x01c00000 0x1000>;
- };
-
dma: dma-controller@01c02000 {
compatible = "allwinner,sun8i-h3-dma";
reg = <0x01c02000 0x1000>;
@@ -285,14 +279,6 @@
interrupt-controller;
#interrupt-cells = <3>;
- emac_rgmii_pins: emac0 {
- pins = "PD0", "PD1", "PD2", "PD3", "PD4",
- "PD5", "PD7", "PD8", "PD9", "PD10",
- "PD12", "PD13", "PD15", "PD16", "PD17";
- function = "emac";
- drive-strength = <40>;
- };
-
i2c0_pins: i2c0 {
pins = "PA11", "PA12";
function = "i2c0";
@@ -389,32 +375,6 @@
clocks = <&osc24M>;
};
- emac: ethernet@1c30000 {
- compatible = "allwinner,sun8i-h3-emac";
- syscon = <&syscon>;
- reg = <0x01c30000 0x104>;
- interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "macirq";
- resets = <&ccu RST_BUS_EMAC>;
- reset-names = "stmmaceth";
- clocks = <&ccu CLK_BUS_EMAC>;
- clock-names = "stmmaceth";
- #address-cells = <1>;
- #size-cells = <0>;
- status = "disabled";
-
- mdio: mdio {
- #address-cells = <1>;
- #size-cells = <0>;
- int_mii_phy: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <1>;
- clocks = <&ccu CLK_BUS_EPHY>;
- resets = <&ccu RST_BUS_EPHY>;
- };
- };
- };
-
spi0: spi@01c68000 {
compatible = "allwinner,sun8i-h3-spi";
reg = <0x01c68000 0x1000>;
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 6da6af8881f7..2685e03600b1 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -257,7 +257,6 @@ CONFIG_SMSC911X=y
CONFIG_STMMAC_ETH=y
CONFIG_STMMAC_PLATFORM=y
CONFIG_DWMAC_DWC_QOS_ETH=y
-CONFIG_DWMAC_SUN8I=y
CONFIG_TI_CPSW=y
CONFIG_XILINX_EMACLITE=y
CONFIG_AT803X_PHY=y
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 504e02238031..5cd5dd70bc83 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -40,7 +40,6 @@ CONFIG_ATA=y
CONFIG_AHCI_SUNXI=y
CONFIG_NETDEVICES=y
CONFIG_SUN4I_EMAC=y
-CONFIG_DWMAC_SUN8I=y
# CONFIG_NET_VENDOR_ARC is not set
# CONFIG_NET_CADENCE is not set
# CONFIG_NET_VENDOR_BROADCOM is not set
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 32e1a9513dc7..4e80bf7420d4 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -315,7 +315,7 @@ static void __init cacheid_init(void)
if (arch >= CPU_ARCH_ARMv6) {
unsigned int cachetype = read_cpuid_cachetype();
- if ((arch == CPU_ARCH_ARMv7M) && !cachetype) {
+ if ((arch == CPU_ARCH_ARMv7M) && !(cachetype & 0xf000f)) {
cacheid = 0;
} else if ((cachetype & (7 << 29)) == 4 << 29) {
/* ARMv7 register format */
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
index 0d1f026d831a..6872135d7f84 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
@@ -67,14 +67,6 @@
};
};
-&emac {
- pinctrl-names = "default";
- pinctrl-0 = <&rgmii_pins>;
- phy-mode = "rgmii";
- phy-handle = <&ext_rgmii_phy>;
- status = "okay";
-};
-
&i2c1 {
pinctrl-names = "default";
pinctrl-0 = <&i2c1_pins>;
@@ -85,13 +77,6 @@
bias-pull-up;
};
-&mdio {
- ext_rgmii_phy: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <1>;
- };
-};
-
&mmc0 {
pinctrl-names = "default";
pinctrl-0 = <&mmc0_pins>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
index 24f1aac366d6..790d14daaa6a 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
@@ -46,20 +46,5 @@
model = "Pine64+";
compatible = "pine64,pine64-plus", "allwinner,sun50i-a64";
- /* TODO: Camera, touchscreen, etc. */
-};
-
-&emac {
- pinctrl-names = "default";
- pinctrl-0 = <&rgmii_pins>;
- phy-mode = "rgmii";
- phy-handle = <&ext_rgmii_phy>;
- status = "okay";
-};
-
-&mdio {
- ext_rgmii_phy: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <1>;
- };
+ /* TODO: Camera, Ethernet PHY, touchscreen, etc. */
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
index 3b491c0e3b0d..c680ed385da3 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
@@ -70,15 +70,6 @@
status = "okay";
};
-&emac {
- pinctrl-names = "default";
- pinctrl-0 = <&rmii_pins>;
- phy-mode = "rmii";
- phy-handle = <&ext_rmii_phy1>;
- status = "okay";
-
-};
-
&i2c1 {
pinctrl-names = "default";
pinctrl-0 = <&i2c1_pins>;
@@ -89,13 +80,6 @@
bias-pull-up;
};
-&mdio {
- ext_rmii_phy1: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <1>;
- };
-};
-
&mmc0 {
pinctrl-names = "default";
pinctrl-0 = <&mmc0_pins>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
index 769ced01a998..166c9ef884dc 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
@@ -129,12 +129,6 @@
#size-cells = <1>;
ranges;
- syscon: syscon@1c00000 {
- compatible = "allwinner,sun50i-a64-system-controller",
- "syscon";
- reg = <0x01c00000 0x1000>;
- };
-
mmc0: mmc@1c0f000 {
compatible = "allwinner,sun50i-a64-mmc";
reg = <0x01c0f000 0x1000>;
@@ -287,21 +281,6 @@
bias-pull-up;
};
- rmii_pins: rmii_pins {
- pins = "PD10", "PD11", "PD13", "PD14", "PD17",
- "PD18", "PD19", "PD20", "PD22", "PD23";
- function = "emac";
- drive-strength = <40>;
- };
-
- rgmii_pins: rgmii_pins {
- pins = "PD8", "PD9", "PD10", "PD11", "PD12",
- "PD13", "PD15", "PD16", "PD17", "PD18",
- "PD19", "PD20", "PD21", "PD22", "PD23";
- function = "emac";
- drive-strength = <40>;
- };
-
uart0_pins_a: uart0@0 {
pins = "PB8", "PB9";
function = "uart0";
@@ -406,26 +385,6 @@
#size-cells = <0>;
};
- emac: ethernet@1c30000 {
- compatible = "allwinner,sun50i-a64-emac";
- syscon = <&syscon>;
- reg = <0x01c30000 0x100>;
- interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "macirq";
- resets = <&ccu RST_BUS_EMAC>;
- reset-names = "stmmaceth";
- clocks = <&ccu CLK_BUS_EMAC>;
- clock-names = "stmmaceth";
- status = "disabled";
- #address-cells = <1>;
- #size-cells = <0>;
-
- mdio: mdio {
- #address-cells = <1>;
- #size-cells = <0>;
- };
- };
-
gic: interrupt-controller@1c81000 {
compatible = "arm,gic-400";
reg = <0x01c81000 0x1000>,
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index d789858c4f1b..97c123e09e45 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -191,7 +191,6 @@ CONFIG_RAVB=y
CONFIG_SMC91X=y
CONFIG_SMSC911X=y
CONFIG_STMMAC_ETH=m
-CONFIG_DWMAC_SUN8I=m
CONFIG_MDIO_BUS_MUX_MMIOREG=y
CONFIG_MESON_GXL_PHY=m
CONFIG_MICREL_PHY=y
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 41b6e31f8f55..d0cb007fa482 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -221,10 +221,11 @@ void update_vsyscall(struct timekeeper *tk)
/* tkr_mono.cycle_last == tkr_raw.cycle_last */
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
vdso_data->raw_time_sec = tk->raw_time.tv_sec;
- vdso_data->raw_time_nsec = tk->raw_time.tv_nsec;
+ vdso_data->raw_time_nsec = (tk->raw_time.tv_nsec <<
+ tk->tkr_raw.shift) +
+ tk->tkr_raw.xtime_nsec;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
- /* tkr_raw.xtime_nsec == 0 */
vdso_data->cs_mono_mult = tk->tkr_mono.mult;
vdso_data->cs_raw_mult = tk->tkr_raw.mult;
/* tkr_mono.shift == tkr_raw.shift */
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index e00b4671bd7c..76320e920965 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -256,7 +256,6 @@ monotonic_raw:
seqcnt_check fail=monotonic_raw
/* All computations are done with left-shifted nsecs. */
- lsl x14, x14, x12
get_nsec_per_sec res=x9
lsl x9, x9, x12
diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h
index 85d4af97c986..dbdbb8a558df 100644
--- a/arch/blackfin/include/asm/processor.h
+++ b/arch/blackfin/include/asm/processor.h
@@ -75,11 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-/*
- * Return saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk) (tsk->thread.pc)
-
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) \
diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h
index b9eb3da7f278..7c87b5be53b5 100644
--- a/arch/c6x/include/asm/processor.h
+++ b/arch/c6x/include/asm/processor.h
@@ -96,11 +96,6 @@ static inline void release_thread(struct task_struct *dead_task)
#define release_segments(mm) do { } while (0)
/*
- * saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc)
-
-/*
* saved kernel SP and DP of a blocked thread.
*/
#ifdef _BIG_ENDIAN
diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c
index e299d30105b5..a2cdb1521aca 100644
--- a/arch/cris/arch-v10/kernel/process.c
+++ b/arch/cris/arch-v10/kernel/process.c
@@ -69,14 +69,6 @@ void hard_reset_now (void)
while(1) /* waiting for RETRIBUTION! */ ;
}
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
- return task_pt_regs(t)->irp;
-}
-
/* setup the child's kernel stack with a pt_regs and switch_stack on it.
* it will be un-nested during _resume and _ret_from_sys_call when the
* new thread is scheduled.
diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c
index c530a8fa87ce..fe87b383fbf3 100644
--- a/arch/cris/arch-v32/kernel/process.c
+++ b/arch/cris/arch-v32/kernel/process.c
@@ -85,14 +85,6 @@ hard_reset_now(void)
}
/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
- return task_pt_regs(t)->erp;
-}
-
-/*
* Setup the child's kernel stack with a pt_regs and call switch_stack() on it.
* It will be unnested during _resume and _ret_from_sys_call when the new thread
* is scheduled.
diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h
index 15b815df29c1..bc2729e4b2c9 100644
--- a/arch/cris/include/asm/processor.h
+++ b/arch/cris/include/asm/processor.h
@@ -52,8 +52,6 @@ unsigned long get_wchan(struct task_struct *p);
#define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->thread.usp)
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
/* Free all resources held by a thread. */
static inline void release_thread(struct task_struct *dead_task)
{
diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h
index ddaeb9cc9143..e4d08d74ed9f 100644
--- a/arch/frv/include/asm/processor.h
+++ b/arch/frv/include/asm/processor.h
@@ -96,11 +96,6 @@ extern asmlinkage void *restore_user_regs(const struct user_context *target, ...
#define release_segments(mm) do { } while (0)
#define forget_segments() do { } while (0)
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) ((tsk)->thread.frame0->pc)
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 5a4c92abc99e..a957b374e3a6 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -198,15 +198,6 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
}
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- /* Check whether the thread is blocked in resume() */
- if (in_sched_functions(tsk->thread.pc))
- return ((unsigned long *)tsk->thread.fp)[2];
- else
- return tsk->thread.pc;
-}
-
int elf_check_arch(const struct elf32_hdr *hdr)
{
unsigned long hsr0 = __get_HSR(0);
diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h
index 65132d7ae9e5..afa53147e66a 100644
--- a/arch/h8300/include/asm/processor.h
+++ b/arch/h8300/include/asm/processor.h
@@ -110,10 +110,6 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk);
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) \
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 0f5db5bb561b..d1ddcabbbe83 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -129,11 +129,6 @@ int copy_thread(unsigned long clone_flags,
return 0;
}
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return ((struct pt_regs *)tsk->thread.esp0)->pc;
-}
-
unsigned long get_wchan(struct task_struct *p)
{
unsigned long fp, pc;
diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h
index 45a825402f63..ce67940860a5 100644
--- a/arch/hexagon/include/asm/processor.h
+++ b/arch/hexagon/include/asm/processor.h
@@ -33,9 +33,6 @@
/* task_struct, defined elsewhere, is the "process descriptor" */
struct task_struct;
-/* this is defined in arch/process.c */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
/*
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index de715bab7956..656050c2e6a0 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -61,14 +61,6 @@ void arch_cpu_idle(void)
}
/*
- * Return saved PC of a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return 0;
-}
-
-/*
* Copy architecture-specific thread state
*/
int copy_thread(unsigned long clone_flags, unsigned long usp,
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index 26a63d69c599..ab982f07ea68 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -602,23 +602,6 @@ ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat)
}
/*
- * Return saved PC of a blocked thread.
- * Note that the only way T can block is through a call to schedule() -> switch_to().
- */
-static inline unsigned long
-thread_saved_pc (struct task_struct *t)
-{
- struct unw_frame_info info;
- unsigned long ip;
-
- unw_init_from_blocked_task(&info, t);
- if (unw_unwind(&info) < 0)
- return 0;
- unw_get_ip(&info, &ip);
- return ip;
-}
-
-/*
* Get the current instruction/program counter value.
*/
#define current_text_addr() \
diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h
index 5767367550c6..657874eeeccc 100644
--- a/arch/m32r/include/asm/processor.h
+++ b/arch/m32r/include/asm/processor.h
@@ -122,8 +122,6 @@ extern void release_thread(struct task_struct *);
extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
extern void release_segments(struct mm_struct * mm);
-extern unsigned long thread_saved_pc(struct task_struct *);
-
/* Copy and release all segment info associated with a VM */
#define copy_segments(p, mm) do { } while (0)
#define release_segments(mm) do { } while (0)
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index d8ffcfec599c..8cd7e03f4370 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -39,14 +39,6 @@
#include <linux/err.h>
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return tsk->thread.lr;
-}
-
void (*pm_power_off)(void) = NULL;
EXPORT_SYMBOL(pm_power_off);
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index 77239e81379b..94c36030440c 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -130,8 +130,6 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) \
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index e475c945c8b2..7df92f8b0781 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -40,20 +40,6 @@
asmlinkage void ret_from_fork(void);
asmlinkage void ret_from_kernel_thread(void);
-
-/*
- * Return saved PC from a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp;
- /* Check whether the thread is blocked in resume() */
- if (in_sched_functions(sw->retpc))
- return ((unsigned long *)sw->a6)[1];
- else
- return sw->retpc;
-}
-
void arch_cpu_idle(void)
{
#if defined(MACH_ATARI_ONLY)
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index 37ef196e4519..330d556860ba 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -69,8 +69,6 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
extern unsigned long get_wchan(struct task_struct *p);
# define KSTK_EIP(tsk) (0)
@@ -121,10 +119,6 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-/* Return saved (kernel) PC of a blocked thread. */
-# define thread_saved_pc(tsk) \
- ((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0)
-
unsigned long get_wchan(struct task_struct *p);
/* The size allocated for kernel stacks. This _must_ be a power of two! */
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index e92a817e645f..6527ec22f158 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -119,23 +119,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
return 0;
}
-#ifndef CONFIG_MMU
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct cpu_context *ctx =
- &(((struct thread_info *)(tsk->stack))->cpu_context);
-
- /* Check whether the thread is blocked in resume() */
- if (in_sched_functions(ctx->r15))
- return (unsigned long)ctx->r15;
- else
- return ctx->r14;
-}
-#endif
-
unsigned long get_wchan(struct task_struct *p)
{
/* TBD (used by procfs) */
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index 7c6336dd2638..7cd92166a0b9 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -166,7 +166,11 @@ static int _kvm_mips_host_tlb_inv(unsigned long entryhi)
int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
bool user, bool kernel)
{
- int idx_user, idx_kernel;
+ /*
+ * Initialize idx_user and idx_kernel to work around a bogus
+ * maybe-uninitialized warning when using GCC 6.
+ */
+ int idx_user = 0, idx_kernel = 0;
unsigned long flags, old_entryhi;
local_irq_save(flags);
diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h
index 18e17abf7664..3ae479117b42 100644
--- a/arch/mn10300/include/asm/processor.h
+++ b/arch/mn10300/include/asm/processor.h
@@ -132,11 +132,6 @@ static inline void start_thread(struct pt_regs *regs,
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(task) ((task)->thread.uregs)
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index c9fa42619c6a..89e8027e07fb 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -40,14 +40,6 @@
#include "internal.h"
/*
- * return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return ((unsigned long *) tsk->thread.sp)[3];
-}
-
-/*
* power off function, if any
*/
void (*pm_power_off)(void);
diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h
index 3bbbc3d798e5..4944e2e1d8b0 100644
--- a/arch/nios2/include/asm/processor.h
+++ b/arch/nios2/include/asm/processor.h
@@ -75,9 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-/* Return saved PC of a blocked thread. */
-#define thread_saved_pc(tsk) ((tsk)->thread.kregs->ea)
-
extern unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(p) \
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h
index a908e6c30a00..396d8f306c21 100644
--- a/arch/openrisc/include/asm/processor.h
+++ b/arch/openrisc/include/asm/processor.h
@@ -84,11 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
void release_thread(struct task_struct *);
unsigned long get_wchan(struct task_struct *p);
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
#define init_stack (init_thread_union.stack)
#define cpu_relax() barrier()
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 106859ae27ff..f9b77003f113 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -110,11 +110,6 @@ void show_regs(struct pt_regs *regs)
show_registers(regs);
}
-unsigned long thread_saved_pc(struct task_struct *t)
-{
- return (unsigned long)user_regs(t->stack)->pc;
-}
-
void release_thread(struct task_struct *dead_task)
{
}
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index a3661ee6b060..4c6694b4e77e 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -163,12 +163,7 @@ struct thread_struct {
.flags = 0 \
}
-/*
- * Return saved PC of a blocked thread. This is used by ps mostly.
- */
-
struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *t);
void show_trace(struct task_struct *task, unsigned long *stack);
/*
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 4516a5b53f38..b64d7d21646e 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -239,11 +239,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
return 0;
}
-unsigned long thread_saved_pc(struct task_struct *t)
-{
- return t->thread.regs.kpc;
-}
-
unsigned long
get_wchan(struct task_struct *p)
{
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index a83821f33ea3..8814a7249ceb 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
extern int kprobe_handler(struct pt_regs *regs);
extern int kprobe_post_handler(struct pt_regs *regs);
+extern int is_current_kprobe_addr(unsigned long addr);
#ifdef CONFIG_KPROBES_ON_FTRACE
extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb);
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index bb99b651085a..1189d04f3bd1 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -378,12 +378,6 @@ struct thread_struct {
}
#endif
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-#define thread_saved_pc(tsk) \
- ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
-
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.regs)
unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ae418b85c17c..b886795060fd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1411,10 +1411,8 @@ USE_TEXT_SECTION()
.balign IFETCH_ALIGN_BYTES
do_hash_page:
#ifdef CONFIG_PPC_STD_MMU_64
- andis. r0,r4,0xa410 /* weird error? */
+ andis. r0,r4,0xa450 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
- andis. r0,r4,DSISR_DABRMATCH@h
- bne- handle_dabr_fault
CURRENT_THREAD_INFO(r11, r1)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
@@ -1438,11 +1436,16 @@ do_hash_page:
/* Error */
blt- 13f
+
+ /* Reload DSISR into r4 for the DABR check below */
+ ld r4,_DSISR(r1)
#endif /* CONFIG_PPC_STD_MMU_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
-11: ld r4,_DAR(r1)
+11: andis. r0,r4,DSISR_DABRMATCH@h
+ bne- handle_dabr_fault
+ ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index fc4343514bed..01addfb0ed0a 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+/*
+ * Report whether the kprobe returned by kprobe_running() is placed at @addr.
+ *
+ * Returns 1 when a kprobe is running and its probe address equals @addr,
+ * 0 otherwise (including when no kprobe is running).
+ */
+int is_current_kprobe_addr(unsigned long addr)
+{
+	struct kprobe *running = kprobe_running();
+
+	/* No kprobe in flight: nothing can match */
+	if (!running)
+		return 0;
+
+	return (unsigned long)running->addr == addr;
+}
+
bool arch_within_kprobe_blacklist(unsigned long addr)
{
return (addr >= (unsigned long)__kprobes_text_start &&
@@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
#endif
+ /*
+ * jprobes use jprobe_return() which skips the normal return
+ * path of the function, and this messes up the accounting of the
+ * function graph tracer.
+ *
+ * Pause function graph tracing while performing the jprobe function.
+ */
+ pause_graph_tracing();
+
return 1;
}
NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
* saved regs...
*/
memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+ /* It's OK to start function graph tracing again */
+ unpause_graph_tracing();
preempt_enable_no_resched();
return 1;
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index a8c1f99e9607..4640f6d64f8b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -616,6 +616,24 @@ void __init exc_lvl_early_init(void)
#endif
/*
+ * Emergency stacks are used for a range of things, from asynchronous
+ * NMIs (system reset, machine check) to synchronous, process context.
+ * We set preempt_count to zero, even though that isn't necessarily correct. To
+ * get the right value we'd need to copy it from the previous thread_info, but
+ * doing that might fault causing more problems.
+ * TODO: what to do with accounting?
+ */
+static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
+{
+	/* Identity: record the owning CPU; the task pointer is left NULL */
+	ti->cpu = cpu;
+	ti->task = NULL;
+
+	/* Start with zeroed flag words and a zero preempt count */
+	ti->flags = 0;
+	ti->local_flags = 0;
+	ti->preempt_count = 0;
+
+	/* Livepatch setup runs last, once the fields above are populated */
+	klp_init_thread_info(ti);
+}
+
+/*
* Stack space used when we detect a bad kernel stack pointer, and
* early in SMP boots before relocation is enabled. Exclusive emergency
* stack for machine checks.
@@ -633,24 +651,31 @@ void __init emergency_stack_init(void)
* Since we use these as temporary stacks during secondary CPU
* bringup, we need to get at them in real mode. This means they
* must also be within the RMO region.
+ *
+ * The IRQ stacks allocated elsewhere in this file are zeroed and
+ * initialized in kernel/irq.c. These are initialized here in order
+ * to have emergency stacks available as early as possible.
*/
limit = min(safe_stack_limit(), ppc64_rma_size);
for_each_possible_cpu(i) {
struct thread_info *ti;
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
#endif
}
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 7c933a99f5d5..c98e90b4ea7b 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
stdu r1,-SWITCH_FRAME_SIZE(r1)
/* Save all gprs to pt_regs */
- SAVE_8GPRS(0,r1)
- SAVE_8GPRS(8,r1)
- SAVE_8GPRS(16,r1)
- SAVE_8GPRS(24,r1)
+ SAVE_GPR(0, r1)
+ SAVE_10GPRS(2, r1)
+ SAVE_10GPRS(12, r1)
+ SAVE_10GPRS(22, r1)
+
+ /* Save previous stack pointer (r1) */
+ addi r8, r1, SWITCH_FRAME_SIZE
+ std r8, GPR1(r1)
/* Load special regs for save below */
mfmsr r8
@@ -95,18 +99,44 @@ ftrace_call:
bl ftrace_stub
nop
- /* Load ctr with the possibly modified NIP */
- ld r3, _NIP(r1)
- mtctr r3
+ /* Load the possibly modified NIP */
+ ld r15, _NIP(r1)
+
#ifdef CONFIG_LIVEPATCH
- cmpd r14,r3 /* has NIP been altered? */
+ cmpd r14, r15 /* has NIP been altered? */
+#endif
+
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
+ /* NIP has not been altered, skip over further checks */
+ beq 1f
+
+ /* Check if there is an active kprobe on us */
+ subi r3, r14, 4
+ bl is_current_kprobe_addr
+ nop
+
+ /*
+ * If r3 == 1, then this is a kprobe/jprobe.
+ * Otherwise, this is a livepatched function.
+ *
+ * The conditional branch for livepatch_handler below will use the
+ * result of this comparison. For kprobe/jprobe, we just need to branch to
+ * the new NIP, not call livepatch_handler. The branch below is bne, so we
+ * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
+ * CR0[EQ] = (r3 == 1).
+ */
+ cmpdi r3, 1
+1:
#endif
+ /* Load CTR with the possibly modified NIP */
+ mtctr r15
+
/* Restore gprs */
- REST_8GPRS(0,r1)
- REST_8GPRS(8,r1)
- REST_8GPRS(16,r1)
- REST_8GPRS(24,r1)
+ REST_GPR(0,r1)
+ REST_10GPRS(2,r1)
+ REST_10GPRS(12,r1)
+ REST_10GPRS(22,r1)
/* Restore possibly modified LR */
ld r0, _LINK(r1)
@@ -119,7 +149,10 @@ ftrace_call:
addi r1, r1, SWITCH_FRAME_SIZE
#ifdef CONFIG_LIVEPATCH
- /* Based on the cmpd above, if the NIP was altered handle livepatch */
+ /*
+ * Based on the cmpd or cmpdi above, if the NIP was altered and we're
+ * not on a kprobe/jprobe, then handle livepatch.
+ */
bne- livepatch_handler
#endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 42b7a4fd57d9..8d1a365b8edc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1486,6 +1486,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
break;
case KVM_REG_PPC_TB_OFFSET:
+ /*
+ * POWER9 DD1 has an erratum where writing TBU40 causes
+ * the timebase to lose ticks. So we don't let the
+ * timebase offset be changed on P9 DD1. (It is
+ * initialized to zero.)
+ */
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ break;
/* round up to multiple of 2^24 */
vcpu->arch.vcore->tb_offset =
ALIGN(set_reg_val(id, *val), 1UL << 24);
@@ -2907,12 +2915,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
int r;
int srcu_idx;
+ unsigned long ebb_regs[3] = {}; /* shut up GCC */
+ unsigned long user_tar = 0;
+ unsigned int user_vrsave;
if (!vcpu->arch.sane) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
+ /*
+ * Don't allow entry with a suspended transaction, because
+ * the guest entry/exit code will lose it.
+ * If the guest has TM enabled, save away their TM-related SPRs
+ * (they will get restored by the TM unavailable interrupt).
+ */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+ (current->thread.regs->msr & MSR_TM)) {
+ if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.hardware_entry_failure_reason = 0;
+ return -EINVAL;
+ }
+ current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+ current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+ current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+ current->thread.regs->msr &= ~MSR_TM;
+ }
+#endif
+
kvmppc_core_prepare_to_enter(vcpu);
/* No need to go into the guest when all we'll do is come back out */
@@ -2934,6 +2966,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
flush_all_to_thread(current);
+ /* Save userspace EBB and other register values */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ ebb_regs[0] = mfspr(SPRN_EBBHR);
+ ebb_regs[1] = mfspr(SPRN_EBBRR);
+ ebb_regs[2] = mfspr(SPRN_BESCR);
+ user_tar = mfspr(SPRN_TAR);
+ }
+ user_vrsave = mfspr(SPRN_VRSAVE);
+
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
vcpu->arch.pgdir = current->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
@@ -2960,6 +3001,16 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
} while (is_kvmppc_resume_guest(r));
+ /* Restore userspace EBB and other register values */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ mtspr(SPRN_EBBHR, ebb_regs[0]);
+ mtspr(SPRN_EBBRR, ebb_regs[1]);
+ mtspr(SPRN_BESCR, ebb_regs[2]);
+ mtspr(SPRN_TAR, user_tar);
+ mtspr(SPRN_FSCR, current->thread.fscr);
+ }
+ mtspr(SPRN_VRSAVE, user_vrsave);
+
out:
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
atomic_dec(&vcpu->kvm->arch.vcpus_running);
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 0fdc4a28970b..404deb512844 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -121,10 +121,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* Put whatever is in the decrementer into the
* hypervisor decrementer.
*/
+BEGIN_FTR_SECTION
+ ld r5, HSTATE_KVM_VCORE(r13)
+ ld r6, VCORE_KVM(r5)
+ ld r9, KVM_HOST_LPCR(r6)
+ andis. r9, r9, LPCR_LD@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mfspr r8,SPRN_DEC
mftb r7
- mtspr SPRN_HDEC,r8
+BEGIN_FTR_SECTION
+ /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
+ bne 32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r8,r8
+32: mtspr SPRN_HDEC,r8
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bdb3f76ceb6b..4888dd494604 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -32,12 +32,29 @@
#include <asm/opal.h>
#include <asm/xive-regs.h>
+/* Sign-extend HDEC if not on POWER9 */
+#define EXTEND_HDEC(reg) \
+BEGIN_FTR_SECTION; \
+ extsw reg, reg; \
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
#define NAPPING_NOVCPU 2
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS 144
+#define STACK_SLOT_TRAP (SFS-4)
+#define STACK_SLOT_TID (SFS-16)
+#define STACK_SLOT_PSSCR (SFS-24)
+#define STACK_SLOT_PID (SFS-32)
+#define STACK_SLOT_IAMR (SFS-40)
+#define STACK_SLOT_CIABR (SFS-48)
+#define STACK_SLOT_DAWR (SFS-56)
+#define STACK_SLOT_DAWRX (SFS-64)
+
/*
* Call kvmppc_hv_entry in real mode.
* Must be called with interrupts hard-disabled.
@@ -214,6 +231,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+ /* HDEC may be larger than DEC for arch >= v3.00, but since the */
+ /* HDEC value came from DEC in the first place, it will fit */
mfspr r3, SPRN_HDEC
mtspr SPRN_DEC, r3
/*
@@ -295,8 +314,9 @@ kvm_novcpu_wakeup:
/* See if our timeslice has expired (HDEC is negative) */
mfspr r0, SPRN_HDEC
+ EXTEND_HDEC(r0)
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
- cmpwi r0, 0
+ cmpdi r0, 0
blt kvm_novcpu_exit
/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -319,10 +339,10 @@ kvm_novcpu_exit:
bl kvmhv_accumulate_time
#endif
13: mr r3, r12
- stw r12, 112-4(r1)
+ stw r12, STACK_SLOT_TRAP(r1)
bl kvmhv_commence_exit
nop
- lwz r12, 112-4(r1)
+ lwz r12, STACK_SLOT_TRAP(r1)
b kvmhv_switch_to_host
/*
@@ -390,8 +410,8 @@ kvm_secondary_got_guest:
lbz r4, HSTATE_PTID(r13)
cmpwi r4, 0
bne 63f
- lis r6, 0x7fff
- ori r6, r6, 0xffff
+ LOAD_REG_ADDR(r6, decrementer_max)
+ ld r6, 0(r6)
mtspr SPRN_HDEC, r6
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
@@ -545,11 +565,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* *
*****************************************************************************/
-/* Stack frame offsets */
-#define STACK_SLOT_TID (112-16)
-#define STACK_SLOT_PSSCR (112-24)
-#define STACK_SLOT_PID (112-32)
-
.global kvmppc_hv_entry
kvmppc_hv_entry:
@@ -565,7 +580,7 @@ kvmppc_hv_entry:
*/
mflr r0
std r0, PPC_LR_STKOFF(r1)
- stdu r1, -112(r1)
+ stdu r1, -SFS(r1)
/* Save R1 in the PACA */
std r1, HSTATE_HOST_R1(r13)
@@ -749,10 +764,20 @@ BEGIN_FTR_SECTION
mfspr r5, SPRN_TIDR
mfspr r6, SPRN_PSSCR
mfspr r7, SPRN_PID
+ mfspr r8, SPRN_IAMR
std r5, STACK_SLOT_TID(r1)
std r6, STACK_SLOT_PSSCR(r1)
std r7, STACK_SLOT_PID(r1)
+ std r8, STACK_SLOT_IAMR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_CIABR
+ mfspr r6, SPRN_DAWR
+ mfspr r7, SPRN_DAWRX
+ std r5, STACK_SLOT_CIABR(r1)
+ std r6, STACK_SLOT_DAWR(r1)
+ std r7, STACK_SLOT_DAWRX(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
/* Set partition DABR */
@@ -968,7 +993,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/* Check if HDEC expires soon */
mfspr r3, SPRN_HDEC
- cmpwi r3, 512 /* 1 microsecond */
+ EXTEND_HDEC(r3)
+ cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
#ifdef CONFIG_KVM_XICS
@@ -1505,11 +1531,10 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
* set by the guest could disrupt the host.
*/
li r0, 0
- mtspr SPRN_IAMR, r0
- mtspr SPRN_CIABR, r0
- mtspr SPRN_DAWRX, r0
+ mtspr SPRN_PSPB, r0
mtspr SPRN_WORT, r0
BEGIN_FTR_SECTION
+ mtspr SPRN_IAMR, r0
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
@@ -1525,6 +1550,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
std r6,VCPU_UAMOR(r9)
li r6,0
mtspr SPRN_AMR,r6
+ mtspr SPRN_UAMOR, r6
/* Switch DSCR back to host value */
mfspr r8, SPRN_DSCR
@@ -1670,12 +1696,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Restore host values of some registers */
BEGIN_FTR_SECTION
+ ld r5, STACK_SLOT_CIABR(r1)
+ ld r6, STACK_SLOT_DAWR(r1)
+ ld r7, STACK_SLOT_DAWRX(r1)
+ mtspr SPRN_CIABR, r5
+ mtspr SPRN_DAWR, r6
+ mtspr SPRN_DAWRX, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
ld r5, STACK_SLOT_TID(r1)
ld r6, STACK_SLOT_PSSCR(r1)
ld r7, STACK_SLOT_PID(r1)
+ ld r8, STACK_SLOT_IAMR(r1)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
mtspr SPRN_PID, r7
+ mtspr SPRN_IAMR, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
@@ -1819,8 +1855,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
- ld r0, 112+PPC_LR_STKOFF(r1)
- addi r1, r1, 112
+ ld r0, SFS+PPC_LR_STKOFF(r1)
+ addi r1, r1, SFS
mtlr r0
blr
@@ -2366,12 +2402,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
- cmpw r3, r4
+ extsw r3, r3
+ EXTEND_HDEC(r4)
+ cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
67:
/* save expiry time of guest decrementer */
- extsw r3, r3
add r3, r3, r5
ld r4, HSTATE_KVM_VCPU(r13)
ld r5, HSTATE_KVM_VCORE(r13)
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index cbd82fde5770..09ceea6175ba 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs_user_copy)
{
regs_user->regs = task_pt_regs(current);
- regs_user->abi = perf_reg_abi(current);
+ regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
+ PERF_SAMPLE_REGS_ABI_NONE;
}
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index e6f444b46207..b5d960d6db3d 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
return mmio_atsd_reg;
}
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
{
unsigned long launch;
@@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
/* PID */
launch |= pid << PPC_BITLSHIFT(38);
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
+
/* Invalidating the entire process doesn't use a va */
return mmio_launch_invalidate(npu, launch, 0);
}
static int mmio_invalidate_va(struct npu *npu, unsigned long va,
- unsigned long pid)
+ unsigned long pid, bool flush)
{
unsigned long launch;
@@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
/* PID */
launch |= pid << PPC_BITLSHIFT(38);
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
+
return mmio_launch_invalidate(npu, launch, va);
}
#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
+/*
+ * Records, for one NPU, the index into npu->mmio_atsd_regs[] of the ATSD
+ * register on which an invalidation was launched. mmio_invalidate_wait()
+ * skips entries whose reg is negative.
+ */
+struct mmio_atsd_reg {
+ struct npu *npu;
+ int reg;
+};
+
+/*
+ * Wait for the ATSDs recorded in mmio_atsd_reg[] to complete and release
+ * the registers they were launched on.
+ *
+ * @mmio_atsd_reg: per-NPU table of launched ATSDs, indexed 0..max_npu2_index.
+ *                 Entries with reg < 0 are skipped. Updated in place when
+ *                 @flush is true (see below).
+ * @flush:         when true, launch one more flush ATSD (PID 0) on each NPU
+ *                 once its pending ATSD has completed, and record the
+ *                 register used for it in mmio_atsd_reg[] so that a
+ *                 subsequent call with @flush == false waits on, and
+ *                 releases, that register.
+ */
+static void mmio_invalidate_wait(
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+	struct npu *npu;
+	int i, reg;
+
+	/* Wait for all invalidations to complete */
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		/* Wait for completion */
+		npu = mmio_atsd_reg[i].npu;
+		reg = mmio_atsd_reg[i].reg;
+		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+			cpu_relax();
+
+		put_mmio_atsd_reg(npu, reg);
+
+		/*
+		 * The GPU requires two flush ATSDs to ensure all entries have
+		 * been flushed. We use PID 0 as it will never be used for a
+		 * process on the GPU.
+		 *
+		 * Remember which register the extra flush was launched on.
+		 * Otherwise the caller's follow-up wait would poll and release
+		 * the register already released above, and the one acquired
+		 * here would never be waited on or released.
+		 */
+		if (flush)
+			mmio_atsd_reg[i].reg =
+				mmio_invalidate_pid(npu, 0, true);
+	}
+}
+
/*
* Invalidate either a single address or an entire PID depending on
* the value of va.
*/
static void mmio_invalidate(struct npu_context *npu_context, int va,
- unsigned long address)
+ unsigned long address, bool flush)
{
- int i, j, reg;
+ int i, j;
struct npu *npu;
struct pnv_phb *nphb;
struct pci_dev *npdev;
- struct {
- struct npu *npu;
- int reg;
- } mmio_atsd_reg[NV_MAX_NPUS];
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
unsigned long pid = npu_context->mm->context.id;
/*
@@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
if (va)
mmio_atsd_reg[i].reg =
- mmio_invalidate_va(npu, address, pid);
+ mmio_invalidate_va(npu, address, pid,
+ flush);
else
mmio_atsd_reg[i].reg =
- mmio_invalidate_pid(npu, pid);
+ mmio_invalidate_pid(npu, pid, flush);
/*
* The NPU hardware forwards the shootdown to all GPUs
@@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
*/
flush_tlb_mm(npu_context->mm);
- /* Wait for all invalidations to complete */
- for (i = 0; i <= max_npu2_index; i++) {
- if (mmio_atsd_reg[i].reg < 0)
- continue;
-
- /* Wait for completion */
- npu = mmio_atsd_reg[i].npu;
- reg = mmio_atsd_reg[i].reg;
- while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
- cpu_relax();
- put_mmio_atsd_reg(npu, reg);
- }
+ mmio_invalidate_wait(mmio_atsd_reg, flush);
+ if (flush)
+ /* Wait for the flush to complete */
+ mmio_invalidate_wait(mmio_atsd_reg, false);
}
static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
* There should be no more translation requests for this PID, but we
* need to ensure any entries for it are removed from the TLB.
*/
- mmio_invalidate(npu_context, 0, 0);
+ mmio_invalidate(npu_context, 0, 0, true);
}
static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
{
struct npu_context *npu_context = mn_to_npu_context(mn);
- mmio_invalidate(npu_context, 1, address);
+ mmio_invalidate(npu_context, 1, address, true);
}
static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
{
struct npu_context *npu_context = mn_to_npu_context(mn);
- mmio_invalidate(npu_context, 1, address);
+ mmio_invalidate(npu_context, 1, address, true);
}
static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
struct npu_context *npu_context = mn_to_npu_context(mn);
unsigned long address;
- for (address = start; address <= end; address += PAGE_SIZE)
- mmio_invalidate(npu_context, 1, address);
+ for (address = start; address < end; address += PAGE_SIZE)
+ mmio_invalidate(npu_context, 1, address, false);
+
+ /* Do the flush only on the final address == end */
+ mmio_invalidate(npu_context, 1, address, true);
}
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
/* No nvlink associated with this GPU device */
return ERR_PTR(-ENODEV);
- if (!mm) {
- /* kernel thread contexts are not supported */
+ if (!mm || mm->context.id == 0) {
+ /*
+ * Kernel thread contexts are not supported and context id 0 is
+ * reserved on the GPU.
+ */
return ERR_PTR(-EINVAL);
}
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 60d395fdc864..aeac013968f2 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -221,11 +221,6 @@ extern void release_thread(struct task_struct *);
/* Free guarded storage control block for current */
void exit_thread_gs(void);
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(tsk) ((struct pt_regs *) \
(task_stack_page(tsk) + THREAD_SIZE) - 1)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index e545ffe5155a..8e622bb52f7a 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -564,8 +564,6 @@ static struct kset *ipl_kset;
static void __ipl_run(void *unused)
{
- if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
- diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
diag308(DIAG308_LOAD_CLEAR, NULL);
if (MACHINE_IS_VM)
__cpcmd("IPL", NULL, 0, NULL);
@@ -1088,10 +1086,7 @@ static void __reipl_run(void *unused)
break;
case REIPL_METHOD_CCW_DIAG:
diag308(DIAG308_SET, reipl_block_ccw);
- if (MACHINE_IS_LPAR)
- diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
- else
- diag308(DIAG308_LOAD_CLEAR, NULL);
+ diag308(DIAG308_LOAD_CLEAR, NULL);
break;
case REIPL_METHOD_FCP_RW_DIAG:
diag308(DIAG308_SET, reipl_block_fcp);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 999d7154bbdc..bb32b8618bf6 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -41,31 +41,6 @@
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
-/*
- * Return saved PC of a blocked thread. used in kernel/sched.
- * resume in entry.S does not create a new stack frame, it
- * just stores the registers %r6-%r15 to the frame given by
- * schedule. We want to return the address of the caller of
- * schedule, so we have to walk the backchain one time to
- * find the frame schedule() store its return address.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct stack_frame *sf, *low, *high;
-
- if (!tsk || !task_stack_page(tsk))
- return 0;
- low = task_stack_page(tsk);
- high = (struct stack_frame *) task_pt_regs(tsk);
- sf = (struct stack_frame *) tsk->thread.ksp;
- if (sf <= low || sf > high)
- return 0;
- sf = (struct stack_frame *) sf->back_chain;
- if (sf <= low || sf > high)
- return 0;
- return sf->gprs[8];
-}
-
extern void kernel_thread_starter(void);
/*
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 9da243d94cc3..3b297fa3aa67 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -977,11 +977,12 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
ptr = asce.origin * 4096;
if (asce.r) {
*fake = 1;
+ ptr = 0;
asce.dt = ASCE_TYPE_REGION1;
}
switch (asce.dt) {
case ASCE_TYPE_REGION1:
- if (vaddr.rfx01 > asce.tl && !asce.r)
+ if (vaddr.rfx01 > asce.tl && !*fake)
return PGM_REGION_FIRST_TRANS;
break;
case ASCE_TYPE_REGION2:
@@ -1009,8 +1010,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
union region1_table_entry rfte;
if (*fake) {
- /* offset in 16EB guest memory block */
- ptr = ptr + ((unsigned long) vaddr.rsx << 53UL);
+ ptr += (unsigned long) vaddr.rfx << 53;
rfte.val = ptr;
goto shadow_r2t;
}
@@ -1036,8 +1036,7 @@ shadow_r2t:
union region2_table_entry rste;
if (*fake) {
- /* offset in 8PB guest memory block */
- ptr = ptr + ((unsigned long) vaddr.rtx << 42UL);
+ ptr += (unsigned long) vaddr.rsx << 42;
rste.val = ptr;
goto shadow_r3t;
}
@@ -1064,8 +1063,7 @@ shadow_r3t:
union region3_table_entry rtte;
if (*fake) {
- /* offset in 4TB guest memory block */
- ptr = ptr + ((unsigned long) vaddr.sx << 31UL);
+ ptr += (unsigned long) vaddr.rtx << 31;
rtte.val = ptr;
goto shadow_sgt;
}
@@ -1101,8 +1099,7 @@ shadow_sgt:
union segment_table_entry ste;
if (*fake) {
- /* offset in 2G guest memory block */
- ptr = ptr + ((unsigned long) vaddr.sx << 20UL);
+ ptr += (unsigned long) vaddr.sx << 20;
ste.val = ptr;
goto shadow_pgt;
}
diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h
index d9a922d8711b..299274581968 100644
--- a/arch/score/include/asm/processor.h
+++ b/arch/score/include/asm/processor.h
@@ -13,7 +13,6 @@ struct task_struct;
*/
extern void (*cpu_wait)(void);
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
extern void start_thread(struct pt_regs *regs,
unsigned long pc, unsigned long sp);
extern unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c
index eb64d7a677cb..6e20241a1ed4 100644
--- a/arch/score/kernel/process.c
+++ b/arch/score/kernel/process.c
@@ -101,11 +101,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r)
return 1;
}
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return task_pt_regs(tsk)->cp0_epc;
-}
-
unsigned long get_wchan(struct task_struct *task)
{
if (!task || task == current || task->state == TASK_RUNNING)
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index dd27159819eb..b395e5620c0b 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -67,9 +67,6 @@ struct thread_struct {
.current_ds = KERNEL_DS, \
}
-/* Return saved PC of a blocked thread. */
-unsigned long thread_saved_pc(struct task_struct *t);
-
/* Do necessary setup to start up a newly executed thread. */
static inline void start_thread(struct pt_regs * regs, unsigned long pc,
unsigned long sp)
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index b58ee9018433..f04dc5a43062 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -89,9 +89,7 @@ struct thread_struct {
#include <linux/types.h>
#include <asm/fpumacro.h>
-/* Return saved PC of a blocked thread. */
struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *);
/* On Uniprocessor, even in RMO processes see TSO semantics */
#ifdef CONFIG_SMP
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index b6dac8e980f0..9245f93398c7 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -177,14 +177,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
}
/*
- * Note: sparc64 has a pretty intricated thread_saved_pc, check it out.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return task_thread_info(tsk)->kpc;
-}
-
-/*
* Free current thread data structures etc..
*/
void exit_thread(struct task_struct *tsk)
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 1badc493e62e..b96104da5bd6 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -400,25 +400,6 @@ core_initcall(sparc_sysrq_init);
#endif
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct thread_info *ti = task_thread_info(tsk);
- unsigned long ret = 0xdeadbeefUL;
-
- if (ti && ti->ksp) {
- unsigned long *sp;
- sp = (unsigned long *)(ti->ksp + STACK_BIAS);
- if (((unsigned long)sp & (sizeof(long) - 1)) == 0UL &&
- sp[14]) {
- unsigned long *fp;
- fp = (unsigned long *)(sp[14] + STACK_BIAS);
- if (((unsigned long)fp & (sizeof(long) - 1)) == 0UL)
- ret = fp[15];
- }
- }
- return ret;
-}
-
/* Free current thread data structures etc.. */
void exit_thread(struct task_struct *tsk)
{
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 0bc9968b97a1..f71e5206650b 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -214,13 +214,6 @@ static inline void release_thread(struct task_struct *dead_task)
extern void prepare_exit_to_usermode(struct pt_regs *regs, u32 flags);
-
-/*
- * Return saved (kernel) PC of a blocked thread.
- * Only used in a printk() in kernel/sched/core.c, so don't work too hard.
- */
-#define thread_saved_pc(t) ((t)->thread.pc)
-
unsigned long get_wchan(struct task_struct *p);
/* Return initial ksp value for given task. */
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 2d1e0dd5bb0b..f6d1a3f747a9 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -58,8 +58,6 @@ static inline void release_thread(struct task_struct *task)
{
}
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
static inline void mm_copy_segments(struct mm_struct *from_mm,
struct mm_struct *new_mm)
{
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 64a1fd06f3fd..7b5640117325 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -56,12 +56,6 @@ union thread_union cpu0_irqstack
__attribute__((__section__(".data..init_irqstack"))) =
{ INIT_THREAD_INFO(init_task) };
-unsigned long thread_saved_pc(struct task_struct *task)
-{
- /* FIXME: Need to look up userspace_pid by cpu */
- return os_process_pc(userspace_pid[0]);
-}
-
/* Changed in setup_arch, which is called in early boot */
static char host_info[(__NEW_UTS_LEN + 1) * 5];
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a6d91d4e37a1..110ce8238466 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -431,11 +431,11 @@ static __initconst const u64 skl_hw_cache_event_ids
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
- [ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
+ [ C(RESULT_MISS) ] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
- [ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */
+ [ C(RESULT_MISS) ] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 055962615779..722d0e568863 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
bool perm_ok; /* do not check permissions if true */
bool ud; /* inject an #UD if host doesn't support insn */
+ bool tf; /* TF value before instruction (after for syscall/sysret) */
bool have_exception;
struct x86_exception exception;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index fba100713924..d5acc27ed1cc 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -2,8 +2,7 @@
#define _ASM_X86_MSHYPER_H
#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
+#include <linux/atomic.h>
#include <asm/hyperv.h>
/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3cada998a402..a28b671f1549 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -860,8 +860,6 @@ extern unsigned long KSTK_ESP(struct task_struct *task);
#endif /* CONFIG_X86_64 */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
unsigned long new_sp);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0bb88428cbf2..3ca198080ea9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -545,17 +545,6 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
}
/*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct inactive_task_frame *frame =
- (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
- return READ_ONCE_NOCHECK(frame->ret_addr);
-}
-
-/*
* Called from fs/proc with a reference on @p to find the function
* which called into schedule(). This needs to be done carefully
* because the task might wake up and we might look at a stack
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0816ab2e8adc..80890dee66ce 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
}
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 87d3cb901935..0e846f0cb83b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
ctxt->eflags = kvm_get_rflags(vcpu);
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
ctxt->eip = kvm_rip_read(vcpu);
ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
(ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
@@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
return dr6;
}
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
{
struct kvm_run *kvm_run = vcpu->run;
- /*
- * rflags is the old, "raw" value of the flags. The new value has
- * not been saved yet.
- *
- * This is correct even for TF set by the guest, because "the
- * processor will not generate this exception after the instruction
- * that sets the TF flag".
- */
- if (unlikely(rflags & X86_EFLAGS_TF)) {
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
- DR6_RTM;
- kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- *r = EMULATE_USER_EXIT;
- } else {
- /*
- * "Certain debug exceptions may clear bit 0-3. The
- * remaining contents of the DR6 register are never
- * cleared by the processor".
- */
- vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
- kvm_queue_exception(vcpu, DB_VECTOR);
- }
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+ kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ *r = EMULATE_USER_EXIT;
+ } else {
+ /*
+ * "Certain debug exceptions may clear bit 0-3. The
+ * remaining contents of the DR6 register are never
+ * cleared by the processor".
+ */
+ vcpu->arch.dr6 &= ~15;
+ vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+ kvm_queue_exception(vcpu, DB_VECTOR);
}
}
@@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
int r = EMULATE_DONE;
kvm_x86_ops->skip_emulated_instruction(vcpu);
- kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+
+ /*
+ * rflags is the old, "raw" value of the flags. The new value has
+ * not been saved yet.
+ *
+ * This is correct even for TF set by the guest, because "the
+ * processor will not generate this exception after the instruction
+ * that sets the TF flag".
+ */
+ if (unlikely(rflags & X86_EFLAGS_TF))
+ kvm_vcpu_do_singlestep(vcpu, &r);
return r == EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
@@ -5726,8 +5727,9 @@ restart:
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
kvm_rip_write(vcpu, ctxt->eip);
- if (r == EMULATE_DONE)
- kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+ if (r == EMULATE_DONE &&
+ (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+ kvm_vcpu_do_singlestep(vcpu, &r);
if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP)
__kvm_set_rflags(vcpu, ctxt->eflags);
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 003eeee3fbc6..30ee8c608853 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -213,8 +213,6 @@ struct mm_struct;
#define release_segments(mm) do { } while(0)
#define forget_segments() do { } while (0)
-#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc)
-
extern unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc)
diff --git a/block/bio.c b/block/bio.c
index 888e7801c638..26b0810fb8ea 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -240,20 +240,21 @@ fallback:
return bvl;
}
-static void __bio_free(struct bio *bio)
+void bio_uninit(struct bio *bio)
{
bio_disassociate_task(bio);
if (bio_integrity(bio))
bio_integrity_free(bio);
}
+EXPORT_SYMBOL(bio_uninit);
static void bio_free(struct bio *bio)
{
struct bio_set *bs = bio->bi_pool;
void *p;
- __bio_free(bio);
+ bio_uninit(bio);
if (bs) {
bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
@@ -271,6 +272,11 @@ static void bio_free(struct bio *bio)
}
}
+/*
+ * Users of this function have their own bio allocation. Subsequently,
+ * they must remember to pair any call to bio_init() with bio_uninit()
+ * when IO has completed, or when the bio is released.
+ */
void bio_init(struct bio *bio, struct bio_vec *table,
unsigned short max_vecs)
{
@@ -297,7 +303,7 @@ void bio_reset(struct bio *bio)
{
unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
- __bio_free(bio);
+ bio_uninit(bio);
memset(bio, 0, BIO_RESET_BYTES);
bio->bi_flags = flags;
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 1f5b692526ae..0ded5e846335 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q,
__blk_mq_sched_assign_ioc(q, rq, bio, ioc);
}
+/*
+ * Mark a hardware queue as needing a restart. For shared queues, maintain
+ * a count of how many hardware queues are marked for restart.
+ */
+static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
+{
+ if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+ return;
+
+ if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+ struct request_queue *q = hctx->queue;
+
+ if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+ atomic_inc(&q->shared_hctx_restart);
+ } else
+ set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+}
+
+static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+{
+ if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+ return false;
+
+ if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+ struct request_queue *q = hctx->queue;
+
+ if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+ atomic_dec(&q->shared_hctx_restart);
+ } else
+ clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+
+ if (blk_mq_hctx_has_pending(hctx)) {
+ blk_mq_run_hw_queue(hctx, true);
+ return true;
+ }
+
+ return false;
+}
+
struct request *blk_mq_sched_get_request(struct request_queue *q,
struct bio *bio,
unsigned int op,
@@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
return true;
}
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
- if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
- clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
- if (blk_mq_hctx_has_pending(hctx)) {
- blk_mq_run_hw_queue(hctx, true);
- return true;
- }
- }
- return false;
-}
-
/**
* list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
* @pos: loop cursor.
@@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
unsigned int i, j;
if (set->flags & BLK_MQ_F_TAG_SHARED) {
+ /*
+ * If this is 0, then we know that no hardware queues
+ * have RESTART marked. We're done.
+ */
+ if (!atomic_read(&queue->shared_hctx_restart))
+ return;
+
rcu_read_lock();
list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
tag_set_list) {
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index edafb5383b7b..5007edece51a 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
return false;
}
-/*
- * Mark a hardware queue as needing a restart.
- */
-static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
- if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
- set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-}
-
static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
{
return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index bb66c96850b1..958cedaff8b8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q,
}
}
+/*
+ * Caller needs to ensure that we're either frozen/quiesced, or that
+ * the queue isn't live yet.
+ */
static void queue_set_hctx_shared(struct request_queue *q, bool shared)
{
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i) {
- if (shared)
+ if (shared) {
+ if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+ atomic_inc(&q->shared_hctx_restart);
hctx->flags |= BLK_MQ_F_TAG_SHARED;
- else
+ } else {
+ if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+ atomic_dec(&q->shared_hctx_restart);
hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+ }
}
}
-static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
+ bool shared)
{
struct request_queue *q;
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 8af664f7d27c..be117495eb43 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -877,7 +877,7 @@ static void aead_sock_destruct(struct sock *sk)
unsigned int ivlen = crypto_aead_ivsize(
crypto_aead_reqtfm(&ctx->aead_req));
- WARN_ON(atomic_read(&sk->sk_refcnt) != 0);
+ WARN_ON(refcount_read(&sk->sk_refcnt) != 0);
aead_put_sgl(sk);
sock_kzfree_s(sk, ctx->iv, ivlen);
sock_kfree_s(sk, ctx, ctx->len);
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 3a10d7573477..d53162997f32 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1428,6 +1428,37 @@ static void acpi_init_coherency(struct acpi_device *adev)
adev->flags.coherent_dma = cca;
}
+static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data)
+{
+ bool *is_spi_i2c_slave_p = data;
+
+ if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
+ return 1;
+
+ /*
+ * devices that are connected to UART still need to be enumerated to
+ * platform bus
+ */
+ if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART)
+ *is_spi_i2c_slave_p = true;
+
+ /* no need to do more checking */
+ return -1;
+}
+
+static bool acpi_is_spi_i2c_slave(struct acpi_device *device)
+{
+ struct list_head resource_list;
+ bool is_spi_i2c_slave = false;
+
+ INIT_LIST_HEAD(&resource_list);
+ acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
+ &is_spi_i2c_slave);
+ acpi_dev_free_resource_list(&resource_list);
+
+ return is_spi_i2c_slave;
+}
+
void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
int type, unsigned long long sta)
{
@@ -1443,6 +1474,7 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
acpi_bus_get_flags(device);
device->flags.match_driver = false;
device->flags.initialized = true;
+ device->flags.spi_i2c_slave = acpi_is_spi_i2c_slave(device);
acpi_device_clear_enumerated(device);
device_initialize(&device->dev);
dev_set_uevent_suppress(&device->dev, true);
@@ -1727,38 +1759,13 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used,
return AE_OK;
}
-static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data)
-{
- bool *is_spi_i2c_slave_p = data;
-
- if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
- return 1;
-
- /*
- * devices that are connected to UART still need to be enumerated to
- * platform bus
- */
- if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART)
- *is_spi_i2c_slave_p = true;
-
- /* no need to do more checking */
- return -1;
-}
-
static void acpi_default_enumeration(struct acpi_device *device)
{
- struct list_head resource_list;
- bool is_spi_i2c_slave = false;
-
/*
* Do not enumerate SPI/I2C slaves as they will be enumerated by their
* respective parents.
*/
- INIT_LIST_HEAD(&resource_list);
- acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
- &is_spi_i2c_slave);
- acpi_dev_free_resource_list(&resource_list);
- if (!is_spi_i2c_slave) {
+ if (!device->flags.spi_i2c_slave) {
acpi_create_platform_device(device, NULL);
acpi_device_set_enumerated(device);
} else {
@@ -1854,7 +1861,7 @@ static void acpi_bus_attach(struct acpi_device *device)
return;
device->flags.match_driver = true;
- if (ret > 0) {
+ if (ret > 0 && !device->flags.spi_i2c_slave) {
acpi_device_set_enumerated(device);
goto ok;
}
@@ -1863,10 +1870,10 @@ static void acpi_bus_attach(struct acpi_device *device)
if (ret < 0)
return;
- if (device->pnp.type.platform_id)
- acpi_default_enumeration(device);
- else
+ if (!device->pnp.type.platform_id && !device->flags.spi_i2c_slave)
acpi_device_set_enumerated(device);
+ else
+ acpi_default_enumeration(device);
ok:
list_for_each_entry(child, &device->children, node)
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index 7584ae1ded85..f0433adcd8fc 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -924,12 +924,7 @@ fore200e_tx_irq(struct fore200e* fore200e)
else {
dev_kfree_skb_any(entry->skb);
}
-#if 1
- /* race fixed by the above incarnation mechanism, but... */
- if (atomic_read(&sk_atm(vcc)->sk_wmem_alloc) < 0) {
- atomic_set(&sk_atm(vcc)->sk_wmem_alloc, 0);
- }
-#endif
+
/* check error condition */
if (*entry->status & STATUS_ERROR)
atomic_inc(&vcc->stats->tx_err);
@@ -1130,13 +1125,9 @@ fore200e_push_rpd(struct fore200e* fore200e, struct atm_vcc* vcc, struct rpd* rp
return -ENOMEM;
}
- ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0);
-
vcc->push(vcc, skb);
atomic_inc(&vcc->stats->rx);
- ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0);
-
return 0;
}
@@ -1572,7 +1563,6 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb)
unsigned long flags;
ASSERT(vcc);
- ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0);
ASSERT(fore200e);
ASSERT(fore200e_vcc);
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 461da2bce8ef..37ee21c5a5ca 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -2395,7 +2395,7 @@ he_close(struct atm_vcc *vcc)
* TBRQ, the host issues the close command to the adapter.
*/
- while (((tx_inuse = atomic_read(&sk_atm(vcc)->sk_wmem_alloc)) > 1) &&
+ while (((tx_inuse = refcount_read(&sk_atm(vcc)->sk_wmem_alloc)) > 1) &&
(retry < MAX_RETRY)) {
msleep(sleep);
if (sleep < 250)
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 4e64de380bda..60bacba03d17 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -724,7 +724,7 @@ push_on_scq(struct idt77252_dev *card, struct vc_map *vc, struct sk_buff *skb)
struct sock *sk = sk_atm(vcc);
vc->estimator->cells += (skb->len + 47) / 48;
- if (atomic_read(&sk->sk_wmem_alloc) >
+ if (refcount_read(&sk->sk_wmem_alloc) >
(sk->sk_sndbuf >> 1)) {
u32 cps = vc->estimator->maxcps;
@@ -2009,7 +2009,7 @@ idt77252_send_oam(struct atm_vcc *vcc, void *cell, int flags)
atomic_inc(&vcc->stats->tx_err);
return -ENOMEM;
}
- atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+ refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
skb_put_data(skb, cell, 52);
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 726c32e35db9..0e824091a12f 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -609,8 +609,6 @@ int xen_blkif_schedule(void *arg)
unsigned long timeout;
int ret;
- xen_blkif_get(blkif);
-
set_freezable();
while (!kthread_should_stop()) {
if (try_to_freeze())
@@ -665,7 +663,6 @@ purge_gnt_list:
print_stats(ring);
ring->xenblkd = NULL;
- xen_blkif_put(blkif);
return 0;
}
@@ -1436,34 +1433,35 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
static void make_response(struct xen_blkif_ring *ring, u64 id,
unsigned short op, int st)
{
- struct blkif_response resp;
+ struct blkif_response *resp;
unsigned long flags;
union blkif_back_rings *blk_rings;
int notify;
- resp.id = id;
- resp.operation = op;
- resp.status = st;
-
spin_lock_irqsave(&ring->blk_ring_lock, flags);
blk_rings = &ring->blk_rings;
/* Place on the response ring for the relevant domain. */
switch (ring->blkif->blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
- memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
- &resp, sizeof(resp));
+ resp = RING_GET_RESPONSE(&blk_rings->native,
+ blk_rings->native.rsp_prod_pvt);
break;
case BLKIF_PROTOCOL_X86_32:
- memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
- &resp, sizeof(resp));
+ resp = RING_GET_RESPONSE(&blk_rings->x86_32,
+ blk_rings->x86_32.rsp_prod_pvt);
break;
case BLKIF_PROTOCOL_X86_64:
- memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
- &resp, sizeof(resp));
+ resp = RING_GET_RESPONSE(&blk_rings->x86_64,
+ blk_rings->x86_64.rsp_prod_pvt);
break;
default:
BUG();
}
+
+ resp->id = id;
+ resp->operation = op;
+ resp->status = st;
+
blk_rings->common.rsp_prod_pvt++;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index dea61f6ab8cb..ecb35fe8ca8d 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -75,9 +75,8 @@ extern unsigned int xenblk_max_queues;
struct blkif_common_request {
char dummy;
};
-struct blkif_common_response {
- char dummy;
-};
+
+/* i386 protocol version */
struct blkif_x86_32_request_rw {
uint8_t nr_segments; /* number of segments */
@@ -129,14 +128,6 @@ struct blkif_x86_32_request {
} u;
} __attribute__((__packed__));
-/* i386 protocol version */
-#pragma pack(push, 4)
-struct blkif_x86_32_response {
- uint64_t id; /* copied from request */
- uint8_t operation; /* copied from request */
- int16_t status; /* BLKIF_RSP_??? */
-};
-#pragma pack(pop)
/* x86_64 protocol version */
struct blkif_x86_64_request_rw {
@@ -193,18 +184,12 @@ struct blkif_x86_64_request {
} u;
} __attribute__((__packed__));
-struct blkif_x86_64_response {
- uint64_t __attribute__((__aligned__(8))) id;
- uint8_t operation; /* copied from request */
- int16_t status; /* BLKIF_RSP_??? */
-};
-
DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
- struct blkif_common_response);
+ struct blkif_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
- struct blkif_x86_32_response);
+ struct blkif_response __packed);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
- struct blkif_x86_64_response);
+ struct blkif_response);
union blkif_back_rings {
struct blkif_back_ring native;
@@ -281,6 +266,7 @@ struct xen_blkif_ring {
wait_queue_head_t wq;
atomic_t inflight;
+ bool active;
/* One thread per blkif ring. */
struct task_struct *xenblkd;
unsigned int waiting_reqs;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 1f3dfaa54d87..792da683e70d 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -159,7 +159,7 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
init_waitqueue_head(&ring->shutdown_wq);
ring->blkif = blkif;
ring->st_print = jiffies;
- xen_blkif_get(blkif);
+ ring->active = true;
}
return 0;
@@ -249,10 +249,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
struct xen_blkif_ring *ring = &blkif->rings[r];
unsigned int i = 0;
+ if (!ring->active)
+ continue;
+
if (ring->xenblkd) {
kthread_stop(ring->xenblkd);
wake_up(&ring->shutdown_wq);
- ring->xenblkd = NULL;
}
/* The above kthread_stop() guarantees that at this point we
@@ -296,7 +298,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
BUG_ON(ring->free_pages_num != 0);
BUG_ON(ring->persistent_gnt_c != 0);
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
- xen_blkif_put(blkif);
+ ring->active = false;
}
blkif->nr_ring_pages = 0;
/*
@@ -312,9 +314,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
static void xen_blkif_free(struct xen_blkif *blkif)
{
-
- xen_blkif_disconnect(blkif);
+ WARN_ON(xen_blkif_disconnect(blkif));
xen_vbd_free(&blkif->vbd);
+ kfree(blkif->be->mode);
+ kfree(blkif->be);
/* Make sure everything is drained before shutting down */
kmem_cache_free(xen_blkif_cachep, blkif);
@@ -511,8 +514,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
xen_blkif_put(be->blkif);
}
- kfree(be->mode);
- kfree(be);
return 0;
}
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 24f8c4e93f4e..9ab6cfbb831d 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -295,6 +295,7 @@ static const struct {
{ 0x410e, "BCM43341B0" }, /* 002.001.014 */
{ 0x4406, "BCM4324B3" }, /* 002.004.006 */
{ 0x610c, "BCM4354" }, /* 003.001.012 */
+ { 0x2209, "BCM43430A1" }, /* 001.002.009 */
{ }
};
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index d2e9e2d1b014..6a662d0161b4 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -419,8 +419,7 @@ finalize:
if (err)
return err;
- err = bcm_request_irq(bcm);
- if (!err)
+ if (!bcm_request_irq(bcm))
err = bcm_setup_sleep(hu);
return err;
@@ -657,6 +656,15 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
},
.driver_data = &acpi_active_low,
},
+ {
+ .ident = "Asus T100CHI",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR,
+ "ASUSTeK COMPUTER INC."),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100CHI"),
+ },
+ .driver_data = &acpi_active_low,
+ },
{ /* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */
.ident = "Lenovo ThinkPad 8",
.matches = {
diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c
index 7de0edc0ff8c..aea930101dd2 100644
--- a/drivers/bluetooth/hci_serdev.c
+++ b/drivers/bluetooth/hci_serdev.c
@@ -31,7 +31,7 @@
#include "hci_uart.h"
-struct serdev_device_ops hci_serdev_client_ops;
+static struct serdev_device_ops hci_serdev_client_ops;
static inline void hci_uart_tx_complete(struct hci_uart *hu, int pkt_type)
{
@@ -268,7 +268,7 @@ static int hci_uart_receive_buf(struct serdev_device *serdev, const u8 *data,
return count;
}
-struct serdev_device_ops hci_serdev_client_ops = {
+static struct serdev_device_ops hci_serdev_client_ops = {
.receive_buf = hci_uart_receive_buf,
.write_wakeup = hci_uart_write_wakeup,
};
diff --git a/drivers/char/random.c b/drivers/char/random.c
index e870f329db88..01a260f67437 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -803,13 +803,13 @@ static int crng_fast_load(const char *cp, size_t len)
p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp;
cp++; crng_init_cnt++; len--;
}
+ spin_unlock_irqrestore(&primary_crng.lock, flags);
if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
invalidate_batched_entropy();
crng_init = 1;
wake_up_interruptible(&crng_init_wait);
pr_notice("random: fast init done\n");
}
- spin_unlock_irqrestore(&primary_crng.lock, flags);
return 1;
}
@@ -841,6 +841,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
}
memzero_explicit(&buf, sizeof(buf));
crng->init_time = jiffies;
+ spin_unlock_irqrestore(&primary_crng.lock, flags);
if (crng == &primary_crng && crng_init < 2) {
invalidate_batched_entropy();
crng_init = 2;
@@ -848,7 +849,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
wake_up_interruptible(&crng_init_wait);
pr_notice("random: crng init done\n");
}
- spin_unlock_irqrestore(&primary_crng.lock, flags);
}
static inline void crng_wait_ready(void)
@@ -2041,8 +2041,8 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
u64 get_random_u64(void)
{
u64 ret;
- bool use_lock = crng_init < 2;
- unsigned long flags;
+ bool use_lock = READ_ONCE(crng_init) < 2;
+ unsigned long flags = 0;
struct batched_entropy *batch;
#if BITS_PER_LONG == 64
@@ -2073,8 +2073,8 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32);
u32 get_random_u32(void)
{
u32 ret;
- bool use_lock = crng_init < 2;
- unsigned long flags;
+ bool use_lock = READ_ONCE(crng_init) < 2;
+ unsigned long flags = 0;
struct batched_entropy *batch;
if (arch_get_random_int(&ret))
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 4bed671e490e..8b5c30062d99 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -1209,9 +1209,9 @@ arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame)
return 0;
}
- rate = readl_relaxed(frame + CNTFRQ);
+ rate = readl_relaxed(base + CNTFRQ);
- iounmap(frame);
+ iounmap(base);
return rate;
}
diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c
index 44e5e951583b..8e64b8460f11 100644
--- a/drivers/clocksource/cadence_ttc_timer.c
+++ b/drivers/clocksource/cadence_ttc_timer.c
@@ -18,6 +18,7 @@
#include <linux/clk.h>
#include <linux/interrupt.h>
#include <linux/clockchips.h>
+#include <linux/clocksource.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/slab.h>
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index 2e9c830ae1cd..c4656c4d44a6 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -12,6 +12,7 @@
#include <linux/clk.h>
#include <linux/clockchips.h>
+#include <linux/clocksource.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index 5104b6398139..c83ea68be792 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -721,7 +721,7 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
u32 set;
if (!of_device_is_compatible(mvchip->chip.of_node,
- "marvell,armada-370-xp-gpio"))
+ "marvell,armada-370-gpio"))
return 0;
if (IS_ERR(mvchip->clk))
@@ -852,7 +852,7 @@ static const struct of_device_id mvebu_gpio_of_match[] = {
.data = (void *) MVEBU_GPIO_SOC_VARIANT_ARMADAXP,
},
{
- .compatible = "marvell,armada-370-xp-gpio",
+ .compatible = "marvell,armada-370-gpio",
.data = (void *) MVEBU_GPIO_SOC_VARIANT_ORION,
},
{
@@ -1128,7 +1128,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
mvchip);
}
- /* Armada 370/XP has simple PWM support for GPIO lines */
+ /* Some MVEBU SoCs have simple PWM support for GPIO lines */
if (IS_ENABLED(CONFIG_PWM))
return mvebu_pwm_probe(pdev, mvchip, id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 1cf78f4dd339..1e8e1123ddf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -693,6 +693,10 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n",
adev->clock.default_dispclk / 100);
adev->clock.default_dispclk = 60000;
+ } else if (adev->clock.default_dispclk <= 60000) {
+ DRM_INFO("Changing default dispclk from %dMhz to 625Mhz\n",
+ adev->clock.default_dispclk / 100);
+ adev->clock.default_dispclk = 62500;
}
adev->clock.dp_extclk =
le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f2d705e6a75a..ab6b0d0febab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -449,6 +449,7 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
/* Vega 10 */
{0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
index 8c9bc75a9c2d..8a0818b23ea4 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
@@ -165,7 +165,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
- ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+ ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
memset(&args, 0, sizeof(args));
@@ -178,7 +178,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
{
int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
- ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+ ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
memset(&args, 0, sizeof(args));
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 9f847615ac74..48ca2457df8c 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -1229,21 +1229,6 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
if (!connector)
return -ENOENT;
- drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
- encoder = drm_connector_get_encoder(connector);
- if (encoder)
- out_resp->encoder_id = encoder->base.id;
- else
- out_resp->encoder_id = 0;
-
- ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic,
- (uint32_t __user *)(unsigned long)(out_resp->props_ptr),
- (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr),
- &out_resp->count_props);
- drm_modeset_unlock(&dev->mode_config.connection_mutex);
- if (ret)
- goto out_unref;
-
for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++)
if (connector->encoder_ids[i] != 0)
encoders_count++;
@@ -1256,7 +1241,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
if (put_user(connector->encoder_ids[i],
encoder_ptr + copied)) {
ret = -EFAULT;
- goto out_unref;
+ goto out;
}
copied++;
}
@@ -1300,15 +1285,32 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
if (copy_to_user(mode_ptr + copied,
&u_mode, sizeof(u_mode))) {
ret = -EFAULT;
+ mutex_unlock(&dev->mode_config.mutex);
+
goto out;
}
copied++;
}
}
out_resp->count_modes = mode_count;
-out:
mutex_unlock(&dev->mode_config.mutex);
-out_unref:
+
+ drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+ encoder = drm_connector_get_encoder(connector);
+ if (encoder)
+ out_resp->encoder_id = encoder->base.id;
+ else
+ out_resp->encoder_id = 0;
+
+ /* Only grab properties after probing, to make sure EDID and other
+ * properties reflect the latest status. */
+ ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic,
+ (uint32_t __user *)(unsigned long)(out_resp->props_ptr),
+ (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr),
+ &out_resp->count_props);
+ drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+out:
drm_connector_put(connector);
return ret;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index c4a091e87426..e437fba1209d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -106,9 +106,10 @@ struct etnaviv_gem_submit {
struct etnaviv_gpu *gpu;
struct ww_acquire_ctx ticket;
struct dma_fence *fence;
+ u32 flags;
unsigned int nr_bos;
struct etnaviv_gem_submit_bo bos[0];
- u32 flags;
+ /* No new members here, the previous one is variable-length! */
};
int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index de80ee1b71df..1013765274da 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -172,7 +172,7 @@ static int submit_fence_sync(const struct etnaviv_gem_submit *submit)
for (i = 0; i < submit->nr_bos; i++) {
struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
- bool explicit = !(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
+ bool explicit = !!(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write,
explicit);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d689e511744e..4bd1467c17b1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -292,6 +292,8 @@ static int per_file_stats(int id, void *ptr, void *data)
struct file_stats *stats = data;
struct i915_vma *vma;
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
stats->count++;
stats->total += obj->base.size;
if (!obj->bind_count)
@@ -476,6 +478,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
struct drm_i915_gem_request *request;
struct task_struct *task;
+ mutex_lock(&dev->struct_mutex);
+
memset(&stats, 0, sizeof(stats));
stats.file_priv = file->driver_priv;
spin_lock(&file->table_lock);
@@ -487,7 +491,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
* still alive (e.g. get_pid(current) => fork() => exit()).
* Therefore, we need to protect this ->comm access using RCU.
*/
- mutex_lock(&dev->struct_mutex);
request = list_first_entry_or_null(&file_priv->mm.request_list,
struct drm_i915_gem_request,
client_link);
@@ -497,6 +500,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
PIDTYPE_PID);
print_file_stats(m, task ? task->comm : "<unknown>", stats);
rcu_read_unlock();
+
mutex_unlock(&dev->struct_mutex);
}
mutex_unlock(&dev->filelist_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 462031cbd77f..615f0a855222 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2285,8 +2285,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
struct page *page;
unsigned long last_pfn = 0; /* suppress gcc warning */
unsigned int max_segment;
+ gfp_t noreclaim;
int ret;
- gfp_t gfp;
/* Assert that the object is not currently in any GPU domain. As it
* wasn't in the GTT, there shouldn't be any way it could have been in
@@ -2315,22 +2315,31 @@ rebuild_st:
* Fail silently without starting the shrinker
*/
mapping = obj->base.filp->f_mapping;
- gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
- gfp |= __GFP_NORETRY | __GFP_NOWARN;
+ noreclaim = mapping_gfp_constraint(mapping,
+ ~(__GFP_IO | __GFP_RECLAIM));
+ noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
sg = st->sgl;
st->nents = 0;
for (i = 0; i < page_count; i++) {
- page = shmem_read_mapping_page_gfp(mapping, i, gfp);
- if (unlikely(IS_ERR(page))) {
- i915_gem_shrink(dev_priv,
- page_count,
- I915_SHRINK_BOUND |
- I915_SHRINK_UNBOUND |
- I915_SHRINK_PURGEABLE);
+ const unsigned int shrink[] = {
+ I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
+ 0,
+ }, *s = shrink;
+ gfp_t gfp = noreclaim;
+
+ do {
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
- }
- if (unlikely(IS_ERR(page))) {
- gfp_t reclaim;
+ if (likely(!IS_ERR(page)))
+ break;
+
+ if (!*s) {
+ ret = PTR_ERR(page);
+ goto err_sg;
+ }
+
+ i915_gem_shrink(dev_priv, 2 * page_count, *s++);
+ cond_resched();
/* We've tried hard to allocate the memory by reaping
* our own buffer, now let the real VM do its job and
@@ -2340,15 +2349,26 @@ rebuild_st:
* defer the oom here by reporting the ENOMEM back
* to userspace.
*/
- reclaim = mapping_gfp_mask(mapping);
- reclaim |= __GFP_NORETRY; /* reclaim, but no oom */
-
- page = shmem_read_mapping_page_gfp(mapping, i, reclaim);
- if (IS_ERR(page)) {
- ret = PTR_ERR(page);
- goto err_sg;
+ if (!*s) {
+ /* reclaim and warn, but no oom */
+ gfp = mapping_gfp_mask(mapping);
+
+ /* Our bo are always dirty and so we require
+ * kswapd to reclaim our pages (direct reclaim
+ * does not effectively begin pageout of our
+ * buffers on its own). However, direct reclaim
+ * only waits for kswapd when under allocation
+ * congestion. So as a result __GFP_RECLAIM is
+ * unreliable and fails to actually reclaim our
+ * dirty pages -- unless you try over and over
+ * again with !__GFP_NORETRY. However, we still
+ * want to fail this allocation rather than
+ * trigger the out-of-memory killer and for
+ * this we want the future __GFP_MAYFAIL.
+ */
}
- }
+ } while (1);
+
if (!i ||
sg->length >= max_segment ||
page_to_pfn(page) != last_pfn + 1) {
@@ -4222,6 +4242,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
mapping = obj->base.filp->f_mapping;
mapping_set_gfp_mask(mapping, mask);
+ GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
i915_gem_object_init(obj, &i915_gem_object_ops);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a3e59c8ef27b..9ad13eeed904 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -546,11 +546,12 @@ repeat:
}
static int
-i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
+i915_gem_execbuffer_relocate_entry(struct i915_vma *vma,
struct eb_vmas *eb,
struct drm_i915_gem_relocation_entry *reloc,
struct reloc_cache *cache)
{
+ struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct drm_gem_object *target_obj;
struct drm_i915_gem_object *target_i915_obj;
@@ -628,6 +629,16 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
return -EINVAL;
}
+ /*
+ * If we write into the object, we need to force the synchronisation
+ * barrier, either with an asynchronous clflush or if we executed the
+ * patching using the GPU (though that should be serialised by the
+ * timeline). To be completely sure, and since we are required to
+ * do relocations we are already stalling, disable the user's opt
+ * out of our synchronisation.
+ */
+ vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
+
ret = relocate_entry(obj, reloc, cache, target_offset);
if (ret)
return ret;
@@ -678,7 +689,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
do {
u64 offset = r->presumed_offset;
- ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
+ ret = i915_gem_execbuffer_relocate_entry(vma, eb, r, &cache);
if (ret)
goto out;
@@ -726,7 +737,7 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
reloc_cache_init(&cache, eb->i915);
for (i = 0; i < entry->relocation_count; i++) {
- ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
+ ret = i915_gem_execbuffer_relocate_entry(vma, eb, &relocs[i], &cache);
if (ret)
break;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5ddbc9499775..a74d0ac737cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -623,7 +623,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
* GPU processing the request, we never over-estimate the
* position of the head.
*/
- req->head = req->ring->tail;
+ req->head = req->ring->emit;
/* Check that we didn't interrupt ourselves with a new request */
GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 1642fff9cf13..ab5140ba108d 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
GEM_BUG_ON(freespace < wqi_size);
/* The GuC firmware wants the tail index in QWords, not bytes */
- tail = rq->tail;
- assert_ring_tail_valid(rq->ring, rq->tail);
- tail >>= 3;
+ tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 1aba47024656..f066e2d785f5 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -650,6 +650,11 @@ int i915_vma_unbind(struct i915_vma *vma)
break;
}
+ if (!ret) {
+ ret = i915_gem_active_retire(&vma->last_fence,
+ &vma->vm->i915->drm.struct_mutex);
+ }
+
__i915_vma_unpin(vma);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 96b0b01677e2..9106ea32b048 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -120,7 +120,8 @@ static void intel_crtc_init_scalers(struct intel_crtc *crtc,
static void skylake_pfit_enable(struct intel_crtc *crtc);
static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force);
static void ironlake_pfit_enable(struct intel_crtc *crtc);
-static void intel_modeset_setup_hw_state(struct drm_device *dev);
+static void intel_modeset_setup_hw_state(struct drm_device *dev,
+ struct drm_modeset_acquire_ctx *ctx);
static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc);
struct intel_limit {
@@ -3449,7 +3450,7 @@ __intel_display_resume(struct drm_device *dev,
struct drm_crtc *crtc;
int i, ret;
- intel_modeset_setup_hw_state(dev);
+ intel_modeset_setup_hw_state(dev, ctx);
i915_redisable_vga(to_i915(dev));
if (!state)
@@ -5825,7 +5826,8 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state,
intel_update_watermarks(intel_crtc);
}
-static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
+static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
+ struct drm_modeset_acquire_ctx *ctx)
{
struct intel_encoder *encoder;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -5855,7 +5857,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
return;
}
- state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
+ state->acquire_ctx = ctx;
/* Everything's already locked, -EDEADLK can't happen. */
crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
@@ -15030,7 +15032,7 @@ int intel_modeset_init(struct drm_device *dev)
intel_setup_outputs(dev_priv);
drm_modeset_lock_all(dev);
- intel_modeset_setup_hw_state(dev);
+ intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx);
drm_modeset_unlock_all(dev);
for_each_intel_crtc(dev, crtc) {
@@ -15067,13 +15069,13 @@ int intel_modeset_init(struct drm_device *dev)
return 0;
}
-static void intel_enable_pipe_a(struct drm_device *dev)
+static void intel_enable_pipe_a(struct drm_device *dev,
+ struct drm_modeset_acquire_ctx *ctx)
{
struct intel_connector *connector;
struct drm_connector_list_iter conn_iter;
struct drm_connector *crt = NULL;
struct intel_load_detect_pipe load_detect_temp;
- struct drm_modeset_acquire_ctx *ctx = dev->mode_config.acquire_ctx;
int ret;
/* We can't just switch on the pipe A, we need to set things up with a
@@ -15145,7 +15147,8 @@ static bool has_pch_trancoder(struct drm_i915_private *dev_priv,
(HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A);
}
-static void intel_sanitize_crtc(struct intel_crtc *crtc)
+static void intel_sanitize_crtc(struct intel_crtc *crtc,
+ struct drm_modeset_acquire_ctx *ctx)
{
struct drm_device *dev = crtc->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
@@ -15191,7 +15194,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
plane = crtc->plane;
crtc->base.primary->state->visible = true;
crtc->plane = !plane;
- intel_crtc_disable_noatomic(&crtc->base);
+ intel_crtc_disable_noatomic(&crtc->base, ctx);
crtc->plane = plane;
}
@@ -15201,13 +15204,13 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
* resume. Force-enable the pipe to fix this, the update_dpms
* call below we restore the pipe to the right state, but leave
* the required bits on. */
- intel_enable_pipe_a(dev);
+ intel_enable_pipe_a(dev, ctx);
}
/* Adjust the state of the output pipe according to whether we
* have active connectors/encoders. */
if (crtc->active && !intel_crtc_has_encoders(crtc))
- intel_crtc_disable_noatomic(&crtc->base);
+ intel_crtc_disable_noatomic(&crtc->base, ctx);
if (crtc->active || HAS_GMCH_DISPLAY(dev_priv)) {
/*
@@ -15505,7 +15508,8 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv)
* and sanitizes it to the current state
*/
static void
-intel_modeset_setup_hw_state(struct drm_device *dev)
+intel_modeset_setup_hw_state(struct drm_device *dev,
+ struct drm_modeset_acquire_ctx *ctx)
{
struct drm_i915_private *dev_priv = to_i915(dev);
enum pipe pipe;
@@ -15525,7 +15529,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev)
for_each_pipe(dev_priv, pipe) {
crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
- intel_sanitize_crtc(crtc);
+ intel_sanitize_crtc(crtc, ctx);
intel_dump_pipe_config(crtc, crtc->config,
"[setup_hw_state]");
}
diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
index 6532e226db29..40ba3134545e 100644
--- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
@@ -119,8 +119,6 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector,
struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
struct intel_panel *panel = &connector->panel;
- intel_dp_aux_enable_backlight(connector);
-
if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT)
panel->backlight.max = 0xFFFF;
else
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index dac4e003c1f3..62f44d3e7c43 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -326,8 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
u32 *reg_state = ce->lrc_reg_state;
- assert_ring_tail_valid(rq->ring, rq->tail);
- reg_state[CTX_RING_TAIL+1] = rq->tail;
+ reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
/* True 32b PPGTT with dynamic page allocation: update PDP
* registers and point the unallocated PDPs to scratch page.
@@ -2036,8 +2035,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
ce->state->obj->mm.dirty = true;
i915_gem_object_unpin_map(ce->state->obj);
- ce->ring->head = ce->ring->tail = 0;
- intel_ring_update_space(ce->ring);
+ intel_ring_reset(ce->ring, 0);
}
}
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 66a2b8b83972..513a0f4b469b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size)
void intel_ring_update_space(struct intel_ring *ring)
{
- ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
+ ring->space = __intel_ring_space(ring->head, ring->emit, ring->size);
}
static int
@@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
i915_gem_request_submit(request);
- assert_ring_tail_valid(request->ring, request->tail);
- I915_WRITE_TAIL(request->engine, request->tail);
+ I915_WRITE_TAIL(request->engine,
+ intel_ring_set_tail(request->ring, request->tail));
}
static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
@@ -1316,11 +1316,23 @@ err:
return PTR_ERR(addr);
}
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+ GEM_BUG_ON(!list_empty(&ring->request_list));
+ ring->tail = tail;
+ ring->head = tail;
+ ring->emit = tail;
+ intel_ring_update_space(ring);
+}
+
void intel_ring_unpin(struct intel_ring *ring)
{
GEM_BUG_ON(!ring->vma);
GEM_BUG_ON(!ring->vaddr);
+ /* Discard any unused bytes beyond that submitted to hw. */
+ intel_ring_reset(ring, ring->tail);
+
if (i915_vma_is_map_and_fenceable(ring->vma))
i915_vma_unpin_iomap(ring->vma);
else
@@ -1562,8 +1574,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ /* Restart from the beginning of the rings for convenience */
for_each_engine(engine, dev_priv, id)
- engine->buffer->head = engine->buffer->tail;
+ intel_ring_reset(engine->buffer, 0);
}
static int ring_request_alloc(struct drm_i915_gem_request *request)
@@ -1616,7 +1629,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
unsigned space;
/* Would completion of this request free enough space? */
- space = __intel_ring_space(target->postfix, ring->tail,
+ space = __intel_ring_space(target->postfix, ring->emit,
ring->size);
if (space >= bytes)
break;
@@ -1641,8 +1654,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
{
struct intel_ring *ring = req->ring;
- int remain_actual = ring->size - ring->tail;
- int remain_usable = ring->effective_size - ring->tail;
+ int remain_actual = ring->size - ring->emit;
+ int remain_usable = ring->effective_size - ring->emit;
int bytes = num_dwords * sizeof(u32);
int total_bytes, wait_bytes;
bool need_wrap = false;
@@ -1678,17 +1691,17 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
if (unlikely(need_wrap)) {
GEM_BUG_ON(remain_actual > ring->space);
- GEM_BUG_ON(ring->tail + remain_actual > ring->size);
+ GEM_BUG_ON(ring->emit + remain_actual > ring->size);
/* Fill the tail with MI_NOOP */
- memset(ring->vaddr + ring->tail, 0, remain_actual);
- ring->tail = 0;
+ memset(ring->vaddr + ring->emit, 0, remain_actual);
+ ring->emit = 0;
ring->space -= remain_actual;
}
- GEM_BUG_ON(ring->tail > ring->size - bytes);
- cs = ring->vaddr + ring->tail;
- ring->tail += bytes;
+ GEM_BUG_ON(ring->emit > ring->size - bytes);
+ cs = ring->vaddr + ring->emit;
+ ring->emit += bytes;
ring->space -= bytes;
GEM_BUG_ON(ring->space < 0);
@@ -1699,7 +1712,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
{
int num_dwords =
- (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+ (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
u32 *cs;
if (num_dwords == 0)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a82a0807f64d..f7144fe09613 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -145,6 +145,7 @@ struct intel_ring {
u32 head;
u32 tail;
+ u32 emit;
int space;
int size;
@@ -488,6 +489,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+void intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);
@@ -511,7 +514,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
* reserved for the command packet (i.e. the value passed to
* intel_ring_begin()).
*/
- GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
+ GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
}
static inline u32
@@ -540,7 +543,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
GEM_BUG_ON(tail >= ring->size);
}
-void intel_ring_update_space(struct intel_ring *ring);
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+ /* Whilst writes to the tail are strictly ordered, there is no
+ * serialisation between readers and the writers. The tail may be
+ * read by i915_gem_request_retire() just as it is being updated
+ * by execlists, as although the breadcrumb is complete, the context
+ * switch hasn't been seen.
+ */
+ assert_ring_tail_valid(ring, tail);
+ ring->tail = tail;
+ return tail;
+}
void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 432480ff9d22..3178ba0c537c 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -3393,6 +3393,13 @@ void radeon_combios_asic_init(struct drm_device *dev)
rdev->pdev->subsystem_vendor == 0x103c &&
rdev->pdev->subsystem_device == 0x280a)
return;
+ /* quirk for rs4xx Toshiba Satellite L20-183 laptop to make it resume
+ * - it hangs on resume inside the dynclk 1 table.
+ */
+ if (rdev->family == CHIP_RS400 &&
+ rdev->pdev->subsystem_vendor == 0x1179 &&
+ rdev->pdev->subsystem_device == 0xff31)
+ return;
/* DYN CLK 1 */
table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 6ecf42783d4b..0a6444d72000 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -136,6 +136,10 @@ static struct radeon_px_quirk radeon_px_quirk_list[] = {
* https://bugzilla.kernel.org/show_bug.cgi?id=51381
*/
{ PCI_VENDOR_ID_ATI, 0x6840, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
+ /* Asus K53TK laptop with AMD A6-3420M APU and Radeon 7670m GPU
+ * https://bugs.freedesktop.org/show_bug.cgi?id=101491
+ */
+ { PCI_VENDOR_ID_ATI, 0x6741, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
/* macbook pro 8.2 */
{ PCI_VENDOR_ID_ATI, 0x6741, PCI_VENDOR_ID_APPLE, 0x00e2, RADEON_PX_QUIRK_LONG_WAKEUP },
{ 0, 0, 0, 0, 0 },
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
index 13db8a2851ed..1f013d45c9e9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
@@ -321,6 +321,7 @@ void vmw_cmdbuf_res_man_destroy(struct vmw_cmdbuf_res_manager *man)
list_for_each_entry_safe(entry, next, &man->list, head)
vmw_cmdbuf_res_free(man, entry);
+ drm_ht_remove(&man->resources);
kfree(man);
}
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 95ed17183e73..54a47b40546f 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -734,9 +734,9 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx,
* the first read operation, otherwise the first read cost
* one extra clock cycle.
*/
- temp = readb(i2c_imx->base + IMX_I2C_I2CR);
+ temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
temp |= I2CR_MTX;
- writeb(temp, i2c_imx->base + IMX_I2C_I2CR);
+ imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
}
msgs->buf[msgs->len-1] = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
@@ -857,9 +857,9 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo
* the first read operation, otherwise the first read cost
* one extra clock cycle.
*/
- temp = readb(i2c_imx->base + IMX_I2C_I2CR);
+ temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
temp |= I2CR_MTX;
- writeb(temp, i2c_imx->base + IMX_I2C_I2CR);
+ imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
}
} else if (i == (msgs->len - 2)) {
dev_dbg(&i2c_imx->adapter.dev,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 9f7e18612322..dc2f59e33971 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -223,8 +223,8 @@ static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
return 0;
}
-static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
- struct ib_port_attr *props)
+static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+ struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
@@ -232,12 +232,14 @@ static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
enum ib_mtu ndev_ib_mtu;
u16 qkey_viol_cntr;
u32 eth_prot_oper;
+ int err;
/* Possible bad flows are checked before filling out props so in case
* of an error it will still be zeroed out.
*/
- if (mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper, port_num))
- return;
+ err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper, port_num);
+ if (err)
+ return err;
translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
&props->active_width);
@@ -258,7 +260,7 @@ static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
ndev = mlx5_ib_get_netdev(device, port_num);
if (!ndev)
- return;
+ return 0;
if (mlx5_lag_is_active(dev->mdev)) {
rcu_read_lock();
@@ -281,75 +283,49 @@ static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
dev_put(ndev);
props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
+ return 0;
}
-static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- void *mlx5_addr)
+static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
{
-#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
- char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
- source_l3_address);
- void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
- source_mac_47_32);
-
- if (!gid)
- return;
+ enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ u8 roce_version = 0;
+ u8 roce_l3_type = 0;
+ bool vlan = false;
+ u8 mac[ETH_ALEN];
+ u16 vlan_id = 0;
- ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
+ if (gid) {
+ gid_type = attr->gid_type;
+ ether_addr_copy(mac, attr->ndev->dev_addr);
- if (is_vlan_dev(attr->ndev)) {
- MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
- MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+ if (is_vlan_dev(attr->ndev)) {
+ vlan = true;
+ vlan_id = vlan_dev_vlan_id(attr->ndev);
+ }
}
- switch (attr->gid_type) {
+ switch (gid_type) {
case IB_GID_TYPE_IB:
- MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+ roce_version = MLX5_ROCE_VERSION_1;
break;
case IB_GID_TYPE_ROCE_UDP_ENCAP:
- MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+ roce_version = MLX5_ROCE_VERSION_2;
+ if (ipv6_addr_v4mapped((void *)gid))
+ roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4;
+ else
+ roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6;
break;
default:
- WARN_ON(true);
+ mlx5_ib_warn(dev, "Unexpected GID type %u\n", gid_type);
}
- if (attr->gid_type != IB_GID_TYPE_IB) {
- if (ipv6_addr_v4mapped((void *)gid))
- MLX5_SET_RA(mlx5_addr, roce_l3_type,
- MLX5_ROCE_L3_TYPE_IPV4);
- else
- MLX5_SET_RA(mlx5_addr, roce_l3_type,
- MLX5_ROCE_L3_TYPE_IPV6);
- }
-
- if ((attr->gid_type == IB_GID_TYPE_IB) ||
- !ipv6_addr_v4mapped((void *)gid))
- memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
- else
- memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
-}
-
-static int set_roce_addr(struct ib_device *device, u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr)
-{
- struct mlx5_ib_dev *dev = to_mdev(device);
- u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
- void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
- enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
-
- if (ll != IB_LINK_LAYER_ETHERNET)
- return -EINVAL;
-
- ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
-
- MLX5_SET(set_roce_address_in, in, roce_address_index, index);
- MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
- return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+ return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
+ roce_l3_type, gid->raw, mac, vlan,
+ vlan_id);
}
static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
@@ -357,13 +333,13 @@ static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
const struct ib_gid_attr *attr,
__always_unused void **context)
{
- return set_roce_addr(device, port_num, index, gid, attr);
+ return set_roce_addr(to_mdev(device), port_num, index, gid, attr);
}
static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
unsigned int index, __always_unused void **context)
{
- return set_roce_addr(device, port_num, index, NULL, NULL);
+ return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL);
}
__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
@@ -978,20 +954,31 @@ out:
int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
+ unsigned int count;
+ int ret;
+
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
- return mlx5_query_mad_ifc_port(ibdev, port, props);
+ ret = mlx5_query_mad_ifc_port(ibdev, port, props);
+ break;
case MLX5_VPORT_ACCESS_METHOD_HCA:
- return mlx5_query_hca_port(ibdev, port, props);
+ ret = mlx5_query_hca_port(ibdev, port, props);
+ break;
case MLX5_VPORT_ACCESS_METHOD_NIC:
- mlx5_query_port_roce(ibdev, port, props);
- return 0;
+ ret = mlx5_query_port_roce(ibdev, port, props);
+ break;
default:
- return -EINVAL;
+ ret = -EINVAL;
+ }
+
+ if (!ret && props) {
+ count = mlx5_core_reserved_gids_count(to_mdev(ibdev)->mdev);
+ props->gid_tbl_len -= count;
}
+ return ret;
}
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 30b256a2c54e..de4025deaa4a 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -742,7 +742,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
if (type == NES_TIMER_TYPE_SEND) {
new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
- atomic_inc(&new_send->skb->users);
+ refcount_inc(&new_send->skb->users);
spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
cm_node->send_entry = new_send;
add_ref_cm_node(cm_node);
@@ -924,7 +924,7 @@ static void nes_cm_timer_tick(unsigned long pass)
flags);
break;
}
- atomic_inc(&send_entry->skb->users);
+ refcount_inc(&send_entry->skb->users);
cm_packets_retrans++;
nes_debug(NES_DBG_CM, "Retransmitting send_entry %p "
"for node %p, jiffies = %lu, time to send = "
diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c
index e37d37273182..f600f3a7a3c6 100644
--- a/drivers/input/misc/soc_button_array.c
+++ b/drivers/input/misc/soc_button_array.c
@@ -248,7 +248,8 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev)
if (!btns_desc) {
dev_err(dev, "ACPI Button Descriptors not found\n");
- return ERR_PTR(-ENODEV);
+ button_info = ERR_PTR(-ENODEV);
+ goto out;
}
/* The first package describes the collection */
@@ -264,24 +265,31 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev)
}
if (collection_uid == -1) {
dev_err(dev, "Invalid Button Collection Descriptor\n");
- return ERR_PTR(-ENODEV);
+ button_info = ERR_PTR(-ENODEV);
+ goto out;
}
/* There are package.count - 1 buttons + 1 terminating empty entry */
button_info = devm_kcalloc(dev, btns_desc->package.count,
sizeof(*button_info), GFP_KERNEL);
- if (!button_info)
- return ERR_PTR(-ENOMEM);
+ if (!button_info) {
+ button_info = ERR_PTR(-ENOMEM);
+ goto out;
+ }
/* Parse the button descriptors */
for (i = 1, btn = 0; i < btns_desc->package.count; i++, btn++) {
if (soc_button_parse_btn_desc(dev,
&btns_desc->package.elements[i],
collection_uid,
- &button_info[btn]))
- return ERR_PTR(-ENODEV);
+ &button_info[btn])) {
+ button_info = ERR_PTR(-ENODEV);
+ goto out;
+ }
}
+out:
+ kfree(buf.pointer);
return button_info;
}
diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c
index dea63e2db3e6..f5206e2c767e 100644
--- a/drivers/input/rmi4/rmi_f54.c
+++ b/drivers/input/rmi4/rmi_f54.c
@@ -31,9 +31,6 @@
#define F54_GET_REPORT 1
#define F54_FORCE_CAL 2
-/* Fixed sizes of reports */
-#define F54_QUERY_LEN 27
-
/* F54 capabilities */
#define F54_CAP_BASELINE (1 << 2)
#define F54_CAP_IMAGE8 (1 << 3)
@@ -95,7 +92,6 @@ struct rmi_f54_reports {
struct f54_data {
struct rmi_function *fn;
- u8 qry[F54_QUERY_LEN];
u8 num_rx_electrodes;
u8 num_tx_electrodes;
u8 capabilities;
@@ -632,22 +628,23 @@ static int rmi_f54_detect(struct rmi_function *fn)
{
int error;
struct f54_data *f54;
+ u8 buf[6];
f54 = dev_get_drvdata(&fn->dev);
error = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr,
- &f54->qry, sizeof(f54->qry));
+ buf, sizeof(buf));
if (error) {
dev_err(&fn->dev, "%s: Failed to query F54 properties\n",
__func__);
return error;
}
- f54->num_rx_electrodes = f54->qry[0];
- f54->num_tx_electrodes = f54->qry[1];
- f54->capabilities = f54->qry[2];
- f54->clock_rate = f54->qry[3] | (f54->qry[4] << 8);
- f54->family = f54->qry[5];
+ f54->num_rx_electrodes = buf[0];
+ f54->num_tx_electrodes = buf[1];
+ f54->capabilities = buf[2];
+ f54->clock_rate = buf[3] | (buf[4] << 8);
+ f54->family = buf[5];
rmi_dbg(RMI_DEBUG_FN, &fn->dev, "F54 num_rx_electrodes: %d\n",
f54->num_rx_electrodes);
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 09720d950686..f932a83b4990 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -723,6 +723,13 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"),
},
},
+ {
+ /* Fujitsu UH544 laptop */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK UH544"),
+ },
+ },
{ }
};
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index eb7fbe159963..929f8558bf1c 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -140,7 +140,7 @@ static inline void gic_map_to_vpe(unsigned int intr, unsigned int vpe)
}
#ifdef CONFIG_CLKSRC_MIPS_GIC
-u64 gic_read_count(void)
+u64 notrace gic_read_count(void)
{
unsigned int hi, hi2, lo;
@@ -167,7 +167,7 @@ unsigned int gic_get_count_width(void)
return bits;
}
-void gic_write_compare(u64 cnt)
+void notrace gic_write_compare(u64 cnt)
{
if (mips_cm_is64) {
gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_COMPARE), cnt);
@@ -179,7 +179,7 @@ void gic_write_compare(u64 cnt)
}
}
-void gic_write_cpu_compare(u64 cnt, int cpu)
+void notrace gic_write_cpu_compare(u64 cnt, int cpu)
{
unsigned long flags;
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 99e5f9751e8b..c5603d1a07d6 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -155,7 +155,7 @@ mISDN_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
copied = skb->len + MISDN_HEADER_LEN;
if (len < copied) {
if (flags & MSG_PEEK)
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
else
skb_queue_head(&sk->sk_receive_queue, skb);
return -ENOSPC;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 7910bfe50da4..93b181088168 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1105,10 +1105,13 @@ static void schedule_autocommit(struct dm_integrity_c *ic)
static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
{
struct bio *bio;
- spin_lock_irq(&ic->endio_wait.lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ic->endio_wait.lock, flags);
bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
bio_list_add(&ic->flush_bio_list, bio);
- spin_unlock_irq(&ic->endio_wait.lock);
+ spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
+
queue_work(ic->commit_wq, &ic->commit_work);
}
@@ -3040,6 +3043,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->error = "The device is too small";
goto bad;
}
+ if (ti->len > ic->provided_data_sectors) {
+ r = -EINVAL;
+ ti->error = "Not enough provided sectors for requested mapping size";
+ goto bad;
+ }
if (!buffer_sectors)
buffer_sectors = 1;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 3702e502466d..8d5ca30f6551 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -317,8 +317,8 @@ static void do_region(int op, int op_flags, unsigned region,
else if (op == REQ_OP_WRITE_SAME)
special_cmd_max_sectors = q->limits.max_write_same_sectors;
if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
- op == REQ_OP_WRITE_SAME) &&
- special_cmd_max_sectors == 0) {
+ op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
+ atomic_inc(&io->count);
dec_count(io, region, -EOPNOTSUPP);
return;
}
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 7d893228c40f..b4b75dad816a 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1927,7 +1927,7 @@ struct dm_raid_superblock {
/********************************************************************
* BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
*
- * FEATURE_FLAG_SUPPORTS_V190 in the features member indicates that those exist
+ * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
*/
__le32 flags; /* Flags defining array states for reshaping */
@@ -2092,6 +2092,11 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
sb->layout = cpu_to_le32(mddev->layout);
sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);
+ /********************************************************************
+ * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
+ *
+ * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
+ */
sb->new_level = cpu_to_le32(mddev->new_level);
sb->new_layout = cpu_to_le32(mddev->new_layout);
sb->new_stripe_sectors = cpu_to_le32(mddev->new_chunk_sectors);
@@ -2438,8 +2443,14 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
if (!test_and_clear_bit(FirstUse, &rdev->flags)) {
- /* Retrieve device size stored in superblock to be prepared for shrink */
- rdev->sectors = le64_to_cpu(sb->sectors);
+ /*
+ * Retrieve rdev size stored in superblock to be prepared for shrink.
+ * Check extended superblock members are present otherwise the size
+ * will not be set!
+ */
+ if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190)
+ rdev->sectors = le64_to_cpu(sb->sectors);
+
rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
if (rdev->recovery_offset == MaxSector)
set_bit(In_sync, &rdev->flags);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index e61c45047c25..4da8858856fb 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -145,6 +145,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list)
struct dm_raid1_bio_record {
struct mirror *m;
+ /* if details.bi_bdev == NULL, details were not saved */
struct dm_bio_details details;
region_t write_region;
};
@@ -1198,6 +1199,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
struct dm_raid1_bio_record *bio_record =
dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
+ bio_record->details.bi_bdev = NULL;
+
if (rw == WRITE) {
/* Save region for mirror_end_io() handler */
bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio);
@@ -1256,12 +1259,22 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
}
if (error == -EOPNOTSUPP)
- return error;
+ goto out;
if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
- return error;
+ goto out;
if (unlikely(error)) {
+ if (!bio_record->details.bi_bdev) {
+ /*
+ * There wasn't enough memory to record necessary
+ * information for a retry or there was no other
+ * mirror in-sync.
+ */
+ DMERR_LIMIT("Mirror read failed.");
+ return -EIO;
+ }
+
m = bio_record->m;
DMERR("Mirror read failed from %s. Trying alternative device.",
@@ -1277,6 +1290,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
bd = &bio_record->details;
dm_bio_restore(bd, bio);
+ bio_record->details.bi_bdev = NULL;
bio->bi_error = 0;
queue_bio(ms, bio, rw);
@@ -1285,6 +1299,9 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
DMERR("All replicated volumes dead, failing I/O");
}
+out:
+ bio_record->details.bi_bdev = NULL;
+
return error;
}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 17ad50daed08..28808e5ec0fd 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1094,6 +1094,19 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
return;
}
+ /*
+ * Increment the unmapped blocks. This prevents a race between the
+ * passdown io and reallocation of freed blocks.
+ */
+ r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
+ if (r) {
+ metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
+ bio_io_error(m->bio);
+ cell_defer_no_holder(tc, m->cell);
+ mempool_free(m, pool->mapping_pool);
+ return;
+ }
+
discard_parent = bio_alloc(GFP_NOIO, 1);
if (!discard_parent) {
DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
@@ -1114,19 +1127,6 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
end_discard(&op, r);
}
}
-
- /*
- * Increment the unmapped blocks. This prevents a race between the
- * passdown io and reallocation of freed blocks.
- */
- r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
- if (r) {
- metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
- bio_io_error(m->bio);
- cell_defer_no_holder(tc, m->cell);
- mempool_free(m, pool->mapping_pool);
- return;
- }
}
static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index 75488e65cd96..8d46e3ad9529 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -245,8 +245,7 @@ static int arizona_poll_reg(struct arizona *arizona,
int ret;
ret = regmap_read_poll_timeout(arizona->regmap,
- ARIZONA_INTERRUPT_RAW_STATUS_5, val,
- ((val & mask) == target),
+ reg, val, ((val & mask) == target),
ARIZONA_REG_POLL_DELAY_US,
timeout_ms * 1000);
if (ret)
diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h
index 20bfb9ba83ea..cbb4f8566bbe 100644
--- a/drivers/net/arcnet/arcdevice.h
+++ b/drivers/net/arcnet/arcdevice.h
@@ -269,6 +269,10 @@ struct arcnet_local {
struct timer_list timer;
+ struct net_device *dev;
+ int reply_status;
+ struct tasklet_struct reply_tasklet;
+
/*
* Buffer management: an ARCnet card has 4 x 512-byte buffers, each of
* which can be used for either sending or receiving. The new dynamic
diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index 62ee439d5882..fcfccbb3d9a2 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -51,6 +51,7 @@
#include <net/arp.h>
#include <linux/init.h>
#include <linux/jiffies.h>
+#include <linux/errqueue.h>
#include <linux/leds.h>
@@ -391,6 +392,52 @@ static void arcnet_timer(unsigned long data)
}
}
+static void arcnet_reply_tasklet(unsigned long data)
+{
+ struct arcnet_local *lp = (struct arcnet_local *)data;
+
+ struct sk_buff *ackskb, *skb;
+ struct sock_exterr_skb *serr;
+ struct sock *sk;
+ int ret;
+
+ local_irq_disable();
+ skb = lp->outgoing.skb;
+ if (!skb || !skb->sk) {
+ local_irq_enable();
+ return;
+ }
+
+ sock_hold(skb->sk);
+ sk = skb->sk;
+ ackskb = skb_clone_sk(skb);
+ sock_put(skb->sk);
+
+ if (!ackskb) {
+ local_irq_enable();
+ return;
+ }
+
+ serr = SKB_EXT_ERR(ackskb);
+ memset(serr, 0, sizeof(*serr));
+ serr->ee.ee_errno = ENOMSG;
+ serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
+ serr->ee.ee_data = skb_shinfo(skb)->tskey;
+ serr->ee.ee_info = lp->reply_status;
+
+ /* finally erasing outgoing skb */
+ dev_kfree_skb(lp->outgoing.skb);
+ lp->outgoing.skb = NULL;
+
+ ackskb->dev = lp->dev;
+
+ ret = sock_queue_err_skb(sk, ackskb);
+ if (ret)
+ kfree_skb(ackskb);
+
+ local_irq_enable();
+};
+
struct net_device *alloc_arcdev(const char *name)
{
struct net_device *dev;
@@ -401,6 +448,7 @@ struct net_device *alloc_arcdev(const char *name)
if (dev) {
struct arcnet_local *lp = netdev_priv(dev);
+ lp->dev = dev;
spin_lock_init(&lp->lock);
init_timer(&lp->timer);
lp->timer.data = (unsigned long) dev;
@@ -436,6 +484,9 @@ int arcnet_open(struct net_device *dev)
arc_cont(D_PROTO, "\n");
}
+ tasklet_init(&lp->reply_tasklet, arcnet_reply_tasklet,
+ (unsigned long)lp);
+
arc_printk(D_INIT, dev, "arcnet_open: resetting card.\n");
/* try to put the card in a defined state - if it fails the first
@@ -527,6 +578,8 @@ int arcnet_close(struct net_device *dev)
netif_stop_queue(dev);
netif_carrier_off(dev);
+ tasklet_kill(&lp->reply_tasklet);
+
/* flush TX and disable RX */
lp->hw.intmask(dev, 0);
lp->hw.command(dev, NOTXcmd); /* stop transmit */
@@ -635,13 +688,13 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb,
txbuf = -1;
if (txbuf != -1) {
+ lp->outgoing.skb = skb;
if (proto->prepare_tx(dev, pkt, skb->len, txbuf) &&
!proto->ack_tx) {
/* done right away and we don't want to acknowledge
* the package later - forget about it now
*/
dev->stats.tx_bytes += skb->len;
- dev_kfree_skb(skb);
} else {
/* do it the 'split' way */
lp->outgoing.proto = proto;
@@ -756,6 +809,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
struct net_device *dev = dev_id;
struct arcnet_local *lp;
int recbuf, status, diagstatus, didsomething, boguscount;
+ unsigned long flags;
int retval = IRQ_NONE;
arc_printk(D_DURING, dev, "\n");
@@ -765,7 +819,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
lp = netdev_priv(dev);
BUG_ON(!lp);
- spin_lock(&lp->lock);
+ spin_lock_irqsave(&lp->lock, flags);
/* RESET flag was enabled - if device is not running, we must
* clear it right away (but nothing else).
@@ -774,7 +828,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
if (lp->hw.status(dev) & RESETflag)
lp->hw.command(dev, CFLAGScmd | RESETclear);
lp->hw.intmask(dev, 0);
- spin_unlock(&lp->lock);
+ spin_unlock_irqrestore(&lp->lock, flags);
return retval;
}
@@ -842,8 +896,16 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
/* a transmit finished, and we're interested in it. */
if ((status & lp->intmask & TXFREEflag) || lp->timed_out) {
+ int ackstatus;
lp->intmask &= ~(TXFREEflag | EXCNAKflag);
+ if (status & TXACKflag)
+ ackstatus = 2;
+ else if (lp->excnak_pending)
+ ackstatus = 1;
+ else
+ ackstatus = 0;
+
arc_printk(D_DURING, dev, "TX IRQ (stat=%Xh)\n",
status);
@@ -866,18 +928,11 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
if (lp->outgoing.proto &&
lp->outgoing.proto->ack_tx) {
- int ackstatus;
-
- if (status & TXACKflag)
- ackstatus = 2;
- else if (lp->excnak_pending)
- ackstatus = 1;
- else
- ackstatus = 0;
-
lp->outgoing.proto
->ack_tx(dev, ackstatus);
}
+ lp->reply_status = ackstatus;
+ tasklet_hi_schedule(&lp->reply_tasklet);
}
if (lp->cur_tx != -1)
release_arcbuf(dev, lp->cur_tx);
@@ -998,7 +1053,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
udelay(1);
lp->hw.intmask(dev, lp->intmask);
- spin_unlock(&lp->lock);
+ spin_unlock_irqrestore(&lp->lock, flags);
return retval;
}
EXPORT_SYMBOL(arcnet_interrupt);
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index a80f4eb9262d..b780be6f41ff 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -212,7 +212,7 @@ static int ack_tx(struct net_device *dev, int acked)
ackpkt->soft.cap.proto = 0; /* using protocol 0 for acknowledge */
ackpkt->soft.cap.mes.ack = acked;
- arc_printk(D_PROTO, dev, "Ackknowledge for cap packet %x.\n",
+ arc_printk(D_PROTO, dev, "Acknowledge for cap packet %x.\n",
*((int *)&ackpkt->soft.cap.cookie[0]));
ackskb->protocol = cpu_to_be16(ETH_P_ARCNET);
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index 239de38fbd6a..2d956cb59d06 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -93,6 +93,27 @@ static void led_recon_set(struct led_classdev *led_cdev,
outb(!!value, priv->misc + ci->leds[card->index].red);
}
+static ssize_t backplane_mode_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *net_dev = to_net_dev(dev);
+ struct arcnet_local *lp = netdev_priv(net_dev);
+
+ return sprintf(buf, "%s\n", lp->backplane ? "true" : "false");
+}
+static DEVICE_ATTR_RO(backplane_mode);
+
+static struct attribute *com20020_state_attrs[] = {
+ &dev_attr_backplane_mode.attr,
+ NULL,
+};
+
+static struct attribute_group com20020_state_group = {
+ .name = NULL,
+ .attrs = com20020_state_attrs,
+};
+
static void com20020pci_remove(struct pci_dev *pdev);
static int com20020pci_probe(struct pci_dev *pdev,
@@ -135,6 +156,7 @@ static int com20020pci_probe(struct pci_dev *pdev,
for (i = 0; i < ci->devcount; i++) {
struct com20020_pci_channel_map *cm = &ci->chan_map_tbl[i];
struct com20020_dev *card;
+ int dev_id_mask = 0xf;
dev = alloc_arcdev(device);
if (!dev) {
@@ -166,8 +188,10 @@ static int com20020pci_probe(struct pci_dev *pdev,
arcnet_outb(0x00, ioaddr, COM20020_REG_W_COMMAND);
arcnet_inb(ioaddr, COM20020_REG_R_DIAGSTAT);
+ SET_NETDEV_DEV(dev, &pdev->dev);
dev->base_addr = ioaddr;
dev->dev_addr[0] = node;
+ dev->sysfs_groups[0] = &com20020_state_group;
dev->irq = pdev->irq;
lp->card_name = "PCI COM20020";
lp->card_flags = ci->flags;
@@ -177,10 +201,15 @@ static int com20020pci_probe(struct pci_dev *pdev,
lp->timeout = timeout;
lp->hw.owner = THIS_MODULE;
+ lp->backplane = (inb(priv->misc) >> (2 + i)) & 0x1;
+
+ if (!strncmp(ci->name, "EAE PLX-PCI FB2", 15))
+ lp->backplane = 1;
+
/* Get the dev_id from the PLX rotary coder */
if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15))
- dev->dev_id = 0xc;
- dev->dev_id ^= inb(priv->misc + ci->rotary) >> 4;
+ dev_id_mask = 0x3;
+ dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask;
snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i);
@@ -361,6 +390,31 @@ static struct com20020_pci_card_info card_info_eae_ma1 = {
.flags = ARC_CAN_10MBIT,
};
+static struct com20020_pci_card_info card_info_eae_fb2 = {
+ .name = "EAE PLX-PCI FB2",
+ .devcount = 1,
+ .chan_map_tbl = {
+ {
+ .bar = 2,
+ .offset = 0x00,
+ .size = 0x08,
+ },
+ },
+ .misc_map = {
+ .bar = 2,
+ .offset = 0x10,
+ .size = 0x04,
+ },
+ .leds = {
+ {
+ .green = 0x0,
+ .red = 0x1,
+ },
+ },
+ .rotary = 0x0,
+ .flags = ARC_CAN_10MBIT,
+};
+
static const struct pci_device_id com20020pci_id_table[] = {
{
0x1571, 0xa001,
@@ -507,6 +561,12 @@ static const struct pci_device_id com20020pci_id_table[] = {
(kernel_ulong_t)&card_info_eae_ma1
},
{
+ 0x10B5, 0x9050,
+ 0x10B5, 0x3294,
+ 0, 0,
+ (kernel_ulong_t)&card_info_eae_fb2
+ },
+ {
0x14BA, 0x6000,
PCI_ANY_ID, PCI_ANY_ID,
0, 0,
diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c
index 13d9ad4b3f5c..78043a9c5981 100644
--- a/drivers/net/arcnet/com20020.c
+++ b/drivers/net/arcnet/com20020.c
@@ -246,8 +246,6 @@ int com20020_found(struct net_device *dev, int shared)
return -ENODEV;
}
- dev->base_addr = ioaddr;
-
arc_printk(D_NORMAL, dev, "%s: station %02Xh found at %03lXh, IRQ %d.\n",
lp->card_name, dev->dev_addr[0], dev->base_addr, dev->irq);
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 8ca683396fcc..a12d603d41c6 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -464,7 +464,7 @@ const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val)
/* Searches for a value in opt's values[] table which matches the flagmask */
static const struct bond_opt_value *bond_opt_get_flags(const struct bond_option *opt,
- u32 flagmask)
+ u32 flagmask)
{
int i;
@@ -744,14 +744,14 @@ static int bond_option_mode_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
if (!bond_mode_uses_arp(newval->value) && bond->params.arp_interval) {
- netdev_info(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n",
- newval->string);
+ netdev_dbg(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n",
+ newval->string);
/* disable arp monitoring */
bond->params.arp_interval = 0;
/* set miimon to default value */
bond->params.miimon = BOND_DEFAULT_MIIMON;
- netdev_info(bond->dev, "Setting MII monitoring interval to %d\n",
- bond->params.miimon);
+ netdev_dbg(bond->dev, "Setting MII monitoring interval to %d\n",
+ bond->params.miimon);
}
/* don't cache arp_validate between modes */
@@ -794,7 +794,7 @@ static int bond_option_active_slave_set(struct bonding *bond,
block_netpoll_tx();
/* check to see if we are clearing active */
if (!slave_dev) {
- netdev_info(bond->dev, "Clearing current active slave\n");
+ netdev_dbg(bond->dev, "Clearing current active slave\n");
RCU_INIT_POINTER(bond->curr_active_slave, NULL);
bond_select_active_slave(bond);
} else {
@@ -805,13 +805,13 @@ static int bond_option_active_slave_set(struct bonding *bond,
if (new_active == old_active) {
/* do nothing */
- netdev_info(bond->dev, "%s is already the current active slave\n",
- new_active->dev->name);
+ netdev_dbg(bond->dev, "%s is already the current active slave\n",
+ new_active->dev->name);
} else {
if (old_active && (new_active->link == BOND_LINK_UP) &&
bond_slave_is_up(new_active)) {
- netdev_info(bond->dev, "Setting %s as active slave\n",
- new_active->dev->name);
+ netdev_dbg(bond->dev, "Setting %s as active slave\n",
+ new_active->dev->name);
bond_change_active_slave(bond, new_active);
} else {
netdev_err(bond->dev, "Could not set %s as active slave; either %s is down or the link is down\n",
@@ -833,17 +833,17 @@ static int bond_option_active_slave_set(struct bonding *bond,
static int bond_option_miimon_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting MII monitoring interval to %llu\n",
- newval->value);
+ netdev_dbg(bond->dev, "Setting MII monitoring interval to %llu\n",
+ newval->value);
bond->params.miimon = newval->value;
if (bond->params.updelay)
- netdev_info(bond->dev, "Note: Updating updelay (to %d) since it is a multiple of the miimon value\n",
- bond->params.updelay * bond->params.miimon);
+ netdev_dbg(bond->dev, "Note: Updating updelay (to %d) since it is a multiple of the miimon value\n",
+ bond->params.updelay * bond->params.miimon);
if (bond->params.downdelay)
- netdev_info(bond->dev, "Note: Updating downdelay (to %d) since it is a multiple of the miimon value\n",
- bond->params.downdelay * bond->params.miimon);
+ netdev_dbg(bond->dev, "Note: Updating downdelay (to %d) since it is a multiple of the miimon value\n",
+ bond->params.downdelay * bond->params.miimon);
if (newval->value && bond->params.arp_interval) {
- netdev_info(bond->dev, "MII monitoring cannot be used with ARP monitoring - disabling ARP monitoring...\n");
+ netdev_dbg(bond->dev, "MII monitoring cannot be used with ARP monitoring - disabling ARP monitoring...\n");
bond->params.arp_interval = 0;
if (bond->params.arp_validate)
bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
@@ -885,8 +885,8 @@ static int bond_option_updelay_set(struct bonding *bond,
bond->params.miimon);
}
bond->params.updelay = value / bond->params.miimon;
- netdev_info(bond->dev, "Setting up delay to %d\n",
- bond->params.updelay * bond->params.miimon);
+ netdev_dbg(bond->dev, "Setting up delay to %d\n",
+ bond->params.updelay * bond->params.miimon);
return 0;
}
@@ -907,8 +907,8 @@ static int bond_option_downdelay_set(struct bonding *bond,
bond->params.miimon);
}
bond->params.downdelay = value / bond->params.miimon;
- netdev_info(bond->dev, "Setting down delay to %d\n",
- bond->params.downdelay * bond->params.miimon);
+ netdev_dbg(bond->dev, "Setting down delay to %d\n",
+ bond->params.downdelay * bond->params.miimon);
return 0;
}
@@ -916,8 +916,8 @@ static int bond_option_downdelay_set(struct bonding *bond,
static int bond_option_use_carrier_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting use_carrier to %llu\n",
- newval->value);
+ netdev_dbg(bond->dev, "Setting use_carrier to %llu\n",
+ newval->value);
bond->params.use_carrier = newval->value;
return 0;
@@ -930,16 +930,16 @@ static int bond_option_use_carrier_set(struct bonding *bond,
static int bond_option_arp_interval_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting ARP monitoring interval to %llu\n",
- newval->value);
+ netdev_dbg(bond->dev, "Setting ARP monitoring interval to %llu\n",
+ newval->value);
bond->params.arp_interval = newval->value;
if (newval->value) {
if (bond->params.miimon) {
- netdev_info(bond->dev, "ARP monitoring cannot be used with MII monitoring. Disabling MII monitoring\n");
+ netdev_dbg(bond->dev, "ARP monitoring cannot be used with MII monitoring. Disabling MII monitoring\n");
bond->params.miimon = 0;
}
if (!bond->params.arp_targets[0])
- netdev_info(bond->dev, "ARP monitoring has been set up, but no ARP targets have been specified\n");
+ netdev_dbg(bond->dev, "ARP monitoring has been set up, but no ARP targets have been specified\n");
}
if (bond->dev->flags & IFF_UP) {
/* If the interface is up, we may need to fire off
@@ -1000,7 +1000,7 @@ static int _bond_option_arp_ip_target_add(struct bonding *bond, __be32 target)
return -EINVAL;
}
- netdev_info(bond->dev, "Adding ARP target %pI4\n", &target);
+ netdev_dbg(bond->dev, "Adding ARP target %pI4\n", &target);
_bond_options_arp_ip_target_set(bond, ind, target, jiffies);
@@ -1036,7 +1036,7 @@ static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target)
if (ind == 0 && !targets[1] && bond->params.arp_interval)
netdev_warn(bond->dev, "Removing last arp target with arp_interval on\n");
- netdev_info(bond->dev, "Removing ARP target %pI4\n", &target);
+ netdev_dbg(bond->dev, "Removing ARP target %pI4\n", &target);
bond_for_each_slave(bond, slave, iter) {
targets_rx = slave->target_last_arp_rx;
@@ -1088,8 +1088,8 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond,
static int bond_option_arp_validate_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting arp_validate to %s (%llu)\n",
- newval->string, newval->value);
+ netdev_dbg(bond->dev, "Setting arp_validate to %s (%llu)\n",
+ newval->string, newval->value);
if (bond->dev->flags & IFF_UP) {
if (!newval->value)
@@ -1105,8 +1105,8 @@ static int bond_option_arp_validate_set(struct bonding *bond,
static int bond_option_arp_all_targets_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting arp_all_targets to %s (%llu)\n",
- newval->string, newval->value);
+ netdev_dbg(bond->dev, "Setting arp_all_targets to %s (%llu)\n",
+ newval->string, newval->value);
bond->params.arp_all_targets = newval->value;
return 0;
@@ -1126,7 +1126,7 @@ static int bond_option_primary_set(struct bonding *bond,
*p = '\0';
/* check to see if we are clearing primary */
if (!strlen(primary)) {
- netdev_info(bond->dev, "Setting primary slave to None\n");
+ netdev_dbg(bond->dev, "Setting primary slave to None\n");
RCU_INIT_POINTER(bond->primary_slave, NULL);
memset(bond->params.primary, 0, sizeof(bond->params.primary));
bond_select_active_slave(bond);
@@ -1135,8 +1135,8 @@ static int bond_option_primary_set(struct bonding *bond,
bond_for_each_slave(bond, slave, iter) {
if (strncmp(slave->dev->name, primary, IFNAMSIZ) == 0) {
- netdev_info(bond->dev, "Setting %s as primary slave\n",
- slave->dev->name);
+ netdev_dbg(bond->dev, "Setting %s as primary slave\n",
+ slave->dev->name);
rcu_assign_pointer(bond->primary_slave, slave);
strcpy(bond->params.primary, slave->dev->name);
bond_select_active_slave(bond);
@@ -1145,15 +1145,15 @@ static int bond_option_primary_set(struct bonding *bond,
}
if (rtnl_dereference(bond->primary_slave)) {
- netdev_info(bond->dev, "Setting primary slave to None\n");
+ netdev_dbg(bond->dev, "Setting primary slave to None\n");
RCU_INIT_POINTER(bond->primary_slave, NULL);
bond_select_active_slave(bond);
}
strncpy(bond->params.primary, primary, IFNAMSIZ);
bond->params.primary[IFNAMSIZ - 1] = 0;
- netdev_info(bond->dev, "Recording %s as primary, but it has not been enslaved to %s yet\n",
- primary, bond->dev->name);
+ netdev_dbg(bond->dev, "Recording %s as primary, but it has not been enslaved to %s yet\n",
+ primary, bond->dev->name);
out:
unblock_netpoll_tx();
@@ -1164,8 +1164,8 @@ out:
static int bond_option_primary_reselect_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting primary_reselect to %s (%llu)\n",
- newval->string, newval->value);
+ netdev_dbg(bond->dev, "Setting primary_reselect to %s (%llu)\n",
+ newval->string, newval->value);
bond->params.primary_reselect = newval->value;
block_netpoll_tx();
@@ -1178,8 +1178,8 @@ static int bond_option_primary_reselect_set(struct bonding *bond,
static int bond_option_fail_over_mac_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting fail_over_mac to %s (%llu)\n",
- newval->string, newval->value);
+ netdev_dbg(bond->dev, "Setting fail_over_mac to %s (%llu)\n",
+ newval->string, newval->value);
bond->params.fail_over_mac = newval->value;
return 0;
@@ -1188,8 +1188,8 @@ static int bond_option_fail_over_mac_set(struct bonding *bond,
static int bond_option_xmit_hash_policy_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting xmit hash policy to %s (%llu)\n",
- newval->string, newval->value);
+ netdev_dbg(bond->dev, "Setting xmit hash policy to %s (%llu)\n",
+ newval->string, newval->value);
bond->params.xmit_policy = newval->value;
return 0;
@@ -1198,8 +1198,8 @@ static int bond_option_xmit_hash_policy_set(struct bonding *bond,
static int bond_option_resend_igmp_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting resend_igmp to %llu\n",
- newval->value);
+ netdev_dbg(bond->dev, "Setting resend_igmp to %llu\n",
+ newval->value);
bond->params.resend_igmp = newval->value;
return 0;
@@ -1237,8 +1237,8 @@ static int bond_option_all_slaves_active_set(struct bonding *bond,
static int bond_option_min_links_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting min links value to %llu\n",
- newval->value);
+ netdev_dbg(bond->dev, "Setting min links value to %llu\n",
+ newval->value);
bond->params.min_links = newval->value;
bond_set_carrier(bond);
@@ -1256,6 +1256,8 @@ static int bond_option_lp_interval_set(struct bonding *bond,
static int bond_option_pps_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
+ netdev_dbg(bond->dev, "Setting packets per slave to %llu\n",
+ newval->value);
bond->params.packets_per_slave = newval->value;
if (newval->value > 0) {
bond->params.reciprocal_packets_per_slave =
@@ -1274,8 +1276,8 @@ static int bond_option_pps_set(struct bonding *bond,
static int bond_option_lacp_rate_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting LACP rate to %s (%llu)\n",
- newval->string, newval->value);
+ netdev_dbg(bond->dev, "Setting LACP rate to %s (%llu)\n",
+ newval->string, newval->value);
bond->params.lacp_fast = newval->value;
bond_3ad_update_lacp_rate(bond);
@@ -1285,8 +1287,8 @@ static int bond_option_lacp_rate_set(struct bonding *bond,
static int bond_option_ad_select_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting ad_select to %s (%llu)\n",
- newval->string, newval->value);
+ netdev_dbg(bond->dev, "Setting ad_select to %s (%llu)\n",
+ newval->string, newval->value);
bond->params.ad_select = newval->value;
return 0;
@@ -1347,7 +1349,7 @@ out:
return ret;
err_no_cmd:
- netdev_info(bond->dev, "invalid input for queue_id set\n");
+ netdev_dbg(bond->dev, "invalid input for queue_id set\n");
ret = -EPERM;
goto out;
@@ -1369,20 +1371,20 @@ static int bond_option_slaves_set(struct bonding *bond,
dev = __dev_get_by_name(dev_net(bond->dev), ifname);
if (!dev) {
- netdev_info(bond->dev, "interface %s does not exist!\n",
- ifname);
+ netdev_dbg(bond->dev, "interface %s does not exist!\n",
+ ifname);
ret = -ENODEV;
goto out;
}
switch (command[0]) {
case '+':
- netdev_info(bond->dev, "Adding slave %s\n", dev->name);
+ netdev_dbg(bond->dev, "Adding slave %s\n", dev->name);
ret = bond_enslave(bond->dev, dev);
break;
case '-':
- netdev_info(bond->dev, "Removing slave %s\n", dev->name);
+ netdev_dbg(bond->dev, "Removing slave %s\n", dev->name);
ret = bond_release(bond->dev, dev);
break;
@@ -1402,8 +1404,8 @@ err_no_cmd:
static int bond_option_tlb_dynamic_lb_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting dynamic-lb to %s (%llu)\n",
- newval->string, newval->value);
+ netdev_dbg(bond->dev, "Setting dynamic-lb to %s (%llu)\n",
+ newval->string, newval->value);
bond->params.tlb_dynamic_lb = newval->value;
return 0;
@@ -1412,8 +1414,8 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond,
static int bond_option_ad_actor_sys_prio_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting ad_actor_sys_prio to %llu\n",
- newval->value);
+ netdev_dbg(bond->dev, "Setting ad_actor_sys_prio to %llu\n",
+ newval->value);
bond->params.ad_actor_sys_prio = newval->value;
bond_3ad_update_ad_actor_settings(bond);
@@ -1442,7 +1444,7 @@ static int bond_option_ad_actor_system_set(struct bonding *bond,
if (!is_valid_ether_addr(mac))
goto err;
- netdev_info(bond->dev, "Setting ad_actor_system to %pM\n", mac);
+ netdev_dbg(bond->dev, "Setting ad_actor_system to %pM\n", mac);
ether_addr_copy(bond->params.ad_actor_system, mac);
bond_3ad_update_ad_actor_settings(bond);
@@ -1456,8 +1458,8 @@ err:
static int bond_option_ad_user_port_key_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- netdev_info(bond->dev, "Setting ad_user_port_key to %llu\n",
- newval->value);
+ netdev_dbg(bond->dev, "Setting ad_user_port_key to %llu\n",
+ newval->value);
bond->params.ad_user_port_key = newval->value;
return 0;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index 127adbeefb10..9795419aac2d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -123,38 +123,13 @@
#define DMA_ISR 0x3008
#define DMA_AXIARCR 0x3010
#define DMA_AXIAWCR 0x3018
+#define DMA_AXIAWARCR 0x301c
#define DMA_DSR0 0x3020
#define DMA_DSR1 0x3024
+#define DMA_TXEDMACR 0x3040
+#define DMA_RXEDMACR 0x3044
/* DMA register entry bit positions and sizes */
-#define DMA_AXIARCR_DRC_INDEX 0
-#define DMA_AXIARCR_DRC_WIDTH 4
-#define DMA_AXIARCR_DRD_INDEX 4
-#define DMA_AXIARCR_DRD_WIDTH 2
-#define DMA_AXIARCR_TEC_INDEX 8
-#define DMA_AXIARCR_TEC_WIDTH 4
-#define DMA_AXIARCR_TED_INDEX 12
-#define DMA_AXIARCR_TED_WIDTH 2
-#define DMA_AXIARCR_THC_INDEX 16
-#define DMA_AXIARCR_THC_WIDTH 4
-#define DMA_AXIARCR_THD_INDEX 20
-#define DMA_AXIARCR_THD_WIDTH 2
-#define DMA_AXIAWCR_DWC_INDEX 0
-#define DMA_AXIAWCR_DWC_WIDTH 4
-#define DMA_AXIAWCR_DWD_INDEX 4
-#define DMA_AXIAWCR_DWD_WIDTH 2
-#define DMA_AXIAWCR_RPC_INDEX 8
-#define DMA_AXIAWCR_RPC_WIDTH 4
-#define DMA_AXIAWCR_RPD_INDEX 12
-#define DMA_AXIAWCR_RPD_WIDTH 2
-#define DMA_AXIAWCR_RHC_INDEX 16
-#define DMA_AXIAWCR_RHC_WIDTH 4
-#define DMA_AXIAWCR_RHD_INDEX 20
-#define DMA_AXIAWCR_RHD_WIDTH 2
-#define DMA_AXIAWCR_TDC_INDEX 24
-#define DMA_AXIAWCR_TDC_WIDTH 4
-#define DMA_AXIAWCR_TDD_INDEX 28
-#define DMA_AXIAWCR_TDD_WIDTH 2
#define DMA_ISR_MACIS_INDEX 17
#define DMA_ISR_MACIS_WIDTH 1
#define DMA_ISR_MTLIS_INDEX 16
@@ -163,14 +138,31 @@
#define DMA_MR_INTM_WIDTH 2
#define DMA_MR_SWR_INDEX 0
#define DMA_MR_SWR_WIDTH 1
+#define DMA_RXEDMACR_RDPS_INDEX 0
+#define DMA_RXEDMACR_RDPS_WIDTH 3
+#define DMA_SBMR_AAL_INDEX 12
+#define DMA_SBMR_AAL_WIDTH 1
#define DMA_SBMR_EAME_INDEX 11
#define DMA_SBMR_EAME_WIDTH 1
-#define DMA_SBMR_BLEN_256_INDEX 7
-#define DMA_SBMR_BLEN_256_WIDTH 1
+#define DMA_SBMR_BLEN_INDEX 1
+#define DMA_SBMR_BLEN_WIDTH 7
+#define DMA_SBMR_RD_OSR_LMT_INDEX 16
+#define DMA_SBMR_RD_OSR_LMT_WIDTH 6
#define DMA_SBMR_UNDEF_INDEX 0
#define DMA_SBMR_UNDEF_WIDTH 1
+#define DMA_SBMR_WR_OSR_LMT_INDEX 24
+#define DMA_SBMR_WR_OSR_LMT_WIDTH 6
+#define DMA_TXEDMACR_TDPS_INDEX 0
+#define DMA_TXEDMACR_TDPS_WIDTH 3
/* DMA register values */
+#define DMA_SBMR_BLEN_256 256
+#define DMA_SBMR_BLEN_128 128
+#define DMA_SBMR_BLEN_64 64
+#define DMA_SBMR_BLEN_32 32
+#define DMA_SBMR_BLEN_16 16
+#define DMA_SBMR_BLEN_8 8
+#define DMA_SBMR_BLEN_4 4
#define DMA_DSR_RPS_WIDTH 4
#define DMA_DSR_TPS_WIDTH 4
#define DMA_DSR_Q_WIDTH (DMA_DSR_RPS_WIDTH + DMA_DSR_TPS_WIDTH)
@@ -959,6 +951,7 @@
#define XP_DRIVER_INT_RO 0x0064
#define XP_DRIVER_SCRATCH_0 0x0068
#define XP_DRIVER_SCRATCH_1 0x006c
+#define XP_INT_REISSUE_EN 0x0074
#define XP_INT_EN 0x0078
#define XP_I2C_MUTEX 0x0080
#define XP_MDIO_MUTEX 0x0084
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
index 0a98c369df20..45d92304068e 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
@@ -176,8 +176,8 @@ static void xgbe_free_ring_resources(struct xgbe_prv_data *pdata)
DBGPR("-->xgbe_free_ring_resources\n");
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
xgbe_free_ring(pdata, channel->tx_ring);
xgbe_free_ring(pdata, channel->rx_ring);
}
@@ -185,34 +185,60 @@ static void xgbe_free_ring_resources(struct xgbe_prv_data *pdata)
DBGPR("<--xgbe_free_ring_resources\n");
}
+static void *xgbe_alloc_node(size_t size, int node)
+{
+ void *mem;
+
+ mem = kzalloc_node(size, GFP_KERNEL, node);
+ if (!mem)
+ mem = kzalloc(size, GFP_KERNEL);
+
+ return mem;
+}
+
+static void *xgbe_dma_alloc_node(struct device *dev, size_t size,
+ dma_addr_t *dma, int node)
+{
+ void *mem;
+ int cur_node = dev_to_node(dev);
+
+ set_dev_node(dev, node);
+ mem = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
+ set_dev_node(dev, cur_node);
+
+ if (!mem)
+ mem = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
+
+ return mem;
+}
+
static int xgbe_init_ring(struct xgbe_prv_data *pdata,
struct xgbe_ring *ring, unsigned int rdesc_count)
{
- DBGPR("-->xgbe_init_ring\n");
+ size_t size;
if (!ring)
return 0;
/* Descriptors */
+ size = rdesc_count * sizeof(struct xgbe_ring_desc);
+
ring->rdesc_count = rdesc_count;
- ring->rdesc = dma_alloc_coherent(pdata->dev,
- (sizeof(struct xgbe_ring_desc) *
- rdesc_count), &ring->rdesc_dma,
- GFP_KERNEL);
+ ring->rdesc = xgbe_dma_alloc_node(pdata->dev, size, &ring->rdesc_dma,
+ ring->node);
if (!ring->rdesc)
return -ENOMEM;
/* Descriptor information */
- ring->rdata = kcalloc(rdesc_count, sizeof(struct xgbe_ring_data),
- GFP_KERNEL);
+ size = rdesc_count * sizeof(struct xgbe_ring_data);
+
+ ring->rdata = xgbe_alloc_node(size, ring->node);
if (!ring->rdata)
return -ENOMEM;
netif_dbg(pdata, drv, pdata->netdev,
- "rdesc=%p, rdesc_dma=%pad, rdata=%p\n",
- ring->rdesc, &ring->rdesc_dma, ring->rdata);
-
- DBGPR("<--xgbe_init_ring\n");
+ "rdesc=%p, rdesc_dma=%pad, rdata=%p, node=%d\n",
+ ring->rdesc, &ring->rdesc_dma, ring->rdata, ring->node);
return 0;
}
@@ -223,10 +249,8 @@ static int xgbe_alloc_ring_resources(struct xgbe_prv_data *pdata)
unsigned int i;
int ret;
- DBGPR("-->xgbe_alloc_ring_resources\n");
-
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
netif_dbg(pdata, drv, pdata->netdev, "%s - Tx ring:\n",
channel->name);
@@ -250,8 +274,6 @@ static int xgbe_alloc_ring_resources(struct xgbe_prv_data *pdata)
}
}
- DBGPR("<--xgbe_alloc_ring_resources\n");
-
return 0;
err_ring:
@@ -261,21 +283,33 @@ err_ring:
}
static int xgbe_alloc_pages(struct xgbe_prv_data *pdata,
- struct xgbe_page_alloc *pa, gfp_t gfp, int order)
+ struct xgbe_page_alloc *pa, int alloc_order,
+ int node)
{
struct page *pages = NULL;
dma_addr_t pages_dma;
- int ret;
+ gfp_t gfp;
+ int order, ret;
+
+again:
+ order = alloc_order;
/* Try to obtain pages, decreasing order if necessary */
- gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
+ gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
while (order >= 0) {
- pages = alloc_pages(gfp, order);
+ pages = alloc_pages_node(node, gfp, order);
if (pages)
break;
order--;
}
+
+ /* If we couldn't get local pages, try getting from anywhere */
+ if (!pages && (node != NUMA_NO_NODE)) {
+ node = NUMA_NO_NODE;
+ goto again;
+ }
+
if (!pages)
return -ENOMEM;
@@ -327,14 +361,14 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata,
int ret;
if (!ring->rx_hdr_pa.pages) {
- ret = xgbe_alloc_pages(pdata, &ring->rx_hdr_pa, GFP_ATOMIC, 0);
+ ret = xgbe_alloc_pages(pdata, &ring->rx_hdr_pa, 0, ring->node);
if (ret)
return ret;
}
if (!ring->rx_buf_pa.pages) {
- ret = xgbe_alloc_pages(pdata, &ring->rx_buf_pa, GFP_ATOMIC,
- PAGE_ALLOC_COSTLY_ORDER);
+ ret = xgbe_alloc_pages(pdata, &ring->rx_buf_pa,
+ PAGE_ALLOC_COSTLY_ORDER, ring->node);
if (ret)
return ret;
}
@@ -362,8 +396,8 @@ static void xgbe_wrapper_tx_descriptor_init(struct xgbe_prv_data *pdata)
DBGPR("-->xgbe_wrapper_tx_descriptor_init\n");
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
ring = channel->tx_ring;
if (!ring)
break;
@@ -403,8 +437,8 @@ static void xgbe_wrapper_rx_descriptor_init(struct xgbe_prv_data *pdata)
DBGPR("-->xgbe_wrapper_rx_descriptor_init\n");
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
ring = channel->rx_ring;
if (!ring)
break;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index 24a687ce4388..06f953e1e9b2 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -174,58 +174,30 @@ static unsigned int xgbe_riwt_to_usec(struct xgbe_prv_data *pdata,
return ret;
}
-static int xgbe_config_pblx8(struct xgbe_prv_data *pdata)
+static int xgbe_config_pbl_val(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
+ unsigned int pblx8, pbl;
unsigned int i;
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++)
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_CR, PBLX8,
- pdata->pblx8);
-
- return 0;
-}
-
-static int xgbe_get_tx_pbl_val(struct xgbe_prv_data *pdata)
-{
- return XGMAC_DMA_IOREAD_BITS(pdata->channel, DMA_CH_TCR, PBL);
-}
-
-static int xgbe_config_tx_pbl_val(struct xgbe_prv_data *pdata)
-{
- struct xgbe_channel *channel;
- unsigned int i;
-
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->tx_ring)
- break;
+ pblx8 = DMA_PBL_X8_DISABLE;
+ pbl = pdata->pbl;
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, PBL,
- pdata->tx_pbl);
+ if (pdata->pbl > 32) {
+ pblx8 = DMA_PBL_X8_ENABLE;
+ pbl >>= 3;
}
- return 0;
-}
-
-static int xgbe_get_rx_pbl_val(struct xgbe_prv_data *pdata)
-{
- return XGMAC_DMA_IOREAD_BITS(pdata->channel, DMA_CH_RCR, PBL);
-}
-
-static int xgbe_config_rx_pbl_val(struct xgbe_prv_data *pdata)
-{
- struct xgbe_channel *channel;
- unsigned int i;
+ for (i = 0; i < pdata->channel_count; i++) {
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, PBLX8,
+ pblx8);
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->rx_ring)
- break;
+ if (pdata->channel[i]->tx_ring)
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR,
+ PBL, pbl);
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, PBL,
- pdata->rx_pbl);
+ if (pdata->channel[i]->rx_ring)
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR,
+ PBL, pbl);
}
return 0;
@@ -233,15 +205,13 @@ static int xgbe_config_rx_pbl_val(struct xgbe_prv_data *pdata)
static int xgbe_config_osp_mode(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int i;
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->tx_ring)
+ for (i = 0; i < pdata->channel_count; i++) {
+ if (!pdata->channel[i]->tx_ring)
break;
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, OSP,
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, OSP,
pdata->tx_osp_mode);
}
@@ -292,15 +262,13 @@ static int xgbe_config_tx_threshold(struct xgbe_prv_data *pdata,
static int xgbe_config_rx_coalesce(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int i;
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->rx_ring)
+ for (i = 0; i < pdata->channel_count; i++) {
+ if (!pdata->channel[i]->rx_ring)
break;
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RIWT, RWT,
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RIWT, RWT,
pdata->rx_riwt);
}
@@ -314,44 +282,38 @@ static int xgbe_config_tx_coalesce(struct xgbe_prv_data *pdata)
static void xgbe_config_rx_buffer_size(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int i;
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->rx_ring)
+ for (i = 0; i < pdata->channel_count; i++) {
+ if (!pdata->channel[i]->rx_ring)
break;
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, RBSZ,
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, RBSZ,
pdata->rx_buf_size);
}
}
static void xgbe_config_tso_mode(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int i;
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->tx_ring)
+ for (i = 0; i < pdata->channel_count; i++) {
+ if (!pdata->channel[i]->tx_ring)
break;
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, TSE, 1);
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, TSE, 1);
}
}
static void xgbe_config_sph_mode(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int i;
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->rx_ring)
+ for (i = 0; i < pdata->channel_count; i++) {
+ if (!pdata->channel[i]->rx_ring)
break;
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_CR, SPH, 1);
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 1);
}
XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE);
@@ -651,8 +613,9 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
XGMAC_IOWRITE_BITS(pdata, DMA_MR, INTM,
pdata->channel_irq_mode);
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
+
/* Clear all the interrupts which are set */
dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR);
XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_ch_isr);
@@ -1497,26 +1460,37 @@ static void xgbe_rx_desc_init(struct xgbe_channel *channel)
static void xgbe_update_tstamp_addend(struct xgbe_prv_data *pdata,
unsigned int addend)
{
+ unsigned int count = 10000;
+
/* Set the addend register value and tell the device */
XGMAC_IOWRITE(pdata, MAC_TSAR, addend);
XGMAC_IOWRITE_BITS(pdata, MAC_TSCR, TSADDREG, 1);
/* Wait for addend update to complete */
- while (XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSADDREG))
+ while (--count && XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSADDREG))
udelay(5);
+
+ if (!count)
+ netdev_err(pdata->netdev,
+ "timed out updating timestamp addend register\n");
}
static void xgbe_set_tstamp_time(struct xgbe_prv_data *pdata, unsigned int sec,
unsigned int nsec)
{
+ unsigned int count = 10000;
+
/* Set the time values and tell the device */
XGMAC_IOWRITE(pdata, MAC_STSUR, sec);
XGMAC_IOWRITE(pdata, MAC_STNUR, nsec);
XGMAC_IOWRITE_BITS(pdata, MAC_TSCR, TSINIT, 1);
/* Wait for time update to complete */
- while (XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSINIT))
+ while (--count && XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSINIT))
udelay(5);
+
+ if (!count)
+ netdev_err(pdata->netdev, "timed out initializing timestamp\n");
}
static u64 xgbe_get_tstamp_time(struct xgbe_prv_data *pdata)
@@ -2140,37 +2114,38 @@ static int xgbe_flush_tx_queues(struct xgbe_prv_data *pdata)
static void xgbe_config_dma_bus(struct xgbe_prv_data *pdata)
{
+ unsigned int sbmr;
+
+ sbmr = XGMAC_IOREAD(pdata, DMA_SBMR);
+
/* Set enhanced addressing mode */
- XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, EAME, 1);
+ XGMAC_SET_BITS(sbmr, DMA_SBMR, EAME, 1);
/* Set the System Bus mode */
- XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, UNDEF, 1);
- XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, BLEN_256, 1);
+ XGMAC_SET_BITS(sbmr, DMA_SBMR, UNDEF, 1);
+ XGMAC_SET_BITS(sbmr, DMA_SBMR, BLEN, pdata->blen >> 2);
+ XGMAC_SET_BITS(sbmr, DMA_SBMR, AAL, pdata->aal);
+ XGMAC_SET_BITS(sbmr, DMA_SBMR, RD_OSR_LMT, pdata->rd_osr_limit - 1);
+ XGMAC_SET_BITS(sbmr, DMA_SBMR, WR_OSR_LMT, pdata->wr_osr_limit - 1);
+
+ XGMAC_IOWRITE(pdata, DMA_SBMR, sbmr);
+
+ /* Set descriptor fetching threshold */
+ if (pdata->vdata->tx_desc_prefetch)
+ XGMAC_IOWRITE_BITS(pdata, DMA_TXEDMACR, TDPS,
+ pdata->vdata->tx_desc_prefetch);
+
+ if (pdata->vdata->rx_desc_prefetch)
+ XGMAC_IOWRITE_BITS(pdata, DMA_RXEDMACR, RDPS,
+ pdata->vdata->rx_desc_prefetch);
}
static void xgbe_config_dma_cache(struct xgbe_prv_data *pdata)
{
- unsigned int arcache, awcache;
-
- arcache = 0;
- XGMAC_SET_BITS(arcache, DMA_AXIARCR, DRC, pdata->arcache);
- XGMAC_SET_BITS(arcache, DMA_AXIARCR, DRD, pdata->axdomain);
- XGMAC_SET_BITS(arcache, DMA_AXIARCR, TEC, pdata->arcache);
- XGMAC_SET_BITS(arcache, DMA_AXIARCR, TED, pdata->axdomain);
- XGMAC_SET_BITS(arcache, DMA_AXIARCR, THC, pdata->arcache);
- XGMAC_SET_BITS(arcache, DMA_AXIARCR, THD, pdata->axdomain);
- XGMAC_IOWRITE(pdata, DMA_AXIARCR, arcache);
-
- awcache = 0;
- XGMAC_SET_BITS(awcache, DMA_AXIAWCR, DWC, pdata->awcache);
- XGMAC_SET_BITS(awcache, DMA_AXIAWCR, DWD, pdata->axdomain);
- XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RPC, pdata->awcache);
- XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RPD, pdata->axdomain);
- XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RHC, pdata->awcache);
- XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RHD, pdata->axdomain);
- XGMAC_SET_BITS(awcache, DMA_AXIAWCR, TDC, pdata->awcache);
- XGMAC_SET_BITS(awcache, DMA_AXIAWCR, TDD, pdata->axdomain);
- XGMAC_IOWRITE(pdata, DMA_AXIAWCR, awcache);
+ XGMAC_IOWRITE(pdata, DMA_AXIARCR, pdata->arcr);
+ XGMAC_IOWRITE(pdata, DMA_AXIAWCR, pdata->awcr);
+ if (pdata->awarcr)
+ XGMAC_IOWRITE(pdata, DMA_AXIAWARCR, pdata->awarcr);
}
static void xgbe_config_mtl_mode(struct xgbe_prv_data *pdata)
@@ -3202,16 +3177,14 @@ static void xgbe_prepare_tx_stop(struct xgbe_prv_data *pdata,
static void xgbe_enable_tx(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int i;
/* Enable each Tx DMA channel */
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->tx_ring)
+ for (i = 0; i < pdata->channel_count; i++) {
+ if (!pdata->channel[i]->tx_ring)
break;
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 1);
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 1);
}
/* Enable each Tx queue */
@@ -3225,7 +3198,6 @@ static void xgbe_enable_tx(struct xgbe_prv_data *pdata)
static void xgbe_disable_tx(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int i;
/* Prepare for Tx DMA channel stop */
@@ -3240,12 +3212,11 @@ static void xgbe_disable_tx(struct xgbe_prv_data *pdata)
XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TXQEN, 0);
/* Disable each Tx DMA channel */
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->tx_ring)
+ for (i = 0; i < pdata->channel_count; i++) {
+ if (!pdata->channel[i]->tx_ring)
break;
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 0);
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 0);
}
}
@@ -3277,16 +3248,14 @@ static void xgbe_prepare_rx_stop(struct xgbe_prv_data *pdata,
static void xgbe_enable_rx(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int reg_val, i;
/* Enable each Rx DMA channel */
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->rx_ring)
+ for (i = 0; i < pdata->channel_count; i++) {
+ if (!pdata->channel[i]->rx_ring)
break;
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 1);
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 1);
}
/* Enable each Rx queue */
@@ -3304,7 +3273,6 @@ static void xgbe_enable_rx(struct xgbe_prv_data *pdata)
static void xgbe_disable_rx(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int i;
/* Disable MAC Rx */
@@ -3321,27 +3289,24 @@ static void xgbe_disable_rx(struct xgbe_prv_data *pdata)
XGMAC_IOWRITE(pdata, MAC_RQC0R, 0);
/* Disable each Rx DMA channel */
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->rx_ring)
+ for (i = 0; i < pdata->channel_count; i++) {
+ if (!pdata->channel[i]->rx_ring)
break;
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 0);
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 0);
}
}
static void xgbe_powerup_tx(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int i;
/* Enable each Tx DMA channel */
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->tx_ring)
+ for (i = 0; i < pdata->channel_count; i++) {
+ if (!pdata->channel[i]->tx_ring)
break;
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 1);
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 1);
}
/* Enable MAC Tx */
@@ -3350,7 +3315,6 @@ static void xgbe_powerup_tx(struct xgbe_prv_data *pdata)
static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int i;
/* Prepare for Tx DMA channel stop */
@@ -3361,42 +3325,37 @@ static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata)
XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
/* Disable each Tx DMA channel */
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->tx_ring)
+ for (i = 0; i < pdata->channel_count; i++) {
+ if (!pdata->channel[i]->tx_ring)
break;
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 0);
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 0);
}
}
static void xgbe_powerup_rx(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int i;
/* Enable each Rx DMA channel */
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->rx_ring)
+ for (i = 0; i < pdata->channel_count; i++) {
+ if (!pdata->channel[i]->rx_ring)
break;
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 1);
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 1);
}
}
static void xgbe_powerdown_rx(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int i;
/* Disable each Rx DMA channel */
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- if (!channel->rx_ring)
+ for (i = 0; i < pdata->channel_count; i++) {
+ if (!pdata->channel[i]->rx_ring)
break;
- XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 0);
+ XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 0);
}
}
@@ -3420,9 +3379,7 @@ static int xgbe_init(struct xgbe_prv_data *pdata)
xgbe_config_dma_bus(pdata);
xgbe_config_dma_cache(pdata);
xgbe_config_osp_mode(pdata);
- xgbe_config_pblx8(pdata);
- xgbe_config_tx_pbl_val(pdata);
- xgbe_config_rx_pbl_val(pdata);
+ xgbe_config_pbl_val(pdata);
xgbe_config_rx_coalesce(pdata);
xgbe_config_tx_coalesce(pdata);
xgbe_config_rx_buffer_size(pdata);
@@ -3550,13 +3507,6 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if)
/* For TX DMA Operating on Second Frame config */
hw_if->config_osp_mode = xgbe_config_osp_mode;
- /* For RX and TX PBL config */
- hw_if->config_rx_pbl_val = xgbe_config_rx_pbl_val;
- hw_if->get_rx_pbl_val = xgbe_get_rx_pbl_val;
- hw_if->config_tx_pbl_val = xgbe_config_tx_pbl_val;
- hw_if->get_tx_pbl_val = xgbe_get_tx_pbl_val;
- hw_if->config_pblx8 = xgbe_config_pblx8;
-
/* For MMC statistics support */
hw_if->tx_mmc_int = xgbe_tx_mmc_int;
hw_if->rx_mmc_int = xgbe_rx_mmc_int;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index a934bd5d0507..ecef3ee87b17 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -158,81 +158,106 @@ static int xgbe_one_poll(struct napi_struct *, int);
static int xgbe_all_poll(struct napi_struct *, int);
static void xgbe_stop(struct xgbe_prv_data *);
+static void *xgbe_alloc_node(size_t size, int node)
+{
+ void *mem;
+
+ mem = kzalloc_node(size, GFP_KERNEL, node);
+ if (!mem)
+ mem = kzalloc(size, GFP_KERNEL);
+
+ return mem;
+}
+
+static void xgbe_free_channels(struct xgbe_prv_data *pdata)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(pdata->channel); i++) {
+ if (!pdata->channel[i])
+ continue;
+
+ kfree(pdata->channel[i]->rx_ring);
+ kfree(pdata->channel[i]->tx_ring);
+ kfree(pdata->channel[i]);
+
+ pdata->channel[i] = NULL;
+ }
+
+ pdata->channel_count = 0;
+}
+
static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel_mem, *channel;
- struct xgbe_ring *tx_ring, *rx_ring;
+ struct xgbe_channel *channel;
+ struct xgbe_ring *ring;
unsigned int count, i;
- int ret = -ENOMEM;
+ unsigned int cpu;
+ int node;
count = max_t(unsigned int, pdata->tx_ring_count, pdata->rx_ring_count);
+ for (i = 0; i < count; i++) {
+ /* Attempt to use a CPU on the node the device is on */
+ cpu = cpumask_local_spread(i, dev_to_node(pdata->dev));
- channel_mem = kcalloc(count, sizeof(struct xgbe_channel), GFP_KERNEL);
- if (!channel_mem)
- goto err_channel;
-
- tx_ring = kcalloc(pdata->tx_ring_count, sizeof(struct xgbe_ring),
- GFP_KERNEL);
- if (!tx_ring)
- goto err_tx_ring;
+ /* Set the allocation node based on the returned CPU */
+ node = cpu_to_node(cpu);
- rx_ring = kcalloc(pdata->rx_ring_count, sizeof(struct xgbe_ring),
- GFP_KERNEL);
- if (!rx_ring)
- goto err_rx_ring;
+ channel = xgbe_alloc_node(sizeof(*channel), node);
+ if (!channel)
+ goto err_mem;
+ pdata->channel[i] = channel;
- for (i = 0, channel = channel_mem; i < count; i++, channel++) {
snprintf(channel->name, sizeof(channel->name), "channel-%u", i);
channel->pdata = pdata;
channel->queue_index = i;
channel->dma_regs = pdata->xgmac_regs + DMA_CH_BASE +
(DMA_CH_INC * i);
+ channel->node = node;
+ cpumask_set_cpu(cpu, &channel->affinity_mask);
if (pdata->per_channel_irq)
channel->dma_irq = pdata->channel_irq[i];
if (i < pdata->tx_ring_count) {
- spin_lock_init(&tx_ring->lock);
- channel->tx_ring = tx_ring++;
+ ring = xgbe_alloc_node(sizeof(*ring), node);
+ if (!ring)
+ goto err_mem;
+
+ spin_lock_init(&ring->lock);
+ ring->node = node;
+
+ channel->tx_ring = ring;
}
if (i < pdata->rx_ring_count) {
- spin_lock_init(&rx_ring->lock);
- channel->rx_ring = rx_ring++;
+ ring = xgbe_alloc_node(sizeof(*ring), node);
+ if (!ring)
+ goto err_mem;
+
+ spin_lock_init(&ring->lock);
+ ring->node = node;
+
+ channel->rx_ring = ring;
}
netif_dbg(pdata, drv, pdata->netdev,
+ "%s: cpu=%u, node=%d\n", channel->name, cpu, node);
+
+ netif_dbg(pdata, drv, pdata->netdev,
"%s: dma_regs=%p, dma_irq=%d, tx=%p, rx=%p\n",
channel->name, channel->dma_regs, channel->dma_irq,
channel->tx_ring, channel->rx_ring);
}
- pdata->channel = channel_mem;
pdata->channel_count = count;
return 0;
-err_rx_ring:
- kfree(tx_ring);
-
-err_tx_ring:
- kfree(channel_mem);
-
-err_channel:
- return ret;
-}
-
-static void xgbe_free_channels(struct xgbe_prv_data *pdata)
-{
- if (!pdata->channel)
- return;
-
- kfree(pdata->channel->rx_ring);
- kfree(pdata->channel->tx_ring);
- kfree(pdata->channel);
+err_mem:
+ xgbe_free_channels(pdata);
- pdata->channel = NULL;
- pdata->channel_count = 0;
+ return -ENOMEM;
}
static inline unsigned int xgbe_tx_avail_desc(struct xgbe_ring *ring)
@@ -301,12 +326,10 @@ static void xgbe_enable_rx_tx_int(struct xgbe_prv_data *pdata,
static void xgbe_enable_rx_tx_ints(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int i;
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++)
- xgbe_enable_rx_tx_int(pdata, channel);
+ for (i = 0; i < pdata->channel_count; i++)
+ xgbe_enable_rx_tx_int(pdata, pdata->channel[i]);
}
static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *pdata,
@@ -329,12 +352,10 @@ static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *pdata,
static void xgbe_disable_rx_tx_ints(struct xgbe_prv_data *pdata)
{
- struct xgbe_channel *channel;
unsigned int i;
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++)
- xgbe_disable_rx_tx_int(pdata, channel);
+ for (i = 0; i < pdata->channel_count; i++)
+ xgbe_disable_rx_tx_int(pdata, pdata->channel[i]);
}
static bool xgbe_ecc_sec(struct xgbe_prv_data *pdata, unsigned long *period,
@@ -382,9 +403,9 @@ static bool xgbe_ecc_ded(struct xgbe_prv_data *pdata, unsigned long *period,
return false;
}
-static irqreturn_t xgbe_ecc_isr(int irq, void *data)
+static void xgbe_ecc_isr_task(unsigned long data)
{
- struct xgbe_prv_data *pdata = data;
+ struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
unsigned int ecc_isr;
bool stop = false;
@@ -435,12 +456,26 @@ out:
/* Clear all ECC interrupts */
XP_IOWRITE(pdata, XP_ECC_ISR, ecc_isr);
- return IRQ_HANDLED;
+ /* Reissue interrupt if status is not clear */
+ if (pdata->vdata->irq_reissue_support)
+ XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 1);
}
-static irqreturn_t xgbe_isr(int irq, void *data)
+static irqreturn_t xgbe_ecc_isr(int irq, void *data)
{
struct xgbe_prv_data *pdata = data;
+
+ if (pdata->isr_as_tasklet)
+ tasklet_schedule(&pdata->tasklet_ecc);
+ else
+ xgbe_ecc_isr_task((unsigned long)pdata);
+
+ return IRQ_HANDLED;
+}
+
+static void xgbe_isr_task(unsigned long data)
+{
+ struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
struct xgbe_hw_if *hw_if = &pdata->hw_if;
struct xgbe_channel *channel;
unsigned int dma_isr, dma_ch_isr;
@@ -461,7 +496,7 @@ static irqreturn_t xgbe_isr(int irq, void *data)
if (!(dma_isr & (1 << i)))
continue;
- channel = pdata->channel + i;
+ channel = pdata->channel[i];
dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR);
netif_dbg(pdata, intr, pdata->netdev, "DMA_CH%u_ISR=%#010x\n",
@@ -543,15 +578,36 @@ static irqreturn_t xgbe_isr(int irq, void *data)
isr_done:
/* If there is not a separate AN irq, handle it here */
if (pdata->dev_irq == pdata->an_irq)
- pdata->phy_if.an_isr(irq, pdata);
+ pdata->phy_if.an_isr(pdata);
/* If there is not a separate ECC irq, handle it here */
if (pdata->vdata->ecc_support && (pdata->dev_irq == pdata->ecc_irq))
- xgbe_ecc_isr(irq, pdata);
+ xgbe_ecc_isr_task((unsigned long)pdata);
/* If there is not a separate I2C irq, handle it here */
if (pdata->vdata->i2c_support && (pdata->dev_irq == pdata->i2c_irq))
- pdata->i2c_if.i2c_isr(irq, pdata);
+ pdata->i2c_if.i2c_isr(pdata);
+
+ /* Reissue interrupt if status is not clear */
+ if (pdata->vdata->irq_reissue_support) {
+ unsigned int reissue_mask;
+
+ reissue_mask = 1 << 0;
+ if (!pdata->per_channel_irq)
+ reissue_mask |= 0xffff << 4;
+
+ XP_IOWRITE(pdata, XP_INT_REISSUE_EN, reissue_mask);
+ }
+}
+
+static irqreturn_t xgbe_isr(int irq, void *data)
+{
+ struct xgbe_prv_data *pdata = data;
+
+ if (pdata->isr_as_tasklet)
+ tasklet_schedule(&pdata->tasklet_dev);
+ else
+ xgbe_isr_task((unsigned long)pdata);
return IRQ_HANDLED;
}
@@ -640,8 +696,8 @@ static void xgbe_init_timers(struct xgbe_prv_data *pdata)
setup_timer(&pdata->service_timer, xgbe_service_timer,
(unsigned long)pdata);
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
if (!channel->tx_ring)
break;
@@ -662,8 +718,8 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata)
del_timer_sync(&pdata->service_timer);
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
if (!channel->tx_ring)
break;
@@ -781,8 +837,8 @@ static void xgbe_napi_enable(struct xgbe_prv_data *pdata, unsigned int add)
unsigned int i;
if (pdata->per_channel_irq) {
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
if (add)
netif_napi_add(pdata->netdev, &channel->napi,
xgbe_one_poll, NAPI_POLL_WEIGHT);
@@ -804,8 +860,8 @@ static void xgbe_napi_disable(struct xgbe_prv_data *pdata, unsigned int del)
unsigned int i;
if (pdata->per_channel_irq) {
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
napi_disable(&channel->napi);
if (del)
@@ -826,6 +882,10 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
unsigned int i;
int ret;
+ tasklet_init(&pdata->tasklet_dev, xgbe_isr_task, (unsigned long)pdata);
+ tasklet_init(&pdata->tasklet_ecc, xgbe_ecc_isr_task,
+ (unsigned long)pdata);
+
ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0,
netdev->name, pdata);
if (ret) {
@@ -847,8 +907,8 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
if (!pdata->per_channel_irq)
return 0;
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
snprintf(channel->dma_irq_name,
sizeof(channel->dma_irq_name) - 1,
"%s-TxRx-%u", netdev_name(netdev),
@@ -862,14 +922,21 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
channel->dma_irq);
goto err_dma_irq;
}
+
+ irq_set_affinity_hint(channel->dma_irq,
+ &channel->affinity_mask);
}
return 0;
err_dma_irq:
/* Using an unsigned int, 'i' will go to UINT_MAX and exit */
- for (i--, channel--; i < pdata->channel_count; i--, channel--)
+ for (i--; i < pdata->channel_count; i--) {
+ channel = pdata->channel[i];
+
+ irq_set_affinity_hint(channel->dma_irq, NULL);
devm_free_irq(pdata->dev, channel->dma_irq, channel);
+ }
if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
@@ -893,9 +960,12 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata)
if (!pdata->per_channel_irq)
return;
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++)
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
+
+ irq_set_affinity_hint(channel->dma_irq, NULL);
devm_free_irq(pdata->dev, channel->dma_irq, channel);
+ }
}
void xgbe_init_tx_coalesce(struct xgbe_prv_data *pdata)
@@ -930,16 +1000,14 @@ void xgbe_init_rx_coalesce(struct xgbe_prv_data *pdata)
static void xgbe_free_tx_data(struct xgbe_prv_data *pdata)
{
struct xgbe_desc_if *desc_if = &pdata->desc_if;
- struct xgbe_channel *channel;
struct xgbe_ring *ring;
struct xgbe_ring_data *rdata;
unsigned int i, j;
DBGPR("-->xgbe_free_tx_data\n");
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- ring = channel->tx_ring;
+ for (i = 0; i < pdata->channel_count; i++) {
+ ring = pdata->channel[i]->tx_ring;
if (!ring)
break;
@@ -955,16 +1023,14 @@ static void xgbe_free_tx_data(struct xgbe_prv_data *pdata)
static void xgbe_free_rx_data(struct xgbe_prv_data *pdata)
{
struct xgbe_desc_if *desc_if = &pdata->desc_if;
- struct xgbe_channel *channel;
struct xgbe_ring *ring;
struct xgbe_ring_data *rdata;
unsigned int i, j;
DBGPR("-->xgbe_free_rx_data\n");
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
- ring = channel->rx_ring;
+ for (i = 0; i < pdata->channel_count; i++) {
+ ring = pdata->channel[i]->rx_ring;
if (!ring)
break;
@@ -1140,8 +1206,8 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
hw_if->exit(pdata);
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
if (!channel->tx_ring)
continue;
@@ -1212,6 +1278,10 @@ static void xgbe_tx_tstamp(struct work_struct *work)
u64 nsec;
unsigned long flags;
+ spin_lock_irqsave(&pdata->tstamp_lock, flags);
+ if (!pdata->tx_tstamp_skb)
+ goto unlock;
+
if (pdata->tx_tstamp) {
nsec = timecounter_cyc2time(&pdata->tstamp_tc,
pdata->tx_tstamp);
@@ -1223,8 +1293,9 @@ static void xgbe_tx_tstamp(struct work_struct *work)
dev_kfree_skb_any(pdata->tx_tstamp_skb);
- spin_lock_irqsave(&pdata->tstamp_lock, flags);
pdata->tx_tstamp_skb = NULL;
+
+unlock:
spin_unlock_irqrestore(&pdata->tstamp_lock, flags);
}
@@ -1623,7 +1694,7 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev)
DBGPR("-->xgbe_xmit: skb->len = %d\n", skb->len);
- channel = pdata->channel + skb->queue_mapping;
+ channel = pdata->channel[skb->queue_mapping];
txq = netdev_get_tx_queue(netdev, channel->queue_index);
ring = channel->tx_ring;
packet = &ring->packet_data;
@@ -1833,9 +1904,10 @@ static void xgbe_poll_controller(struct net_device *netdev)
DBGPR("-->xgbe_poll_controller\n");
if (pdata->per_channel_irq) {
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++)
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
xgbe_dma_isr(channel->dma_irq, channel);
+ }
} else {
disable_irq(pdata->dev_irq);
xgbe_isr(pdata->dev_irq, pdata);
@@ -2328,8 +2400,9 @@ static int xgbe_all_poll(struct napi_struct *napi, int budget)
do {
last_processed = processed;
- channel = pdata->channel;
- for (i = 0; i < pdata->channel_count; i++, channel++) {
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
+
/* Cleanup Tx ring first */
xgbe_tx_poll(channel);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index 920566a3a599..67a2e52ad25d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -247,7 +247,7 @@ static int xgbe_set_pauseparam(struct net_device *netdev,
if (pause->autoneg && (pdata->phy.autoneg != AUTONEG_ENABLE)) {
netdev_err(netdev,
- "autoneg disabled, pause autoneg not avialable\n");
+ "autoneg disabled, pause autoneg not available\n");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
index 417bdb5982a9..4d9062d35930 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
@@ -274,13 +274,16 @@ static void xgbe_i2c_clear_isr_interrupts(struct xgbe_prv_data *pdata,
XI2C_IOREAD(pdata, IC_CLR_STOP_DET);
}
-static irqreturn_t xgbe_i2c_isr(int irq, void *data)
+static void xgbe_i2c_isr_task(unsigned long data)
{
struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
struct xgbe_i2c_op_state *state = &pdata->i2c.op_state;
unsigned int isr;
isr = XI2C_IOREAD(pdata, IC_RAW_INTR_STAT);
+ if (!isr)
+ goto reissue_check;
+
netif_dbg(pdata, intr, pdata->netdev,
"I2C interrupt received: status=%#010x\n", isr);
@@ -308,6 +311,21 @@ out:
if (state->ret || XI2C_GET_BITS(isr, IC_RAW_INTR_STAT, STOP_DET))
complete(&pdata->i2c_complete);
+reissue_check:
+ /* Reissue interrupt if status is not clear */
+ if (pdata->vdata->irq_reissue_support)
+ XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 2);
+}
+
+static irqreturn_t xgbe_i2c_isr(int irq, void *data)
+{
+ struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+
+ if (pdata->isr_as_tasklet)
+ tasklet_schedule(&pdata->tasklet_i2c);
+ else
+ xgbe_i2c_isr_task((unsigned long)pdata);
+
return IRQ_HANDLED;
}
@@ -349,12 +367,11 @@ static void xgbe_i2c_set_target(struct xgbe_prv_data *pdata, unsigned int addr)
XI2C_IOWRITE(pdata, IC_TAR, addr);
}
-static irqreturn_t xgbe_i2c_combined_isr(int irq, struct xgbe_prv_data *pdata)
+static irqreturn_t xgbe_i2c_combined_isr(struct xgbe_prv_data *pdata)
{
- if (!XI2C_IOREAD(pdata, IC_RAW_INTR_STAT))
- return IRQ_HANDLED;
+ xgbe_i2c_isr_task((unsigned long)pdata);
- return xgbe_i2c_isr(irq, pdata);
+ return IRQ_HANDLED;
}
static int xgbe_i2c_xfer(struct xgbe_prv_data *pdata, struct xgbe_i2c_op *op)
@@ -445,6 +462,9 @@ static int xgbe_i2c_start(struct xgbe_prv_data *pdata)
/* If we have a separate I2C irq, enable it */
if (pdata->dev_irq != pdata->i2c_irq) {
+ tasklet_init(&pdata->tasklet_i2c, xgbe_i2c_isr_task,
+ (unsigned long)pdata);
+
ret = devm_request_irq(pdata->dev, pdata->i2c_irq,
xgbe_i2c_isr, 0, pdata->i2c_name,
pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index 17ac8f9a51a0..500147d9e3c8 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -140,14 +140,16 @@ static void xgbe_default_config(struct xgbe_prv_data *pdata)
{
DBGPR("-->xgbe_default_config\n");
- pdata->pblx8 = DMA_PBL_X8_ENABLE;
+ pdata->blen = DMA_SBMR_BLEN_64;
+ pdata->pbl = DMA_PBL_128;
+ pdata->aal = 1;
+ pdata->rd_osr_limit = 8;
+ pdata->wr_osr_limit = 8;
pdata->tx_sf_mode = MTL_TSF_ENABLE;
pdata->tx_threshold = MTL_TX_THRESHOLD_64;
- pdata->tx_pbl = DMA_PBL_16;
pdata->tx_osp_mode = DMA_OSP_ENABLE;
pdata->rx_sf_mode = MTL_RSF_DISABLE;
pdata->rx_threshold = MTL_RX_THRESHOLD_64;
- pdata->rx_pbl = DMA_PBL_16;
pdata->pause_autoneg = 1;
pdata->tx_pause = 1;
pdata->rx_pause = 1;
@@ -277,7 +279,11 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata)
pdata->desc_ded_period = jiffies;
/* Issue software reset to device */
- pdata->hw_if.exit(pdata);
+ ret = pdata->hw_if.exit(pdata);
+ if (ret) {
+ dev_err(dev, "software reset failed\n");
+ return ret;
+ }
/* Set default configuration data */
xgbe_default_config(pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index b672d9249539..80684914dd8a 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -665,6 +665,10 @@ static void xgbe_an37_isr(struct xgbe_prv_data *pdata)
} else {
/* Enable AN interrupts */
xgbe_an37_enable_interrupts(pdata);
+
+ /* Reissue interrupt if status is not clear */
+ if (pdata->vdata->irq_reissue_support)
+ XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 3);
}
}
@@ -684,10 +688,14 @@ static void xgbe_an73_isr(struct xgbe_prv_data *pdata)
} else {
/* Enable AN interrupts */
xgbe_an73_enable_interrupts(pdata);
+
+ /* Reissue interrupt if status is not clear */
+ if (pdata->vdata->irq_reissue_support)
+ XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 3);
}
}
-static irqreturn_t xgbe_an_isr(int irq, void *data)
+static void xgbe_an_isr_task(unsigned long data)
{
struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
@@ -705,13 +713,25 @@ static irqreturn_t xgbe_an_isr(int irq, void *data)
default:
break;
}
+}
+
+static irqreturn_t xgbe_an_isr(int irq, void *data)
+{
+ struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+
+ if (pdata->isr_as_tasklet)
+ tasklet_schedule(&pdata->tasklet_an);
+ else
+ xgbe_an_isr_task((unsigned long)pdata);
return IRQ_HANDLED;
}
-static irqreturn_t xgbe_an_combined_isr(int irq, struct xgbe_prv_data *pdata)
+static irqreturn_t xgbe_an_combined_isr(struct xgbe_prv_data *pdata)
{
- return xgbe_an_isr(irq, pdata);
+ xgbe_an_isr_task((unsigned long)pdata);
+
+ return IRQ_HANDLED;
}
static void xgbe_an_irq_work(struct work_struct *work)
@@ -915,6 +935,10 @@ static void xgbe_an_state_machine(struct work_struct *work)
break;
}
+ /* Reissue interrupt if status is not clear */
+ if (pdata->vdata->irq_reissue_support)
+ XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 3);
+
mutex_unlock(&pdata->an_mutex);
}
@@ -1379,6 +1403,9 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata)
/* If we have a separate AN irq, enable it */
if (pdata->dev_irq != pdata->an_irq) {
+ tasklet_init(&pdata->tasklet_an, xgbe_an_isr_task,
+ (unsigned long)pdata);
+
ret = devm_request_irq(pdata->dev, pdata->an_irq,
xgbe_an_isr, 0, pdata->an_name,
pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
index 38392a520725..1e56ad7bd9a5 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
@@ -139,6 +139,7 @@ static int xgbe_config_multi_msi(struct xgbe_prv_data *pdata)
return ret;
}
+ pdata->isr_as_tasklet = 1;
pdata->irq_count = ret;
pdata->dev_irq = pci_irq_vector(pdata->pcidev, 0);
@@ -175,6 +176,7 @@ static int xgbe_config_irqs(struct xgbe_prv_data *pdata)
return ret;
}
+ pdata->isr_as_tasklet = pdata->pcidev->msi_enabled ? 1 : 0;
pdata->irq_count = 1;
pdata->channel_irq_count = 1;
@@ -325,9 +327,9 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
/* Set the DMA coherency values */
pdata->coherent = 1;
- pdata->axdomain = XGBE_DMA_OS_AXDOMAIN;
- pdata->arcache = XGBE_DMA_OS_ARCACHE;
- pdata->awcache = XGBE_DMA_OS_AWCACHE;
+ pdata->arcr = XGBE_DMA_PCI_ARCR;
+ pdata->awcr = XGBE_DMA_PCI_AWCR;
+ pdata->awarcr = XGBE_DMA_PCI_AWARCR;
/* Set the maximum channels and queues */
reg = XP_IOREAD(pdata, XP_PROP_1);
@@ -445,6 +447,9 @@ static const struct xgbe_version_data xgbe_v2a = {
.tx_tstamp_workaround = 1,
.ecc_support = 1,
.i2c_support = 1,
+ .irq_reissue_support = 1,
+ .tx_desc_prefetch = 5,
+ .rx_desc_prefetch = 5,
};
static const struct xgbe_version_data xgbe_v2b = {
@@ -456,6 +461,9 @@ static const struct xgbe_version_data xgbe_v2b = {
.tx_tstamp_workaround = 1,
.ecc_support = 1,
.i2c_support = 1,
+ .irq_reissue_support = 1,
+ .tx_desc_prefetch = 5,
+ .rx_desc_prefetch = 5,
};
static const struct pci_device_id xgbe_pci_table[] = {
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index e707c49cc55a..04b5c149caca 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -711,23 +711,39 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
{
struct xgbe_phy_data *phy_data = pdata->phy_data;
+ if (!phy_data->sfp_mod_absent && !phy_data->sfp_changed)
+ return;
+
+ pdata->phy.supported &= ~SUPPORTED_Autoneg;
+ pdata->phy.supported &= ~(SUPPORTED_Pause | SUPPORTED_Asym_Pause);
+ pdata->phy.supported &= ~SUPPORTED_TP;
+ pdata->phy.supported &= ~SUPPORTED_FIBRE;
+ pdata->phy.supported &= ~SUPPORTED_100baseT_Full;
+ pdata->phy.supported &= ~SUPPORTED_1000baseT_Full;
+ pdata->phy.supported &= ~SUPPORTED_10000baseT_Full;
+
if (phy_data->sfp_mod_absent) {
pdata->phy.speed = SPEED_UNKNOWN;
pdata->phy.duplex = DUPLEX_UNKNOWN;
pdata->phy.autoneg = AUTONEG_ENABLE;
+ pdata->phy.pause_autoneg = AUTONEG_ENABLE;
+
+ pdata->phy.supported |= SUPPORTED_Autoneg;
+ pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+ pdata->phy.supported |= SUPPORTED_TP;
+ pdata->phy.supported |= SUPPORTED_FIBRE;
+ if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100)
+ pdata->phy.supported |= SUPPORTED_100baseT_Full;
+ if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
+ pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+ if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)
+ pdata->phy.supported |= SUPPORTED_10000baseT_Full;
+
pdata->phy.advertising = pdata->phy.supported;
return;
}
- pdata->phy.advertising &= ~ADVERTISED_Autoneg;
- pdata->phy.advertising &= ~ADVERTISED_TP;
- pdata->phy.advertising &= ~ADVERTISED_FIBRE;
- pdata->phy.advertising &= ~ADVERTISED_100baseT_Full;
- pdata->phy.advertising &= ~ADVERTISED_1000baseT_Full;
- pdata->phy.advertising &= ~ADVERTISED_10000baseT_Full;
- pdata->phy.advertising &= ~ADVERTISED_10000baseR_FEC;
-
switch (phy_data->sfp_base) {
case XGBE_SFP_BASE_1000_T:
case XGBE_SFP_BASE_1000_SX:
@@ -736,17 +752,25 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
pdata->phy.speed = SPEED_UNKNOWN;
pdata->phy.duplex = DUPLEX_UNKNOWN;
pdata->phy.autoneg = AUTONEG_ENABLE;
- pdata->phy.advertising |= ADVERTISED_Autoneg;
+ pdata->phy.pause_autoneg = AUTONEG_ENABLE;
+ pdata->phy.supported |= SUPPORTED_Autoneg;
+ pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
break;
case XGBE_SFP_BASE_10000_SR:
case XGBE_SFP_BASE_10000_LR:
case XGBE_SFP_BASE_10000_LRM:
case XGBE_SFP_BASE_10000_ER:
case XGBE_SFP_BASE_10000_CR:
- default:
pdata->phy.speed = SPEED_10000;
pdata->phy.duplex = DUPLEX_FULL;
pdata->phy.autoneg = AUTONEG_DISABLE;
+ pdata->phy.pause_autoneg = AUTONEG_DISABLE;
+ break;
+ default:
+ pdata->phy.speed = SPEED_UNKNOWN;
+ pdata->phy.duplex = DUPLEX_UNKNOWN;
+ pdata->phy.autoneg = AUTONEG_DISABLE;
+ pdata->phy.pause_autoneg = AUTONEG_DISABLE;
break;
}
@@ -754,36 +778,38 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
case XGBE_SFP_BASE_1000_T:
case XGBE_SFP_BASE_1000_CX:
case XGBE_SFP_BASE_10000_CR:
- pdata->phy.advertising |= ADVERTISED_TP;
+ pdata->phy.supported |= SUPPORTED_TP;
break;
default:
- pdata->phy.advertising |= ADVERTISED_FIBRE;
+ pdata->phy.supported |= SUPPORTED_FIBRE;
}
switch (phy_data->sfp_speed) {
case XGBE_SFP_SPEED_100_1000:
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100)
- pdata->phy.advertising |= ADVERTISED_100baseT_Full;
+ pdata->phy.supported |= SUPPORTED_100baseT_Full;
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
- pdata->phy.advertising |= ADVERTISED_1000baseT_Full;
+ pdata->phy.supported |= SUPPORTED_1000baseT_Full;
break;
case XGBE_SFP_SPEED_1000:
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
- pdata->phy.advertising |= ADVERTISED_1000baseT_Full;
+ pdata->phy.supported |= SUPPORTED_1000baseT_Full;
break;
case XGBE_SFP_SPEED_10000:
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)
- pdata->phy.advertising |= ADVERTISED_10000baseT_Full;
+ pdata->phy.supported |= SUPPORTED_10000baseT_Full;
break;
default:
/* Choose the fastest supported speed */
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)
- pdata->phy.advertising |= ADVERTISED_10000baseT_Full;
+ pdata->phy.supported |= SUPPORTED_10000baseT_Full;
else if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
- pdata->phy.advertising |= ADVERTISED_1000baseT_Full;
+ pdata->phy.supported |= SUPPORTED_1000baseT_Full;
else if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100)
- pdata->phy.advertising |= ADVERTISED_100baseT_Full;
+ pdata->phy.supported |= SUPPORTED_100baseT_Full;
}
+
+ pdata->phy.advertising = pdata->phy.supported;
}
static bool xgbe_phy_sfp_bit_rate(struct xgbe_sfp_eeprom *sfp_eeprom,
@@ -1095,7 +1121,8 @@ static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata)
ret = xgbe_phy_sfp_get_mux(pdata);
if (ret) {
- netdev_err(pdata->netdev, "I2C error setting SFP MUX\n");
+ dev_err_once(pdata->dev, "%s: I2C error setting SFP MUX\n",
+ netdev_name(pdata->netdev));
return ret;
}
@@ -1105,7 +1132,8 @@ static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata)
&eeprom_addr, sizeof(eeprom_addr),
&sfp_eeprom, sizeof(sfp_eeprom));
if (ret) {
- netdev_err(pdata->netdev, "I2C error reading SFP EEPROM\n");
+ dev_err_once(pdata->dev, "%s: I2C error reading SFP EEPROM\n",
+ netdev_name(pdata->netdev));
goto put;
}
@@ -1164,7 +1192,8 @@ static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata)
&gpio_reg, sizeof(gpio_reg),
gpio_ports, sizeof(gpio_ports));
if (ret) {
- netdev_err(pdata->netdev, "I2C error reading SFP GPIOs\n");
+ dev_err_once(pdata->dev, "%s: I2C error reading SFP GPIOs\n",
+ netdev_name(pdata->netdev));
return;
}
@@ -1694,19 +1723,25 @@ static void xgbe_phy_set_redrv_mode(struct xgbe_prv_data *pdata)
xgbe_phy_put_comm_ownership(pdata);
}
-static void xgbe_phy_start_ratechange(struct xgbe_prv_data *pdata)
+static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
+ unsigned int cmd, unsigned int sub_cmd)
{
- if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
- return;
+ unsigned int s0 = 0;
+ unsigned int wait;
/* Log if a previous command did not complete */
- netif_dbg(pdata, link, pdata->netdev,
- "firmware mailbox not ready for command\n");
-}
+ if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
+ netif_dbg(pdata, link, pdata->netdev,
+ "firmware mailbox not ready for command\n");
-static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata)
-{
- unsigned int wait;
+ /* Construct the command */
+ XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, cmd);
+ XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, sub_cmd);
+
+ /* Issue the command */
+ XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+ XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+ XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
/* Wait for command to complete */
wait = XGBE_RATECHANGE_COUNT;
@@ -1723,21 +1758,8 @@ static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata)
static void xgbe_phy_rrc(struct xgbe_prv_data *pdata)
{
- unsigned int s0;
-
- xgbe_phy_start_ratechange(pdata);
-
/* Receiver Reset Cycle */
- s0 = 0;
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 5);
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
-
- /* Call FW to make the change */
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
- XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
- xgbe_phy_complete_ratechange(pdata);
+ xgbe_phy_perform_ratechange(pdata, 5, 0);
netif_dbg(pdata, link, pdata->netdev, "receiver reset complete\n");
}
@@ -1746,14 +1768,8 @@ static void xgbe_phy_power_off(struct xgbe_prv_data *pdata)
{
struct xgbe_phy_data *phy_data = pdata->phy_data;
- xgbe_phy_start_ratechange(pdata);
-
- /* Call FW to make the change */
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, 0);
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
- XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
- xgbe_phy_complete_ratechange(pdata);
+ /* Power off */
+ xgbe_phy_perform_ratechange(pdata, 0, 0);
phy_data->cur_mode = XGBE_MODE_UNKNOWN;
@@ -1763,33 +1779,21 @@ static void xgbe_phy_power_off(struct xgbe_prv_data *pdata)
static void xgbe_phy_sfi_mode(struct xgbe_prv_data *pdata)
{
struct xgbe_phy_data *phy_data = pdata->phy_data;
- unsigned int s0;
xgbe_phy_set_redrv_mode(pdata);
- xgbe_phy_start_ratechange(pdata);
-
/* 10G/SFI */
- s0 = 0;
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 3);
if (phy_data->sfp_cable != XGBE_SFP_CABLE_PASSIVE) {
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
+ xgbe_phy_perform_ratechange(pdata, 3, 0);
} else {
if (phy_data->sfp_cable_len <= 1)
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1);
+ xgbe_phy_perform_ratechange(pdata, 3, 1);
else if (phy_data->sfp_cable_len <= 3)
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2);
+ xgbe_phy_perform_ratechange(pdata, 3, 2);
else
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
+ xgbe_phy_perform_ratechange(pdata, 3, 3);
}
- /* Call FW to make the change */
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
- XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
- xgbe_phy_complete_ratechange(pdata);
-
phy_data->cur_mode = XGBE_MODE_SFI;
netif_dbg(pdata, link, pdata->netdev, "10GbE SFI mode set\n");
@@ -1798,23 +1802,11 @@ static void xgbe_phy_sfi_mode(struct xgbe_prv_data *pdata)
static void xgbe_phy_x_mode(struct xgbe_prv_data *pdata)
{
struct xgbe_phy_data *phy_data = pdata->phy_data;
- unsigned int s0;
xgbe_phy_set_redrv_mode(pdata);
- xgbe_phy_start_ratechange(pdata);
-
/* 1G/X */
- s0 = 0;
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
-
- /* Call FW to make the change */
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
- XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
- xgbe_phy_complete_ratechange(pdata);
+ xgbe_phy_perform_ratechange(pdata, 1, 3);
phy_data->cur_mode = XGBE_MODE_X;
@@ -1824,23 +1816,11 @@ static void xgbe_phy_x_mode(struct xgbe_prv_data *pdata)
static void xgbe_phy_sgmii_1000_mode(struct xgbe_prv_data *pdata)
{
struct xgbe_phy_data *phy_data = pdata->phy_data;
- unsigned int s0;
xgbe_phy_set_redrv_mode(pdata);
- xgbe_phy_start_ratechange(pdata);
-
/* 1G/SGMII */
- s0 = 0;
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2);
-
- /* Call FW to make the change */
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
- XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
- xgbe_phy_complete_ratechange(pdata);
+ xgbe_phy_perform_ratechange(pdata, 1, 2);
phy_data->cur_mode = XGBE_MODE_SGMII_1000;
@@ -1850,23 +1830,11 @@ static void xgbe_phy_sgmii_1000_mode(struct xgbe_prv_data *pdata)
static void xgbe_phy_sgmii_100_mode(struct xgbe_prv_data *pdata)
{
struct xgbe_phy_data *phy_data = pdata->phy_data;
- unsigned int s0;
xgbe_phy_set_redrv_mode(pdata);
- xgbe_phy_start_ratechange(pdata);
-
- /* 1G/SGMII */
- s0 = 0;
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1);
-
- /* Call FW to make the change */
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
- XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
- xgbe_phy_complete_ratechange(pdata);
+ /* 100M/SGMII */
+ xgbe_phy_perform_ratechange(pdata, 1, 1);
phy_data->cur_mode = XGBE_MODE_SGMII_100;
@@ -1876,23 +1844,11 @@ static void xgbe_phy_sgmii_100_mode(struct xgbe_prv_data *pdata)
static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata)
{
struct xgbe_phy_data *phy_data = pdata->phy_data;
- unsigned int s0;
xgbe_phy_set_redrv_mode(pdata);
- xgbe_phy_start_ratechange(pdata);
-
/* 10G/KR */
- s0 = 0;
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 4);
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
-
- /* Call FW to make the change */
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
- XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
- xgbe_phy_complete_ratechange(pdata);
+ xgbe_phy_perform_ratechange(pdata, 4, 0);
phy_data->cur_mode = XGBE_MODE_KR;
@@ -1902,23 +1858,11 @@ static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata)
static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata)
{
struct xgbe_phy_data *phy_data = pdata->phy_data;
- unsigned int s0;
xgbe_phy_set_redrv_mode(pdata);
- xgbe_phy_start_ratechange(pdata);
-
/* 2.5G/KX */
- s0 = 0;
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 2);
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
-
- /* Call FW to make the change */
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
- XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
- xgbe_phy_complete_ratechange(pdata);
+ xgbe_phy_perform_ratechange(pdata, 2, 0);
phy_data->cur_mode = XGBE_MODE_KX_2500;
@@ -1928,23 +1872,11 @@ static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata)
static void xgbe_phy_kx_1000_mode(struct xgbe_prv_data *pdata)
{
struct xgbe_phy_data *phy_data = pdata->phy_data;
- unsigned int s0;
xgbe_phy_set_redrv_mode(pdata);
- xgbe_phy_start_ratechange(pdata);
-
/* 1G/KX */
- s0 = 0;
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
- XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
-
- /* Call FW to make the change */
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
- XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
- XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
- xgbe_phy_complete_ratechange(pdata);
+ xgbe_phy_perform_ratechange(pdata, 1, 3);
phy_data->cur_mode = XGBE_MODE_KX_1000;
@@ -2037,6 +1969,8 @@ static enum xgbe_mode xgbe_phy_get_baset_mode(struct xgbe_phy_data *phy_data,
return XGBE_MODE_SGMII_100;
case SPEED_1000:
return XGBE_MODE_SGMII_1000;
+ case SPEED_2500:
+ return XGBE_MODE_KX_2500;
case SPEED_10000:
return XGBE_MODE_KR;
default:
@@ -2180,6 +2114,9 @@ static bool xgbe_phy_use_baset_mode(struct xgbe_prv_data *pdata,
case XGBE_MODE_SGMII_1000:
return xgbe_phy_check_mode(pdata, mode,
ADVERTISED_1000baseT_Full);
+ case XGBE_MODE_KX_2500:
+ return xgbe_phy_check_mode(pdata, mode,
+ ADVERTISED_2500baseX_Full);
case XGBE_MODE_KR:
return xgbe_phy_check_mode(pdata, mode,
ADVERTISED_10000baseT_Full);
@@ -2210,6 +2147,8 @@ static bool xgbe_phy_use_sfp_mode(struct xgbe_prv_data *pdata,
return xgbe_phy_check_mode(pdata, mode,
ADVERTISED_1000baseT_Full);
case XGBE_MODE_SFI:
+ if (phy_data->sfp_mod_absent)
+ return true;
return xgbe_phy_check_mode(pdata, mode,
ADVERTISED_10000baseT_Full);
default:
@@ -2287,6 +2226,8 @@ static bool xgbe_phy_valid_speed_baset_mode(struct xgbe_phy_data *phy_data,
case SPEED_100:
case SPEED_1000:
return true;
+ case SPEED_2500:
+ return (phy_data->port_mode == XGBE_PORT_MODE_NBASE_T);
case SPEED_10000:
return (phy_data->port_mode == XGBE_PORT_MODE_10GBASE_T);
default:
@@ -3013,9 +2954,6 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) {
pdata->phy.supported |= SUPPORTED_10000baseT_Full;
phy_data->start_mode = XGBE_MODE_SFI;
- if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
- pdata->phy.supported |=
- SUPPORTED_10000baseR_FEC;
}
phy_data->phydev_mode = XGBE_MDIO_MODE_CL22;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
index 84d4c51cab8c..d0f3dfb88202 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
@@ -448,13 +448,11 @@ static int xgbe_platform_probe(struct platform_device *pdev)
}
pdata->coherent = (attr == DEV_DMA_COHERENT);
if (pdata->coherent) {
- pdata->axdomain = XGBE_DMA_OS_AXDOMAIN;
- pdata->arcache = XGBE_DMA_OS_ARCACHE;
- pdata->awcache = XGBE_DMA_OS_AWCACHE;
+ pdata->arcr = XGBE_DMA_OS_ARCR;
+ pdata->awcr = XGBE_DMA_OS_AWCR;
} else {
- pdata->axdomain = XGBE_DMA_SYS_AXDOMAIN;
- pdata->arcache = XGBE_DMA_SYS_ARCACHE;
- pdata->awcache = XGBE_DMA_SYS_AWCACHE;
+ pdata->arcr = XGBE_DMA_SYS_ARCR;
+ pdata->awcr = XGBE_DMA_SYS_AWCR;
}
/* Set the maximum fifo amounts */
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c
index a533a6cc2d53..d06d260cf1e2 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c
@@ -267,7 +267,7 @@ void xgbe_ptp_register(struct xgbe_prv_data *pdata)
ktime_to_ns(ktime_get_real()));
/* Disable all timestamping to start */
- XGMAC_IOWRITE(pdata, MAC_TCR, 0);
+ XGMAC_IOWRITE(pdata, MAC_TSCR, 0);
pdata->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
pdata->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index f9a24639f574..0938294f640a 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -128,6 +128,7 @@
#include <linux/net_tstamp.h>
#include <net/dcbnl.h>
#include <linux/completion.h>
+#include <linux/cpumask.h>
#define XGBE_DRV_NAME "amd-xgbe"
#define XGBE_DRV_VERSION "1.0.3"
@@ -163,14 +164,17 @@
#define XGBE_DMA_STOP_TIMEOUT 1
/* DMA cache settings - Outer sharable, write-back, write-allocate */
-#define XGBE_DMA_OS_AXDOMAIN 0x2
-#define XGBE_DMA_OS_ARCACHE 0xb
-#define XGBE_DMA_OS_AWCACHE 0xf
+#define XGBE_DMA_OS_ARCR 0x002b2b2b
+#define XGBE_DMA_OS_AWCR 0x2f2f2f2f
/* DMA cache settings - System, no caches used */
-#define XGBE_DMA_SYS_AXDOMAIN 0x3
-#define XGBE_DMA_SYS_ARCACHE 0x0
-#define XGBE_DMA_SYS_AWCACHE 0x0
+#define XGBE_DMA_SYS_ARCR 0x00303030
+#define XGBE_DMA_SYS_AWCR 0x30303030
+
+/* DMA cache settings - PCI device */
+#define XGBE_DMA_PCI_ARCR 0x00000003
+#define XGBE_DMA_PCI_AWCR 0x13131313
+#define XGBE_DMA_PCI_AWARCR 0x00000313
/* DMA channel interrupt modes */
#define XGBE_IRQ_MODE_EDGE 0
@@ -412,6 +416,7 @@ struct xgbe_ring {
/* Page allocation for RX buffers */
struct xgbe_page_alloc rx_hdr_pa;
struct xgbe_page_alloc rx_buf_pa;
+ int node;
/* Ring index values
* cur - Tx: index of descriptor to be used for current transfer
@@ -462,6 +467,9 @@ struct xgbe_channel {
struct xgbe_ring *tx_ring;
struct xgbe_ring *rx_ring;
+
+ int node;
+ cpumask_t affinity_mask;
} ____cacheline_aligned;
enum xgbe_state {
@@ -734,13 +742,6 @@ struct xgbe_hw_if {
/* For TX DMA Operate on Second Frame config */
int (*config_osp_mode)(struct xgbe_prv_data *);
- /* For RX and TX PBL config */
- int (*config_rx_pbl_val)(struct xgbe_prv_data *);
- int (*get_rx_pbl_val)(struct xgbe_prv_data *);
- int (*config_tx_pbl_val)(struct xgbe_prv_data *);
- int (*get_tx_pbl_val)(struct xgbe_prv_data *);
- int (*config_pblx8)(struct xgbe_prv_data *);
-
/* For MMC statistics */
void (*rx_mmc_int)(struct xgbe_prv_data *);
void (*tx_mmc_int)(struct xgbe_prv_data *);
@@ -837,7 +838,7 @@ struct xgbe_phy_if {
bool (*phy_valid_speed)(struct xgbe_prv_data *, int);
/* For single interrupt support */
- irqreturn_t (*an_isr)(int, struct xgbe_prv_data *);
+ irqreturn_t (*an_isr)(struct xgbe_prv_data *);
/* PHY implementation specific services */
struct xgbe_phy_impl_if phy_impl;
@@ -855,7 +856,7 @@ struct xgbe_i2c_if {
int (*i2c_xfer)(struct xgbe_prv_data *, struct xgbe_i2c_op *);
/* For single interrupt support */
- irqreturn_t (*i2c_isr)(int, struct xgbe_prv_data *);
+ irqreturn_t (*i2c_isr)(struct xgbe_prv_data *);
};
struct xgbe_desc_if {
@@ -924,6 +925,9 @@ struct xgbe_version_data {
unsigned int tx_tstamp_workaround;
unsigned int ecc_support;
unsigned int i2c_support;
+ unsigned int irq_reissue_support;
+ unsigned int tx_desc_prefetch;
+ unsigned int rx_desc_prefetch;
};
struct xgbe_prv_data {
@@ -1001,9 +1005,9 @@ struct xgbe_prv_data {
/* AXI DMA settings */
unsigned int coherent;
- unsigned int axdomain;
- unsigned int arcache;
- unsigned int awcache;
+ unsigned int arcr;
+ unsigned int awcr;
+ unsigned int awarcr;
/* Service routine support */
struct workqueue_struct *dev_workqueue;
@@ -1011,7 +1015,7 @@ struct xgbe_prv_data {
struct timer_list service_timer;
/* Rings for Tx/Rx on a DMA channel */
- struct xgbe_channel *channel;
+ struct xgbe_channel *channel[XGBE_MAX_DMA_CHANNELS];
unsigned int tx_max_channel_count;
unsigned int rx_max_channel_count;
unsigned int channel_count;
@@ -1026,19 +1030,21 @@ struct xgbe_prv_data {
unsigned int rx_q_count;
/* Tx/Rx common settings */
- unsigned int pblx8;
+ unsigned int blen;
+ unsigned int pbl;
+ unsigned int aal;
+ unsigned int rd_osr_limit;
+ unsigned int wr_osr_limit;
/* Tx settings */
unsigned int tx_sf_mode;
unsigned int tx_threshold;
- unsigned int tx_pbl;
unsigned int tx_osp_mode;
unsigned int tx_max_fifo_size;
/* Rx settings */
unsigned int rx_sf_mode;
unsigned int rx_threshold;
- unsigned int rx_pbl;
unsigned int rx_max_fifo_size;
/* Tx coalescing settings */
@@ -1159,6 +1165,12 @@ struct xgbe_prv_data {
unsigned int lpm_ctrl; /* CTRL1 for resume */
+ unsigned int isr_as_tasklet;
+ struct tasklet_struct tasklet_dev;
+ struct tasklet_struct tasklet_ecc;
+ struct tasklet_struct tasklet_i2c;
+ struct tasklet_struct tasklet_an;
+
#ifdef CONFIG_DEBUG_FS
struct dentry *xgbe_debugfs;
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 7e913d8331c3..8c9986f3fc01 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2252,7 +2252,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
if (atl1c_tx_map(adapter, skb, tpd, type) < 0) {
netif_info(adapter, tx_done, adapter->netdev,
- "tx-skb droppted due to dma error\n");
+ "tx-skb dropped due to dma error\n");
/* roll back tpd/buffer */
atl1c_tx_rollback(adapter, tpd, type);
dev_kfree_skb_any(skb);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 14c236e5bdb1..c12b4d3e946e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12729,7 +12729,7 @@ static int bnx2x_set_mc_list(struct bnx2x *bp)
} else {
/* If no mc addresses are required, flush the configuration */
rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
- if (rc)
+ if (rc < 0)
BNX2X_ERR("Failed to clear multicast configuration %d\n",
rc);
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 11e8a866a312..a19f68f5862d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1311,10 +1311,11 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
cp_cons = NEXT_CMP(cp_cons);
}
- if (unlikely(agg_bufs > MAX_SKB_FRAGS)) {
+ if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) {
bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs);
- netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
- agg_bufs, (int)MAX_SKB_FRAGS);
+ if (agg_bufs > MAX_SKB_FRAGS)
+ netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
+ agg_bufs, (int)MAX_SKB_FRAGS);
return NULL;
}
@@ -1573,6 +1574,45 @@ next_rx_no_prod:
return rc;
}
+/* In netpoll mode, if we are using a combined completion ring, we need to
+ * discard the rx packets and recycle the buffers.
+ */
+static int bnxt_force_rx_discard(struct bnxt *bp, struct bnxt_napi *bnapi,
+ u32 *raw_cons, u8 *event)
+{
+ struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+ u32 tmp_raw_cons = *raw_cons;
+ struct rx_cmp_ext *rxcmp1;
+ struct rx_cmp *rxcmp;
+ u16 cp_cons;
+ u8 cmp_type;
+
+ cp_cons = RING_CMP(tmp_raw_cons);
+ rxcmp = (struct rx_cmp *)
+ &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+
+ tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
+ cp_cons = RING_CMP(tmp_raw_cons);
+ rxcmp1 = (struct rx_cmp_ext *)
+ &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+
+ if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
+ return -EBUSY;
+
+ cmp_type = RX_CMP_TYPE(rxcmp);
+ if (cmp_type == CMP_TYPE_RX_L2_CMP) {
+ rxcmp1->rx_cmp_cfa_code_errors_v2 |=
+ cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR);
+ } else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
+ struct rx_tpa_end_cmp_ext *tpa_end1;
+
+ tpa_end1 = (struct rx_tpa_end_cmp_ext *)rxcmp1;
+ tpa_end1->rx_tpa_end_cmp_errors_v2 |=
+ cpu_to_le32(RX_TPA_END_CMP_ERRORS);
+ }
+ return bnxt_rx_pkt(bp, bnapi, raw_cons, event);
+}
+
#define BNXT_GET_EVENT_PORT(data) \
((data) & \
ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK)
@@ -1755,7 +1795,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
if (unlikely(tx_pkts > bp->tx_wake_thresh))
rx_pkts = budget;
} else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) {
- rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+ if (likely(budget))
+ rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+ else
+ rc = bnxt_force_rx_discard(bp, bnapi, &raw_cons,
+ &event);
if (likely(rc >= 0))
rx_pkts += rc;
else if (rc == -EBUSY) /* partial completion */
@@ -6730,12 +6774,11 @@ static void bnxt_poll_controller(struct net_device *dev)
struct bnxt *bp = netdev_priv(dev);
int i;
- for (i = 0; i < bp->cp_nr_rings; i++) {
- struct bnxt_irq *irq = &bp->irq_tbl[i];
+ /* Only process tx rings/combined rings in netpoll mode. */
+ for (i = 0; i < bp->tx_nr_rings; i++) {
+ struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
- disable_irq(irq->vector);
- irq->handler(irq->vector, bp->bnapi[i]);
- enable_irq(irq->vector);
+ napi_schedule(&txr->bnapi->napi);
}
}
#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 5984423499e6..f872a7db2ca8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -374,12 +374,16 @@ struct rx_tpa_end_cmp_ext {
__le32 rx_tpa_end_cmp_errors_v2;
#define RX_TPA_END_CMP_V2 (0x1 << 0)
- #define RX_TPA_END_CMP_ERRORS (0x7fff << 1)
+ #define RX_TPA_END_CMP_ERRORS (0x3 << 1)
#define RX_TPA_END_CMPL_ERRORS_SHIFT 1
u32 rx_tpa_end_cmp_start_opaque;
};
+#define TPA_END_ERRORS(rx_tpa_end_ext) \
+ ((rx_tpa_end_ext)->rx_tpa_end_cmp_errors_v2 & \
+ cpu_to_le32(RX_TPA_END_CMP_ERRORS))
+
#define DB_IDX_MASK 0xffffff
#define DB_IDX_VALID (0x1 << 26)
#define DB_IRQ_DIS (0x1 << 27)
diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig
index 608bea171956..427d65a1a126 100644
--- a/drivers/net/ethernet/cadence/Kconfig
+++ b/drivers/net/ethernet/cadence/Kconfig
@@ -29,7 +29,15 @@ config MACB
support for the MACB/GEM chip.
To compile this driver as a module, choose M here: the module
- will be called macb.
+ will be macb.
+
+config MACB_USE_HWSTAMP
+ bool "Use IEEE 1588 hwstamp"
+ depends on MACB
+ default y
+ imply PTP_1588_CLOCK
+ ---help---
+ Enable IEEE 1588 Precision Time Protocol (PTP) support for MACB.
config MACB_PCI
tristate "Cadence PCI MACB/GEM support"
diff --git a/drivers/net/ethernet/cadence/Makefile b/drivers/net/ethernet/cadence/Makefile
index 4ba75594d5c5..1d66ddb68969 100644
--- a/drivers/net/ethernet/cadence/Makefile
+++ b/drivers/net/ethernet/cadence/Makefile
@@ -1,6 +1,11 @@
#
# Makefile for the Atmel network device drivers.
#
+macb-y := macb_main.o
+
+ifeq ($(CONFIG_MACB_USE_HWSTAMP),y)
+macb-y += macb_ptp.o
+endif
obj-$(CONFIG_MACB) += macb.o
obj-$(CONFIG_MACB_PCI) += macb_pci.o
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 2510661102ba..c93f3a2dc6c1 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -11,6 +11,12 @@
#define _MACB_H
#include <linux/phy.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/net_tstamp.h>
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) || defined(CONFIG_MACB_USE_HWSTAMP)
+#define MACB_EXT_DESC
+#endif
#define MACB_GREGS_NBR 16
#define MACB_GREGS_VERSION 2
@@ -86,6 +92,10 @@
#define GEM_SA3T 0x009C /* Specific3 Top */
#define GEM_SA4B 0x00A0 /* Specific4 Bottom */
#define GEM_SA4T 0x00A4 /* Specific4 Top */
+#define GEM_EFTSH 0x00e8 /* PTP Event Frame Transmitted Seconds Register 47:32 */
+#define GEM_EFRSH 0x00ec /* PTP Event Frame Received Seconds Register 47:32 */
+#define GEM_PEFTSH 0x00f0 /* PTP Peer Event Frame Transmitted Seconds Register 47:32 */
+#define GEM_PEFRSH 0x00f4 /* PTP Peer Event Frame Received Seconds Register 47:32 */
#define GEM_OTX 0x0100 /* Octets transmitted */
#define GEM_OCTTXL 0x0100 /* Octets transmitted [31:0] */
#define GEM_OCTTXH 0x0104 /* Octets transmitted [47:32] */
@@ -155,6 +165,9 @@
#define GEM_DCFG6 0x0294 /* Design Config 6 */
#define GEM_DCFG7 0x0298 /* Design Config 7 */
+#define GEM_TXBDCTRL 0x04cc /* TX Buffer Descriptor control register */
+#define GEM_RXBDCTRL 0x04d0 /* RX Buffer Descriptor control register */
+
#define GEM_ISR(hw_q) (0x0400 + ((hw_q) << 2))
#define GEM_TBQP(hw_q) (0x0440 + ((hw_q) << 2))
#define GEM_TBQPH(hw_q) (0x04C8)
@@ -191,6 +204,8 @@
#define MACB_TZQ_OFFSET 12 /* Transmit zero quantum pause frame */
#define MACB_TZQ_SIZE 1
#define MACB_SRTSM_OFFSET 15
+#define MACB_OSSMODE_OFFSET 24 /* Enable One Step Synchro Mode */
+#define MACB_OSSMODE_SIZE 1
/* Bitfields in NCFGR */
#define MACB_SPD_OFFSET 0 /* Speed */
@@ -269,6 +284,10 @@
#define GEM_RXBS_SIZE 8
#define GEM_DDRP_OFFSET 24 /* disc_when_no_ahb */
#define GEM_DDRP_SIZE 1
+#define GEM_RXEXT_OFFSET 28 /* RX extended Buffer Descriptor mode */
+#define GEM_RXEXT_SIZE 1
+#define GEM_TXEXT_OFFSET 29 /* TX extended Buffer Descriptor mode */
+#define GEM_TXEXT_SIZE 1
#define GEM_ADDR64_OFFSET 30 /* Address bus width - 64b or 32b */
#define GEM_ADDR64_SIZE 1
@@ -425,6 +444,11 @@
#define GEM_TX_PKT_BUFF_OFFSET 21
#define GEM_TX_PKT_BUFF_SIZE 1
+
+/* Bitfields in DCFG5. */
+#define GEM_TSU_OFFSET 8
+#define GEM_TSU_SIZE 1
+
/* Bitfields in DCFG6. */
#define GEM_PBUF_LSO_OFFSET 27
#define GEM_PBUF_LSO_SIZE 1
@@ -439,6 +463,52 @@
#define GEM_NSINCR_OFFSET 0
#define GEM_NSINCR_SIZE 8
+/* Bitfields in TSH */
+#define GEM_TSH_OFFSET 0 /* TSU timer value (s). MSB [47:32] of seconds timer count */
+#define GEM_TSH_SIZE 16
+
+/* Bitfields in TSL */
+#define GEM_TSL_OFFSET 0 /* TSU timer value (s). LSB [31:0] of seconds timer count */
+#define GEM_TSL_SIZE 32
+
+/* Bitfields in TN */
+#define GEM_TN_OFFSET 0 /* TSU timer value (ns) */
+#define GEM_TN_SIZE 30
+
+/* Bitfields in TXBDCTRL */
+#define GEM_TXTSMODE_OFFSET 4 /* TX Descriptor Timestamp Insertion mode */
+#define GEM_TXTSMODE_SIZE 2
+
+/* Bitfields in RXBDCTRL */
+#define GEM_RXTSMODE_OFFSET 4 /* RX Descriptor Timestamp Insertion mode */
+#define GEM_RXTSMODE_SIZE 2
+
+/* Transmit DMA buffer descriptor Word 1 */
+#define GEM_DMA_TXVALID_OFFSET 23 /* timestamp has been captured in the Buffer Descriptor */
+#define GEM_DMA_TXVALID_SIZE 1
+
+/* Receive DMA buffer descriptor Word 0 */
+#define GEM_DMA_RXVALID_OFFSET 2 /* indicates a valid timestamp in the Buffer Descriptor */
+#define GEM_DMA_RXVALID_SIZE 1
+
+/* DMA buffer descriptor Word 2 (32 bit addressing) or Word 4 (64 bit addressing) */
+#define GEM_DMA_SECL_OFFSET 30 /* Timestamp seconds[1:0] */
+#define GEM_DMA_SECL_SIZE 2
+#define GEM_DMA_NSEC_OFFSET 0 /* Timestamp nanosecs [29:0] */
+#define GEM_DMA_NSEC_SIZE 30
+
+/* DMA buffer descriptor Word 3 (32 bit addressing) or Word 5 (64 bit addressing) */
+
+/* New hardware supports 12 bit precision of timestamp in DMA buffer descriptor.
+ * Old hardware supports only 6 bit precision but it is enough for PTP.
+ * Less accuracy is used always instead of checking hardware version.
+ */
+#define GEM_DMA_SECH_OFFSET 0 /* Timestamp seconds[5:2] */
+#define GEM_DMA_SECH_SIZE 4
+#define GEM_DMA_SEC_WIDTH (GEM_DMA_SECH_SIZE + GEM_DMA_SECL_SIZE)
+#define GEM_DMA_SEC_TOP (1 << GEM_DMA_SEC_WIDTH)
+#define GEM_DMA_SEC_MASK (GEM_DMA_SEC_TOP - 1)
+
/* Bitfields in ADJ */
#define GEM_ADDSUB_OFFSET 31
#define GEM_ADDSUB_SIZE 1
@@ -514,6 +584,8 @@
#define queue_readl(queue, reg) (queue)->bp->macb_reg_readl((queue)->bp, (queue)->reg)
#define queue_writel(queue, reg, value) (queue)->bp->macb_reg_writel((queue)->bp, (queue)->reg, (value))
+#define PTP_TS_BUFFER_SIZE 128 /* must be power of 2 */
+
/* Conditional GEM/MACB macros. These perform the operation to the correct
* register dependent on whether the device is a GEM or a MACB. For registers
* and bitfields that are common across both devices, use macb_{read,write}l
@@ -546,16 +618,26 @@ struct macb_dma_desc {
u32 ctrl;
};
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-enum macb_hw_dma_cap {
- HW_DMA_CAP_32B,
- HW_DMA_CAP_64B,
-};
+#ifdef MACB_EXT_DESC
+#define HW_DMA_CAP_32B 0
+#define HW_DMA_CAP_64B (1 << 0)
+#define HW_DMA_CAP_PTP (1 << 1)
+#define HW_DMA_CAP_64B_PTP (HW_DMA_CAP_64B | HW_DMA_CAP_PTP)
struct macb_dma_desc_64 {
u32 addrh;
u32 resvd;
};
+
+struct macb_dma_desc_ptp {
+ u32 ts_1;
+ u32 ts_2;
+};
+
+struct gem_tx_ts {
+ struct sk_buff *skb;
+ struct macb_dma_desc_ptp desc_ptp;
+};
#endif
/* DMA descriptor bitfields */
@@ -871,6 +953,11 @@ struct macb_config {
int jumbo_max_len;
};
+struct tsu_incr {
+ u32 sub_ns;
+ u32 ns;
+};
+
struct macb_queue {
struct macb *bp;
int irq;
@@ -887,6 +974,12 @@ struct macb_queue {
struct macb_tx_skb *tx_skb;
dma_addr_t tx_ring_dma;
struct work_struct tx_error_task;
+
+#ifdef CONFIG_MACB_USE_HWSTAMP
+ struct work_struct tx_ts_task;
+ unsigned int tx_ts_head, tx_ts_tail;
+ struct gem_tx_ts tx_timestamps[PTP_TS_BUFFER_SIZE];
+#endif
};
struct macb {
@@ -955,11 +1048,62 @@ struct macb {
u32 wol;
struct macb_ptp_info *ptp_info; /* macb-ptp interface */
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- enum macb_hw_dma_cap hw_dma_cap;
+#ifdef MACB_EXT_DESC
+ uint8_t hw_dma_cap;
#endif
+ spinlock_t tsu_clk_lock; /* gem tsu clock locking */
+ unsigned int tsu_rate;
+ struct ptp_clock *ptp_clock;
+ struct ptp_clock_info ptp_clock_info;
+ struct tsu_incr tsu_incr;
+ struct hwtstamp_config tstamp_config;
};
+#ifdef CONFIG_MACB_USE_HWSTAMP
+#define GEM_TSEC_SIZE (GEM_TSH_SIZE + GEM_TSL_SIZE)
+#define TSU_SEC_MAX_VAL (((u64)1 << GEM_TSEC_SIZE) - 1)
+#define TSU_NSEC_MAX_VAL ((1 << GEM_TN_SIZE) - 1)
+
+enum macb_bd_control {
+ TSTAMP_DISABLED,
+ TSTAMP_FRAME_PTP_EVENT_ONLY,
+ TSTAMP_ALL_PTP_FRAMES,
+ TSTAMP_ALL_FRAMES,
+};
+
+void gem_ptp_init(struct net_device *ndev);
+void gem_ptp_remove(struct net_device *ndev);
+int gem_ptp_txstamp(struct macb_queue *queue, struct sk_buff *skb, struct macb_dma_desc *des);
+void gem_ptp_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc);
+static inline int gem_ptp_do_txstamp(struct macb_queue *queue, struct sk_buff *skb, struct macb_dma_desc *desc)
+{
+ if (queue->bp->tstamp_config.tx_type == TSTAMP_DISABLED)
+ return -ENOTSUPP;
+
+ return gem_ptp_txstamp(queue, skb, desc);
+}
+
+static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc)
+{
+ if (bp->tstamp_config.rx_filter == TSTAMP_DISABLED)
+ return;
+
+ gem_ptp_rxstamp(bp, skb, desc);
+}
+int gem_get_hwtst(struct net_device *dev, struct ifreq *rq);
+int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd);
+#else
+static inline void gem_ptp_init(struct net_device *ndev) { }
+static inline void gem_ptp_remove(struct net_device *ndev) { }
+
+static inline int gem_ptp_do_txstamp(struct macb_queue *queue, struct sk_buff *skb, struct macb_dma_desc *desc)
+{
+ return -1;
+}
+
+static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc) { }
+#endif
+
static inline bool macb_is_gem(struct macb *bp)
{
return !!(bp->caps & MACB_CAPS_MACB_IS_GEM);
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb_main.c
index 3ae9d8071ded..41e5711544fc 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -79,33 +79,84 @@
#define MACB_HALT_TIMEOUT 1230
/* DMA buffer descriptor might be different size
- * depends on hardware configuration.
+ * depends on hardware configuration:
+ *
+ * 1. dma address width 32 bits:
+ * word 1: 32 bit address of Data Buffer
+ * word 2: control
+ *
+ * 2. dma address width 64 bits:
+ * word 1: 32 bit address of Data Buffer
+ * word 2: control
+ * word 3: upper 32 bit address of Data Buffer
+ * word 4: unused
+ *
+ * 3. dma address width 32 bits with hardware timestamping:
+ * word 1: 32 bit address of Data Buffer
+ * word 2: control
+ * word 3: timestamp word 1
+ * word 4: timestamp word 2
+ *
+ * 4. dma address width 64 bits with hardware timestamping:
+ * word 1: 32 bit address of Data Buffer
+ * word 2: control
+ * word 3: upper 32 bit address of Data Buffer
+ * word 4: unused
+ * word 5: timestamp word 1
+ * word 6: timestamp word 2
*/
static unsigned int macb_dma_desc_get_size(struct macb *bp)
{
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- if (bp->hw_dma_cap == HW_DMA_CAP_64B)
- return sizeof(struct macb_dma_desc) + sizeof(struct macb_dma_desc_64);
+#ifdef MACB_EXT_DESC
+ unsigned int desc_size;
+
+ switch (bp->hw_dma_cap) {
+ case HW_DMA_CAP_64B:
+ desc_size = sizeof(struct macb_dma_desc)
+ + sizeof(struct macb_dma_desc_64);
+ break;
+ case HW_DMA_CAP_PTP:
+ desc_size = sizeof(struct macb_dma_desc)
+ + sizeof(struct macb_dma_desc_ptp);
+ break;
+ case HW_DMA_CAP_64B_PTP:
+ desc_size = sizeof(struct macb_dma_desc)
+ + sizeof(struct macb_dma_desc_64)
+ + sizeof(struct macb_dma_desc_ptp);
+ break;
+ default:
+ desc_size = sizeof(struct macb_dma_desc);
+ }
+ return desc_size;
#endif
return sizeof(struct macb_dma_desc);
}
-static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int idx)
+static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int desc_idx)
{
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- /* Dma buffer descriptor is 4 words length (instead of 2 words)
- * for 64b GEM.
- */
- if (bp->hw_dma_cap == HW_DMA_CAP_64B)
- idx <<= 1;
+#ifdef MACB_EXT_DESC
+ switch (bp->hw_dma_cap) {
+ case HW_DMA_CAP_64B:
+ case HW_DMA_CAP_PTP:
+ desc_idx <<= 1;
+ break;
+ case HW_DMA_CAP_64B_PTP:
+ desc_idx *= 3;
+ break;
+ default:
+ break;
+ }
+ return desc_idx;
#endif
- return idx;
+ return desc_idx;
}
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
static struct macb_dma_desc_64 *macb_64b_desc(struct macb *bp, struct macb_dma_desc *desc)
{
- return (struct macb_dma_desc_64 *)((void *)desc + sizeof(struct macb_dma_desc));
+ if (bp->hw_dma_cap & HW_DMA_CAP_64B)
+ return (struct macb_dma_desc_64 *)((void *)desc + sizeof(struct macb_dma_desc));
+ return NULL;
}
#endif
@@ -621,7 +672,7 @@ static void macb_set_addr(struct macb *bp, struct macb_dma_desc *desc, dma_addr_
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
struct macb_dma_desc_64 *desc_64;
- if (bp->hw_dma_cap == HW_DMA_CAP_64B) {
+ if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
desc_64 = macb_64b_desc(bp, desc);
desc_64->addrh = upper_32_bits(addr);
}
@@ -635,7 +686,7 @@ static dma_addr_t macb_get_addr(struct macb *bp, struct macb_dma_desc *desc)
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
struct macb_dma_desc_64 *desc_64;
- if (bp->hw_dma_cap == HW_DMA_CAP_64B) {
+ if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
desc_64 = macb_64b_desc(bp, desc);
addr = ((u64)(desc_64->addrh) << 32);
}
@@ -734,7 +785,7 @@ static void macb_tx_error_task(struct work_struct *work)
/* Reinitialize the TX desc queue */
queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+ if (bp->hw_dma_cap & HW_DMA_CAP_64B)
queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma));
#endif
/* Make TX ring reflect state of hardware */
@@ -796,6 +847,12 @@ static void macb_tx_interrupt(struct macb_queue *queue)
/* First, update TX stats if needed */
if (skb) {
+ if (gem_ptp_do_txstamp(queue, skb, desc) == 0) {
+ /* skb now belongs to timestamp buffer
+ * and will be removed later
+ */
+ tx_skb->skb = NULL;
+ }
netdev_vdbg(bp->dev, "skb %u (data %p) TX complete\n",
macb_tx_ring_wrap(bp, tail),
skb->data);
@@ -962,6 +1019,8 @@ static int gem_rx(struct macb *bp, int budget)
bp->dev->stats.rx_packets++;
bp->dev->stats.rx_bytes += skb->len;
+ gem_ptp_do_rxstamp(bp, skb, desc);
+
#if defined(DEBUG) && defined(VERBOSE_DEBUG)
netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n",
skb->len, skb->csum);
@@ -1283,7 +1342,6 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
queue_writel(queue, ISR, MACB_BIT(HRESP));
}
-
status = queue_readl(queue, ISR);
}
@@ -1613,7 +1671,6 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
/* Make newly initialized descriptor visible to hardware */
wmb();
-
skb_tx_timestamp(skb);
macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
@@ -1942,9 +1999,13 @@ static void macb_configure_dma(struct macb *bp)
dmacfg &= ~GEM_BIT(TXCOEN);
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+ if (bp->hw_dma_cap & HW_DMA_CAP_64B)
dmacfg |= GEM_BIT(ADDR64);
#endif
+#ifdef CONFIG_MACB_USE_HWSTAMP
+ if (bp->hw_dma_cap & HW_DMA_CAP_PTP)
+ dmacfg |= GEM_BIT(RXEXT) | GEM_BIT(TXEXT);
+#endif
netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n",
dmacfg);
gem_writel(bp, DMACFG, dmacfg);
@@ -1992,13 +2053,13 @@ static void macb_init_hw(struct macb *bp)
/* Initialize TX and RX buffers */
macb_writel(bp, RBQP, lower_32_bits(bp->rx_ring_dma));
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+ if (bp->hw_dma_cap & HW_DMA_CAP_64B)
macb_writel(bp, RBQPH, upper_32_bits(bp->rx_ring_dma));
#endif
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+ if (bp->hw_dma_cap & HW_DMA_CAP_64B)
queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma));
#endif
@@ -2467,6 +2528,70 @@ static int macb_set_ringparam(struct net_device *netdev,
return 0;
}
+#ifdef CONFIG_MACB_USE_HWSTAMP
+static unsigned int gem_get_tsu_rate(struct macb *bp)
+{
+ struct clk *tsu_clk;
+ unsigned int tsu_rate;
+
+ tsu_clk = devm_clk_get(&bp->pdev->dev, "tsu_clk");
+ if (!IS_ERR(tsu_clk))
+ tsu_rate = clk_get_rate(tsu_clk);
+ /* try pclk instead */
+ else if (!IS_ERR(bp->pclk)) {
+ tsu_clk = bp->pclk;
+ tsu_rate = clk_get_rate(tsu_clk);
+ } else
+ return -ENOTSUPP;
+ return tsu_rate;
+}
+
+static s32 gem_get_ptp_max_adj(void)
+{
+ return 64000000;
+}
+
+static int gem_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *info)
+{
+ struct macb *bp = netdev_priv(dev);
+
+ if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0) {
+ ethtool_op_get_ts_info(dev, info);
+ return 0;
+ }
+
+ info->so_timestamping =
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+ info->tx_types =
+ (1 << HWTSTAMP_TX_ONESTEP_SYNC) |
+ (1 << HWTSTAMP_TX_OFF) |
+ (1 << HWTSTAMP_TX_ON);
+ info->rx_filters =
+ (1 << HWTSTAMP_FILTER_NONE) |
+ (1 << HWTSTAMP_FILTER_ALL);
+
+ info->phc_index = bp->ptp_clock ? ptp_clock_index(bp->ptp_clock) : -1;
+
+ return 0;
+}
+
+static struct macb_ptp_info gem_ptp_info = {
+ .ptp_init = gem_ptp_init,
+ .ptp_remove = gem_ptp_remove,
+ .get_ptp_max_adj = gem_get_ptp_max_adj,
+ .get_tsu_rate = gem_get_tsu_rate,
+ .get_ts_info = gem_get_ts_info,
+ .get_hwtst = gem_get_hwtst,
+ .set_hwtst = gem_set_hwtst,
+};
+#endif
+
static int macb_get_ts_info(struct net_device *netdev,
struct ethtool_ts_info *info)
{
@@ -2600,6 +2725,16 @@ static void macb_configure_caps(struct macb *bp,
dcfg = gem_readl(bp, DCFG2);
if ((dcfg & (GEM_BIT(RX_PKT_BUFF) | GEM_BIT(TX_PKT_BUFF))) == 0)
bp->caps |= MACB_CAPS_FIFO_MODE;
+#ifdef CONFIG_MACB_USE_HWSTAMP
+ if (gem_has_ptp(bp)) {
+ if (!GEM_BFEXT(TSU, gem_readl(bp, DCFG5)))
+ pr_err("GEM doesn't support hardware ptp.\n");
+ else {
+ bp->hw_dma_cap |= HW_DMA_CAP_PTP;
+ bp->ptp_info = &gem_ptp_info;
+ }
+ }
+#endif
}
dev_dbg(&bp->pdev->dev, "Cadence caps 0x%08x\n", bp->caps);
@@ -2737,7 +2872,7 @@ static int macb_init(struct platform_device *pdev)
queue->IMR = GEM_IMR(hw_q - 1);
queue->TBQP = GEM_TBQP(hw_q - 1);
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+ if (bp->hw_dma_cap & HW_DMA_CAP_64B)
queue->TBQPH = GEM_TBQPH(hw_q - 1);
#endif
} else {
@@ -2748,7 +2883,7 @@ static int macb_init(struct platform_device *pdev)
queue->IMR = MACB_IMR;
queue->TBQP = MACB_TBQP;
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+ if (bp->hw_dma_cap & HW_DMA_CAP_64B)
queue->TBQPH = MACB_TBQPH;
#endif
}
@@ -3205,7 +3340,9 @@ static const struct macb_config np4_config = {
};
static const struct macb_config zynqmp_config = {
- .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO,
+ .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE |
+ MACB_CAPS_JUMBO |
+ MACB_CAPS_GEM_HAS_PTP,
.dma_burst_length = 16,
.clk_init = macb_clk_init,
.init = macb_init,
@@ -3239,7 +3376,9 @@ MODULE_DEVICE_TABLE(of, macb_dt_ids);
#endif /* CONFIG_OF */
static const struct macb_config default_gem_config = {
- .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO,
+ .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE |
+ MACB_CAPS_JUMBO |
+ MACB_CAPS_GEM_HAS_PTP,
.dma_burst_length = 16,
.clk_init = macb_clk_init,
.init = macb_init,
@@ -3328,19 +3467,17 @@ static int macb_probe(struct platform_device *pdev)
bp->wol |= MACB_WOL_HAS_MAGIC_PACKET;
device_init_wakeup(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET);
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) {
- dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
- bp->hw_dma_cap = HW_DMA_CAP_64B;
- } else
- bp->hw_dma_cap = HW_DMA_CAP_32B;
-#endif
-
spin_lock_init(&bp->lock);
/* setup capabilities */
macb_configure_caps(bp, macb_config);
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) {
+ dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+ bp->hw_dma_cap |= HW_DMA_CAP_64B;
+ }
+#endif
platform_set_drvdata(pdev, dev);
dev->irq = platform_get_irq(pdev, 0);
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c
new file mode 100755
index 000000000000..67cca08472b7
--- /dev/null
+++ b/drivers/net/ethernet/cadence/macb_ptp.c
@@ -0,0 +1,518 @@
+/**
+ * 1588 PTP support for Cadence GEM device.
+ *
+ * Copyright (C) 2017 Cadence Design Systems - http://www.cadence.com
+ *
+ * Authors: Rafal Ozieblo <[email protected]>
+ * Bartosz Folta <[email protected]>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 of
+ * the License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include <linux/time64.h>
+#include <linux/ptp_classify.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/net_tstamp.h>
+#include <linux/circ_buf.h>
+#include <linux/spinlock.h>
+
+#include "macb.h"
+
+#define GEM_PTP_TIMER_NAME "gem-ptp-timer"
+
+/* Locate the PTP timestamp words that trail a DMA descriptor.
+ *
+ * Depending on bp->hw_dma_cap the timestamp words follow either the base
+ * descriptor directly (HW_DMA_CAP_PTP) or the base descriptor plus a
+ * struct macb_dma_desc_64 (HW_DMA_CAP_64B_PTP).  Any other hw_dma_cap value
+ * yields NULL.
+ */
+static struct macb_dma_desc_ptp *macb_ptp_desc(struct macb *bp,
+					       struct macb_dma_desc *desc)
+{
+	size_t skip = sizeof(struct macb_dma_desc);
+
+	if (bp->hw_dma_cap == HW_DMA_CAP_64B_PTP)
+		skip += sizeof(struct macb_dma_desc_64);
+	else if (bp->hw_dma_cap != HW_DMA_CAP_PTP)
+		return NULL;
+
+	return (struct macb_dma_desc_ptp *)((u8 *)desc + skip);
+}
+
+/* PTP gettime64 callback: read the TSU time registers into @ts.
+ *
+ * TN (nanoseconds) is sampled before and after TSL/TSH (seconds) while
+ * holding tsu_clk_lock.  If the second TN sample is smaller than the first,
+ * the nanoseconds wrapped in between and all three registers are read again.
+ * Always returns 0.
+ */
+static int gem_tsu_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+ struct macb *bp = container_of(ptp, struct macb, ptp_clock_info);
+ unsigned long flags;
+ long first, second;
+ u32 secl, sech;
+
+ spin_lock_irqsave(&bp->tsu_clk_lock, flags);
+ first = gem_readl(bp, TN);
+ secl = gem_readl(bp, TSL);
+ sech = gem_readl(bp, TSH);
+ second = gem_readl(bp, TN);
+
+ /* test for nsec rollover */
+ if (first > second) {
+ /* if so, use later read & re-read seconds
+ * (assume all done within 1s)
+ */
+ ts->tv_nsec = gem_readl(bp, TN);
+ secl = gem_readl(bp, TSL);
+ sech = gem_readl(bp, TSH);
+ } else {
+ ts->tv_nsec = first;
+ }
+
+ spin_unlock_irqrestore(&bp->tsu_clk_lock, flags);
+ /* combine high and low seconds words, limited to TSU_SEC_MAX_VAL */
+ ts->tv_sec = (((u64)sech << GEM_TSL_SIZE) | secl)
+ & TSU_SEC_MAX_VAL;
+ return 0;
+}
+
+/* PTP settime64 callback: load @ts into the TSU time registers.
+ *
+ * The seconds value is split into a low word (TSL) and a high word (TSH,
+ * masked to GEM_TSH_SIZE bits).  Under tsu_clk_lock, TN is zeroed first,
+ * then TSH, TSL and finally the requested nanoseconds are written.
+ * Always returns 0.
+ */
+static int gem_tsu_set_time(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct macb *bp = container_of(ptp, struct macb, ptp_clock_info);
+ unsigned long flags;
+ u32 ns, sech, secl;
+
+ secl = (u32)ts->tv_sec;
+ sech = (ts->tv_sec >> GEM_TSL_SIZE) & ((1 << GEM_TSH_SIZE) - 1);
+ ns = ts->tv_nsec;
+
+ spin_lock_irqsave(&bp->tsu_clk_lock, flags);
+
+ /* TSH doesn't latch the time and no atomicity! */
+ gem_writel(bp, TN, 0); /* clear to avoid overflow */
+ gem_writel(bp, TSH, sech);
+ /* write lower bits 2nd, for synchronized secs update */
+ gem_writel(bp, TSL, secl);
+ gem_writel(bp, TN, ns);
+
+ spin_unlock_irqrestore(&bp->tsu_clk_lock, flags);
+
+ return 0;
+}
+
+/* Program the TSU increment registers from @incr_spec.
+ *
+ * Writes the sub-nanosecond part (TISUBN) and then the nanosecond part (TI)
+ * while holding tsu_clk_lock.  Always returns 0.
+ */
+static int gem_tsu_incr_set(struct macb *bp, struct tsu_incr *incr_spec)
+{
+ unsigned long flags;
+
+ /* tsu_timer_incr register must be written after
+ * the tsu_timer_incr_sub_ns register and the write operation
+ * will cause the value written to the tsu_timer_incr_sub_ns register
+ * to take effect.
+ */
+ spin_lock_irqsave(&bp->tsu_clk_lock, flags);
+ gem_writel(bp, TISUBN, GEM_BF(SUBNSINCR, incr_spec->sub_ns));
+ gem_writel(bp, TI, GEM_BF(NSINCR, incr_spec->ns));
+ spin_unlock_irqrestore(&bp->tsu_clk_lock, flags);
+
+ return 0;
+}
+
+/* PTP adjfine callback: scale the base TSU increment by @scaled_ppm.
+ *
+ * The base increment cached in bp->tsu_incr is packed into one fixed-point
+ * word (ns above GEM_SUBNSINCR_SIZE fraction bits).  The correction
+ * |scaled_ppm| * word, shifted right by GEM_SUBNSINCR_SIZE and divided by
+ * USEC_PER_SEC, is subtracted from the word for a negative @scaled_ppm and
+ * added otherwise.  The result is split back into ns / sub_ns and written
+ * to the hardware; bp->tsu_incr itself is left untouched.
+ * Always returns 0.
+ */
+static int gem_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+ struct macb *bp = container_of(ptp, struct macb, ptp_clock_info);
+ struct tsu_incr incr_spec;
+ bool neg_adj = false;
+ u32 word;
+ u64 adj;
+
+ /* work with the magnitude; the sign is re-applied below */
+ if (scaled_ppm < 0) {
+ neg_adj = true;
+ scaled_ppm = -scaled_ppm;
+ }
+
+ /* Adjustment is relative to base frequency */
+ incr_spec.sub_ns = bp->tsu_incr.sub_ns;
+ incr_spec.ns = bp->tsu_incr.ns;
+
+ /* scaling: unused(8bit) | ns(8bit) | fractions(16bit) */
+ word = ((u64)incr_spec.ns << GEM_SUBNSINCR_SIZE) + incr_spec.sub_ns;
+ adj = (u64)scaled_ppm * word;
+ /* Divide with rounding, equivalent to floating dividing:
+ * (temp / USEC_PER_SEC) + 0.5
+ */
+ adj += (USEC_PER_SEC >> 1);
+ adj >>= GEM_SUBNSINCR_SIZE; /* remove fractions */
+ adj = div_u64(adj, USEC_PER_SEC);
+ adj = neg_adj ? (word - adj) : (word + adj);
+
+ /* unpack the adjusted word into the two register fields */
+ incr_spec.ns = (adj >> GEM_SUBNSINCR_SIZE)
+ & ((1 << GEM_NSINCR_SIZE) - 1);
+ incr_spec.sub_ns = adj & ((1 << GEM_SUBNSINCR_SIZE) - 1);
+ gem_tsu_incr_set(bp, &incr_spec);
+ return 0;
+}
+
+/* PTP adjtime callback: shift the TSU time by @delta nanoseconds.
+ *
+ * Offsets whose magnitude does not exceed TSU_NSEC_MAX_VAL are handed to the
+ * TA register, with the direction encoded at bit GEM_ADDSUB_OFFSET.  Larger
+ * offsets are applied in software: the current time is read, the signed
+ * offset is added and the result is written back.
+ * Always returns 0.
+ */
+static int gem_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct macb *bp = container_of(ptp, struct macb, ptp_clock_info);
+	struct timespec64 now, then = ns_to_timespec64(delta);
+	u32 adj, sign = 0;
+
+	if (delta < 0) {
+		sign = 1;
+		delta = -delta;
+	}
+
+	if (delta > TSU_NSEC_MAX_VAL) {
+		gem_tsu_get_time(&bp->ptp_clock_info, &now);
+		/* 'then' was converted from the signed delta before it was
+		 * negated above, so it already carries the direction.
+		 * Adding it steps the clock backwards for a negative delta;
+		 * subtracting it would step the clock forwards instead.
+		 */
+		now = timespec64_add(now, then);
+
+		gem_tsu_set_time(&bp->ptp_clock_info, &now);
+	} else {
+		adj = (sign << GEM_ADDSUB_OFFSET) | delta;
+
+		gem_writel(bp, TA, adj);
+	}
+
+	return 0;
+}
+
+/* PTP enable callback: every request is rejected with -EOPNOTSUPP,
+ * regardless of @rq and @on.
+ */
+static int gem_ptp_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq, int on)
+{
+ return -EOPNOTSUPP;
+}
+
+/* Template for the per-device clock description.  gem_ptp_init() copies it
+ * into bp->ptp_clock_info and then overwrites max_adj, which is why it is
+ * left at 0 here.
+ */
+static struct ptp_clock_info gem_ptp_caps_template = {
+ .owner = THIS_MODULE,
+ .name = GEM_PTP_TIMER_NAME,
+ .max_adj = 0,
+ .n_alarm = 0,
+ .n_ext_ts = 0,
+ .n_per_out = 0,
+ .n_pins = 0,
+ .pps = 1,
+ .adjfine = gem_ptp_adjfine,
+ .adjtime = gem_ptp_adjtime,
+ .gettime64 = gem_tsu_get_time,
+ .settime64 = gem_tsu_set_time,
+ .enable = gem_ptp_enable,
+};
+
+/* Derive the base per-tick increment from bp->tsu_rate.
+ *
+ * tsu_incr.ns receives the whole quotient NSEC_PER_SEC / tsu_rate.  The
+ * division remainder, scaled up by GEM_SUBNSINCR_SIZE bits and divided by
+ * tsu_rate again, becomes tsu_incr.sub_ns (0 when the division is exact).
+ */
+static void gem_ptp_init_timer(struct macb *bp)
+{
+	u32 remainder = 0;
+	u64 scaled;
+
+	bp->tsu_incr.ns = div_u64_rem(NSEC_PER_SEC, bp->tsu_rate, &remainder);
+	if (!remainder) {
+		bp->tsu_incr.sub_ns = 0;
+		return;
+	}
+
+	scaled = (u64)remainder << GEM_SUBNSINCR_SIZE;
+	bp->tsu_incr.sub_ns = div_u64(scaled, bp->tsu_rate);
+}
+
+/* Put the TSU into its initial state: load the time returned by
+ * ktime_get_real(), program the base increment held in bp->tsu_incr and
+ * clear the TA adjust register.
+ */
+static void gem_ptp_init_tsu(struct macb *bp)
+{
+ struct timespec64 ts;
+
+ /* 1. get current system time */
+ ts = ns_to_timespec64(ktime_to_ns(ktime_get_real()));
+
+ /* 2. set ptp timer */
+ gem_tsu_set_time(&bp->ptp_clock_info, &ts);
+
+ /* 3. set PTP timer increment value to BASE_INCREMENT */
+ gem_tsu_incr_set(bp, &bp->tsu_incr);
+
+ /* 4. no pending one-shot adjustment */
+ gem_writel(bp, TA, 0);
+}
+
+/* Zero the cached base increment in bp->tsu_incr and write 0 to the
+ * TISUBN, TI and TA registers.
+ */
+static void gem_ptp_clear_timer(struct macb *bp)
+{
+ bp->tsu_incr.sub_ns = 0;
+ bp->tsu_incr.ns = 0;
+
+ gem_writel(bp, TISUBN, GEM_BF(SUBNSINCR, 0));
+ gem_writel(bp, TI, GEM_BF(NSINCR, 0));
+ gem_writel(bp, TA, 0);
+}
+
+/* Build a full timespec64 in @ts from the two descriptor timestamp words.
+ *
+ * Nanoseconds and the low seconds bits come from @dma_desc_ts_1 (DMA_NSEC,
+ * DMA_SECL), the next seconds bits from @dma_desc_ts_2 (DMA_SECH).  The
+ * seconds bits outside GEM_DMA_SEC_MASK are taken from the current TSU time.
+ * Always returns 0.
+ */
+static int gem_hw_timestamp(struct macb *bp, u32 dma_desc_ts_1,
+ u32 dma_desc_ts_2, struct timespec64 *ts)
+{
+ struct timespec64 tsu;
+
+ ts->tv_sec = (GEM_BFEXT(DMA_SECH, dma_desc_ts_2) << GEM_DMA_SECL_SIZE) |
+ GEM_BFEXT(DMA_SECL, dma_desc_ts_1);
+ ts->tv_nsec = GEM_BFEXT(DMA_NSEC, dma_desc_ts_1);
+
+ /* TSU overlapping workaround
+ * The timestamp only contains lower few bits of seconds,
+ * so add value from 1588 timer
+ */
+ gem_tsu_get_time(&bp->ptp_clock_info, &tsu);
+
+ /* If the top bit is set in the timestamp,
+ * but not in 1588 timer, it has rolled over,
+ * so subtract max size
+ */
+ if ((ts->tv_sec & (GEM_DMA_SEC_TOP >> 1)) &&
+ !(tsu.tv_sec & (GEM_DMA_SEC_TOP >> 1)))
+ ts->tv_sec -= GEM_DMA_SEC_TOP;
+
+ /* merge in the upper seconds bits the descriptor does not carry */
+ ts->tv_sec += ((~GEM_DMA_SEC_MASK) & tsu.tv_sec);
+
+ return 0;
+}
+
+/* Attach the hardware RX timestamp stored behind @desc to @skb.
+ *
+ * Nothing is done unless the DMA_RXVALID field of desc->addr is set.  When
+ * the current hw_dma_cap provides no PTP descriptor extension, a
+ * rate-limited warning is logged and the skb is left without a hardware
+ * timestamp.
+ */
+void gem_ptp_rxstamp(struct macb *bp, struct sk_buff *skb,
+		     struct macb_dma_desc *desc)
+{
+	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+	struct macb_dma_desc_ptp *desc_ptp;
+	struct timespec64 ts;
+
+	if (!GEM_BFEXT(DMA_RXVALID, desc->addr))
+		return;
+
+	/* macb_ptp_desc() returns NULL for descriptor layouts without
+	 * timestamp words; dereferencing that would oops.
+	 */
+	desc_ptp = macb_ptp_desc(bp, desc);
+	if (!desc_ptp) {
+		dev_warn_ratelimited(&bp->pdev->dev,
+				     "Timestamp not supported in BD\n");
+		return;
+	}
+
+	gem_hw_timestamp(bp, desc_ptp->ts_1, desc_ptp->ts_2, &ts);
+	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
+	shhwtstamps->hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
+}
+
+/* Convert the descriptor timestamp words in @desc_ptp into a hardware
+ * timestamp and report it for @skb through skb_tstamp_tx().
+ */
+static void gem_tstamp_tx(struct macb *bp, struct sk_buff *skb,
+ struct macb_dma_desc_ptp *desc_ptp)
+{
+ struct skb_shared_hwtstamps shhwtstamps;
+ struct timespec64 ts;
+
+ gem_hw_timestamp(bp, desc_ptp->ts_1, desc_ptp->ts_2, &ts);
+ memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+ shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
+ skb_tstamp_tx(skb, &shhwtstamps);
+}
+
+/* Queue @skb and the TX timestamp words found behind @desc for deferred
+ * delivery by the queue's tx_ts_task work item.
+ *
+ * Returns 0 when the skb was stored in queue->tx_timestamps (it is then
+ * freed by the work handler), -EINVAL when the descriptor carries no valid
+ * timestamp or has no PTP extension, and -ENOMEM when the timestamp ring is
+ * full.  On error the skb is not touched.
+ */
+int gem_ptp_txstamp(struct macb_queue *queue, struct sk_buff *skb,
+		    struct macb_dma_desc *desc)
+{
+	unsigned long tail = READ_ONCE(queue->tx_ts_tail);
+	unsigned long head = queue->tx_ts_head;
+	struct macb_dma_desc_ptp *desc_ptp;
+	struct gem_tx_ts *tx_timestamp;
+
+	if (!GEM_BFEXT(DMA_TXVALID, desc->ctrl))
+		return -EINVAL;
+
+	if (CIRC_SPACE(head, tail, PTP_TS_BUFFER_SIZE) == 0)
+		return -ENOMEM;
+
+	/* macb_ptp_desc() returns NULL for descriptor layouts without
+	 * timestamp words; bail out before the skb is marked in-progress.
+	 */
+	desc_ptp = macb_ptp_desc(queue->bp, desc);
+	if (!desc_ptp)
+		return -EINVAL;
+
+	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+	tx_timestamp = &queue->tx_timestamps[head];
+	tx_timestamp->skb = skb;
+	tx_timestamp->desc_ptp.ts_1 = desc_ptp->ts_1;
+	tx_timestamp->desc_ptp.ts_2 = desc_ptp->ts_2;
+	/* move head */
+	smp_store_release(&queue->tx_ts_head,
+			  (head + 1) & (PTP_TS_BUFFER_SIZE - 1));
+
+	schedule_work(&queue->tx_ts_task);
+	return 0;
+}
+
+/* Work handler for queue->tx_ts_task: deliver queued TX timestamps.
+ *
+ * Drains queue->tx_timestamps from tx_ts_tail up to the head value sampled
+ * on entry.  For each entry the timestamp is reported via gem_tstamp_tx()
+ * and the skb is freed before the tail is advanced.
+ */
+static void gem_tx_timestamp_flush(struct work_struct *work)
+{
+ struct macb_queue *queue =
+ container_of(work, struct macb_queue, tx_ts_task);
+ unsigned long head, tail;
+ struct gem_tx_ts *tx_ts;
+
+ /* take current head */
+ head = smp_load_acquire(&queue->tx_ts_head);
+ tail = queue->tx_ts_tail;
+
+ while (CIRC_CNT(head, tail, PTP_TS_BUFFER_SIZE)) {
+ tx_ts = &queue->tx_timestamps[tail];
+ gem_tstamp_tx(queue->bp, tx_ts->skb, &tx_ts->desc_ptp);
+ /* cleanup */
+ dev_kfree_skb_any(tx_ts->skb);
+ /* remove old tail */
+ smp_store_release(&queue->tx_ts_tail,
+ (tail + 1) & (PTP_TS_BUFFER_SIZE - 1));
+ tail = queue->tx_ts_tail;
+ }
+}
+
+/* Set up PTP support for @dev: compute the base TSU increment, register the
+ * PTP clock and initialise the TSU time.
+ *
+ * On registration failure an error is logged, bp->ptp_clock is left NULL and
+ * the TSU registers are not programmed.
+ */
+void gem_ptp_init(struct net_device *dev)
+{
+	struct macb *bp = netdev_priv(dev);
+	struct macb_queue *queue;
+	unsigned int q;
+
+	bp->ptp_clock_info = gem_ptp_caps_template;
+
+	/* nominal frequency and maximum adjustment in ppb */
+	bp->tsu_rate = bp->ptp_info->get_tsu_rate(bp);
+	bp->ptp_clock_info.max_adj = bp->ptp_info->get_ptp_max_adj();
+	gem_ptp_init_timer(bp);
+
+	/* The clock callbacks take tsu_clk_lock and can be reached once the
+	 * clock is registered, so the lock and the per-queue timestamp
+	 * state must be valid before ptp_clock_register() is called.
+	 */
+	spin_lock_init(&bp->tsu_clk_lock);
+	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+		queue->tx_ts_head = 0;
+		queue->tx_ts_tail = 0;
+		INIT_WORK(&queue->tx_ts_task, gem_tx_timestamp_flush);
+	}
+
+	bp->ptp_clock = ptp_clock_register(&bp->ptp_clock_info, &dev->dev);
+	if (IS_ERR(bp->ptp_clock)) {
+		pr_err("ptp clock register failed: %ld\n",
+		       PTR_ERR(bp->ptp_clock));
+		bp->ptp_clock = NULL;
+		return;
+	} else if (bp->ptp_clock == NULL) {
+		pr_err("ptp clock register failed\n");
+		return;
+	}
+
+	gem_ptp_init_tsu(bp);
+
+	dev_info(&bp->pdev->dev, "%s ptp clock registered.\n",
+		 GEM_PTP_TIMER_NAME);
+}
+
+/* Tear down PTP support for @ndev: unregister the PTP clock if one was
+ * registered, zero the TSU increment and adjust registers and log the
+ * removal.
+ */
+void gem_ptp_remove(struct net_device *ndev)
+{
+ struct macb *bp = netdev_priv(ndev);
+
+ if (bp->ptp_clock)
+ ptp_clock_unregister(bp->ptp_clock);
+
+ gem_ptp_clear_timer(bp);
+
+ dev_info(&bp->pdev->dev, "%s ptp clock unregistered.\n",
+ GEM_PTP_TIMER_NAME);
+}
+
+/* Write the TX and RX descriptor timestamp modes into the TXBDCTRL and
+ * RXBDCTRL registers.  Always returns 0.
+ */
+static int gem_ptp_set_ts_mode(struct macb *bp,
+ enum macb_bd_control tx_bd_control,
+ enum macb_bd_control rx_bd_control)
+{
+ gem_writel(bp, TXBDCTRL, GEM_BF(TXTSMODE, tx_bd_control));
+ gem_writel(bp, RXBDCTRL, GEM_BF(RXTSMODE, rx_bd_control));
+
+ return 0;
+}
+
+/* Copy the stored hardware timestamping configuration to user space.
+ *
+ * Returns -EOPNOTSUPP when hw_dma_cap lacks HW_DMA_CAP_PTP, -EFAULT when the
+ * copy to rq->ifr_data fails, and 0 otherwise.
+ */
+int gem_get_hwtst(struct net_device *dev, struct ifreq *rq)
+{
+	struct macb *bp = netdev_priv(dev);
+
+	if (!(bp->hw_dma_cap & HW_DMA_CAP_PTP))
+		return -EOPNOTSUPP;
+
+	if (copy_to_user(rq->ifr_data, &bp->tstamp_config,
+			 sizeof(bp->tstamp_config)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/* Set (@enable != 0) or clear (@enable == 0) the OSSMODE bit in NCR with a
+ * read-modify-write.  Always returns 0.
+ */
+static int gem_ptp_set_one_step_sync(struct macb *bp, u8 enable)
+{
+	u32 ncr = macb_readl(bp, NCR);
+
+	if (enable)
+		ncr |= MACB_BIT(OSSMODE);
+	else
+		ncr &= ~MACB_BIT(OSSMODE);
+
+	macb_writel(bp, NCR, ncr);
+
+	return 0;
+}
+
+/* Apply a hardware timestamping configuration supplied by user space.
+ *
+ * The request is read from ifr->ifr_data, validated, programmed into the
+ * hardware, stored in bp->tstamp_config and finally copied back to user
+ * space with rx_filter replaced by the filter actually in effect.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when hw_dma_cap lacks HW_DMA_CAP_PTP,
+ * -EFAULT when a user copy fails, -EINVAL when flags is non-zero and
+ * -ERANGE for an unsupported tx_type or rx_filter.  A rejected request
+ * leaves bp->tstamp_config unchanged.
+ */
+int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	enum macb_bd_control tx_bd_control = TSTAMP_DISABLED;
+	enum macb_bd_control rx_bd_control = TSTAMP_DISABLED;
+	struct macb *bp = netdev_priv(dev);
+	struct hwtstamp_config config;
+	u32 regval;
+
+	if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0)
+		return -EOPNOTSUPP;
+
+	/* Validate a local copy so that a rejected request cannot clobber
+	 * the configuration later reported by gem_get_hwtst().
+	 */
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	/* reserved for future extensions */
+	if (config.flags)
+		return -EINVAL;
+
+	switch (config.tx_type) {
+	case HWTSTAMP_TX_OFF:
+		break;
+	case HWTSTAMP_TX_ONESTEP_SYNC:
+		if (gem_ptp_set_one_step_sync(bp, 1) != 0)
+			return -ERANGE;
+		tx_bd_control = TSTAMP_ALL_FRAMES;
+		break;
+	case HWTSTAMP_TX_ON:
+		/* leave one-step mode if an earlier request enabled it */
+		if (gem_ptp_set_one_step_sync(bp, 0) != 0)
+			return -ERANGE;
+		tx_bd_control = TSTAMP_ALL_FRAMES;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		/* every V2 filter is widened to "all PTP v2 event frames" */
+		rx_bd_control = TSTAMP_ALL_PTP_FRAMES;
+		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		regval = macb_readl(bp, NCR);
+		macb_writel(bp, NCR, (regval | MACB_BIT(SRTSM)));
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_ALL:
+		rx_bd_control = TSTAMP_ALL_FRAMES;
+		config.rx_filter = HWTSTAMP_FILTER_ALL;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	if (gem_ptp_set_ts_mode(bp, tx_bd_control, rx_bd_control) != 0)
+		return -ERANGE;
+
+	/* the hardware now runs this configuration; remember it */
+	bp->tstamp_config = config;
+
+	if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
+		return -EFAULT;
+
+	return 0;
+}
+
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 573755b0a51b..49b80da51ba7 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -227,15 +227,14 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic)
nic->speed = mbx.link_status.speed;
nic->mac_type = mbx.link_status.mac_type;
if (nic->link_up) {
- netdev_info(nic->netdev, "%s: Link is Up %d Mbps %s\n",
- nic->netdev->name, nic->speed,
+ netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n",
+ nic->speed,
nic->duplex == DUPLEX_FULL ?
- "Full duplex" : "Half duplex");
+ "Full" : "Half");
netif_carrier_on(nic->netdev);
netif_tx_start_all_queues(nic->netdev);
} else {
- netdev_info(nic->netdev, "%s: Link is Down\n",
- nic->netdev->name);
+ netdev_info(nic->netdev, "Link is Down\n");
netif_carrier_off(nic->netdev);
netif_tx_stop_all_queues(nic->netdev);
}
@@ -721,8 +720,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
return;
if (netif_msg_pktdata(nic)) {
- netdev_info(nic->netdev, "%s: skb 0x%p, len=%d\n", netdev->name,
- skb, skb->len);
+ netdev_info(nic->netdev, "skb 0x%p, len=%d\n", skb, skb->len);
print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1,
skb->data, skb->len, true);
}
@@ -854,10 +852,8 @@ done:
netif_tx_wake_queue(txq);
nic = nic->pnicvf;
this_cpu_inc(nic->drv_stats->txq_wake);
- if (netif_msg_tx_err(nic))
- netdev_warn(netdev,
- "%s: Transmit queue wakeup SQ%d\n",
- netdev->name, txq_idx);
+ netif_warn(nic, tx_err, netdev,
+ "Transmit queue wakeup SQ%d\n", txq_idx);
}
}
@@ -928,9 +924,8 @@ static void nicvf_handle_qs_err(unsigned long data)
static void nicvf_dump_intr_status(struct nicvf *nic)
{
- if (netif_msg_intr(nic))
- netdev_info(nic->netdev, "%s: interrupt status 0x%llx\n",
- nic->netdev->name, nicvf_reg_read(nic, NIC_VF_INT));
+ netif_info(nic, intr, nic->netdev, "interrupt status 0x%llx\n",
+ nicvf_reg_read(nic, NIC_VF_INT));
}
static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq)
@@ -1212,10 +1207,8 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
netif_tx_wake_queue(txq);
} else {
this_cpu_inc(nic->drv_stats->txq_stop);
- if (netif_msg_tx_err(nic))
- netdev_warn(netdev,
- "%s: Transmit ring full, stopping SQ%d\n",
- netdev->name, qid);
+ netif_warn(nic, tx_err, netdev,
+ "Transmit ring full, stopping SQ%d\n", qid);
}
return NETDEV_TX_BUSY;
}
@@ -1600,9 +1593,7 @@ static void nicvf_tx_timeout(struct net_device *dev)
{
struct nicvf *nic = netdev_priv(dev);
- if (netif_msg_tx_err(nic))
- netdev_warn(dev, "%s: Transmit timed out, resetting\n",
- dev->name);
+ netif_warn(nic, tx_err, dev, "Transmit timed out, resetting\n");
this_cpu_inc(nic->drv_stats->tx_timeout);
schedule_work(&nic->reset_task);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 2b181762ad49..d4496e9afcdf 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -1811,11 +1811,9 @@ void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
/* Check for errors in the receive cmp.queue entry */
int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
{
- if (netif_msg_rx_err(nic))
- netdev_err(nic->netdev,
- "%s: RX error CQE err_level 0x%x err_opcode 0x%x\n",
- nic->netdev->name,
- cqe_rx->err_level, cqe_rx->err_opcode);
+ netif_err(nic, rx_err, nic->netdev,
+ "RX error CQE err_level 0x%x err_opcode 0x%x\n",
+ cqe_rx->err_level, cqe_rx->err_opcode);
switch (cqe_rx->err_opcode) {
case CQ_RX_ERROP_RE_PARTIAL:
diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig
index dc0850b3b517..8870a9a798ca 100644
--- a/drivers/net/ethernet/freescale/fman/Kconfig
+++ b/drivers/net/ethernet/freescale/fman/Kconfig
@@ -2,6 +2,7 @@ config FSL_FMAN
tristate "FMan support"
depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST
select GENERIC_ALLOCATOR
+ depends on HAS_DMA
select PHYLIB
default n
help
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index a79e257bc338..c4b4b0a1bbf0 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1718,7 +1718,7 @@ static int gfar_restore(struct device *dev)
return 0;
}
-static struct dev_pm_ops gfar_pm_ops = {
+static const struct dev_pm_ops gfar_pm_ops = {
.suspend = gfar_suspend,
.resume = gfar_resume,
.freeze = gfar_suspend,
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index 04211ac73b36..7ba653af19cb 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -360,6 +360,7 @@ enum hnae_loop {
MAC_INTERNALLOOP_MAC = 0,
MAC_INTERNALLOOP_SERDES,
MAC_INTERNALLOOP_PHY,
+ MAC_LOOP_PHY_NONE,
MAC_LOOP_NONE,
};
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 00e57bbaf122..a8db27e86a11 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -259,67 +259,27 @@ static const char hns_nic_test_strs[][ETH_GSTRING_LEN] = {
static int hns_nic_config_phy_loopback(struct phy_device *phy_dev, u8 en)
{
-#define COPPER_CONTROL_REG 0
-#define PHY_POWER_DOWN BIT(11)
-#define PHY_LOOP_BACK BIT(14)
- u16 val = 0;
-
- if (phy_dev->is_c45) /* c45 branch adding for XGE PHY */
- return -ENOTSUPP;
+ int err;
if (en) {
- /* speed : 1000M */
- phy_write(phy_dev, HNS_PHY_PAGE_REG, 2);
- phy_write(phy_dev, 21, 0x1046);
-
- phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
- /* Force Master */
- phy_write(phy_dev, 9, 0x1F00);
-
- /* Soft-reset */
- phy_write(phy_dev, 0, 0x9140);
- /* If autoneg disabled,two soft-reset operations */
- phy_write(phy_dev, 0, 0x9140);
-
- phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA);
-
- /* Default is 0x0400 */
- phy_write(phy_dev, 1, 0x418);
-
- /* Force 1000M Link, Default is 0x0200 */
- phy_write(phy_dev, 7, 0x20C);
-
- /* Powerup Fiber */
- phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
- val = phy_read(phy_dev, COPPER_CONTROL_REG);
- val &= ~PHY_POWER_DOWN;
- phy_write(phy_dev, COPPER_CONTROL_REG, val);
-
- /* Enable Phy Loopback */
- phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
- val = phy_read(phy_dev, COPPER_CONTROL_REG);
- val |= PHY_LOOP_BACK;
- val &= ~PHY_POWER_DOWN;
- phy_write(phy_dev, COPPER_CONTROL_REG, val);
+ /* Doing phy loopback in offline state, phy resuming is
+ * needed to power up the device.
+ */
+ err = phy_resume(phy_dev);
+ if (err)
+ goto out;
+
+ err = phy_loopback(phy_dev, true);
} else {
- phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA);
- phy_write(phy_dev, 1, 0x400);
- phy_write(phy_dev, 7, 0x200);
-
- phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
- val = phy_read(phy_dev, COPPER_CONTROL_REG);
- val |= PHY_POWER_DOWN;
- phy_write(phy_dev, COPPER_CONTROL_REG, val);
-
- phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
- phy_write(phy_dev, 9, 0xF00);
-
- val = phy_read(phy_dev, COPPER_CONTROL_REG);
- val &= ~PHY_LOOP_BACK;
- val |= PHY_POWER_DOWN;
- phy_write(phy_dev, COPPER_CONTROL_REG, val);
+ err = phy_loopback(phy_dev, false);
+ if (err)
+ goto out;
+
+ err = phy_suspend(phy_dev);
}
- return 0;
+
+out:
+ return err;
}
static int __lb_setup(struct net_device *ndev,
@@ -332,10 +292,9 @@ static int __lb_setup(struct net_device *ndev,
switch (loop) {
case MAC_INTERNALLOOP_PHY:
- if ((phy_dev) && (!phy_dev->is_c45)) {
- ret = hns_nic_config_phy_loopback(phy_dev, 0x1);
- ret |= h->dev->ops->set_loopback(h, loop, 0x1);
- }
+ ret = hns_nic_config_phy_loopback(phy_dev, 0x1);
+ if (!ret)
+ ret = h->dev->ops->set_loopback(h, loop, 0x1);
break;
case MAC_INTERNALLOOP_MAC:
if ((h->dev->ops->set_loopback) &&
@@ -346,17 +305,17 @@ static int __lb_setup(struct net_device *ndev,
if (h->dev->ops->set_loopback)
ret = h->dev->ops->set_loopback(h, loop, 0x1);
break;
+ case MAC_LOOP_PHY_NONE:
+ ret = hns_nic_config_phy_loopback(phy_dev, 0x0);
case MAC_LOOP_NONE:
- if ((phy_dev) && (!phy_dev->is_c45))
- ret |= hns_nic_config_phy_loopback(phy_dev, 0x0);
-
- if (h->dev->ops->set_loopback) {
+ if (!ret && h->dev->ops->set_loopback) {
if (priv->ae_handle->phy_if != PHY_INTERFACE_MODE_XGMII)
- ret |= h->dev->ops->set_loopback(h,
+ ret = h->dev->ops->set_loopback(h,
MAC_INTERNALLOOP_MAC, 0x0);
- ret |= h->dev->ops->set_loopback(h,
- MAC_INTERNALLOOP_SERDES, 0x0);
+ if (!ret)
+ ret = h->dev->ops->set_loopback(h,
+ MAC_INTERNALLOOP_SERDES, 0x0);
}
break;
default:
@@ -582,13 +541,16 @@ static int __lb_run_test(struct net_device *ndev,
return ret_val;
}
-static int __lb_down(struct net_device *ndev)
+static int __lb_down(struct net_device *ndev, enum hnae_loop loop)
{
struct hns_nic_priv *priv = netdev_priv(ndev);
struct hnae_handle *h = priv->ae_handle;
int ret;
- ret = __lb_setup(ndev, MAC_LOOP_NONE);
+ if (loop == MAC_INTERNALLOOP_PHY)
+ ret = __lb_setup(ndev, MAC_LOOP_PHY_NONE);
+ else
+ ret = __lb_setup(ndev, MAC_LOOP_NONE);
if (ret)
netdev_err(ndev, "%s: __lb_setup return error(%d)!\n",
__func__,
@@ -644,7 +606,8 @@ static void hns_nic_self_test(struct net_device *ndev,
if (!data[test_index]) {
data[test_index] = __lb_run_test(
ndev, (enum hnae_loop)st_param[i][0]);
- (void)__lb_down(ndev);
+ (void)__lb_down(ndev,
+ (enum hnae_loop)st_param[i][0]);
}
if (data[test_index])
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 9a74c4e2e193..3e0a695537e2 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1914,7 +1914,7 @@ static struct vio_device_id ibmveth_device_table[] = {
};
MODULE_DEVICE_TABLE(vio, ibmveth_device_table);
-static struct dev_pm_ops ibmveth_pm_ops = {
+static const struct dev_pm_ops ibmveth_pm_ops = {
.resume = ibmveth_resume
};
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 87db1eb5cc44..a3e694679635 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -763,12 +763,6 @@ static int init_resources(struct ibmvnic_adapter *adapter)
if (rc)
return rc;
- rc = init_sub_crq_irqs(adapter);
- if (rc) {
- netdev_err(netdev, "failed to initialize sub crq irqs\n");
- return -1;
- }
-
rc = init_stats_token(adapter);
if (rc)
return rc;
@@ -1803,7 +1797,6 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter)
return rc;
}
- rc = init_sub_crq_irqs(adapter);
return rc;
}
@@ -3669,6 +3662,13 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
if (rc) {
dev_err(dev, "Initialization of sub crqs failed\n");
release_crq_queue(adapter);
+ return rc;
+ }
+
+ rc = init_sub_crq_irqs(adapter);
+ if (rc) {
+ dev_err(dev, "Failed to initialize sub crq irqs\n");
+ release_crq_queue(adapter);
}
return rc;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index c1af47e45d3f..674773b28b2e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -3280,7 +3280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat
if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
mlx4_dbg(dev,
- "updating vf %d port %d no link state HW enforcment\n",
+ "updating vf %d port %d no link state HW enforcement\n",
vf, port);
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
index 1dae8e40fb25..5f41dc92aa68 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
@@ -238,7 +238,7 @@ static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state)
priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED;
}
- if (mlx4_en_setup_tc(dev, num_tcs))
+ if (mlx4_en_alloc_tx_queue_per_tc(dev, num_tcs))
return 1;
return 0;
@@ -303,7 +303,7 @@ static int mlx4_en_ets_validate(struct mlx4_en_priv *priv, struct ieee_ets *ets)
int has_ets_tc = 0;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
- if (ets->prio_tc[i] >= MLX4_EN_NUM_UP) {
+ if (ets->prio_tc[i] >= MLX4_EN_NUM_UP_HIGH) {
en_err(priv, "Bad priority in UP <=> TC mapping. TC: %d, UP: %d\n",
i, ets->prio_tc[i]);
return -EINVAL;
@@ -472,7 +472,7 @@ static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode)
goto err;
if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc))
goto err;
- if (mlx4_en_setup_tc(dev, 0))
+ if (mlx4_en_alloc_tx_queue_per_tc(dev, 0))
goto err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index e97fbf327594..c751a1d434ad 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1750,7 +1750,8 @@ static void mlx4_en_get_channels(struct net_device *dev,
channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP;
channel->rx_count = priv->rx_ring_num;
- channel->tx_count = priv->tx_ring_num[TX] / MLX4_EN_NUM_UP;
+ channel->tx_count = priv->tx_ring_num[TX] /
+ priv->prof->num_up;
}
static int mlx4_en_set_channels(struct net_device *dev,
@@ -1763,6 +1764,7 @@ static int mlx4_en_set_channels(struct net_device *dev,
int port_up = 0;
int xdp_count;
int err = 0;
+ u8 up;
if (!channel->tx_count || !channel->rx_count)
return -EINVAL;
@@ -1773,18 +1775,19 @@ static int mlx4_en_set_channels(struct net_device *dev,
mutex_lock(&mdev->state_lock);
xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0;
- if (channel->tx_count * MLX4_EN_NUM_UP + xdp_count > MAX_TX_RINGS) {
+ if (channel->tx_count * priv->prof->num_up + xdp_count >
+ MAX_TX_RINGS) {
err = -EINVAL;
en_err(priv,
"Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
- channel->tx_count * MLX4_EN_NUM_UP + xdp_count,
+ channel->tx_count * priv->prof->num_up + xdp_count,
MAX_TX_RINGS);
goto out;
}
memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
new_prof.num_tx_rings_p_up = channel->tx_count;
- new_prof.tx_ring_num[TX] = channel->tx_count * MLX4_EN_NUM_UP;
+ new_prof.tx_ring_num[TX] = channel->tx_count * priv->prof->num_up;
new_prof.tx_ring_num[TX_XDP] = xdp_count;
new_prof.rx_ring_num = channel->rx_count;
@@ -1799,11 +1802,11 @@ static int mlx4_en_set_channels(struct net_device *dev,
mlx4_en_safe_replace_resources(priv, tmp);
- netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
- if (netdev_get_num_tc(dev))
- mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP);
+ up = (priv->prof->num_up == MLX4_EN_NUM_UP_LOW) ?
+ 0 : priv->prof->num_up;
+ mlx4_en_setup_tc(dev, up);
en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num[TX]);
en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 56cdf38d150e..2b0cbca4beb5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -169,8 +169,10 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
params->prof[i].tx_ppp = pfctx;
params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
+ params->prof[i].num_up = MLX4_EN_NUM_UP_LOW;
+ params->prof[i].num_tx_rings_p_up = params->num_tx_rings_p_up;
params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up *
- MLX4_EN_NUM_UP;
+ params->prof[i].num_up;
params->prof[i].rss_rings = 0;
params->prof[i].inline_thold = inline_thold;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 9da76e3be2fc..3a291fc1780a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -60,11 +60,11 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up)
int i;
unsigned int offset = 0;
- if (up && up != MLX4_EN_NUM_UP)
+ if (up && up != MLX4_EN_NUM_UP_HIGH)
return -EINVAL;
netdev_set_num_tc(dev, up);
-
+ netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
/* Partition Tx queues evenly amongst UP's */
for (i = 0; i < up; i++) {
netdev_set_tc_queue(dev, i, priv->num_tx_rings_p_up, offset);
@@ -86,6 +86,50 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up)
return 0;
}
+/* Re-allocate the TX rings for a new traffic-class count and apply it.
+ *
+ * @tc == 0 selects MLX4_EN_NUM_UP_LOW user priorities, any other value
+ * selects MLX4_EN_NUM_UP_HIGH; tx_ring_num[TX] becomes
+ * num_tx_rings_p_up * num_up.  Under mdev->state_lock the new resources are
+ * allocated into a temporary priv, the port is stopped if it was up, the
+ * resources are swapped in, the port is restarted and mlx4_en_setup_tc() is
+ * called with @tc.
+ *
+ * Returns 0 on success, -ENOMEM if the temporary priv cannot be allocated,
+ * or the error returned by the failing step.
+ */
+int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_port_profile new_prof;
+ struct mlx4_en_priv *tmp;
+ int port_up = 0;
+ int err = 0;
+
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ mutex_lock(&mdev->state_lock);
+ /* start from the current profile and change only the ring counts */
+ memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+ new_prof.num_up = (tc == 0) ? MLX4_EN_NUM_UP_LOW :
+ MLX4_EN_NUM_UP_HIGH;
+ new_prof.tx_ring_num[TX] = new_prof.num_tx_rings_p_up *
+ new_prof.num_up;
+ err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true);
+ if (err)
+ goto out;
+
+ /* remember whether the port has to be brought back up afterwards */
+ if (priv->port_up) {
+ port_up = 1;
+ mlx4_en_stop_port(dev, 1);
+ }
+
+ mlx4_en_safe_replace_resources(priv, tmp);
+ if (port_up) {
+ err = mlx4_en_start_port(dev);
+ if (err) {
+ en_err(priv, "Failed starting port for setup TC\n");
+ goto out;
+ }
+ }
+
+ err = mlx4_en_setup_tc(dev, tc);
+out:
+ mutex_unlock(&mdev->state_lock);
+ kfree(tmp);
+ return err;
+}
+
static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle,
u32 chain_index, __be16 proto,
struct tc_to_netdev *tc)
@@ -93,9 +137,12 @@ static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle,
if (tc->type != TC_SETUP_MQPRIO)
return -EINVAL;
+ if (tc->mqprio->num_tc && tc->mqprio->num_tc != MLX4_EN_NUM_UP_HIGH)
+ return -EINVAL;
+
tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
- return mlx4_en_setup_tc(dev, tc->mqprio->num_tc);
+ return mlx4_en_alloc_tx_queue_per_tc(dev, tc->mqprio->num_tc);
}
#ifdef CONFIG_RFS_ACCEL
@@ -2144,7 +2191,7 @@ static int mlx4_en_copy_priv(struct mlx4_en_priv *dst,
memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config,
sizeof(dst->hwtstamp_config));
- dst->num_tx_rings_p_up = src->mdev->profile.num_tx_rings_p_up;
+ dst->num_tx_rings_p_up = prof->num_tx_rings_p_up;
dst->rx_ring_num = prof->rx_ring_num;
dst->flags = prof->flags;
dst->mdev = src->mdev;
@@ -2197,6 +2244,7 @@ static void mlx4_en_update_priv(struct mlx4_en_priv *dst,
dst->tx_ring[t] = src->tx_ring[t];
dst->tx_cq[t] = src->tx_cq[t];
}
+ dst->num_tx_rings_p_up = src->num_tx_rings_p_up;
dst->rx_ring_num = src->rx_ring_num;
memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile));
}
@@ -2780,7 +2828,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
if (priv->tx_ring_num[TX] + xdp_ring_num > MAX_TX_RINGS) {
tx_changed = 1;
new_prof.tx_ring_num[TX] =
- MAX_TX_RINGS - ALIGN(xdp_ring_num, MLX4_EN_NUM_UP);
+ MAX_TX_RINGS - ALIGN(xdp_ring_num, priv->prof->num_up);
en_warn(priv, "Reducing the number of TX rings, to not exceed the max total rings number.\n");
}
@@ -3271,7 +3319,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
priv->flags |= MLX4_EN_DCB_ENABLED;
priv->cee_config.pfc_state = false;
- for (i = 0; i < MLX4_EN_NUM_UP; i++)
+ for (i = 0; i < MLX4_EN_NUM_UP_HIGH; i++)
priv->cee_config.dcb_pfc[i] = pfc_disabled;
if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index a6b0db0e0383..86d2d42d658d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -63,7 +63,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
context->local_qpn = cpu_to_be32(qpn);
context->pri_path.ackto = 1 & 0x07;
context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6;
- if (user_prio >= 0) {
+ /* force user priority per tx ring */
+ if (user_prio >= 0 && priv->prof->num_up == MLX4_EN_NUM_UP_HIGH) {
context->pri_path.sched_queue |= user_prio << 3;
context->pri_path.feup = MLX4_FEUP_FORCE_ETH_UP;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 7d69d939ee2d..4f3a9b27ce4a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -691,15 +691,11 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
{
struct mlx4_en_priv *priv = netdev_priv(dev);
u16 rings_p_up = priv->num_tx_rings_p_up;
- u8 up = 0;
if (netdev_get_num_tc(dev))
return skb_tx_hash(dev, skb);
- if (skb_vlan_tag_present(skb))
- up = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT;
-
- return fallback(dev, skb) % rings_p_up + up * rings_p_up;
+ return fallback(dev, skb) % rings_p_up;
}
static void mlx4_bf_copy(void __iomem *dst, const void *src,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 457e070bca46..a27c9c13a36e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -91,7 +91,7 @@ module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
"probe_vf=port1,port2,port1+2");
-int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
+static int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
module_param_named(log_num_mgm_entry_size,
mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 6ea2b7a0c34d..30616cd0140d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -230,7 +230,6 @@ do { \
#define mlx4_warn(mdev, format, ...) \
dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
-extern int mlx4_log_num_mgm_entry_size;
extern int log_mtts_per_seg;
extern int mlx4_internal_err_reset;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 963b77d51b48..d350b2158104 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -115,11 +115,12 @@
#define MLX4_EN_SMALL_PKT_SIZE 64
#define MLX4_EN_MIN_TX_RING_P_UP 1
#define MLX4_EN_MAX_TX_RING_P_UP 32
-#define MLX4_EN_NUM_UP 8
+#define MLX4_EN_NUM_UP_LOW 1
+#define MLX4_EN_NUM_UP_HIGH 8
#define MLX4_EN_DEF_RX_RING_SIZE 1024
#define MLX4_EN_DEF_TX_RING_SIZE MLX4_EN_DEF_RX_RING_SIZE
#define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \
- MLX4_EN_NUM_UP)
+ MLX4_EN_NUM_UP_HIGH)
#define MLX4_EN_DEFAULT_TX_WORK 256
@@ -386,6 +387,7 @@ struct mlx4_en_port_profile {
u8 rx_ppp;
u8 tx_pause;
u8 tx_ppp;
+ u8 num_up;
int rss_rings;
int inline_thold;
struct hwtstamp_config hwtstamp_config;
@@ -485,7 +487,7 @@ enum dcb_pfc_type {
struct mlx4_en_cee_config {
bool pfc_state;
- enum dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP];
+ enum dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP_HIGH];
};
#endif
@@ -761,6 +763,7 @@ extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops;
#endif
int mlx4_en_setup_tc(struct net_device *dev, u8 up);
+int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc);
#ifdef CONFIG_RFS_ACCEL
void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index cf1ef48bfd8d..5aee05992f27 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -11,9 +11,13 @@ config MLX5_CORE
Core driver for low level functionality of the ConnectX-4 and
Connect-IB cards by Mellanox Technologies.
+config MLX5_ACCEL
+ bool
+
config MLX5_FPGA
bool "Mellanox Technologies Innova support"
depends on MLX5_CORE
+ select MLX5_ACCEL
---help---
Build support for the Innova family of network cards by Mellanox
Technologies. Innova network cards are comprised of a ConnectX chip
@@ -48,3 +52,15 @@ config MLX5_CORE_IPOIB
default n
---help---
MLX5 IPoIB offloads & acceleration support.
+
+config MLX5_EN_IPSEC
+ bool "IPSec XFRM cryptography-offload acceleration"
+ depends on MLX5_ACCEL
+ depends on MLX5_CORE_EN
+ depends on XFRM_OFFLOAD
+ depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+ default n
+ ---help---
+ Build support for IPsec cryptography-offload acceleration in the NIC.
+ Note: Support for hardware with this capability needs to be selected
+ for this option to become available.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 5ad093a21a6e..ca367445f864 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -4,9 +4,12 @@ subdir-ccflags-y += -I$(src)
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
- fs_counters.o rl.o lag.o dev.o
+ fs_counters.o rl.o lag.o dev.o lib/gid.o
-mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o
+mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o
+
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
+ fpga/ipsec.o
mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
@@ -16,3 +19,6 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o
mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o
+
+mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
+ en_accel/ipsec_stats.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
new file mode 100644
index 000000000000..53e69edaedde
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "accel/ipsec.h"
+#include "mlx5_core.h"
+#include "fpga/ipsec.h"
+
+/* Submit an IPSec SADB command to the device.
+ *
+ * Returns the context pointer produced by mlx5_fpga_ipsec_sa_cmd_exec(), or
+ * ERR_PTR(-EOPNOTSUPP) when @mdev does not report the IPSec device
+ * capability.
+ */
+void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+				   struct mlx5_accel_ipsec_sa *cmd)
+{
+	if (MLX5_IPSEC_DEV(mdev))
+		return mlx5_fpga_ipsec_sa_cmd_exec(mdev, cmd);
+
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+/* Wait on a command context; forwards @ctx unchanged to
+ * mlx5_fpga_ipsec_sa_cmd_wait() and returns its result.
+ */
+int mlx5_accel_ipsec_sa_cmd_wait(void *ctx)
+{
+ return mlx5_fpga_ipsec_sa_cmd_wait(ctx);
+}
+
+/* Return the IPSec capability mask of @mdev as reported by
+ * mlx5_fpga_ipsec_device_caps(); callers test it against the
+ * MLX5_ACCEL_IPSEC_* bits.
+ */
+u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_ipsec_device_caps(mdev);
+}
+
+/* Return the value of mlx5_fpga_ipsec_counters_count() for @mdev. */
+unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_ipsec_counters_count(mdev);
+}
+
+/* Forward a counter read for @mdev to mlx5_fpga_ipsec_counters_read(),
+ * passing @counters and @count through unchanged, and return its result.
+ */
+int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+ unsigned int count)
+{
+ return mlx5_fpga_ipsec_counters_read(mdev, counters, count);
+}
+
+/* Initialize IPSec acceleration for @mdev; returns the result of
+ * mlx5_fpga_ipsec_init().
+ */
+int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_ipsec_init(mdev);
+}
+
+/* Tear down IPSec acceleration for @mdev via mlx5_fpga_ipsec_cleanup(). */
+void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+ mlx5_fpga_ipsec_cleanup(mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
new file mode 100644
index 000000000000..d6e20fea9554
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_ACCEL_IPSEC_H__
+#define __MLX5_ACCEL_IPSEC_H__
+
+#ifdef CONFIG_MLX5_ACCEL
+
+#include <linux/mlx5/driver.h>
+
+/* Capability bits tested against the mask returned by
+ * mlx5_accel_ipsec_device_caps(). Numbering starts at BIT(1).
+ */
+enum {
+ MLX5_ACCEL_IPSEC_DEVICE = BIT(1),
+ MLX5_ACCEL_IPSEC_IPV6 = BIT(2),
+ MLX5_ACCEL_IPSEC_ESP = BIT(3),
+ MLX5_ACCEL_IPSEC_LSO = BIT(4),
+};
+
+#define MLX5_IPSEC_SADB_IP_AH BIT(7)
+#define MLX5_IPSEC_SADB_IP_ESP BIT(6)
+#define MLX5_IPSEC_SADB_SA_VALID BIT(5)
+#define MLX5_IPSEC_SADB_SPI_EN BIT(4)
+#define MLX5_IPSEC_SADB_DIR_SX BIT(3)
+#define MLX5_IPSEC_SADB_IPV6 BIT(2)
+
+/* SADB command opcodes carried in the cmd field of
+ * struct mlx5_accel_ipsec_sa.
+ */
+enum {
+ MLX5_IPSEC_CMD_ADD_SA = 0,
+ MLX5_IPSEC_CMD_DEL_SA = 1,
+};
+
+/* Encryption modes carried in the enc_mode field of
+ * struct mlx5_accel_ipsec_sa. The values are not contiguous (2 is unused).
+ */
+enum mlx5_accel_ipsec_enc_mode {
+ MLX5_IPSEC_SADB_MODE_NONE = 0,
+ MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128 = 1,
+ MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128 = 3,
+};
+
+#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
+ MLX5_ACCEL_IPSEC_DEVICE)
+
+/* SADB command as handed to mlx5_accel_ipsec_sa_cmd_exec().
+ *
+ * The structure is __packed and multi-byte fields are big-endian
+ * (__be32/__be16), so the field order and sizes must not be changed.
+ */
+struct mlx5_accel_ipsec_sa {
+ /* One of MLX5_IPSEC_CMD_* */
+ __be32 cmd;
+ u8 key_enc[32];
+ u8 key_auth[32];
+ /* Source / destination address, four 32-bit words each */
+ __be32 sip[4];
+ __be32 dip[4];
+ /* Mode-specific salt/IV material; both views are 16 bytes */
+ union {
+ struct {
+ __be32 reserved;
+ u8 salt_iv[8];
+ __be32 salt;
+ } __packed gcm;
+ struct {
+ u8 salt[16];
+ } __packed cbc;
+ };
+ __be32 spi;
+ __be32 sw_sa_handle;
+ __be16 tfclen;
+ /* One of enum mlx5_accel_ipsec_enc_mode */
+ u8 enc_mode;
+ u8 sip_masklen;
+ u8 dip_masklen;
+ /* Bitwise OR of MLX5_IPSEC_SADB_* flags */
+ u8 flags;
+ u8 reserved[2];
+} __packed;
+
+/**
+ * mlx5_accel_ipsec_sa_cmd_exec - Execute an IPSec SADB command
+ * @mdev: mlx5 device
+ * @cmd: command to execute
+ * May be called from atomic context. Returns context pointer, or error
+ * Caller must eventually call mlx5_accel_ipsec_sa_cmd_wait from non-atomic
+ * context, to cleanup the context pointer
+ */
+void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_ipsec_sa *cmd);
+
+/**
+ * mlx5_accel_ipsec_sa_cmd_wait - Wait for command execution completion
+ * @context: Context pointer returned from call to mlx5_accel_ipsec_sa_cmd_exec
+ * Sleeps (killable) until command execution is complete.
+ * Returns the command result, or -EINTR if killed
+ */
+int mlx5_accel_ipsec_sa_cmd_wait(void *context);
+
+u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
+
+unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev);
+int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+ unsigned int count);
+
+int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
+
+#else
+
+#define MLX5_IPSEC_DEV(mdev) false
+
+/* Stub for builds without CONFIG_MLX5_ACCEL: nothing to set up, always
+ * returns 0.
+ */
+static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+/* Stub for builds without CONFIG_MLX5_ACCEL: nothing to tear down. */
+static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+#endif
+
+#endif /* __MLX5_ACCEL_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 4d5bd01f1ebb..f5a2c605749f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -307,6 +307,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_FPGA_DESTROY_QP:
return MLX5_CMD_STAT_OK;
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -419,6 +420,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_FPGA_CREATE_QP:
+ case MLX5_CMD_OP_FPGA_MODIFY_QP:
+ case MLX5_CMD_OP_FPGA_QUERY_QP:
+ case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
*status = MLX5_DRIVER_STATUS_ABORTED;
*synd = MLX5_DRIVER_SYND;
return -EIO;
@@ -585,6 +590,11 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT);
MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP);
+ MLX5_COMMAND_STR_CASE(FPGA_MODIFY_QP);
+ MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP);
+ MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS);
+ MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP);
default: return "unknown command opcode";
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index eef0a50e2388..e1b7ddfecd01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -328,6 +328,7 @@ struct mlx5e_sq_dma {
enum {
MLX5E_SQ_STATE_ENABLED,
+ MLX5E_SQ_STATE_IPSEC,
};
struct mlx5e_sq_wqe_info {
@@ -784,6 +785,9 @@ struct mlx5e_priv {
const struct mlx5e_profile *profile;
void *ppriv;
+#ifdef CONFIG_MLX5_EN_IPSEC
+ struct mlx5e_ipsec *ipsec;
+#endif
};
struct mlx5e_profile {
@@ -833,7 +837,6 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq);
void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
-struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
void mlx5e_rx_am(struct mlx5e_rq *rq);
void mlx5e_rx_am_work(struct work_struct *work);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
new file mode 100644
index 000000000000..bac5103efad3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -0,0 +1,461 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <crypto/internal/geniv.h>
+#include <crypto/aead.h>
+#include <linux/inetdevice.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+
+#include "en.h"
+#include "accel/ipsec.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/ipsec_rxtx.h"
+
+/* Driver-side bookkeeping for one offloaded xfrm state. A pointer to this
+ * structure is stored in x->xso.offload_handle once the SA has been added.
+ */
+struct mlx5e_ipsec_sa_entry {
+ struct hlist_node hlist; /* Item in SADB_RX hashtable */
+ unsigned int handle; /* Handle in SADB_RX */
+ /* The xfrm state this entry was created for */
+ struct xfrm_state *x;
+ /* Owning per-netdev IPSec context */
+ struct mlx5e_ipsec *ipsec;
+ /* Context of the pending DEL_SA command: set by the delete callback,
+  * waited on and cleared by the free callback
+  */
+ void *context;
+};
+
+/* Find the xfrm state registered in SADB_RX under @handle.
+ *
+ * The hashtable is walked inside an RCU read-side critical section. On a
+ * match a reference is taken with xfrm_state_hold() before the state is
+ * returned. Returns NULL when no entry carries @handle.
+ */
+struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
+					      unsigned int handle)
+{
+	struct mlx5e_ipsec_sa_entry *entry;
+	struct xfrm_state *found = NULL;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(ipsec->sadb_rx, entry, hlist, handle) {
+		/* Several handles may share a bucket; skip the others */
+		if (entry->handle != handle)
+			continue;
+
+		found = entry->x;
+		xfrm_state_hold(found);
+		break;
+	}
+	rcu_read_unlock();
+
+	return found;
+}
+
+/* Allocate an RX handle for @sa_entry and publish the entry in the SADB_RX
+ * hashtable, keyed by that handle.
+ *
+ * The handle is allocated before sadb_rx_lock is taken: ida_simple_get()
+ * is called with GFP_KERNEL and may therefore sleep, which is not allowed
+ * while holding a spinlock with interrupts disabled. The IDA serializes
+ * itself internally, so the spinlock only has to cover the hashtable
+ * insertion.
+ *
+ * Returns 0 on success or the negative errno from ida_simple_get().
+ */
+static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+	unsigned long flags;
+	int ret;
+
+	/* Handles start at 1; an end of 0 means "no upper bound" */
+	ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL);
+	if (ret < 0)
+		return ret;
+
+	spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+	sa_entry->handle = ret;
+	hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle);
+	spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+
+	return 0;
+}
+
+/* Unlink @sa_entry from the SADB_RX hashtable under sadb_rx_lock.
+ * The entry's handle is not released here; that is done by
+ * mlx5e_ipsec_sadb_rx_free().
+ */
+static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+ hash_del_rcu(&sa_entry->hlist);
+ spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+}
+
+/* Return @sa_entry's handle to the IDA once an RCU grace period has
+ * elapsed. Calls synchronize_rcu(), so it must be invoked from a context
+ * that is allowed to sleep.
+ */
+static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+ unsigned long flags;
+
+ /* Wait for the hash_del_rcu call in sadb_rx_del to affect data path */
+ synchronize_rcu();
+ spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+ ida_simple_remove(&ipsec->halloc, sa_entry->handle);
+ spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+}
+
+/* Map the AEAD key length of @x to a device encryption mode.
+ *
+ * The length is taken from alg_key_len rounded up to whole bytes, minus
+ * the 4 trailing salt bytes. 16 selects AES-GCM-128 and 32 selects
+ * AES-GCM-256; any other length is logged and -1 is returned.
+ */
+static enum mlx5_accel_ipsec_enc_mode mlx5e_ipsec_enc_mode(struct xfrm_state *x)
+{
+	unsigned int enc_key_bytes = (x->aead->alg_key_len + 7) / 8 - 4;
+
+	if (enc_key_bytes == 16)
+		return MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128;
+
+	if (enc_key_bytes == 32)
+		return MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128;
+
+	netdev_warn(x->xso.dev, "Bad key len: %d for alg %s\n",
+		    enc_key_bytes, x->aead->alg_name);
+	return -1;
+}
+
+/* Fill @hw_sa with the device command for operation @op on @sa_entry.
+ *
+ * The structure is zeroed first. Key, salt and IV material are copied only
+ * for MLX5_IPSEC_CMD_ADD_SA; the addresses, SPI, software handle, protocol,
+ * encryption mode and direction flag are filled in for every operation.
+ */
+static void mlx5e_ipsec_build_hw_sa(u32 op, struct mlx5e_ipsec_sa_entry *sa_entry,
+ struct mlx5_accel_ipsec_sa *hw_sa)
+{
+ struct xfrm_state *x = sa_entry->x;
+ struct aead_geniv_ctx *geniv_ctx;
+ unsigned int crypto_data_len;
+ struct crypto_aead *aead;
+ unsigned int key_len;
+ int ivsize;
+
+ memset(hw_sa, 0, sizeof(*hw_sa));
+
+ if (op == MLX5_IPSEC_CMD_ADD_SA) {
+ /* alg_key_len is rounded up to whole bytes here */
+ crypto_data_len = (x->aead->alg_key_len + 7) / 8;
+ key_len = crypto_data_len - 4; /* 4 bytes salt at end */
+ aead = x->data;
+ geniv_ctx = crypto_aead_ctx(aead);
+ ivsize = crypto_aead_ivsize(aead);
+
+ memcpy(&hw_sa->key_enc, x->aead->alg_key, key_len);
+ /* Duplicate 128 bit key twice according to HW layout */
+ if (key_len == 16)
+ memcpy(&hw_sa->key_enc[16], x->aead->alg_key, key_len);
+ /* NOTE(review): salt_iv is 8 bytes; this copy assumes ivsize <= 8 - confirm */
+ memcpy(&hw_sa->gcm.salt_iv, geniv_ctx->salt, ivsize);
+ /* The salt is the 4 bytes that follow the key inside alg_key.
+  * NOTE(review): read through a casted pointer; presumably alg_key is
+  * suitably aligned for a 32-bit load - confirm.
+  */
+ hw_sa->gcm.salt = *((__be32 *)(x->aead->alg_key + key_len));
+ }
+
+ hw_sa->cmd = htonl(op);
+ hw_sa->flags |= MLX5_IPSEC_SADB_SA_VALID | MLX5_IPSEC_SADB_SPI_EN;
+ if (x->props.family == AF_INET) {
+ /* An IPv4 address occupies only the last of the four words */
+ hw_sa->sip[3] = x->props.saddr.a4;
+ hw_sa->dip[3] = x->id.daddr.a4;
+ hw_sa->sip_masklen = 32;
+ hw_sa->dip_masklen = 32;
+ } else {
+ memcpy(hw_sa->sip, x->props.saddr.a6, sizeof(hw_sa->sip));
+ memcpy(hw_sa->dip, x->id.daddr.a6, sizeof(hw_sa->dip));
+ hw_sa->sip_masklen = 128;
+ hw_sa->dip_masklen = 128;
+ hw_sa->flags |= MLX5_IPSEC_SADB_IPV6;
+ }
+ /* spi is copied as-is, without a byte-order conversion */
+ hw_sa->spi = x->id.spi;
+ hw_sa->sw_sa_handle = htonl(sa_entry->handle);
+ switch (x->id.proto) {
+ case IPPROTO_ESP:
+ hw_sa->flags |= MLX5_IPSEC_SADB_IP_ESP;
+ break;
+ case IPPROTO_AH:
+ hw_sa->flags |= MLX5_IPSEC_SADB_IP_AH;
+ break;
+ default:
+ break;
+ }
+ hw_sa->enc_mode = mlx5e_ipsec_enc_mode(x);
+ /* States that are not inbound get the DIR_SX flag */
+ if (!(x->xso.flags & XFRM_OFFLOAD_INBOUND))
+ hw_sa->flags |= MLX5_IPSEC_SADB_DIR_SX;
+}
+
+/* Check whether xfrm state @x can be offloaded to this device.
+ *
+ * Each unsupported property is reported with netdev_info() on the
+ * offloading netdev, and the first one found ends the check.
+ *
+ * Returns 0 when the state is acceptable, -EINVAL otherwise.
+ */
+static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
+{
+	struct net_device *netdev = x->xso.dev;
+	struct mlx5e_priv *priv;
+
+	priv = netdev_priv(netdev);
+
+	if (x->props.aalgo != SADB_AALG_NONE) {
+		netdev_info(netdev, "Cannot offload authenticated xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
+		netdev_info(netdev, "Only AES-GCM-ICV16 xfrm state may be offloaded\n");
+		return -EINVAL;
+	}
+	if (x->props.calgo != SADB_X_CALG_NONE) {
+		netdev_info(netdev, "Cannot offload compressed xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.flags & XFRM_STATE_ESN) {
+		netdev_info(netdev, "Cannot offload ESN xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.family != AF_INET &&
+	    x->props.family != AF_INET6) {
+		netdev_info(netdev, "Only IPv4/6 xfrm states may be offloaded\n");
+		return -EINVAL;
+	}
+	if (x->props.mode != XFRM_MODE_TRANSPORT &&
+	    x->props.mode != XFRM_MODE_TUNNEL) {
+		/* Logged with netdev_info() like every other rejection here */
+		netdev_info(netdev, "Only transport and tunnel xfrm states may be offloaded\n");
+		return -EINVAL;
+	}
+	if (x->id.proto != IPPROTO_ESP) {
+		netdev_info(netdev, "Only ESP xfrm state may be offloaded\n");
+		return -EINVAL;
+	}
+	if (x->encap) {
+		netdev_info(netdev, "Encapsulated xfrm state may not be offloaded\n");
+		return -EINVAL;
+	}
+	/* x->aead is dereferenced below, so it must be checked first */
+	if (!x->aead) {
+		netdev_info(netdev, "Cannot offload xfrm states without aead\n");
+		return -EINVAL;
+	}
+	if (x->aead->alg_icv_len != 128) {
+		netdev_info(netdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
+		return -EINVAL;
+	}
+	/* The key length includes the 32 salt bits that trail the key */
+	if ((x->aead->alg_key_len != 128 + 32) &&
+	    (x->aead->alg_key_len != 256 + 32)) {
+		netdev_info(netdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+		return -EINVAL;
+	}
+	if (x->tfcpad) {
+		netdev_info(netdev, "Cannot offload xfrm states with tfc padding\n");
+		return -EINVAL;
+	}
+	if (!x->geniv) {
+		netdev_info(netdev, "Cannot offload xfrm states without geniv\n");
+		return -EINVAL;
+	}
+	if (strcmp(x->geniv, "seqiv")) {
+		netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n");
+		return -EINVAL;
+	}
+	if (x->props.family == AF_INET6 &&
+	    !(mlx5_accel_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_IPV6)) {
+		netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* xdo_dev_state_add callback: offload xfrm state @x to the device.
+ *
+ * Validates the state, allocates a tracking entry, registers inbound
+ * states in SADB_RX, then issues an ADD_SA command and waits for it to
+ * complete. On success the entry is stored in x->xso.offload_handle; on
+ * failure everything set up so far is undone.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int mlx5e_xfrm_add_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
+ struct net_device *netdev = x->xso.dev;
+ struct mlx5_accel_ipsec_sa hw_sa;
+ struct mlx5e_priv *priv;
+ void *context;
+ int err;
+
+ priv = netdev_priv(netdev);
+
+ err = mlx5e_xfrm_validate_state(x);
+ if (err)
+ return err;
+
+ sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
+ if (!sa_entry) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ sa_entry->x = x;
+ sa_entry->ipsec = priv->ipsec;
+
+ /* Add the SA to handle processed incoming packets before the add SA
+ * completion was received
+ */
+ if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
+ err = mlx5e_ipsec_sadb_rx_add(sa_entry);
+ if (err) {
+ netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err);
+ goto err_entry;
+ }
+ }
+
+ mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_ADD_SA, sa_entry, &hw_sa);
+ context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
+ if (IS_ERR(context)) {
+ err = PTR_ERR(context);
+ goto err_sadb_rx;
+ }
+
+ err = mlx5_accel_ipsec_sa_cmd_wait(context);
+ if (err)
+ goto err_sadb_rx;
+
+ /* Success: err is 0 here and the entry is now owned by the state */
+ x->xso.offload_handle = (unsigned long)sa_entry;
+ goto out;
+
+err_sadb_rx:
+ /* Only inbound states were registered in SADB_RX above */
+ if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
+ mlx5e_ipsec_sadb_rx_del(sa_entry);
+ mlx5e_ipsec_sadb_rx_free(sa_entry);
+ }
+err_entry:
+ kfree(sa_entry);
+out:
+ return err;
+}
+
+/* xdo_dev_state_delete callback: start removing @x from the device.
+ *
+ * Inbound states are unlinked from SADB_RX, then a DEL_SA command is
+ * issued without waiting for it. The command context is stored in the
+ * entry so that mlx5e_xfrm_free_state() can wait for completion. If the
+ * command cannot be issued, the function returns without storing a
+ * context. Does nothing when the state has no offload handle.
+ */
+static void mlx5e_xfrm_del_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ struct mlx5_accel_ipsec_sa hw_sa;
+ void *context;
+
+ if (!x->xso.offload_handle)
+ return;
+
+ sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+ WARN_ON(sa_entry->x != x);
+
+ if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
+ mlx5e_ipsec_sadb_rx_del(sa_entry);
+
+ mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_DEL_SA, sa_entry, &hw_sa);
+ context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
+ if (IS_ERR(context))
+ return;
+
+ sa_entry->context = context;
+}
+
+/* xdo_dev_state_free callback: finish removing @x and release its entry.
+ *
+ * Waits for the DEL_SA command recorded in sa_entry->context. If the wait
+ * reports an error, the entry (and, for inbound states, its handle) is
+ * deliberately left allocated. Otherwise the inbound handle is released
+ * and the entry is freed. Does nothing when the state has no offload
+ * handle.
+ */
+static void mlx5e_xfrm_free_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ int res;
+
+ if (!x->xso.offload_handle)
+ return;
+
+ sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+ WARN_ON(sa_entry->x != x);
+
+ /* NOTE(review): context is still NULL here if mlx5e_xfrm_del_state()
+  * failed to issue the DEL_SA command (or was never called) - confirm
+  * that mlx5_accel_ipsec_sa_cmd_wait() tolerates a NULL context.
+  */
+ res = mlx5_accel_ipsec_sa_cmd_wait(sa_entry->context);
+ sa_entry->context = NULL;
+ if (res) {
+ /* Leftover object will leak */
+ return;
+ }
+
+ if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
+ mlx5e_ipsec_sadb_rx_free(sa_entry);
+
+ kfree(sa_entry);
+}
+
+/* Allocate and attach the per-netdev IPSec context to @priv.
+ *
+ * When the device does not report the IPSec capability nothing is
+ * allocated and 0 is returned. Otherwise the SADB_RX hashtable, its lock
+ * and the handle IDA are initialized, and the context and @priv are
+ * linked to each other.
+ *
+ * Returns 0 on success, -ENOMEM if the context cannot be allocated.
+ */
+int mlx5e_ipsec_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec *ipsec = NULL;
+
+ if (!MLX5_IPSEC_DEV(priv->mdev)) {
+ netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
+ return 0;
+ }
+
+ ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
+ if (!ipsec)
+ return -ENOMEM;
+
+ hash_init(ipsec->sadb_rx);
+ spin_lock_init(&ipsec->sadb_rx_lock);
+ ida_init(&ipsec->halloc);
+ ipsec->en_priv = priv;
+ /* en_priv was set to priv on the previous line, so this sets priv->ipsec */
+ ipsec->en_priv->ipsec = ipsec;
+ netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
+ return 0;
+}
+
+/* Destroy the handle IDA, free the IPSec context of @priv and clear
+ * priv->ipsec. Does nothing when no context is attached.
+ */
+void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec *ipsec = priv->ipsec;
+
+ if (!ipsec)
+ return;
+
+ ida_destroy(&ipsec->halloc);
+ kfree(ipsec);
+ priv->ipsec = NULL;
+}
+
+/* xdo_dev_offload_ok callback: decide whether @skb can use the offload
+ * for state @x.
+ *
+ * IPv4 packets qualify only without IP options (header length of at most
+ * five 32-bit words). For any other family the packet is treated as IPv6
+ * and qualifies only when its next header is not an extension header.
+ */
+static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+	/* Offload with IPv4 options is not supported yet */
+	if (x->props.family == AF_INET)
+		return ip_hdr(skb)->ihl <= 5;
+
+	/* Offload with IPv6 extension headers is not supported yet */
+	return !ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr);
+}
+
+/* xfrm device-offload callbacks; installed on the netdev by
+ * mlx5e_ipsec_build_netdev().
+ */
+static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
+ .xdo_dev_state_add = mlx5e_xfrm_add_state,
+ .xdo_dev_state_delete = mlx5e_xfrm_del_state,
+ .xdo_dev_state_free = mlx5e_xfrm_free_state,
+ .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok,
+};
+
+/* Advertise the IPSec offload features of @priv's netdev.
+ *
+ * Features are enabled in tiers, and each tier returns early when its
+ * capabilities are missing:
+ *  1. ESP offload and the xfrmdev ops need the ESP device capability and
+ *     the swp Ethernet capability.
+ *  2. ESP TX checksum additionally needs swp_csum.
+ *  3. ESP GSO additionally needs the LSO device capability and swp_lso.
+ * Does nothing when no IPSec context is attached to @priv.
+ */
+void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct net_device *netdev = priv->netdev;
+
+ if (!priv->ipsec)
+ return;
+
+ if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_ESP) ||
+ !MLX5_CAP_ETH(mdev, swp)) {
+ mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
+ return;
+ }
+
+ mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
+ netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
+ /* NOTE(review): the ESP and ESP TX checksum bits are set in features and
+  * hw_enc_features but not in hw_features, unlike NETIF_F_GSO_ESP below -
+  * confirm this is intended.
+  */
+ netdev->features |= NETIF_F_HW_ESP;
+ netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+ if (!MLX5_CAP_ETH(mdev, swp_csum)) {
+ mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
+ return;
+ }
+
+ netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
+ netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
+
+ if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_LSO) ||
+ !MLX5_CAP_ETH(mdev, swp_lso)) {
+ mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
+ return;
+ }
+
+ mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
+ netdev->features |= NETIF_F_GSO_ESP;
+ netdev->hw_features |= NETIF_F_GSO_ESP;
+ netdev->hw_enc_features |= NETIF_F_GSO_ESP;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
new file mode 100644
index 000000000000..56e00baf16cc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_IPSEC_H__
+#define __MLX5E_IPSEC_H__
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+#include <linux/mlx5/device.h>
+#include <net/xfrm.h>
+#include <linux/idr.h>
+
+/* log2 of the number of buckets in the RX SADB hash table (sadb_rx) */
+#define MLX5E_IPSEC_SADB_RX_BITS 10
+/* Ethertype value that marks a frame as carrying inline IPSec metadata */
+#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
+/* Number of bytes the inline metadata adds to a frame */
+#define MLX5E_METADATA_ETHER_LEN 8
+
+struct mlx5e_priv;
+
+/* Driver-maintained drop counters, bumped from the RX/TX data path.
+ * Every member must remain an atomic64_t: the ethtool code reads them through
+ * offset-based descriptors cast to atomic64_t.
+ */
+struct mlx5e_ipsec_sw_stats {
+ atomic64_t ipsec_rx_drop_sp_alloc; /* RX: secpath_dup() failed */
+ atomic64_t ipsec_rx_drop_sadb_miss; /* RX: SA handle not found in sadb_rx */
+ atomic64_t ipsec_rx_drop_syndrome; /* RX: unrecognised metadata syndrome */
+ atomic64_t ipsec_tx_drop_bundle; /* TX: secpath length is not exactly 1 */
+ atomic64_t ipsec_tx_drop_no_state; /* TX: no xfrm state on the skb */
+ atomic64_t ipsec_tx_drop_not_ip; /* TX: no offload handle, or not IPv4/IPv6 */
+ atomic64_t ipsec_tx_drop_trailer; /* TX: removing the ESP trailer failed */
+ atomic64_t ipsec_tx_drop_metadata; /* TX: adding the metadata header failed */
+};
+
+/* Counters read back from the accelerator.
+ * mlx5e_ipsec_update_stats() passes this struct to
+ * mlx5_accel_ipsec_counters_read() as a flat u64 array, so it must contain
+ * only u64 members; their order presumably has to match the order in which
+ * that call returns counters (TODO confirm against the accel layer).
+ */
+struct mlx5e_ipsec_stats {
+ u64 ipsec_dec_in_packets;
+ u64 ipsec_dec_out_packets;
+ u64 ipsec_dec_bypass_packets;
+ u64 ipsec_enc_in_packets;
+ u64 ipsec_enc_out_packets;
+ u64 ipsec_enc_bypass_packets;
+ u64 ipsec_dec_drop_packets;
+ u64 ipsec_dec_auth_fail_packets;
+ u64 ipsec_enc_drop_packets;
+ u64 ipsec_add_sa_success;
+ u64 ipsec_add_sa_fail;
+ u64 ipsec_del_sa_success;
+ u64 ipsec_del_sa_fail;
+ u64 ipsec_cmd_drop;
+};
+
+/* Per-netdev IPSec offload context, reached through priv->ipsec.
+ * A NULL priv->ipsec is treated throughout as "IPSec offload not active".
+ */
+struct mlx5e_ipsec {
+ struct mlx5e_priv *en_priv; /* back-pointer to the owning priv */
+ /* RX SA database: hash table searched by mlx5e_ipsec_sadb_rx_lookup() */
+ DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
+ spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
+ struct ida halloc; /* ID allocator; presumably hands out RX SA handles */
+ struct mlx5e_ipsec_sw_stats sw_stats; /* driver-side drop counters */
+ struct mlx5e_ipsec_stats stats; /* last snapshot of device counters */
+};
+
+/* One-time setup of the 1/MSS lookup table used on the GSO TX path */
+void mlx5e_ipsec_build_inverse_table(void);
+/* Lifetime of the per-netdev IPSec context and netdev feature setup */
+int mlx5e_ipsec_init(struct mlx5e_priv *priv);
+void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
+void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
+
+/* ethtool statistics helpers. The int-returning ones return the number of
+ * IPSec counters they account for (0 when priv->ipsec is NULL), so callers
+ * can add the result to a running index.
+ */
+int mlx5e_ipsec_get_count(struct mlx5e_priv *priv);
+int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data);
+void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv);
+int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data);
+
+/* Find the RX xfrm state registered under @handle; NULL result means a miss */
+struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev,
+ unsigned int handle);
+
+#else
+
+/* No-op stand-ins used when CONFIG_MLX5_EN_IPSEC is not set, so that callers
+ * need no #ifdef. The int-returning stubs report 0, i.e. "no IPSec counters"
+ * or "success", matching what the real functions return without a context.
+ */
+static inline void mlx5e_ipsec_build_inverse_table(void)
+{
+}
+
+static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
+static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+{
+}
+
+static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+{
+}
+
+static inline int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
+static inline int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv,
+ uint8_t *data)
+{
+ return 0;
+}
+
+static inline void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
+{
+}
+
+static inline int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+ return 0;
+}
+
+#endif
+
+#endif /* __MLX5E_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
new file mode 100644
index 000000000000..4a78aefdf157
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <crypto/aead.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/ipsec.h"
+#include "en.h"
+
+/* Metadata syndrome values accepted on receive; mlx5e_ipsec_build_sp() maps
+ * them to an xfrm offload status and rejects any other value.
+ */
+enum {
+ MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11,
+ MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12,
+};
+
+/* RX view of the 5-byte metadata content (packed: 1 + 4 bytes) */
+struct mlx5e_ipsec_rx_metadata {
+ unsigned char reserved;
+ __be32 sa_handle; /* big-endian key for the sadb_rx lookup */
+} __packed;
+
+/* Metadata syndrome values written on transmit by mlx5e_ipsec_set_metadata():
+ * plain offload, or offload combined with TCP LSO for GSO skbs.
+ */
+enum {
+ MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8,
+ MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9,
+};
+
+/* TX view of the 5-byte metadata content (packed: 2 + 2 + 1 bytes) */
+struct mlx5e_ipsec_tx_metadata {
+ __be16 mss_inv; /* 1/MSS in 16bit fixed point, only for LSO */
+ __be16 seq; /* LSBs of the first TCP seq, only for LSO */
+ u8 esp_next_proto; /* Next protocol of ESP */
+} __packed;
+
+/* Inline metadata located directly after the Ethernet header of a frame whose
+ * ethertype is MLX5E_METADATA_ETHER_TYPE. Packed size is 1 + 5 + 2 = 8 bytes,
+ * which is MLX5E_METADATA_ETHER_LEN.
+ */
+struct mlx5e_ipsec_metadata {
+ unsigned char syndrome; /* one of the RX/TX syndrome enum values */
+ union {
+ unsigned char raw[5];
+ /* from FPGA to host, on successful decrypt */
+ struct mlx5e_ipsec_rx_metadata rx;
+ /* from host to FPGA */
+ struct mlx5e_ipsec_tx_metadata tx;
+ } __packed content;
+ /* packet type ID field; never written by this file: on TX it overlays
+ * the packet's original ethertype, on RX it is left in place when the
+ * rest of the metadata is stripped
+ */
+ __be16 ethertype;
+} __packed;
+
+/* Number of entries in the inverse table, i.e. one more than the largest
+ * gso_size the table can be indexed with.
+ */
+#define MAX_LSO_MSS 2048
+
+/* Pre-calculated (Q0.16) fixed-point inverse 1/x function.
+ * Indexed by MSS and stored big-endian; filled once by
+ * mlx5e_ipsec_build_inverse_table(). Entry 0 is never written and stays 0.
+ */
+static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS];
+
+/* Look up the big-endian Q0.16 inverse of the skb's GSO segment size.
+ *
+ * The table only has MAX_LSO_MSS entries and nothing in this file bounds
+ * gso_size by that constant, so the index is range-checked here instead of
+ * reading past the end of the array. Out-of-range sizes yield 0, the same
+ * value the table holds for a gso_size of 0.
+ * NOTE(review): confirm what the accelerator should be given for
+ * MSS >= MAX_LSO_MSS; dropping the packet or capping the GSO size may be the
+ * better policy.
+ */
+static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb)
+{
+	unsigned int mss = skb_shinfo(skb)->gso_size;
+
+	if (mss >= MAX_LSO_MSS)
+		return 0;
+
+	return mlx5e_ipsec_inverse_table[mss];
+}
+
+/* Insert the inline IPSec metadata header between the MACs and the payload.
+ *
+ * The skb is grown at the front by sizeof(struct mlx5e_ipsec_metadata), both
+ * MAC addresses are moved to the new start and the ethertype slot is set to
+ * MLX5E_METADATA_ETHER_TYPE. The packet's original ethertype is not moved and
+ * thereby ends up as the trailing ->ethertype field of the metadata. Only
+ * ->content is zeroed here; ->syndrome is left for the caller to fill in.
+ *
+ * Returns the metadata pointer, or ERR_PTR(-ENOMEM) if skb_cow_head() fails.
+ */
+static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb)
+{
+	const size_t md_len = sizeof(struct mlx5e_ipsec_metadata);
+	struct mlx5e_ipsec_metadata *md;
+	struct ethhdr *hdr;
+
+	if (unlikely(skb_cow_head(skb, md_len)))
+		return ERR_PTR(-ENOMEM);
+
+	/* New frame start; the MAC header offset moves back with it */
+	hdr = (struct ethhdr *)skb_push(skb, md_len);
+	skb->mac_header -= md_len;
+
+	/* Slide destination + source MAC down to the new start */
+	memmove(hdr, (u8 *)hdr + md_len, 2 * ETH_ALEN);
+	hdr->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
+
+	md = (struct mlx5e_ipsec_metadata *)(hdr + 1);
+	memset(md->content.raw, 0, sizeof(md->content.raw));
+
+	return md;
+}
+
+/* Strip the ESP trailer (padding, pad-length byte, next-header byte and ICV)
+ * from a non-GSO skb and shrink the IP length fields to match.
+ *
+ * @skb: packet whose last bytes are the ESP trailer
+ * @x:   xfrm state; x->data is the AEAD transform giving the ICV size
+ *
+ * Returns 0 on success or a negative errno. On failure the caller is expected
+ * to drop the packet.
+ */
+static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
+{
+	unsigned int alen = crypto_aead_authsize(x->data);
+	unsigned int trailer_len;
+	u8 plen;
+	int ret;
+
+	/* The pad-length byte sits alen + 2 bytes from the end; without this
+	 * check the unsigned subtraction below would wrap on a short skb.
+	 */
+	if (unlikely(skb->len < alen + 2))
+		return -EINVAL;
+
+	ret = skb_copy_bits(skb, skb->len - alen - 2, &plen, 1);
+	if (unlikely(ret))
+		return ret;
+
+	trailer_len = alen + plen + 2;
+	if (unlikely(trailer_len > skb->len))
+		return -EINVAL;
+
+	/* Trimming can fail, so do not touch the headers unless it worked */
+	ret = pskb_trim(skb, skb->len - trailer_len);
+	if (unlikely(ret))
+		return ret;
+
+	/* Fetch the header pointers only after the trim, in case the skb
+	 * data was reallocated by it.
+	 */
+	if (skb->protocol == htons(ETH_P_IP)) {
+		struct iphdr *ipv4hdr = ip_hdr(skb);
+
+		ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
+		ip_send_check(ipv4hdr);
+	} else {
+		struct ipv6hdr *ipv6hdr = ipv6_hdr(skb);
+
+		ipv6hdr->payload_len = htons(ntohs(ipv6hdr->payload_len) -
+					     trailer_len);
+	}
+	return 0;
+}
+
+/* Fill the software-parser (SWP) offsets and flags of the WQE Ethernet
+ * segment so the device can locate the headers of an ESP packet.
+ *
+ * @skb:  packet being transmitted
+ * @eseg: Ethernet segment of the TX WQE to fill
+ * @mode: xfrm mode of the state (XFRM_MODE_TUNNEL or anything else, which is
+ *        handled as transport mode)
+ * @xo:   xfrm offload info; xo->proto is the protocol carried inside ESP
+ */
+static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
+				struct mlx5_wqe_eth_seg *eseg, u8 mode,
+				struct xfrm_offload *xo)
+{
+	const bool outer_is_v6 = skb->protocol == htons(ETH_P_IPV6);
+	u8 l4_proto;
+
+	/* Tunnel Mode:
+	 * SWP:      OutL3       InL3  InL4
+	 * Pkt: MAC  IP     ESP  IP    L4
+	 *
+	 * Transport Mode:
+	 * SWP:      OutL3       InL4
+	 *           InL3
+	 * Pkt: MAC  IP     ESP  L4
+	 *
+	 * Offsets are in 2-byte words, counting from start of frame
+	 */
+	eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+	if (outer_is_v6)
+		eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+
+	if (mode != XFRM_MODE_TUNNEL) {
+		/* Transport: inner L3 is the outer L3, ESP carries L4 */
+		eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
+		if (outer_is_v6)
+			eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+		l4_proto = xo->proto;
+	} else if (xo->proto == IPPROTO_IPV6) {
+		/* Tunnel with an inner IPv6 header */
+		eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+		eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+		l4_proto = inner_ipv6_hdr(skb)->nexthdr;
+	} else {
+		/* Tunnel with an inner IPv4 header */
+		eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+		l4_proto = inner_ip_hdr(skb)->protocol;
+	}
+
+	/* Only TCP and UDP get an inner L4 offset; UDP is also flagged */
+	if (l4_proto == IPPROTO_UDP)
+		eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+	if (l4_proto == IPPROTO_UDP || l4_proto == IPPROTO_TCP)
+		eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+}
+
+/* Write the 64-bit sequence number (xo->seq.hi:xo->seq.low), big-endian,
+ * into the 8 bytes that follow the ESP header, i.e. the IV field.
+ */
+static void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_offload *xo)
+{
+	u64 esn = ((u64)xo->seq.hi << 32) + xo->seq.low;
+	__be64 seqno = cpu_to_be64(esn);
+	int iv_offset;
+
+	/* Place the SN in the IV field */
+	iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr);
+	skb_store_bits(skb, iv_offset, &seqno, 8);
+}
+
+/* Fill the TX metadata: syndrome, the protocol carried inside ESP and, for
+ * GSO skbs only, the LSO helper fields (1/MSS and the low 16 bits of the
+ * first TCP sequence number).
+ */
+static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
+				     struct mlx5e_ipsec_metadata *mdata,
+				     struct xfrm_offload *xo)
+{
+	if (!skb_is_gso(skb)) {
+		mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD;
+	} else {
+		/* Add LSO metadata indication */
+		struct ip_esp_hdr *esph = ip_esp_hdr(skb);
+		struct tcphdr *tcph = inner_tcp_hdr(skb);
+
+		netdev_dbg(skb->dev, " Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n",
+			   skb->network_header,
+			   skb->transport_header,
+			   skb->inner_network_header,
+			   skb->inner_transport_header);
+		netdev_dbg(skb->dev, " Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n",
+			   skb->len, skb_shinfo(skb)->gso_size,
+			   ntohs(tcph->source), ntohs(tcph->dest),
+			   ntohl(tcph->seq), ntohl(esph->seq_no));
+		mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP;
+		mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb);
+		mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF);
+	}
+
+	/* Set for both the LSO and the non-LSO case */
+	mdata->content.tx.esp_next_proto = xo->proto;
+
+	netdev_dbg(skb->dev, " TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n",
+		   mdata->syndrome, mdata->content.tx.esp_next_proto,
+		   ntohs(mdata->content.tx.mss_inv),
+		   ntohs(mdata->content.tx.seq));
+}
+
+/* Prepare an skb for IPSec-offloaded transmission.
+ *
+ * skbs without xfrm offload info are returned untouched. Otherwise the skb is
+ * validated, its ESP trailer is removed (non-GSO only), the inline metadata
+ * header is inserted, and the WQE SWP fields, the IV and the metadata are
+ * filled in.
+ *
+ * Returns the skb to transmit, or NULL if it was dropped; a dropped skb has
+ * been freed and the matching sw_stats drop counter incremented.
+ */
+struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+					  struct mlx5e_tx_wqe *wqe,
+					  struct sk_buff *skb)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct xfrm_offload *xo = xfrm_offload(skb);
+	struct mlx5e_ipsec_metadata *mdata;
+	atomic64_t *drop_ctr;
+	struct xfrm_state *x;
+
+	if (!xo)
+		return skb;
+
+	/* Only a single transform per packet is handled */
+	if (unlikely(skb->sp->len != 1)) {
+		drop_ctr = &priv->ipsec->sw_stats.ipsec_tx_drop_bundle;
+		goto drop;
+	}
+
+	x = xfrm_input_state(skb);
+	if (unlikely(!x)) {
+		drop_ctr = &priv->ipsec->sw_stats.ipsec_tx_drop_no_state;
+		goto drop;
+	}
+
+	if (unlikely(!x->xso.offload_handle ||
+		     (skb->protocol != htons(ETH_P_IP) &&
+		      skb->protocol != htons(ETH_P_IPV6)))) {
+		drop_ctr = &priv->ipsec->sw_stats.ipsec_tx_drop_not_ip;
+		goto drop;
+	}
+
+	/* GSO skbs keep their trailer; only plain packets are trimmed */
+	if (!skb_is_gso(skb) &&
+	    unlikely(mlx5e_ipsec_remove_trailer(skb, x))) {
+		drop_ctr = &priv->ipsec->sw_stats.ipsec_tx_drop_trailer;
+		goto drop;
+	}
+
+	mdata = mlx5e_ipsec_add_metadata(skb);
+	if (unlikely(IS_ERR(mdata))) {
+		drop_ctr = &priv->ipsec->sw_stats.ipsec_tx_drop_metadata;
+		goto drop;
+	}
+
+	mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo);
+	mlx5e_ipsec_set_iv(skb, xo);
+	mlx5e_ipsec_set_metadata(skb, mdata, xo);
+
+	return skb;
+
+drop:
+	atomic64_inc(drop_ctr);
+	kfree_skb(skb);
+	return NULL;
+}
+
+/* Attach a secpath describing the hardware's work to a received skb.
+ *
+ * Looks up the xfrm state by the SA handle carried in @mdata, appends it to
+ * the skb's secpath and records the crypto result derived from the syndrome.
+ *
+ * Returns the xfrm state, or NULL (with the matching sw_stats counter
+ * incremented) if the secpath could not be allocated, the handle is unknown
+ * or the syndrome is not recognised. The skb is not freed here.
+ */
+static inline struct xfrm_state *
+mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
+		     struct mlx5e_ipsec_metadata *mdata)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct xfrm_state *state;
+	struct xfrm_offload *xo;
+	u32 handle;
+
+	skb->sp = secpath_dup(skb->sp);
+	if (unlikely(!skb->sp)) {
+		atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
+		return NULL;
+	}
+
+	handle = be32_to_cpu(mdata->content.rx.sa_handle);
+	state = mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, handle);
+	if (unlikely(!state)) {
+		atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss);
+		return NULL;
+	}
+
+	/* Record the state in the secpath as an offloaded transform */
+	skb->sp->xvec[skb->sp->len++] = state;
+	skb->sp->olen++;
+
+	xo = xfrm_offload(skb);
+	xo->flags = CRYPTO_DONE;
+
+	if (mdata->syndrome == MLX5E_IPSEC_RX_SYNDROME_DECRYPTED) {
+		xo->status = CRYPTO_SUCCESS;
+	} else if (mdata->syndrome == MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED) {
+		xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
+	} else {
+		atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
+		return NULL;
+	}
+
+	return state;
+}
+
+/* Consume the inline IPSec metadata of a received frame, if it has any.
+ *
+ * Frames that are too short or whose ethertype is not
+ * MLX5E_METADATA_ETHER_TYPE are returned unchanged. Otherwise a secpath is
+ * built from the metadata and the 8 metadata bytes are cut out of the frame.
+ *
+ * Returns the skb, or NULL if it was freed because the metadata could not be
+ * processed.
+ */
+struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
+					  struct sk_buff *skb)
+{
+	struct mlx5e_ipsec_metadata *mdata;
+	struct ethhdr *eth;
+
+	/* Detect inline metadata */
+	if (skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN)
+		return skb;
+
+	eth = (struct ethhdr *)skb->data;
+	if (eth->h_proto != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE))
+		return skb;
+
+	/* Use the metadata */
+	mdata = (struct mlx5e_ipsec_metadata *)(eth + 1);
+	if (unlikely(!mlx5e_ipsec_build_sp(netdev, skb, mdata))) {
+		kfree_skb(skb);
+		return NULL;
+	}
+
+	/* Remove the metadata from the buffer: shift the MAC addresses up so
+	 * they sit directly in front of the real ethertype, which is already
+	 * in its final place, then drop the vacated leading bytes.
+	 */
+	memmove((u8 *)eth + MLX5E_METADATA_ETHER_LEN, eth, 2 * ETH_ALEN);
+	skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN);
+
+	return skb;
+}
+
+/* Report whether the skb carries a secpath whose first xfrm state has a
+ * device offload handle. @netdev and @features are not examined.
+ */
+bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
+			       netdev_features_t features)
+{
+	struct xfrm_state *x;
+
+	if (!skb->sp || !skb->sp->len)
+		return false;
+
+	x = skb->sp->xvec[0];
+	return x && x->xso.offload_handle;
+}
+
+/* Populate mlx5e_ipsec_inverse_table[1 .. MAX_LSO_MSS - 1]; entry 0 is not
+ * written.
+ */
+void mlx5e_ipsec_build_inverse_table(void)
+{
+	u32 mss;
+
+	/* Calculate 1/x inverse table for use in GSO data path.
+	 * Using this table, we provide the IPSec accelerator with the value of
+	 * 1/gso_size so that it can infer the position of each segment inside
+	 * the GSO, and increment the ESP sequence number, and generate the IV.
+	 * The HW needs this value in Q0.16 fixed-point number format
+	 */
+	for (mss = 1; mss < MAX_LSO_MSS; mss++) {
+		u16 inv;
+
+		if (mss == 1)
+			/* 1/1 does not fit in Q0.16; use the largest value */
+			inv = 0xFFFF;
+		else
+			inv = ((1ULL << 32) / mss) >> 16;
+
+		mlx5e_ipsec_inverse_table[mss] = htons(inv);
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
new file mode 100644
index 000000000000..e37ae2598dbb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_IPSEC_RXTX_H__
+#define __MLX5E_IPSEC_RXTX_H__
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+#include <linux/skbuff.h>
+#include "en.h"
+
+/* RX: strip inline metadata and build the secpath; NULL means the skb was
+ * dropped and freed.
+ */
+struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb);
+/* RX completion handler used for RQs when IPSec offload is active */
+void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+
+/* NOTE(review): no definition of mlx5e_ipsec_inverse_table_init() is visible;
+ * the table builder defined in ipsec_rxtx.c is
+ * mlx5e_ipsec_build_inverse_table(), declared in en_accel/ipsec.h. This
+ * prototype looks stale - confirm before relying on it.
+ */
+void mlx5e_ipsec_inverse_table_init(void);
+/* True when the skb's first xfrm state has a device offload handle */
+bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
+ netdev_features_t features);
+/* TX: add metadata and fill WQE SWP fields; NULL means the skb was dropped
+ * and freed.
+ */
+struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_tx_wqe *wqe,
+ struct sk_buff *skb);
+
+#endif /* CONFIG_MLX5_EN_IPSEC */
+
+#endif /* __MLX5E_IPSEC_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
new file mode 100644
index 000000000000..6fea59223dc4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <net/sock.h>
+
+#include "en.h"
+#include "accel/ipsec.h"
+#include "fpga/sdk.h"
+#include "en_accel/ipsec.h"
+
+/* ethtool descriptors for the device counters held in
+ * struct mlx5e_ipsec_stats. The number of entries is also the count of u64
+ * values requested from mlx5_accel_ipsec_counters_read(), so this table must
+ * stay in step with that struct.
+ */
+static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_out_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_bypass_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_in_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_out_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_bypass_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_drop_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_auth_fail_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_drop_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_success) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_fail) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_success) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_fail) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_cmd_drop) },
+};
+
+/* ethtool descriptors for the driver-maintained atomic64_t drop counters in
+ * struct mlx5e_ipsec_sw_stats; reported after the hardware counters.
+ */
+static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sadb_miss) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_syndrome) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_bundle) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_metadata) },
+};
+
+/* Read the atomic64_t counter described by dsc[i] out of the struct at ptr */
+#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
+ atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
+
+/* Counter counts, derived from the descriptor tables */
+#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc)
+#define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc)
+
+/* Total number of IPSec entries exposed through ethtool */
+#define NUM_IPSEC_COUNTERS (NUM_IPSEC_HW_COUNTERS + NUM_IPSEC_SW_COUNTERS)
+
+/* Number of IPSec ethtool counters: 0 when the priv has no IPSec context,
+ * NUM_IPSEC_COUNTERS otherwise.
+ */
+int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
+{
+	return priv->ipsec ? NUM_IPSEC_COUNTERS : 0;
+}
+
+/* Copy the IPSec counter names into @data, one per ETH_GSTRING_LEN slot,
+ * hardware counters first and software counters after them.
+ *
+ * Returns the number of names written: NUM_IPSEC_COUNTERS, or 0 when the priv
+ * has no IPSec context (nothing is written in that case).
+ */
+int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+	uint8_t *slot = data;
+	unsigned int i;
+
+	if (!priv->ipsec)
+		return 0;
+
+	for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) {
+		strcpy(slot, mlx5e_ipsec_hw_stats_desc[i].format);
+		slot += ETH_GSTRING_LEN;
+	}
+
+	for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++) {
+		strcpy(slot, mlx5e_ipsec_sw_stats_desc[i].format);
+		slot += ETH_GSTRING_LEN;
+	}
+
+	return NUM_IPSEC_COUNTERS;
+}
+
+/* Refresh priv->ipsec->stats from the accelerator. If the read fails the
+ * snapshot is zeroed. No-op when the priv has no IPSec context.
+ */
+void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
+{
+	if (!priv->ipsec)
+		return;
+
+	/* The stats struct is handed over as a flat array of u64 counters */
+	if (mlx5_accel_ipsec_counters_read(priv->mdev,
+					   (u64 *)&priv->ipsec->stats,
+					   NUM_IPSEC_HW_COUNTERS))
+		memset(&priv->ipsec->stats, 0, sizeof(priv->ipsec->stats));
+}
+
+/* Copy the IPSec counter values into @data in the same order as the names
+ * from mlx5e_ipsec_get_strings(): hardware snapshot first, then the atomic
+ * software counters.
+ *
+ * Returns the number of values written: NUM_IPSEC_COUNTERS, or 0 when the
+ * priv has no IPSec context.
+ */
+int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+	u64 *out = data;
+	int i;
+
+	if (!priv->ipsec)
+		return 0;
+
+	for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
+		*out++ = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats,
+					      mlx5e_ipsec_hw_stats_desc, i);
+
+	for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
+		*out++ = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats,
+						 mlx5e_ipsec_sw_stats_desc, i);
+
+	return NUM_IPSEC_COUNTERS;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 16b1e96a7050..917fade5f5d5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -31,6 +31,7 @@
*/
#include "en.h"
+#include "en_accel/ipsec.h"
void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
struct ethtool_drvinfo *drvinfo)
@@ -186,7 +187,8 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
MLX5E_NUM_SQ_STATS(priv) +
MLX5E_NUM_PFC_COUNTERS(priv) +
ARRAY_SIZE(mlx5e_pme_status_desc) +
- ARRAY_SIZE(mlx5e_pme_error_desc);
+ ARRAY_SIZE(mlx5e_pme_error_desc) +
+ mlx5e_ipsec_get_count(priv);
case ETH_SS_PRIV_FLAGS:
return ARRAY_SIZE(mlx5e_priv_flags);
@@ -275,6 +277,9 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format);
+ /* IPSec counters */
+ idx += mlx5e_ipsec_get_strings(priv, data + idx * ETH_GSTRING_LEN);
+
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
return;
@@ -403,6 +408,9 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
mlx5e_pme_error_desc, i);
+ /* IPSec counters */
+ idx += mlx5e_ipsec_get_stats(priv, data + idx);
+
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 9f99f624004f..1eac5003084f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -39,6 +39,9 @@
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/ipsec_rxtx.h"
+#include "accel/ipsec.h"
#include "vxlan.h"
struct mlx5e_rq_param {
@@ -115,7 +118,7 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
{
u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
- !params->xdp_prog ?
+ !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
MLX5_WQ_TYPE_LINKED_LIST;
mlx5e_set_rq_type_params(mdev, params, rq_type);
@@ -328,8 +331,10 @@ static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv)
void mlx5e_update_stats(struct mlx5e_priv *priv, bool full)
{
- if (full)
+ if (full) {
mlx5e_update_pcie_counters(priv);
+ mlx5e_ipsec_update_stats(priv);
+ }
mlx5e_update_pport_counters(priv, full);
mlx5e_update_vport_counters(priv);
mlx5e_update_q_counter(priv);
@@ -592,6 +597,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe;
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (MLX5_IPSEC_DEV(mdev)) {
+ err = -EINVAL;
+ netdev_err(c->netdev, "MPWQE RQ with IPSec offload not supported\n");
+ goto err_rq_wq_destroy;
+ }
+#endif
if (!rq->handle_rx_cqe) {
err = -EINVAL;
netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err);
@@ -624,7 +636,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq->alloc_wqe = mlx5e_alloc_rx_wqe;
rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
- rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe;
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (c->priv->ipsec)
+ rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe;
+ else
+#endif
+ rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe;
if (!rq->handle_rx_cqe) {
kfree(rq->wqe.frag_info);
err = -EINVAL;
@@ -635,6 +652,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq->buff.wqe_sz = params->lro_en ?
params->lro_wqe_sz :
MLX5E_SW2HW_MTU(c->priv, c->netdev->mtu);
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (MLX5_IPSEC_DEV(mdev))
+ rq->buff.wqe_sz += MLX5E_METADATA_ETHER_LEN;
+#endif
rq->wqe.page_reuse = !params->xdp_prog && !params->lro_en;
byte_count = rq->buff.wqe_sz;
@@ -1095,6 +1116,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->max_inline = params->tx_max_inline;
sq->min_inline_mode = params->tx_min_inline_mode;
+ if (MLX5_IPSEC_DEV(c->priv->mdev))
+ set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
@@ -1914,6 +1937,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
mlx5e_build_sq_param_common(priv, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ MLX5_SET(sqc, sqc, allow_swp, !!MLX5_IPSEC_DEV(priv->mdev));
}
static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
@@ -3070,8 +3094,6 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
PPORT_802_3_GET(pstats, a_frame_check_sequence_errors);
stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors);
stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards);
- stats->tx_carrier_errors =
- PPORT_802_3_GET(pstats, a_symbol_error_during_carrier);
stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
stats->rx_frame_errors;
stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
@@ -3508,6 +3530,11 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
features = vlan_features_check(skb, features);
features = vxlan_features_check(skb, features);
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (mlx5e_ipsec_feature_check(skb, netdev, features))
+ return features;
+#endif
+
/* Validate if the tunneled packet is being offloaded by HW */
if (skb->encapsulation &&
(features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
@@ -3555,6 +3582,12 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
goto unlock;
}
+ if ((netdev->features & NETIF_F_HW_ESP) && prog) {
+ netdev_warn(netdev, "can't set XDP with IPSec offload\n");
+ err = -EINVAL;
+ goto unlock;
+ }
+
was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
/* no need for full reset when exchanging programs */
reset = (!priv->channels.params.xdp_prog || !prog);
@@ -4046,6 +4079,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
if (MLX5_CAP_GEN(mdev, vport_group_manager))
netdev->switchdev_ops = &mlx5e_switchdev_ops;
#endif
+
+ mlx5e_ipsec_build_netdev(priv);
}
static void mlx5e_create_q_counter(struct mlx5e_priv *priv)
@@ -4074,14 +4109,19 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
void *ppriv)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv);
+ err = mlx5e_ipsec_init(priv);
+ if (err)
+ mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
mlx5e_build_nic_netdev(netdev);
mlx5e_vxlan_init(priv);
}
static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
{
+ mlx5e_ipsec_cleanup(priv);
mlx5e_vxlan_cleanup(priv);
if (priv->channels.params.xdp_prog)
@@ -4473,6 +4513,7 @@ static struct mlx5_interface mlx5e_interface = {
void mlx5e_init(void)
{
+ mlx5e_ipsec_build_inverse_table();
mlx5e_build_ptys2ethtool_map();
mlx5_register_interface(&mlx5e_interface);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 5f3c138c948d..325b2c8c1c6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -41,6 +41,7 @@
#include "eswitch.h"
#include "en_rep.h"
#include "ipoib/ipoib.h"
+#include "en_accel/ipsec_rxtx.h"
static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
{
@@ -996,7 +997,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
for (; work_done < budget; work_done++) {
- struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq);
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_cqe(&cq->wq);
if (!cqe)
break;
@@ -1050,7 +1051,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
u16 wqe_counter;
bool last_wqe;
- cqe = mlx5e_get_cqe(cq);
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
if (!cqe)
break;
@@ -1183,3 +1184,43 @@ wq_free_wqe:
}
#endif /* CONFIG_MLX5_CORE_IPOIB */
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+/* RX completion handler used when IPSec offload is active.
+ *
+ * Builds an skb for the completed WQE, runs it through
+ * mlx5e_ipsec_handle_rx_skb() and, if it survives, completes it and passes it
+ * to GRO. The WQE is popped from the linked-list work queue in every case.
+ */
+void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+	__be16 wqe_counter_be = cqe->wqe_counter;
+	u16 wqe_counter = be16_to_cpu(wqe_counter_be);
+	struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
+	struct mlx5e_wqe_frag_info *wi = &rq->wqe.frag_info[wqe_counter];
+	u32 cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+	struct sk_buff *skb;
+
+	skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+	if (skb)
+		skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb);
+
+	if (unlikely(!skb)) {
+		/* a DROP (no skb built, or the IPSec handling consumed it),
+		 * save the page-reuse checks
+		 */
+		mlx5e_free_rx_wqe(rq, wi);
+	} else {
+		mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+		napi_gro_receive(rq->cq.napi, skb);
+
+		mlx5e_free_rx_wqe_reuse(rq, wi);
+	}
+
+	mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
+		       &wqe->next.next_wqe_index);
+}
+
+#endif /* CONFIG_MLX5_EN_IPSEC */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 0433d69429f3..aaa0f4ebba9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -34,6 +34,7 @@
#include <linux/if_vlan.h>
#include "en.h"
#include "ipoib/ipoib.h"
+#include "en_accel/ipsec_rxtx.h"
#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS
#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
@@ -299,12 +300,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
}
}
-static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_tx_wqe *wqe, u16 pi)
{
- struct mlx5_wq_cyc *wq = &sq->wq;
-
- u16 pi = sq->pc & wq->sz_m1;
- struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi];
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
@@ -319,8 +317,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb)
u16 ds_cnt;
u16 ihs;
- memset(wqe, 0, sizeof(*wqe));
-
mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
if (skb_is_gso(skb)) {
@@ -375,8 +371,21 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)];
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi = sq->pc & wq->sz_m1;
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+
+ memset(wqe, 0, sizeof(*wqe));
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (sq->state & BIT(MLX5E_SQ_STATE_IPSEC)) {
+ skb = mlx5e_ipsec_handle_tx_skb(dev, wqe, skb);
+ if (unlikely(!skb))
+ return NETDEV_TX_OK;
+ }
+#endif
- return mlx5e_sq_xmit(sq, skb);
+ return mlx5e_sq_xmit(sq, skb, wqe, pi);
}
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
@@ -409,7 +418,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
u16 wqe_counter;
bool last_wqe;
- cqe = mlx5e_get_cqe(cq);
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
if (!cqe)
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 5ca6714e3e02..92db28a9ed43 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -32,23 +32,6 @@
#include "en.h"
-struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq)
-{
- struct mlx5_cqwq *wq = &cq->wq;
- u32 ci = mlx5_cqwq_get_ci(wq);
- struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
- u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK;
- u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1;
-
- if (cqe_ownership_bit != sw_ownership_val)
- return NULL;
-
- /* ensure cqe content is read after cqe ownership bit */
- dma_rmb();
-
- return cqe;
-}
-
static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
struct mlx5e_icosq *sq,
struct mlx5_cqe64 *cqe,
@@ -89,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
return;
- cqe = mlx5e_get_cqe(cq);
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
if (likely(!cqe))
return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
index 99cba644b4fc..e37453d838db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -33,10 +33,44 @@
#include <linux/etherdevice.h>
#include <linux/mlx5/cmd.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
#include "mlx5_core.h"
#include "fpga/cmd.h"
+#define MLX5_FPGA_ACCESS_REG_SZ (MLX5_ST_SZ_DW(fpga_access_reg) + \
+ MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+
+/*
+ * Read from or write to FPGA address space through the FPGA_ACCESS_REG
+ * register.
+ *
+ * @size and @addr must both be multiples of four and @size may not exceed
+ * MLX5_FPGA_ACCESS_REG_SIZE_MAX; otherwise -EINVAL is returned.  For a
+ * write, @size bytes are taken from @buf; for a read, @size bytes are
+ * stored into @buf once the register access succeeded.
+ *
+ * Returns 0 on success or the error from mlx5_core_access_reg().
+ */
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+			 void *buf, bool write)
+{
+	u32 in[MLX5_FPGA_ACCESS_REG_SZ] = {0};
+	u32 out[MLX5_FPGA_ACCESS_REG_SZ];
+	void *payload;
+	int ret;
+
+	if ((size & 3) || (addr & 3) || size > MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+		return -EINVAL;
+
+	MLX5_SET(fpga_access_reg, in, size, size);
+	MLX5_SET64(fpga_access_reg, in, address, addr);
+	if (write) {
+		payload = MLX5_ADDR_OF(fpga_access_reg, in, data);
+		memcpy(payload, buf, size);
+	}
+
+	ret = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_FPGA_ACCESS_REG, 0, write);
+	if (ret)
+		return ret;
+
+	if (write)
+		return 0;
+
+	/* Read access: hand the returned data back to the caller */
+	payload = MLX5_ADDR_OF(fpga_access_reg, out, data);
+	memcpy(buf, payload, size);
+
+	return 0;
+}
+
int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps)
{
u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0};
@@ -46,6 +80,49 @@ int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps)
MLX5_REG_FPGA_CAP, 0, 0);
}
+/*
+ * Write operation code @op into the "operation" field of the FPGA_CTRL
+ * register.  Returns the result of mlx5_core_access_reg().
+ */
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_ctrl)];
+
+ MLX5_SET(fpga_ctrl, in, operation, op);
+
+ /* Last argument "true" selects a register write */
+ return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_FPGA_CTRL, 0, true);
+}
+
+/*
+ * Read the sandbox-unit (SBU) extended capabilities from the FPGA into
+ * @caps.
+ *
+ * The location and length of the capabilities are taken from the
+ * sandbox_extended_caps_addr / sandbox_extended_caps_len device caps.
+ * The data is fetched in chunks of at most MLX5_FPGA_ACCESS_REG_SIZE_MAX
+ * bytes through mlx5_fpga_access_reg().
+ *
+ * @size is the capacity of @caps in bytes.  Returns 0 on success,
+ * -EINVAL when @size is negative or smaller than the advertised
+ * capability length, or the error of the failing register access.
+ */
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size)
+{
+	unsigned int cap_size = MLX5_CAP_FPGA(dev, sandbox_extended_caps_len);
+	u64 addr = MLX5_CAP64_FPGA(dev, sandbox_extended_caps_addr);
+	unsigned int read;
+	int ret = 0;
+
+	/*
+	 * Reject a negative size explicitly: compared directly against the
+	 * unsigned cap_size it would be converted to a huge value and pass.
+	 */
+	if (size < 0 || cap_size > (unsigned int)size) {
+		mlx5_core_warn(dev, "Not enough buffer %d for FPGA SBU caps %u\n",
+			       size, cap_size);
+		return -EINVAL;
+	}
+
+	while (cap_size > 0) {
+		read = min_t(unsigned int, cap_size,
+			     MLX5_FPGA_ACCESS_REG_SIZE_MAX);
+
+		ret = mlx5_fpga_access_reg(dev, read, addr, caps, false);
+		if (ret) {
+			mlx5_core_warn(dev, "Error reading FPGA SBU caps %u bytes at address 0x%llx: %d\n",
+				       read, addr, ret);
+			return ret;
+		}
+
+		cap_size -= read;
+		addr += read;
+		caps += read;
+	}
+
+	return ret;
+}
+
int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
{
u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
@@ -62,3 +139,100 @@ int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
query->oper_image = MLX5_GET(fpga_ctrl, out, flash_select_oper);
return 0;
}
+
+/*
+ * Issue the FPGA_CREATE_QP command.
+ *
+ * @fpga_qpc is both input and output: its contents are sent as the
+ * requested QP context and, on success, overwritten with the context
+ * returned by the device.  The new QP number is stored in @fpga_qpn.
+ *
+ * Returns 0 on success or the error from mlx5_cmd_exec(); on error neither
+ * @fpga_qpc nor @fpga_qpn is modified.
+ */
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+			u32 *fpga_qpn)
+{
+	u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)];
+	u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {0};
+	void *ctx;
+	int err;
+
+	MLX5_SET(fpga_create_qp_in, in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP);
+	ctx = MLX5_ADDR_OF(fpga_create_qp_in, in, fpga_qpc);
+	memcpy(ctx, fpga_qpc, MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc));
+
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	ctx = MLX5_ADDR_OF(fpga_create_qp_out, out, fpga_qpc);
+	memcpy(fpga_qpc, ctx, MLX5_FLD_SZ_BYTES(fpga_create_qp_out, fpga_qpc));
+	*fpga_qpn = MLX5_GET(fpga_create_qp_out, out, fpga_qpn);
+
+	return 0;
+}
+
+/*
+ * Issue the FPGA_MODIFY_QP command for FPGA QP @fpga_qpn.  @fields is
+ * written to the command's field_select and the context in @fpga_qpc is
+ * copied into the command.  Returns the result of mlx5_cmd_exec().
+ */
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ enum mlx5_fpga_qpc_field_select fields,
+ void *fpga_qpc)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_modify_qp_out)];
+
+ MLX5_SET(fpga_modify_qp_in, in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP);
+ MLX5_SET(fpga_modify_qp_in, in, field_select, fields);
+ MLX5_SET(fpga_modify_qp_in, in, fpga_qpn, fpga_qpn);
+ memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, in, fpga_qpc), fpga_qpc,
+ MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc));
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+/*
+ * Issue the FPGA_QUERY_QP command for FPGA QP @fpga_qpn and copy the
+ * returned QP context into @fpga_qpc.
+ *
+ * Returns 0 on success or the error from mlx5_cmd_exec(), in which case
+ * @fpga_qpc is left untouched.
+ */
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev,
+		       u32 fpga_qpn, void *fpga_qpc)
+{
+	u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)];
+	u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {0};
+	void *ctx;
+	int err;
+
+	MLX5_SET(fpga_query_qp_in, in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP);
+	MLX5_SET(fpga_query_qp_in, in, fpga_qpn, fpga_qpn);
+
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	ctx = MLX5_ADDR_OF(fpga_query_qp_out, out, fpga_qpc);
+	memcpy(fpga_qpc, ctx, MLX5_FLD_SZ_BYTES(fpga_query_qp_out, fpga_qpc));
+
+	return 0;
+}
+
+/*
+ * Issue the FPGA_DESTROY_QP command for FPGA QP @fpga_qpn.
+ * Returns the result of mlx5_cmd_exec().
+ */
+int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_destroy_qp_out)];
+
+ MLX5_SET(fpga_destroy_qp_in, in, opcode, MLX5_CMD_OP_FPGA_DESTROY_QP);
+ MLX5_SET(fpga_destroy_qp_in, in, fpga_qpn, fpga_qpn);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+/*
+ * Issue the FPGA_QUERY_QP_COUNTERS command for FPGA QP @fpga_qpn and
+ * unpack the five 64-bit counters from the reply into @data.  @clear is
+ * passed through in the command's "clear" field.
+ *
+ * Returns 0 on success or the error from mlx5_cmd_exec(); @data is only
+ * written on success.
+ */
+int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ bool clear, struct mlx5_fpga_qp_counters *data)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)];
+ int ret;
+
+ MLX5_SET(fpga_query_qp_counters_in, in, opcode,
+ MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS);
+ MLX5_SET(fpga_query_qp_counters_in, in, clear, clear);
+ MLX5_SET(fpga_query_qp_counters_in, in, fpga_qpn, fpga_qpn);
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ data->rx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ rx_ack_packets);
+ data->rx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ rx_send_packets);
+ data->tx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ tx_ack_packets);
+ data->tx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ tx_send_packets);
+ data->rx_total_drop = MLX5_GET64(fpga_query_qp_counters_out, out,
+ rx_total_drop);
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
index a74396a61bc3..94bdfd47c3f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
@@ -53,7 +53,32 @@ struct mlx5_fpga_query {
enum mlx5_fpga_status status;
};
+/* Bit flags for the "fields" argument of mlx5_fpga_modify_qp() */
+enum mlx5_fpga_qpc_field_select {
+ MLX5_FPGA_QPC_STATE = BIT(0),
+};
+
+/* Counter values reported for one FPGA QP by mlx5_fpga_query_qp_counters() */
+struct mlx5_fpga_qp_counters {
+ u64 rx_ack_packets;
+ u64 rx_send_packets;
+ u64 tx_ack_packets;
+ u64 tx_send_packets;
+ u64 rx_total_drop;
+};
+
int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps);
int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query);
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op);
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+ void *buf, bool write);
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size);
+
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+ u32 *fpga_qpn);
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ enum mlx5_fpga_qpc_field_select fields, void *fpga_qpc);
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, void *fpga_qpc);
+int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ bool clear, struct mlx5_fpga_qp_counters *data);
+int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn);
#endif /* __MLX5_FPGA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
new file mode 100644
index 000000000000..c4392f741c5f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -0,0 +1,1042 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <net/addrconf.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/vport.h>
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+#include "fpga/conn.h"
+
+#define MLX5_FPGA_PKEY 0xFFFF
+#define MLX5_FPGA_PKEY_INDEX 0 /* RoCE PKEY 0xFFFF is always at index 0 */
+#define MLX5_FPGA_RECV_SIZE 2048
+#define MLX5_FPGA_PORT_NUM 1
+#define MLX5_FPGA_CQ_BUDGET 64
+
+/*
+ * DMA-map the (up to two) scatter entries of @buf in direction
+ * buf->dma_dir.
+ *
+ * A buffer without sg[0] data is left untouched and reported as success.
+ * sg[1] is mapped only when it carries data.  If mapping sg[1] fails, the
+ * sg[0] mapping is undone, so on error nothing remains mapped.
+ *
+ * Returns 0 on success, -ENOMEM on a DMA mapping error.
+ */
+static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn,
+				  struct mlx5_fpga_dma_buf *buf)
+{
+	struct device *dev;
+	int rc;
+
+	if (unlikely(!buf->sg[0].data))
+		return 0;
+
+	dev = &conn->fdev->mdev->pdev->dev;
+
+	buf->sg[0].dma_addr = dma_map_single(dev, buf->sg[0].data,
+					     buf->sg[0].size, buf->dma_dir);
+	rc = dma_mapping_error(dev, buf->sg[0].dma_addr);
+	if (unlikely(rc)) {
+		mlx5_fpga_warn(conn->fdev, "DMA error on sg 0: %d\n", rc);
+		return -ENOMEM;
+	}
+
+	if (!buf->sg[1].data)
+		return 0;
+
+	buf->sg[1].dma_addr = dma_map_single(dev, buf->sg[1].data,
+					     buf->sg[1].size, buf->dma_dir);
+	rc = dma_mapping_error(dev, buf->sg[1].dma_addr);
+	if (likely(!rc))
+		return 0;
+
+	/* Second entry failed: roll back the first mapping */
+	mlx5_fpga_warn(conn->fdev, "DMA error on sg 1: %d\n", rc);
+	dma_unmap_single(dev, buf->sg[0].dma_addr,
+			 buf->sg[0].size, buf->dma_dir);
+
+	return -ENOMEM;
+}
+
+/*
+ * Undo the DMA mappings of @buf.  Each scatter entry is unmapped only if
+ * its data pointer is set; sg[1] is released before sg[0].
+ */
+static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct device *dma_device;
+
+ dma_device = &conn->fdev->mdev->pdev->dev;
+ if (buf->sg[1].data)
+ dma_unmap_single(dma_device, buf->sg[1].dma_addr,
+ buf->sg[1].size, buf->dma_dir);
+
+ if (likely(buf->sg[0].data))
+ dma_unmap_single(dma_device, buf->sg[0].dma_addr,
+ buf->sg[0].size, buf->dma_dir);
+}
+
+/*
+ * DMA-map @buf and post it as one receive WQE on the connection's RQ.
+ *
+ * Only sg[0] is described in the WQE data segment.  Returns 0 on success,
+ * the error from mlx5_fpga_conn_map_buf() if mapping fails, or -EBUSY
+ * (after unmapping again) when the RQ has no free slot.  In the error
+ * cases the caller keeps ownership of @buf.
+ */
+static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct mlx5_wqe_data_seg *data;
+ unsigned int ix;
+ int err = 0;
+
+ err = mlx5_fpga_conn_map_buf(conn, buf);
+ if (unlikely(err))
+ goto out;
+
+ /* Ring is full when producer is a whole ring ahead of consumer */
+ if (unlikely(conn->qp.rq.pc - conn->qp.rq.cc >= conn->qp.rq.size)) {
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+ return -EBUSY;
+ }
+
+ /* The mask assumes rq.size is a power of two */
+ ix = conn->qp.rq.pc & (conn->qp.rq.size - 1);
+ data = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, ix);
+ data->byte_count = cpu_to_be32(buf->sg[0].size);
+ data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+ data->addr = cpu_to_be64(buf->sg[0].dma_addr);
+
+ conn->qp.rq.pc++;
+ /* Remember the buffer so the completion handler can find it by index */
+ conn->qp.rq.bufs[ix] = buf;
+
+ /* Make sure that descriptors are written before doorbell record. */
+ dma_wmb();
+ *conn->qp.wq.rq.db = cpu_to_be32(conn->qp.rq.pc & 0xffff);
+out:
+ return err;
+}
+
+/*
+ * Publish the current SQ producer counter in the doorbell record and then
+ * ring the doorbell by writing the start of @wqe to the UAR at
+ * MLX5_BF_OFFSET.
+ */
+static void mlx5_fpga_conn_notify_hw(struct mlx5_fpga_conn *conn, void *wqe)
+{
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+ *conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc);
+ /* Make sure that doorbell record is visible before ringing */
+ wmb();
+ mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET, NULL);
+}
+
+/*
+ * Build a SEND WQE for the already DMA-mapped @buf at the current SQ
+ * producer index and notify the hardware.
+ *
+ * No ring-space check is done here; both visible callers perform it (or
+ * free a slot) while holding qp.sq.lock before calling.
+ */
+static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct mlx5_wqe_ctrl_seg *ctrl;
+ struct mlx5_wqe_data_seg *data;
+ unsigned int ix, sgi;
+ /* WQE size in segments; starts at 1 for the control segment */
+ int size = 1;
+
+ ix = conn->qp.sq.pc & (conn->qp.sq.size - 1);
+
+ ctrl = mlx5_wq_cyc_get_wqe(&conn->qp.wq.sq, ix);
+ /* Data segments follow directly after the control segment */
+ data = (void *)(ctrl + 1);
+
+ /* One data segment per populated scatter entry; stop at the first gap */
+ for (sgi = 0; sgi < ARRAY_SIZE(buf->sg); sgi++) {
+ if (!buf->sg[sgi].data)
+ break;
+ data->byte_count = cpu_to_be32(buf->sg[sgi].size);
+ data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+ data->addr = cpu_to_be64(buf->sg[sgi].dma_addr);
+ data++;
+ size++;
+ }
+
+ ctrl->imm = 0;
+ ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ ctrl->opmod_idx_opcode = cpu_to_be32(((conn->qp.sq.pc & 0xffff) << 8) |
+ MLX5_OPCODE_SEND);
+ ctrl->qpn_ds = cpu_to_be32(size | (conn->qp.mqp.qpn << 8));
+
+ conn->qp.sq.pc++;
+ /* Remember the buffer so the completion handler can find it by index */
+ conn->qp.sq.bufs[ix] = buf;
+ mlx5_fpga_conn_notify_hw(conn, ctrl);
+}
+
+/*
+ * Queue @buf for transmission on @conn.
+ *
+ * The buffer is DMA-mapped first.  If the SQ has room it is posted right
+ * away; otherwise it is appended to the backlog list, from which the send
+ * completion handler posts it later.  Both outcomes return 0.
+ *
+ * Returns -ENOTCONN if the QP is not active, or the mapping error.
+ */
+int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ unsigned long flags;
+ int err;
+
+ /* Note: this check is made without holding qp.sq.lock */
+ if (!conn->qp.active)
+ return -ENOTCONN;
+
+ err = mlx5_fpga_conn_map_buf(conn, buf);
+ if (err)
+ return err;
+
+ spin_lock_irqsave(&conn->qp.sq.lock, flags);
+
+ /* SQ full: defer the buffer; err is still 0 here */
+ if (conn->qp.sq.pc - conn->qp.sq.cc >= conn->qp.sq.size) {
+ list_add_tail(&buf->list, &conn->qp.sq.backlog);
+ goto out_unlock;
+ }
+
+ mlx5_fpga_conn_post_send(conn, buf);
+
+out_unlock:
+ spin_unlock_irqrestore(&conn->qp.sq.lock, flags);
+ return err;
+}
+
+/*
+ * Allocate one receive buffer of MLX5_FPGA_RECV_SIZE bytes and post it to
+ * the RQ.  The descriptor and its payload share a single allocation, with
+ * the payload placed right after the descriptor.
+ *
+ * Returns 0 on success, -ENOMEM if allocation fails, or the error from
+ * mlx5_fpga_conn_post_recv() (the buffer is freed in that case).
+ */
+static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_dma_buf *buf;
+ int err;
+
+ /* NOTE(review): gfp mask is a literal 0, not a GFP_* flag - confirm intent */
+ buf = kzalloc(sizeof(*buf) + MLX5_FPGA_RECV_SIZE, 0);
+ if (!buf)
+ return -ENOMEM;
+
+ buf->sg[0].data = (void *)(buf + 1);
+ buf->sg[0].size = MLX5_FPGA_RECV_SIZE;
+ buf->dma_dir = DMA_FROM_DEVICE;
+
+ err = mlx5_fpga_conn_post_recv(conn, buf);
+ if (err)
+ kfree(buf);
+
+ return err;
+}
+
+/*
+ * Create a memory key in protection domain @pdn using access mode
+ * MLX5_MKC_ACCESS_MODE_PA, with the lr, lw and length64 fields set, and
+ * store it in @mkey.
+ *
+ * Returns 0 on success, -ENOMEM if the command buffer cannot be
+ * allocated, or the error from mlx5_core_create_mkey().
+ */
+static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+ struct mlx5_core_mkey *mkey)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
+
+ /* The command buffer is not needed after the call, success or not */
+ kvfree(in);
+ return err;
+}
+
+/*
+ * Handle one receive completion.
+ *
+ * Looks up the buffer posted at the completed WQE index, unmaps it and, on
+ * success, passes it to the connection's recv_cb before re-posting the
+ * same buffer.  A non-zero @status, or a connection that is no longer
+ * active, marks the QP inactive and frees the buffer instead.
+ */
+static void mlx5_fpga_conn_rq_cqe(struct mlx5_fpga_conn *conn,
+ struct mlx5_cqe64 *cqe, u8 status)
+{
+ struct mlx5_fpga_dma_buf *buf;
+ int ix, err;
+
+ ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.rq.size - 1);
+ buf = conn->qp.rq.bufs[ix];
+ conn->qp.rq.bufs[ix] = NULL;
+ /* On success, shrink the size to the number of bytes received */
+ if (!status)
+ buf->sg[0].size = be32_to_cpu(cqe->byte_cnt);
+ conn->qp.rq.cc++;
+
+ /* Flush errors are logged at debug level only, like successes */
+ if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR)))
+ mlx5_fpga_warn(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+ else
+ mlx5_fpga_dbg(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+
+ if (unlikely(status || !conn->qp.active)) {
+ conn->qp.active = false;
+ kfree(buf);
+ return;
+ }
+
+ mlx5_fpga_dbg(conn->fdev, "Message with %u bytes received successfully\n",
+ buf->sg[0].size);
+ conn->recv_cb(conn->cb_arg, buf);
+
+ /* Restore the full capacity before handing the buffer back to the RQ */
+ buf->sg[0].size = MLX5_FPGA_RECV_SIZE;
+ err = mlx5_fpga_conn_post_recv(conn, buf);
+ if (unlikely(err)) {
+ mlx5_fpga_warn(conn->fdev,
+ "Failed to re-post recv buf: %d\n", err);
+ kfree(buf);
+ }
+}
+
+/*
+ * Handle one send completion.
+ *
+ * Under qp.sq.lock: releases the completed SQ slot and, if the backlog is
+ * non-empty and the QP is active, posts the oldest backlogged buffer.
+ * After dropping the lock: unmaps the completed buffer, invokes its
+ * complete() callback if set, and marks the QP inactive on any non-zero
+ * @status.
+ */
+static void mlx5_fpga_conn_sq_cqe(struct mlx5_fpga_conn *conn,
+ struct mlx5_cqe64 *cqe, u8 status)
+{
+ struct mlx5_fpga_dma_buf *buf, *nextbuf;
+ unsigned long flags;
+ int ix;
+
+ spin_lock_irqsave(&conn->qp.sq.lock, flags);
+
+ ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.sq.size - 1);
+ buf = conn->qp.sq.bufs[ix];
+ conn->qp.sq.bufs[ix] = NULL;
+ conn->qp.sq.cc++;
+
+ /* Handle backlog still under the spinlock to ensure message post order */
+ if (unlikely(!list_empty(&conn->qp.sq.backlog))) {
+ if (likely(conn->qp.active)) {
+ nextbuf = list_first_entry(&conn->qp.sq.backlog,
+ struct mlx5_fpga_dma_buf, list);
+ list_del(&nextbuf->list);
+ mlx5_fpga_conn_post_send(conn, nextbuf);
+ }
+ }
+
+ spin_unlock_irqrestore(&conn->qp.sq.lock, flags);
+
+ /* Flush errors are logged at debug level only, like successes */
+ if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR)))
+ mlx5_fpga_warn(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+ else
+ mlx5_fpga_dbg(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+
+ if (likely(buf->complete))
+ buf->complete(conn, conn->fdev, buf, status);
+
+ if (unlikely(status))
+ conn->qp.active = false;
+}
+
+/*
+ * Dispatch a CQE to the send or receive completion handler according to
+ * its opcode (upper four bits of op_own).  For the error opcodes the
+ * syndrome of the error CQE is passed on as status; otherwise status is 0.
+ * Unknown opcodes are only logged.
+ */
+static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn,
+ struct mlx5_cqe64 *cqe)
+{
+ u8 opcode, status = 0;
+
+ opcode = cqe->op_own >> 4;
+
+ switch (opcode) {
+ case MLX5_CQE_REQ_ERR:
+ status = ((struct mlx5_err_cqe *)cqe)->syndrome;
+ /* Fall through */
+ case MLX5_CQE_REQ:
+ mlx5_fpga_conn_sq_cqe(conn, cqe, status);
+ break;
+
+ case MLX5_CQE_RESP_ERR:
+ status = ((struct mlx5_err_cqe *)cqe)->syndrome;
+ /* Fall through */
+ case MLX5_CQE_RESP_SEND:
+ mlx5_fpga_conn_rq_cqe(conn, cqe, status);
+ break;
+ default:
+ mlx5_fpga_warn(conn->fdev, "Unexpected cqe opcode %u\n",
+ opcode);
+ }
+}
+
+/* Arm the connection's CQ (MLX5_CQ_DB_REQ_NOT) at the current consumer counter */
+static void mlx5_fpga_conn_arm_cq(struct mlx5_fpga_conn *conn)
+{
+ mlx5_cq_arm(&conn->cq.mcq, MLX5_CQ_DB_REQ_NOT,
+ conn->fdev->conn_res.uar->map, conn->cq.wq.cc);
+}
+
+/* CQ event callback: only logs the event, no recovery is attempted here */
+static void mlx5_fpga_conn_cq_event(struct mlx5_core_cq *mcq,
+ enum mlx5_event event)
+{
+ struct mlx5_fpga_conn *conn;
+
+ conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq);
+ mlx5_fpga_warn(conn->fdev, "CQ event %u on CQ #%u\n", event, mcq->cqn);
+}
+
+/* QP event callback: only logs the event, no recovery is attempted here */
+static void mlx5_fpga_conn_event(struct mlx5_core_qp *mqp, int event)
+{
+ struct mlx5_fpga_conn *conn;
+
+ conn = container_of(mqp, struct mlx5_fpga_conn, qp.mqp);
+ mlx5_fpga_warn(conn->fdev, "QP event %u on QP #%u\n", event, mqp->qpn);
+}
+
+/*
+ * Process up to @budget completions from the connection's CQ.
+ *
+ * If the budget runs out, the tasklet is scheduled to continue later and
+ * the CQ is left un-armed.  Otherwise the CQ was drained and is re-armed.
+ */
+static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn,
+ unsigned int budget)
+{
+ struct mlx5_cqe64 *cqe;
+
+ while (budget) {
+ cqe = mlx5_cqwq_get_cqe(&conn->cq.wq);
+ if (!cqe)
+ break;
+
+ budget--;
+ mlx5_cqwq_pop(&conn->cq.wq);
+ mlx5_fpga_conn_handle_cqe(conn, cqe);
+ mlx5_cqwq_update_db_record(&conn->cq.wq);
+ }
+ /* Budget exhausted: more CQEs may be pending, defer to the tasklet */
+ if (!budget) {
+ tasklet_schedule(&conn->cq.tasklet);
+ return;
+ }
+
+ mlx5_fpga_dbg(conn->fdev, "Re-arming CQ with cc# %u\n", conn->cq.wq.cc);
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+ mlx5_fpga_conn_arm_cq(conn);
+}
+
+/*
+ * Tasklet body: continue CQ processing with a fresh budget.  @data is the
+ * struct mlx5_fpga_conn pointer registered via tasklet_init().  Does
+ * nothing once the QP is marked inactive.
+ */
+static void mlx5_fpga_conn_cq_tasklet(unsigned long data)
+{
+ struct mlx5_fpga_conn *conn = (void *)data;
+
+ if (unlikely(!conn->qp.active))
+ return;
+ mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
+}
+
+/*
+ * CQ completion callback installed as mcq.comp: process up to
+ * MLX5_FPGA_CQ_BUDGET completions.  Does nothing once the QP is marked
+ * inactive.
+ */
+static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq)
+{
+ struct mlx5_fpga_conn *conn;
+
+ conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq);
+ if (unlikely(!conn->qp.active))
+ return;
+ mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
+}
+
+/*
+ * Create the completion queue used by both the SQ and the RQ of @conn.
+ *
+ * @cq_size is rounded up to a power of two.  The CQ work queue is
+ * allocated, every CQE is initialised as invalid, and the CQ is then
+ * created in firmware.  On success the mlx5_core_cq callbacks and doorbell
+ * pointers are set up and the polling tasklet is initialised.
+ *
+ * Returns 0 on success or a negative errno; on failure the CQ work queue
+ * and the temporary command buffer are released.
+ */
+static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
+{
+	struct mlx5_fpga_device *fdev = conn->fdev;
+	struct mlx5_core_dev *mdev = fdev->mdev;
+	u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0};
+	struct mlx5_wq_param wqp;
+	struct mlx5_cqe64 *cqe;
+	int inlen, err, eqn;
+	unsigned int irqn;
+	void *cqc, *in;
+	__be64 *pas;
+	u32 i;
+
+	cq_size = roundup_pow_of_two(cq_size);
+	MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size));
+
+	wqp.buf_numa_node = mdev->priv.numa_node;
+	wqp.db_numa_node = mdev->priv.numa_node;
+
+	err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &conn->cq.wq,
+			       &conn->cq.wq_ctrl);
+	if (err)
+		return err;
+
+	/* Mark every CQE invalid and owned by hardware before first use */
+	for (i = 0; i < mlx5_cqwq_get_size(&conn->cq.wq); i++) {
+		cqe = mlx5_cqwq_get_wqe(&conn->cq.wq, i);
+		cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
+	}
+
+	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+		sizeof(u64) * conn->cq.wq_ctrl.frag_buf.npages;
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in) {
+		err = -ENOMEM;
+		goto err_cqwq;
+	}
+
+	err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn);
+	if (err) {
+		/* Do not leak the command buffer on this early error path */
+		kvfree(in);
+		goto err_cqwq;
+	}
+
+	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+	MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
+	MLX5_SET(cqc, cqc, c_eqn, eqn);
+	MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
+	MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.frag_buf.page_shift -
+		 MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET64(cqc, cqc, dbr_addr, conn->cq.wq_ctrl.db.dma);
+
+	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
+	mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.frag_buf, pas);
+
+	err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen);
+	/* The command buffer is not needed after the call, success or not */
+	kvfree(in);
+
+	if (err)
+		goto err_cqwq;
+
+	conn->cq.mcq.cqe_sz = 64;
+	conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db;
+	conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1;
+	*conn->cq.mcq.set_ci_db = 0;
+	*conn->cq.mcq.arm_db = 0;
+	conn->cq.mcq.vector = 0;
+	conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete;
+	conn->cq.mcq.event = mlx5_fpga_conn_cq_event;
+	conn->cq.mcq.irqn = irqn;
+	conn->cq.mcq.uar = fdev->conn_res.uar;
+	tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet,
+		     (unsigned long)conn);
+
+	mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn);
+
+	goto out;
+
+err_cqwq:
+	mlx5_cqwq_destroy(&conn->cq.wq_ctrl);
+out:
+	return err;
+}
+
+/*
+ * Tear down the connection's CQ: stop the polling tasklet, destroy the CQ
+ * in firmware and free the CQ work queue.
+ */
+static void mlx5_fpga_conn_destroy_cq(struct mlx5_fpga_conn *conn)
+{
+ /*
+ * NOTE(review): tasklet_kill() is called on a tasklet that was just
+ * disabled; confirm it cannot wait on a scheduled-but-disabled tasklet.
+ */
+ tasklet_disable(&conn->cq.tasklet);
+ tasklet_kill(&conn->cq.tasklet);
+ mlx5_core_destroy_cq(conn->fdev->mdev, &conn->cq.mcq);
+ mlx5_cqwq_destroy(&conn->cq.wq_ctrl);
+}
+
+/*
+ * Allocate the QP work queues described by @qpc, using the device's NUMA
+ * node for both the buffer and the doorbell record.  Returns the result of
+ * mlx5_wq_qp_create().
+ */
+static int mlx5_fpga_conn_create_wq(struct mlx5_fpga_conn *conn, void *qpc)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ struct mlx5_wq_param wqp;
+
+ wqp.buf_numa_node = mdev->priv.numa_node;
+ wqp.db_numa_node = mdev->priv.numa_node;
+
+ return mlx5_wq_qp_create(mdev, &wqp, qpc, &conn->qp.wq,
+ &conn->qp.wq_ctrl);
+}
+
+/*
+ * Create the host-side RC QP of the connection.
+ *
+ * @tx_size and @rx_size are rounded up to powers of two and become the SQ
+ * and RQ depths.  Allocates the work queues, the per-slot buffer pointer
+ * arrays for both rings and the create_qp command, then creates the QP in
+ * firmware with both send and receive completions going to the
+ * connection's CQ.
+ *
+ * Returns 0 on success or a negative errno; on failure everything
+ * allocated here is released.
+ */
+static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
+ unsigned int tx_size, unsigned int rx_size)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {0};
+ void *in = NULL, *qpc;
+ int err, inlen;
+
+ conn->qp.rq.pc = 0;
+ conn->qp.rq.cc = 0;
+ conn->qp.rq.size = roundup_pow_of_two(rx_size);
+ conn->qp.sq.pc = 0;
+ conn->qp.sq.cc = 0;
+ conn->qp.sq.size = roundup_pow_of_two(tx_size);
+
+ /* Temporary QPC carries only the ring geometry for WQ allocation */
+ MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+ MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(conn->qp.rq.size));
+ MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(conn->qp.sq.size));
+ err = mlx5_fpga_conn_create_wq(conn, temp_qpc);
+ if (err)
+ goto out;
+
+ conn->qp.rq.bufs = kvzalloc(sizeof(conn->qp.rq.bufs[0]) *
+ conn->qp.rq.size, GFP_KERNEL);
+ if (!conn->qp.rq.bufs) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ conn->qp.sq.bufs = kvzalloc(sizeof(conn->qp.sq.bufs[0]) *
+ conn->qp.sq.size, GFP_KERNEL);
+ if (!conn->qp.sq.bufs) {
+ err = -ENOMEM;
+ goto err_rq_bufs;
+ }
+
+ /* Command size: fixed part plus one PAS entry per WQ buffer page */
+ inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+ MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
+ conn->qp.wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_sq_bufs;
+ }
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, uar_page, fdev->conn_res.uar->index);
+ MLX5_SET(qpc, qpc, log_page_size,
+ conn->qp.wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(qpc, qpc, fre, 1);
+ MLX5_SET(qpc, qpc, rlky, 1);
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, pd, fdev->conn_res.pdn);
+ MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+ MLX5_SET(qpc, qpc, log_rq_size, ilog2(conn->qp.rq.size));
+ MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
+ MLX5_SET(qpc, qpc, log_sq_size, ilog2(conn->qp.sq.size));
+ MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
+ MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
+ MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
+ if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
+ MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
+
+ mlx5_fill_page_array(&conn->qp.wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas));
+
+ err = mlx5_core_create_qp(mdev, &conn->qp.mqp, in, inlen);
+ if (err)
+ goto err_sq_bufs;
+
+ conn->qp.mqp.event = mlx5_fpga_conn_event;
+ mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.mqp.qpn);
+
+ goto out;
+
+err_sq_bufs:
+ kvfree(conn->qp.sq.bufs);
+err_rq_bufs:
+ kvfree(conn->qp.rq.bufs);
+err_wq:
+ mlx5_wq_destroy(&conn->qp.wq_ctrl);
+out:
+ /* Reached on success and on every error path; in may still be NULL */
+ kvfree(in);
+ return err;
+}
+
+/* Unmap and free every receive buffer still recorded in the RQ slot array */
+static void mlx5_fpga_conn_free_recv_bufs(struct mlx5_fpga_conn *conn)
+{
+ int ix;
+
+ for (ix = 0; ix < conn->qp.rq.size; ix++) {
+ if (!conn->qp.rq.bufs[ix])
+ continue;
+ mlx5_fpga_conn_unmap_buf(conn, conn->qp.rq.bufs[ix]);
+ kfree(conn->qp.rq.bufs[ix]);
+ conn->qp.rq.bufs[ix] = NULL;
+ }
+}
+
+/*
+ * Complete all outstanding send buffers with a flush error.
+ *
+ * Covers both buffers still recorded in the SQ slot array and buffers
+ * waiting on the backlog list.  Each is unmapped and, if it has a
+ * complete() callback, completed with MLX5_CQE_SYNDROME_WR_FLUSH_ERR.
+ * The buffers themselves are not freed here.
+ */
+static void mlx5_fpga_conn_flush_send_bufs(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_dma_buf *buf, *temp;
+ int ix;
+
+ for (ix = 0; ix < conn->qp.sq.size; ix++) {
+ buf = conn->qp.sq.bufs[ix];
+ if (!buf)
+ continue;
+ conn->qp.sq.bufs[ix] = NULL;
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+ if (!buf->complete)
+ continue;
+ buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR);
+ }
+ /* Backlog entries are not unlinked from the list by this loop */
+ list_for_each_entry_safe(buf, temp, &conn->qp.sq.backlog, list) {
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+ if (!buf->complete)
+ continue;
+ buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR);
+ }
+}
+
+/*
+ * Destroy the host-side QP, then release what was attached to it: posted
+ * receive buffers, pending send buffers, both slot arrays and the work
+ * queues.
+ */
+static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn)
+{
+ mlx5_core_destroy_qp(conn->fdev->mdev, &conn->qp.mqp);
+ mlx5_fpga_conn_free_recv_bufs(conn);
+ mlx5_fpga_conn_flush_send_bufs(conn);
+ kvfree(conn->qp.sq.bufs);
+ kvfree(conn->qp.rq.bufs);
+ mlx5_wq_destroy(&conn->qp.wq_ctrl);
+}
+
+/*
+ * Move the host-side QP to the RESET state (2RST_QP).  No QP context is
+ * supplied.  Returns the result of mlx5_core_qp_modify().
+ */
+static inline int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_core_dev *mdev = conn->fdev->mdev;
+
+ mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.mqp.qpn);
+
+ return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, NULL,
+ &conn->qp.mqp);
+}
+
+/*
+ * Move the host-side QP from RESET to INIT (RST2INIT_QP).
+ *
+ * The QP context sets the RC service type, the fixed FPGA pkey index and
+ * port, the connection's PD, its CQ for both directions and the doorbell
+ * record address.
+ *
+ * Returns 0 on success, -ENOMEM if the context cannot be allocated, or
+ * the error from mlx5_core_qp_modify().
+ */
+static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
+{
+	struct mlx5_fpga_device *fdev = conn->fdev;
+	struct mlx5_core_dev *mdev = fdev->mdev;
+	u32 *ctx;
+	int ret;
+
+	mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.mqp.qpn);
+
+	ctx = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	MLX5_SET(qpc, ctx, st, MLX5_QP_ST_RC);
+	MLX5_SET(qpc, ctx, pm_state, MLX5_QP_PM_MIGRATED);
+	MLX5_SET(qpc, ctx, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
+	MLX5_SET(qpc, ctx, primary_address_path.port, MLX5_FPGA_PORT_NUM);
+	MLX5_SET(qpc, ctx, pd, conn->fdev->conn_res.pdn);
+	MLX5_SET(qpc, ctx, cqn_snd, conn->cq.mcq.cqn);
+	MLX5_SET(qpc, ctx, cqn_rcv, conn->cq.mcq.cqn);
+	MLX5_SET64(qpc, ctx, dbr_addr, conn->qp.wq_ctrl.db.dma);
+
+	ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, ctx,
+				  &conn->qp.mqp);
+	if (ret)
+		mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", ret);
+
+	kfree(ctx);
+	return ret;
+}
+
+/*
+ * Move the host-side QP from INIT to RTR (INIT2RTR_QP).
+ *
+ * The remote side is the FPGA QP: remote QPN, next receive PSN, remote
+ * MAC and remote IP are all taken from conn->fpga_qpn / conn->fpga_qpc.
+ *
+ * Returns 0 on success, -ENOMEM if the context cannot be allocated, or
+ * the error from mlx5_core_qp_modify().
+ */
+static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
+{
+	struct mlx5_fpga_device *fdev = conn->fdev;
+	struct mlx5_core_dev *mdev = fdev->mdev;
+	u32 *qpc = NULL;
+	int err;
+
+	mlx5_fpga_dbg(conn->fdev, "QP RTR\n");
+
+	qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+	if (!qpc) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_1K_BYTES);
+	MLX5_SET(qpc, qpc, log_msg_max, (u8)MLX5_CAP_GEN(mdev, log_max_msg));
+	MLX5_SET(qpc, qpc, remote_qpn, conn->fpga_qpn);
+	/* Our next expected PSN is the FPGA's next send PSN */
+	MLX5_SET(qpc, qpc, next_rcv_psn,
+		 MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn));
+	MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
+	MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM);
+	ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
+			MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32));
+	MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
+		 MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port));
+	MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
+		 conn->qp.sgid_index);
+	MLX5_SET(qpc, qpc, primary_address_path.hop_limit, 0);
+	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
+	       MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_ip),
+	       MLX5_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip));
+
+	err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
+				  &conn->qp.mqp);
+	if (err) {
+		/* Name the transition that actually failed */
+		mlx5_fpga_warn(fdev, "qp_modify INIT2RTR failed: %d\n", err);
+		goto out;
+	}
+
+out:
+	kfree(qpc);
+	return err;
+}
+
+/*
+ * Transition the driver-side QP from RTR to RTS.
+ *
+ * Programs the ACK/RNR/retry parameters and seeds next_send_psn from the
+ * FPGA QP's next_rcv_psn, then issues the RTR2RTS modify command with the
+ * RNR timeout flagged as an optional parameter.
+ *
+ * Returns 0 on success, -ENOMEM if the QP context cannot be allocated, or
+ * the error returned by mlx5_core_qp_modify().
+ */
+static inline int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn)
+{
+	struct mlx5_fpga_device *fdev = conn->fdev;
+	struct mlx5_core_dev *mdev = fdev->mdev;
+	u32 opt_mask;
+	u32 *qpc;
+	int err;
+
+	mlx5_fpga_dbg(fdev, "QP RTS\n");
+
+	qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+	if (!qpc)
+		return -ENOMEM;
+
+	MLX5_SET(qpc, qpc, log_ack_req_freq, 8);
+	MLX5_SET(qpc, qpc, min_rnr_nak, 0x12);
+	MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x12); /* ~1.07s */
+	/* Start sending at the PSN the FPGA QP expects to receive next */
+	MLX5_SET(qpc, qpc, next_send_psn,
+		 MLX5_GET(fpga_qpc, conn->fpga_qpc, next_rcv_psn));
+	MLX5_SET(qpc, qpc, retry_count, 7);
+	MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
+
+	opt_mask = MLX5_QP_OPTPAR_RNR_TIMEOUT;
+	err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, opt_mask, qpc,
+				  &conn->qp.mqp);
+	if (err)
+		mlx5_fpga_warn(fdev, "qp_modify RTR2RTS failed: %d\n", err);
+
+	kfree(qpc);
+	return err;
+}
+
+/*
+ * Bring both ends of the connection up.
+ *
+ * Sets the FPGA QP state to ACTIVE, then walks the driver-side QP through
+ * reset -> INIT -> RTR -> RTS, posting receive buffers once the QP is in
+ * INIT. When a step after the FPGA QP activation fails, any posted receive
+ * buffers are freed and an attempt is made to put the FPGA QP back in INIT
+ * (a failure of that revert is only logged).
+ *
+ * Returns 0 on success or the error code of the step that failed.
+ */
+static int mlx5_fpga_conn_connect(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ int err;
+
+ /* Activate the FPGA side first; only the state field is modified */
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_ACTIVE);
+ err = mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn,
+ MLX5_FPGA_QPC_STATE, &conn->fpga_qpc);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to activate FPGA RC QP: %d\n", err);
+ goto out;
+ }
+
+ err = mlx5_fpga_conn_reset_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to change QP state to reset\n");
+ goto err_fpga_qp;
+ }
+
+ err = mlx5_fpga_conn_init_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to modify QP from RESET to INIT\n");
+ goto err_fpga_qp;
+ }
+ conn->qp.active = true;
+
+ /* Keep posting receive buffers until a post attempt returns non-zero */
+ while (!mlx5_fpga_conn_post_recv_buf(conn))
+ ;
+
+ err = mlx5_fpga_conn_rtr_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to change QP state from INIT to RTR\n");
+ goto err_recv_bufs;
+ }
+
+ err = mlx5_fpga_conn_rts_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to change QP state from RTR to RTS\n");
+ goto err_recv_bufs;
+ }
+ goto out;
+
+err_recv_bufs:
+ mlx5_fpga_conn_free_recv_bufs(conn);
+err_fpga_qp:
+ /* Best-effort revert of the FPGA QP; err keeps the original failure */
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT);
+ if (mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn,
+ MLX5_FPGA_QPC_STATE, &conn->fpga_qpc))
+ mlx5_fpga_err(fdev, "Failed to revert FPGA QP to INIT\n");
+out:
+ return err;
+}
+
+/*
+ * Create a connection between the driver and the FPGA.
+ *
+ * @fdev:    FPGA device the connection belongs to
+ * @attr:    connection attributes; recv_cb is mandatory, tx_size and
+ *           rx_size size the QP and the CQ
+ * @qp_type: value written to the qp_type field of the FPGA QP context
+ *
+ * Allocates the connection object, reserves and programs a source GID,
+ * creates the CQ, the driver-side QP and the FPGA QP, then connects them
+ * via mlx5_fpga_conn_connect().
+ *
+ * Returns the new connection, or an ERR_PTR(): -EINVAL when attr->recv_cb
+ * is not set, -ENOMEM when the object cannot be allocated, otherwise the
+ * error of the step that failed. Everything acquired before the failure is
+ * released in reverse order.
+ */
+struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr,
+ enum mlx5_ifc_fpga_qp_type qp_type)
+{
+ struct mlx5_fpga_conn *ret, *conn;
+ u8 *remote_mac, *remote_ip;
+ int err;
+
+ if (!attr->recv_cb)
+ return ERR_PTR(-EINVAL);
+
+ conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+ if (!conn)
+ return ERR_PTR(-ENOMEM);
+
+ conn->fdev = fdev;
+ INIT_LIST_HEAD(&conn->qp.sq.backlog);
+
+ spin_lock_init(&conn->qp.sq.lock);
+
+ conn->recv_cb = attr->recv_cb;
+ conn->cb_arg = attr->cb_arg;
+
+ /* The FPGA QPC "remote" MAC is filled with the MAC queried for vport 0 */
+ remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32);
+ err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err;
+ }
+
+ /* Build Modified EUI-64 IPv6 address from the MAC address */
+ remote_ip = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_ip);
+ remote_ip[0] = 0xfe;
+ remote_ip[1] = 0x80;
+ addrconf_addr_eui48(&remote_ip[8], remote_mac);
+
+ err = mlx5_core_reserved_gid_alloc(fdev->mdev, &conn->qp.sgid_index);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to allocate SGID: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err;
+ }
+
+ /* Program the reserved GID entry with the address built above */
+ err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index,
+ MLX5_ROCE_VERSION_2,
+ MLX5_ROCE_L3_TYPE_IPV6,
+ remote_ip, remote_mac, true, 0);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_rsvd_gid;
+ }
+ mlx5_fpga_dbg(fdev, "Reserved SGID index %u\n", conn->qp.sgid_index);
+
+ /* Allow for one cqe per rx/tx wqe, plus one cqe for the next wqe,
+ * created during processing of the cqe
+ */
+ err = mlx5_fpga_conn_create_cq(conn,
+ (attr->tx_size + attr->rx_size) * 2);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to create CQ: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_gid;
+ }
+
+ mlx5_fpga_conn_arm_cq(conn);
+
+ err = mlx5_fpga_conn_create_qp(conn, attr->tx_size, attr->rx_size);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to create QP: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_cq;
+ }
+
+ /* Describe the FPGA-side QP; its peer is the QP created just above */
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, qp_type, qp_type);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, st, MLX5_FPGA_QPC_ST_RC);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, ether_type, ETH_P_8021Q);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, vid, 0);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, next_rcv_psn, 1);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, next_send_psn, 0);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, pkey, MLX5_FPGA_PKEY);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.mqp.qpn);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, rnr_retry, 7);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, retry_count, 7);
+
+ err = mlx5_fpga_create_qp(fdev->mdev, &conn->fpga_qpc,
+ &conn->fpga_qpn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to create FPGA RC QP: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_qp;
+ }
+
+ err = mlx5_fpga_conn_connect(conn);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto err_conn;
+ }
+
+ mlx5_fpga_dbg(fdev, "FPGA QPN is %u\n", conn->fpga_qpn);
+ ret = conn;
+ goto out;
+
+ /* Error unwind: each label releases what was acquired before its jump */
+err_conn:
+ mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn);
+err_qp:
+ mlx5_fpga_conn_destroy_qp(conn);
+err_cq:
+ mlx5_fpga_conn_destroy_cq(conn);
+err_gid:
+ mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL,
+ NULL, false, 0);
+err_rsvd_gid:
+ mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index);
+err:
+ kfree(conn);
+out:
+ return ret;
+}
+
+/*
+ * Tear down a connection created by mlx5_fpga_conn_create() and free it.
+ *
+ * Marks the QP inactive and quiesces completion handling (tasklet disabled,
+ * CQ interrupt synchronized) before destroying the FPGA QP, moving the
+ * driver-side QP to the error state, and releasing the QP, CQ and GID.
+ * A failure of the 2ERR modify is logged and teardown continues.
+ */
+void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ int err = 0;
+
+ conn->qp.active = false;
+ tasklet_disable(&conn->cq.tasklet);
+ synchronize_irq(conn->cq.mcq.irqn);
+
+ mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn);
+ err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, NULL,
+ &conn->qp.mqp);
+ if (err)
+ mlx5_fpga_warn(fdev, "qp_modify 2ERR failed: %d\n", err);
+ mlx5_fpga_conn_destroy_qp(conn);
+ mlx5_fpga_conn_destroy_cq(conn);
+
+ /* Clear the GID entry, then return the reserved index */
+ mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0,
+ NULL, NULL, false, 0);
+ mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index);
+ kfree(conn);
+}
+
+/*
+ * Acquire the per-device resources shared by all FPGA connections:
+ * RoCE enablement on the NIC vport, a UAR page, a PD and an mkey.
+ *
+ * Returns 0 on success. On failure the resources obtained so far are
+ * released in reverse order and the failing step's error is returned.
+ */
+int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev)
+{
+	struct mlx5_core_dev *mdev = fdev->mdev;
+	int rc;
+
+	rc = mlx5_nic_vport_enable_roce(mdev);
+	if (rc) {
+		mlx5_fpga_err(fdev, "Failed to enable RoCE: %d\n", rc);
+		return rc;
+	}
+
+	fdev->conn_res.uar = mlx5_get_uars_page(mdev);
+	if (IS_ERR(fdev->conn_res.uar)) {
+		rc = PTR_ERR(fdev->conn_res.uar);
+		mlx5_fpga_err(fdev, "get_uars_page failed, %d\n", rc);
+		goto disable_roce;
+	}
+	mlx5_fpga_dbg(fdev, "Allocated UAR index %u\n",
+		      fdev->conn_res.uar->index);
+
+	rc = mlx5_core_alloc_pd(mdev, &fdev->conn_res.pdn);
+	if (rc) {
+		mlx5_fpga_err(fdev, "alloc pd failed, %d\n", rc);
+		goto put_uar;
+	}
+	mlx5_fpga_dbg(fdev, "Allocated PD %u\n", fdev->conn_res.pdn);
+
+	rc = mlx5_fpga_conn_create_mkey(mdev, fdev->conn_res.pdn,
+					&fdev->conn_res.mkey);
+	if (rc) {
+		mlx5_fpga_err(fdev, "create mkey failed, %d\n", rc);
+		goto free_pd;
+	}
+	mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey.key);
+
+	return 0;
+
+free_pd:
+	mlx5_core_dealloc_pd(mdev, fdev->conn_res.pdn);
+put_uar:
+	mlx5_put_uars_page(mdev, fdev->conn_res.uar);
+disable_roce:
+	mlx5_nic_vport_disable_roce(mdev);
+	return rc;
+}
+
+/*
+ * Release the per-device connection resources in the reverse order of
+ * mlx5_fpga_conn_device_init(): mkey, PD, UAR page, then RoCE.
+ */
+void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev)
+{
+	struct mlx5_core_dev *mdev = fdev->mdev;
+
+	mlx5_core_destroy_mkey(mdev, &fdev->conn_res.mkey);
+	mlx5_core_dealloc_pd(mdev, fdev->conn_res.pdn);
+	mlx5_put_uars_page(mdev, fdev->conn_res.uar);
+	mlx5_nic_vport_disable_roce(mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
new file mode 100644
index 000000000000..44bd9eccc711
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_CONN_H__
+#define __MLX5_FPGA_CONN_H__
+
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+
+#include "fpga/core.h"
+#include "fpga/sdk.h"
+#include "wq.h"
+
+/* State of one driver <-> FPGA connection (a driver QP paired with an FPGA QP) */
+struct mlx5_fpga_conn {
+ /* Owning FPGA device */
+ struct mlx5_fpga_device *fdev;
+
+ /* Receive callback and its opaque argument, copied from the creation attributes */
+ void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf);
+ void *cb_arg;
+
+ /* FPGA QP */
+ /* FPGA-side QP context and the QP number returned when it was created */
+ u32 fpga_qpc[MLX5_ST_SZ_DW(fpga_qpc)];
+ u32 fpga_qpn;
+
+ /* CQ */
+ struct {
+ struct mlx5_cqwq wq;
+ struct mlx5_frag_wq_ctrl wq_ctrl;
+ struct mlx5_core_cq mcq;
+ struct tasklet_struct tasklet;
+ } cq;
+
+ /* QP */
+ struct {
+ /* Set once the QP reaches INIT, cleared at the start of destroy */
+ bool active;
+ /* Index of the reserved source GID programmed for this connection */
+ int sgid_index;
+ struct mlx5_wq_qp wq;
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5_core_qp mqp;
+ /* Send queue; pc/cc are presumably producer/consumer counters - TODO confirm */
+ struct {
+ spinlock_t lock; /* Protects all SQ state */
+ unsigned int pc;
+ unsigned int cc;
+ unsigned int size;
+ struct mlx5_fpga_dma_buf **bufs;
+ struct list_head backlog;
+ } sq;
+ /* Receive queue; same layout as the SQ minus lock and backlog */
+ struct {
+ unsigned int pc;
+ unsigned int cc;
+ unsigned int size;
+ struct mlx5_fpga_dma_buf **bufs;
+ } rq;
+ } qp;
+};
+
+int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev);
+void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev);
+struct mlx5_fpga_conn *
+mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr,
+ enum mlx5_ifc_fpga_qp_type qp_type);
+void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn);
+int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf);
+
+#endif /* __MLX5_FPGA_CONN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index d88b332e9669..31e5a2627eb8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -35,7 +35,9 @@
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
+#include "lib/mlx5.h"
#include "fpga/core.h"
+#include "fpga/conn.h"
static const char *const mlx5_fpga_error_strings[] = {
"Null Syndrome",
@@ -100,10 +102,34 @@ static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
return 0;
}
+/*
+ * Reset the sandbox unit (SBU) with bypass enabled around the reset:
+ * bypass on -> reset sandbox -> bypass off.
+ *
+ * Returns 0 on success, or the error of the first control operation that
+ * fails; the remaining operations are then skipped.
+ */
+int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
+{
+	struct mlx5_core_dev *mdev = fdev->mdev;
+	int rc;
+
+	rc = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+	if (rc) {
+		mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", rc);
+		goto done;
+	}
+
+	rc = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
+	if (rc) {
+		mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", rc);
+		goto done;
+	}
+
+	rc = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
+	if (rc)
+		mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", rc);
+
+done:
+	return rc;
+}
+
int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
{
struct mlx5_fpga_device *fdev = mdev->fpga;
unsigned long flags;
+ unsigned int max_num_qps;
int err;
if (!fdev)
@@ -123,6 +149,28 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
mlx5_fpga_image_name(fdev->last_oper_image),
MLX5_CAP_FPGA(fdev->mdev, image_version));
+ max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+ err = mlx5_core_reserve_gids(mdev, max_num_qps);
+ if (err)
+ goto out;
+
+ err = mlx5_fpga_conn_device_init(fdev);
+ if (err)
+ goto err_rsvd_gid;
+
+ if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+ err = mlx5_fpga_device_brb(fdev);
+ if (err)
+ goto err_conn_init;
+ }
+
+ goto out;
+
+err_conn_init:
+ mlx5_fpga_conn_device_cleanup(fdev);
+
+err_rsvd_gid:
+ mlx5_core_unreserve_gids(mdev, max_num_qps);
out:
spin_lock_irqsave(&fdev->state_lock, flags);
fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
@@ -130,7 +178,7 @@ out:
return err;
}
-int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
+int mlx5_fpga_init(struct mlx5_core_dev *mdev)
{
struct mlx5_fpga_device *fdev = NULL;
@@ -151,9 +199,42 @@ int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
return 0;
}
-void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev)
+/*
+ * Stop the FPGA device and release what device start acquired: the
+ * connection resources and the reserved GIDs.
+ *
+ * Does nothing when there is no FPGA device or when its state is not
+ * MLX5_FPGA_STATUS_SUCCESS. The state is moved to MLX5_FPGA_STATUS_NONE
+ * under state_lock before any teardown is done.
+ */
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ unsigned int max_num_qps;
+ unsigned long flags;
+ int err;
+
+ if (!fdev)
+ return;
+
+ spin_lock_irqsave(&fdev->state_lock, flags);
+ if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
+ spin_unlock_irqrestore(&fdev->state_lock, flags);
+ return;
+ }
+ fdev->state = MLX5_FPGA_STATUS_NONE;
+ spin_unlock_irqrestore(&fdev->state_lock, flags);
+
+ /* For a user image, switch SBU bypass back on; failure is only logged */
+ if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+ err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+ if (err)
+ mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
+ err);
+ }
+
+ mlx5_fpga_conn_device_cleanup(fdev);
+ max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+ mlx5_core_unreserve_gids(mdev, max_num_qps);
+}
+
+void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
{
- kfree(mdev->fpga);
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+
+ mlx5_fpga_device_stop(mdev);
+ kfree(fdev);
mdev->fpga = NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index c55044d66778..82405ed84725 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -44,6 +44,15 @@ struct mlx5_fpga_device {
enum mlx5_fpga_status state;
enum mlx5_fpga_image last_admin_image;
enum mlx5_fpga_image last_oper_image;
+
+ /* QP Connection resources */
+ struct {
+ u32 pdn;
+ struct mlx5_core_mkey mkey;
+ struct mlx5_uars_page *uar;
+ } conn_res;
+
+ struct mlx5_fpga_ipsec *ipsec;
};
#define mlx5_fpga_dbg(__adev, format, ...) \
@@ -68,19 +77,20 @@ struct mlx5_fpga_device {
#define mlx5_fpga_info(__adev, format, ...) \
dev_info(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
-int mlx5_fpga_device_init(struct mlx5_core_dev *mdev);
-void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev);
+int mlx5_fpga_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
#else
-static inline int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
+static inline int mlx5_fpga_init(struct mlx5_core_dev *mdev)
{
return 0;
}
-static inline void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev)
+static inline void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
{
}
@@ -89,6 +99,10 @@ static inline int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
return 0;
}
+static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+}
+
static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
void *data)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
new file mode 100644
index 000000000000..42970e2a05ff
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "fpga/ipsec.h"
+#include "fpga/sdk.h"
+#include "fpga/core.h"
+
+#define SBU_QP_QUEUE_SIZE 8
+
+/*
+ * Syndrome values carried in the response to an IPSec command; translated
+ * to errno values by syndrome_to_errno().
+ */
+enum mlx5_ipsec_response_syndrome {
+ MLX5_IPSEC_RESPONSE_SUCCESS = 0,
+ MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
+ MLX5_IPSEC_RESPONSE_SADB_ISSUE = 2,
+ MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3,
+};
+
+/* Lifecycle of an IPSec SA command context */
+enum mlx5_fpga_ipsec_sacmd_status {
+ /* Set when the command is handed to the connection for sending */
+ MLX5_FPGA_IPSEC_SACMD_PENDING,
+ /* Set by the send-completion callback when it reports a non-zero status */
+ MLX5_FPGA_IPSEC_SACMD_SEND_FAIL,
+ /* Set by the receive callback once a matching response was processed */
+ MLX5_FPGA_IPSEC_SACMD_COMPLETE,
+};
+
+/* Tracks one in-flight IPSec SA command from send until its response */
+struct mlx5_ipsec_command_context {
+ /* DMA buffer descriptor handed to the connection; sg[0] points at sa */
+ struct mlx5_fpga_dma_buf buf;
+ /* Copy of the caller's command, used as the message payload */
+ struct mlx5_accel_ipsec_sa sa;
+ enum mlx5_fpga_ipsec_sacmd_status status;
+ /* errno derived from the response syndrome; valid when status is COMPLETE */
+ int status_code;
+ /* Signalled on send failure or when the response has been processed */
+ struct completion complete;
+ struct mlx5_fpga_device *dev;
+ struct list_head list; /* Item in pending_cmds */
+};
+
+/* Layout of a response message received from the FPGA; fields are big-endian */
+struct mlx5_ipsec_sadb_resp {
+ __be32 syndrome;
+ /* Compared against the sw_sa_handle of the pending command */
+ __be32 sw_sa_handle;
+ u8 reserved[24];
+} __packed;
+
+/* Per-device IPSec offload state, allocated by mlx5_fpga_ipsec_init() */
+struct mlx5_fpga_ipsec {
+ /* Commands sent and awaiting a response, oldest first */
+ struct list_head pending_cmds;
+ spinlock_t pending_cmds_lock; /* Protects pending_cmds */
+ /* Extended capabilities read from the SBU at init time */
+ u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)];
+ /* Command connection to the SBU */
+ struct mlx5_fpga_conn *conn;
+};
+
+/*
+ * Tell whether @mdev has an FPGA whose sandbox is the Mellanox IPSec unit:
+ * an FPGA device object exists, the fpga general capability is set, and
+ * the sandbox vendor and product IDs match.
+ */
+static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
+{
+	return mdev->fpga && MLX5_CAP_GEN(mdev, fpga) &&
+	       MLX5_CAP_FPGA(mdev, ieee_vendor_id) ==
+			MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX &&
+	       MLX5_CAP_FPGA(mdev, sandbox_product_id) ==
+			MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC;
+}
+
+/*
+ * Send-completion callback for IPSec command buffers.
+ *
+ * Only failures are acted upon: the owning command context is marked
+ * SEND_FAIL and its completion is signalled so a waiter can return.
+ */
+static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn,
+					  struct mlx5_fpga_device *fdev,
+					  struct mlx5_fpga_dma_buf *buf,
+					  u8 status)
+{
+	struct mlx5_ipsec_command_context *cmd_ctx;
+
+	/* Nothing to do when the send succeeded */
+	if (!status)
+		return;
+
+	/* buf is embedded in the command context */
+	cmd_ctx = container_of(buf, struct mlx5_ipsec_command_context, buf);
+	mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n",
+		       status);
+	cmd_ctx->status = MLX5_FPGA_IPSEC_SACMD_SEND_FAIL;
+	complete(&cmd_ctx->complete);
+}
+
+/*
+ * Map a response syndrome to an errno value: 0 for success, -EINVAL for an
+ * illegal request, -EEXIST for an SADB issue, and -EIO for a write-response
+ * issue or any value not listed in the enum.
+ */
+static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome)
+{
+	if (syndrome == MLX5_IPSEC_RESPONSE_SUCCESS)
+		return 0;
+	if (syndrome == MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST)
+		return -EINVAL;
+	if (syndrome == MLX5_IPSEC_RESPONSE_SADB_ISSUE)
+		return -EEXIST;
+
+	/* MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE and anything unrecognised */
+	return -EIO;
+}
+
+/*
+ * Receive callback of the IPSec command connection.
+ *
+ * @cb_arg: the FPGA device (set as init_attr.cb_arg at init time)
+ * @buf:    received buffer; sg[0] is interpreted as a mlx5_ipsec_sadb_resp
+ *
+ * Pairs the response with the oldest entry on pending_cmds, records the
+ * result in that command context and signals its completion. Responses
+ * shorter than the response structure are dropped before the pending list
+ * is touched.
+ */
+static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
+{
+ struct mlx5_ipsec_sadb_resp *resp = buf->sg[0].data;
+ struct mlx5_ipsec_command_context *context;
+ enum mlx5_ipsec_response_syndrome syndrome;
+ struct mlx5_fpga_device *fdev = cb_arg;
+ unsigned long flags;
+
+ if (buf->sg[0].size < sizeof(*resp)) {
+ mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n",
+ buf->sg[0].size, sizeof(*resp));
+ return;
+ }
+
+ mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x sa_id %x\n",
+ ntohl(resp->syndrome), ntohl(resp->sw_sa_handle));
+
+ /* Dequeue the oldest pending command, if any */
+ spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+ context = list_first_entry_or_null(&fdev->ipsec->pending_cmds,
+ struct mlx5_ipsec_command_context,
+ list);
+ if (context)
+ list_del(&context->list);
+ spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+
+ if (!context) {
+ mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n");
+ return;
+ }
+ mlx5_fpga_dbg(fdev, "Handling response for %p\n", context);
+
+ /* NOTE(review): on a handle mismatch the context has already been
+ * unlinked but is neither completed nor freed here, so a waiter on
+ * context->complete is not woken by this path - confirm intended.
+ */
+ if (context->sa.sw_sa_handle != resp->sw_sa_handle) {
+ mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
+ ntohl(context->sa.sw_sa_handle),
+ ntohl(resp->sw_sa_handle));
+ return;
+ }
+
+ syndrome = ntohl(resp->syndrome);
+ context->status_code = syndrome_to_errno(syndrome);
+ context->status = MLX5_FPGA_IPSEC_SACMD_COMPLETE;
+
+ if (context->status_code)
+ mlx5_fpga_warn(fdev, "IPSec SADB command failed with syndrome %08x\n",
+ syndrome);
+ complete(&context->complete);
+}
+
+/*
+ * Start an IPSec SA command on the FPGA.
+ *
+ * Copies @cmd into a newly allocated command context, queues the context
+ * on the pending list and sends it over the IPSec command connection.
+ *
+ * Returns an opaque context to pass to mlx5_fpga_ipsec_sa_cmd_wait(), or
+ * an ERR_PTR(): -EOPNOTSUPP when the device has no IPSec state, -ENOMEM on
+ * allocation failure, or the error returned by the send.
+ */
+void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_ipsec_sa *cmd)
+{
+ struct mlx5_ipsec_command_context *context;
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ unsigned long flags;
+ int res = 0;
+
+ /* The command structure size must be a multiple of 4 bytes */
+ BUILD_BUG_ON((sizeof(struct mlx5_accel_ipsec_sa) & 3) != 0);
+ if (!fdev || !fdev->ipsec)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ context = kzalloc(sizeof(*context), GFP_ATOMIC);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&context->sa, cmd, sizeof(*cmd));
+ context->buf.complete = mlx5_fpga_ipsec_send_complete;
+ context->buf.sg[0].size = sizeof(context->sa);
+ context->buf.sg[0].data = &context->sa;
+ init_completion(&context->complete);
+ context->dev = fdev;
+ /* Queue before sending so the receive callback can find the context */
+ spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+ list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
+ spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+
+ context->status = MLX5_FPGA_IPSEC_SACMD_PENDING;
+
+ res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
+ if (res) {
+ mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n",
+ res);
+ /* The send was rejected: unlink and free the context here */
+ spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+ list_del(&context->list);
+ spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+ kfree(context);
+ return ERR_PTR(res);
+ }
+ /* Context will be freed by wait func after completion */
+ return context;
+}
+
+/*
+ * Wait for a command started by mlx5_fpga_ipsec_sa_cmd_exec() to finish.
+ *
+ * @ctx: the context returned by mlx5_fpga_ipsec_sa_cmd_exec()
+ *
+ * Returns the errno derived from the response syndrome when a response was
+ * processed, -EIO when the command ended in any other state (e.g. send
+ * failure), or -EINTR when the killable wait was interrupted. The context
+ * is freed on every path except the interrupted one.
+ */
+int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx)
+{
+ struct mlx5_ipsec_command_context *context = ctx;
+ int res;
+
+ res = wait_for_completion_killable(&context->complete);
+ if (res) {
+ /* NOTE(review): context is not freed on this path - confirm who
+ * owns it once the wait has been interrupted.
+ */
+ mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n");
+ return -EINTR;
+ }
+
+ if (context->status == MLX5_FPGA_IPSEC_SACMD_COMPLETE)
+ res = context->status_code;
+ else
+ res = -EIO;
+
+ kfree(context);
+ return res;
+}
+
+/*
+ * Report the IPSec offload capabilities of @mdev as a mask of
+ * MLX5_ACCEL_IPSEC_* flags.
+ *
+ * Returns 0 for a non-IPSec device. For an IPSec device the DEVICE flag is
+ * always set; the ESP, IPV6 and LSO flags are added from the extended
+ * capabilities when the IPSec state has been initialised.
+ */
+u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	u32 flags;
+
+	if (!mlx5_fpga_is_ipsec_device(mdev))
+		return 0;
+
+	flags = MLX5_ACCEL_IPSEC_DEVICE;
+
+	/* Extended capabilities are only available after a successful init */
+	if (!fdev->ipsec)
+		return flags;
+
+	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp))
+		flags |= MLX5_ACCEL_IPSEC_ESP;
+	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6))
+		flags |= MLX5_ACCEL_IPSEC_IPV6;
+	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso))
+		flags |= MLX5_ACCEL_IPSEC_LSO;
+
+	return flags;
+}
+
+/*
+ * Return the number of IPSec counters advertised in the extended
+ * capabilities, or 0 when the device has no IPSec state.
+ */
+unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+
+	if (fdev && fdev->ipsec)
+		return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+				number_of_ipsec_counters);
+
+	return 0;
+}
+
+/*
+ * Read the IPSec counters from FPGA memory.
+ *
+ * @mdev:           core device
+ * @counters:       output array
+ * @counters_count: number of entries available in @counters
+ *
+ * The counter block address and the counter count come from the extended
+ * capabilities. All device counters are read; at most @counters_count of
+ * them are copied to @counters.
+ *
+ * Returns 0 on success (also when the device has no IPSec state, in which
+ * case @counters is left untouched), -ENOMEM on allocation failure, or the
+ * negative error returned by mlx5_fpga_mem_read().
+ */
+int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+				  unsigned int counters_count)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	unsigned int i;
+	u32 *data;
+	u32 count;
+	u64 addr;
+	int ret;
+
+	if (!fdev || !fdev->ipsec)
+		return 0;
+
+	addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+			     ipsec_counters_addr_low) +
+	       ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+			      ipsec_counters_addr_high) << 32);
+
+	count = mlx5_fpga_ipsec_counters_count(mdev);
+
+	/* Two 32-bit words per counter; kcalloc() checks the multiplication */
+	data = kcalloc(count, 2 * sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data,
+				 MLX5_FPGA_ACCESS_TYPE_DONTCARE);
+	if (ret < 0) {
+		mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n",
+			      ret);
+		goto out;
+	}
+	ret = 0;
+
+	/* Never write past the caller's array */
+	if (count > counters_count)
+		count = counters_count;
+
+	/* Each counter is low word, then high. But each word is big-endian */
+	for (i = 0; i < count; i++)
+		counters[i] = (u64)ntohl(data[i * 2]) |
+			      ((u64)ntohl(data[i * 2 + 1]) << 32);
+
+out:
+	kfree(data);
+	return ret;
+}
+
+/*
+ * Set up IPSec offload state for @mdev.
+ *
+ * Does nothing (returns 0) when the device is not an IPSec FPGA device.
+ * Otherwise allocates fdev->ipsec, reads the SBU extended capabilities and
+ * creates the command connection with mlx5_fpga_ipsec_recv() as receive
+ * callback.
+ *
+ * Returns 0 on success or a negative errno; on failure fdev->ipsec is
+ * freed and reset to NULL.
+ */
+int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_conn_attr init_attr = {0};
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ struct mlx5_fpga_conn *conn;
+ int err;
+
+ if (!mlx5_fpga_is_ipsec_device(mdev))
+ return 0;
+
+ fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL);
+ if (!fdev->ipsec)
+ return -ENOMEM;
+
+ err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps),
+ fdev->ipsec->caps);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n",
+ err);
+ goto error;
+ }
+
+ /* The pending list and its lock must be ready before the connection exists */
+ INIT_LIST_HEAD(&fdev->ipsec->pending_cmds);
+ spin_lock_init(&fdev->ipsec->pending_cmds_lock);
+
+ init_attr.rx_size = SBU_QP_QUEUE_SIZE;
+ init_attr.tx_size = SBU_QP_QUEUE_SIZE;
+ init_attr.recv_cb = mlx5_fpga_ipsec_recv;
+ init_attr.cb_arg = fdev;
+ conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
+ if (IS_ERR(conn)) {
+ err = PTR_ERR(conn);
+ mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n",
+ err);
+ goto error;
+ }
+ fdev->ipsec->conn = conn;
+ return 0;
+
+error:
+ kfree(fdev->ipsec);
+ fdev->ipsec = NULL;
+ return err;
+}
+
+/*
+ * Release the IPSec offload state set up by mlx5_fpga_ipsec_init().
+ *
+ * Safe to call when the device is not an IPSec device or when init never
+ * succeeded (fdev->ipsec is NULL): both cases are no-ops.
+ */
+void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+
+	if (!mlx5_fpga_is_ipsec_device(mdev))
+		return;
+
+	/* A failed init leaves fdev->ipsec NULL; there is nothing to undo */
+	if (!fdev->ipsec)
+		return;
+
+	mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn);
+	kfree(fdev->ipsec);
+	fdev->ipsec = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
new file mode 100644
index 000000000000..26a3e4b56972
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_IPSEC_H__
+#define __MLX5_FPGA_IPSEC_H__
+
+#include "accel/ipsec.h"
+
+#ifdef CONFIG_MLX5_FPGA
+
+void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_ipsec_sa *cmd);
+int mlx5_fpga_ipsec_sa_cmd_wait(void *context);
+
+u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
+unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
+int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+ unsigned int counters_count);
+
+int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev);
+
+#else
+
+/* Stub for builds without CONFIG_MLX5_FPGA: commands are not supported */
+static inline void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_ipsec_sa *cmd)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+/* Stub for builds without CONFIG_MLX5_FPGA: there is nothing to wait for */
+static inline int mlx5_fpga_ipsec_sa_cmd_wait(void *context)
+{
+ return -EOPNOTSUPP;
+}
+
+/* Stub for builds without CONFIG_MLX5_FPGA: no capabilities are reported */
+static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+/* Stub for builds without CONFIG_MLX5_FPGA: there are no counters */
+static inline unsigned int
+mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+/*
+ * Stub for builds without CONFIG_MLX5_FPGA: nothing is read and @counters
+ * is left untouched. The parameter list mirrors the real implementation so
+ * callers compile identically in both configurations.
+ */
+static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev,
+						u64 *counters,
+						unsigned int counters_count)
+{
+	return 0;
+}
+
+/* Stub for builds without CONFIG_MLX5_FPGA: nothing to initialise */
+static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+/* Stub for builds without CONFIG_MLX5_FPGA: nothing to clean up */
+static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+#endif /* CONFIG_MLX5_FPGA */
+
+#endif /* __MLX5_FPGA_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
new file mode 100644
index 000000000000..3c11d6e2160a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "fpga/core.h"
+#include "fpga/conn.h"
+#include "fpga/sdk.h"
+
+struct mlx5_fpga_conn *
+mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr)
+{
+ return mlx5_fpga_conn_create(fdev, attr, MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_create);
+
+void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn)
+{
+ mlx5_fpga_conn_destroy(conn);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_destroy);
+
+int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ return mlx5_fpga_conn_send(conn, buf);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_sendmsg);
+
+/* Read @size bytes starting at FPGA address @addr into @buf, split into
+ * mlx5_fpga_access_reg() calls of at most MLX5_FPGA_ACCESS_REG_SIZE_MAX
+ * bytes each.
+ *
+ * Returns 0 on success, -EINVAL for a zero-length request, -ENOTCONN when
+ * @fdev has no mdev, or the error returned by mlx5_fpga_access_reg().
+ */
+static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
+				  u64 addr, u8 *buf)
+{
+	size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX;
+	size_t bytes_done = 0;
+	u8 actual_size;
+	int err;
+
+	/* With size == 0 the loop below never runs and err would be
+	 * returned uninitialized.
+	 */
+	if (!size)
+		return -EINVAL;
+
+	if (!fdev->mdev)
+		return -ENOTCONN;
+
+	while (bytes_done < size) {
+		/* Clamp each transfer to the access register limit */
+		actual_size = min(max_size, (size - bytes_done));
+
+		err = mlx5_fpga_access_reg(fdev->mdev, actual_size,
+					   addr + bytes_done,
+					   buf + bytes_done, false);
+		if (err) {
+			mlx5_fpga_err(fdev, "Failed to read over I2C: %d\n",
+				      err);
+			break;
+		}
+
+		bytes_done += actual_size;
+	}
+
+	return err;
+}
+
+/* Write @size bytes from @buf to FPGA address @addr, split into
+ * mlx5_fpga_access_reg() calls of at most MLX5_FPGA_ACCESS_REG_SIZE_MAX
+ * bytes each.
+ *
+ * Returns 0 on success, -EINVAL for a zero-length request, -ENOTCONN when
+ * @fdev has no mdev, or the error returned by mlx5_fpga_access_reg().
+ */
+static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
+				   u64 addr, u8 *buf)
+{
+	size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX;
+	size_t bytes_done = 0;
+	u8 actual_size;
+	int err;
+
+	/* With size == 0 the loop below never runs and err would be
+	 * returned uninitialized.
+	 */
+	if (!size)
+		return -EINVAL;
+
+	if (!fdev->mdev)
+		return -ENOTCONN;
+
+	while (bytes_done < size) {
+		/* Clamp each transfer to the access register limit */
+		actual_size = min(max_size, (size - bytes_done));
+
+		err = mlx5_fpga_access_reg(fdev->mdev, actual_size,
+					   addr + bytes_done,
+					   buf + bytes_done, true);
+		if (err) {
+			/* Report the error code, matching the read path */
+			mlx5_fpga_err(fdev, "Failed to write over I2C: %d\n",
+				      err);
+			break;
+		}
+
+		bytes_done += actual_size;
+	}
+
+	return err;
+}
+
+/* Read @size bytes at FPGA address @addr into @buf using @access_type.
+ * Only the MLX5_FPGA_ACCESS_TYPE_I2C value is handled; any other value is
+ * logged and rejected with -EACCES.
+ *
+ * On success the return value is @size (converted to int), not 0; on
+ * failure it is the error returned by mlx5_fpga_mem_read_i2c().
+ */
+int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type)
+{
+ int ret;
+
+ switch (access_type) {
+ case MLX5_FPGA_ACCESS_TYPE_I2C:
+ ret = mlx5_fpga_mem_read_i2c(fdev, size, addr, buf);
+ if (ret)
+ return ret;
+ break;
+ default:
+ mlx5_fpga_warn(fdev, "Unexpected read access_type %u\n",
+ access_type);
+ return -EACCES;
+ }
+
+ /* Success is reported as the requested byte count */
+ return size;
+}
+EXPORT_SYMBOL(mlx5_fpga_mem_read);
+
+/* Write @size bytes from @buf to FPGA address @addr using @access_type.
+ * Only the MLX5_FPGA_ACCESS_TYPE_I2C value is handled; any other value is
+ * logged and rejected with -EACCES.
+ *
+ * On success the return value is @size (converted to int), not 0; on
+ * failure it is the error returned by mlx5_fpga_mem_write_i2c().
+ */
+int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type)
+{
+ int ret;
+
+ switch (access_type) {
+ case MLX5_FPGA_ACCESS_TYPE_I2C:
+ ret = mlx5_fpga_mem_write_i2c(fdev, size, addr, buf);
+ if (ret)
+ return ret;
+ break;
+ default:
+ mlx5_fpga_warn(fdev, "Unexpected write access_type %u\n",
+ access_type);
+ return -EACCES;
+ }
+
+ /* Success is reported as the requested byte count */
+ return size;
+}
+EXPORT_SYMBOL(mlx5_fpga_mem_write);
+
+int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf)
+{
+ return mlx5_fpga_sbu_caps(fdev->mdev, buf, size);
+}
+EXPORT_SYMBOL(mlx5_fpga_get_sbu_caps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
new file mode 100644
index 000000000000..baa537e54a49
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef MLX5_FPGA_SDK_H
+#define MLX5_FPGA_SDK_H
+
+#include <linux/types.h>
+#include <linux/dma-direction.h>
+
+/**
+ * DOC: Innova SDK
+ * This header defines the in-kernel API for Innova FPGA client drivers.
+ */
+
+enum mlx5_fpga_access_type {
+ MLX5_FPGA_ACCESS_TYPE_I2C = 0x0,
+ MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0,
+};
+
+struct mlx5_fpga_conn;
+struct mlx5_fpga_device;
+
+/**
+ * struct mlx5_fpga_dma_entry - A scatter-gather DMA entry
+ */
+struct mlx5_fpga_dma_entry {
+ /** @data: Virtual address pointer to the data */
+ void *data;
+ /** @size: Size in bytes of the data */
+ unsigned int size;
+ /** @dma_addr: Private member. Physical DMA-mapped address of the data */
+ dma_addr_t dma_addr;
+};
+
+/**
+ * struct mlx5_fpga_dma_buf - A packet buffer
+ * May contain up to 2 scatter-gather data entries
+ */
+struct mlx5_fpga_dma_buf {
+ /** @dma_dir: DMA direction */
+ enum dma_data_direction dma_dir;
+ /** @sg: Scatter-gather entries pointing to the data in memory */
+ struct mlx5_fpga_dma_entry sg[2];
+ /** @list: Item in SQ backlog, for TX packets */
+ struct list_head list;
+ /**
+ * @complete: Completion routine, for TX packets
+ * @conn: FPGA Connection this packet was sent to
+ * @fdev: FPGA device this packet was sent to
+ * @buf: The packet buffer
+ * @status: 0 if successful, or an error code otherwise
+ */
+ void (*complete)(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_dma_buf *buf, u8 status);
+};
+
+/**
+ * struct mlx5_fpga_conn_attr - FPGA connection attributes
+ * Describes the attributes of a connection
+ */
+struct mlx5_fpga_conn_attr {
+ /** @tx_size: Size of connection TX queue, in packets */
+ unsigned int tx_size;
+ /** @rx_size: Size of connection RX queue, in packets */
+ unsigned int rx_size;
+ /**
+ * @recv_cb: Callback function which is called for received packets
+ * @cb_arg: The value provided in mlx5_fpga_conn_attr.cb_arg
+ * @buf: A buffer containing a received packet
+ *
+ * buf is guaranteed to only contain a single scatter-gather entry.
+ * The size of the actual packet received is specified in buf.sg[0].size
+ * When this callback returns, the packet buffer may be re-used for
+ * subsequent receives.
+ */
+ void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf);
+ void *cb_arg;
+};
+
+/**
+ * mlx5_fpga_sbu_conn_create() - Initialize a new FPGA SBU connection
+ * @fdev: The FPGA device
+ * @attr: Attributes of the new connection
+ *
+ * Sets up a new FPGA SBU connection with the specified attributes.
+ * The receive callback function may be called for incoming messages even
+ * before this function returns.
+ *
+ * The caller must eventually destroy the connection by calling
+ * mlx5_fpga_sbu_conn_destroy.
+ *
+ * Return: A new connection, or ERR_PTR() error value otherwise.
+ */
+struct mlx5_fpga_conn *
+mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr);
+
+/**
+ * mlx5_fpga_sbu_conn_destroy() - Destroy an FPGA SBU connection
+ * @conn: The FPGA SBU connection to destroy
+ *
+ * Cleans up an FPGA SBU connection which was previously created with
+ * mlx5_fpga_sbu_conn_create.
+ */
+void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn);
+
+/**
+ * mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet
+ * @conn: An FPGA SBU connection
+ * @buf: The packet buffer
+ *
+ * Queues a packet for transmission over an FPGA SBU connection.
+ * The buffer should not be modified or freed until completion.
+ * Upon completion, the buf's complete() callback is invoked, indicating the
+ * success or error status of the transmission.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf);
+
+/**
+ * mlx5_fpga_mem_read() - Read from FPGA memory address space
+ * @fdev: The FPGA device
+ * @size: Size of chunk to read, in bytes
+ * @addr: Starting address to read from, in FPGA address space
+ * @buf: Buffer to read into
+ * @access_type: Method for reading
+ *
+ * Reads from the specified address into the specified buffer.
+ * The address may point to configuration space or to DDR.
+ * Large reads may be performed internally as several non-atomic operations.
+ * This function may sleep, so should not be called from atomic contexts.
+ *
+ * Return: @size (the number of bytes read) if successful, or an error value
+ * otherwise.
+ */
+int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type);
+
+/**
+ * mlx5_fpga_mem_write() - Write to FPGA memory address space
+ * @fdev: The FPGA device
+ * @size: Size of chunk to write, in bytes
+ * @addr: Starting address to write to, in FPGA address space
+ * @buf: Buffer which contains data to write
+ * @access_type: Method for writing
+ *
+ * Writes the specified buffer data to FPGA memory at the specified address.
+ * The address may point to configuration space or to DDR.
+ * Large writes may be performed internally as several non-atomic operations.
+ * This function may sleep, so should not be called from atomic contexts.
+ *
+ * Return: @size (the number of bytes written) if successful, or an error
+ * value otherwise.
+ */
+int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type);
+
+/**
+ * mlx5_fpga_get_sbu_caps() - Read the SBU capabilities
+ * @fdev: The FPGA device
+ * @size: Size of the buffer to read into
+ * @buf: Buffer to read the capabilities into
+ *
+ * Reads the FPGA SBU capabilities into the specified buffer.
+ * The format of the capabilities buffer is SBU-dependent.
+ *
+ * Return: 0 if successful
+ * -EINVAL if the buffer is not large enough to contain SBU caps
+ * or any other error value otherwise.
+ */
+int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf);
+
+#endif /* MLX5_FPGA_SDK_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 0648a659b21d..4b6b03d6297f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -67,6 +67,7 @@ enum {
enum {
MLX5_DROP_NEW_HEALTH_WORK,
+ MLX5_DROP_NEW_RECOVERY_WORK,
};
static u8 get_nic_state(struct mlx5_core_dev *dev)
@@ -194,7 +195,7 @@ static void health_care(struct work_struct *work)
mlx5_handle_bad_state(dev);
spin_lock_irqsave(&health->wq_lock, flags);
- if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
+ if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags))
schedule_delayed_work(&health->recover_work, recover_delay);
else
dev_err(&dev->pdev->dev,
@@ -322,6 +323,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
init_timer(&health->timer);
health->sick = 0;
clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+ clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
health->health = &dev->iseg->health;
health->health_counter = &dev->iseg->health_counter;
@@ -345,11 +347,22 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
spin_lock_irqsave(&health->wq_lock, flags);
set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+ set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
spin_unlock_irqrestore(&health->wq_lock, flags);
cancel_delayed_work_sync(&health->recover_work);
cancel_work_sync(&health->work);
}
+/* Stop new recover_work from being scheduled and wait for a pending or
+ * running instance to finish. Unlike mlx5_drain_health_wq(), this leaves
+ * health->work and MLX5_DROP_NEW_HEALTH_WORK untouched.
+ */
+void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+	unsigned long flags;
+
+	/* wq_lock is taken with interrupts disabled by health_care() and
+	 * mlx5_drain_health_wq(); use the same IRQ-safe variant here so the
+	 * lock is never held with interrupts enabled.
+	 */
+	spin_lock_irqsave(&health->wq_lock, flags);
+	set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+	spin_unlock_irqrestore(&health->wq_lock, flags);
+	cancel_delayed_work_sync(&health->recover_work);
+}
+
void mlx5_health_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
new file mode 100644
index 000000000000..de2aed44ab85
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/etherdevice.h>
+#include <linux/idr.h>
+#include "mlx5_core.h"
+
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev)
+{
+ unsigned int tblsz = MLX5_CAP_ROCE(dev, roce_address_table_size);
+
+ ida_init(&dev->roce.reserved_gids.ida);
+ dev->roce.reserved_gids.start = tblsz;
+ dev->roce.reserved_gids.count = 0;
+}
+
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev)
+{
+ WARN_ON(!ida_is_empty(&dev->roce.reserved_gids.ida));
+ dev->roce.reserved_gids.start = 0;
+ dev->roce.reserved_gids.count = 0;
+ ida_destroy(&dev->roce.reserved_gids.ida);
+}
+
+/* Reserve @count additional GIDs by lowering reserved_gids.start and
+ * raising reserved_gids.count.
+ *
+ * Returns 0 on success, -EPERM while MLX5_INTERFACE_STATE_UP is set, or
+ * -ENOMEM when fewer than @count entries remain below the current start or
+ * the total would exceed MLX5_MAX_RESERVED_GIDS.
+ */
+int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+		mlx5_core_err(dev, "Cannot reserve GIDs when interfaces are up\n");
+		return -EPERM;
+	}
+	if (dev->roce.reserved_gids.start < count) {
+		/* count is unsigned, so print it with %u */
+		mlx5_core_warn(dev, "GID table exhausted attempting to reserve %u more GIDs\n",
+			       count);
+		return -ENOMEM;
+	}
+	if (dev->roce.reserved_gids.count + count > MLX5_MAX_RESERVED_GIDS) {
+		mlx5_core_warn(dev, "Unable to reserve %u more GIDs\n", count);
+		return -ENOMEM;
+	}
+
+	dev->roce.reserved_gids.start -= count;
+	dev->roce.reserved_gids.count += count;
+	mlx5_core_dbg(dev, "Reserved %u GIDs starting at %u\n",
+		      dev->roce.reserved_gids.count,
+		      dev->roce.reserved_gids.start);
+	return 0;
+}
+
+/* Give back @count GIDs previously taken with mlx5_core_reserve_gids() by
+ * raising reserved_gids.start and lowering reserved_gids.count.
+ *
+ * Warns if interfaces are up. A request for more GIDs than are currently
+ * reserved is warned about and ignored, leaving the bookkeeping unchanged.
+ */
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+	WARN(test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state), "Unreserving GIDs when interfaces are up");
+	/* Bail out instead of subtracting more than is reserved, which
+	 * would corrupt both start and count.
+	 */
+	if (WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved",
+		 count, dev->roce.reserved_gids.count))
+		return;
+
+	dev->roce.reserved_gids.start += count;
+	dev->roce.reserved_gids.count -= count;
+	mlx5_core_dbg(dev, "%u GIDs starting at %u left reserved\n",
+		      dev->roce.reserved_gids.count,
+		      dev->roce.reserved_gids.start);
+}
+
+int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index)
+{
+ int end = dev->roce.reserved_gids.start +
+ dev->roce.reserved_gids.count;
+ int index = 0;
+
+ index = ida_simple_get(&dev->roce.reserved_gids.ida,
+ dev->roce.reserved_gids.start, end,
+ GFP_KERNEL);
+ if (index < 0)
+ return index;
+
+ mlx5_core_dbg(dev, "Allocating reserved GID %u\n", index);
+ *gid_index = index;
+ return 0;
+}
+
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index)
+{
+ mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index);
+ ida_simple_remove(&dev->roce.reserved_gids.ida, gid_index);
+}
+
+unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev)
+{
+ return dev->roce.reserved_gids.count;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count);
+
+int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
+ u8 roce_version, u8 roce_l3_type, const u8 *gid,
+ const u8 *mac, bool vlan, u16 vlan_id)
+{
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+ u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
+ void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+ char *addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, in_addr,
+ source_l3_address);
+ void *addr_mac = MLX5_ADDR_OF(roce_addr_layout, in_addr,
+ source_mac_47_32);
+ int gidsz = MLX5_FLD_SZ_BYTES(roce_addr_layout, source_l3_address);
+
+ if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return -EINVAL;
+
+ if (gid) {
+ if (vlan) {
+ MLX5_SET_RA(in_addr, vlan_valid, 1);
+ MLX5_SET_RA(in_addr, vlan_id, vlan_id);
+ }
+
+ ether_addr_copy(addr_mac, mac);
+ MLX5_SET_RA(in_addr, roce_version, roce_version);
+ MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type);
+ memcpy(addr_l3_addr, gid, gidsz);
+ }
+
+ MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+ MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_roce_gid_set);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
new file mode 100644
index 000000000000..7550b1cc8c6a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_MLX5_H__
+#define __LIB_MLX5_H__
+
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
+int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c7f75e12c13b..c065132b956d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -56,7 +56,9 @@
#ifdef CONFIG_MLX5_CORE_EN
#include "eswitch.h"
#endif
+#include "lib/mlx5.h"
#include "fpga/core.h"
+#include "accel/ipsec.h"
MODULE_AUTHOR("Eli Cohen <[email protected]>");
MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
@@ -936,6 +938,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
mlx5_init_mkey_table(dev);
+ mlx5_init_reserved_gids(dev);
+
err = mlx5_init_rl_table(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init rate limiting\n");
@@ -956,8 +960,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
goto err_eswitch_cleanup;
}
+ err = mlx5_fpga_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init fpga device %d\n", err);
+ goto err_sriov_cleanup;
+ }
+
return 0;
+err_sriov_cleanup:
+ mlx5_sriov_cleanup(dev);
err_eswitch_cleanup:
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_cleanup(dev->priv.eswitch);
@@ -981,11 +993,13 @@ out:
static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
+ mlx5_fpga_cleanup(dev);
mlx5_sriov_cleanup(dev);
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_cleanup(dev->priv.eswitch);
#endif
mlx5_cleanup_rl_table(dev);
+ mlx5_cleanup_reserved_gids(dev);
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
mlx5_cleanup_qp_table(dev);
@@ -1020,7 +1034,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
if (err) {
dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n",
FW_PRE_INIT_TIMEOUT_MILI);
- goto out;
+ goto out_err;
}
err = mlx5_cmd_init(dev);
@@ -1117,16 +1131,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_disable_msix;
}
- err = mlx5_fpga_device_init(dev);
- if (err) {
- dev_err(&pdev->dev, "fpga device init failed %d\n", err);
- goto err_put_uars;
- }
-
err = mlx5_start_eqs(dev);
if (err) {
dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
- goto err_fpga_init;
+ goto err_put_uars;
}
err = alloc_comp_eqs(dev);
@@ -1160,7 +1168,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
err = mlx5_fpga_device_start(dev);
if (err) {
dev_err(&pdev->dev, "fpga device start failed %d\n", err);
- goto err_reg_dev;
+ goto err_fpga_start;
+ }
+ err = mlx5_accel_ipsec_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
+ goto err_ipsec_start;
}
if (mlx5_device_registered(dev)) {
@@ -1181,6 +1194,11 @@ out:
return 0;
err_reg_dev:
+ mlx5_accel_ipsec_cleanup(dev);
+err_ipsec_start:
+ mlx5_fpga_device_stop(dev);
+
+err_fpga_start:
mlx5_sriov_detach(dev);
err_sriov:
@@ -1198,9 +1216,6 @@ err_affinity_hints:
err_stop_eqs:
mlx5_stop_eqs(dev);
-err_fpga_init:
- mlx5_fpga_device_cleanup(dev);
-
err_put_uars:
mlx5_put_uars_page(dev, priv->uar);
@@ -1243,7 +1258,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
int err = 0;
if (cleanup)
- mlx5_drain_health_wq(dev);
+ mlx5_drain_health_recovery(dev);
mutex_lock(&dev->intf_state_mutex);
if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
@@ -1254,9 +1269,15 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto out;
}
+ clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+ set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
+
if (mlx5_device_registered(dev))
mlx5_detach_device(dev);
+ mlx5_accel_ipsec_cleanup(dev);
+ mlx5_fpga_device_stop(dev);
+
mlx5_sriov_detach(dev);
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_detach(dev->priv.eswitch);
@@ -1265,7 +1286,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_irq_clear_affinity_hints(dev);
free_comp_eqs(dev);
mlx5_stop_eqs(dev);
- mlx5_fpga_device_cleanup(dev);
mlx5_put_uars_page(dev, priv->uar);
mlx5_disable_msix(dev);
if (cleanup)
@@ -1282,8 +1302,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_cmd_cleanup(dev);
out:
- clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
- set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
mutex_unlock(&dev->intf_state_mutex);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 06019d00ab7b..5abfec1c3399 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -926,12 +926,16 @@ static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
{
+ if (atomic_inc_return(&mdev->roce.roce_en) != 1)
+ return 0;
return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
}
EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
{
+ if (atomic_dec_return(&mdev->roce.roce_en) != 0)
+ return 0;
return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
}
EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 921673c42bc9..6bcfc25350f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -54,6 +54,12 @@ static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq)
return mlx5_wq_cyc_get_size(wq) << wq->log_stride;
}
+static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
+{
+ return mlx5_wq_cyc_get_byte_size(&wq->rq) +
+ mlx5_wq_cyc_get_byte_size(&wq->sq);
+}
+
static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
{
return mlx5_cqwq_get_size(wq) << wq->log_stride;
@@ -99,6 +105,46 @@ err_db_free:
return err;
}
+int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *qpc, struct mlx5_wq_qp *wq,
+ struct mlx5_wq_ctrl *wq_ctrl)
+{
+ int err;
+
+ wq->rq.log_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4;
+ wq->rq.sz_m1 = (1 << MLX5_GET(qpc, qpc, log_rq_size)) - 1;
+
+ wq->sq.log_stride = ilog2(MLX5_SEND_WQE_BB);
+ wq->sq.sz_m1 = (1 << MLX5_GET(qpc, qpc, log_sq_size)) - 1;
+
+ err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq),
+ &wq_ctrl->buf, param->buf_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+ goto err_db_free;
+ }
+
+ wq->rq.buf = wq_ctrl->buf.direct.buf;
+ wq->sq.buf = wq->rq.buf + mlx5_wq_cyc_get_byte_size(&wq->rq);
+ wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR];
+ wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR];
+
+ wq_ctrl->mdev = mdev;
+
+ return 0;
+
+err_db_free:
+ mlx5_db_free(mdev, &wq_ctrl->db);
+
+ return err;
+}
+
int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *cqc, struct mlx5_cqwq *wq,
struct mlx5_frag_wq_ctrl *wq_ctrl)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index d8afed898c31..718589d0cec2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -34,6 +34,8 @@
#define __MLX5_WQ_H__
#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
struct mlx5_wq_param {
int linear;
@@ -60,6 +62,11 @@ struct mlx5_wq_cyc {
u8 log_stride;
};
+struct mlx5_wq_qp {
+ struct mlx5_wq_cyc rq;
+ struct mlx5_wq_cyc sq;
+};
+
struct mlx5_cqwq {
struct mlx5_frag_buf frag_buf;
__be32 *db;
@@ -87,6 +94,10 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
struct mlx5_wq_ctrl *wq_ctrl);
u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
+int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *qpc, struct mlx5_wq_qp *wq,
+ struct mlx5_wq_ctrl *wq_ctrl);
+
int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *cqc, struct mlx5_cqwq *wq,
struct mlx5_frag_wq_ctrl *wq_ctrl);
@@ -146,6 +157,22 @@ static inline void mlx5_cqwq_update_db_record(struct mlx5_cqwq *wq)
*wq->db = cpu_to_be32(wq->cc & 0xffffff);
}
+static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq)
+{
+ u32 ci = mlx5_cqwq_get_ci(wq);
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
+ u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK;
+ u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1;
+
+ if (cqe_ownership_bit != sw_ownership_val)
+ return NULL;
+
+ /* ensure cqe content is read after cqe ownership bit */
+ dma_rmb();
+
+ return cqe;
+}
+
static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq)
{
return wq->cur_sz == wq->sz_m1;
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
index 9ca85383aa35..7a712b6b09ec 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
@@ -96,7 +96,7 @@ struct mlxfw_dev {
u16 psid_size;
};
-#if IS_ENABLED(CONFIG_MLXFW)
+#if IS_REACHABLE(CONFIG_MLXFW)
int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
const struct firmware *firmware);
#else
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 700cc8c6aa5b..192cb93e7669 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3301,6 +3301,9 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
u16 vid = vlan_dev_vlan_id(vlan_dev);
+ if (netif_is_bridge_port(vlan_dev))
+ return 0;
+
if (mlxsw_sp_port_dev_check(real_dev))
return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
event, vid);
diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig
index 0d5a7b9203a4..0e331e2f685a 100644
--- a/drivers/net/ethernet/netronome/Kconfig
+++ b/drivers/net/ethernet/netronome/Kconfig
@@ -25,6 +25,16 @@ config NFP
cards working as a advanced Ethernet NIC. It works with both
SR-IOV physical and virtual functions.
+config NFP_APP_FLOWER
+ bool "NFP4000/NFP6000 TC Flower offload support"
+ depends on NFP
+ depends on NET_SWITCHDEV
+ ---help---
+ Enable driver support for TC Flower offload on NFP4000 and NFP6000.
+ Say Y, if you are planning to make use of TC Flower offload
+ either directly, with Open vSwitch, or any other way. Note that
+ TC Flower offload requires specific FW to work.
+
config NFP_DEBUG
bool "Debug support for Netronome(R) NFP4000/NFP6000 NIC drivers"
depends on NFP
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 10b556b2c59d..b8e1358868bd 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -27,9 +27,17 @@ nfp-objs := \
nfp_port.o \
bpf/main.o \
bpf/offload.o \
+ nic/main.o
+
+ifeq ($(CONFIG_NFP_APP_FLOWER),y)
+nfp-objs += \
+ flower/action.o \
flower/cmsg.o \
flower/main.o \
- nic/main.o
+ flower/match.o \
+ flower/metadata.o \
+ flower/offload.o
+endif
ifeq ($(CONFIG_BPF_SYSCALL),y)
nfp-objs += \
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
new file mode 100644
index 000000000000..db9750695dc7
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bitfield.h>
+#include <net/pkt_cls.h>
+#include <net/switchdev.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+
+#include "cmsg.h"
+#include "main.h"
+#include "../nfp_net_repr.h"
+
+static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan)
+{ /* Fill *pop_vlan with a pop-VLAN action: opcode/length header plus a zeroed reserved field. */
+ size_t act_size = sizeof(struct nfp_fl_pop_vlan);
+ u16 tmp_pop_vlan_op;
+
+ tmp_pop_vlan_op = /* length field carries act_size >> NFP_FL_LW_SIZ, not the byte count */
+ FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
+ FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_POP_VLAN);
+
+ pop_vlan->a_op = cpu_to_be16(tmp_pop_vlan_op);
+ pop_vlan->reserved = 0;
+}
+
+static void
+nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan,
+ const struct tc_action *action)
+{ /* Fill *push_vlan with a push-VLAN action taken from the tc vlan action's proto, priority and VID. */
+ size_t act_size = sizeof(struct nfp_fl_push_vlan);
+ struct tcf_vlan *vlan = to_vlan(action);
+ u16 tmp_push_vlan_tci;
+ u16 tmp_push_vlan_op;
+
+ tmp_push_vlan_op = /* length field carries act_size >> NFP_FL_LW_SIZ, not the byte count */
+ FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
+ FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_PUSH_VLAN);
+
+ push_vlan->a_op = cpu_to_be16(tmp_push_vlan_op);
+ /* Set action push vlan parameters. */
+ push_vlan->reserved = 0;
+ push_vlan->vlan_tpid = tcf_vlan_push_proto(action); /* stored without a byte swap; NOTE(review): presumably already big-endian -- confirm */
+
+ tmp_push_vlan_tci =
+ FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, vlan->tcfv_push_prio) |
+ FIELD_PREP(NFP_FL_PUSH_VLAN_VID, vlan->tcfv_push_vid) |
+ NFP_FL_PUSH_VLAN_CFI; /* the CFI bit is set unconditionally */
+ push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci);
+}
+
+static int
+nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
+ struct nfp_fl_payload *nfp_flow, bool last,
+ struct net_device *in_dev)
+{ /* Fill *output for the mirred target of @action; returns 0, or -EOPNOTSUPP if the target cannot be offloaded. */
+ size_t act_size = sizeof(struct nfp_fl_output);
+ struct net_device *out_dev;
+ u16 tmp_output_op;
+ int ifindex;
+
+ /* Set action opcode to output action. */
+ tmp_output_op =
+ FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
+ FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_OUTPUT);
+
+ output->a_op = cpu_to_be16(tmp_output_op);
+
+ /* Set action output parameters. */
+ output->flags = cpu_to_be16(last ? NFP_FL_OUT_FLAGS_LAST : 0);
+
+ ifindex = tcf_mirred_ifindex(action);
+ out_dev = __dev_get_by_index(dev_net(in_dev), ifindex); /* looked up in the ingress device's namespace */
+ if (!out_dev)
+ return -EOPNOTSUPP;
+
+ /* Only offload if the egress port is on the same device as the
+ * ingress port.
+ */
+ if (!switchdev_port_same_parent_id(in_dev, out_dev))
+ return -EOPNOTSUPP;
+
+ output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev));
+ if (!output->port) /* a port id of zero is treated as not offloadable */
+ return -EOPNOTSUPP;
+
+ nfp_flow->meta.shortcut = output->port; /* shortcut takes the big-endian port value as is */
+
+ return 0;
+}
+
+static int
+nfp_flower_loop_action(const struct tc_action *a,
+ struct nfp_fl_payload *nfp_fl, int *a_len,
+ struct net_device *netdev)
+{ /* Encode one tc action at action_data[*a_len], advancing *a_len by its size, and set meta.shortcut; -EOPNOTSUPP if unsupported or out of space. */
+ struct nfp_fl_push_vlan *psh_v;
+ struct nfp_fl_pop_vlan *pop_v;
+ struct nfp_fl_output *output;
+ int err;
+
+ if (is_tcf_gact_shot(a)) { /* drop: only the shortcut is set, no action data is written */
+ nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_DROP);
+ } else if (is_tcf_mirred_egress_redirect(a)) {
+ if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ)
+ return -EOPNOTSUPP;
+
+ output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
+ err = nfp_fl_output(output, a, nfp_fl, true, netdev); /* true: redirect sets NFP_FL_OUT_FLAGS_LAST */
+ if (err)
+ return err;
+
+ *a_len += sizeof(struct nfp_fl_output);
+ } else if (is_tcf_mirred_egress_mirror(a)) {
+ if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ)
+ return -EOPNOTSUPP;
+
+ output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
+ err = nfp_fl_output(output, a, nfp_fl, false, netdev); /* false: mirror leaves NFP_FL_OUT_FLAGS_LAST clear */
+ if (err)
+ return err;
+
+ *a_len += sizeof(struct nfp_fl_output);
+ } else if (is_tcf_vlan(a) && tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
+ if (*a_len + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ)
+ return -EOPNOTSUPP;
+
+ pop_v = (struct nfp_fl_pop_vlan *)&nfp_fl->action_data[*a_len];
+ nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_POPV);
+
+ nfp_fl_pop_vlan(pop_v);
+ *a_len += sizeof(struct nfp_fl_pop_vlan);
+ } else if (is_tcf_vlan(a) && tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
+ if (*a_len + sizeof(struct nfp_fl_push_vlan) > NFP_FL_MAX_A_SIZ)
+ return -EOPNOTSUPP;
+
+ psh_v = (struct nfp_fl_push_vlan *)&nfp_fl->action_data[*a_len];
+ nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); /* push-VLAN uses the NULL shortcut */
+
+ nfp_fl_push_vlan(psh_v, a);
+ *a_len += sizeof(struct nfp_fl_push_vlan);
+ } else {
+ /* Currently we do not handle any other actions. */
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/* nfp_flower_compile_action() - encode the tc actions of a flower rule.
+ * @flow: offload request; its exts carry the action list
+ * @netdev: ingress netdev, handed to the per-action encoder
+ * @nfp_flow: payload whose action_data, meta.act_len and meta.shortcut
+ * are filled in
+ *
+ * Return: 0 on success, the per-action encoder's error if an action is
+ * rejected, or -EOPNOTSUPP if the encoded list is too long to be
+ * recorded in meta.act_len.
+ */
+int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
+ struct net_device *netdev,
+ struct nfp_fl_payload *nfp_flow)
+{
+ int act_len = 0, act_cnt = 0, err;
+ const struct tc_action *a;
+ LIST_HEAD(actions);
+ u8 stored_len;
+
+ memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ);
+ nfp_flow->meta.act_len = 0;
+
+ tcf_exts_to_list(flow->exts, &actions);
+ list_for_each_entry(a, &actions, list) {
+ err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev);
+ if (err)
+ return err;
+ act_cnt++;
+ }
+
+ /* meta.act_len is a u8 while act_len may grow up to NFP_FL_MAX_A_SIZ;
+ * refuse the rule rather than record a silently truncated length.
+ */
+ stored_len = act_len;
+ if (stored_len != act_len)
+ return -EOPNOTSUPP;
+
+ /* We optimise when the action list is small; this can unfortunately
+ * not happen once we have more than one action in the action list.
+ */
+ if (act_cnt > 1)
+ nfp_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+
+ nfp_flow->meta.act_len = stored_len;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index 7761be436726..dd7fa9cf225f 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -36,6 +36,7 @@
#include <linux/skbuff.h>
#include <net/dst_metadata.h>
+#include "main.h"
#include "../nfpcore/nfp_cpp.h"
#include "../nfp_net_repr.h"
#include "./cmsg.h"
@@ -52,12 +53,7 @@ nfp_flower_cmsg_get_hdr(struct sk_buff *skb)
return (struct nfp_flower_cmsg_hdr *)skb->data;
}
-static void *nfp_flower_cmsg_get_data(struct sk_buff *skb)
-{
- return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN;
-}
-
-static struct sk_buff *
+struct sk_buff *
nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size,
enum nfp_flower_cmsg_type_port type)
{
@@ -79,9 +75,8 @@ nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size,
return skb;
}
-int nfp_flower_cmsg_portmod(struct net_device *netdev, bool carrier_ok)
+int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok)
{
- struct nfp_repr *repr = netdev_priv(netdev);
struct nfp_flower_cmsg_portmod *msg;
struct sk_buff *skb;
@@ -94,7 +89,7 @@ int nfp_flower_cmsg_portmod(struct net_device *netdev, bool carrier_ok)
msg->portnum = cpu_to_be32(repr->dst->u.port_info.port_id);
msg->reserved = 0;
msg->info = carrier_ok;
- msg->mtu = cpu_to_be16(netdev->mtu);
+ msg->mtu = cpu_to_be16(repr->netdev->mtu);
nfp_ctrl_tx(repr->app->ctrl, skb);
@@ -149,6 +144,9 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb)
case NFP_FLOWER_CMSG_TYPE_PORT_MOD:
nfp_flower_cmsg_portmod_rx(app, skb);
break;
+ case NFP_FLOWER_CMSG_TYPE_FLOW_STATS:
+ nfp_flower_rx_flow_stats(app, skb);
+ break;
default:
nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n",
type);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 2eeddada7f4d..cf738de170ab 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -40,6 +40,196 @@
#include "../nfp_app.h"
+#define NFP_FLOWER_LAYER_META BIT(0)
+#define NFP_FLOWER_LAYER_PORT BIT(1)
+#define NFP_FLOWER_LAYER_MAC BIT(2)
+#define NFP_FLOWER_LAYER_TP BIT(3)
+#define NFP_FLOWER_LAYER_IPV4 BIT(4)
+#define NFP_FLOWER_LAYER_IPV6 BIT(5)
+#define NFP_FLOWER_LAYER_CT BIT(6)
+#define NFP_FLOWER_LAYER_VXLAN BIT(7)
+
+#define NFP_FLOWER_LAYER_ETHER BIT(3)
+#define NFP_FLOWER_LAYER_ARP BIT(4)
+
+#define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13)
+#define NFP_FLOWER_MASK_VLAN_CFI BIT(12)
+#define NFP_FLOWER_MASK_VLAN_VID GENMASK(11, 0)
+
+#define NFP_FL_SC_ACT_DROP 0x80000000
+#define NFP_FL_SC_ACT_USER 0x7D000000
+#define NFP_FL_SC_ACT_POPV 0x6A000000
+#define NFP_FL_SC_ACT_NULL 0x00000000
+
+/* The maximum action list size (in bytes) supported by the NFP.
+ */
+#define NFP_FL_MAX_A_SIZ 1216
+#define NFP_FL_LW_SIZ 2
+
+/* Action opcodes */
+#define NFP_FL_ACTION_OPCODE_OUTPUT 0
+#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1
+#define NFP_FL_ACTION_OPCODE_POP_VLAN 2
+#define NFP_FL_ACTION_OPCODE_NUM 32
+
+#define NFP_FL_ACT_JMP_ID GENMASK(15, 8)
+#define NFP_FL_ACT_LEN_LW GENMASK(7, 0)
+
+#define NFP_FL_OUT_FLAGS_LAST BIT(15)
+#define NFP_FL_OUT_FLAGS_USE_TUN BIT(4)
+#define NFP_FL_OUT_FLAGS_TYPE_IDX GENMASK(2, 0)
+
+#define NFP_FL_PUSH_VLAN_PRIO GENMASK(15, 13)
+#define NFP_FL_PUSH_VLAN_CFI BIT(12)
+#define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0)
+
+struct nfp_fl_output { /* output action as written into a flow's action_data */
+ __be16 a_op; /* NFP_FL_ACT_JMP_ID opcode combined with NFP_FL_ACT_LEN_LW length */
+ __be16 flags; /* NFP_FL_OUT_FLAGS_* bits */
+ __be32 port; /* egress port id */
+};
+
+struct nfp_fl_push_vlan { /* push-VLAN action as written into a flow's action_data */
+ __be16 a_op; /* NFP_FL_ACT_JMP_ID opcode combined with NFP_FL_ACT_LEN_LW length */
+ __be16 reserved;
+ __be16 vlan_tpid;
+ __be16 vlan_tci; /* NFP_FL_PUSH_VLAN_PRIO, _CFI and _VID fields */
+};
+
+struct nfp_fl_pop_vlan { /* pop-VLAN action as written into a flow's action_data */
+ __be16 a_op; /* NFP_FL_ACT_JMP_ID opcode combined with NFP_FL_ACT_LEN_LW length */
+ __be16 reserved;
+};
+
+/* Metadata without L2 (1W/4B)
+ * ----------------------------------------------------------------
+ * 3 2 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | key_layers | mask_id | reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_meta_one {
+ u8 nfp_flow_key_layer; /* "key_layers" in the layout above */
+ u8 mask_id;
+ u16 reserved; /* declared as a plain u16, unlike the __be16 tci of nfp_flower_meta_two */
+};
+
+/* Metadata with L2 (1W/4B)
+ * ----------------------------------------------------------------
+ * 3 2 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | key_type | mask_id | PCP |p| vlan outermost VID |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * ^ ^
+ * NOTE: | TCI |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_meta_two {
+ u8 nfp_flow_key_layer; /* "key_type" in the layout above */
+ u8 mask_id;
+ __be16 tci; /* PCP, p and VID bits of the layout; see NFP_FLOWER_MASK_VLAN_* */
+};
+
+/* Port details (1W/4B)
+ * ----------------------------------------------------------------
+ * 3 2 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | port_ingress |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_in_port {
+ __be32 in_port; /* "port_ingress" in the layout above */
+};
+
+/* L2 details (4W/16B)
+ * 3 2 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | mac_addr_dst, 31 - 0 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | mac_addr_dst, 47 - 32 | mac_addr_src, 15 - 0 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | mac_addr_src, 47 - 16 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | mpls outermost label | TC |B| reserved |q|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_mac_mpls {
+ u8 mac_dst[6];
+ u8 mac_src[6];
+ __be32 mpls_lse; /* last word of the layout: label, TC, B, reserved, q */
+};
+
+/* L4 ports (for UDP, TCP, SCTP) (1W/4B)
+ * 3 2 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | port_src | port_dst |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_tp_ports {
+ __be16 port_src; /* first half-word of the layout */
+ __be16 port_dst; /* second half-word of the layout */
+};
+
+/* L3 IPv4 details (3W/12B)
+ * 3 2 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |    DSCP   |ECN|   protocol    |      ttl      |   reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv4_addr_src |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv4_addr_dst |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_ipv4 { /* 4 single-byte fields followed by two 32-bit addresses: 12 bytes */
+ u8 tos; /* DSCP and ECN bits */
+ u8 proto;
+ u8 ttl;
+ u8 reserved;
+ __be32 ipv4_src;
+ __be32 ipv4_dst;
+};
+
+/* L3 IPv6 details (10W/40B)
+ * 3 2 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |    DSCP   |ECN|   protocol    |      ttl      |   reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_exthdr | res | ipv6_flow_label |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_src, 31 - 0 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_src, 63 - 32 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_src, 95 - 64 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_src, 127 - 96 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_dst, 31 - 0 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_dst, 63 - 32 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_dst, 95 - 64 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_dst, 127 - 96 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_ipv6 { /* 4 single-byte fields, one 32-bit word and two 16-byte addresses: 40 bytes */
+ u8 tos; /* DSCP and ECN bits */
+ u8 proto;
+ u8 ttl;
+ u8 reserved;
+ __be32 ipv6_flow_label_exthdr; /* ipv6_exthdr, res and ipv6_flow_label of the layout, in one word */
+ struct in6_addr ipv6_src;
+ struct in6_addr ipv6_dst;
+};
+
/* The base header for a control message packet.
* Defines an 8-bit version, and an 8-bit type, padded
* to a 32-bit word. Rest of the packet is type-specific.
@@ -55,7 +245,10 @@ struct nfp_flower_cmsg_hdr {
/* Types defined for port related control messages */
enum nfp_flower_cmsg_type_port {
+ NFP_FLOWER_CMSG_TYPE_FLOW_ADD = 0,
+ NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2,
NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8,
+ NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15,
NFP_FLOWER_CMSG_TYPE_PORT_ECHO = 16,
NFP_FLOWER_CMSG_TYPE_MAX = 32,
};
@@ -110,7 +303,15 @@ nfp_flower_cmsg_pcie_port(u8 nfp_pcie, enum nfp_flower_cmsg_port_vnic_type type,
NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT);
}
-int nfp_flower_cmsg_portmod(struct net_device *netdev, bool carrier_ok);
+static inline void *nfp_flower_cmsg_get_data(struct sk_buff *skb)
+{ /* Return a pointer NFP_FLOWER_CMSG_HLEN bytes past skb->data. */
+ return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN;
+}
+
+int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok);
void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb);
+struct sk_buff *
+nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size,
+ enum nfp_flower_cmsg_type_port type);
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 8e5ca6b4bb33..5fe6d3582597 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -34,10 +34,13 @@
#include <linux/etherdevice.h>
#include <linux/pci.h>
#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
#include <net/devlink.h>
#include <net/dst_metadata.h>
+#include "main.h"
#include "../nfpcore/nfp_cpp.h"
+#include "../nfpcore/nfp_nffw.h"
#include "../nfpcore/nfp_nsp.h"
#include "../nfp_app.h"
#include "../nfp_main.h"
@@ -46,13 +49,7 @@
#include "../nfp_port.h"
#include "./cmsg.h"
-/**
- * struct nfp_flower_priv - Flower APP per-vNIC priv data
- * @nn: Pointer to vNIC
- */
-struct nfp_flower_priv {
- struct nfp_net *nn;
-};
+#define NFP_FLOWER_ALLOWED_VER 0x0001000000010000UL
static const char *nfp_flower_extra_cap(struct nfp_app *app, struct nfp_net *nn)
{
@@ -104,51 +101,30 @@ nfp_flower_repr_get(struct nfp_app *app, u32 port_id)
return reprs->reprs[port];
}
-static void
-nfp_flower_repr_netdev_get_stats64(struct net_device *netdev,
- struct rtnl_link_stats64 *stats)
-{
- struct nfp_repr *repr = netdev_priv(netdev);
- enum nfp_repr_type type;
- u32 port_id;
- u8 port = 0;
-
- port_id = repr->dst->u.port_info.port_id;
- type = nfp_flower_repr_get_type_and_port(repr->app, port_id, &port);
- nfp_repr_get_stats64(repr->app, type, port, stats);
-}
-
-static int nfp_flower_repr_netdev_open(struct net_device *netdev)
+static int
+nfp_flower_repr_netdev_open(struct nfp_app *app, struct nfp_repr *repr)
{
int err;
- err = nfp_flower_cmsg_portmod(netdev, true);
+ err = nfp_flower_cmsg_portmod(repr, true);
if (err)
return err;
- netif_carrier_on(netdev);
- netif_tx_wake_all_queues(netdev);
+ netif_carrier_on(repr->netdev);
+ netif_tx_wake_all_queues(repr->netdev);
return 0;
}
-static int nfp_flower_repr_netdev_stop(struct net_device *netdev)
+static int
+nfp_flower_repr_netdev_stop(struct nfp_app *app, struct nfp_repr *repr)
{
- netif_carrier_off(netdev);
- netif_tx_disable(netdev);
+ netif_carrier_off(repr->netdev);
+ netif_tx_disable(repr->netdev);
- return nfp_flower_cmsg_portmod(netdev, false);
+ return nfp_flower_cmsg_portmod(repr, false);
}
-static const struct net_device_ops nfp_flower_repr_netdev_ops = {
- .ndo_open = nfp_flower_repr_netdev_open,
- .ndo_stop = nfp_flower_repr_netdev_stop,
- .ndo_start_xmit = nfp_repr_xmit,
- .ndo_get_stats64 = nfp_flower_repr_netdev_get_stats64,
- .ndo_has_offload_stats = nfp_repr_has_offload_stats,
- .ndo_get_offload_stats = nfp_repr_get_offload_stats,
-};
-
static void nfp_flower_sriov_disable(struct nfp_app *app)
{
nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_VF);
@@ -162,14 +138,19 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
u8 nfp_pcie = nfp_cppcore_pcie_unit(app->pf->cpp);
struct nfp_flower_priv *priv = app->priv;
struct nfp_reprs *reprs, *old_reprs;
+ enum nfp_port_type port_type;
const u8 queue = 0;
int i, err;
+ port_type = repr_type == NFP_REPR_TYPE_PF ? NFP_PORT_PF_PORT :
+ NFP_PORT_VF_PORT;
+
reprs = nfp_reprs_alloc(cnt);
if (!reprs)
return -ENOMEM;
for (i = 0; i < cnt; i++) {
+ struct nfp_port *port;
u32 port_id;
reprs->reprs[i] = nfp_repr_alloc(app);
@@ -178,15 +159,24 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
goto err_reprs_clean;
}
+ port = nfp_port_alloc(app, port_type, reprs->reprs[i]);
+ if (repr_type == NFP_REPR_TYPE_PF) {
+ port->pf_id = i;
+ } else {
+ port->pf_id = 0; /* For now we only support 1 PF */
+ port->vf_id = i;
+ }
+
eth_hw_addr_random(reprs->reprs[i]);
port_id = nfp_flower_cmsg_pcie_port(nfp_pcie, vnic_type,
i, queue);
err = nfp_repr_init(app, reprs->reprs[i],
- &nfp_flower_repr_netdev_ops,
- port_id, NULL, priv->nn->dp.netdev);
- if (err)
+ port_id, port, priv->nn->dp.netdev);
+ if (err) {
+ nfp_port_free(port);
goto err_reprs_clean;
+ }
nfp_info(app->cpp, "%s%d Representor(%s) created\n",
repr_type == NFP_REPR_TYPE_PF ? "PF" : "VF", i,
@@ -260,7 +250,6 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
cmsg_port_id = nfp_flower_cmsg_phys_port(phys_port);
err = nfp_repr_init(app, reprs->reprs[phys_port],
- &nfp_flower_repr_netdev_ops,
cmsg_port_id, port, priv->nn->dp.netdev);
if (err) {
nfp_port_free(port);
@@ -296,26 +285,16 @@ static int nfp_flower_start(struct nfp_app *app)
NFP_REPR_TYPE_PF, 1);
}
-static void nfp_flower_vnic_clean(struct nfp_app *app, struct nfp_net *nn)
-{
- kfree(app->priv);
- app->priv = NULL;
-}
-
static int nfp_flower_vnic_init(struct nfp_app *app, struct nfp_net *nn,
unsigned int id)
{
- struct nfp_flower_priv *priv;
+ struct nfp_flower_priv *priv = app->priv;
if (id > 0) {
nfp_warn(app->cpp, "FlowerNIC doesn't support more than one data vNIC\n");
goto err_invalid_port;
}
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
- app->priv = priv;
priv->nn = nn;
eth_hw_addr_random(nn->dp.netdev);
@@ -331,6 +310,8 @@ err_invalid_port:
static int nfp_flower_init(struct nfp_app *app)
{
const struct nfp_pf *pf = app->pf;
+ u64 version;
+ int err;
if (!pf->eth_tbl) {
nfp_warn(app->cpp, "FlowerNIC requires eth table\n");
@@ -347,7 +328,37 @@ static int nfp_flower_init(struct nfp_app *app)
return -EINVAL;
}
+ version = nfp_rtsym_read_le(app->pf->rtbl, "hw_flower_version", &err);
+ if (err) {
+ nfp_warn(app->cpp, "FlowerNIC requires hw_flower_version memory symbol\n");
+ return err;
+ }
+
+ /* We need to ensure hardware has enough flower capabilities. */
+ if (version != NFP_FLOWER_ALLOWED_VER) {
+ nfp_warn(app->cpp, "FlowerNIC: unsupported firmware version\n");
+ return -EINVAL;
+ }
+
+ app->priv = vzalloc(sizeof(struct nfp_flower_priv));
+ if (!app->priv)
+ return -ENOMEM;
+
+ err = nfp_flower_metadata_init(app);
+ if (err)
+ goto err_free_app_priv;
+
return 0;
+
+err_free_app_priv:
+ vfree(app->priv);
+ return err;
+}
+
+static void nfp_flower_clean(struct nfp_app *app)
+{ /* NOTE(review): nfp_flower_init() calls nfp_flower_metadata_init(), but nfp_flower_metadata_cleanup() is not called here -- confirm it runs elsewhere. */
+ vfree(app->priv); /* pairs with the vzalloc() in nfp_flower_init() */
+ app->priv = NULL;
}
const struct nfp_app_type app_flower = {
@@ -358,9 +369,12 @@ const struct nfp_app_type app_flower = {
.extra_cap = nfp_flower_extra_cap,
.init = nfp_flower_init,
+ .clean = nfp_flower_clean,
.vnic_init = nfp_flower_vnic_init,
- .vnic_clean = nfp_flower_vnic_clean,
+
+ .repr_open = nfp_flower_repr_netdev_open,
+ .repr_stop = nfp_flower_repr_netdev_stop,
.start = nfp_flower_start,
.stop = nfp_flower_stop,
@@ -372,4 +386,6 @@ const struct nfp_app_type app_flower = {
.eswitch_mode_get = eswitch_mode_get,
.repr_get = nfp_flower_repr_get,
+
+ .setup_tc = nfp_flower_setup_tc,
};
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
new file mode 100644
index 000000000000..9e64c048e83f
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_FLOWER_H__
+#define __NFP_FLOWER_H__ 1
+
+#include <linux/circ_buf.h>
+#include <linux/hashtable.h>
+#include <linux/time64.h>
+#include <linux/types.h>
+
+struct tc_to_netdev;
+struct net_device;
+struct nfp_app;
+
+#define NFP_FL_STATS_ENTRY_RS BIT(20)
+#define NFP_FL_STATS_ELEM_RS 4
+#define NFP_FL_REPEATED_HASH_MAX BIT(17)
+#define NFP_FLOWER_HASH_BITS 19
+#define NFP_FLOWER_MASK_ENTRY_RS 256
+#define NFP_FLOWER_MASK_ELEMENT_RS 1
+#define NFP_FLOWER_MASK_HASH_BITS 10
+
+#define NFP_FL_META_FLAG_NEW_MASK 128
+#define NFP_FL_META_FLAG_LAST_MASK 1
+
+#define NFP_FL_MASK_REUSE_TIME_NS 40000
+#define NFP_FL_MASK_ID_LOCATION 1
+
+struct nfp_fl_mask_id { /* type of nfp_flower_priv.mask_ids, described there as the list of free mask ids */
+ struct circ_buf mask_id_free_list;
+ struct timespec64 *last_used; /* NOTE(review): presumably one timestamp per mask id, cf. NFP_FL_MASK_REUSE_TIME_NS -- confirm */
+ u8 init_unallocated;
+};
+
+struct nfp_fl_stats_id { /* type of nfp_flower_priv.stats_ids, described there as the list of free stats ids */
+ struct circ_buf free_list;
+ u32 init_unalloc;
+ u8 repeated_em_count;
+};
+
+/**
+ * struct nfp_flower_priv - Flower APP per-vNIC priv data
+ * @nn: Pointer to vNIC
+ * @mask_id_seed: Seed used for mask hash table
+ * @flower_version: HW version of flower
+ * @stats_ids: List of free stats ids
+ * @mask_ids: List of free mask ids
+ * @mask_table: Hash table used to store masks
+ * @flow_table: Hash table used to store flower rules
+ */
+struct nfp_flower_priv { /* stored in app->priv; nfp_flower_init() allocates it with vzalloc() */
+ struct nfp_net *nn; /* set by nfp_flower_vnic_init() */
+ u32 mask_id_seed;
+ u64 flower_version;
+ struct nfp_fl_stats_id stats_ids;
+ struct nfp_fl_mask_id mask_ids;
+ DECLARE_HASHTABLE(mask_table, NFP_FLOWER_MASK_HASH_BITS); /* 2^10 buckets */
+ DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS); /* 2^19 buckets */
+};
+
+struct nfp_fl_key_ls { /* passed to nfp_flower_compile_flow_match() as key_ls */
+ u32 key_layer_two;
+ u8 key_layer; /* NOTE(review): presumably a mask of NFP_FLOWER_LAYER_* bits -- confirm */
+ int key_size;
+};
+
+struct nfp_fl_rule_metadata { /* embedded as nfp_fl_payload.meta */
+ u8 key_len;
+ u8 mask_len;
+ u8 act_len; /* assigned from the int byte count built in nfp_flower_compile_action() */
+ u8 flags;
+ __be32 host_ctx_id;
+ __be64 host_cookie __packed;
+ __be64 flow_version __packed;
+ __be32 shortcut; /* an NFP_FL_SC_ACT_* value or the output port, set while compiling actions */
+};
+
+struct nfp_fl_stats { /* embedded as nfp_fl_payload.stats */
+ u64 pkts;
+ u64 bytes;
+ u64 used; /* NOTE(review): presumably a last-use timestamp; units are not visible here -- confirm */
+};
+
+struct nfp_fl_payload { /* one flow rule: metadata, stats and the key, mask and action buffers */
+ struct nfp_fl_rule_metadata meta;
+ unsigned long tc_flower_cookie; /* same type as the lookup key of nfp_flower_search_fl_table() */
+ struct hlist_node link;
+ struct rcu_head rcu;
+ spinlock_t lock; /* lock stats */
+ struct nfp_fl_stats stats;
+ char *unmasked_data;
+ char *mask_data;
+ char *action_data; /* must hold NFP_FL_MAX_A_SIZ bytes: nfp_flower_compile_action() clears that many */
+};
+
+struct nfp_fl_stats_frame { /* NOTE(review): presumably the entry format parsed by nfp_flower_rx_flow_stats() -- confirm */
+ __be32 stats_con_id;
+ __be32 pkt_count;
+ __be64 byte_count;
+ __be64 stats_cookie;
+};
+
+int nfp_flower_metadata_init(struct nfp_app *app);
+void nfp_flower_metadata_cleanup(struct nfp_app *app);
+
+int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
+ u32 handle, __be16 proto, struct tc_to_netdev *tc);
+int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
+ struct nfp_fl_key_ls *key_ls,
+ struct net_device *netdev,
+ struct nfp_fl_payload *nfp_flow);
+int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
+ struct net_device *netdev,
+ struct nfp_fl_payload *nfp_flow);
+int nfp_compile_flow_metadata(struct nfp_app *app,
+ struct tc_cls_flower_offload *flow,
+ struct nfp_fl_payload *nfp_flow);
+int nfp_modify_flow_metadata(struct nfp_app *app,
+ struct nfp_fl_payload *nfp_flow);
+
+struct nfp_fl_payload *
+nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
+struct nfp_fl_payload *
+nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
+
+void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb);
+
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
new file mode 100644
index 000000000000..0e08404480ef
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bitfield.h>
+#include <net/pkt_cls.h>
+
+#include "cmsg.h"
+#include "main.h"
+
+/* Fill in the leading metadata frame (key layer bitmap, mask id and VLAN
+ * TCI) of either the exact-match key or, when @mask_version is set, its mask.
+ */
+static void
+nfp_flower_compile_meta_tci(struct nfp_flower_meta_two *frame,
+			    struct tc_cls_flower_offload *flow, u8 key_type,
+			    bool mask_version)
+{
+	struct flow_dissector_key_vlan *flow_vlan;
+	u16 tmp_tci = 0;
+
+	/* Populate the metadata frame. */
+	frame->nfp_flow_key_layer = key_type;
+	frame->mask_id = ~0;
+
+	if (mask_version) {
+		frame->tci = cpu_to_be16(~0);
+		return;
+	}
+
+	/* Only dereference the VLAN key when the dissector really carries
+	 * it; for an unused key the target pointer does not refer to VLAN
+	 * data. Without a VLAN match the TCI stays zero.
+	 */
+	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+		flow_vlan = skb_flow_dissector_target(flow->dissector,
+						      FLOW_DISSECTOR_KEY_VLAN,
+						      flow->key);
+
+		/* Populate the tci field. */
+		if (flow_vlan->vlan_id)
+			tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+					     flow_vlan->vlan_priority) |
+				  FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
+					     flow_vlan->vlan_id) |
+				  NFP_FLOWER_MASK_VLAN_CFI;
+	}
+	frame->tci = cpu_to_be16(tmp_tci);
+}
+
+/* Populate a metadata frame that carries no TCI: record the key layer
+ * bitmap and clear the mask id and reserved fields.
+ */
+static void
+nfp_flower_compile_meta(struct nfp_flower_meta_one *frame, u8 key_type)
+{
+	frame->reserved = 0;
+	frame->mask_id = 0;
+	frame->nfp_flow_key_layer = key_type;
+}
+
+/* Write the ingress port frame: the port id itself for the exact-match key,
+ * all ones for the mask. Always returns 0.
+ */
+static int
+nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
+			bool mask_version)
+{
+	u32 port = mask_version ? ~0U : cmsg_port;
+
+	frame->in_port = cpu_to_be32(port);
+
+	return 0;
+}
+
+/* Fill in the MAC/MPLS frame of the exact-match key or, when @mask_version
+ * is set, of its mask. The frame is zeroed first; addresses are copied only
+ * if the rule actually matches on Ethernet addresses.
+ */
+static void
+nfp_flower_compile_mac(struct nfp_flower_mac_mpls *frame,
+		       struct tc_cls_flower_offload *flow,
+		       bool mask_version)
+{
+	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+	struct flow_dissector_key_eth_addrs *flow_mac;
+
+	memset(frame, 0, sizeof(struct nfp_flower_mac_mpls));
+
+	/* The dissector target is only valid for keys that are in use. */
+	if (dissector_uses_key(flow->dissector,
+			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		flow_mac = skb_flow_dissector_target(flow->dissector,
+						     FLOW_DISSECTOR_KEY_ETH_ADDRS,
+						     target);
+
+		/* Populate mac frame. */
+		ether_addr_copy(frame->mac_dst, &flow_mac->dst[0]);
+		ether_addr_copy(frame->mac_src, &flow_mac->src[0]);
+	}
+
+	if (mask_version)
+		frame->mpls_lse = cpu_to_be32(~0);
+}
+
+/* Fill in the transport port frame of the exact-match key or, when
+ * @mask_version is set, of its mask. The frame stays zeroed when the rule
+ * does not match on L4 ports.
+ */
+static void
+nfp_flower_compile_tport(struct nfp_flower_tp_ports *frame,
+			 struct tc_cls_flower_offload *flow,
+			 bool mask_version)
+{
+	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+	struct flow_dissector_key_ports *flow_tp;
+
+	memset(frame, 0, sizeof(struct nfp_flower_tp_ports));
+
+	/* The dissector target is only valid for keys that are in use. */
+	if (!dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_PORTS))
+		return;
+
+	flow_tp = skb_flow_dissector_target(flow->dissector,
+					    FLOW_DISSECTOR_KEY_PORTS,
+					    target);
+
+	frame->port_src = flow_tp->src;
+	frame->port_dst = flow_tp->dst;
+}
+
+/* Fill in the IPv4 frame of the exact-match key or, when @mask_version is
+ * set, of its mask. Addresses and protocol are copied only from dissector
+ * keys that are in use; everything else is left zero.
+ */
+static void
+nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
+			struct tc_cls_flower_offload *flow,
+			bool mask_version)
+{
+	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+	struct flow_dissector_key_ipv4_addrs *flow_ipv4;
+	struct flow_dissector_key_basic *flow_basic;
+
+	/* Zeroing also wildcards TOS/TTL for now. */
+	memset(frame, 0, sizeof(struct nfp_flower_ipv4));
+
+	/* Populate IPv4 frame. */
+	if (dissector_uses_key(flow->dissector,
+			       FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		flow_ipv4 = skb_flow_dissector_target(flow->dissector,
+						      FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						      target);
+		frame->ipv4_src = flow_ipv4->src;
+		frame->ipv4_dst = flow_ipv4->dst;
+	}
+
+	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+		flow_basic = skb_flow_dissector_target(flow->dissector,
+						       FLOW_DISSECTOR_KEY_BASIC,
+						       target);
+		frame->proto = flow_basic->ip_proto;
+	}
+}
+
+/* Fill in the IPv6 frame of the exact-match key or, when @mask_version is
+ * set, of its mask. Addresses and protocol are copied only from dissector
+ * keys that are in use; everything else is left zero.
+ */
+static void
+nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
+			struct tc_cls_flower_offload *flow,
+			bool mask_version)
+{
+	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+	struct flow_dissector_key_ipv6_addrs *flow_ipv6;
+	struct flow_dissector_key_basic *flow_basic;
+
+	/* Zeroing also wildcards LABEL/TOS/TTL for now. */
+	memset(frame, 0, sizeof(struct nfp_flower_ipv6));
+
+	/* Populate IPv6 frame. */
+	if (dissector_uses_key(flow->dissector,
+			       FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		flow_ipv6 = skb_flow_dissector_target(flow->dissector,
+						      FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+						      target);
+		frame->ipv6_src = flow_ipv6->src;
+		frame->ipv6_dst = flow_ipv6->dst;
+	}
+
+	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+		flow_basic = skb_flow_dissector_target(flow->dissector,
+						       FLOW_DISSECTOR_KEY_BASIC,
+						       target);
+		frame->proto = flow_basic->ip_proto;
+	}
+}
+/* Lay out the exact-match key (unmasked_data) and its mask (mask_data) of a
+ * flow. Both buffers are zeroed for key_ls->key_size bytes and then filled
+ * frame by frame, one frame per layer bit set in key_ls->key_layer, with the
+ * two cursors advancing in lock step. Returns 0 on success, the error from
+ * nfp_flower_compile_port(), or -EOPNOTSUPP when NFP_FLOWER_LAYER_META is
+ * requested.
+ */
+int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
+ struct nfp_fl_key_ls *key_ls,
+ struct net_device *netdev,
+ struct nfp_fl_payload *nfp_flow)
+{
+ int err;
+ u8 *ext; /* write cursor into the exact-match key */
+ u8 *msk; /* write cursor into the mask */
+
+ memset(nfp_flow->unmasked_data, 0, key_ls->key_size);
+ memset(nfp_flow->mask_data, 0, key_ls->key_size);
+
+ ext = nfp_flow->unmasked_data;
+ msk = nfp_flow->mask_data;
+ if (NFP_FLOWER_LAYER_PORT & key_ls->key_layer) {
+ /* Populate Exact Metadata. */
+ nfp_flower_compile_meta_tci((struct nfp_flower_meta_two *)ext,
+ flow, key_ls->key_layer, false);
+ /* Populate Mask Metadata. */
+ nfp_flower_compile_meta_tci((struct nfp_flower_meta_two *)msk,
+ flow, key_ls->key_layer, true);
+ ext += sizeof(struct nfp_flower_meta_two);
+ msk += sizeof(struct nfp_flower_meta_two);
+
+ /* Populate Exact Port data. */
+ err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext,
+ nfp_repr_get_port_id(netdev),
+ false);
+ if (err)
+ return err;
+
+ /* Populate Mask Port Data. */
+ err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
+ nfp_repr_get_port_id(netdev),
+ true);
+ if (err)
+ return err;
+
+ ext += sizeof(struct nfp_flower_in_port);
+ msk += sizeof(struct nfp_flower_in_port);
+ } else {
+ /* Populate Exact Metadata. */
+ nfp_flower_compile_meta((struct nfp_flower_meta_one *)ext,
+ key_ls->key_layer);
+ /* Populate Mask Metadata. */
+ nfp_flower_compile_meta((struct nfp_flower_meta_one *)msk,
+ key_ls->key_layer);
+ ext += sizeof(struct nfp_flower_meta_one);
+ msk += sizeof(struct nfp_flower_meta_one);
+ }
+
+ if (NFP_FLOWER_LAYER_META & key_ls->key_layer) {
+ /* Additional Metadata Fields.
+ * Currently unsupported.
+ */
+ return -EOPNOTSUPP;
+ }
+
+ if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) {
+ /* Populate Exact MAC Data. */
+ nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext,
+ flow, false);
+ /* Populate Mask MAC Data. */
+ nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)msk,
+ flow, true);
+ ext += sizeof(struct nfp_flower_mac_mpls);
+ msk += sizeof(struct nfp_flower_mac_mpls);
+ }
+
+ if (NFP_FLOWER_LAYER_TP & key_ls->key_layer) {
+ /* Populate Exact TP Data. */
+ nfp_flower_compile_tport((struct nfp_flower_tp_ports *)ext,
+ flow, false);
+ /* Populate Mask TP Data. */
+ nfp_flower_compile_tport((struct nfp_flower_tp_ports *)msk,
+ flow, true);
+ ext += sizeof(struct nfp_flower_tp_ports);
+ msk += sizeof(struct nfp_flower_tp_ports);
+ }
+
+ if (NFP_FLOWER_LAYER_IPV4 & key_ls->key_layer) {
+ /* Populate Exact IPv4 Data. */
+ nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)ext,
+ flow, false);
+ /* Populate Mask IPv4 Data. */
+ nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)msk,
+ flow, true);
+ ext += sizeof(struct nfp_flower_ipv4);
+ msk += sizeof(struct nfp_flower_ipv4);
+ }
+
+ if (NFP_FLOWER_LAYER_IPV6 & key_ls->key_layer) {
+ /* Populate Exact IPv6 Data. */
+ nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)ext,
+ flow, false);
+ /* Populate Mask IPv6 Data. */
+ nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)msk,
+ flow, true);
+ ext += sizeof(struct nfp_flower_ipv6);
+ msk += sizeof(struct nfp_flower_ipv6);
+ }
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
new file mode 100644
index 000000000000..fec0ff2ca94f
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/hash.h>
+#include <linux/hashtable.h>
+#include <linux/jhash.h>
+#include <linux/vmalloc.h>
+#include <net/pkt_cls.h>
+
+#include "cmsg.h"
+#include "main.h"
+#include "../nfp_app.h"
+
+struct nfp_mask_id_table { /* one in-use mask id, kept in priv->mask_table */
+ struct hlist_node link; /* node in priv->mask_table */
+ u32 hash_key; /* jhash of the mask data, seeded with priv->mask_id_seed */
+ u32 ref_cnt; /* number of flows currently sharing this mask */
+ u8 mask_id; /* id obtained from nfp_mask_alloc() */
+};
+
+/* Return a stats context id to the free-list ring.
+ * Returns 0 on success or -ENOBUFS when the ring has no room left.
+ */
+static int nfp_release_stats_entry(struct nfp_app *app, u32 stats_context_id)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct circ_buf *free_list = &priv->stats_ids.free_list;
+
+	/* Check if buffer is full. */
+	if (CIRC_SPACE(free_list->head, free_list->tail,
+		       NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS -
+		       NFP_FL_STATS_ELEM_RS + 1) == 0)
+		return -ENOBUFS;
+
+	/* Store the id at the head and advance the head by one element. */
+	memcpy(&free_list->buf[free_list->head], &stats_context_id,
+	       NFP_FL_STATS_ELEM_RS);
+	free_list->head = (free_list->head + NFP_FL_STATS_ELEM_RS) %
+			  (NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS);
+
+	return 0;
+}
+
+/* Hand out a stats context id: never-used ids are consumed first, after
+ * that ids are taken from the tail of the free-list ring.
+ * Returns 0 on success; on -ENOENT *stats_context_id is set to
+ * NFP_FL_STATS_ENTRY_RS.
+ */
+static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct circ_buf *free_list = &priv->stats_ids.free_list;
+	u32 invalid_id = NFP_FL_STATS_ENTRY_RS;
+	u32 recycled_id;
+
+	/* Check for unallocated entries first. */
+	if (priv->stats_ids.init_unalloc > 0) {
+		priv->stats_ids.init_unalloc--;
+		*stats_context_id = priv->stats_ids.init_unalloc;
+		return 0;
+	}
+
+	/* Check if buffer is empty. */
+	if (free_list->head == free_list->tail) {
+		*stats_context_id = invalid_id;
+		return -ENOENT;
+	}
+
+	/* Pop the id at the tail and overwrite its slot with the marker. */
+	memcpy(&recycled_id, &free_list->buf[free_list->tail],
+	       NFP_FL_STATS_ELEM_RS);
+	memcpy(&free_list->buf[free_list->tail], &invalid_id,
+	       NFP_FL_STATS_ELEM_RS);
+	free_list->tail = (free_list->tail + NFP_FL_STATS_ELEM_RS) %
+			  (NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS);
+	*stats_context_id = recycled_id;
+
+	return 0;
+}
+
+/* Look up a flow by its TC flower cookie; returns NULL when none matches.
+ * Must be called with either RTNL or rcu_read_lock
+ */
+struct nfp_fl_payload *
+nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_fl_payload *candidate;
+
+	hash_for_each_possible_rcu(priv->flow_table, candidate, link,
+				   tc_flower_cookie) {
+		if (candidate->tc_flower_cookie != tc_flower_cookie)
+			continue;
+
+		return candidate;
+	}
+
+	return NULL;
+}
+
+/* Apply one received stats frame to the flow it names. Frames whose cookie
+ * is unknown or whose context id differs from the flow's are ignored.
+ */
+static void
+nfp_flower_update_stats(struct nfp_app *app, struct nfp_fl_stats_frame *stats)
+{
+	unsigned long cookie = be64_to_cpu(stats->stats_cookie);
+	struct nfp_fl_payload *flow_entry;
+
+	rcu_read_lock();
+	flow_entry = nfp_flower_search_fl_table(app, cookie);
+	if (flow_entry &&
+	    flow_entry->meta.host_ctx_id == stats->stats_con_id) {
+		spin_lock(&flow_entry->lock);
+		flow_entry->stats.pkts += be32_to_cpu(stats->pkt_count);
+		flow_entry->stats.bytes += be64_to_cpu(stats->byte_count);
+		flow_entry->stats.used = jiffies;
+		spin_unlock(&flow_entry->lock);
+	}
+	rcu_read_unlock();
+}
+
+/* Handle a flow stats control message: the payload after the control
+ * message header is treated as an array of struct nfp_fl_stats_frame and
+ * every complete frame is applied to its flow.
+ */
+void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb)
+{
+	struct nfp_fl_stats_frame *stats_frame;
+	unsigned int frame_cnt, i;
+	unsigned char *msg;
+
+	/* A message shorter than the header would make the unsigned length
+	 * computation below wrap around, so ignore it.
+	 */
+	if (skb->len < NFP_FLOWER_CMSG_HLEN)
+		return;
+
+	frame_cnt = (skb->len - NFP_FLOWER_CMSG_HLEN) / sizeof(*stats_frame);
+	msg = nfp_flower_cmsg_get_data(skb);
+
+	stats_frame = (struct nfp_fl_stats_frame *)msg;
+	for (i = 0; i < frame_cnt; i++)
+		nfp_flower_update_stats(app, stats_frame + i);
+}
+
+/* Put a mask id back on the free-list ring and record the release time.
+ * Returns 0 on success or -ENOBUFS when the ring is full.
+ */
+static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct circ_buf *free_list = &priv->mask_ids.mask_id_free_list;
+	struct timespec64 released_at;
+
+	/* Checking if buffer is full. */
+	if (!CIRC_SPACE(free_list->head, free_list->tail,
+			NFP_FLOWER_MASK_ENTRY_RS))
+		return -ENOBUFS;
+
+	memcpy(&free_list->buf[free_list->head], &mask_id,
+	       NFP_FLOWER_MASK_ELEMENT_RS);
+	free_list->head = (free_list->head + NFP_FLOWER_MASK_ELEMENT_RS) %
+			  (NFP_FLOWER_MASK_ENTRY_RS *
+			   NFP_FLOWER_MASK_ELEMENT_RS);
+
+	/* Remember when the id was freed; nfp_mask_alloc() checks it. */
+	getnstimeofday64(&released_at);
+	priv->mask_ids.last_used[mask_id] = released_at;
+
+	return 0;
+}
+
+/* Allocate a mask id: never-used ids are consumed first, then the id at the
+ * tail of the free-list ring is reused provided it was released at least
+ * NFP_FL_MASK_REUSE_TIME_NS ago.
+ * Returns 0 on success; on -ENOENT *mask_id is set to
+ * NFP_FLOWER_MASK_ENTRY_RS - 1.
+ */
+static int nfp_mask_alloc(struct nfp_app *app, u8 *mask_id)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct circ_buf *free_list = &priv->mask_ids.mask_id_free_list;
+	u8 invalid_id = NFP_FLOWER_MASK_ENTRY_RS - 1;
+	struct timespec64 idle_time, current_time;
+	u8 candidate;
+
+	/* Checking for unallocated entries first. */
+	if (priv->mask_ids.init_unallocated > 0) {
+		*mask_id = priv->mask_ids.init_unallocated;
+		priv->mask_ids.init_unallocated--;
+		return 0;
+	}
+
+	/* Checking if buffer is empty. */
+	if (free_list->head == free_list->tail)
+		goto err_none_available;
+
+	memcpy(&candidate, &free_list->buf[free_list->tail],
+	       NFP_FLOWER_MASK_ELEMENT_RS);
+	*mask_id = candidate;
+
+	/* The oldest released id must have been idle long enough. */
+	getnstimeofday64(&current_time);
+	idle_time = timespec64_sub(current_time,
+				   priv->mask_ids.last_used[*mask_id]);
+	if (timespec64_to_ns(&idle_time) < NFP_FL_MASK_REUSE_TIME_NS)
+		goto err_none_available;
+
+	/* Consume the slot: overwrite it with the marker, advance the tail. */
+	memcpy(&free_list->buf[free_list->tail], &invalid_id,
+	       NFP_FLOWER_MASK_ELEMENT_RS);
+	free_list->tail = (free_list->tail + NFP_FLOWER_MASK_ELEMENT_RS) %
+			  (NFP_FLOWER_MASK_ENTRY_RS *
+			   NFP_FLOWER_MASK_ELEMENT_RS);
+
+	return 0;
+
+err_none_available:
+	*mask_id = invalid_id;
+	return -ENOENT;
+}
+
+/* Allocate a mask id for @mask_data and insert a new entry with a reference
+ * count of one into the mask table.
+ * Returns the mask id, -ENOENT when no id is available or -ENOMEM.
+ */
+static int
+nfp_add_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_mask_id_table *entry;
+	unsigned long hash_key;
+	u8 id;
+
+	if (nfp_mask_alloc(app, &id))
+		return -ENOENT;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		/* Give the id back, it was never put to use. */
+		nfp_release_mask_id(app, id);
+		return -ENOMEM;
+	}
+
+	hash_key = jhash(mask_data, mask_len, priv->mask_id_seed);
+
+	INIT_HLIST_NODE(&entry->link);
+	entry->hash_key = hash_key;
+	entry->ref_cnt = 1;
+	entry->mask_id = id;
+	hash_add(priv->mask_table, &entry->link, hash_key);
+
+	return id;
+}
+
+/* Find the mask table entry whose stored hash equals the jhash of
+ * @mask_data; returns NULL when there is none.
+ */
+static struct nfp_mask_id_table *
+nfp_search_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_mask_id_table *entry;
+	unsigned long hash_key;
+
+	hash_key = jhash(mask_data, mask_len, priv->mask_id_seed);
+
+	hash_for_each_possible(priv->mask_table, entry, link, hash_key) {
+		if (entry->hash_key != hash_key)
+			continue;
+
+		return entry;
+	}
+
+	return NULL;
+}
+
+/* Take one more reference on an existing mask table entry.
+ * Returns its mask id (widened to int) or -ENOENT when the mask is unknown.
+ */
+static int
+nfp_find_in_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len)
+{
+	struct nfp_mask_id_table *entry;
+
+	entry = nfp_search_mask_table(app, mask_data, mask_len);
+	if (entry) {
+		entry->ref_cnt++;
+		return entry->mask_id;
+	}
+
+	return -ENOENT;
+}
+
+/* Obtain a mask id for @mask_data, reusing an existing table entry when
+ * there is one. NFP_FL_META_FLAG_NEW_MASK is set in *meta_flags when a new
+ * entry had to be created. Returns false if no id could be obtained.
+ */
+static bool
+nfp_check_mask_add(struct nfp_app *app, char *mask_data, u32 mask_len,
+		   u8 *meta_flags, u8 *mask_id)
+{
+	int found;
+
+	found = nfp_find_in_mask_table(app, mask_data, mask_len);
+	if (found >= 0) {
+		*mask_id = found;
+		return true;
+	}
+
+	found = nfp_add_mask_table(app, mask_data, mask_len);
+	if (found < 0)
+		return false;
+
+	*meta_flags |= NFP_FL_META_FLAG_NEW_MASK;
+	*mask_id = found;
+
+	return true;
+}
+
+/* Drop one reference on the mask table entry of @mask_data and report its
+ * id through *mask_id. When the last reference goes away the entry is freed,
+ * its id is released and, if @meta_flags is not NULL,
+ * NFP_FL_META_FLAG_LAST_MASK is set. Returns false if the mask is unknown.
+ */
+static bool
+nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len,
+		      u8 *meta_flags, u8 *mask_id)
+{
+	struct nfp_mask_id_table *entry;
+
+	entry = nfp_search_mask_table(app, mask_data, mask_len);
+	if (!entry)
+		return false;
+
+	*mask_id = entry->mask_id;
+	entry->ref_cnt--;
+	if (entry->ref_cnt)
+		return true;
+
+	/* That was the last user of this mask. */
+	hash_del(&entry->link);
+	nfp_release_mask_id(app, *mask_id);
+	kfree(entry);
+	if (meta_flags)
+		*meta_flags |= NFP_FL_META_FLAG_LAST_MASK;
+
+	return true;
+}
+
+/* Fill in the metadata of a new flow: reserve a stats context id, obtain a
+ * mask id, stamp the flow version and reset the counters. If a flow with the
+ * same cookie is already in the flow table the reserved resources are handed
+ * back and -EEXIST is returned.
+ */
+int nfp_compile_flow_metadata(struct nfp_app *app,
+			      struct tc_cls_flower_offload *flow,
+			      struct nfp_fl_payload *nfp_flow)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	u8 mask_id = 0;
+	u32 ctx_id;
+
+	if (nfp_get_stats_entry(app, &ctx_id))
+		return -ENOENT;
+
+	nfp_flow->meta.host_ctx_id = cpu_to_be32(ctx_id);
+	nfp_flow->meta.host_cookie = cpu_to_be64(flow->cookie);
+
+	if (!nfp_check_mask_add(app, nfp_flow->mask_data,
+				nfp_flow->meta.mask_len,
+				&nfp_flow->meta.flags, &mask_id)) {
+		if (nfp_release_stats_entry(app, ctx_id))
+			return -EINVAL;
+		return -ENOENT;
+	}
+
+	nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version);
+	priv->flower_version++;
+
+	/* Update flow payload with mask ids. */
+	nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = mask_id;
+	nfp_flow->stats.pkts = 0;
+	nfp_flow->stats.bytes = 0;
+	nfp_flow->stats.used = jiffies;
+
+	if (!nfp_flower_search_fl_table(app, flow->cookie))
+		return 0;
+
+	/* Duplicate cookie: undo the reservations made above. */
+	if (nfp_release_stats_entry(app, ctx_id))
+		return -EINVAL;
+
+	if (!nfp_check_mask_remove(app, nfp_flow->mask_data,
+				   nfp_flow->meta.mask_len,
+				   NULL, &mask_id))
+		return -EINVAL;
+
+	return -EEXIST;
+}
+
+/* Prepare the metadata of a flow that is going away: drop its mask
+ * reference, stamp a fresh flow version and release its stats context id.
+ * Returns the result of nfp_release_stats_entry().
+ */
+int nfp_modify_flow_metadata(struct nfp_app *app,
+			     struct nfp_fl_payload *nfp_flow)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	u8 mask_id = 0;
+	u32 ctx_id;
+
+	nfp_check_mask_remove(app, nfp_flow->mask_data,
+			      nfp_flow->meta.mask_len, &nfp_flow->meta.flags,
+			      &mask_id);
+
+	nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version);
+	priv->flower_version++;
+
+	/* Update flow payload with mask ids. */
+	nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = mask_id;
+
+	/* Release the stats ctx id. */
+	ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id);
+
+	return nfp_release_stats_entry(app, ctx_id);
+}
+
+/* Set up the flow and mask hash tables, the mask id hash seed, the mask id
+ * free list with its release timestamps, and the stats id free list.
+ * Returns 0 on success or -ENOMEM, in which case everything allocated here
+ * has been freed again.
+ */
+int nfp_flower_metadata_init(struct nfp_app *app)
+{
+	struct nfp_flower_priv *priv = app->priv;
+
+	hash_init(priv->mask_table);
+	hash_init(priv->flow_table);
+	get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed));
+
+	/* Init ring buffer and unallocated mask_ids. */
+	priv->mask_ids.mask_id_free_list.buf =
+		kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS,
+			      NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL);
+	if (!priv->mask_ids.mask_id_free_list.buf)
+		return -ENOMEM;
+
+	priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1;
+
+	/* Init timestamps for mask id. */
+	priv->mask_ids.last_used =
+		kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS,
+			      sizeof(*priv->mask_ids.last_used), GFP_KERNEL);
+	if (!priv->mask_ids.last_used)
+		goto err_free_mask_id;
+
+	/* Init ring buffer and unallocated stats_ids. */
+	priv->stats_ids.free_list.buf =
+		vmalloc(NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS);
+	if (!priv->stats_ids.free_list.buf)
+		goto err_free_last_used;
+
+	priv->stats_ids.init_unalloc = NFP_FL_REPEATED_HASH_MAX;
+
+	return 0;
+
+err_free_last_used:
+	/* The stats ring allocation is what failed here, so the resource to
+	 * unwind is the timestamp array allocated just before it.
+	 */
+	kfree(priv->mask_ids.last_used);
+err_free_mask_id:
+	kfree(priv->mask_ids.mask_id_free_list.buf);
+	return -ENOMEM;
+}
+
+/* Free the buffers allocated by nfp_flower_metadata_init(); does nothing
+ * when the app has no private data.
+ */
+void nfp_flower_metadata_cleanup(struct nfp_app *app)
+{
+	struct nfp_flower_priv *priv = app->priv;
+
+	if (priv) {
+		kfree(priv->mask_ids.mask_id_free_list.buf);
+		kfree(priv->mask_ids.last_used);
+		vfree(priv->stats_ids.free_list.buf);
+	}
+}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
new file mode 100644
index 000000000000..4ad10bd5e139
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -0,0 +1,400 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/skbuff.h>
+#include <net/devlink.h>
+#include <net/pkt_cls.h>
+
+#include "cmsg.h"
+#include "main.h"
+#include "../nfpcore/nfp_cpp.h"
+#include "../nfpcore/nfp_nsp.h"
+#include "../nfp_app.h"
+#include "../nfp_main.h"
+#include "../nfp_net.h"
+#include "../nfp_port.h"
+
+/* Build a flow control message of type @mtype (metadata, key, mask and
+ * actions back to back) and send it on the control channel.
+ * Returns 0 on success or -ENOMEM if the message could not be allocated, in
+ * which case @nfp_flow is left untouched.
+ */
+static int
+nfp_flower_xmit_flow(struct net_device *netdev,
+		     struct nfp_fl_payload *nfp_flow, u8 mtype)
+{
+	u32 meta_len, key_len, mask_len, act_len, tot_len;
+	struct nfp_repr *priv = netdev_priv(netdev);
+	struct sk_buff *skb;
+	unsigned char *msg;
+
+	meta_len = sizeof(struct nfp_fl_rule_metadata);
+	key_len = nfp_flow->meta.key_len;
+	mask_len = nfp_flow->meta.mask_len;
+	act_len = nfp_flow->meta.act_len;
+
+	tot_len = meta_len + key_len + mask_len + act_len;
+
+	/* Allocate before touching the metadata so that a failure cannot
+	 * leave the lengths converted to long words.
+	 */
+	skb = nfp_flower_cmsg_alloc(priv->app, tot_len, mtype);
+	if (!skb)
+		return -ENOMEM;
+
+	/* Convert to long words as firmware expects
+	 * lengths in units of NFP_FL_LW_SIZ.
+	 */
+	nfp_flow->meta.key_len >>= NFP_FL_LW_SIZ;
+	nfp_flow->meta.mask_len >>= NFP_FL_LW_SIZ;
+	nfp_flow->meta.act_len >>= NFP_FL_LW_SIZ;
+
+	msg = nfp_flower_cmsg_get_data(skb);
+	memcpy(msg, &nfp_flow->meta, meta_len);
+	memcpy(&msg[meta_len], nfp_flow->unmasked_data, key_len);
+	memcpy(&msg[meta_len + key_len], nfp_flow->mask_data, mask_len);
+	memcpy(&msg[meta_len + key_len + mask_len],
+	       nfp_flow->action_data, act_len);
+
+	/* Convert back to bytes as software expects
+	 * lengths in units of bytes.
+	 */
+	nfp_flow->meta.key_len <<= NFP_FL_LW_SIZ;
+	nfp_flow->meta.mask_len <<= NFP_FL_LW_SIZ;
+	nfp_flow->meta.act_len <<= NFP_FL_LW_SIZ;
+
+	nfp_ctrl_tx(priv->app->ctrl, skb);
+
+	return 0;
+}
+
+/* Report whether the rule uses any of the IPv4 address, IPv6 address, port
+ * or ICMP dissector keys.
+ */
+static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f)
+{
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS))
+		return true;
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IPV6_ADDRS))
+		return true;
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS))
+		return true;
+
+	return dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ICMP);
+}
+
+/* Work out which key layers a flower rule needs and how large the
+ * resulting key is, storing both in @ret_key_ls.
+ * Returns 0 on success or -EOPNOTSUPP for rules that cannot be offloaded
+ * (tunnels, ARP, unsupported ethertype/IP protocol combinations).
+ */
+static int
+nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
+				struct tc_cls_flower_offload *flow)
+{
+	struct flow_dissector_key_control *mask_enc_ctl;
+	struct flow_dissector_key_basic *mask_basic = NULL;
+	struct flow_dissector_key_basic *key_basic = NULL;
+	u32 key_layer_two;
+	u8 key_layer;
+	int key_size;
+
+	/* Dissector targets are only dereferenced for keys that are in use;
+	 * for an unused key the target pointer does not refer to that key's
+	 * data.
+	 */
+	if (dissector_uses_key(flow->dissector,
+			       FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+		mask_enc_ctl = skb_flow_dissector_target(flow->dissector,
+							 FLOW_DISSECTOR_KEY_ENC_CONTROL,
+							 flow->mask);
+
+		/* We are expecting a tunnel. For now we ignore offloading. */
+		if (mask_enc_ctl->addr_type)
+			return -EOPNOTSUPP;
+	}
+
+	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+		mask_basic = skb_flow_dissector_target(flow->dissector,
+						       FLOW_DISSECTOR_KEY_BASIC,
+						       flow->mask);
+
+		key_basic = skb_flow_dissector_target(flow->dissector,
+						      FLOW_DISSECTOR_KEY_BASIC,
+						      flow->key);
+	}
+
+	key_layer_two = 0;
+	key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC;
+	key_size = sizeof(struct nfp_flower_meta_one) +
+		   sizeof(struct nfp_flower_in_port) +
+		   sizeof(struct nfp_flower_mac_mpls);
+
+	if (mask_basic && mask_basic->n_proto) {
+		/* Ethernet type is present in the key. */
+		switch (key_basic->n_proto) {
+		case cpu_to_be16(ETH_P_IP):
+			key_layer |= NFP_FLOWER_LAYER_IPV4;
+			key_size += sizeof(struct nfp_flower_ipv4);
+			break;
+
+		case cpu_to_be16(ETH_P_IPV6):
+			key_layer |= NFP_FLOWER_LAYER_IPV6;
+			key_size += sizeof(struct nfp_flower_ipv6);
+			break;
+
+		/* Currently we do not offload ARP
+		 * because we rely on it to get to the host.
+		 */
+		case cpu_to_be16(ETH_P_ARP):
+			return -EOPNOTSUPP;
+
+		/* Will be included in layer 2. */
+		case cpu_to_be16(ETH_P_8021Q):
+			break;
+
+		default:
+			/* Other ethtype - we need check the masks for the
+			 * remainder of the key to ensure we can offload.
+			 */
+			if (nfp_flower_check_higher_than_mac(flow))
+				return -EOPNOTSUPP;
+			break;
+		}
+	}
+
+	if (mask_basic && mask_basic->ip_proto) {
+		/* IP protocol is present in the key. */
+		switch (key_basic->ip_proto) {
+		case IPPROTO_TCP:
+		case IPPROTO_UDP:
+		case IPPROTO_SCTP:
+		case IPPROTO_ICMP:
+		case IPPROTO_ICMPV6:
+			key_layer |= NFP_FLOWER_LAYER_TP;
+			key_size += sizeof(struct nfp_flower_tp_ports);
+			break;
+		default:
+			/* Other ip proto - not offloaded. */
+			return -EOPNOTSUPP;
+		}
+	}
+
+	ret_key_ls->key_layer = key_layer;
+	ret_key_ls->key_layer_two = key_layer_two;
+	ret_key_ls->key_size = key_size;
+
+	return 0;
+}
+
+/* Allocate a flow payload together with its key, mask and action buffers.
+ * Key and mask are each key_layer->key_size bytes, the action buffer is
+ * NFP_FL_MAX_A_SIZ bytes. Returns NULL if any allocation fails.
+ */
+static struct nfp_fl_payload *
+nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
+{
+	struct nfp_fl_payload *payload;
+
+	payload = kmalloc(sizeof(*payload), GFP_KERNEL);
+	if (!payload)
+		return NULL;
+
+	payload->meta.key_len = key_layer->key_size;
+	payload->unmasked_data = kmalloc(key_layer->key_size, GFP_KERNEL);
+	if (!payload->unmasked_data)
+		goto err_free_payload;
+
+	payload->meta.mask_len = key_layer->key_size;
+	payload->mask_data = kmalloc(key_layer->key_size, GFP_KERNEL);
+	if (!payload->mask_data)
+		goto err_free_key;
+
+	payload->action_data = kmalloc(NFP_FL_MAX_A_SIZ, GFP_KERNEL);
+	if (!payload->action_data)
+		goto err_free_mask_data;
+
+	spin_lock_init(&payload->lock);
+	payload->meta.flags = 0;
+
+	return payload;
+
+err_free_mask_data:
+	kfree(payload->mask_data);
+err_free_key:
+	kfree(payload->unmasked_data);
+err_free_payload:
+	kfree(payload);
+	return NULL;
+}
+
+/**
+ * nfp_flower_add_offload() - Adds a new flow to hardware.
+ * @app:	Pointer to the APP handle
+ * @netdev:	netdev structure.
+ * @flow:	TC flower classifier offload structure.
+ *
+ * Adds a new flow to the repeated hash structure and action payload.
+ * On failure every resource taken for the flow, including its stats context
+ * id and mask id reference, is released again.
+ *
+ * Return: negative value on error, 0 if configured successfully.
+ */
+static int
+nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
+		       struct tc_cls_flower_offload *flow)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_fl_payload *flow_pay;
+	struct nfp_fl_key_ls *key_layer;
+	int err;
+
+	key_layer = kmalloc(sizeof(*key_layer), GFP_KERNEL);
+	if (!key_layer)
+		return -ENOMEM;
+
+	err = nfp_flower_calculate_key_layers(key_layer, flow);
+	if (err)
+		goto err_free_key_ls;
+
+	flow_pay = nfp_flower_allocate_new(key_layer);
+	if (!flow_pay) {
+		err = -ENOMEM;
+		goto err_free_key_ls;
+	}
+
+	err = nfp_flower_compile_flow_match(flow, key_layer, netdev, flow_pay);
+	if (err)
+		goto err_destroy_flow;
+
+	err = nfp_flower_compile_action(flow, netdev, flow_pay);
+	if (err)
+		goto err_destroy_flow;
+
+	err = nfp_compile_flow_metadata(app, flow, flow_pay);
+	if (err)
+		goto err_destroy_flow;
+
+	err = nfp_flower_xmit_flow(netdev, flow_pay,
+				   NFP_FLOWER_CMSG_TYPE_FLOW_ADD);
+	if (err)
+		goto err_release_metadata;
+
+	INIT_HLIST_NODE(&flow_pay->link);
+	flow_pay->tc_flower_cookie = flow->cookie;
+	hash_add_rcu(priv->flow_table, &flow_pay->link, flow->cookie);
+
+	/* The flow payload now lives in the flow table until the rule is
+	 * deleted; only the temporary key layer description is freed here.
+	 */
+	kfree(key_layer);
+
+	return 0;
+
+err_release_metadata:
+	/* Hand back the stats context id and the mask id reference taken by
+	 * nfp_compile_flow_metadata(); the transmit error is what gets
+	 * reported.
+	 */
+	nfp_modify_flow_metadata(app, flow_pay);
+err_destroy_flow:
+	kfree(flow_pay->action_data);
+	kfree(flow_pay->mask_data);
+	kfree(flow_pay->unmasked_data);
+	kfree(flow_pay);
+err_free_key_ls:
+	kfree(key_layer);
+	return err;
+}
+
+/**
+ * nfp_flower_del_offload() - Removes a flow from hardware.
+ * @app:	Pointer to the APP handle
+ * @netdev:	netdev structure.
+ * @flow:	TC flower classifier offload structure
+ *
+ * Looks the flow up by cookie, releases its metadata and sends the delete
+ * message. The flow is unlinked from the flow table and freed whether or
+ * not those steps succeed.
+ *
+ * Return: negative value on error, 0 if removed successfully.
+ */
+static int
+nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
+		       struct tc_cls_flower_offload *flow)
+{
+	struct nfp_fl_payload *victim;
+	int err;
+
+	victim = nfp_flower_search_fl_table(app, flow->cookie);
+	if (!victim)
+		return -ENOENT;
+
+	/* The delete message is only sent if the metadata update worked. */
+	err = nfp_modify_flow_metadata(app, victim);
+	if (!err)
+		err = nfp_flower_xmit_flow(netdev, victim,
+					   NFP_FLOWER_CMSG_TYPE_FLOW_DEL);
+
+	hash_del_rcu(&victim->link);
+	kfree(victim->action_data);
+	kfree(victim->mask_data);
+	kfree(victim->unmasked_data);
+	kfree_rcu(victim, rcu);
+	return err;
+}
+
+/**
+ * nfp_flower_get_stats() - Populates flow stats obtained from hardware.
+ * @app:	Pointer to the APP handle
+ * @flow:	TC flower classifier offload structure
+ *
+ * Reports the counters accumulated for a specific flow to TC and then
+ * resets the packet and byte counters.
+ *
+ * Return: negative value on error, 0 if stats populated successfully.
+ */
+static int
+nfp_flower_get_stats(struct nfp_app *app, struct tc_cls_flower_offload *flow)
+{
+	struct nfp_fl_payload *entry;
+	struct nfp_fl_stats *counters;
+
+	entry = nfp_flower_search_fl_table(app, flow->cookie);
+	if (!entry)
+		return -EINVAL;
+
+	counters = &entry->stats;
+
+	spin_lock_bh(&entry->lock);
+	tcf_exts_stats_update(flow->exts, counters->bytes, counters->pkts,
+			      counters->used);
+
+	counters->pkts = 0;
+	counters->bytes = 0;
+	spin_unlock_bh(&entry->lock);
+
+	return 0;
+}
+
+/* Dispatch a flower offload request on its command; anything other than
+ * replace, destroy or stats yields -EOPNOTSUPP.
+ */
+static int
+nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
+			struct tc_cls_flower_offload *flower)
+{
+	switch (flower->command) {
+	case TC_CLSFLOWER_REPLACE:
+		return nfp_flower_add_offload(app, netdev, flower);
+	case TC_CLSFLOWER_DESTROY:
+		return nfp_flower_del_offload(app, netdev, flower);
+	case TC_CLSFLOWER_STATS:
+		return nfp_flower_get_stats(app, flower);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* TC setup entry point: accepts only ingress handles with an 802.3
+ * protocol (-EOPNOTSUPP otherwise) and only flower classifier requests
+ * (-EINVAL otherwise), then hands the request on.
+ */
+int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
+			u32 handle, __be16 proto, struct tc_to_netdev *tc)
+{
+	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS) ||
+	    !eth_proto_is_802_3(proto))
+		return -EOPNOTSUPP;
+
+	if (tc->type != TC_SETUP_CLSFLOWER)
+		return -EINVAL;
+
+	return nfp_flower_repr_offload(app, netdev, tc->cls_flower);
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c
index 5620de05c996..c704c022574f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -43,7 +43,9 @@
static const struct nfp_app_type *apps[] = {
&app_nic,
&app_bpf,
+#ifdef CONFIG_NFP_APP_FLOWER
&app_flower,
+#endif
};
const char *nfp_app_mip_name(struct nfp_app *app)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index ae2d02753d1a..5d714e10d9a9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -47,6 +47,7 @@ struct sk_buff;
struct nfp_app;
struct nfp_cpp;
struct nfp_pf;
+struct nfp_repr;
struct nfp_net;
enum nfp_app_id {
@@ -66,10 +67,13 @@ extern const struct nfp_app_type app_flower;
* @ctrl_has_meta: control messages have prepend of type:5/port:CTRL
*
* Callbacks
- * @init: perform basic app checks
+ * @init: perform basic app checks and init
+ * @clean: clean app state
* @extra_cap: extra capabilities string
* @vnic_init: init vNICs (assign port types, etc.)
* @vnic_clean: clean up app's vNIC state
+ * @repr_open: representor netdev open callback
+ * @repr_stop: representor netdev stop callback
* @start: start application logic
* @stop: stop application logic
* @ctrl_msg_rx: control message handler
@@ -88,6 +92,7 @@ struct nfp_app_type {
bool ctrl_has_meta;
int (*init)(struct nfp_app *app);
+ void (*clean)(struct nfp_app *app);
const char *(*extra_cap)(struct nfp_app *app, struct nfp_net *nn);
@@ -95,6 +100,9 @@ struct nfp_app_type {
unsigned int id);
void (*vnic_clean)(struct nfp_app *app, struct nfp_net *nn);
+ int (*repr_open)(struct nfp_app *app, struct nfp_repr *repr);
+ int (*repr_stop)(struct nfp_app *app, struct nfp_repr *repr);
+
int (*start)(struct nfp_app *app);
void (*stop)(struct nfp_app *app);
@@ -144,6 +152,12 @@ static inline int nfp_app_init(struct nfp_app *app)
return app->type->init(app);
}
+static inline void nfp_app_clean(struct nfp_app *app)
+{
+ if (app->type->clean)
+ app->type->clean(app);
+}
+
static inline int nfp_app_vnic_init(struct nfp_app *app, struct nfp_net *nn,
unsigned int id)
{
@@ -156,6 +170,20 @@ static inline void nfp_app_vnic_clean(struct nfp_app *app, struct nfp_net *nn)
app->type->vnic_clean(app, nn);
}
+static inline int nfp_app_repr_open(struct nfp_app *app, struct nfp_repr *repr)
+{
+ if (!app->type->repr_open)
+ return -EINVAL;
+ return app->type->repr_open(app, repr);
+}
+
+static inline int nfp_app_repr_stop(struct nfp_app *app, struct nfp_repr *repr)
+{
+ if (!app->type->repr_stop)
+ return -EINVAL;
+ return app->type->repr_stop(app, repr);
+}
+
static inline int nfp_app_start(struct nfp_app *app, struct nfp_net *ctrl)
{
app->ctrl = ctrl;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 748e54cc885e..d67969d3e484 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -107,17 +107,18 @@ static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs)
goto err_unlock;
}
- err = nfp_app_sriov_enable(pf->app, num_vfs);
+ err = pci_enable_sriov(pdev, num_vfs);
if (err) {
- dev_warn(&pdev->dev, "App specific PCI sriov configuration failed: %d\n",
- err);
+ dev_warn(&pdev->dev, "Failed to enable PCI SR-IOV: %d\n", err);
goto err_unlock;
}
- err = pci_enable_sriov(pdev, num_vfs);
+ err = nfp_app_sriov_enable(pf->app, num_vfs);
if (err) {
- dev_warn(&pdev->dev, "Failed to enable PCI sriov: %d\n", err);
- goto err_app_sriov_disable;
+ dev_warn(&pdev->dev,
+ "App specific PCI SR-IOV configuration failed: %d\n",
+ err);
+ goto err_sriov_disable;
}
pf->num_vfs = num_vfs;
@@ -127,8 +128,8 @@ static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs)
mutex_unlock(&pf->lock);
return num_vfs;
-err_app_sriov_disable:
- nfp_app_sriov_disable(pf->app);
+err_sriov_disable:
+ pci_disable_sriov(pdev);
err_unlock:
mutex_unlock(&pf->lock);
return err;
@@ -136,17 +137,20 @@ err_unlock:
return 0;
}
-static int __nfp_pcie_sriov_disable(struct pci_dev *pdev)
+static int nfp_pcie_sriov_disable(struct pci_dev *pdev)
{
#ifdef CONFIG_PCI_IOV
struct nfp_pf *pf = pci_get_drvdata(pdev);
+ mutex_lock(&pf->lock);
+
/* If the VFs are assigned we cannot shut down SR-IOV without
* causing issues, so just leave the hardware available but
* disabled
*/
if (pci_vfs_assigned(pdev)) {
dev_warn(&pdev->dev, "Disabling while VFs assigned - VFs will not be deallocated\n");
+ mutex_unlock(&pf->lock);
return -EPERM;
}
@@ -156,20 +160,10 @@ static int __nfp_pcie_sriov_disable(struct pci_dev *pdev)
pci_disable_sriov(pdev);
dev_dbg(&pdev->dev, "Removed VFs.\n");
-#endif
- return 0;
-}
-
-static int nfp_pcie_sriov_disable(struct pci_dev *pdev)
-{
- struct nfp_pf *pf = pci_get_drvdata(pdev);
- int err;
- mutex_lock(&pf->lock);
- err = __nfp_pcie_sriov_disable(pdev);
mutex_unlock(&pf->lock);
-
- return err;
+#endif
+ return 0;
}
static int nfp_pcie_sriov_configure(struct pci_dev *pdev, int num_vfs)
@@ -382,6 +376,12 @@ static int nfp_pci_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, pf);
pf->pdev = pdev;
+ pf->wq = alloc_workqueue("nfp-%s", 0, 2, pci_name(pdev));
+ if (!pf->wq) {
+ err = -ENOMEM;
+ goto err_pci_priv_unset;
+ }
+
pf->cpp = nfp_cpp_from_nfp6000_pcie(pdev);
if (IS_ERR_OR_NULL(pf->cpp)) {
err = PTR_ERR(pf->cpp);
@@ -414,6 +414,16 @@ static int nfp_pci_probe(struct pci_dev *pdev,
if (err)
goto err_fw_unload;
+ pf->num_vfs = pci_num_vf(pdev);
+ if (pf->num_vfs > pf->limit_vfs) {
+ dev_err(&pdev->dev,
+ "Error: %d VFs already enabled, but loaded FW can only support %d\n",
+ pf->num_vfs, pf->limit_vfs);
+ /* err is 0 after the check above; the error path needs a real code */
+ err = -EINVAL;
+ goto err_fw_unload;
+ }
+
err = nfp_net_pci_probe(pf);
if (err)
goto err_sriov_unlimit;
@@ -443,6 +451,8 @@ err_hwinfo_free:
kfree(pf->hwinfo);
nfp_cpp_free(pf->cpp);
err_disable_msix:
+ destroy_workqueue(pf->wq);
+err_pci_priv_unset:
pci_set_drvdata(pdev, NULL);
mutex_destroy(&pf->lock);
devlink_free(devlink);
@@ -463,11 +473,11 @@ static void nfp_pci_remove(struct pci_dev *pdev)
devlink = priv_to_devlink(pf);
+ nfp_net_pci_remove(pf);
+
nfp_pcie_sriov_disable(pdev);
pci_sriov_set_totalvfs(pf->pdev, 0);
- nfp_net_pci_remove(pf);
-
devlink_unregister(devlink);
kfree(pf->rtbl);
@@ -475,6 +485,7 @@ static void nfp_pci_remove(struct pci_dev *pdev)
if (pf->fw_loaded)
nfp_fw_unload(pf);
+ destroy_workqueue(pf->wq);
pci_set_drvdata(pdev, NULL);
kfree(pf->hwinfo);
nfp_cpp_free(pf->cpp);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h
index edc14dc78674..a08cfba7e68e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h
@@ -89,6 +89,7 @@ struct nfp_rtsym_table;
* @num_vnics: Number of vNICs spawned
* @vnics: Linked list of vNIC structures (struct nfp_net)
* @ports: Linked list of port structures (struct nfp_port)
+ * @wq: Workqueue for running works which need to grab @lock
* @port_refresh_work: Work entry for taking netdevs out
* @lock: Protects all fields which may change after probe
*/
@@ -131,7 +132,10 @@ struct nfp_pf {
struct list_head vnics;
struct list_head ports;
+
+ struct workqueue_struct *wq;
struct work_struct port_refresh_work;
+
struct mutex lock;
};
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 2e728543e840..30f82b41d400 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -64,6 +64,7 @@
#include <linux/vmalloc.h>
#include <linux/ktime.h>
+#include <net/switchdev.h>
#include <net/vxlan.h>
#include "nfpcore/nfp_nsp.h"
@@ -3096,18 +3097,6 @@ static void nfp_net_stat64(struct net_device *netdev,
}
}
-static int
-nfp_net_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
- __be16 proto, struct tc_to_netdev *tc)
-{
- struct nfp_net *nn = netdev_priv(netdev);
-
- if (chain_index)
- return -EOPNOTSUPP;
-
- return nfp_app_setup_tc(nn->app, netdev, handle, proto, tc);
-}
-
static int nfp_net_set_features(struct net_device *netdev,
netdev_features_t features)
{
@@ -3423,7 +3412,7 @@ const struct net_device_ops nfp_net_netdev_ops = {
.ndo_get_stats64 = nfp_net_stat64,
.ndo_vlan_rx_add_vid = nfp_net_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = nfp_net_vlan_rx_kill_vid,
- .ndo_setup_tc = nfp_net_setup_tc,
+ .ndo_setup_tc = nfp_port_setup_tc,
.ndo_tx_timeout = nfp_net_tx_timeout,
.ndo_set_rx_mode = nfp_net_set_rx_mode,
.ndo_change_mtu = nfp_net_change_mtu,
@@ -3703,6 +3692,8 @@ static void nfp_net_netdev_init(struct nfp_net *nn)
netdev->netdev_ops = &nfp_net_netdev_ops;
netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
+ SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops);
+
/* MTU range: 68 - hw-specific max */
netdev->min_mtu = ETH_MIN_MTU;
netdev->max_mtu = nn->max_mtu;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index cfcbc3b9a9aa..c85a2f18c4df 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -81,58 +81,6 @@ static int nfp_is_ready(struct nfp_pf *pf)
}
/**
- * nfp_net_map_area() - Help function to map an area
- * @cpp: NFP CPP handler
- * @name: Name for the area
- * @target: CPP target
- * @addr: CPP address
- * @size: Size of the area
- * @area: Area handle (returned).
- *
- * This function is primarily to simplify the code in the main probe
- * function. To undo the effect of this functions call
- * @nfp_cpp_area_release_free(*area);
- *
- * Return: Pointer to memory mapped area or ERR_PTR
- */
-static u8 __iomem *nfp_net_map_area(struct nfp_cpp *cpp,
- const char *name, int isl, int target,
- unsigned long long addr, unsigned long size,
- struct nfp_cpp_area **area)
-{
- u8 __iomem *res;
- u32 dest;
- int err;
-
- dest = NFP_CPP_ISLAND_ID(target, NFP_CPP_ACTION_RW, 0, isl);
-
- *area = nfp_cpp_area_alloc_with_name(cpp, dest, name, addr, size);
- if (!*area) {
- err = -EIO;
- goto err_area;
- }
-
- err = nfp_cpp_area_acquire(*area);
- if (err < 0)
- goto err_acquire;
-
- res = nfp_cpp_area_iomem(*area);
- if (!res) {
- err = -EIO;
- goto err_map;
- }
-
- return res;
-
-err_map:
- nfp_cpp_area_release(*area);
-err_acquire:
- nfp_cpp_area_free(*area);
-err_area:
- return (u8 __iomem *)ERR_PTR(err);
-}
-
-/**
* nfp_net_get_mac_addr() - Get the MAC address.
* @pf: NFP PF handle
* @port: NFP port structure
@@ -226,31 +174,12 @@ static u8 __iomem *
nfp_net_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt,
unsigned int min_size, struct nfp_cpp_area **area)
{
- const struct nfp_rtsym *sym;
char pf_symbol[256];
- u8 __iomem *mem;
snprintf(pf_symbol, sizeof(pf_symbol), sym_fmt,
nfp_cppcore_pcie_unit(pf->cpp));
- sym = nfp_rtsym_lookup(pf->rtbl, pf_symbol);
- if (!sym)
- return (u8 __iomem *)ERR_PTR(-ENOENT);
-
- if (sym->size < min_size) {
- nfp_err(pf->cpp, "PF symbol %s too small\n", pf_symbol);
- return (u8 __iomem *)ERR_PTR(-EINVAL);
- }
-
- mem = nfp_net_map_area(pf->cpp, name, sym->domain, sym->target,
- sym->addr, sym->size, area);
- if (IS_ERR(mem)) {
- nfp_err(pf->cpp, "Failed to map PF symbol %s: %ld\n",
- pf_symbol, PTR_ERR(mem));
- return mem;
- }
-
- return mem;
+ return nfp_rtsym_map(pf->rtbl, pf_symbol, name, min_size, area);
}
static void nfp_net_pf_free_vnic(struct nfp_pf *pf, struct nfp_net *nn)
@@ -485,7 +414,7 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride)
if (IS_ERR(ctrl_bar)) {
nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n");
err = PTR_ERR(ctrl_bar);
- goto err_free;
+ goto err_app_clean;
}
pf->ctrl_vnic = nfp_net_pf_alloc_vnic(pf, false, ctrl_bar, qc_bar,
@@ -499,8 +428,11 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride)
err_unmap:
nfp_cpp_area_release_free(pf->ctrl_vnic_bar);
+err_app_clean:
+ nfp_app_clean(pf->app);
err_free:
nfp_app_free(pf->app);
+ pf->app = NULL;
return err;
}
@@ -510,6 +442,7 @@ static void nfp_net_pf_app_clean(struct nfp_pf *pf)
nfp_net_pf_free_vnic(pf, pf->ctrl_vnic);
nfp_cpp_area_release_free(pf->ctrl_vnic_bar);
}
+ nfp_app_clean(pf->app);
nfp_app_free(pf->app);
pf->app = NULL;
}
@@ -555,8 +488,16 @@ static int nfp_net_pf_app_start(struct nfp_pf *pf)
if (err)
goto err_ctrl_stop;
+ if (pf->num_vfs) {
+ err = nfp_app_sriov_enable(pf->app, pf->num_vfs);
+ if (err)
+ goto err_app_stop;
+ }
+
return 0;
+err_app_stop:
+ nfp_app_stop(pf->app);
err_ctrl_stop:
nfp_net_pf_app_stop_ctrl(pf);
return err;
@@ -564,6 +505,8 @@ err_ctrl_stop:
static void nfp_net_pf_app_stop(struct nfp_pf *pf)
{
+ if (pf->num_vfs)
+ nfp_app_sriov_disable(pf->app);
nfp_app_stop(pf->app);
nfp_net_pf_app_stop_ctrl(pf);
}
@@ -580,26 +523,22 @@ static void nfp_net_pci_unmap_mem(struct nfp_pf *pf)
static int nfp_net_pci_map_mem(struct nfp_pf *pf)
{
- u32 ctrl_bar_sz;
u8 __iomem *mem;
+ u32 min_size;
int err;
- ctrl_bar_sz = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE;
+ min_size = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE;
mem = nfp_net_pf_map_rtsym(pf, "net.ctrl", "_pf%d_net_bar0",
- ctrl_bar_sz, &pf->data_vnic_bar);
+ min_size, &pf->data_vnic_bar);
if (IS_ERR(mem)) {
nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n");
- err = PTR_ERR(mem);
- if (!pf->fw_loaded && err == -ENOENT)
- err = -EPROBE_DEFER;
- return err;
+ return PTR_ERR(mem);
}
- pf->mac_stats_mem = nfp_net_pf_map_rtsym(pf, "net.macstats",
- "_mac_stats",
- NFP_MAC_STATS_SIZE *
- (pf->eth_tbl->max_index + 1),
- &pf->mac_stats_bar);
+ min_size = NFP_MAC_STATS_SIZE * (pf->eth_tbl->max_index + 1);
+ pf->mac_stats_mem = nfp_rtsym_map(pf->rtbl, "_mac_stats",
+ "net.macstats", min_size,
+ &pf->mac_stats_bar);
if (IS_ERR(pf->mac_stats_mem)) {
if (PTR_ERR(pf->mac_stats_mem) != -ENOENT) {
err = PTR_ERR(pf->mac_stats_mem);
@@ -620,7 +559,7 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf)
pf->vf_cfg_mem = NULL;
}
- mem = nfp_net_map_area(pf->cpp, "net.qc", 0, 0,
+ mem = nfp_cpp_map_area(pf->cpp, "net.qc", 0, 0,
NFP_PCIE_QUEUE(0), NFP_QCP_QUEUE_AREA_SZ,
&pf->qc_area);
if (IS_ERR(mem)) {
@@ -743,7 +682,7 @@ void nfp_net_refresh_port_table(struct nfp_port *port)
set_bit(NFP_PORT_CHANGED, &port->flags);
- schedule_work(&pf->port_refresh_work);
+ queue_work(pf->wq, &pf->port_refresh_work);
}
int nfp_net_refresh_eth_port(struct nfp_port *port)
@@ -786,6 +725,12 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
return -EINVAL;
}
+ if (!pf->rtbl) {
+ nfp_err(pf->cpp, "No %s, giving up.\n",
+ pf->fw_loaded ? "symbol table" : "firmware found");
+ return -EPROBE_DEFER;
+ }
+
mutex_lock(&pf->lock);
pf->max_data_vnics = nfp_net_pf_get_num_ports(pf);
if ((int)pf->max_data_vnics < 0) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index 44adcc5df11e..8ec5474f4b18 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -35,8 +35,10 @@
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/lockdep.h>
#include <net/dst_metadata.h>
+#include <net/switchdev.h>
#include "nfpcore/nfp_cpp.h"
+#include "nfpcore/nfp_nsp.h"
#include "nfp_app.h"
#include "nfp_main.h"
#include "nfp_net_ctrl.h"
@@ -135,25 +137,34 @@ nfp_repr_pf_get_stats64(const struct nfp_app *app, u8 pf,
stats->rx_dropped = readq(mem + NFP_NET_CFG_STATS_TX_DISCARDS);
}
-void
-nfp_repr_get_stats64(const struct nfp_app *app, enum nfp_repr_type type,
- u8 port, struct rtnl_link_stats64 *stats)
+static void
+nfp_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
{
- switch (type) {
- case NFP_REPR_TYPE_PHYS_PORT:
- nfp_repr_phy_port_get_stats64(app, port, stats);
+ struct nfp_repr *repr = netdev_priv(netdev);
+ struct nfp_eth_table_port *eth_port;
+ struct nfp_app *app = repr->app;
+
+ if (WARN_ON(!repr->port))
+ return;
+
+ switch (repr->port->type) {
+ case NFP_PORT_PHYS_PORT:
+ eth_port = __nfp_port_get_eth_port(repr->port);
+ if (!eth_port)
+ break;
+ nfp_repr_phy_port_get_stats64(app, eth_port->index, stats);
break;
- case NFP_REPR_TYPE_PF:
- nfp_repr_pf_get_stats64(app, port, stats);
+ case NFP_PORT_PF_PORT:
+ nfp_repr_pf_get_stats64(app, repr->port->pf_id, stats);
break;
- case NFP_REPR_TYPE_VF:
- nfp_repr_vf_get_stats64(app, port, stats);
+ case NFP_PORT_VF_PORT:
+ nfp_repr_vf_get_stats64(app, repr->port->vf_id, stats);
default:
break;
}
}
-bool
+static bool
nfp_repr_has_offload_stats(const struct net_device *dev, int attr_id)
{
switch (attr_id) {
@@ -196,8 +207,9 @@ nfp_repr_get_host_stats64(const struct net_device *netdev,
return 0;
}
-int nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev,
- void *stats)
+static int
+nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *stats)
{
switch (attr_id) {
case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@ -207,7 +219,7 @@ int nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev,
return -EINVAL;
}
-netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev)
{
struct nfp_repr *repr = netdev_priv(netdev);
unsigned int len = skb->len;
@@ -224,6 +236,31 @@ netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev)
return ret;
}
+static int nfp_repr_stop(struct net_device *netdev)
+{
+ struct nfp_repr *repr = netdev_priv(netdev);
+
+ return nfp_app_repr_stop(repr->app, repr);
+}
+
+static int nfp_repr_open(struct net_device *netdev)
+{
+ struct nfp_repr *repr = netdev_priv(netdev);
+
+ return nfp_app_repr_open(repr->app, repr);
+}
+
+const struct net_device_ops nfp_repr_netdev_ops = {
+ .ndo_open = nfp_repr_open,
+ .ndo_stop = nfp_repr_stop,
+ .ndo_start_xmit = nfp_repr_xmit,
+ .ndo_get_stats64 = nfp_repr_get_stats64,
+ .ndo_has_offload_stats = nfp_repr_has_offload_stats,
+ .ndo_get_offload_stats = nfp_repr_get_offload_stats,
+ .ndo_get_phys_port_name = nfp_port_get_phys_port_name,
+ .ndo_setup_tc = nfp_port_setup_tc,
+};
+
static void nfp_repr_clean(struct nfp_repr *repr)
{
unregister_netdev(repr->netdev);
@@ -248,8 +285,8 @@ static void nfp_repr_set_lockdep_class(struct net_device *dev)
}
int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
- const struct net_device_ops *netdev_ops, u32 cmsg_port_id,
- struct nfp_port *port, struct net_device *pf_netdev)
+ u32 cmsg_port_id, struct nfp_port *port,
+ struct net_device *pf_netdev)
{
struct nfp_repr *repr = netdev_priv(netdev);
int err;
@@ -263,7 +300,13 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
repr->dst->u.port_info.port_id = cmsg_port_id;
repr->dst->u.port_info.lower_dev = pf_netdev;
- netdev->netdev_ops = netdev_ops;
+ netdev->netdev_ops = &nfp_repr_netdev_ops;
+ SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops);
+
+ if (nfp_app_has_tc(app)) {
+ netdev->features |= NETIF_F_HW_TC;
+ netdev->hw_features |= NETIF_F_HW_TC;
+ }
err = register_netdev(netdev);
if (err)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
index c5ed6611f708..32179cad062a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
@@ -38,6 +38,8 @@ struct metadata_dst;
struct nfp_net;
struct nfp_port;
+#include <net/dst_metadata.h>
+
/**
* struct nfp_reprs - container for representor netdevs
* @num_reprs: Number of elements in reprs array
@@ -97,16 +99,22 @@ enum nfp_repr_type {
};
#define NFP_REPR_TYPE_MAX (__NFP_REPR_TYPE_MAX - 1)
+extern const struct net_device_ops nfp_repr_netdev_ops;
+
+static inline bool nfp_netdev_is_nfp_repr(struct net_device *netdev)
+{
+ return netdev->netdev_ops == &nfp_repr_netdev_ops;
+}
+
+static inline int nfp_repr_get_port_id(struct net_device *netdev)
+{
+ struct nfp_repr *priv = netdev_priv(netdev);
+
+ return priv->dst->u.port_info.port_id;
+}
+
void nfp_repr_inc_rx_stats(struct net_device *netdev, unsigned int len);
-void
-nfp_repr_get_stats64(const struct nfp_app *app, enum nfp_repr_type type,
- u8 port, struct rtnl_link_stats64 *stats);
-bool nfp_repr_has_offload_stats(const struct net_device *dev, int attr_id);
-int nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev,
- void *stats);
-netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev);
int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
- const struct net_device_ops *netdev_ops,
u32 cmsg_port_id, struct nfp_port *port,
struct net_device *pf_netdev);
struct net_device *nfp_repr_alloc(struct nfp_app *app);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c
index 19bceeb82225..776e54dd5dd0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c
@@ -32,6 +32,7 @@
*/
#include <linux/lockdep.h>
+#include <net/switchdev.h>
#include "nfpcore/nfp_cpp.h"
#include "nfpcore/nfp_nsp.h"
@@ -42,13 +43,64 @@
struct nfp_port *nfp_port_from_netdev(struct net_device *netdev)
{
- struct nfp_net *nn;
+ if (nfp_netdev_is_nfp_net(netdev)) {
+ struct nfp_net *nn = netdev_priv(netdev);
- if (WARN_ON(!nfp_netdev_is_nfp_net(netdev)))
- return NULL;
- nn = netdev_priv(netdev);
+ return nn->port;
+ }
+
+ if (nfp_netdev_is_nfp_repr(netdev)) {
+ struct nfp_repr *repr = netdev_priv(netdev);
+
+ return repr->port;
+ }
- return nn->port;
+ WARN(1, "Unknown netdev type for nfp_port\n");
+
+ return NULL;
+}
+
+static int
+nfp_port_attr_get(struct net_device *netdev, struct switchdev_attr *attr)
+{
+ struct nfp_port *port;
+
+ port = nfp_port_from_netdev(netdev);
+ if (!port)
+ return -EOPNOTSUPP;
+
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: {
+ const u8 *serial;
+ /* N.B: attr->u.ppid.id is binary data */
+ attr->u.ppid.id_len = nfp_cpp_serial(port->app->cpp, &serial);
+ memcpy(&attr->u.ppid.id, serial, attr->u.ppid.id_len);
+ break;
+ }
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+const struct switchdev_ops nfp_port_switchdev_ops = {
+ .switchdev_port_attr_get = nfp_port_attr_get,
+};
+
+int nfp_port_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
+ __be16 proto, struct tc_to_netdev *tc)
+{
+ struct nfp_port *port;
+
+ if (chain_index)
+ return -EOPNOTSUPP;
+
+ port = nfp_port_from_netdev(netdev);
+ if (!port)
+ return -EOPNOTSUPP;
+
+ return nfp_app_setup_tc(port->app, netdev, handle, proto, tc);
}
struct nfp_port *
@@ -98,15 +150,31 @@ nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
int n;
port = nfp_port_from_netdev(netdev);
- eth_port = __nfp_port_get_eth_port(port);
- if (!eth_port)
+ if (!port)
+ return -EOPNOTSUPP;
+
+ switch (port->type) {
+ case NFP_PORT_PHYS_PORT:
+ eth_port = __nfp_port_get_eth_port(port);
+ if (!eth_port)
+ return -EOPNOTSUPP;
+
+ if (!eth_port->is_split)
+ n = snprintf(name, len, "p%d", eth_port->label_port);
+ else
+ n = snprintf(name, len, "p%ds%d", eth_port->label_port,
+ eth_port->label_subport);
+ break;
+ case NFP_PORT_PF_PORT:
+ n = snprintf(name, len, "pf%d", port->pf_id);
+ break;
+ case NFP_PORT_VF_PORT:
+ n = snprintf(name, len, "pf%dvf%d", port->pf_id, port->vf_id);
+ break;
+ default:
return -EOPNOTSUPP;
+ }
- if (!eth_port->is_split)
- n = snprintf(name, len, "p%d", eth_port->label_port);
- else
- n = snprintf(name, len, "p%ds%d", eth_port->label_port,
- eth_port->label_subport);
if (n >= len)
return -EINVAL;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index f472bea4ec2b..a33d22e18f94 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -36,6 +36,7 @@
#include <net/devlink.h>
+struct tc_to_netdev;
struct net_device;
struct nfp_app;
struct nfp_pf;
@@ -47,10 +48,14 @@ struct nfp_port;
* state when port disappears because of FW fault or config
* change
* @NFP_PORT_PHYS_PORT: external NIC port
+ * @NFP_PORT_PF_PORT: logical port of PCI PF
+ * @NFP_PORT_VF_PORT: logical port of PCI VF
*/
enum nfp_port_type {
NFP_PORT_INVALID,
NFP_PORT_PHYS_PORT,
+ NFP_PORT_PF_PORT,
+ NFP_PORT_VF_PORT,
};
/**
@@ -72,6 +77,8 @@ enum nfp_port_flags {
* @dl_port: devlink port structure
* @eth_id: for %NFP_PORT_PHYS_PORT port ID in NFP enumeration scheme
* @eth_port: for %NFP_PORT_PHYS_PORT translated ETH Table port entry
+ * @pf_id: for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT ID of the PCI PF (0-3)
+ * @vf_id: for %NFP_PORT_VF_PORT ID of the PCI VF within @pf_id
* @port_list: entry on pf's list of ports
*/
struct nfp_port {
@@ -84,12 +91,27 @@ struct nfp_port {
struct devlink_port dl_port;
- unsigned int eth_id;
- struct nfp_eth_table_port *eth_port;
+ union {
+ /* NFP_PORT_PHYS_PORT */
+ struct {
+ unsigned int eth_id;
+ struct nfp_eth_table_port *eth_port;
+ };
+ /* NFP_PORT_PF_PORT, NFP_PORT_VF_PORT */
+ struct {
+ unsigned int pf_id;
+ unsigned int vf_id;
+ };
+ };
struct list_head port_list;
};
+extern const struct switchdev_ops nfp_port_switchdev_ops;
+
+int nfp_port_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
+ __be16 proto, struct tc_to_netdev *tc);
+
struct nfp_port *nfp_port_from_netdev(struct net_device *netdev);
struct nfp_port *
nfp_port_from_id(struct nfp_pf *pf, enum nfp_port_type type, unsigned int id);
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
index 25a967158ce9..5798adc57cbc 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
@@ -230,6 +230,9 @@ struct nfp_cpp_area *nfp_cpp_area_alloc_with_name(struct nfp_cpp *cpp,
struct nfp_cpp_area *nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 cpp_id,
unsigned long long address,
unsigned long size);
+struct nfp_cpp_area *
+nfp_cpp_area_alloc_acquire(struct nfp_cpp *cpp, const char *name, u32 cpp_id,
+ unsigned long long address, unsigned long size);
void nfp_cpp_area_free(struct nfp_cpp_area *area);
int nfp_cpp_area_acquire(struct nfp_cpp_area *area);
int nfp_cpp_area_acquire_nonblocking(struct nfp_cpp_area *area);
@@ -239,8 +242,6 @@ int nfp_cpp_area_read(struct nfp_cpp_area *area, unsigned long offset,
void *buffer, size_t length);
int nfp_cpp_area_write(struct nfp_cpp_area *area, unsigned long offset,
const void *buffer, size_t length);
-int nfp_cpp_area_check_range(struct nfp_cpp_area *area,
- unsigned long long offset, unsigned long size);
const char *nfp_cpp_area_name(struct nfp_cpp_area *cpp_area);
void *nfp_cpp_area_priv(struct nfp_cpp_area *cpp_area);
struct nfp_cpp *nfp_cpp_area_cpp(struct nfp_cpp_area *cpp_area);
@@ -278,6 +279,10 @@ int nfp_cpp_readq(struct nfp_cpp *cpp, u32 cpp_id,
int nfp_cpp_writeq(struct nfp_cpp *cpp, u32 cpp_id,
unsigned long long address, u64 value);
+u8 __iomem *
+nfp_cpp_map_area(struct nfp_cpp *cpp, const char *name, int domain, int target,
+ u64 addr, unsigned long size, struct nfp_cpp_area **area);
+
struct nfp_cpp_mutex;
int nfp_cpp_mutex_init(struct nfp_cpp *cpp, int target,
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
index 9b69dcf87be9..04dd5758ecf5 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
@@ -361,6 +361,41 @@ nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 dest,
}
/**
+ * nfp_cpp_area_alloc_acquire() - allocate a new CPP area and lock it down
+ * @cpp: CPP handle
+ * @name: Name of region
+ * @dest: CPP id
+ * @address: Start address on CPP target
+ * @size: Size of area
+ *
+ * Allocate and initialize a CPP area structure, and lock it down so
+ * that it can be accessed directly.
+ *
+ * NOTE: @address and @size must be 32-bit aligned values.
+ *
+ * NOTE: The area must also be 'released' when the structure is freed.
+ *
+ * Return: NFP CPP Area handle, or NULL
+ */
+struct nfp_cpp_area *
+nfp_cpp_area_alloc_acquire(struct nfp_cpp *cpp, const char *name, u32 dest,
+ unsigned long long address, unsigned long size)
+{
+ struct nfp_cpp_area *area;
+
+ area = nfp_cpp_area_alloc_with_name(cpp, dest, name, address, size);
+ if (!area)
+ return NULL;
+
+ if (nfp_cpp_area_acquire(area)) {
+ nfp_cpp_area_free(area);
+ return NULL;
+ }
+
+ return area;
+}
+
+/**
* nfp_cpp_area_free() - free up the CPP area
* @area: CPP area handle
*
@@ -536,27 +571,6 @@ int nfp_cpp_area_write(struct nfp_cpp_area *area,
}
/**
- * nfp_cpp_area_check_range() - check if address range fits in CPP area
- * @area: CPP area handle
- * @offset: offset into CPP target
- * @length: size of address range in bytes
- *
- * Check if address range fits within CPP area. Return 0 if area
- * fits or -EFAULT on error.
- *
- * Return: 0, or -ERRNO
- */
-int nfp_cpp_area_check_range(struct nfp_cpp_area *area,
- unsigned long long offset, unsigned long length)
-{
- if (offset < area->offset ||
- offset + length > area->offset + area->size)
- return -EFAULT;
-
- return 0;
-}
-
-/**
* nfp_cpp_area_name() - return name of a CPP area
* @cpp_area: CPP area handle
*
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c
index 0ba0379b8f75..ab86bceb93f2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c
@@ -279,3 +279,43 @@ exit_release:
return err;
}
+
+/**
+ * nfp_cpp_map_area() - Helper function to map an area
+ * @cpp: NFP CPP handle
+ * @name: Name for the area
+ * @domain: CPP domain
+ * @target: CPP target
+ * @addr: CPP address
+ * @size: Size of the area
+ * @area: Area handle (output)
+ *
+ * Map an area for IOMEM access. To undo the effect of this function call
+ * nfp_cpp_area_release_free(*area).
+ *
+ * Return: Pointer to memory mapped area or ERR_PTR(-EIO) on failure
+ */
+u8 __iomem *
+nfp_cpp_map_area(struct nfp_cpp *cpp, const char *name, int domain, int target,
+ u64 addr, unsigned long size, struct nfp_cpp_area **area)
+{
+ u8 __iomem *res;
+ u32 dest;
+
+ dest = NFP_CPP_ISLAND_ID(target, NFP_CPP_ACTION_RW, 0, domain);
+
+ *area = nfp_cpp_area_alloc_acquire(cpp, name, dest, addr, size);
+ if (!*area)
+ goto err_eio;
+
+ res = nfp_cpp_area_iomem(*area);
+ if (!res)
+ goto err_release_free;
+
+ return res;
+
+err_release_free:
+ nfp_cpp_area_release_free(*area);
+err_eio:
+ return (u8 __iomem *)ERR_PTR(-EIO);
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h
index d27d29782a12..c9724fb7ea4b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h
@@ -97,7 +97,11 @@ int nfp_rtsym_count(struct nfp_rtsym_table *rtbl);
const struct nfp_rtsym *nfp_rtsym_get(struct nfp_rtsym_table *rtbl, int idx);
const struct nfp_rtsym *
nfp_rtsym_lookup(struct nfp_rtsym_table *rtbl, const char *name);
+
u64 nfp_rtsym_read_le(struct nfp_rtsym_table *rtbl, const char *name,
int *error);
+u8 __iomem *
+nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, const char *id,
+ unsigned int min_size, struct nfp_cpp_area **area);
#endif /* NFP_NFFW_H */
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
index 203f9cbae0fb..ecda474ac7c3 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
@@ -289,3 +289,30 @@ exit:
return ~0ULL;
return val;
}
+
+u8 __iomem *
+nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, const char *id,
+ unsigned int min_size, struct nfp_cpp_area **area)
+{
+ const struct nfp_rtsym *sym;
+ u8 __iomem *mem;
+
+ sym = nfp_rtsym_lookup(rtbl, name);
+ if (!sym)
+ return (u8 __iomem *)ERR_PTR(-ENOENT);
+
+ if (sym->size < min_size) {
+ nfp_err(rtbl->cpp, "Symbol %s too small\n", name);
+ return (u8 __iomem *)ERR_PTR(-EINVAL);
+ }
+
+ mem = nfp_cpp_map_area(rtbl->cpp, id, sym->domain, sym->target,
+ sym->addr, sym->size, area);
+ if (IS_ERR(mem)) {
+ nfp_err(rtbl->cpp, "Failed to map symbol %s: %ld\n",
+ name, PTR_ERR(mem));
+ return mem;
+ }
+
+ return mem;
+}
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
index e30676515529..6cec2a6a3dcc 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
@@ -174,7 +174,6 @@ netxen_setup_minidump(struct netxen_adapter *adapter)
{
int err = 0, i;
u32 *template, *tmp_buf;
- struct netxen_minidump_template_hdr *hdr;
err = netxen_get_minidump_template_size(adapter);
if (err) {
adapter->mdump.fw_supports_md = 0;
@@ -218,8 +217,6 @@ netxen_setup_minidump(struct netxen_adapter *adapter)
template = (u32 *) adapter->mdump.md_template;
for (i = 0; i < adapter->mdump.md_template_size/sizeof(u32); i++)
*template++ = __le32_to_cpu(*tmp_buf++);
- hdr = (struct netxen_minidump_template_hdr *)
- adapter->mdump.md_template;
adapter->mdump.md_capture_buff = NULL;
adapter->mdump.fw_supports_md = 1;
adapter->mdump.md_enabled = 0;
diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile
index 67452380b60e..82dd47068e18 100644
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -5,6 +5,6 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \
qed_selftest.o qed_dcbx.o qed_debug.o qed_ptp.o
qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o
qed-$(CONFIG_QED_LL2) += qed_ll2.o
-qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o
+qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o qed_iwarp.o
qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o qed_ooo.o
qed-$(CONFIG_QED_FCOE) += qed_fcoe.o
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 14b08ee9e3ad..91003bc6f00b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -210,14 +210,16 @@ struct qed_tunn_update_params {
/* The PCI personality is not quite synonymous to protocol ID:
* 1. All personalities need CORE connections
- * 2. The Ethernet personality may support also the RoCE protocol
+ * 2. The Ethernet personality may support also the RoCE/iWARP protocol
*/
enum qed_pci_personality {
QED_PCI_ETH,
QED_PCI_FCOE,
QED_PCI_ISCSI,
QED_PCI_ETH_ROCE,
- QED_PCI_DEFAULT /* default in shmem */
+ QED_PCI_ETH_IWARP,
+ QED_PCI_ETH_RDMA,
+ QED_PCI_DEFAULT, /* default in shmem */
};
/* All VFs are symmetric, all counters are PF + all VFs */
@@ -277,6 +279,7 @@ enum qed_dev_cap {
QED_DEV_CAP_FCOE,
QED_DEV_CAP_ISCSI,
QED_DEV_CAP_ROCE,
+ QED_DEV_CAP_IWARP,
};
enum qed_wol_support {
@@ -286,7 +289,24 @@ enum qed_wol_support {
struct qed_hw_info {
/* PCI personality */
- enum qed_pci_personality personality;
+ enum qed_pci_personality personality;
+#define QED_IS_RDMA_PERSONALITY(dev) \
+ ((dev)->hw_info.personality == QED_PCI_ETH_ROCE || \
+ (dev)->hw_info.personality == QED_PCI_ETH_IWARP || \
+ (dev)->hw_info.personality == QED_PCI_ETH_RDMA)
+#define QED_IS_ROCE_PERSONALITY(dev) \
+ ((dev)->hw_info.personality == QED_PCI_ETH_ROCE || \
+ (dev)->hw_info.personality == QED_PCI_ETH_RDMA)
+#define QED_IS_IWARP_PERSONALITY(dev) \
+ ((dev)->hw_info.personality == QED_PCI_ETH_IWARP || \
+ (dev)->hw_info.personality == QED_PCI_ETH_RDMA)
+#define QED_IS_L2_PERSONALITY(dev) \
+ ((dev)->hw_info.personality == QED_PCI_ETH || \
+ QED_IS_RDMA_PERSONALITY(dev))
+#define QED_IS_FCOE_PERSONALITY(dev) \
+ ((dev)->hw_info.personality == QED_PCI_FCOE)
+#define QED_IS_ISCSI_PERSONALITY(dev) \
+ ((dev)->hw_info.personality == QED_PCI_ISCSI)
/* Resource Allocation scheme results */
u32 resc_start[QED_MAX_RESC];
@@ -759,7 +779,7 @@ static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev,
}
#define PURE_LB_TC 8
-#define OOO_LB_TC 9
+#define PKT_LB_TC 9
int qed_configure_vport_wfq(struct qed_dev *cdev, u16 vp_id, u32 rate);
void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev,
@@ -769,6 +789,8 @@ void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev,
void qed_clean_wfq_db(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
int qed_device_num_engines(struct qed_dev *cdev);
int qed_device_get_port_id(struct qed_dev *cdev);
+void qed_set_fw_mac_addr(__le16 *fw_msb,
+ __le16 *fw_mid, __le16 *fw_lsb, u8 *mac);
#define QED_LEADING_HWFN(dev) (&dev->hwfns[0])
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index e201214764db..af106be8cc08 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -246,14 +246,16 @@ struct qed_cxt_mngr {
static bool src_proto(enum protocol_type type)
{
return type == PROTOCOLID_ISCSI ||
- type == PROTOCOLID_FCOE;
+ type == PROTOCOLID_FCOE ||
+ type == PROTOCOLID_IWARP;
}
static bool tm_cid_proto(enum protocol_type type)
{
return type == PROTOCOLID_ISCSI ||
type == PROTOCOLID_FCOE ||
- type == PROTOCOLID_ROCE;
+ type == PROTOCOLID_ROCE ||
+ type == PROTOCOLID_IWARP;
}
static bool tm_tid_proto(enum protocol_type type)
@@ -853,7 +855,7 @@ u32 qed_cxt_cfg_ilt_compute_excess(struct qed_hwfn *p_hwfn, u32 used_lines)
if (!excess_lines)
return 0;
- if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+ if (!QED_IS_RDMA_PERSONALITY(p_hwfn))
return 0;
p_mngr = p_hwfn->p_cxt_mngr;
@@ -1033,7 +1035,7 @@ static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn,
u32 lines, line, sz_left, lines_to_skip = 0;
/* Special handling for RoCE that supports dynamic allocation */
- if ((p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) &&
+ if (QED_IS_RDMA_PERSONALITY(p_hwfn) &&
((ilt_client == ILT_CLI_CDUT) || ilt_client == ILT_CLI_TSDM))
return 0;
@@ -1833,7 +1835,7 @@ static void qed_tm_init_pf(struct qed_hwfn *p_hwfn)
tm_offset += tm_iids.pf_tids[i];
}
- if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE)
+ if (QED_IS_RDMA_PERSONALITY(p_hwfn))
active_seg_mask = 0;
STORE_RT_REG(p_hwfn, TM_REG_PF_ENABLE_TASK_RT_OFFSET, active_seg_mask);
@@ -2068,6 +2070,11 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn,
num_srqs = min_t(u32, 32 * 1024, p_params->num_srqs);
switch (p_hwfn->hw_info.personality) {
+ case QED_PCI_ETH_IWARP:
+ /* Each QP requires one connection */
+ num_cons = min_t(u32, IWARP_MAX_QPS, p_params->num_qps);
+ proto = PROTOCOLID_IWARP;
+ break;
case QED_PCI_ETH_ROCE:
num_qps = min_t(u32, ROCE_MAX_QPS, p_params->num_qps);
num_cons = num_qps * 2; /* each QP requires two connections */
@@ -2103,6 +2110,8 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks)
qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_CORE, core_cids, 0);
switch (p_hwfn->hw_info.personality) {
+ case QED_PCI_ETH_RDMA:
+ case QED_PCI_ETH_IWARP:
case QED_PCI_ETH_ROCE:
{
qed_rdma_set_pf_params(p_hwfn,
@@ -2344,7 +2353,7 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn,
last_cid_allocated - 1);
if (!p_hwfn->b_rdma_enabled_in_prs) {
- /* Enable RoCE search */
+ /* Enable RDMA search */
qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 1);
p_hwfn->b_rdma_enabled_in_prs = true;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 49667ad9042d..6c87bed13bd2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -216,6 +216,10 @@ static u32 qed_get_pq_flags(struct qed_hwfn *p_hwfn)
case QED_PCI_ETH_ROCE:
flags |= PQ_FLAGS_MCOS | PQ_FLAGS_OFLD | PQ_FLAGS_LLT;
break;
+ case QED_PCI_ETH_IWARP:
+ flags |= PQ_FLAGS_MCOS | PQ_FLAGS_ACK | PQ_FLAGS_OOO |
+ PQ_FLAGS_OFLD;
+ break;
default:
DP_ERR(p_hwfn,
"unknown personality %d\n", p_hwfn->hw_info.personality);
@@ -936,9 +940,16 @@ int qed_resc_alloc(struct qed_dev *cdev)
/* EQ */
n_eqes = qed_chain_get_capacity(&p_hwfn->p_spq->chain);
- if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
+ if (QED_IS_RDMA_PERSONALITY(p_hwfn)) {
+ enum protocol_type rdma_proto;
+
+ if (QED_IS_ROCE_PERSONALITY(p_hwfn))
+ rdma_proto = PROTOCOLID_ROCE;
+ else
+ rdma_proto = PROTOCOLID_IWARP;
+
num_cons = qed_cxt_get_proto_cid_count(p_hwfn,
- PROTOCOLID_ROCE,
+ rdma_proto,
NULL) * 2;
n_eqes += num_cons + 2 * MAX_NUM_VFS_BB;
} else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
@@ -2057,7 +2068,7 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
qed_int_get_num_sbs(p_hwfn, &sb_cnt);
if (IS_ENABLED(CONFIG_QED_RDMA) &&
- p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
+ QED_IS_RDMA_PERSONALITY(p_hwfn)) {
/* Roce CNQ each requires: 1 status block + 1 CNQ. We divide
* the status blocks equally between L2 / RoCE but with
* consideration as to how many l2 queues / cnqs we have.
@@ -2068,9 +2079,7 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
non_l2_sbs = feat_num[QED_RDMA_CNQ];
}
-
- if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE ||
- p_hwfn->hw_info.personality == QED_PCI_ETH) {
+ if (QED_IS_L2_PERSONALITY(p_hwfn)) {
/* Start by allocating VF queues, then PF's */
feat_num[QED_VF_L2_QUE] = min_t(u32,
RESC_NUM(p_hwfn, QED_L2_QUEUE),
@@ -2083,12 +2092,12 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
QED_VF_L2_QUE));
}
- if (p_hwfn->hw_info.personality == QED_PCI_FCOE)
+ if (QED_IS_FCOE_PERSONALITY(p_hwfn))
feat_num[QED_FCOE_CQ] = min_t(u32, sb_cnt.cnt,
RESC_NUM(p_hwfn,
QED_CMDQS_CQS));
- if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
+ if (QED_IS_ISCSI_PERSONALITY(p_hwfn))
feat_num[QED_ISCSI_CQ] = min_t(u32, sb_cnt.cnt,
RESC_NUM(p_hwfn,
QED_CMDQS_CQS));
@@ -4122,3 +4131,14 @@ int qed_device_get_port_id(struct qed_dev *cdev)
{
return (QED_LEADING_HWFN(cdev)->abs_pf_id) % qed_device_num_ports(cdev);
}
+
+/* Spread the six bytes of @mac over three 16-bit firmware words, two bytes
+ * per word with each pair byte-swapped: mac[1],mac[0] go to @fw_msb,
+ * mac[3],mac[2] to @fw_mid and mac[5],mac[4] to @fw_lsb.
+ */
+void qed_set_fw_mac_addr(__le16 *fw_msb,
+			 __le16 *fw_mid, __le16 *fw_lsb, u8 *mac)
+{
+	u8 *msb_bytes = (u8 *)fw_msb;
+	u8 *mid_bytes = (u8 *)fw_mid;
+	u8 *lsb_bytes = (u8 *)fw_lsb;
+
+	msb_bytes[0] = mac[1];
+	msb_bytes[1] = mac[0];
+
+	mid_bytes[0] = mac[3];
+	mid_bytes[1] = mac[2];
+
+	lsb_bytes[0] = mac[5];
+	lsb_bytes[1] = mac[4];
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 3bf3614b3084..31fb0bffa098 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -46,6 +46,7 @@
#include <linux/qed/fcoe_common.h>
#include <linux/qed/eth_common.h>
#include <linux/qed/iscsi_common.h>
+#include <linux/qed/iwarp_common.h>
#include <linux/qed/rdma_common.h>
#include <linux/qed/roce_common.h>
#include <linux/qed/qed_fcoe_if.h>
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
new file mode 100644
index 000000000000..5cd20da2d4e0
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -0,0 +1,2409 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017 QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and /or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/spinlock.h>
+#include <linux/tcp.h>
+#include "qed_cxt.h"
+#include "qed_hw.h"
+#include "qed_ll2.h"
+#include "qed_rdma.h"
+#include "qed_reg_addr.h"
+#include "qed_sp.h"
+
+#define QED_IWARP_ORD_DEFAULT 32
+#define QED_IWARP_IRD_DEFAULT 32
+#define QED_IWARP_MAX_FW_MSS 4120
+
+#define QED_EP_SIG 0xecabcdef
+
+/* MPA v2 header read from the start of the incoming private data when the
+ * handshake mode is MPA_NEGOTIATION_TYPE_ENHANCED. Both fields are
+ * big-endian; the low bits (MPA_V2_IRD_ORD_MASK) carry the value and the
+ * two top bits carry the flags defined below.
+ */
+struct mpa_v2_hdr {
+ __be16 ird;
+ __be16 ord;
+};
+
+#define MPA_V2_PEER2PEER_MODEL 0x8000 /* on ird */
+#define MPA_V2_SEND_RTR 0x4000 /* on ird */
+#define MPA_V2_READ_RTR 0x4000 /* on ord */
+#define MPA_V2_WRITE_RTR 0x8000 /* on ord */
+#define MPA_V2_IRD_ORD_MASK 0x3FFF
+
+#define MPA_REV2(_mpa_rev) ((_mpa_rev) == MPA_NEGOTIATION_TYPE_ENHANCED)
+
+#define QED_IWARP_INVALID_TCP_CID 0xffffffff
+#define QED_IWARP_RCV_WND_SIZE_DEF (256 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_MIN (64 * 1024)
+#define TIMESTAMP_HEADER_SIZE (12)
+
+#define QED_IWARP_TS_EN BIT(0)
+#define QED_IWARP_DA_EN BIT(1)
+#define QED_IWARP_PARAM_CRC_NEEDED (1)
+#define QED_IWARP_PARAM_P2P (1)
+
+static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn,
+ u8 fw_event_code, u16 echo,
+ union event_ring_data *data,
+ u8 fw_return_code);
+
+/* Override devinfo with iWARP specific values: the inline-data limit, the
+ * QP/CQ limits and the default IRD/ORD resource counts. max_qp is the
+ * smaller of IWARP_MAX_QPS and the configured num_qps, less
+ * QED_IWARP_PREALLOC_CNT; max_cq is set to the same value.
+ * NOTE(review): the subtraction presumes at least QED_IWARP_PREALLOC_CNT
+ * QPs are configured - confirm against the caller.
+ */
+void qed_iwarp_init_devinfo(struct qed_hwfn *p_hwfn)
+{
+ struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev;
+
+ dev->max_inline = IWARP_REQ_MAX_INLINE_DATA_SIZE;
+ dev->max_qp = min_t(u32,
+ IWARP_MAX_QPS,
+ p_hwfn->p_rdma_info->num_qps) -
+ QED_IWARP_PREALLOC_CNT;
+
+ dev->max_cq = dev->max_qp;
+
+ dev->max_qp_resp_rd_atomic_resc = QED_IWARP_IRD_DEFAULT;
+ dev->max_qp_req_rd_atomic_resc = QED_IWARP_ORD_DEFAULT;
+}
+
+/* Record PRS_REG_SEARCH_TCP as the RDMA parser search register, write 1 to
+ * it and mark RDMA search as enabled in the parser.
+ */
+void qed_iwarp_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_TCP;
+ qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 1);
+ p_hwfn->b_rdma_enabled_in_prs = true;
+}
+
+/* We have two cid maps, one for tcp which should be used only from passive
+ * syn processing and replacing a pre-allocated ep in the list. The second
+ * for active tcp and for QPs.
+ *
+ * Release @cid back to the map it belongs to. @cid is passed in as an
+ * absolute cid and is made relative to the protocol's first cid here:
+ * relative ids below QED_IWARP_PREALLOC_CNT are released to tcp_cid_map,
+ * all others to cid_map. The release is done under p_rdma_info->lock.
+ */
+static void qed_iwarp_cid_cleaned(struct qed_hwfn *p_hwfn, u32 cid)
+{
+ cid -= qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto);
+
+ spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+
+ if (cid < QED_IWARP_PREALLOC_CNT)
+ qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map,
+ cid);
+ else
+ qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid);
+
+ spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
+/* Allocate an id from cid_map (under p_rdma_info->lock), turn it into an
+ * absolute cid by adding the protocol's first cid and store it in @cid,
+ * then call qed_cxt_dynamic_ilt_alloc() for its context element. If that
+ * call fails the cid is released again. Returns 0 on success or the error
+ * of the failing step.
+ */
+static int qed_iwarp_alloc_cid(struct qed_hwfn *p_hwfn, u32 *cid)
+{
+ int rc;
+
+ spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+ rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid);
+ spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+ if (rc) {
+ DP_NOTICE(p_hwfn, "Failed in allocating iwarp cid\n");
+ return rc;
+ }
+ *cid += qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto);
+
+ rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, *cid);
+ if (rc)
+ qed_iwarp_cid_cleaned(p_hwfn, *cid);
+
+ return rc;
+}
+
+/* Set the entry for @cid in tcp_cid_map under p_rdma_info->lock. @cid is
+ * passed in as an absolute cid and is made relative to the protocol's
+ * first cid before the map is updated.
+ */
+static void qed_iwarp_set_tcp_cid(struct qed_hwfn *p_hwfn, u32 cid)
+{
+ cid -= qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto);
+
+ spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+ qed_bmap_set_id(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, cid);
+ spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
+/* This function allocates a cid for passive tcp (called from syn receive)
+ * the reason it's separate from the regular cid allocation is because it
+ * is assured that these cids already have ilt allocated. They are preallocated
+ * to ensure that we won't need to allocate memory during syn processing
+ *
+ * On success @cid holds the absolute cid (map id plus the protocol's first
+ * cid) and 0 is returned. On failure @cid is set to
+ * QED_IWARP_INVALID_TCP_CID and the allocator's error is returned.
+ */
+static int qed_iwarp_alloc_tcp_cid(struct qed_hwfn *p_hwfn, u32 *cid)
+{
+ int rc;
+
+ spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+
+ rc = qed_rdma_bmap_alloc_id(p_hwfn,
+ &p_hwfn->p_rdma_info->tcp_cid_map, cid);
+
+ spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "can't allocate iwarp tcp cid max-count=%d\n",
+ p_hwfn->p_rdma_info->tcp_cid_map.max_count);
+
+ *cid = QED_IWARP_INVALID_TCP_CID;
+ return rc;
+ }
+
+ *cid += qed_cxt_get_proto_cid_start(p_hwfn,
+ p_hwfn->p_rdma_info->proto);
+ return 0;
+}
+
+/* Create an iWARP QP in firmware.
+ *
+ * Allocates the DMA-coherent shared queue page and reports the SQ/RQ PBL
+ * virtual and physical addresses inside it through @out_params, allocates
+ * a cid for the QP, then fills and posts the CREATE_QP ramrod in
+ * QED_SPQ_MODE_EBLOCK mode.
+ *
+ * Returns 0 on success. On failure the cid (when one was allocated) is
+ * released, the shared queue page is freed and the error is returned.
+ */
+int qed_iwarp_create_qp(struct qed_hwfn *p_hwfn,
+ struct qed_rdma_qp *qp,
+ struct qed_rdma_create_qp_out_params *out_params)
+{
+ struct iwarp_create_qp_ramrod_data *p_ramrod;
+ struct qed_sp_init_data init_data;
+ struct qed_spq_entry *p_ent;
+ u16 physical_queue;
+ u32 cid;
+ int rc;
+
+ qp->shared_queue = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+ IWARP_SHARED_QUEUE_PAGE_SIZE,
+ &qp->shared_queue_phys_addr,
+ GFP_KERNEL);
+ if (!qp->shared_queue)
+ return -ENOMEM;
+
+ /* The SQ and RQ PBLs live at fixed offsets inside the shared page */
+ out_params->sq_pbl_virt = (u8 *)qp->shared_queue +
+ IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET;
+ out_params->sq_pbl_phys = qp->shared_queue_phys_addr +
+ IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET;
+ out_params->rq_pbl_virt = (u8 *)qp->shared_queue +
+ IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET;
+ out_params->rq_pbl_phys = qp->shared_queue_phys_addr +
+ IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET;
+
+ rc = qed_iwarp_alloc_cid(p_hwfn, &cid);
+ if (rc)
+ goto err1;
+
+ qp->icid = (u16)cid;
+
+ memset(&init_data, 0, sizeof(init_data));
+ init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+ init_data.cid = qp->icid;
+ init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+ rc = qed_sp_init_request(p_hwfn, &p_ent,
+ IWARP_RAMROD_CMD_ID_CREATE_QP,
+ PROTOCOLID_IWARP, &init_data);
+ if (rc)
+ goto err2;
+
+ p_ramrod = &p_ent->ramrod.iwarp_create_qp;
+
+ SET_FIELD(p_ramrod->flags,
+ IWARP_CREATE_QP_RAMROD_DATA_FMR_AND_RESERVED_EN,
+ qp->fmr_and_reserved_lkey);
+
+ SET_FIELD(p_ramrod->flags,
+ IWARP_CREATE_QP_RAMROD_DATA_SIGNALED_COMP, qp->signal_all);
+
+ SET_FIELD(p_ramrod->flags,
+ IWARP_CREATE_QP_RAMROD_DATA_RDMA_RD_EN,
+ qp->incoming_rdma_read_en);
+
+ SET_FIELD(p_ramrod->flags,
+ IWARP_CREATE_QP_RAMROD_DATA_RDMA_WR_EN,
+ qp->incoming_rdma_write_en);
+
+ SET_FIELD(p_ramrod->flags,
+ IWARP_CREATE_QP_RAMROD_DATA_ATOMIC_EN,
+ qp->incoming_atomic_en);
+
+ SET_FIELD(p_ramrod->flags,
+ IWARP_CREATE_QP_RAMROD_DATA_SRQ_FLG, qp->use_srq);
+
+ p_ramrod->pd = qp->pd;
+ p_ramrod->sq_num_pages = qp->sq_num_pages;
+ p_ramrod->rq_num_pages = qp->rq_num_pages;
+
+ p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi);
+ p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo);
+
+ /* CQ cids are passed with the opaque fid in the upper 16 bits */
+ p_ramrod->cq_cid_for_sq =
+ cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->sq_cq_id);
+ p_ramrod->cq_cid_for_rq =
+ cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->rq_cq_id);
+
+ p_ramrod->dpi = cpu_to_le16(qp->dpi);
+
+ physical_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
+ p_ramrod->physical_q0 = cpu_to_le16(physical_queue);
+ physical_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK);
+ p_ramrod->physical_q1 = cpu_to_le16(physical_queue);
+
+ rc = qed_spq_post(p_hwfn, p_ent, NULL);
+ if (rc)
+ goto err2;
+
+ return rc;
+
+err2:
+ qed_iwarp_cid_cleaned(p_hwfn, cid);
+err1:
+ dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+ IWARP_SHARED_QUEUE_PAGE_SIZE,
+ qp->shared_queue, qp->shared_queue_phys_addr);
+
+ return rc;
+}
+
+/* Post a MODIFY_QP ramrod for @qp with the state-transition flag set. The
+ * requested firmware state is CLOSING when qp->iwarp_state is
+ * QED_IWARP_QP_STATE_CLOSING and ERROR for every other state. Returns the
+ * result of the ramrod init or post.
+ */
+static int qed_iwarp_modify_fw(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+{
+ struct iwarp_modify_qp_ramrod_data *p_ramrod;
+ struct qed_sp_init_data init_data;
+ struct qed_spq_entry *p_ent;
+ int rc;
+
+ /* Get SPQ entry */
+ memset(&init_data, 0, sizeof(init_data));
+ init_data.cid = qp->icid;
+ init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+ init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+ rc = qed_sp_init_request(p_hwfn, &p_ent,
+ IWARP_RAMROD_CMD_ID_MODIFY_QP,
+ p_hwfn->p_rdma_info->proto, &init_data);
+ if (rc)
+ return rc;
+
+ p_ramrod = &p_ent->ramrod.iwarp_modify_qp;
+ SET_FIELD(p_ramrod->flags, IWARP_MODIFY_QP_RAMROD_DATA_STATE_TRANS_EN,
+ 0x1);
+ if (qp->iwarp_state == QED_IWARP_QP_STATE_CLOSING)
+ p_ramrod->transition_to_state = IWARP_MODIFY_QP_STATE_CLOSING;
+ else
+ p_ramrod->transition_to_state = IWARP_MODIFY_QP_STATE_ERROR;
+
+ rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x)rc=%d\n", qp->icid, rc);
+
+ return rc;
+}
+
+/* Translate a RoCE QP state into the matching iWARP QP state. RESET, INIT
+ * and RTR all collapse to IDLE; any state without an explicit mapping is
+ * reported as ERROR.
+ */
+enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state)
+{
+	enum qed_iwarp_qp_state iw_state;
+
+	switch (state) {
+	case QED_ROCE_QP_STATE_RESET:
+	case QED_ROCE_QP_STATE_INIT:
+	case QED_ROCE_QP_STATE_RTR:
+		iw_state = QED_IWARP_QP_STATE_IDLE;
+		break;
+	case QED_ROCE_QP_STATE_RTS:
+		iw_state = QED_IWARP_QP_STATE_RTS;
+		break;
+	case QED_ROCE_QP_STATE_SQD:
+		iw_state = QED_IWARP_QP_STATE_CLOSING;
+		break;
+	case QED_ROCE_QP_STATE_SQE:
+		iw_state = QED_IWARP_QP_STATE_TERMINATE;
+		break;
+	case QED_ROCE_QP_STATE_ERR:
+	default:
+		iw_state = QED_IWARP_QP_STATE_ERROR;
+		break;
+	}
+
+	return iw_state;
+}
+
+/* Translate an iWARP QP state into the matching RoCE QP state. IDLE is
+ * reported as INIT; any state without an explicit mapping is reported as
+ * ERR.
+ */
+static enum qed_roce_qp_state
+qed_iwarp2roce_state(enum qed_iwarp_qp_state state)
+{
+	enum qed_roce_qp_state roce_state;
+
+	switch (state) {
+	case QED_IWARP_QP_STATE_IDLE:
+		roce_state = QED_ROCE_QP_STATE_INIT;
+		break;
+	case QED_IWARP_QP_STATE_RTS:
+		roce_state = QED_ROCE_QP_STATE_RTS;
+		break;
+	case QED_IWARP_QP_STATE_TERMINATE:
+		roce_state = QED_ROCE_QP_STATE_SQE;
+		break;
+	case QED_IWARP_QP_STATE_CLOSING:
+		roce_state = QED_ROCE_QP_STATE_SQD;
+		break;
+	case QED_IWARP_QP_STATE_ERROR:
+	default:
+		roce_state = QED_ROCE_QP_STATE_ERR;
+		break;
+	}
+
+	return roce_state;
+}
+
+/* Printable QP state names, indexed by enum qed_iwarp_qp_state values in
+ * the verbose print of qed_iwarp_modify_qp().
+ * NOTE(review): the order presumably mirrors the enum's declaration order -
+ * confirm against the enum definition.
+ */
+const char *iwarp_state_names[] = {
+ "IDLE",
+ "RTS",
+ "TERMINATE",
+ "CLOSING",
+ "ERROR",
+};
+
+/* Move @qp to @new_state according to the driver's iWARP QP state machine.
+ *
+ * The state is changed under iwarp.qp_lock. Allowed transitions:
+ *   IDLE  -> RTS, ERROR
+ *   RTS   -> CLOSING, ERROR
+ *   ERROR -> IDLE (ERROR -> CLOSING is silently ignored, any other
+ *            request from ERROR fails with -EINVAL)
+ *   TERMINATE / CLOSING -> any state
+ * Requests that are not listed leave the state unchanged and return 0, as
+ * does a request for the state the QP is already in.
+ *
+ * When @internal is false, IDLE -> ERROR and RTS -> CLOSING/ERROR also post
+ * a MODIFY_QP ramrod (after the lock is dropped) and its result is
+ * returned.
+ */
+int
+qed_iwarp_modify_qp(struct qed_hwfn *p_hwfn,
+		    struct qed_rdma_qp *qp,
+		    enum qed_iwarp_qp_state new_state, bool internal)
+{
+	enum qed_iwarp_qp_state prev_iw_state;
+	bool modify_fw = false;
+	int rc = 0;
+
+	/* modify QP can be called from upper-layer or as a result of async
+	 * RST/FIN... therefore need to protect
+	 */
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+	prev_iw_state = qp->iwarp_state;
+
+	if (prev_iw_state == new_state) {
+		spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+		return 0;
+	}
+
+	switch (prev_iw_state) {
+	case QED_IWARP_QP_STATE_IDLE:
+		switch (new_state) {
+		case QED_IWARP_QP_STATE_RTS:
+			qp->iwarp_state = QED_IWARP_QP_STATE_RTS;
+			break;
+		case QED_IWARP_QP_STATE_ERROR:
+			qp->iwarp_state = QED_IWARP_QP_STATE_ERROR;
+			if (!internal)
+				modify_fw = true;
+			break;
+		default:
+			break;
+		}
+		break;
+	case QED_IWARP_QP_STATE_RTS:
+		switch (new_state) {
+		case QED_IWARP_QP_STATE_CLOSING:
+			if (!internal)
+				modify_fw = true;
+
+			qp->iwarp_state = QED_IWARP_QP_STATE_CLOSING;
+			break;
+		case QED_IWARP_QP_STATE_ERROR:
+			if (!internal)
+				modify_fw = true;
+			qp->iwarp_state = QED_IWARP_QP_STATE_ERROR;
+			break;
+		default:
+			break;
+		}
+		break;
+	case QED_IWARP_QP_STATE_ERROR:
+		switch (new_state) {
+		case QED_IWARP_QP_STATE_IDLE:
+
+			qp->iwarp_state = new_state;
+			break;
+		case QED_IWARP_QP_STATE_CLOSING:
+			/* could happen due to race... do nothing.... */
+			break;
+		default:
+			rc = -EINVAL;
+		}
+		break;
+	case QED_IWARP_QP_STATE_TERMINATE:
+	case QED_IWARP_QP_STATE_CLOSING:
+		qp->iwarp_state = new_state;
+		break;
+	default:
+		break;
+	}
+
+	/* Leading space keeps "internal" from running into the state name */
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) %s --> %s%s\n",
+		   qp->icid,
+		   iwarp_state_names[prev_iw_state],
+		   iwarp_state_names[qp->iwarp_state],
+		   internal ? " internal" : "");
+
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+
+	/* The ramrod is posted outside the lock */
+	if (modify_fw)
+		rc = qed_iwarp_modify_fw(p_hwfn, qp);
+
+	return rc;
+}
+
+/* Post a DESTROY_QP ramrod for @qp in QED_SPQ_MODE_EBLOCK mode. Returns
+ * the result of the ramrod init or post.
+ */
+int qed_iwarp_fw_destroy(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+{
+ struct qed_sp_init_data init_data;
+ struct qed_spq_entry *p_ent;
+ int rc;
+
+ /* Get SPQ entry */
+ memset(&init_data, 0, sizeof(init_data));
+ init_data.cid = qp->icid;
+ init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+ init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+ rc = qed_sp_init_request(p_hwfn, &p_ent,
+ IWARP_RAMROD_CMD_ID_DESTROY_QP,
+ p_hwfn->p_rdma_info->proto, &init_data);
+ if (rc)
+ return rc;
+
+ rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) rc = %d\n", qp->icid, rc);
+
+ return rc;
+}
+
+/* Free @ep and its DMA-coherent buffer. When @remove_from_active_list is
+ * true the ep is first unlinked from the list it is on, under
+ * iwarp.iw_lock. If a QP is attached to the ep, the QP's ep pointer is
+ * cleared before the ep is freed.
+ */
+static void qed_iwarp_destroy_ep(struct qed_hwfn *p_hwfn,
+ struct qed_iwarp_ep *ep,
+ bool remove_from_active_list)
+{
+ dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+ sizeof(*ep->ep_buffer_virt),
+ ep->ep_buffer_virt, ep->ep_buffer_phys);
+
+ if (remove_from_active_list) {
+ spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+ list_del(&ep->list_entry);
+ spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+ }
+
+ if (ep->qp)
+ ep->qp->ep = NULL;
+
+ kfree(ep);
+}
+
+/* Destroy an iWARP QP: move it to ERROR if it is not there yet, wait for
+ * its ep (if any) to close and free the ep, post the firmware destroy
+ * ramrod and free the shared queue page. Returns the result of the destroy
+ * ramrod, or the error from the state change if that fails.
+ * NOTE(review): when the state change fails the function returns before
+ * the ep and the shared queue page are freed - confirm this is intended.
+ */
+int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+{
+ struct qed_iwarp_ep *ep = qp->ep;
+ int wait_count = 0;
+ int rc = 0;
+
+ if (qp->iwarp_state != QED_IWARP_QP_STATE_ERROR) {
+ rc = qed_iwarp_modify_qp(p_hwfn, qp,
+ QED_IWARP_QP_STATE_ERROR, false);
+ if (rc)
+ return rc;
+ }
+
+ /* Make sure ep is closed before returning and freeing memory. */
+ if (ep) {
+ /* Poll at most 200 times, sleeping 100 ms between polls */
+ while (ep->state != QED_IWARP_EP_CLOSED && wait_count++ < 200)
+ msleep(100);
+
+ /* A timeout is only logged; the ep is destroyed regardless */
+ if (ep->state != QED_IWARP_EP_CLOSED)
+ DP_NOTICE(p_hwfn, "ep state close timeout state=%x\n",
+ ep->state);
+
+ qed_iwarp_destroy_ep(p_hwfn, ep, false);
+ }
+
+ rc = qed_iwarp_fw_destroy(p_hwfn, qp);
+
+ if (qp->shared_queue)
+ dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+ IWARP_SHARED_QUEUE_PAGE_SIZE,
+ qp->shared_queue, qp->shared_queue_phys_addr);
+
+ return rc;
+}
+
+/* Allocate a zeroed endpoint in the QED_IWARP_EP_INIT state together with
+ * its DMA-coherent buffer, stamp it with QED_EP_SIG and hand it back
+ * through @ep_out. Returns 0 on success or -ENOMEM when either allocation
+ * fails; nothing stays allocated in the failure case and @ep_out is not
+ * written.
+ */
+static int
+qed_iwarp_create_ep(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep **ep_out)
+{
+	struct qed_iwarp_ep *new_ep = kzalloc(sizeof(*new_ep), GFP_KERNEL);
+
+	if (!new_ep)
+		return -ENOMEM;
+
+	new_ep->state = QED_IWARP_EP_INIT;
+
+	new_ep->ep_buffer_virt =
+		dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+				   sizeof(*new_ep->ep_buffer_virt),
+				   &new_ep->ep_buffer_phys, GFP_KERNEL);
+	if (!new_ep->ep_buffer_virt) {
+		kfree(new_ep);
+		return -ENOMEM;
+	}
+
+	new_ep->sig = QED_EP_SIG;
+	*ep_out = new_ep;
+
+	return 0;
+}
+
+/* Dump the TCP part of a TCP offload ramrod at QED_MSG_RDMA verbosity:
+ * MAC addresses, IP addresses and ports (IPv4 or IPv6 depending on
+ * tcp.ip_version), VLAN, the remaining TCP parameters and the SYN buffer
+ * description. Fields are printed as stored in the ramrod.
+ */
+static void
+qed_iwarp_print_tcp_ramrod(struct qed_hwfn *p_hwfn,
+			   struct iwarp_tcp_offload_ramrod_data *p_tcp_ramrod)
+{
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "local_mac=%x %x %x, remote_mac=%x %x %x\n",
+		   p_tcp_ramrod->tcp.local_mac_addr_lo,
+		   p_tcp_ramrod->tcp.local_mac_addr_mid,
+		   p_tcp_ramrod->tcp.local_mac_addr_hi,
+		   p_tcp_ramrod->tcp.remote_mac_addr_lo,
+		   p_tcp_ramrod->tcp.remote_mac_addr_mid,
+		   p_tcp_ramrod->tcp.remote_mac_addr_hi);
+
+	if (p_tcp_ramrod->tcp.ip_version == TCP_IPV4) {
+		/* ':' separates address and port for both endpoints */
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "local_ip=%pI4h:%x, remote_ip=%pI4h:%x, vlan=%x\n",
+			   p_tcp_ramrod->tcp.local_ip,
+			   p_tcp_ramrod->tcp.local_port,
+			   p_tcp_ramrod->tcp.remote_ip,
+			   p_tcp_ramrod->tcp.remote_port,
+			   p_tcp_ramrod->tcp.vlan_id);
+	} else {
+		/* The 'h' (host order) modifier exists for %pI4 only */
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "local_ip=%pI6:%x, remote_ip=%pI6:%x, vlan=%x\n",
+			   p_tcp_ramrod->tcp.local_ip,
+			   p_tcp_ramrod->tcp.local_port,
+			   p_tcp_ramrod->tcp.remote_ip,
+			   p_tcp_ramrod->tcp.remote_port,
+			   p_tcp_ramrod->tcp.vlan_id);
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "flow_label=%x, ttl=%x, tos_or_tc=%x, mss=%x, rcv_wnd_scale=%x, connect_mode=%x, flags=%x\n",
+		   p_tcp_ramrod->tcp.flow_label,
+		   p_tcp_ramrod->tcp.ttl,
+		   p_tcp_ramrod->tcp.tos_or_tc,
+		   p_tcp_ramrod->tcp.mss,
+		   p_tcp_ramrod->tcp.rcv_wnd_scale,
+		   p_tcp_ramrod->tcp.connect_mode,
+		   p_tcp_ramrod->tcp.flags);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "syn_ip_payload_length=%x, lo=%x, hi=%x\n",
+		   p_tcp_ramrod->tcp.syn_ip_payload_length,
+		   p_tcp_ramrod->tcp.syn_phy_addr_lo,
+		   p_tcp_ramrod->tcp.syn_phy_addr_hi);
+}
+
+/* Build and post the TCP_OFFLOAD ramrod for @ep.
+ *
+ * The ramrod is given the physical addresses of the ep's incoming private
+ * data buffer and async output buffer, the ep pointer as the async handle,
+ * the OFLD/ACK physical queues and the TCP connection parameters taken
+ * from @ep and the per-function iWARP info. Passive connections post in
+ * QED_SPQ_MODE_CB mode, all others in QED_SPQ_MODE_EBLOCK mode.
+ *
+ * Returns the result of the ramrod init or post.
+ */
+static int
+qed_iwarp_tcp_offload(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+ struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+ struct iwarp_tcp_offload_ramrod_data *p_tcp_ramrod;
+ struct tcp_offload_params_opt2 *tcp;
+ struct qed_sp_init_data init_data;
+ struct qed_spq_entry *p_ent;
+ dma_addr_t async_output_phys;
+ dma_addr_t in_pdata_phys;
+ u16 physical_q;
+ u8 tcp_flags;
+ int rc;
+ int i;
+
+ memset(&init_data, 0, sizeof(init_data));
+ init_data.cid = ep->tcp_cid;
+ init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+ if (ep->connect_mode == TCP_CONNECT_PASSIVE)
+ init_data.comp_mode = QED_SPQ_MODE_CB;
+ else
+ init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+ rc = qed_sp_init_request(p_hwfn, &p_ent,
+ IWARP_RAMROD_CMD_ID_TCP_OFFLOAD,
+ PROTOCOLID_IWARP, &init_data);
+ if (rc)
+ return rc;
+
+ p_tcp_ramrod = &p_ent->ramrod.iwarp_tcp_offload;
+
+ /* Both buffers live inside the ep's single DMA allocation */
+ in_pdata_phys = ep->ep_buffer_phys +
+ offsetof(struct qed_iwarp_ep_memory, in_pdata);
+ DMA_REGPAIR_LE(p_tcp_ramrod->iwarp.incoming_ulp_buffer.addr,
+ in_pdata_phys);
+
+ p_tcp_ramrod->iwarp.incoming_ulp_buffer.len =
+ cpu_to_le16(sizeof(ep->ep_buffer_virt->in_pdata));
+
+ async_output_phys = ep->ep_buffer_phys +
+ offsetof(struct qed_iwarp_ep_memory, async_output);
+ DMA_REGPAIR_LE(p_tcp_ramrod->iwarp.async_eqe_output_buf,
+ async_output_phys);
+
+ p_tcp_ramrod->iwarp.handle_for_async.hi = cpu_to_le32(PTR_HI(ep));
+ p_tcp_ramrod->iwarp.handle_for_async.lo = cpu_to_le32(PTR_LO(ep));
+
+ physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
+ p_tcp_ramrod->iwarp.physical_q0 = cpu_to_le16(physical_q);
+ physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK);
+ p_tcp_ramrod->iwarp.physical_q1 = cpu_to_le16(physical_q);
+ p_tcp_ramrod->iwarp.mpa_mode = iwarp_info->mpa_rev;
+
+ tcp = &p_tcp_ramrod->tcp;
+ qed_set_fw_mac_addr(&tcp->remote_mac_addr_hi,
+ &tcp->remote_mac_addr_mid,
+ &tcp->remote_mac_addr_lo, ep->remote_mac_addr);
+ qed_set_fw_mac_addr(&tcp->local_mac_addr_hi, &tcp->local_mac_addr_mid,
+ &tcp->local_mac_addr_lo, ep->local_mac_addr);
+
+ tcp->vlan_id = cpu_to_le16(ep->cm_info.vlan);
+
+ tcp_flags = p_hwfn->p_rdma_info->iwarp.tcp_flags;
+ tcp->flags = 0;
+ SET_FIELD(tcp->flags, TCP_OFFLOAD_PARAMS_OPT2_TS_EN,
+ !!(tcp_flags & QED_IWARP_TS_EN));
+
+ SET_FIELD(tcp->flags, TCP_OFFLOAD_PARAMS_OPT2_DA_EN,
+ !!(tcp_flags & QED_IWARP_DA_EN));
+
+ tcp->ip_version = ep->cm_info.ip_version;
+
+ /* All four address words are copied regardless of ip_version */
+ for (i = 0; i < 4; i++) {
+ tcp->remote_ip[i] = cpu_to_le32(ep->cm_info.remote_ip[i]);
+ tcp->local_ip[i] = cpu_to_le32(ep->cm_info.local_ip[i]);
+ }
+
+ tcp->remote_port = cpu_to_le16(ep->cm_info.remote_port);
+ tcp->local_port = cpu_to_le16(ep->cm_info.local_port);
+ tcp->mss = cpu_to_le16(ep->mss);
+ tcp->flow_label = 0;
+ tcp->ttl = 0x40;
+ tcp->tos_or_tc = 0;
+
+ tcp->rcv_wnd_scale = (u8)p_hwfn->p_rdma_info->iwarp.rcv_wnd_scale;
+ tcp->connect_mode = ep->connect_mode;
+
+ /* The SYN buffer fields are only filled in for passive connections */
+ if (ep->connect_mode == TCP_CONNECT_PASSIVE) {
+ tcp->syn_ip_payload_length =
+ cpu_to_le16(ep->syn_ip_payload_length);
+ tcp->syn_phy_addr_hi = DMA_HI_LE(ep->syn_phy_addr);
+ tcp->syn_phy_addr_lo = DMA_LO_LE(ep->syn_phy_addr);
+ }
+
+ qed_iwarp_print_tcp_ramrod(p_hwfn, p_tcp_ramrod);
+
+ rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "EP(0x%x) Offload completed rc=%d\n", ep->tcp_cid, rc);
+
+ return rc;
+}
+
+/* Handle an incoming MPA request on @ep: read the handshake mode from the
+ * ep's async output buffer, derive ord/ird and the RTR type (enhanced mode
+ * reads them from the MPA v2 header at the start of the incoming private
+ * data, any other mode uses the defaults), point cm_info at the private
+ * data that follows the header, move the ep to QED_IWARP_EP_MPA_REQ_RCVD
+ * and report QED_IWARP_EVENT_MPA_REQUEST through ep->event_cb.
+ */
+static void
+qed_iwarp_mpa_received(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+ struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+ struct qed_iwarp_cm_event_params params;
+ struct mpa_v2_hdr *mpa_v2;
+ union async_output *async_data;
+ u16 mpa_ord, mpa_ird;
+ u8 mpa_hdr_size = 0;
+ u8 mpa_rev;
+
+ async_data = &ep->ep_buffer_virt->async_output;
+
+ mpa_rev = async_data->mpa_request.mpa_handshake_mode;
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "private_data_len=%x handshake_mode=%x private_data=(%x)\n",
+ async_data->mpa_request.ulp_data_len,
+ mpa_rev, *((u32 *)(ep->ep_buffer_virt->in_pdata)));
+
+ if (mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) {
+ /* Read ord/ird values from private data buffer */
+ mpa_v2 = (struct mpa_v2_hdr *)ep->ep_buffer_virt->in_pdata;
+ mpa_hdr_size = sizeof(*mpa_v2);
+
+ mpa_ord = ntohs(mpa_v2->ord);
+ mpa_ird = ntohs(mpa_v2->ird);
+
+ /* Temporarily store the requested incoming ord/ird in cm_info;
+ * they are replaced with the negotiated values during accept
+ */
+ ep->cm_info.ord = (u8)min_t(u16,
+ (mpa_ord & MPA_V2_IRD_ORD_MASK),
+ QED_IWARP_ORD_DEFAULT);
+
+ ep->cm_info.ird = (u8)min_t(u16,
+ (mpa_ird & MPA_V2_IRD_ORD_MASK),
+ QED_IWARP_IRD_DEFAULT);
+
+ /* Peer2Peer negotiation */
+ ep->rtr_type = MPA_RTR_TYPE_NONE;
+ if (mpa_ird & MPA_V2_PEER2PEER_MODEL) {
+ if (mpa_ord & MPA_V2_WRITE_RTR)
+ ep->rtr_type |= MPA_RTR_TYPE_ZERO_WRITE;
+
+ if (mpa_ord & MPA_V2_READ_RTR)
+ ep->rtr_type |= MPA_RTR_TYPE_ZERO_READ;
+
+ if (mpa_ird & MPA_V2_SEND_RTR)
+ ep->rtr_type |= MPA_RTR_TYPE_ZERO_SEND;
+
+ /* Keep only the RTR types this function also supports */
+ ep->rtr_type &= iwarp_info->rtr_type;
+
+ /* if we're left with no match send our capabilities */
+ if (ep->rtr_type == MPA_RTR_TYPE_NONE)
+ ep->rtr_type = iwarp_info->rtr_type;
+ }
+
+ ep->mpa_rev = MPA_NEGOTIATION_TYPE_ENHANCED;
+ } else {
+ ep->cm_info.ord = QED_IWARP_ORD_DEFAULT;
+ ep->cm_info.ird = QED_IWARP_IRD_DEFAULT;
+ ep->mpa_rev = MPA_NEGOTIATION_TYPE_BASIC;
+ }
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x rtr:0x%x ulp_data_len = %x mpa_hdr_size = %x\n",
+ mpa_rev, ep->cm_info.ord, ep->cm_info.ird, ep->rtr_type,
+ async_data->mpa_request.ulp_data_len, mpa_hdr_size);
+
+ /* Strip mpa v2 hdr from private data before sending to upper layer */
+ ep->cm_info.private_data = ep->ep_buffer_virt->in_pdata + mpa_hdr_size;
+
+ /* NOTE(review): ulp_data_len is not checked against mpa_hdr_size
+ * before the subtraction - confirm it can never be smaller.
+ */
+ ep->cm_info.private_data_len = async_data->mpa_request.ulp_data_len -
+ mpa_hdr_size;
+
+ params.event = QED_IWARP_EVENT_MPA_REQUEST;
+ params.cm_info = &ep->cm_info;
+ params.ep_context = ep;
+ params.status = 0;
+
+ ep->state = QED_IWARP_EP_MPA_REQ_RCVD;
+ ep->event_cb(ep->cb_context, &params);
+}
+
+/* Build and post the IWARP_RAMROD_CMD_ID_MPA_OFFLOAD ramrod for @ep.
+ *
+ * An ep without an associated qp is offloaded with the reject flag set.
+ * Returns -EINVAL when @ep is NULL, otherwise the result of initializing or
+ * posting the slowpath request.
+ */
+static int
+qed_iwarp_mpa_offload(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	struct iwarp_mpa_offload_ramrod_data *p_mpa_ramrod;
+	struct qed_sp_init_data init_data;
+	dma_addr_t async_output_phys;
+	struct qed_spq_entry *p_ent;
+	dma_addr_t out_pdata_phys;
+	dma_addr_t in_pdata_phys;
+	struct qed_rdma_qp *qp;
+	bool reject;
+	int rc;
+
+	if (!ep)
+		return -EINVAL;
+
+	qp = ep->qp;
+	reject = !qp;
+
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = reject ? ep->tcp_cid : qp->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = (ep->connect_mode == TCP_CONNECT_ACTIVE) ?
+			      QED_SPQ_MODE_CB : QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 IWARP_RAMROD_CMD_ID_MPA_OFFLOAD,
+				 PROTOCOLID_IWARP, &init_data);
+	if (rc)
+		return rc;
+
+	/* All three buffers live inside the ep's single DMA memory area */
+	out_pdata_phys = ep->ep_buffer_phys +
+			 offsetof(struct qed_iwarp_ep_memory, out_pdata);
+	in_pdata_phys = ep->ep_buffer_phys +
+			offsetof(struct qed_iwarp_ep_memory, in_pdata);
+	async_output_phys = ep->ep_buffer_phys +
+			    offsetof(struct qed_iwarp_ep_memory, async_output);
+
+	p_mpa_ramrod = &p_ent->ramrod.iwarp_mpa_offload;
+
+	/* Outgoing private data and negotiation parameters */
+	DMA_REGPAIR_LE(p_mpa_ramrod->common.outgoing_ulp_buffer.addr,
+		       out_pdata_phys);
+	p_mpa_ramrod->common.outgoing_ulp_buffer.len =
+	    ep->cm_info.private_data_len;
+	p_mpa_ramrod->common.crc_needed = p_hwfn->p_rdma_info->iwarp.crc_needed;
+
+	p_mpa_ramrod->common.out_rq.ord = ep->cm_info.ord;
+	p_mpa_ramrod->common.out_rq.ird = ep->cm_info.ird;
+
+	p_mpa_ramrod->tcp_cid = p_hwfn->hw_info.opaque_fid << 16 | ep->tcp_cid;
+
+	/* Incoming private data and async output buffers */
+	p_mpa_ramrod->tcp_connect_side = ep->connect_mode;
+	DMA_REGPAIR_LE(p_mpa_ramrod->incoming_ulp_buffer.addr,
+		       in_pdata_phys);
+	p_mpa_ramrod->incoming_ulp_buffer.len =
+	    cpu_to_le16(sizeof(ep->ep_buffer_virt->in_pdata));
+	DMA_REGPAIR_LE(p_mpa_ramrod->async_eqe_output_buf,
+		       async_output_phys);
+	p_mpa_ramrod->handle_for_async.hi = cpu_to_le32(PTR_HI(ep));
+	p_mpa_ramrod->handle_for_async.lo = cpu_to_le32(PTR_LO(ep));
+
+	if (reject) {
+		p_mpa_ramrod->common.reject = 1;
+	} else {
+		DMA_REGPAIR_LE(p_mpa_ramrod->shared_queue_addr,
+			       qp->shared_queue_phys_addr);
+		p_mpa_ramrod->stats_counter_id =
+		    RESC_START(p_hwfn, QED_RDMA_STATS_QUEUE) + qp->stats_queue;
+	}
+
+	p_mpa_ramrod->mode = ep->mpa_rev;
+	SET_FIELD(p_mpa_ramrod->rtr_pref,
+		  IWARP_MPA_OFFLOAD_RAMROD_DATA_RTR_SUPPORTED, ep->rtr_type);
+
+	/* The state is updated before the ramrod is posted */
+	ep->state = QED_IWARP_EP_MPA_OFFLOADED;
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+	if (!reject)
+		ep->cid = qp->icid;	/* Now they're migrated. */
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_RDMA,
+		   "QP(0x%x) EP(0x%x) MPA Offload rc = %d IRD=0x%x ORD=0x%x rtr_type=%d mpa_rev=%d reject=%d\n",
+		   reject ? 0xffff : qp->icid,
+		   ep->tcp_cid,
+		   rc,
+		   ep->cm_info.ird,
+		   ep->cm_info.ord, ep->rtr_type, ep->mpa_rev, reject);
+	return rc;
+}
+
+/* Reset @ep to its initial state, detach it from its qp (if any) and move it
+ * to the tail of the free ep list.
+ */
+static void
+qed_iwarp_return_ep(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	struct qed_iwarp_info *iwarp = &p_hwfn->p_rdma_info->iwarp;
+
+	ep->state = QED_IWARP_EP_INIT;
+	if (ep->qp)
+		ep->qp->ep = NULL;
+	ep->qp = NULL;
+	memset(&ep->cm_info, 0, sizeof(ep->cm_info));
+
+	/* The return code is deliberately ignored: it's ok if tcp_cid
+	 * remains invalid, in which case allocation is deferred.
+	 */
+	if (ep->tcp_cid == QED_IWARP_INVALID_TCP_CID)
+		qed_iwarp_alloc_tcp_cid(p_hwfn, &ep->tcp_cid);
+
+	spin_lock_bh(&iwarp->iw_lock);
+	list_move_tail(&ep->list_entry, &iwarp->ep_free_list);
+	spin_unlock_bh(&iwarp->iw_lock);
+}
+
+/* Fill ep->cm_info from the incoming private data buffer of an MPA response.
+ *
+ * When the function-wide mpa_rev is revision 2, the leading mpa v2 header is
+ * decoded into cm_info ird/ord (our ird is taken from the header's ord field
+ * and vice versa) and skipped, so that private_data points past it.
+ */
+void
+qed_iwarp_parse_private_data(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	union async_output *async_data = &ep->ep_buffer_virt->async_output;
+	struct mpa_v2_hdr *mpa_v2_params;
+	u16 mpa_ird, mpa_ord;
+	u8 mpa_data_size = 0;
+
+	if (MPA_REV2(p_hwfn->p_rdma_info->iwarp.mpa_rev)) {
+		mpa_v2_params =
+			(struct mpa_v2_hdr *)(ep->ep_buffer_virt->in_pdata);
+		mpa_data_size = sizeof(*mpa_v2_params);
+
+		mpa_ird = ntohs(mpa_v2_params->ird);
+		mpa_ord = ntohs(mpa_v2_params->ord);
+
+		ep->cm_info.ird = (u8)(mpa_ord & MPA_V2_IRD_ORD_MASK);
+		ep->cm_info.ord = (u8)(mpa_ird & MPA_V2_IRD_ORD_MASK);
+	}
+
+	ep->cm_info.private_data = ep->ep_buffer_virt->in_pdata + mpa_data_size;
+	ep->cm_info.private_data_len = async_data->mpa_response.ulp_data_len -
+				       mpa_data_size;
+}
+
+/* Report QED_IWARP_EVENT_ACTIVE_MPA_REPLY to the ep's event callback after
+ * parsing the received private data. The event is ignored (with a notice)
+ * for eps whose connect mode is passive.
+ */
+void
+qed_iwarp_mpa_reply_arrived(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	struct qed_iwarp_cm_event_params params;
+
+	if (ep->connect_mode == TCP_CONNECT_PASSIVE) {
+		DP_NOTICE(p_hwfn,
+			  "MPA reply event not expected on passive side!\n");
+		return;
+	}
+
+	qed_iwarp_parse_private_data(p_hwfn, ep);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n",
+		   ep->mpa_rev, ep->cm_info.ord, ep->cm_info.ird);
+
+	params.event = QED_IWARP_EVENT_ACTIVE_MPA_REPLY;
+	params.cm_info = &ep->cm_info;
+	params.ep_context = ep;
+	params.status = 0;
+
+	/* Remember that the private data was already parsed */
+	ep->mpa_reply_processed = true;
+
+	ep->event_cb(ep->cb_context, &params);
+}
+
+/* Human readable name of an ep's connect mode, for log messages. The whole
+ * expansion is parenthesized so it can be used safely inside any expression.
+ */
+#define QED_IWARP_CONNECT_MODE_STRING(ep) \
+	(((ep)->connect_mode == TCP_CONNECT_PASSIVE) ? "Passive" : "Active")
+
+/* Called as a result of the event:
+ * IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE
+ *
+ * Translates @fw_return_code into an event status, moves the qp to RTS on
+ * success, notifies the ep's event callback and, on failure, takes the ep
+ * off the active list.
+ */
+static void
+qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn,
+		       struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+	bool active = (ep->connect_mode == TCP_CONNECT_ACTIVE);
+	const char *mode = QED_IWARP_CONNECT_MODE_STRING(ep);
+	struct qed_iwarp_cm_event_params params;
+
+	params.event = active ? QED_IWARP_EVENT_ACTIVE_COMPLETE :
+				QED_IWARP_EVENT_PASSIVE_COMPLETE;
+
+	/* Parse the private data here unless the reply handler already did */
+	if (active && !ep->mpa_reply_processed)
+		qed_iwarp_parse_private_data(p_hwfn, ep);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n",
+		   ep->mpa_rev, ep->cm_info.ord, ep->cm_info.ird);
+
+	params.cm_info = &ep->cm_info;
+	params.ep_context = ep;
+
+	/* Closed unless the handshake turns out to have succeeded */
+	ep->state = QED_IWARP_EP_CLOSED;
+
+	switch (fw_return_code) {
+	case RDMA_RETURN_OK:
+		ep->qp->max_rd_atomic_req = ep->cm_info.ord;
+		ep->qp->max_rd_atomic_resp = ep->cm_info.ird;
+		qed_iwarp_modify_qp(p_hwfn, ep->qp, QED_IWARP_QP_STATE_RTS, 1);
+		ep->state = QED_IWARP_EP_ESTABLISHED;
+		params.status = 0;
+		break;
+	case IWARP_CONN_ERROR_MPA_TIMEOUT:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA timeout\n",
+			  mode, ep->cid);
+		params.status = -EBUSY;
+		break;
+	case IWARP_CONN_ERROR_MPA_ERROR_REJECT:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA Reject\n",
+			  mode, ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_RST:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA reset(tcp cid: 0x%x)\n",
+			  mode, ep->cid,
+			  ep->tcp_cid);
+		params.status = -ECONNRESET;
+		break;
+	case IWARP_CONN_ERROR_MPA_FIN:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA received FIN\n",
+			  mode, ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_INSUF_IRD:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA insufficient ird\n",
+			  mode, ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_RTR_MISMATCH:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA RTR MISMATCH\n",
+			  mode, ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_INVALID_PACKET:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA Invalid Packet\n",
+			  mode, ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_LOCAL_ERROR:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA Local Error\n",
+			  mode, ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_TERMINATE:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA TERMINATE\n",
+			  mode, ep->cid);
+		params.status = -ECONNREFUSED;
+		break;
+	default:
+		params.status = -ECONNRESET;
+		break;
+	}
+
+	ep->event_cb(ep->cb_context, &params);
+
+	if (fw_return_code == RDMA_RETURN_OK)
+		return;
+
+	/* On failure the ep must leave the ep_list. On the passive side, an
+	 * ep with no associated QP (REJECT) goes back to the pool; in the
+	 * regular case accept already added a replacement element, so the
+	 * ep is only unlinked.
+	 */
+	ep->tcp_cid = QED_IWARP_INVALID_TCP_CID;
+	if (!active && !ep->qp) {	/* Rejected */
+		qed_iwarp_return_ep(p_hwfn, ep);
+		return;
+	}
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_del(&ep->list_entry);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+}
+
+/* Write the mpa v2 header (ird/ord plus the peer2peer/rtr flags) at the
+ * start of the ep's outgoing private data buffer.
+ *
+ * @mpa_data_size is set to the header size, or to 0 when the ep does not use
+ * MPA revision 2 (in which case nothing is written).
+ */
+static void
+qed_iwarp_mpa_v2_set_private(struct qed_hwfn *p_hwfn,
+			     struct qed_iwarp_ep *ep, u8 *mpa_data_size)
+{
+	struct mpa_v2_hdr *mpa_v2_params;
+	u16 mpa_ird, mpa_ord;
+
+	*mpa_data_size = 0;
+	if (!MPA_REV2(ep->mpa_rev))
+		return;
+
+	mpa_v2_params = (struct mpa_v2_hdr *)ep->ep_buffer_virt->out_pdata;
+	*mpa_data_size = sizeof(*mpa_v2_params);
+
+	mpa_ird = (u16)ep->cm_info.ird;
+	mpa_ord = (u16)ep->cm_info.ord;
+
+	/* The rtr flags are carried in the upper bits of the ird/ord words */
+	if (ep->rtr_type != MPA_RTR_TYPE_NONE) {
+		mpa_ird |= MPA_V2_PEER2PEER_MODEL;
+
+		if (ep->rtr_type & MPA_RTR_TYPE_ZERO_SEND)
+			mpa_ird |= MPA_V2_SEND_RTR;
+
+		if (ep->rtr_type & MPA_RTR_TYPE_ZERO_WRITE)
+			mpa_ord |= MPA_V2_WRITE_RTR;
+
+		if (ep->rtr_type & MPA_RTR_TYPE_ZERO_READ)
+			mpa_ord |= MPA_V2_READ_RTR;
+	}
+
+	mpa_v2_params->ird = htons(mpa_ird);
+	mpa_v2_params->ord = htons(mpa_ord);
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_RDMA,
+		   "MPA_NEGOTIATE Header: [%x ord:%x ird] %x ord:%x ird:%x peer2peer:%x rtr_send:%x rtr_write:%x rtr_read:%x\n",
+		   mpa_v2_params->ird,
+		   mpa_v2_params->ord,
+		   *((u32 *)mpa_v2_params),
+		   mpa_ord & MPA_V2_IRD_ORD_MASK,
+		   mpa_ird & MPA_V2_IRD_ORD_MASK,
+		   !!(mpa_ird & MPA_V2_PEER2PEER_MODEL),
+		   !!(mpa_ird & MPA_V2_SEND_RTR),
+		   !!(mpa_ord & MPA_V2_WRITE_RTR),
+		   !!(mpa_ord & MPA_V2_READ_RTR));
+}
+
+/* Start an active side connection: allocate a cid and an ep, fill the ep
+ * from @iparams, prepare the outgoing private data and offload the TCP
+ * connection.
+ *
+ * On success @oparams->ep_context holds the new ep and 0 is returned.
+ * Returns -EINVAL for an ord/ird above the defaults, or the error of the
+ * failing allocation/offload step (the cid is released in that case).
+ */
+int qed_iwarp_connect(void *rdma_cxt,
+		      struct qed_iwarp_connect_in *iparams,
+		      struct qed_iwarp_connect_out *oparams)
+{
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	struct qed_iwarp_info *iwarp_info;
+	struct qed_iwarp_ep *ep;
+	u8 mpa_data_size = 0;
+	u8 ts_hdr_size = 0;
+	u32 cid;
+	int rc;
+
+	if ((iparams->cm_info.ord > QED_IWARP_ORD_DEFAULT) ||
+	    (iparams->cm_info.ird > QED_IWARP_IRD_DEFAULT)) {
+		DP_NOTICE(p_hwfn,
+			  "QP(0x%x) ERROR: Invalid ord(0x%x)/ird(0x%x)\n",
+			  iparams->qp->icid, iparams->cm_info.ord,
+			  iparams->cm_info.ird);
+
+		return -EINVAL;
+	}
+
+	iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+
+	/* Allocate ep object */
+	rc = qed_iwarp_alloc_cid(p_hwfn, &cid);
+	if (rc)
+		return rc;
+
+	rc = qed_iwarp_create_ep(p_hwfn, &ep);
+	if (rc)
+		goto err;
+
+	ep->tcp_cid = cid;
+
+	spin_lock_bh(&iwarp_info->iw_lock);
+	list_add_tail(&ep->list_entry, &iwarp_info->ep_list);
+	spin_unlock_bh(&iwarp_info->iw_lock);
+
+	/* Bind the ep to the caller's qp and copy the connection info */
+	ep->qp = iparams->qp;
+	ep->qp->ep = ep;
+	ether_addr_copy(ep->remote_mac_addr, iparams->remote_mac_addr);
+	ether_addr_copy(ep->local_mac_addr, iparams->local_mac_addr);
+	memcpy(&ep->cm_info, &iparams->cm_info, sizeof(ep->cm_info));
+
+	ep->cm_info.ord = iparams->cm_info.ord;
+	ep->cm_info.ird = iparams->cm_info.ird;
+
+	ep->rtr_type = iwarp_info->peer2peer ? iwarp_info->rtr_type :
+					       MPA_RTR_TYPE_NONE;
+
+	/* A zero-length read rtr requires an ord of at least 1 */
+	if ((ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) && (ep->cm_info.ord == 0))
+		ep->cm_info.ord = 1;
+
+	ep->mpa_rev = iwarp_info->mpa_rev;
+
+	qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size);
+
+	/* The caller's private data follows the (optional) mpa v2 header */
+	ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata;
+	ep->cm_info.private_data_len = iparams->cm_info.private_data_len +
+				       mpa_data_size;
+
+	memcpy((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size,
+	       iparams->cm_info.private_data,
+	       iparams->cm_info.private_data_len);
+
+	if (iwarp_info->tcp_flags & QED_IWARP_TS_EN)
+		ts_hdr_size = TIMESTAMP_HEADER_SIZE;
+
+	ep->mss = iparams->mss - ts_hdr_size;
+	ep->mss = min_t(u16, QED_IWARP_MAX_FW_MSS, ep->mss);
+
+	ep->event_cb = iparams->event_cb;
+	ep->cb_context = iparams->cb_context;
+	ep->connect_mode = TCP_CONNECT_ACTIVE;
+
+	oparams->ep_context = ep;
+
+	rc = qed_iwarp_tcp_offload(p_hwfn, ep);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x) rc = %d\n",
+		   iparams->qp->icid, ep->tcp_cid, rc);
+
+	if (!rc)
+		return rc;
+
+	qed_iwarp_destroy_ep(p_hwfn, ep, true);
+err:
+	qed_iwarp_cid_cleaned(p_hwfn, cid);
+
+	return rc;
+}
+
+/* Take the first ep off the free list, making sure it owns a valid tcp cid.
+ *
+ * Returns NULL when the free list is empty or when a missing tcp cid could
+ * not be allocated (the ep then stays on the free list).
+ */
+static struct qed_iwarp_ep *qed_iwarp_get_free_ep(struct qed_hwfn *p_hwfn)
+{
+	struct qed_iwarp_info *iwarp = &p_hwfn->p_rdma_info->iwarp;
+	struct qed_iwarp_ep *ep = NULL;
+
+	spin_lock_bh(&iwarp->iw_lock);
+
+	if (list_empty(&iwarp->ep_free_list)) {
+		DP_ERR(p_hwfn, "Ep list is empty\n");
+		goto unlock;
+	}
+
+	ep = list_first_entry(&iwarp->ep_free_list,
+			      struct qed_iwarp_ep, list_entry);
+
+	/* In some cases allocating a tcp cid may have failed when the ep was
+	 * added from accept / failure; retry now. This is not the common
+	 * case, so on another failure we simply give up instead of looking
+	 * for a different entry with a valid tcp_cid.
+	 */
+	if (ep->tcp_cid == QED_IWARP_INVALID_TCP_CID &&
+	    qed_iwarp_alloc_tcp_cid(p_hwfn, &ep->tcp_cid)) {
+		ep->tcp_cid = QED_IWARP_INVALID_TCP_CID;
+		ep = NULL;
+		goto unlock;
+	}
+
+	list_del(&ep->list_entry);
+
+unlock:
+	spin_unlock_bh(&iwarp->iw_lock);
+	return ep;
+}
+
+#define QED_IWARP_MAX_CID_CLEAN_TIME 100
+#define QED_IWARP_MAX_NO_PROGRESS_CNT 5
+
+/* Wait until every bit of @bmap is cleared, sleeping
+ * QED_IWARP_MAX_CID_CLEAN_TIME between polls. The wait goes on as long as
+ * there is progress (i.e. the number of set bits changes); after more than
+ * QED_IWARP_MAX_NO_PROGRESS_CNT consecutive polls without progress the
+ * function gives up and returns -EBUSY. Returns 0 once the bitmap is empty.
+ */
+static int
+qed_iwarp_wait_cid_map_cleared(struct qed_hwfn *p_hwfn, struct qed_bmap *bmap)
+{
+	int pending = bitmap_weight(bmap->bitmap, bmap->max_count);
+	int last_pending = pending;
+	int stalled = 0;
+
+	while (pending) {
+		msleep(QED_IWARP_MAX_CID_CLEAN_TIME);
+
+		pending = bitmap_weight(bmap->bitmap, bmap->max_count);
+
+		if (pending != last_pending) {
+			/* Progress was made: restart the stall counter */
+			last_pending = pending;
+			stalled = 0;
+			continue;
+		}
+
+		if (++stalled > QED_IWARP_MAX_NO_PROGRESS_CNT) {
+			DP_NOTICE(p_hwfn,
+				  "%s bitmap wait timed out (%d cids pending)\n",
+				  bmap->name, pending);
+			return -EBUSY;
+		}
+	}
+
+	return 0;
+}
+
+/* Wait for the tcp cid map to drain, release the preallocated tcp cids from
+ * the main cid map, then wait for the main cid map to drain as well.
+ * Returns 0 on success or the error of the first wait that fails.
+ */
+static int qed_iwarp_wait_for_all_cids(struct qed_hwfn *p_hwfn)
+{
+	struct qed_bmap *tcp_cid_map = &p_hwfn->p_rdma_info->tcp_cid_map;
+	struct qed_bmap *cid_map = &p_hwfn->p_rdma_info->cid_map;
+	int rc;
+	int id;
+
+	rc = qed_iwarp_wait_cid_map_cleared(p_hwfn, tcp_cid_map);
+	if (rc)
+		return rc;
+
+	/* Now free the tcp cids from the main cid map */
+	for (id = 0; id < QED_IWARP_PREALLOC_CNT; id++)
+		qed_bmap_release_id(p_hwfn, cid_map, id);
+
+	/* Now wait for all cids to be completed */
+	return qed_iwarp_wait_cid_map_cleared(p_hwfn, cid_map);
+}
+
+/* Destroy every ep left on the free list, releasing each ep's tcp cid first
+ * when it holds a valid one.
+ *
+ * Both the emptiness test and the removal are done under iw_lock, using
+ * list_first_entry_or_null(): list_first_entry() never yields NULL, so it
+ * cannot be used to detect an empty list.
+ */
+static void qed_iwarp_free_prealloc_ep(struct qed_hwfn *p_hwfn)
+{
+	struct qed_iwarp_info *iwarp = &p_hwfn->p_rdma_info->iwarp;
+	struct qed_iwarp_ep *ep;
+
+	for (;;) {
+		spin_lock_bh(&iwarp->iw_lock);
+
+		ep = list_first_entry_or_null(&iwarp->ep_free_list,
+					      struct qed_iwarp_ep, list_entry);
+		if (ep)
+			list_del(&ep->list_entry);
+
+		spin_unlock_bh(&iwarp->iw_lock);
+
+		if (!ep)
+			break;
+
+		if (ep->tcp_cid != QED_IWARP_INVALID_TCP_CID)
+			qed_iwarp_cid_cleaned(p_hwfn, ep->tcp_cid);
+
+		/* Already unlinked above, hence 'false' */
+		qed_iwarp_destroy_ep(p_hwfn, ep, false);
+	}
+}
+
+/* Create eps and add them to the free list.
+ *
+ * @init: true at initialization time, when QED_IWARP_PREALLOC_CNT eps are
+ *        created with cids taken from the main pool; false afterwards, when
+ *        a single ep is created with a cid taken from the tcp cid map.
+ *
+ * Returns 0 on success or the error of the failing ep/cid allocation. A
+ * failure to get a tcp cid on the !init path is not an error: the ep is
+ * queued with QED_IWARP_INVALID_TCP_CID and allocation is deferred.
+ */
+static int qed_iwarp_prealloc_ep(struct qed_hwfn *p_hwfn, bool init)
+{
+	struct qed_iwarp_ep *ep;
+	int rc = 0;
+	int count;
+	u32 cid;
+	int i;
+
+	count = init ? QED_IWARP_PREALLOC_CNT : 1;
+	for (i = 0; i < count; i++) {
+		rc = qed_iwarp_create_ep(p_hwfn, &ep);
+		if (rc)
+			return rc;
+
+		/* During initialization we allocate from the main pool,
+		 * afterwards we allocate only from the tcp_cid.
+		 */
+		if (init) {
+			rc = qed_iwarp_alloc_cid(p_hwfn, &cid);
+			if (rc)
+				goto err;
+			qed_iwarp_set_tcp_cid(p_hwfn, cid);
+		} else if (qed_iwarp_alloc_tcp_cid(p_hwfn, &cid)) {
+			/* It's ok for the tcp_cid to be invalid, allocation
+			 * is then deferred; but make sure it really is marked
+			 * invalid rather than left uninitialized.
+			 */
+			cid = QED_IWARP_INVALID_TCP_CID;
+		}
+
+		ep->tcp_cid = cid;
+
+		spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+		list_add_tail(&ep->list_entry,
+			      &p_hwfn->p_rdma_info->iwarp.ep_free_list);
+		spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	}
+
+	return rc;
+
+err:
+	qed_iwarp_destroy_ep(p_hwfn, ep, false);
+
+	return rc;
+}
+
+/* Allocate the iWARP resources of @p_hwfn: the tcp cid bitmap, the free ep
+ * list with its lock, and the initial set of preallocated eps.
+ * Returns 0 on success or the error of the failing allocation.
+ */
+int qed_iwarp_alloc(struct qed_hwfn *p_hwfn)
+{
+	struct qed_iwarp_info *iwarp = &p_hwfn->p_rdma_info->iwarp;
+	int rc;
+
+	/* Allocate bitmap for tcp cid. These are used by passive side
+	 * to ensure it can allocate a tcp cid during dpc that was
+	 * pre-acquired and doesn't require dynamic allocation of ilt
+	 */
+	rc = qed_rdma_bmap_alloc(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map,
+				 QED_IWARP_PREALLOC_CNT, "TCP_CID");
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Failed to allocate tcp cid, rc = %d\n", rc);
+		return rc;
+	}
+
+	INIT_LIST_HEAD(&iwarp->ep_free_list);
+	spin_lock_init(&iwarp->iw_lock);
+
+	return qed_iwarp_prealloc_ep(p_hwfn, true);
+}
+
+/* Free the tcp cid bitmap allocated by qed_iwarp_alloc() */
+void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn)
+{
+	struct qed_bmap *tcp_cid_map = &p_hwfn->p_rdma_info->tcp_cid_map;
+
+	qed_rdma_bmap_free(p_hwfn, tcp_cid_map, 1);
+}
+
+/* Accept an incoming connection on the ep given in @iparams->ep_context:
+ * bind the ep to the caller's qp, settle the ord/ird values, prepare the
+ * outgoing private data and offload the MPA connection.
+ *
+ * Returns -EINVAL for a NULL ep context or an ord/ird above the defaults,
+ * otherwise the result of the MPA offload. When the offload fails the qp is
+ * moved to the error state.
+ */
+int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams)
+{
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	struct qed_iwarp_ep *ep;
+	u8 mpa_data_size = 0;
+	int rc;
+
+	ep = iparams->ep_context;
+	if (!ep) {
+		DP_ERR(p_hwfn, "Ep Context receive in accept is NULL\n");
+		return -EINVAL;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x)\n",
+		   iparams->qp->icid, ep->tcp_cid);
+
+	if ((iparams->ord > QED_IWARP_ORD_DEFAULT) ||
+	    (iparams->ird > QED_IWARP_IRD_DEFAULT)) {
+		/* Report both offending values: ord and ird */
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "QP(0x%x) EP(0x%x) ERROR: Invalid ord(0x%x)/ird(0x%x)\n",
+			   iparams->qp->icid,
+			   ep->tcp_cid, iparams->ord, iparams->ird);
+		return -EINVAL;
+	}
+
+	/* Best effort: add a new free ep in place of the one being consumed */
+	qed_iwarp_prealloc_ep(p_hwfn, false);
+
+	ep->cb_context = iparams->cb_context;
+	ep->qp = iparams->qp;
+	ep->qp->ep = ep;
+
+	if (ep->mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) {
+		/* Negotiate ord/ird: if upperlayer requested ord larger than
+		 * ird advertised by remote, we need to decrease our ord
+		 */
+		if (iparams->ord > ep->cm_info.ird)
+			iparams->ord = ep->cm_info.ird;
+
+		if ((ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) &&
+		    (iparams->ird == 0))
+			iparams->ird = 1;
+	}
+
+	/* Update cm_info ord/ird to be negotiated values */
+	ep->cm_info.ord = iparams->ord;
+	ep->cm_info.ird = iparams->ird;
+
+	qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size);
+
+	/* The caller's private data follows the (optional) mpa v2 header */
+	ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata;
+	ep->cm_info.private_data_len = iparams->private_data_len +
+				       mpa_data_size;
+
+	memcpy((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size,
+	       iparams->private_data, iparams->private_data_len);
+
+	rc = qed_iwarp_mpa_offload(p_hwfn, ep);
+	if (rc)
+		qed_iwarp_modify_qp(p_hwfn,
+				    iparams->qp, QED_IWARP_QP_STATE_ERROR, 1);
+
+	return rc;
+}
+
+/* Reject an incoming connection on the ep given in @iparams->ep_context.
+ *
+ * The ep is left without a qp, which makes the MPA offload go out as a
+ * reject. Returns -EINVAL for a NULL ep context, otherwise the result of
+ * the MPA offload.
+ */
+int qed_iwarp_reject(void *rdma_cxt, struct qed_iwarp_reject_in *iparams)
+{
+	struct qed_iwarp_ep *ep = iparams->ep_context;
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	u8 mpa_data_size = 0;
+
+	if (!ep) {
+		DP_ERR(p_hwfn, "Ep Context receive in reject is NULL\n");
+		return -EINVAL;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "EP(0x%x)\n", ep->tcp_cid);
+
+	ep->cb_context = iparams->cb_context;
+	ep->qp = NULL;
+
+	qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size);
+
+	/* The caller's private data follows the (optional) mpa v2 header */
+	ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata;
+	ep->cm_info.private_data_len = iparams->private_data_len +
+				       mpa_data_size;
+
+	memcpy((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size,
+	       iparams->private_data, iparams->private_data_len);
+
+	return qed_iwarp_mpa_offload(p_hwfn, ep);
+}
+
+/* Dump the contents of @cm_info (ip version, addresses, ports, vlan,
+ * private data length and ord/ird) at QED_MSG_RDMA verbosity.
+ */
+static void
+qed_iwarp_print_cm_info(struct qed_hwfn *p_hwfn,
+			struct qed_iwarp_cm_info *cm_info)
+{
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "ip_version = %d\n",
+		   cm_info->ip_version);
+
+	/* Anything that is not IPv4 is printed as an IPv6 address */
+	if (cm_info->ip_version != QED_TCP_IPV4)
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "remote_ip %pI6h:%x, local_ip %pI6h:%x vlan=%x\n",
+			   cm_info->remote_ip, cm_info->remote_port,
+			   cm_info->local_ip, cm_info->local_port,
+			   cm_info->vlan);
+	else
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "remote_ip %pI4h:%x, local_ip %pI4h:%x vlan=%x\n",
+			   cm_info->remote_ip, cm_info->remote_port,
+			   cm_info->local_ip, cm_info->local_port,
+			   cm_info->vlan);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "private_data_len = %x ord = %d, ird = %d\n",
+		   cm_info->private_data_len, cm_info->ord, cm_info->ird);
+}
+
+/* Post @buf as an rx buffer on the ll2 connection @handle.
+ *
+ * On failure the buffer (its DMA memory and the descriptor itself) is freed
+ * here, so the caller must not touch @buf again. Returns the result of
+ * qed_ll2_post_rx_buffer().
+ */
+static int
+qed_iwarp_ll2_post_rx(struct qed_hwfn *p_hwfn,
+		      struct qed_iwarp_ll2_buff *buf, u8 handle)
+{
+	int rc;
+
+	rc = qed_ll2_post_rx_buffer(p_hwfn, handle, buf->data_phys_addr,
+				    (u16)buf->buff_size, buf, 1);
+	if (!rc)
+		return 0;
+
+	DP_NOTICE(p_hwfn,
+		  "Failed to repost rx buffer to ll2 rc = %d, handle=%d\n",
+		  rc, handle);
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev, buf->buff_size,
+			  buf->data, buf->data_phys_addr);
+	kfree(buf);
+
+	return rc;
+}
+
+/* Tell whether an ep with the same ports, vlan and local/remote ip as
+ * @cm_info is already on the ep_list. A match is logged as a SYN received
+ * on an active connection.
+ */
+static bool
+qed_iwarp_ep_exists(struct qed_hwfn *p_hwfn, struct qed_iwarp_cm_info *cm_info)
+{
+	struct qed_iwarp_ep *ep = NULL;
+
+	list_for_each_entry(ep,
+			    &p_hwfn->p_rdma_info->iwarp.ep_list,
+			    list_entry) {
+		if (ep->cm_info.local_port != cm_info->local_port ||
+		    ep->cm_info.remote_port != cm_info->remote_port ||
+		    ep->cm_info.vlan != cm_info->vlan)
+			continue;
+
+		if (memcmp(&ep->cm_info.local_ip, cm_info->local_ip,
+			   sizeof(cm_info->local_ip)))
+			continue;
+
+		if (memcmp(&ep->cm_info.remote_ip, cm_info->remote_ip,
+			   sizeof(cm_info->remote_ip)))
+			continue;
+
+		DP_NOTICE(p_hwfn,
+			  "SYN received on active connection - dropping\n");
+		qed_iwarp_print_cm_info(p_hwfn, cm_info);
+
+		return true;
+	}
+
+	return false;
+}
+
+/* Look up the listener matching @cm_info on the listen list.
+ *
+ * A listener matches when its port equals the local port and either its ip
+ * address is all zeros, or its ip address and vlan equal the local ip and
+ * vlan of @cm_info. Returns the first match or NULL.
+ */
+static struct qed_iwarp_listener *
+qed_iwarp_get_listener(struct qed_hwfn *p_hwfn,
+		       struct qed_iwarp_cm_info *cm_info)
+{
+	static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+	struct qed_iwarp_listener *match = NULL;
+	struct qed_iwarp_listener *listener;
+
+	qed_iwarp_print_cm_info(p_hwfn, cm_info);
+
+	list_for_each_entry(listener,
+			    &p_hwfn->p_rdma_info->iwarp.listen_list,
+			    list_entry) {
+		if (listener->port != cm_info->local_port)
+			continue;
+
+		if (!memcmp(listener->ip_addr, ip_zero, sizeof(ip_zero)) ||
+		    (!memcmp(listener->ip_addr, cm_info->local_ip,
+			     sizeof(cm_info->local_ip)) &&
+		     (listener->vlan == cm_info->vlan))) {
+			match = listener;
+			break;
+		}
+	}
+
+	if (!match) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "listener not found\n");
+		return NULL;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "listener found = %p\n",
+		   match);
+	return match;
+}
+
+/* Parse the ethernet/ip/tcp headers of a received packet in @buf.
+ *
+ * Fills @cm_info (vlan when tagged, ip version, addresses, ports), copies
+ * the source/destination MACs to @remote_mac_addr/@local_mac_addr, and
+ * reports the ip payload length and the offset of the tcp header.
+ *
+ * Returns 0 on success, -EINVAL for an ethertype other than IPv4/IPv6 or
+ * for a tcp segment without the SYN flag. Outputs may be partially written
+ * when an error is returned.
+ */
+static int
+qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn,
+		       struct qed_iwarp_cm_info *cm_info,
+		       void *buf,
+		       u8 *remote_mac_addr,
+		       u8 *local_mac_addr,
+		       int *payload_len, int *tcp_start_offset)
+{
+	struct ethhdr *ethh = buf;
+	struct vlan_ethhdr *vethh;
+	bool vlan_valid = false;
+	struct ipv6hdr *ip6h;
+	struct tcphdr *tcph;
+	struct iphdr *iph;
+	int eth_hlen;
+	int ip_hlen;
+	int eth_type;
+	int i;
+
+	eth_type = ntohs(ethh->h_proto);
+	if (eth_type == ETH_P_8021Q) {
+		vlan_valid = true;
+		vethh = (struct vlan_ethhdr *)ethh;
+		cm_info->vlan = ntohs(vethh->h_vlan_TCI) & VLAN_VID_MASK;
+		eth_type = ntohs(vethh->h_vlan_encapsulated_proto);
+	}
+
+	/* A vlan tag adds 4 bytes to the ethernet header */
+	eth_hlen = ETH_HLEN + (vlan_valid ? sizeof(u32) : 0);
+
+	ether_addr_copy(remote_mac_addr, ethh->h_source);
+	ether_addr_copy(local_mac_addr, ethh->h_dest);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "eth_type =%d source mac: %pM\n",
+		   eth_type, ethh->h_source);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "eth_hlen=%d destination mac: %pM\n",
+		   eth_hlen, ethh->h_dest);
+
+	iph = (struct iphdr *)((u8 *)(ethh) + eth_hlen);
+
+	switch (eth_type) {
+	case ETH_P_IP:
+		cm_info->local_ip[0] = ntohl(iph->daddr);
+		cm_info->remote_ip[0] = ntohl(iph->saddr);
+		cm_info->ip_version = TCP_IPV4;
+
+		ip_hlen = (iph->ihl) * sizeof(u32);
+		*payload_len = ntohs(iph->tot_len) - ip_hlen;
+		break;
+	case ETH_P_IPV6:
+		ip6h = (struct ipv6hdr *)iph;
+		for (i = 0; i < 4; i++) {
+			cm_info->local_ip[i] =
+			    ntohl(ip6h->daddr.in6_u.u6_addr32[i]);
+			cm_info->remote_ip[i] =
+			    ntohl(ip6h->saddr.in6_u.u6_addr32[i]);
+		}
+		cm_info->ip_version = TCP_IPV6;
+
+		ip_hlen = sizeof(*ip6h);
+		*payload_len = ntohs(ip6h->payload_len);
+		break;
+	default:
+		DP_NOTICE(p_hwfn, "Unexpected ethertype on ll2 %x\n", eth_type);
+		return -EINVAL;
+	}
+
+	tcph = (struct tcphdr *)((u8 *)iph + ip_hlen);
+
+	if (!tcph->syn) {
+		DP_NOTICE(p_hwfn,
+			  "Only SYN type packet expected on this ll2 conn, iph->ihl=%d source=%d dest=%d\n",
+			  iph->ihl, tcph->source, tcph->dest);
+		return -EINVAL;
+	}
+
+	cm_info->local_port = ntohs(tcph->dest);
+	cm_info->remote_port = ntohs(tcph->source);
+
+	qed_iwarp_print_cm_info(p_hwfn, cm_info);
+
+	*tcp_start_offset = eth_hlen + ip_hlen;
+
+	return 0;
+}
+
+/* Rx completion callback of the SYN ll2 connection.
+ *
+ * Parses the received SYN and then either
+ *  - sends it back to the chip through the ll2 tx path when nobody listens
+ *    on its tuple, or
+ *  - takes a free ep, fills it from the packet and the matching listener,
+ *    and offloads the TCP connection.
+ * On any failure the rx buffer is reposted to the ll2 connection.
+ */
+static void
+qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
+{
+	struct qed_iwarp_ll2_buff *buf = data->cookie;
+	struct qed_iwarp_listener *listener;
+	struct qed_ll2_tx_pkt_info tx_pkt;
+	struct qed_iwarp_cm_info cm_info;
+	struct qed_hwfn *p_hwfn = cxt;
+	u8 remote_mac_addr[ETH_ALEN];
+	u8 local_mac_addr[ETH_ALEN];
+	struct qed_iwarp_ep *ep;
+	int tcp_start_offset;
+	u8 ts_hdr_size = 0;
+	u8 ll2_syn_handle;
+	int payload_len;
+	u32 hdr_size;
+	int rc;
+
+	memset(&cm_info, 0, sizeof(cm_info));
+
+	/* Must be set before the first 'goto err': the error path reposts
+	 * the buffer on this handle.
+	 */
+	ll2_syn_handle = p_hwfn->p_rdma_info->iwarp.ll2_syn_handle;
+
+	if (GET_FIELD(data->parse_flags,
+		      PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED) &&
+	    GET_FIELD(data->parse_flags, PARSING_AND_ERR_FLAGS_L4CHKSMERROR)) {
+		DP_NOTICE(p_hwfn, "Syn packet received with checksum error\n");
+		goto err;
+	}
+
+	rc = qed_iwarp_parse_rx_pkt(p_hwfn, &cm_info, (u8 *)(buf->data) +
+				    data->u.placement_offset, remote_mac_addr,
+				    local_mac_addr, &payload_len,
+				    &tcp_start_offset);
+	if (rc)
+		goto err;
+
+	/* Check if there is a listener for this 4-tuple+vlan */
+	listener = qed_iwarp_get_listener(p_hwfn, &cm_info);
+	if (!listener) {
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "SYN received on tuple not listened on parse_flags=%d packet len=%d\n",
+			   data->parse_flags, data->length.packet_length);
+
+		/* Send the packet back to the chip, reusing the rx buffer as
+		 * the single tx fragment.
+		 */
+		memset(&tx_pkt, 0, sizeof(tx_pkt));
+		tx_pkt.num_of_bds = 1;
+		tx_pkt.vlan = data->vlan;
+
+		if (GET_FIELD(data->parse_flags,
+			      PARSING_AND_ERR_FLAGS_TAG8021QEXIST))
+			SET_FIELD(tx_pkt.bd_flags,
+				  CORE_TX_BD_DATA_VLAN_INSERTION, 1);
+
+		tx_pkt.l4_hdr_offset_w = (data->length.packet_length) >> 2;
+		tx_pkt.tx_dest = QED_LL2_TX_DEST_LB;
+		tx_pkt.first_frag = buf->data_phys_addr +
+				    data->u.placement_offset;
+		tx_pkt.first_frag_len = data->length.packet_length;
+		tx_pkt.cookie = buf;
+
+		rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_syn_handle,
+					       &tx_pkt, true);
+
+		if (rc) {
+			DP_NOTICE(p_hwfn,
+				  "Can't post SYN back to chip rc=%d\n", rc);
+			goto err;
+		}
+		return;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Received syn on listening port\n");
+	/* There may be an open ep on this connection if this is a syn
+	 * retransmit... need to make sure there isn't...
+	 */
+	if (qed_iwarp_ep_exists(p_hwfn, &cm_info))
+		goto err;
+
+	ep = qed_iwarp_get_free_ep(p_hwfn);
+	if (!ep)
+		goto err;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_add_tail(&ep->list_entry, &p_hwfn->p_rdma_info->iwarp.ep_list);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	ether_addr_copy(ep->remote_mac_addr, remote_mac_addr);
+	ether_addr_copy(ep->local_mac_addr, local_mac_addr);
+
+	memcpy(&ep->cm_info, &cm_info, sizeof(ep->cm_info));
+
+	if (p_hwfn->p_rdma_info->iwarp.tcp_flags & QED_IWARP_TS_EN)
+		ts_hdr_size = TIMESTAMP_HEADER_SIZE;
+
+	/* mss = max mtu minus the ip+tcp headers (40 bytes for IPv4,
+	 * 60 otherwise) and the optional timestamp header
+	 */
+	hdr_size = ((cm_info.ip_version == QED_TCP_IPV4) ? 40 : 60) +
+		   ts_hdr_size;
+	ep->mss = p_hwfn->p_rdma_info->iwarp.max_mtu - hdr_size;
+	ep->mss = min_t(u16, QED_IWARP_MAX_FW_MSS, ep->mss);
+
+	ep->event_cb = listener->event_cb;
+	ep->cb_context = listener->cb_context;
+	ep->connect_mode = TCP_CONNECT_PASSIVE;
+
+	/* Keep the SYN buffer with the ep for the TCP offload */
+	ep->syn = buf;
+	ep->syn_ip_payload_length = (u16)payload_len;
+	ep->syn_phy_addr = buf->data_phys_addr + data->u.placement_offset +
+			   tcp_start_offset;
+
+	rc = qed_iwarp_tcp_offload(p_hwfn, ep);
+	if (rc) {
+		qed_iwarp_return_ep(p_hwfn, ep);
+		goto err;
+	}
+
+	return;
+err:
+	qed_iwarp_ll2_post_rx(p_hwfn, buf, ll2_syn_handle);
+}
+
+/* Rx release callback: free the DMA memory of the rx buffer passed as
+ * @cookie, and the buffer descriptor itself.
+ */
+static void qed_iwarp_ll2_rel_rx_pkt(void *cxt, u8 connection_handle,
+				     void *cookie, dma_addr_t rx_buf_addr,
+				     bool b_last_packet)
+{
+	struct qed_hwfn *p_hwfn = cxt;
+	struct qed_iwarp_ll2_buff *rx_buf = cookie;
+
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev, rx_buf->buff_size,
+			  rx_buf->data, rx_buf->data_phys_addr);
+	kfree(rx_buf);
+}
+
+/* Tx completion callback: the transmitted buffer was originally an rx
+ * buffer, so post it back to the rx ring of the same connection.
+ */
+static void qed_iwarp_ll2_comp_tx_pkt(void *cxt, u8 connection_handle,
+				      void *cookie, dma_addr_t first_frag_addr,
+				      bool b_last_fragment, bool b_last_packet)
+{
+	struct qed_hwfn *p_hwfn = cxt;
+	struct qed_iwarp_ll2_buff *tx_buf = cookie;
+
+	qed_iwarp_ll2_post_rx(p_hwfn, tx_buf, connection_handle);
+}
+
+/* Tx release callback: free the DMA memory of the buffer passed as @cookie
+ * and the buffer descriptor itself. A NULL cookie is ignored.
+ */
+static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle,
+				     void *cookie, dma_addr_t first_frag_addr,
+				     bool b_last_fragment, bool b_last_packet)
+{
+	struct qed_hwfn *p_hwfn = cxt;
+	struct qed_iwarp_ll2_buff *tx_buf = cookie;
+
+	if (tx_buf) {
+		dma_free_coherent(&p_hwfn->cdev->pdev->dev, tx_buf->buff_size,
+				  tx_buf->data, tx_buf->data_phys_addr);
+
+		kfree(tx_buf);
+	}
+}
+
+/* Tear down the SYN ll2 connection (when one was acquired) and remove the
+ * iWARP mac filter. Returns the result of terminating the connection, or 0
+ * when there was no connection to terminate.
+ */
+static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+	u8 syn_handle = iwarp_info->ll2_syn_handle;
+	int rc = 0;
+
+	if (syn_handle != QED_IWARP_HANDLE_INVAL) {
+		rc = qed_ll2_terminate_connection(p_hwfn, syn_handle);
+		if (rc)
+			DP_INFO(p_hwfn, "Failed to terminate syn connection\n");
+
+		/* The connection is released even if terminating failed */
+		qed_ll2_release_connection(p_hwfn, syn_handle);
+		iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL;
+	}
+
+	qed_llh_remove_mac_filter(p_hwfn, p_ptt, iwarp_info->mac_addr);
+	return rc;
+}
+
+/* Allocate @num_rx_bufs DMA buffers of @buff_size bytes and post each of
+ * them as an rx buffer on the ll2 connection @ll2_handle.
+ *
+ * Stops at the first failure: returns -ENOMEM when an allocation fails, or
+ * the error of the failing post. Returns 0 when all buffers were posted.
+ */
+static int
+qed_iwarp_ll2_alloc_buffers(struct qed_hwfn *p_hwfn,
+			    int num_rx_bufs, int buff_size, u8 ll2_handle)
+{
+	struct qed_iwarp_ll2_buff *buffer;
+	int posted;
+	int rc;
+
+	for (posted = 0; posted < num_rx_bufs; posted++) {
+		buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+		if (!buffer)
+			return -ENOMEM;
+
+		buffer->data = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+						  buff_size,
+						  &buffer->data_phys_addr,
+						  GFP_KERNEL);
+		if (!buffer->data) {
+			kfree(buffer);
+			return -ENOMEM;
+		}
+
+		buffer->buff_size = buff_size;
+		rc = qed_iwarp_ll2_post_rx(p_hwfn, buffer, ll2_handle);
+		if (rc)
+			/* buffers will be deallocated by qed_ll2 */
+			return rc;
+	}
+
+	return 0;
+}
+
+/* Buffer size for a given mtu: the mtu plus the ethernet header, two vlan
+ * headers, 2 extra bytes and one cache line, rounded up to a cache line.
+ */
+#define QED_IWARP_MAX_BUF_SIZE(mtu) \
+	ALIGN((mtu) + ETH_HLEN + 2 * VLAN_HLEN + 2 + ETH_CACHE_LINE_SIZE, \
+	      ETH_CACHE_LINE_SIZE)
+
+/* Add the iWARP mac filter and bring up the SYN ll2 connection: acquire it,
+ * establish it and fill its rx ring with buffers.
+ *
+ * Returns 0 on success. On failure everything set up so far is undone and
+ * the error of the failing step is returned.
+ */
+static int
+qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
+		    struct qed_rdma_start_in_params *params,
+		    struct qed_ptt *p_ptt)
+{
+	struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+	struct qed_ll2_acquire_data data;
+	struct qed_ll2_cbs cbs;
+	int rc = 0;
+
+	iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL;
+	iwarp_info->max_mtu = params->max_mtu;
+
+	ether_addr_copy(iwarp_info->mac_addr, params->mac_addr);
+
+	rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+	if (rc)
+		return rc;
+
+	/* Start SYN connection */
+	cbs.rx_comp_cb = qed_iwarp_ll2_comp_syn_pkt;
+	cbs.rx_release_cb = qed_iwarp_ll2_rel_rx_pkt;
+	cbs.tx_comp_cb = qed_iwarp_ll2_comp_tx_pkt;
+	cbs.tx_release_cb = qed_iwarp_ll2_rel_tx_pkt;
+	cbs.cookie = p_hwfn;
+
+	memset(&data, 0, sizeof(data));
+	data.input.conn_type = QED_LL2_TYPE_IWARP;
+	data.input.mtu = QED_IWARP_MAX_SYN_PKT_SIZE;
+	data.input.rx_num_desc = QED_IWARP_LL2_SYN_RX_SIZE;
+	data.input.tx_num_desc = QED_IWARP_LL2_SYN_TX_SIZE;
+	data.input.tx_max_bds_per_packet = 1;	/* will never be fragmented */
+	data.input.tx_tc = PKT_LB_TC;
+	data.input.tx_dest = QED_LL2_TX_DEST_LB;
+	data.p_connection_handle = &iwarp_info->ll2_syn_handle;
+	data.cbs = &cbs;
+
+	rc = qed_ll2_acquire_connection(p_hwfn, &data);
+	if (rc) {
+		/* No connection to tear down yet: only drop the filter */
+		DP_NOTICE(p_hwfn, "Failed to acquire LL2 connection\n");
+		qed_llh_remove_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+		return rc;
+	}
+
+	rc = qed_ll2_establish_connection(p_hwfn, iwarp_info->ll2_syn_handle);
+	if (rc) {
+		DP_NOTICE(p_hwfn, "Failed to establish LL2 connection\n");
+		goto err;
+	}
+
+	rc = qed_iwarp_ll2_alloc_buffers(p_hwfn,
+					 QED_IWARP_LL2_SYN_RX_SIZE,
+					 QED_IWARP_MAX_SYN_PKT_SIZE,
+					 iwarp_info->ll2_syn_handle);
+	if (!rc)
+		return rc;
+
+err:
+	qed_iwarp_ll2_stop(p_hwfn, p_ptt);
+
+	return rc;
+}
+
+/* Initialise the per-function iWARP state and start the SYN LL2 connection.
+ *
+ * Sets the default TCP/MPA parameters, initialises the qp lock and the
+ * endpoint/listener lists, registers the iWARP async event handler and then
+ * starts LL2.
+ *
+ * Returns 0 on success or the error from qed_iwarp_ll2_start(). On failure
+ * the async event handler is unregistered again so that a failed setup does
+ * not leave it installed.
+ */
+int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+		    struct qed_rdma_start_in_params *params)
+{
+	struct qed_iwarp_info *iwarp_info;
+	u32 rcv_wnd_size;
+	int rc;
+
+	iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+
+	iwarp_info->tcp_flags = QED_IWARP_TS_EN;
+	rcv_wnd_size = QED_IWARP_RCV_WND_SIZE_DEF;
+
+	/* value 0 is used for ilog2(QED_IWARP_RCV_WND_SIZE_MIN) */
+	iwarp_info->rcv_wnd_scale = ilog2(rcv_wnd_size) -
+	    ilog2(QED_IWARP_RCV_WND_SIZE_MIN);
+	iwarp_info->crc_needed = QED_IWARP_PARAM_CRC_NEEDED;
+	iwarp_info->mpa_rev = MPA_NEGOTIATION_TYPE_ENHANCED;
+
+	iwarp_info->peer2peer = QED_IWARP_PARAM_P2P;
+
+	iwarp_info->rtr_type = MPA_RTR_TYPE_ZERO_SEND |
+			       MPA_RTR_TYPE_ZERO_WRITE |
+			       MPA_RTR_TYPE_ZERO_READ;
+
+	spin_lock_init(&iwarp_info->qp_lock);
+	INIT_LIST_HEAD(&iwarp_info->ep_list);
+	INIT_LIST_HEAD(&iwarp_info->listen_list);
+
+	qed_spq_register_async_cb(p_hwfn, PROTOCOLID_IWARP,
+				  qed_iwarp_async_event);
+
+	rc = qed_iwarp_ll2_start(p_hwfn, params, p_ptt);
+	if (rc)
+		/* Undo the registration above; qed_iwarp_stop() is the only
+		 * other place that unregisters the handler.
+		 */
+		qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP);
+
+	return rc;
+}
+
+
+/* Stop iWARP on this function: free the pre-allocated endpoints, wait for
+ * all cids, then unregister the async event handler and stop LL2.
+ *
+ * Returns the error from qed_iwarp_wait_for_all_cids() if that fails (in
+ * which case the handler stays registered and LL2 is left running),
+ * otherwise the result of qed_iwarp_ll2_stop().
+ */
+int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	int rc;
+
+	qed_iwarp_free_prealloc_ep(p_hwfn);
+
+	rc = qed_iwarp_wait_for_all_cids(p_hwfn);
+	if (!rc) {
+		qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP);
+		rc = qed_iwarp_ll2_stop(p_hwfn, p_ptt);
+	}
+
+	return rc;
+}
+
+
+/* Handle the firmware notification that a QP entered the error state.
+ *
+ * Moves the QP to QED_IWARP_QP_STATE_ERROR, marks the endpoint closed,
+ * unlinks it from its list under iw_lock and reports a
+ * QED_IWARP_EVENT_CLOSE event to the endpoint's callback. The event status
+ * is 0 for IWARP_QP_IN_ERROR_GOOD_CLOSE and -ECONNRESET otherwise.
+ */
+void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn,
+			   struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+	struct qed_iwarp_cm_event_params params;
+
+	qed_iwarp_modify_qp(p_hwfn, ep->qp, QED_IWARP_QP_STATE_ERROR, true);
+
+	ep->state = QED_IWARP_EP_CLOSED;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_del(&ep->list_entry);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	if (fw_return_code == IWARP_QP_IN_ERROR_GOOD_CLOSE)
+		params.status = 0;
+	else
+		params.status = -ECONNRESET;
+	params.cm_info = &ep->cm_info;
+	params.ep_context = ep;
+	params.event = QED_IWARP_EVENT_CLOSE;
+
+	ep->event_cb(ep->cb_context, &params);
+}
+
+
+/* Translate a firmware exception code into a CM event and deliver it to the
+ * endpoint's event callback.
+ *
+ * @fw_ret_code is one of the IWARP_EXCEPTION_DETECTED_* values. Unknown
+ * codes are logged and no callback is made.
+ *
+ * Only the LLP close/reset cases carry a meaningful status. The event
+ * parameters are zeroed up front so that every other event is delivered
+ * with status 0 rather than an uninitialised stack value.
+ */
+void qed_iwarp_exception_received(struct qed_hwfn *p_hwfn,
+				  struct qed_iwarp_ep *ep, int fw_ret_code)
+{
+	struct qed_iwarp_cm_event_params params;
+	bool event_cb = true;
+
+	memset(&params, 0, sizeof(params));
+	params.ep_context = ep;
+	params.cm_info = &ep->cm_info;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "EP(0x%x) fw_ret_code=%d\n",
+		   ep->cid, fw_ret_code);
+
+	switch (fw_ret_code) {
+	case IWARP_EXCEPTION_DETECTED_LLP_CLOSED:
+		params.status = 0;
+		params.event = QED_IWARP_EVENT_DISCONNECT;
+		break;
+	case IWARP_EXCEPTION_DETECTED_LLP_RESET:
+		params.status = -ECONNRESET;
+		params.event = QED_IWARP_EVENT_DISCONNECT;
+		break;
+	case IWARP_EXCEPTION_DETECTED_RQ_EMPTY:
+		params.event = QED_IWARP_EVENT_RQ_EMPTY;
+		break;
+	case IWARP_EXCEPTION_DETECTED_IRQ_FULL:
+		params.event = QED_IWARP_EVENT_IRQ_FULL;
+		break;
+	case IWARP_EXCEPTION_DETECTED_LLP_TIMEOUT:
+		params.event = QED_IWARP_EVENT_LLP_TIMEOUT;
+		break;
+	case IWARP_EXCEPTION_DETECTED_REMOTE_PROTECTION_ERROR:
+		params.event = QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR;
+		break;
+	case IWARP_EXCEPTION_DETECTED_CQ_OVERFLOW:
+		params.event = QED_IWARP_EVENT_CQ_OVERFLOW;
+		break;
+	case IWARP_EXCEPTION_DETECTED_LOCAL_CATASTROPHIC:
+		params.event = QED_IWARP_EVENT_QP_CATASTROPHIC;
+		break;
+	case IWARP_EXCEPTION_DETECTED_LOCAL_ACCESS_ERROR:
+		params.event = QED_IWARP_EVENT_LOCAL_ACCESS_ERROR;
+		break;
+	case IWARP_EXCEPTION_DETECTED_REMOTE_OPERATION_ERROR:
+		params.event = QED_IWARP_EVENT_REMOTE_OPERATION_ERROR;
+		break;
+	case IWARP_EXCEPTION_DETECTED_TERMINATE_RECEIVED:
+		params.event = QED_IWARP_EVENT_TERMINATE_RECEIVED;
+		break;
+	default:
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Unhandled exception received...fw_ret_code=%d\n",
+			   fw_ret_code);
+		event_cb = false;
+		break;
+	}
+
+	if (event_cb)
+		ep->event_cb(ep->cb_context, &params);
+}
+
+
+/* Handle a failed TCP connect / MPA offload completion.
+ *
+ * Marks the endpoint closed and maps @fw_return_code to an errno. A passive
+ * endpoint is handed back via qed_iwarp_return_ep() without notifying the
+ * upper layer. An active endpoint gets a QED_IWARP_EVENT_ACTIVE_COMPLETE
+ * event carrying that errno and is then unlinked from its list under
+ * iw_lock.
+ */
+static void
+qed_iwarp_tcp_connect_unsuccessful(struct qed_hwfn *p_hwfn,
+				   struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+	struct qed_iwarp_cm_event_params params;
+	int status;
+
+	ep->state = QED_IWARP_EP_CLOSED;
+
+	switch (fw_return_code) {
+	case IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET:
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "%s(0x%x) TCP connect got invalid packet\n",
+			   QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		status = -ECONNRESET;
+		break;
+	case IWARP_CONN_ERROR_TCP_CONNECTION_RST:
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "%s(0x%x) TCP Connection Reset\n",
+			   QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		status = -ECONNRESET;
+		break;
+	case IWARP_CONN_ERROR_TCP_CONNECT_TIMEOUT:
+		DP_NOTICE(p_hwfn, "%s(0x%x) TCP timeout\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		status = -EBUSY;
+		break;
+	case IWARP_CONN_ERROR_MPA_NOT_SUPPORTED_VER:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA not supported VER\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		status = -ECONNREFUSED;
+		break;
+	case IWARP_CONN_ERROR_MPA_INVALID_PACKET:
+		DP_NOTICE(p_hwfn, "%s(0x%x) MPA Invalid Packet\n",
+			  QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+		status = -ECONNRESET;
+		break;
+	default:
+		DP_ERR(p_hwfn,
+		       "%s(0x%x) Unexpected return code tcp connect: %d\n",
+		       QED_IWARP_CONNECT_MODE_STRING(ep),
+		       ep->tcp_cid, fw_return_code);
+		status = -ECONNRESET;
+		break;
+	}
+
+	if (ep->connect_mode == TCP_CONNECT_PASSIVE) {
+		/* No event is raised for the passive side */
+		ep->tcp_cid = QED_IWARP_INVALID_TCP_CID;
+		qed_iwarp_return_ep(p_hwfn, ep);
+		return;
+	}
+
+	memset(&params, 0, sizeof(params));
+	params.event = QED_IWARP_EVENT_ACTIVE_COMPLETE;
+	params.ep_context = ep;
+	params.cm_info = &ep->cm_info;
+	params.status = status;
+
+	ep->event_cb(ep->cb_context, &params);
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_del(&ep->list_entry);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+}
+
+
+/* Handle completion of the TCP connect stage for an endpoint.
+ *
+ * For a passive endpoint the SYN buffer is first posted back to the SYN LL2
+ * rx queue and detached from the endpoint. On RDMA_RETURN_OK the passive
+ * side continues with qed_iwarp_mpa_received() and the active side with
+ * qed_iwarp_mpa_offload(); any other code goes to
+ * qed_iwarp_tcp_connect_unsuccessful().
+ */
+void
+qed_iwarp_connect_complete(struct qed_hwfn *p_hwfn,
+			   struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+	u8 syn_handle = p_hwfn->p_rdma_info->iwarp.ll2_syn_handle;
+	bool passive = (ep->connect_mode == TCP_CONNECT_PASSIVE);
+
+	if (passive) {
+		/* Done with the SYN packet, post back to ll2 rx */
+		qed_iwarp_ll2_post_rx(p_hwfn, ep->syn, syn_handle);
+		ep->syn = NULL;
+	}
+
+	if (fw_return_code != RDMA_RETURN_OK) {
+		/* If connect failed - upper layer doesn't know about it */
+		qed_iwarp_tcp_connect_unsuccessful(p_hwfn, ep,
+						   fw_return_code);
+		return;
+	}
+
+	if (passive)
+		qed_iwarp_mpa_received(p_hwfn, ep);
+	else
+		qed_iwarp_mpa_offload(p_hwfn, ep);
+}
+
+
+/* Sanity-check an endpoint pointer recovered from a firmware async event:
+ * it must be non-NULL and carry the QED_EP_SIG signature. Logs an error and
+ * returns false otherwise.
+ */
+static inline bool
+qed_iwarp_check_ep_ok(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+	if (ep && ep->sig == QED_EP_SIG)
+		return true;
+
+	DP_ERR(p_hwfn, "ERROR ON ASYNC ep=%p\n", ep);
+
+	return false;
+}
+
+
+/* Dispatch an iWARP async event received from firmware.
+ *
+ * The async handle in @data is interpreted as an endpoint pointer for the
+ * per-endpoint events (each of which validates it with
+ * qed_iwarp_check_ep_ok() before use), and as a cid for
+ * IWARP_EVENT_TYPE_ASYNC_CID_CLEANED.
+ *
+ * Returns 0 when the event was dispatched, -EINVAL for an invalid endpoint
+ * or an unexpected @fw_event_code. @echo is not used here.
+ */
+static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn,
+ u8 fw_event_code, u16 echo,
+ union event_ring_data *data,
+ u8 fw_return_code)
+{
+ struct regpair *fw_handle = &data->rdma_data.async_handle;
+ struct qed_iwarp_ep *ep = NULL;
+ u16 cid;
+
+ /* Rebuild the 64-bit handle from its hi/lo halves; it is only
+ * dereferenced in the cases that validate it first.
+ */
+ ep = (struct qed_iwarp_ep *)(uintptr_t)HILO_64(fw_handle->hi,
+ fw_handle->lo);
+
+ switch (fw_event_code) {
+ case IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE:
+ /* Async completion after TCP 3-way handshake */
+ if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+ return -EINVAL;
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_RDMA,
+ "EP(0x%x) IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE fw_ret_code=%d\n",
+ ep->tcp_cid, fw_return_code);
+ qed_iwarp_connect_complete(p_hwfn, ep, fw_return_code);
+ break;
+ case IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED:
+ if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+ return -EINVAL;
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_RDMA,
+ "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED fw_ret_code=%d\n",
+ ep->cid, fw_return_code);
+ qed_iwarp_exception_received(p_hwfn, ep, fw_return_code);
+ break;
+ case IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE:
+ /* Async completion for Close Connection ramrod */
+ if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+ return -EINVAL;
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_RDMA,
+ "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE fw_ret_code=%d\n",
+ ep->cid, fw_return_code);
+ qed_iwarp_qp_in_error(p_hwfn, ep, fw_return_code);
+ break;
+ case IWARP_EVENT_TYPE_ASYNC_ENHANCED_MPA_REPLY_ARRIVED:
+ /* Async event for active side only */
+ if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+ return -EINVAL;
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_RDMA,
+ "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_MPA_REPLY_ARRIVED fw_ret_code=%d\n",
+ ep->cid, fw_return_code);
+ qed_iwarp_mpa_reply_arrived(p_hwfn, ep);
+ break;
+ case IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE:
+ if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+ return -EINVAL;
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_RDMA,
+ "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE fw_ret_code=%d\n",
+ ep->cid, fw_return_code);
+ qed_iwarp_mpa_complete(p_hwfn, ep, fw_return_code);
+ break;
+ case IWARP_EVENT_TYPE_ASYNC_CID_CLEANED:
+ /* Here the low half of the handle carries a cid, not a pointer */
+ cid = (u16)le32_to_cpu(fw_handle->lo);
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "(0x%x)IWARP_EVENT_TYPE_ASYNC_CID_CLEANED\n", cid);
+ qed_iwarp_cid_cleaned(p_hwfn, cid);
+
+ break;
+ case IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW:
+ DP_NOTICE(p_hwfn, "IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW\n");
+
+ /* Forwarded to the affiliated-event callback with the raw handle */
+ p_hwfn->p_rdma_info->events.affiliated_event(
+ p_hwfn->p_rdma_info->events.context,
+ QED_IWARP_EVENT_CQ_OVERFLOW,
+ (void *)fw_handle);
+ break;
+ default:
+ DP_ERR(p_hwfn, "Received unexpected async iwarp event %d\n",
+ fw_event_code);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Create a listener from @iparams and add it to the iWARP listen list.
+ *
+ * The new listener is returned to the caller through @oparams->handle.
+ * Returns 0 on success or -ENOMEM if the listener cannot be allocated.
+ */
+int
+qed_iwarp_create_listen(void *rdma_cxt,
+			struct qed_iwarp_listen_in *iparams,
+			struct qed_iwarp_listen_out *oparams)
+{
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	struct qed_iwarp_listener *new_listener;
+
+	new_listener = kzalloc(sizeof(*new_listener), GFP_KERNEL);
+	if (!new_listener)
+		return -ENOMEM;
+
+	/* Upper-layer callback and its context */
+	new_listener->event_cb = iparams->event_cb;
+	new_listener->cb_context = iparams->cb_context;
+	new_listener->max_backlog = iparams->max_backlog;
+
+	/* Address the listener is bound to */
+	new_listener->ip_version = iparams->ip_version;
+	memcpy(new_listener->ip_addr, iparams->ip_addr,
+	       sizeof(new_listener->ip_addr));
+	new_listener->port = iparams->port;
+	new_listener->vlan = iparams->vlan;
+
+	oparams->handle = new_listener;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+	list_add_tail(&new_listener->list_entry,
+		      &p_hwfn->p_rdma_info->iwarp.listen_list);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_RDMA,
+		   "callback=%p handle=%p ip=%x:%x:%x:%x port=0x%x vlan=0x%x\n",
+		   new_listener->event_cb,
+		   new_listener,
+		   new_listener->ip_addr[0],
+		   new_listener->ip_addr[1],
+		   new_listener->ip_addr[2],
+		   new_listener->ip_addr[3],
+		   new_listener->port, new_listener->vlan);
+
+	return 0;
+}
+
+
+/* Unlink the listener identified by @handle from the listen list (under
+ * iw_lock) and free it. Always returns 0.
+ */
+int qed_iwarp_destroy_listen(void *rdma_cxt, void *handle)
+{
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	struct qed_iwarp_listener *doomed = handle;
+	struct qed_iwarp_info *info;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "handle=%p\n", handle);
+
+	info = &p_hwfn->p_rdma_info->iwarp;
+
+	spin_lock_bh(&info->iw_lock);
+	list_del(&doomed->list_entry);
+	spin_unlock_bh(&info->iw_lock);
+
+	kfree(doomed);
+
+	return 0;
+}
+
+
+/* Post an IWARP_RAMROD_CMD_ID_MPA_OFFLOAD_SEND_RTR ramrod for the QP that
+ * belongs to the endpoint in @iparams.
+ *
+ * Returns -EINVAL when no endpoint context was supplied, otherwise the
+ * result of initialising and posting the slow-path request.
+ */
+int qed_iwarp_send_rtr(void *rdma_cxt, struct qed_iwarp_send_rtr_in *iparams)
+{
+	struct qed_hwfn *p_hwfn = rdma_cxt;
+	struct qed_iwarp_ep *ep = iparams->ep_context;
+	struct qed_sp_init_data sp_data;
+	struct qed_spq_entry *entry;
+	struct qed_rdma_qp *qp;
+	int rc;
+
+	if (!ep) {
+		DP_ERR(p_hwfn, "Ep Context receive in send_rtr is NULL\n");
+		return -EINVAL;
+	}
+
+	qp = ep->qp;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x)\n",
+		   qp->icid, ep->tcp_cid);
+
+	memset(&sp_data, 0, sizeof(sp_data));
+	sp_data.comp_mode = QED_SPQ_MODE_CB;
+	sp_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	sp_data.cid = qp->icid;
+
+	rc = qed_sp_init_request(p_hwfn, &entry,
+				 IWARP_RAMROD_CMD_ID_MPA_OFFLOAD_SEND_RTR,
+				 PROTOCOLID_IWARP, &sp_data);
+	if (rc)
+		return rc;
+
+	rc = qed_spq_post(p_hwfn, entry, NULL);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = 0x%x\n", rc);
+
+	return rc;
+}
+
+
+/* Fill @out_params->state with the QP's iWARP state translated by
+ * qed_iwarp2roce_state().
+ */
+void
+qed_iwarp_query_qp(struct qed_rdma_qp *qp,
+		   struct qed_rdma_query_qp_out_params *out_params)
+{
+	enum qed_iwarp_qp_state iwarp_state = qp->iwarp_state;
+
+	out_params->state = qed_iwarp2roce_state(iwarp_state);
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
new file mode 100644
index 000000000000..148ef3c33a5d
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
@@ -0,0 +1,189 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017 QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and /or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _QED_IWARP_H
+#define _QED_IWARP_H
+
+/* States of an iWARP QP as tracked by the driver */
+enum qed_iwarp_qp_state {
+ QED_IWARP_QP_STATE_IDLE,
+ QED_IWARP_QP_STATE_RTS,
+ QED_IWARP_QP_STATE_TERMINATE,
+ QED_IWARP_QP_STATE_CLOSING,
+ QED_IWARP_QP_STATE_ERROR,
+};
+
+/* Returns the iWARP QP state for a given RoCE QP state (defined elsewhere) */
+enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state);
+
+#define QED_IWARP_PREALLOC_CNT (256)
+
+/* Sizing of the LL2 connection used for SYN packets: tx/rx descriptor
+ * counts and the MTU / rx buffer size requested for that connection.
+ */
+#define QED_IWARP_LL2_SYN_TX_SIZE (128)
+#define QED_IWARP_LL2_SYN_RX_SIZE (256)
+#define QED_IWARP_MAX_SYN_PKT_SIZE (128)
+/* Marks ll2_syn_handle as not referring to any LL2 connection */
+#define QED_IWARP_HANDLE_INVAL (0xff)
+
+/* A DMA-coherent buffer posted to an LL2 rx queue */
+struct qed_iwarp_ll2_buff {
+ void *data; /* CPU address of the buffer */
+ dma_addr_t data_phys_addr; /* DMA address of the buffer */
+ u32 buff_size; /* size in bytes used for the allocation */
+};
+
+/* Per-function iWARP state, embedded in the RDMA info as `iwarp` */
+struct qed_iwarp_info {
+ struct list_head listen_list; /* qed_iwarp_listener */
+ struct list_head ep_list; /* qed_iwarp_ep */
+ struct list_head ep_free_list; /* pre-allocated ep's */
+ spinlock_t iw_lock; /* for iwarp resources */
+ spinlock_t qp_lock; /* for teardown races */
+ u32 rcv_wnd_scale;
+ u16 max_mtu; /* copied from the RDMA start parameters */
+ u8 mac_addr[ETH_ALEN]; /* MAC used for the LLH filter */
+ u8 crc_needed;
+ u8 tcp_flags;
+ u8 ll2_syn_handle; /* SYN LL2 connection, or QED_IWARP_HANDLE_INVAL */
+ u8 peer2peer;
+ enum mpa_negotiation_mode mpa_rev;
+ enum mpa_rtr_type rtr_type;
+};
+
+/* Connection states of an iWARP endpoint (struct qed_iwarp_ep) */
+enum qed_iwarp_ep_state {
+ QED_IWARP_EP_INIT,
+ QED_IWARP_EP_MPA_REQ_RCVD,
+ QED_IWARP_EP_MPA_OFFLOADED,
+ QED_IWARP_EP_ESTABLISHED,
+ QED_IWARP_EP_CLOSED
+};
+
+/* Async completion data stored in the endpoint buffer; either the MPA
+ * or the TCP completion layout is used.
+ */
+union async_output {
+ struct iwarp_eqe_data_mpa_async_completion mpa_response;
+ struct iwarp_eqe_data_tcp_async_completion mpa_request;
+};
+
+/* Size in bytes of each private-data buffer in qed_iwarp_ep_memory */
+#define QED_MAX_PRIV_DATA_LEN (512)
+/* Per-endpoint buffer: incoming and outgoing private data followed by the
+ * async completion output.
+ */
+struct qed_iwarp_ep_memory {
+ u8 in_pdata[QED_MAX_PRIV_DATA_LEN];
+ u8 out_pdata[QED_MAX_PRIV_DATA_LEN];
+ union async_output async_output;
+};
+
+/* Endpoint structure represents a TCP connection. This connection can be
+ * associated with a QP or not (in which case QP==NULL)
+ */
+struct qed_iwarp_ep {
+ struct list_head list_entry;
+ struct qed_rdma_qp *qp;
+ struct qed_iwarp_ep_memory *ep_buffer_virt;
+ dma_addr_t ep_buffer_phys;
+ enum qed_iwarp_ep_state state;
+ int sig; /* compared with QED_EP_SIG to validate the endpoint */
+ struct qed_iwarp_cm_info cm_info;
+ enum tcp_connect_mode connect_mode;
+ enum mpa_rtr_type rtr_type;
+ enum mpa_negotiation_mode mpa_rev;
+ u32 tcp_cid;
+ u32 cid;
+ u16 mss;
+ u8 remote_mac_addr[6];
+ u8 local_mac_addr[6];
+ bool mpa_reply_processed;
+
+ /* For Passive side - syn packet related data */
+ u16 syn_ip_payload_length;
+ struct qed_iwarp_ll2_buff *syn;
+ dma_addr_t syn_phy_addr;
+
+ /* The event_cb function is called for asynchronous events associated
+ * with the ep. It is initialized at different entry points depending
+ * on whether the ep is the tcp connection active side or passive side
+ * The cb_context is passed to the event_cb function.
+ */
+ iwarp_event_handler event_cb;
+ void *cb_context;
+};
+
+/* A listener created by qed_iwarp_create_listen(); linked on the iWARP
+ * listen_list through list_entry.
+ */
+struct qed_iwarp_listener {
+ struct list_head list_entry;
+
+ /* The event_cb function is called for connection requests.
+ * The cb_context is passed to the event_cb function.
+ */
+ iwarp_event_handler event_cb;
+ void *cb_context;
+ u32 max_backlog;
+ u32 ip_addr[4];
+ u16 port;
+ u16 vlan;
+ u8 ip_version;
+};
+
+/* iWARP entry points used by the common qed RDMA code */
+
+int qed_iwarp_alloc(struct qed_hwfn *p_hwfn);
+
+/* Initialise iWARP state, register the async event handler and start the
+ * SYN LL2 connection. Returns 0 or a negative errno.
+ */
+int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+ struct qed_rdma_start_in_params *params);
+
+/* Counterpart of qed_iwarp_setup(). Returns 0 or a negative errno. */
+int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn);
+
+void qed_iwarp_init_devinfo(struct qed_hwfn *p_hwfn);
+
+void qed_iwarp_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+int qed_iwarp_create_qp(struct qed_hwfn *p_hwfn,
+ struct qed_rdma_qp *qp,
+ struct qed_rdma_create_qp_out_params *out_params);
+
+int qed_iwarp_modify_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp,
+ enum qed_iwarp_qp_state new_state, bool internal);
+
+int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp);
+
+int qed_iwarp_fw_destroy(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp);
+
+/* Reports the QP state through @out_params->state */
+void qed_iwarp_query_qp(struct qed_rdma_qp *qp,
+ struct qed_rdma_query_qp_out_params *out_params);
+
+int
+qed_iwarp_connect(void *rdma_cxt,
+ struct qed_iwarp_connect_in *iparams,
+ struct qed_iwarp_connect_out *oparams);
+
+/* Allocates a listener and returns it in @oparams->handle */
+int
+qed_iwarp_create_listen(void *rdma_cxt,
+ struct qed_iwarp_listen_in *iparams,
+ struct qed_iwarp_listen_out *oparams);
+
+int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams);
+
+int qed_iwarp_reject(void *rdma_cxt, struct qed_iwarp_reject_in *iparams);
+/* Unlinks and frees a listener returned by qed_iwarp_create_listen() */
+int qed_iwarp_destroy_listen(void *rdma_cxt, void *handle);
+
+/* Posts the MPA offload send-RTR ramrod for the endpoint in @iparams */
+int qed_iwarp_send_rtr(void *rdma_cxt, struct qed_iwarp_send_rtr_in *iparams);
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index e57699bfbdfa..0ba5ec8a9814 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -79,8 +79,7 @@ int qed_l2_alloc(struct qed_hwfn *p_hwfn)
unsigned long **pp_qids;
u32 i;
- if (p_hwfn->hw_info.personality != QED_PCI_ETH &&
- p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+ if (!QED_IS_L2_PERSONALITY(p_hwfn))
return 0;
p_l2_info = kzalloc(sizeof(*p_l2_info), GFP_KERNEL);
@@ -1228,19 +1227,6 @@ static enum eth_filter_action qed_filter_action(enum qed_filter_opcode opcode)
return action;
}
-static void qed_set_fw_mac_addr(__le16 *fw_msb,
- __le16 *fw_mid,
- __le16 *fw_lsb,
- u8 *mac)
-{
- ((u8 *)fw_msb)[0] = mac[1];
- ((u8 *)fw_msb)[1] = mac[0];
- ((u8 *)fw_mid)[0] = mac[3];
- ((u8 *)fw_mid)[1] = mac[2];
- ((u8 *)fw_lsb)[0] = mac[5];
- ((u8 *)fw_lsb)[1] = mac[4];
-}
-
static int
qed_filter_ucast_common(struct qed_hwfn *p_hwfn,
u16 opaque_fid,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 17f9b0a7b553..c06ad4f0758e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -309,7 +309,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
list_del(&p_pkt->list_entry);
b_last_packet = list_empty(&p_tx->active_descq);
list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
- if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+ if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
struct qed_ooo_buffer *p_buffer;
p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
@@ -532,7 +532,7 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
list_move_tail(&p_pkt->list_entry, &p_rx->free_descq);
- if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+ if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
struct qed_ooo_buffer *p_buffer;
p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
@@ -893,11 +893,11 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg;
p_ramrod->inner_vlan_removal_en = p_ll2_conn->input.rx_vlan_removal_en;
p_ramrod->queue_id = p_ll2_conn->queue_id;
- p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_ISCSI_OOO) ? 0
- : 1;
+ p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_OOO) ? 0 : 1;
if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) &&
- p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE)) {
+ p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE) &&
+ (conn_type != QED_LL2_TYPE_IWARP)) {
p_ramrod->mf_si_bcast_accept_all = 1;
p_ramrod->mf_si_mcast_accept_all = 1;
} else {
@@ -924,7 +924,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
if (!QED_LL2_TX_REGISTERED(p_ll2_conn))
return 0;
- if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO)
+ if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO)
p_ll2_conn->tx_stats_en = 0;
else
p_ll2_conn->tx_stats_en = 1;
@@ -955,10 +955,10 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
p_ramrod->pbl_size = cpu_to_le16(pbl_size);
switch (p_ll2_conn->input.tx_tc) {
- case LB_TC:
+ case PURE_LB_TC:
pq_id = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB);
break;
- case OOO_LB_TC:
+ case PKT_LB_TC:
pq_id = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OOO);
break;
default:
@@ -973,12 +973,20 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
p_ramrod->conn_type = PROTOCOLID_FCOE;
break;
case QED_LL2_TYPE_ISCSI:
- case QED_LL2_TYPE_ISCSI_OOO:
p_ramrod->conn_type = PROTOCOLID_ISCSI;
break;
case QED_LL2_TYPE_ROCE:
p_ramrod->conn_type = PROTOCOLID_ROCE;
break;
+ case QED_LL2_TYPE_IWARP:
+ p_ramrod->conn_type = PROTOCOLID_IWARP;
+ break;
+ case QED_LL2_TYPE_OOO:
+ if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
+ p_ramrod->conn_type = PROTOCOLID_ISCSI;
+ else
+ p_ramrod->conn_type = PROTOCOLID_IWARP;
+ break;
default:
p_ramrod->conn_type = PROTOCOLID_ETH;
DP_NOTICE(p_hwfn, "Unknown connection type: %d\n", conn_type);
@@ -1142,7 +1150,7 @@ qed_ll2_acquire_connection_ooo(struct qed_hwfn *p_hwfn,
u16 buf_idx;
int rc = 0;
- if (p_ll2_info->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+ if (p_ll2_info->input.conn_type != QED_LL2_TYPE_OOO)
return rc;
/* Correct number of requested OOO buffers if needed */
@@ -1280,7 +1288,7 @@ int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data)
goto q_allocate_fail;
/* Register callbacks for the Rx/Tx queues */
- if (data->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+ if (data->input.conn_type == QED_LL2_TYPE_OOO) {
comp_rx_cb = qed_ll2_lb_rxq_completion;
comp_tx_cb = qed_ll2_lb_txq_completion;
} else {
@@ -1339,7 +1347,7 @@ static void
qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn,
struct qed_ll2_info *p_ll2_conn)
{
- if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+ if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_OOO)
return;
qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
@@ -1421,7 +1429,7 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
if (rc)
goto out;
- if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+ if (!QED_IS_RDMA_PERSONALITY(p_hwfn))
qed_wr(p_hwfn, p_ptt, PRS_REG_USE_LIGHT_L2, 1);
qed_ll2_establish_connection_ooo(p_hwfn, p_ll2_conn);
@@ -1794,7 +1802,7 @@ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle)
qed_ll2_rxq_flush(p_hwfn, connection_handle);
}
- if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO)
+ if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO)
qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) {
@@ -1816,7 +1824,7 @@ static void qed_ll2_release_connection_ooo(struct qed_hwfn *p_hwfn,
{
struct qed_ooo_buffer *p_buffer;
- if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+ if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_OOO)
return;
qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
@@ -2063,7 +2071,7 @@ static void qed_ll2_set_conn_data(struct qed_dev *cdev,
ll2_cbs.cookie = QED_LEADING_HWFN(cdev);
if (lb) {
- data->input.tx_tc = OOO_LB_TC;
+ data->input.tx_tc = PKT_LB_TC;
data->input.tx_dest = QED_LL2_TX_DEST_LB;
} else {
data->input.tx_tc = 0;
@@ -2080,7 +2088,7 @@ static int qed_ll2_start_ooo(struct qed_dev *cdev,
int rc;
qed_ll2_set_conn_data(cdev, &data, params,
- QED_LL2_TYPE_ISCSI_OOO, handle, true);
+ QED_LL2_TYPE_OOO, handle, true);
rc = qed_ll2_acquire_connection(hwfn, &data);
if (rc) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 16cc30b11cce..b11399606990 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -237,6 +237,8 @@ err0:
int qed_fill_dev_info(struct qed_dev *cdev,
struct qed_dev_info *dev_info)
{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_hw_info *hw_info = &p_hwfn->hw_info;
struct qed_tunnel_info *tun = &cdev->tunnel;
struct qed_ptt *ptt;
@@ -260,11 +262,10 @@ int qed_fill_dev_info(struct qed_dev *cdev,
dev_info->pci_mem_start = cdev->pci_params.mem_start;
dev_info->pci_mem_end = cdev->pci_params.mem_end;
dev_info->pci_irq = cdev->pci_params.irq;
- dev_info->rdma_supported = (cdev->hwfns[0].hw_info.personality ==
- QED_PCI_ETH_ROCE);
+ dev_info->rdma_supported = QED_IS_RDMA_PERSONALITY(p_hwfn);
dev_info->is_mf_default = IS_MF_DEFAULT(&cdev->hwfns[0]);
dev_info->dev_type = cdev->type;
- ether_addr_copy(dev_info->hw_mac, cdev->hwfns[0].hw_info.hw_mac_addr);
+ ether_addr_copy(dev_info->hw_mac, hw_info->hw_mac_addr);
if (IS_PF(cdev)) {
dev_info->fw_major = FW_MAJOR_VERSION;
@@ -274,8 +275,7 @@ int qed_fill_dev_info(struct qed_dev *cdev,
dev_info->mf_mode = cdev->mf_mode;
dev_info->tx_switching = true;
- if (QED_LEADING_HWFN(cdev)->hw_info.b_wol_support ==
- QED_WOL_SUPPORT_PME)
+ if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME)
dev_info->wol_support = true;
dev_info->abs_pf_id = QED_LEADING_HWFN(cdev)->abs_pf_id;
@@ -304,7 +304,7 @@ int qed_fill_dev_info(struct qed_dev *cdev,
&dev_info->mfw_rev, NULL);
}
- dev_info->mtu = QED_LEADING_HWFN(cdev)->hw_info.mtu;
+ dev_info->mtu = hw_info->mtu;
return 0;
}
@@ -790,7 +790,7 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev,
cdev->num_hwfns;
if (!IS_ENABLED(CONFIG_QED_RDMA) ||
- QED_LEADING_HWFN(cdev)->hw_info.personality != QED_PCI_ETH_ROCE)
+ !QED_IS_RDMA_PERSONALITY(QED_LEADING_HWFN(cdev)))
return 0;
for_each_hwfn(cdev, i)
@@ -931,8 +931,7 @@ static void qed_update_pf_params(struct qed_dev *cdev,
/* In case we might support RDMA, don't allow qede to be greedy
* with the L2 contexts. Allow for 64 queues [rx, tx, xdp] per hwfn.
*/
- if (QED_LEADING_HWFN(cdev)->hw_info.personality ==
- QED_PCI_ETH_ROCE) {
+ if (QED_IS_RDMA_PERSONALITY(QED_LEADING_HWFN(cdev))) {
u16 *num_cons;
num_cons = &params->eth_pf_params.num_cons;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index df76e212f86e..6fb99518a61f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -161,7 +161,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto,
NULL);
- p_rdma_info->num_qps = num_cons / 2;
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+ p_rdma_info->num_qps = num_cons;
+ else
+ p_rdma_info->num_qps = num_cons / 2; /* 2 cids per qp */
num_tasks = qed_cxt_get_proto_tid_count(p_hwfn, PROTOCOLID_ROCE);
@@ -252,6 +255,13 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
"Failed to allocate real cid bitmap, rc = %d\n", rc);
goto free_cid_map;
}
+
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+ rc = qed_iwarp_alloc(p_hwfn);
+
+ if (rc)
+ goto free_cid_map;
+
DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocation successful\n");
return 0;
@@ -329,6 +339,9 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn)
{
struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+ qed_iwarp_resc_free(p_hwfn);
+
qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->cid_map, 1);
qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->pd_map, 1);
qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, 1);
@@ -470,6 +483,9 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn,
if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN)
SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1);
+
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+ qed_iwarp_init_devinfo(p_hwfn);
}
static void qed_rdma_init_port(struct qed_hwfn *p_hwfn)
@@ -490,29 +506,17 @@ static void qed_rdma_init_port(struct qed_hwfn *p_hwfn)
static int qed_rdma_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
- u32 ll2_ethertype_en;
+ int rc = 0;
DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW\n");
p_hwfn->b_rdma_enabled_in_prs = false;
- qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0);
-
- p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE;
-
- /* We delay writing to this reg until first cid is allocated. See
- * qed_cxt_dynamic_ilt_alloc function for more details
- */
- ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN);
- qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
- (ll2_ethertype_en | 0x01));
-
- if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) {
- DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n");
- return -EINVAL;
- }
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+ qed_iwarp_init_hw(p_hwfn, p_ptt);
+ else
+ rc = qed_roce_init_hw(p_hwfn, p_ptt);
- DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n");
- return 0;
+ return rc;
}
static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn,
@@ -544,7 +548,10 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn,
if (rc)
return rc;
- p_ramrod = &p_ent->ramrod.roce_init_func.rdma;
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+ p_ramrod = &p_ent->ramrod.iwarp_init_func.rdma;
+ else
+ p_ramrod = &p_ent->ramrod.roce_init_func.rdma;
p_params_header = &p_ramrod->params_header;
p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn,
@@ -641,7 +648,15 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn,
if (rc)
return rc;
- qed_roce_setup(p_hwfn);
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+ rc = qed_iwarp_setup(p_hwfn, p_ptt, params);
+ if (rc)
+ return rc;
+ } else {
+ rc = qed_roce_setup(p_hwfn);
+ if (rc)
+ return rc;
+ }
return qed_rdma_start_fw(p_hwfn, params, p_ptt);
}
@@ -675,7 +690,16 @@ int qed_rdma_stop(void *rdma_cxt)
qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
(ll2_ethertype_en & 0xFFFE));
- qed_roce_stop(p_hwfn);
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+ rc = qed_iwarp_stop(p_hwfn, p_ptt);
+ if (rc) {
+ qed_ptt_release(p_hwfn, p_ptt);
+ return rc;
+ }
+ } else {
+ qed_roce_stop(p_hwfn);
+ }
+
qed_ptt_release(p_hwfn, p_ptt);
/* Get SPQ entry */
@@ -810,7 +834,9 @@ static int qed_fill_rdma_dev_info(struct qed_dev *cdev,
memset(info, 0, sizeof(*info));
- info->rdma_type = QED_RDMA_TYPE_ROCE;
+ info->rdma_type = QED_IS_ROCE_PERSONALITY(p_hwfn) ?
+ QED_RDMA_TYPE_ROCE : QED_RDMA_TYPE_IWARP;
+
info->user_dpm_enabled = (p_hwfn->db_bar_no_edpm == 0);
qed_fill_dev_info(cdev, &info->common);
@@ -1112,7 +1138,7 @@ static int qed_rdma_query_qp(void *rdma_cxt,
struct qed_rdma_query_qp_out_params *out_params)
{
struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
- int rc;
+ int rc = 0;
DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
@@ -1138,7 +1164,10 @@ static int qed_rdma_query_qp(void *rdma_cxt,
out_params->max_dest_rd_atomic = qp->max_rd_atomic_resp;
out_params->sqd_async = qp->sqd_async;
- rc = qed_roce_query_qp(p_hwfn, qp, out_params);
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+ qed_iwarp_query_qp(qp, out_params);
+ else
+ rc = qed_roce_query_qp(p_hwfn, qp, out_params);
DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Query QP, rc = %d\n", rc);
return rc;
@@ -1151,7 +1180,10 @@ static int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp)
DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
- rc = qed_roce_destroy_qp(p_hwfn, qp);
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+ rc = qed_iwarp_destroy_qp(p_hwfn, qp);
+ else
+ rc = qed_roce_destroy_qp(p_hwfn, qp);
/* free qp params struct */
kfree(qp);
@@ -1190,20 +1222,27 @@ qed_rdma_create_qp(void *rdma_cxt,
return NULL;
}
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+ if (in_params->sq_num_pages * sizeof(struct regpair) >
+ IWARP_SHARED_QUEUE_PAGE_SQ_PBL_MAX_SIZE) {
+ DP_NOTICE(p_hwfn->cdev,
+ "Sq num pages: %d exceeds maximum\n",
+ in_params->sq_num_pages);
+ return NULL;
+ }
+ if (in_params->rq_num_pages * sizeof(struct regpair) >
+ IWARP_SHARED_QUEUE_PAGE_RQ_PBL_MAX_SIZE) {
+ DP_NOTICE(p_hwfn->cdev,
+ "Rq num pages: %d exceeds maximum\n",
+ in_params->rq_num_pages);
+ return NULL;
+ }
+ }
+
qp = kzalloc(sizeof(*qp), GFP_KERNEL);
if (!qp)
return NULL;
- rc = qed_roce_alloc_cid(p_hwfn, &qp->icid);
- qp->qpid = ((0xFF << 16) | qp->icid);
-
- DP_INFO(p_hwfn, "ROCE qpid=%x\n", qp->qpid);
-
- if (rc) {
- kfree(qp);
- return NULL;
- }
-
qp->cur_state = QED_ROCE_QP_STATE_RESET;
qp->qp_handle.hi = cpu_to_le32(in_params->qp_handle_hi);
qp->qp_handle.lo = cpu_to_le32(in_params->qp_handle_lo);
@@ -1226,6 +1265,19 @@ qed_rdma_create_qp(void *rdma_cxt,
qp->e2e_flow_control_en = qp->use_srq ? false : true;
qp->stats_queue = in_params->stats_queue;
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+ rc = qed_iwarp_create_qp(p_hwfn, qp, out_params);
+ qp->qpid = qp->icid;
+ } else {
+ rc = qed_roce_alloc_cid(p_hwfn, &qp->icid);
+ qp->qpid = ((0xFF << 16) | qp->icid);
+ }
+
+ if (rc) {
+ kfree(qp);
+ return NULL;
+ }
+
out_params->icid = qp->icid;
out_params->qp_id = qp->qpid;
@@ -1324,7 +1376,14 @@ static int qed_rdma_modify_qp(void *rdma_cxt,
qp->cur_state);
}
- rc = qed_roce_modify_qp(p_hwfn, qp, prev_state, params);
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+ enum qed_iwarp_qp_state new_state =
+ qed_roce2iwarp_state(qp->cur_state);
+
+ rc = qed_iwarp_modify_qp(p_hwfn, qp, new_state, 0);
+ } else {
+ rc = qed_roce_modify_qp(p_hwfn, qp, prev_state, params);
+ }
DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify QP, rc = %d\n", rc);
return rc;
@@ -1713,6 +1772,12 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = {
.ll2_set_fragment_of_tx_packet = &qed_ll2_set_fragment_of_tx_packet,
.ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter,
.ll2_get_stats = &qed_ll2_get_stats,
+ .iwarp_connect = &qed_iwarp_connect,
+ .iwarp_create_listen = &qed_iwarp_create_listen,
+ .iwarp_destroy_listen = &qed_iwarp_destroy_listen,
+ .iwarp_accept = &qed_iwarp_accept,
+ .iwarp_reject = &qed_iwarp_reject,
+ .iwarp_send_rtr = &qed_iwarp_send_rtr,
};
const struct qed_rdma_ops *qed_get_rdma_ops(void)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
index d91e5c4069a6..18ec9cbd84f5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
@@ -42,6 +42,7 @@
#include "qed.h"
#include "qed_dev_api.h"
#include "qed_hsi.h"
+#include "qed_iwarp.h"
#include "qed_roce.h"
#define QED_RDMA_MAX_FMR (RDMA_MAX_TIDS)
@@ -84,6 +85,7 @@ struct qed_rdma_info {
struct qed_bmap qp_map;
struct qed_bmap srq_map;
struct qed_bmap cid_map;
+ struct qed_bmap tcp_cid_map;
struct qed_bmap real_cid_map;
struct qed_bmap dpi_map;
struct qed_bmap toggle_bits;
@@ -97,6 +99,7 @@ struct qed_rdma_info {
u16 queue_zone_base;
u16 max_queue_zones;
enum protocol_type proto;
+ struct qed_iwarp_info iwarp;
};
struct qed_rdma_qp {
@@ -105,6 +108,7 @@ struct qed_rdma_qp {
u32 qpid;
u16 icid;
enum qed_roce_qp_state cur_state;
+ enum qed_iwarp_qp_state iwarp_state;
bool use_srq;
bool signal_all;
bool fmr_and_reserved_lkey;
@@ -164,6 +168,7 @@ struct qed_rdma_qp {
void *shared_queue;
dma_addr_t shared_queue_phys_addr;
+ struct qed_iwarp_ep *ep;
};
#if IS_ENABLED(CONFIG_QED_RDMA)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index e53adc3d009b..fb7c2d1562ae 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -1149,3 +1149,23 @@ int qed_roce_setup(struct qed_hwfn *p_hwfn)
qed_roce_async_event);
}
+int qed_roce_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ u32 ll2_ethertype_en;
+
+ qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0);
+
+ p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE;
+
+ ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN);
+ qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
+ (ll2_ethertype_en | 0x01));
+
+ if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) {
+ DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n");
+ return -EINVAL;
+ }
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n");
+ return 0;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index 56c95fb9a26d..ab4ad8a1e2a5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -104,12 +104,17 @@ union ramrod_data {
struct roce_query_qp_req_ramrod_data roce_query_qp_req;
struct roce_destroy_qp_resp_ramrod_data roce_destroy_qp_resp;
struct roce_destroy_qp_req_ramrod_data roce_destroy_qp_req;
+ struct roce_init_func_ramrod_data roce_init_func;
struct rdma_create_cq_ramrod_data rdma_create_cq;
struct rdma_destroy_cq_ramrod_data rdma_destroy_cq;
struct rdma_srq_create_ramrod_data rdma_create_srq;
struct rdma_srq_destroy_ramrod_data rdma_destroy_srq;
struct rdma_srq_modify_ramrod_data rdma_modify_srq;
- struct roce_init_func_ramrod_data roce_init_func;
+ struct iwarp_create_qp_ramrod_data iwarp_create_qp;
+ struct iwarp_tcp_offload_ramrod_data iwarp_tcp_offload;
+ struct iwarp_mpa_offload_ramrod_data iwarp_mpa_offload;
+ struct iwarp_modify_qp_ramrod_data iwarp_modify_qp;
+ struct iwarp_init_func_ramrod_data iwarp_init_func;
struct fcoe_init_ramrod_params fcoe_init;
struct fcoe_conn_offload_ramrod_params fcoe_conn_ofld;
struct fcoe_conn_terminate_ramrod_params fcoe_conn_terminate;
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index bd0e3f157e9e..600e30e8f0be 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -1409,8 +1409,8 @@ static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port,
*index = entry->index;
resolved = false;
} else if (removing) {
- ofdpa_neigh_del(found);
*index = found->index;
+ ofdpa_neigh_del(found);
} else if (updating) {
ofdpa_neigh_update(found, NULL, false);
resolved = !is_zero_ether_addr(found->eth_dst);
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index ad9c4ded2b90..761c518b2f92 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -4172,7 +4172,7 @@ found:
* recipients
*/
if (is_mc_recip) {
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
unsigned int depth, i;
memset(inbuf, 0, sizeof(inbuf));
@@ -4320,7 +4320,7 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
} else {
efx_mcdi_display_error(efx, MC_CMD_FILTER_OP,
- MC_CMD_FILTER_OP_IN_LEN,
+ MC_CMD_FILTER_OP_EXT_IN_LEN,
NULL, 0, rc);
}
}
@@ -4453,7 +4453,7 @@ static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx,
struct efx_filter_spec *spec)
{
struct efx_ef10_filter_table *table = efx->filter_state;
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
struct efx_filter_spec *saved_spec;
unsigned int hash, i, depth = 1;
bool replacing = false;
@@ -4940,7 +4940,7 @@ not_restored:
static void efx_ef10_filter_table_remove(struct efx_nic *efx)
{
struct efx_ef10_filter_table *table = efx->filter_state;
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
struct efx_filter_spec *spec;
unsigned int filter_idx;
int rc;
@@ -5105,6 +5105,7 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
/* Insert/renew filters */
for (i = 0; i < addr_count; i++) {
+ EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID);
efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr);
rc = efx_ef10_filter_insert(efx, &spec, true);
@@ -5122,11 +5123,11 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
}
return rc;
} else {
- /* mark as not inserted, and carry on */
- rc = EFX_EF10_FILTER_ID_INVALID;
+ /* keep invalid ID, and carry on */
}
+ } else {
+ ids[i] = efx_ef10_filter_get_unsafe_id(rc);
}
- ids[i] = efx_ef10_filter_get_unsafe_id(rc);
}
if (multicast && rollback) {
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index b9422450deb8..3df872f56289 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -1301,7 +1301,7 @@ static void efx_mcdi_abandon(struct efx_nic *efx)
efx_schedule_reset(efx, RESET_TYPE_MCDI_TIMEOUT);
}
-/* Called from falcon_process_eventq for MCDI events */
+/* Called from efx_farch_ev_process and efx_ef10_ev_process for MCDI events */
void efx_mcdi_process_event(struct efx_channel *channel,
efx_qword_t *event)
{
@@ -1389,8 +1389,9 @@ void efx_mcdi_process_event(struct efx_channel *channel,
MCDI_EVENT_FIELD(*event, PROXY_RESPONSE_RC));
break;
default:
- netif_err(efx, hw, efx->net_dev, "Unknown MCDI event 0x%x\n",
- code);
+ netif_err(efx, hw, efx->net_dev,
+ "Unknown MCDI event " EFX_QWORD_FMT "\n",
+ EFX_QWORD_VAL(*event));
}
}
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 0d230b125c6c..080428762858 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2485,7 +2485,7 @@ static int smc_drv_resume(struct device *dev)
return 0;
}
-static struct dev_pm_ops smc_drv_pm_ops = {
+static const struct dev_pm_ops smc_drv_pm_ops = {
.suspend = smc_drv_suspend,
.resume = smc_drv_resume,
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index fffd6d5fc907..6c2d1da05588 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -638,7 +638,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
{
struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
struct device_node *node = priv->device->of_node;
- int ret;
+ int ret, phy_interface;
u32 reg, val;
regmap_read(gmac->regmap, SYSCON_EMAC_REG, &val);
@@ -718,7 +718,11 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
if (gmac->variant->support_rmii)
reg &= ~SYSCON_RMII_EN;
- switch (priv->plat->interface) {
+ phy_interface = priv->plat->interface;
+ /* if PHY is internal, select the mode (xMII) used by the SoC */
+ if (gmac->use_internal_phy)
+ phy_interface = gmac->variant->internal_phy;
+ switch (phy_interface) {
case PHY_INTERFACE_MODE_MII:
/* default */
break;
@@ -932,7 +936,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
}
plat_dat->interface = of_get_phy_mode(dev->of_node);
- if (plat_dat->interface == gmac->variant->internal_phy) {
+ if (plat_dat->interface == PHY_INTERFACE_MODE_INTERNAL) {
dev_info(&pdev->dev, "Will use internal PHY\n");
gmac->use_internal_phy = true;
gmac->ephy_clk = of_clk_get(plat_dat->phy_node, 0);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index 471a9aa6ac94..22cf6353ba04 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -205,8 +205,8 @@ static void dwmac1000_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
{
int i;
- for (i = 0; i < 22; i++)
- if ((i < 9) || (i > 17))
+ for (i = 0; i < 23; i++)
+ if ((i < 12) || (i > 17))
reg_space[DMA_BUS_MODE / 4 + i] =
readl(ioaddr + DMA_BUS_MODE + i * 4);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 743170d57f62..babb39c646ff 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -29,7 +29,7 @@
#include "stmmac.h"
#include "dwmac_dma.h"
-#define REG_SPACE_SIZE 0x1054
+#define REG_SPACE_SIZE 0x1060
#define MAC100_ETHTOOL_NAME "st_mac100"
#define GMAC_ETHTOOL_NAME "st_gmac"
diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c
index 1562ab4151e1..56ba411421f0 100644
--- a/drivers/net/ethernet/ti/cpsw-common.c
+++ b/drivers/net/ethernet/ti/cpsw-common.c
@@ -90,7 +90,7 @@ int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr)
if (of_device_is_compatible(dev->of_node, "ti,dm816-emac"))
return cpsw_am33xx_cm_get_macid(dev, 0x30, slave, mac_addr);
- if (of_machine_is_compatible("ti,am4372"))
+ if (of_machine_is_compatible("ti,am43"))
return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr);
if (of_machine_is_compatible("ti,dra7"))
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index b7a0f5eeab62..1850e348f555 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1236,6 +1236,7 @@ static inline int cpsw_tx_packet_submit(struct cpsw_priv *priv,
{
struct cpsw_common *cpsw = priv->cpsw;
+ skb_tx_timestamp(skb);
return cpdma_chan_submit(txch, skb, skb->data, skb->len,
priv->emac_port + cpsw->data.dual_emac);
}
@@ -1597,6 +1598,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
{
struct cpsw_priv *priv = netdev_priv(ndev);
struct cpsw_common *cpsw = priv->cpsw;
+ struct cpts *cpts = cpsw->cpts;
struct netdev_queue *txq;
struct cpdma_chan *txch;
int ret, q_idx;
@@ -1608,11 +1610,9 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
}
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
- cpts_is_tx_enabled(cpsw->cpts))
+ cpts_is_tx_enabled(cpts) && cpts_can_timestamp(cpts, skb))
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
- skb_tx_timestamp(skb);
-
q_idx = skb_get_queue_mapping(skb);
if (q_idx >= cpsw->tx_ch_num)
q_idx = q_idx % cpsw->tx_ch_num;
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index c96eca2b1b46..01ea82ba9cdc 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -30,6 +30,7 @@
#include <linux/of.h>
#include <linux/ptp_clock_kernel.h>
#include <linux/skbuff.h>
+#include <linux/ptp_classify.h>
#include <linux/timecounter.h>
struct cpsw_cpts {
@@ -155,6 +156,16 @@ static inline bool cpts_is_tx_enabled(struct cpts *cpts)
return !!cpts->tx_enable;
}
+static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb)
+{
+ unsigned int class = ptp_classify_raw(skb);
+
+ if (class == PTP_CLASS_NONE)
+ return false;
+
+ return true;
+}
+
#else
struct cpts;
@@ -203,6 +214,11 @@ static inline bool cpts_is_tx_enabled(struct cpts *cpts)
{
return false;
}
+
+static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb)
+{
+ return false;
+}
#endif
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 0847a8f48cfe..28cb38af1a34 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -2503,24 +2503,8 @@ static bool gbe_need_txtstamp(struct gbe_intf *gbe_intf,
const struct netcp_packet *p_info)
{
struct sk_buff *skb = p_info->skb;
- unsigned int class = ptp_classify_raw(skb);
- if (class == PTP_CLASS_NONE)
- return false;
-
- switch (class) {
- case PTP_CLASS_V1_IPV4:
- case PTP_CLASS_V1_IPV6:
- case PTP_CLASS_V2_IPV4:
- case PTP_CLASS_V2_IPV6:
- case PTP_CLASS_V2_L2:
- case (PTP_CLASS_V2_VLAN | PTP_CLASS_L2):
- case (PTP_CLASS_V2_VLAN | PTP_CLASS_IPV4):
- case (PTP_CLASS_V2_VLAN | PTP_CLASS_IPV6):
- return true;
- }
-
- return false;
+ return cpts_can_timestamp(gbe_intf->gbe_dev->cpts, skb);
}
static int gbe_txtstamp_mark_pkt(struct gbe_intf *gbe_intf,
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index eb77201cb718..de8156c6b292 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -45,9 +45,17 @@ struct geneve_net {
static unsigned int geneve_net_id;
+struct geneve_dev_node {
+ struct hlist_node hlist;
+ struct geneve_dev *geneve;
+};
+
/* Pseudo network device */
struct geneve_dev {
- struct hlist_node hlist; /* vni hash table */
+ struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */
+#if IS_ENABLED(CONFIG_IPV6)
+ struct geneve_dev_node hlist6; /* vni hash table for IPv6 socket */
+#endif
struct net *net; /* netns for packet i/o */
struct net_device *dev; /* netdev for geneve tunnel */
struct ip_tunnel_info info;
@@ -123,16 +131,16 @@ static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
__be32 addr, u8 vni[])
{
struct hlist_head *vni_list_head;
- struct geneve_dev *geneve;
+ struct geneve_dev_node *node;
__u32 hash;
/* Find the device for this VNI */
hash = geneve_net_vni_hash(vni);
vni_list_head = &gs->vni_list[hash];
- hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
- if (eq_tun_id_and_vni((u8 *)&geneve->info.key.tun_id, vni) &&
- addr == geneve->info.key.u.ipv4.dst)
- return geneve;
+ hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
+ if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) &&
+ addr == node->geneve->info.key.u.ipv4.dst)
+ return node->geneve;
}
return NULL;
}
@@ -142,16 +150,16 @@ static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
struct in6_addr addr6, u8 vni[])
{
struct hlist_head *vni_list_head;
- struct geneve_dev *geneve;
+ struct geneve_dev_node *node;
__u32 hash;
/* Find the device for this VNI */
hash = geneve_net_vni_hash(vni);
vni_list_head = &gs->vni_list[hash];
- hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
- if (eq_tun_id_and_vni((u8 *)&geneve->info.key.tun_id, vni) &&
- ipv6_addr_equal(&addr6, &geneve->info.key.u.ipv6.dst))
- return geneve;
+ hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
+ if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) &&
+ ipv6_addr_equal(&addr6, &node->geneve->info.key.u.ipv6.dst))
+ return node->geneve;
}
return NULL;
}
@@ -591,6 +599,7 @@ static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
{
struct net *net = geneve->net;
struct geneve_net *gn = net_generic(net, geneve_net_id);
+ struct geneve_dev_node *node;
struct geneve_sock *gs;
__u8 vni[3];
__u32 hash;
@@ -609,15 +618,20 @@ static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
out:
gs->collect_md = geneve->collect_md;
#if IS_ENABLED(CONFIG_IPV6)
- if (ipv6)
+ if (ipv6) {
rcu_assign_pointer(geneve->sock6, gs);
- else
+ node = &geneve->hlist6;
+ } else
#endif
+ {
rcu_assign_pointer(geneve->sock4, gs);
+ node = &geneve->hlist4;
+ }
+ node->geneve = geneve;
tunnel_id_to_vni(geneve->info.key.tun_id, vni);
hash = geneve_net_vni_hash(vni);
- hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]);
+ hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]);
return 0;
}
@@ -644,8 +658,10 @@ static int geneve_stop(struct net_device *dev)
{
struct geneve_dev *geneve = netdev_priv(dev);
- if (!hlist_unhashed(&geneve->hlist))
- hlist_del_rcu(&geneve->hlist);
+ hlist_del_init_rcu(&geneve->hlist4.hlist);
+#if IS_ENABLED(CONFIG_IPV6)
+ hlist_del_init_rcu(&geneve->hlist6.hlist);
+#endif
geneve_sock_release(geneve);
return 0;
}
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 991372150463..63c98bbbc596 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -752,7 +752,7 @@ static int netvsc_set_channels(struct net_device *net,
channels->rx_count || channels->tx_count || channels->other_count)
return -EINVAL;
- if (count > net->num_tx_queues || count > net->num_rx_queues)
+ if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX)
return -EINVAL;
if (!nvdev || nvdev->destroy)
@@ -1179,7 +1179,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
rndis_dev = ndev->extension;
if (indir) {
for (i = 0; i < ITAB_NUM; i++)
- if (indir[i] >= dev->num_rx_queues)
+ if (indir[i] >= VRSS_CHANNEL_MAX)
return -EINVAL;
for (i = 0; i < ITAB_NUM; i++)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 9ffff0362a11..0f581ee74fe4 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -39,16 +39,20 @@
#define MACVLAN_HASH_SIZE (1<<MACVLAN_HASH_BITS)
#define MACVLAN_BC_QUEUE_LEN 1000
+#define MACVLAN_F_PASSTHRU 1
+#define MACVLAN_F_ADDRCHANGE 2
+
struct macvlan_port {
struct net_device *dev;
struct hlist_head vlan_hash[MACVLAN_HASH_SIZE];
struct list_head vlans;
struct sk_buff_head bc_queue;
struct work_struct bc_work;
- bool passthru;
+ u32 flags;
int count;
struct hlist_head vlan_source_hash[MACVLAN_HASH_SIZE];
DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
+ unsigned char perm_addr[ETH_ALEN];
};
struct macvlan_source_entry {
@@ -66,6 +70,31 @@ struct macvlan_skb_cb {
static void macvlan_port_destroy(struct net_device *dev);
+static inline bool macvlan_passthru(const struct macvlan_port *port)
+{
+ return port->flags & MACVLAN_F_PASSTHRU;
+}
+
+static inline void macvlan_set_passthru(struct macvlan_port *port)
+{
+ port->flags |= MACVLAN_F_PASSTHRU;
+}
+
+static inline bool macvlan_addr_change(const struct macvlan_port *port)
+{
+ return port->flags & MACVLAN_F_ADDRCHANGE;
+}
+
+static inline void macvlan_set_addr_change(struct macvlan_port *port)
+{
+ port->flags |= MACVLAN_F_ADDRCHANGE;
+}
+
+static inline void macvlan_clear_addr_change(struct macvlan_port *port)
+{
+ port->flags &= ~MACVLAN_F_ADDRCHANGE;
+}
+
/* Hash Ethernet address */
static u32 macvlan_eth_hash(const unsigned char *addr)
{
@@ -181,11 +210,12 @@ static void macvlan_hash_change_addr(struct macvlan_dev *vlan,
static bool macvlan_addr_busy(const struct macvlan_port *port,
const unsigned char *addr)
{
- /* Test to see if the specified multicast address is
+ /* Test to see if the specified address is
* currently in use by the underlying device or
* another macvlan.
*/
- if (ether_addr_equal_64bits(port->dev->dev_addr, addr))
+ if (!macvlan_passthru(port) && !macvlan_addr_change(port) &&
+ ether_addr_equal_64bits(port->dev->dev_addr, addr))
return true;
if (macvlan_hash_lookup(port, addr))
@@ -445,7 +475,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
}
macvlan_forward_source(skb, port, eth->h_source);
- if (port->passthru)
+ if (macvlan_passthru(port))
vlan = list_first_or_null_rcu(&port->vlans,
struct macvlan_dev, list);
else
@@ -574,7 +604,7 @@ static int macvlan_open(struct net_device *dev)
struct net_device *lowerdev = vlan->lowerdev;
int err;
- if (vlan->port->passthru) {
+ if (macvlan_passthru(vlan->port)) {
if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC)) {
err = dev_set_promiscuity(lowerdev, 1);
if (err < 0)
@@ -649,7 +679,7 @@ static int macvlan_stop(struct net_device *dev)
dev_uc_unsync(lowerdev, dev);
dev_mc_unsync(lowerdev, dev);
- if (vlan->port->passthru) {
+ if (macvlan_passthru(vlan->port)) {
if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC))
dev_set_promiscuity(lowerdev, -1);
goto hash_del;
@@ -672,6 +702,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
{
struct macvlan_dev *vlan = netdev_priv(dev);
struct net_device *lowerdev = vlan->lowerdev;
+ struct macvlan_port *port = vlan->port;
int err;
if (!(dev->flags & IFF_UP)) {
@@ -682,7 +713,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
if (macvlan_addr_busy(vlan->port, addr))
return -EBUSY;
- if (!vlan->port->passthru) {
+ if (!macvlan_passthru(port)) {
err = dev_uc_add(lowerdev, addr);
if (err)
return err;
@@ -692,6 +723,15 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
macvlan_hash_change_addr(vlan, addr);
}
+ if (macvlan_passthru(port) && !macvlan_addr_change(port)) {
+ /* Since addr_change isn't set, we are here due to lower
+ * device change. Save the lower-dev address so we can
+ * restore it later.
+ */
+ ether_addr_copy(vlan->port->perm_addr,
+ lowerdev->dev_addr);
+ }
+ macvlan_clear_addr_change(port);
return 0;
}
@@ -703,8 +743,14 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p)
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
- if (vlan->mode == MACVLAN_MODE_PASSTHRU)
+ /* If the addresses are the same, this is a no-op */
+ if (ether_addr_equal(dev->dev_addr, addr->sa_data))
+ return 0;
+
+ if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
+ macvlan_set_addr_change(vlan->port);
return dev_set_mac_address(vlan->lowerdev, addr);
+ }
return macvlan_sync_address(dev, addr->sa_data);
}
@@ -926,7 +972,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
/* Support unicast filter only on passthru devices.
* Multicast filter should be allowed on all devices.
*/
- if (!vlan->port->passthru && is_unicast_ether_addr(addr))
+ if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr))
return -EOPNOTSUPP;
if (flags & NLM_F_REPLACE)
@@ -950,7 +996,7 @@ static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
/* Support unicast filter only on passthru devices.
* Multicast filter should be allowed on all devices.
*/
- if (!vlan->port->passthru && is_unicast_ether_addr(addr))
+ if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr))
return -EOPNOTSUPP;
if (is_unicast_ether_addr(addr))
@@ -1118,8 +1164,8 @@ static int macvlan_port_create(struct net_device *dev)
if (port == NULL)
return -ENOMEM;
- port->passthru = false;
port->dev = dev;
+ ether_addr_copy(port->perm_addr, dev->dev_addr);
INIT_LIST_HEAD(&port->vlans);
for (i = 0; i < MACVLAN_HASH_SIZE; i++)
INIT_HLIST_HEAD(&port->vlan_hash[i]);
@@ -1159,6 +1205,18 @@ static void macvlan_port_destroy(struct net_device *dev)
kfree_skb(skb);
}
+ /* If the lower device address has been changed by passthru
+ * macvlan, put it back.
+ */
+ if (macvlan_passthru(port) &&
+ !ether_addr_equal(port->dev->dev_addr, port->perm_addr)) {
+ struct sockaddr sa;
+
+ sa.sa_family = port->dev->type;
+ memcpy(&sa.sa_data, port->perm_addr, port->dev->addr_len);
+ dev_set_mac_address(port->dev, &sa);
+ }
+
kfree(port);
}
@@ -1325,7 +1383,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
port = macvlan_port_get_rtnl(lowerdev);
/* Only 1 macvlan device can be created in passthru mode */
- if (port->passthru) {
+ if (macvlan_passthru(port)) {
/* The macvlan port must be not created this time,
* still goto destroy_macvlan_port for readability.
*/
@@ -1351,7 +1409,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
err = -EINVAL;
goto destroy_macvlan_port;
}
- port->passthru = true;
+ macvlan_set_passthru(port);
eth_hw_addr_inherit(dev, lowerdev);
}
@@ -1435,7 +1493,7 @@ static int macvlan_changelink(struct net_device *dev,
if (data && data[IFLA_MACVLAN_FLAGS]) {
__u16 flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
bool promisc = (flags ^ vlan->flags) & MACVLAN_FLAG_NOPROMISC;
- if (vlan->port->passthru && promisc) {
+ if (macvlan_passthru(vlan->port) && promisc) {
int err;
if (flags & MACVLAN_FLAG_NOPROMISC)
@@ -1598,7 +1656,7 @@ static int macvlan_device_event(struct notifier_block *unused,
}
break;
case NETDEV_CHANGEADDR:
- if (!port->passthru)
+ if (!macvlan_passthru(port))
return NOTIFY_DONE;
vlan = list_first_entry_or_null(&port->vlans,
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index ed0d10f54f26..c3065236ffcc 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -908,7 +908,7 @@ static void decode_txts(struct dp83640_private *dp83640,
if (overflow) {
pr_debug("tx timestamp queue overflow, count %d\n", overflow);
while (skb) {
- skb_complete_tx_timestamp(skb, NULL);
+ kfree_skb(skb);
skb = skb_dequeue(&dp83640->tx_queue);
}
return;
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 8400403b3f62..5d314f143aea 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -2171,6 +2171,7 @@ static struct phy_driver marvell_drivers[] = {
.get_sset_count = marvell_get_sset_count,
.get_strings = marvell_get_strings,
.get_stats = marvell_get_stats,
+ .set_loopback = genphy_loopback,
},
{
.phy_id = MARVELL_PHY_ID_88E1540,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 9365b0792309..fdb43dd9b5cd 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -620,6 +620,8 @@ static int ksz9031_read_status(struct phy_device *phydev)
if ((regval & 0xFF) == 0xFF) {
phy_init_hw(phydev);
phydev->link = 0;
+ if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
+ phydev->drv->config_intr(phydev);
}
return 0;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index acf00f071c9a..1790f7fec125 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1136,6 +1136,39 @@ int phy_resume(struct phy_device *phydev)
}
EXPORT_SYMBOL(phy_resume);
+int phy_loopback(struct phy_device *phydev, bool enable)
+{
+ struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver);
+ int ret = 0;
+
+ mutex_lock(&phydev->lock);
+
+ if (enable && phydev->loopback_enabled) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (!enable && !phydev->loopback_enabled) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (phydev->drv && phydrv->set_loopback)
+ ret = phydrv->set_loopback(phydev, enable);
+ else
+ ret = -EOPNOTSUPP;
+
+ if (ret)
+ goto out;
+
+ phydev->loopback_enabled = enable;
+
+out:
+ mutex_unlock(&phydev->lock);
+ return ret;
+}
+EXPORT_SYMBOL(phy_loopback);
+
/* Generic PHY support and helper functions */
/**
@@ -1584,6 +1617,23 @@ int genphy_resume(struct phy_device *phydev)
}
EXPORT_SYMBOL(genphy_resume);
+int genphy_loopback(struct phy_device *phydev, bool enable)
+{
+ int value;
+
+ value = phy_read(phydev, MII_BMCR);
+ if (value < 0)
+ return value;
+
+ if (enable)
+ value |= BMCR_LOOPBACK;
+ else
+ value &= ~BMCR_LOOPBACK;
+
+ return phy_write(phydev, MII_BMCR, value);
+}
+EXPORT_SYMBOL(genphy_loopback);
+
static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
{
/* The default values for phydev->supported are provided by the PHY
@@ -1829,6 +1879,7 @@ static struct phy_driver genphy_driver = {
.read_status = genphy_read_status,
.suspend = genphy_suspend,
.resume = genphy_resume,
+ .set_loopback = genphy_loopback,
};
static int __init phy_init(void)
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index 300bb1479b3a..e9f101c9bae2 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -201,7 +201,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
rionet_queue_tx_msg(skb, ndev,
nets[rnet->mport->id].active[i]);
if (count)
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
count++;
}
} else if (RIONET_MAC_MATCH(eth->h_dest)) {
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 793ce900dffa..f32261ecd215 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1725,6 +1725,18 @@ static const struct driver_info lenovo_info = {
.tx_fixup = ax88179_tx_fixup,
};
+static const struct driver_info belkin_info = { /* referenced by the Belkin B2B128 (050d:0128) entry in products[] */
+ .description = "Belkin USB Ethernet Adapter",
+ .bind = ax88179_bind,
+ .unbind = ax88179_unbind,
+ .status = ax88179_status,
+ .link_reset = ax88179_link_reset,
+ .reset = ax88179_reset,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .rx_fixup = ax88179_rx_fixup,
+ .tx_fixup = ax88179_tx_fixup,
+};
+
static const struct usb_device_id products[] = {
{
/* ASIX AX88179 10/100/1000 */
@@ -1754,6 +1766,10 @@ static const struct usb_device_id products[] = {
/* Lenovo OneLinkDock Gigabit LAN */
USB_DEVICE(0x17ef, 0x304b),
.driver_info = (unsigned long)&lenovo_info,
+}, {
+ /* Belkin B2B128 USB 3.0 Hub + Gigabit Ethernet Adapter */
+ USB_DEVICE(0x050d, 0x0128),
+ .driver_info = (unsigned long)&belkin_info,
},
{ },
};
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 18fa45fc979b..7220cd620717 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -643,6 +643,13 @@ static const struct usb_device_id mbim_devs[] = {
.driver_info = (unsigned long)&cdc_mbim_info_ndp_to_end,
},
+ /* The HP lt4132 (03f0:a31d) is a rebranded Huawei ME906s-158,
+ * therefore it too requires the above "NDP to end" quirk.
+ */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
+ .driver_info = (unsigned long)&cdc_mbim_info_ndp_to_end,
+ },
+
/* Telit LE922A6 in MBIM composition */
{ USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1041, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
.driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle,
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 2067743f51ca..d103a1d4fb36 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -89,6 +89,8 @@ static const struct cdc_ncm_stats cdc_ncm_gstrings_stats[] = {
CDC_NCM_SIMPLE_STAT(rx_ntbs),
};
+#define CDC_NCM_LOW_MEM_MAX_CNT 10
+
static int cdc_ncm_get_sset_count(struct net_device __always_unused *netdev, int sset)
{
switch (sset) {
@@ -1055,10 +1057,10 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_
/* align new NDP */
if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END))
- cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max);
+ cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_curr_size);
/* verify that there is room for the NDP and the datagram (reserve) */
- if ((ctx->tx_max - skb->len - reserve) < ctx->max_ndp_size)
+ if ((ctx->tx_curr_size - skb->len - reserve) < ctx->max_ndp_size)
return NULL;
/* link to it */
@@ -1111,13 +1113,41 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
/* allocate a new OUT skb */
if (!skb_out) {
- skb_out = alloc_skb(ctx->tx_max, GFP_ATOMIC);
+ if (ctx->tx_low_mem_val == 0) {
+ ctx->tx_curr_size = ctx->tx_max;
+ skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC);
+ /* If the memory allocation fails we will wait longer
+ * each time before attempting another full size
+ * allocation again to not overload the system
+ * further.
+ */
+ if (skb_out == NULL) {
+ ctx->tx_low_mem_max_cnt = min(ctx->tx_low_mem_max_cnt + 1,
+ (unsigned)CDC_NCM_LOW_MEM_MAX_CNT);
+ ctx->tx_low_mem_val = ctx->tx_low_mem_max_cnt;
+ }
+ }
if (skb_out == NULL) {
- if (skb != NULL) {
- dev_kfree_skb_any(skb);
- dev->net->stats.tx_dropped++;
+ /* See if a very small allocation is possible.
+ * We will send this packet immediately and hope
+ * that there is more memory available later.
+ */
+ if (skb)
+ ctx->tx_curr_size = max(skb->len,
+ (u32)USB_CDC_NCM_NTB_MIN_OUT_SIZE);
+ else
+ ctx->tx_curr_size = USB_CDC_NCM_NTB_MIN_OUT_SIZE;
+ skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC);
+
+ /* No allocation possible so we will abort */
+ if (skb_out == NULL) {
+ if (skb != NULL) {
+ dev_kfree_skb_any(skb);
+ dev->net->stats.tx_dropped++;
+ }
+ goto exit_no_skb;
}
- goto exit_no_skb;
+ ctx->tx_low_mem_val--;
}
/* fill out the initial 16-bit NTB header */
nth16 = skb_put_zero(skb_out, sizeof(struct usb_cdc_ncm_nth16));
@@ -1148,10 +1178,10 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
ndp16 = cdc_ncm_ndp(ctx, skb_out, sign, skb->len + ctx->tx_modulus + ctx->tx_remainder);
/* align beginning of next frame */
- cdc_ncm_align_tail(skb_out, ctx->tx_modulus, ctx->tx_remainder, ctx->tx_max);
+ cdc_ncm_align_tail(skb_out, ctx->tx_modulus, ctx->tx_remainder, ctx->tx_curr_size);
/* check if we had enough room left for both NDP and frame */
- if (!ndp16 || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_max) {
+ if (!ndp16 || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_curr_size) {
if (n == 0) {
/* won't fit, MTU problem? */
dev_kfree_skb_any(skb);
@@ -1227,7 +1257,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
/* If requested, put NDP at end of frame. */
if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) {
nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data;
- cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_max);
+ cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_curr_size);
nth16->wNdpIndex = cpu_to_le16(skb_out->len);
skb_put_data(skb_out, ctx->delayed_ndp16, ctx->max_ndp_size);
@@ -1246,9 +1276,9 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
*/
if (!(dev->driver_info->flags & FLAG_SEND_ZLP) &&
skb_out->len > ctx->min_tx_pkt) {
- padding_count = ctx->tx_max - skb_out->len;
+ padding_count = ctx->tx_curr_size - skb_out->len;
skb_put_zero(skb_out, padding_count);
- } else if (skb_out->len < ctx->tx_max &&
+ } else if (skb_out->len < ctx->tx_curr_size &&
(skb_out->len % dev->maxpacket) == 0) {
skb_put_u8(skb_out, 0); /* force short packet */
}
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index b33553b1e19c..f5438d0978ca 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -385,7 +385,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
tbp = tb;
}
- if (tbp[IFLA_IFNAME]) {
+ if (ifmp && tbp[IFLA_IFNAME]) {
nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
name_assign_type = NET_NAME_USER;
} else {
@@ -404,7 +404,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
return PTR_ERR(peer);
}
- if (tbp[IFLA_ADDRESS] == NULL)
+ if (!ifmp || !tbp[IFLA_ADDRESS])
eth_hw_addr_random(peer);
if (ifmp && (dev->ifindex != 0))
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 5c6388fb7dd1..2e69bcdc5b07 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1802,6 +1802,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
flush_work(&vi->config_work);
netif_device_detach(vi->dev);
+ netif_tx_disable(vi->dev);
cancel_delayed_work_sync(&vi->refill);
if (netif_running(vi->dev)) {
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 0dafd8e6c665..b04e103350fb 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -229,25 +229,25 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex,
__be32 vni)
{
- struct vxlan_dev *vxlan;
+ struct vxlan_dev_node *node;
/* For flow based devices, map all packets to VNI 0 */
if (vs->flags & VXLAN_F_COLLECT_METADATA)
vni = 0;
- hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) {
- if (vxlan->default_dst.remote_vni != vni)
+ hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) {
+ if (node->vxlan->default_dst.remote_vni != vni)
continue;
if (IS_ENABLED(CONFIG_IPV6)) {
- const struct vxlan_config *cfg = &vxlan->cfg;
+ const struct vxlan_config *cfg = &node->vxlan->cfg;
if ((cfg->flags & VXLAN_F_IPV6_LINKLOCAL) &&
cfg->remote_ifindex != ifindex)
continue;
}
- return vxlan;
+ return node->vxlan;
}
return NULL;
@@ -2387,17 +2387,22 @@ static void vxlan_vs_del_dev(struct vxlan_dev *vxlan)
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
spin_lock(&vn->sock_lock);
- hlist_del_init_rcu(&vxlan->hlist);
+ hlist_del_init_rcu(&vxlan->hlist4.hlist);
+#if IS_ENABLED(CONFIG_IPV6)
+ hlist_del_init_rcu(&vxlan->hlist6.hlist);
+#endif
spin_unlock(&vn->sock_lock);
}
-static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
+static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan,
+ struct vxlan_dev_node *node)
{
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
__be32 vni = vxlan->default_dst.remote_vni;
+ node->vxlan = vxlan;
spin_lock(&vn->sock_lock);
- hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
+ hlist_add_head_rcu(&node->hlist, vni_head(vs, vni));
spin_unlock(&vn->sock_lock);
}
@@ -2656,7 +2661,6 @@ static void vxlan_setup(struct net_device *dev)
vxlan->age_timer.data = (unsigned long) vxlan;
vxlan->dev = dev;
- vxlan->net = dev_net(dev);
gro_cells_init(&vxlan->gro_cells, dev);
@@ -2727,7 +2731,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
}
if (tb[IFLA_MTU]) {
- u32 mtu = nla_get_u32(data[IFLA_MTU]);
+ u32 mtu = nla_get_u32(tb[IFLA_MTU]);
if (mtu < ETH_MIN_MTU || mtu > ETH_MAX_MTU)
return -EINVAL;
@@ -2850,6 +2854,7 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
{
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
struct vxlan_sock *vs = NULL;
+ struct vxlan_dev_node *node;
if (!vxlan->cfg.no_share) {
spin_lock(&vn->sock_lock);
@@ -2867,12 +2872,16 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
if (IS_ERR(vs))
return PTR_ERR(vs);
#if IS_ENABLED(CONFIG_IPV6)
- if (ipv6)
+ if (ipv6) {
rcu_assign_pointer(vxlan->vn6_sock, vs);
- else
+ node = &vxlan->hlist6;
+ } else
#endif
+ {
rcu_assign_pointer(vxlan->vn4_sock, vs);
- vxlan_vs_add_dev(vs, vxlan);
+ node = &vxlan->hlist4;
+ }
+ vxlan_vs_add_dev(vs, vxlan, node);
return 0;
}
@@ -3028,7 +3037,9 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
static void vxlan_config_apply(struct net_device *dev,
struct vxlan_config *conf,
- struct net_device *lowerdev, bool changelink)
+ struct net_device *lowerdev,
+ struct net *src_net,
+ bool changelink)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_rdst *dst = &vxlan->default_dst;
@@ -3044,6 +3055,8 @@ static void vxlan_config_apply(struct net_device *dev,
if (conf->mtu)
dev->mtu = conf->mtu;
+
+ vxlan->net = src_net;
}
dst->remote_vni = conf->vni;
@@ -3086,7 +3099,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
if (ret)
return ret;
- vxlan_config_apply(dev, conf, lowerdev, changelink);
+ vxlan_config_apply(dev, conf, lowerdev, src_net, changelink);
return 0;
}
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 530586be05b4..5b1d2e8402d9 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -199,6 +199,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
unsigned long remaining_credit;
struct timer_list credit_timeout;
u64 credit_window_start;
+ bool rate_limited;
/* Statistics */
struct xenvif_stats stats;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 8397f6c92451..e322a862ddfe 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -106,7 +106,11 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
if (work_done < budget) {
napi_complete_done(napi, work_done);
- xenvif_napi_schedule_or_enable_events(queue);
+ /* If the queue is rate-limited, it shall be
+ * rescheduled in the timer callback.
+ */
+ if (likely(!queue->rate_limited))
+ xenvif_napi_schedule_or_enable_events(queue);
}
return work_done;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 602d408fa25e..5042ff8d449a 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -180,6 +180,7 @@ static void tx_add_credit(struct xenvif_queue *queue)
max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
queue->remaining_credit = min(max_credit, max_burst);
+ queue->rate_limited = false;
}
void xenvif_tx_credit_callback(unsigned long data)
@@ -686,8 +687,10 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
msecs_to_jiffies(queue->credit_usec / 1000);
/* Timer could already be pending in rare cases. */
- if (timer_pending(&queue->credit_timeout))
+ if (timer_pending(&queue->credit_timeout)) {
+ queue->rate_limited = true;
return true;
+ }
/* Passed the point where we can replenish credit? */
if (time_after_eq64(now, next_credit)) {
@@ -702,6 +705,7 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
mod_timer(&queue->credit_timeout,
next_credit);
queue->credit_window_start = next_credit;
+ queue->rate_limited = true;
return true;
}
diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig
index c4208487fadc..b065eb605215 100644
--- a/drivers/nfc/Kconfig
+++ b/drivers/nfc/Kconfig
@@ -7,7 +7,7 @@ menu "Near Field Communication (NFC) devices"
config NFC_TRF7970A
tristate "Texas Instruments TRF7970a NFC driver"
- depends on SPI && NFC_DIGITAL
+ depends on SPI && NFC_DIGITAL && GPIOLIB
help
This option enables the NFC driver for Texas Instruments' TRF7970a
device. Such device supports 5 different protocols: ISO14443A,
diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
index badd8167ac73..ec50027b0d8b 100644
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -749,11 +749,9 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
u32 protocols;
int r;
- info = kzalloc(sizeof(struct fdp_nci_info), GFP_KERNEL);
- if (!info) {
- r = -ENOMEM;
- goto err_info_alloc;
- }
+ info = devm_kzalloc(dev, sizeof(struct fdp_nci_info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
info->phy = phy;
info->phy_ops = phy_ops;
@@ -775,8 +773,7 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
tx_tailroom);
if (!ndev) {
nfc_err(dev, "Cannot allocate nfc ndev\n");
- r = -ENOMEM;
- goto err_alloc_ndev;
+ return -ENOMEM;
}
r = nci_register_device(ndev);
@@ -792,9 +789,6 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
err_regdev:
nci_free_device(ndev);
-err_alloc_ndev:
- kfree(info);
-err_info_alloc:
return r;
}
EXPORT_SYMBOL(fdp_nci_probe);
@@ -808,7 +802,6 @@ void fdp_nci_remove(struct nci_dev *ndev)
nci_unregister_device(ndev);
nci_free_device(ndev);
- kfree(info);
}
EXPORT_SYMBOL(fdp_nci_remove);
diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
index e0baec848ff2..c4da50e07bbc 100644
--- a/drivers/nfc/fdp/i2c.c
+++ b/drivers/nfc/fdp/i2c.c
@@ -27,7 +27,6 @@
#define FDP_I2C_DRIVER_NAME "fdp_nci_i2c"
-#define FDP_DP_POWER_GPIO_NAME "power"
#define FDP_DP_CLOCK_TYPE_NAME "clock-type"
#define FDP_DP_CLOCK_FREQ_NAME "clock-freq"
#define FDP_DP_FW_VSC_CFG_NAME "fw-vsc-cfg"
@@ -281,8 +280,14 @@ vsc_read_err:
*clock_type, *clock_freq, *fw_vsc_cfg != NULL ? "yes" : "no");
}
-static int fdp_nci_i2c_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
+static const struct acpi_gpio_params power_gpios = { 0, 0, false };
+
+static const struct acpi_gpio_mapping acpi_fdp_gpios[] = {
+ { "power-gpios", &power_gpios, 1 },
+ {},
+};
+
+static int fdp_nci_i2c_probe(struct i2c_client *client)
{
struct fdp_i2c_phy *phy;
struct device *dev = &client->dev;
@@ -304,8 +309,7 @@ static int fdp_nci_i2c_probe(struct i2c_client *client,
return -ENODEV;
}
- phy = devm_kzalloc(dev, sizeof(struct fdp_i2c_phy),
- GFP_KERNEL);
+ phy = devm_kzalloc(dev, sizeof(struct fdp_i2c_phy), GFP_KERNEL);
if (!phy)
return -ENOMEM;
@@ -313,19 +317,22 @@ static int fdp_nci_i2c_probe(struct i2c_client *client,
phy->next_read_size = FDP_NCI_I2C_MIN_PAYLOAD;
i2c_set_clientdata(client, phy);
- r = request_threaded_irq(client->irq, NULL, fdp_nci_i2c_irq_thread_fn,
- IRQF_TRIGGER_RISING | IRQF_ONESHOT,
- FDP_I2C_DRIVER_NAME, phy);
+ r = devm_request_threaded_irq(dev, client->irq,
+ NULL, fdp_nci_i2c_irq_thread_fn,
+ IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+ FDP_I2C_DRIVER_NAME, phy);
if (r < 0) {
nfc_err(&client->dev, "Unable to register IRQ handler\n");
return r;
}
- /* Requesting the power gpio */
- phy->power_gpio = devm_gpiod_get(dev, FDP_DP_POWER_GPIO_NAME,
- GPIOD_OUT_LOW);
+ r = devm_acpi_dev_add_driver_gpios(dev, acpi_fdp_gpios);
+ if (r)
+ dev_dbg(dev, "Unable to add GPIO mapping table\n");
+ /* Requesting the power gpio */
+ phy->power_gpio = devm_gpiod_get(dev, "power", GPIOD_OUT_LOW);
if (IS_ERR(phy->power_gpio)) {
nfc_err(dev, "Power GPIO request failed\n");
return PTR_ERR(phy->power_gpio);
@@ -360,12 +367,6 @@ static int fdp_nci_i2c_remove(struct i2c_client *client)
return 0;
}
-static struct i2c_device_id fdp_nci_i2c_id_table[] = {
- {"int339a", 0},
- {}
-};
-MODULE_DEVICE_TABLE(i2c, fdp_nci_i2c_id_table);
-
static const struct acpi_device_id fdp_nci_i2c_acpi_match[] = {
{"INT339A", 0},
{}
@@ -377,8 +378,7 @@ static struct i2c_driver fdp_nci_i2c_driver = {
.name = FDP_I2C_DRIVER_NAME,
.acpi_match_table = ACPI_PTR(fdp_nci_i2c_acpi_match),
},
- .id_table = fdp_nci_i2c_id_table,
- .probe = fdp_nci_i2c_probe,
+ .probe_new = fdp_nci_i2c_probe,
.remove = fdp_nci_i2c_remove,
};
module_i2c_driver(fdp_nci_i2c_driver);
diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c
index f9f000c546d1..7f8960a46aab 100644
--- a/drivers/nfc/nfcmrvl/fw_dnld.c
+++ b/drivers/nfc/nfcmrvl/fw_dnld.c
@@ -457,7 +457,7 @@ int nfcmrvl_fw_dnld_init(struct nfcmrvl_private *priv)
INIT_WORK(&priv->fw_dnld.rx_work, fw_dnld_rx_work);
snprintf(name, sizeof(name), "%s_nfcmrvl_fw_dnld_rx_wq",
- dev_name(priv->dev));
+ dev_name(&priv->ndev->nfc_dev->dev));
priv->fw_dnld.rx_wq = create_singlethread_workqueue(name);
if (!priv->fw_dnld.rx_wq)
return -ENOMEM;
@@ -494,6 +494,7 @@ int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name)
{
struct nfcmrvl_private *priv = nci_get_drvdata(ndev);
struct nfcmrvl_fw_dnld *fw_dnld = &priv->fw_dnld;
+ int res;
if (!priv->support_fw_dnld)
return -ENOTSUPP;
@@ -509,7 +510,9 @@ int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name)
*/
/* Retrieve FW binary */
- if (request_firmware(&fw_dnld->fw, firmware_name, priv->dev) < 0) {
+ res = request_firmware(&fw_dnld->fw, firmware_name,
+ &ndev->nfc_dev->dev);
+ if (res < 0) {
nfc_err(priv->dev, "failed to retrieve FW %s", firmware_name);
return -ENOENT;
}
diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c
index c5038e6447bd..e65d027b91fa 100644
--- a/drivers/nfc/nfcmrvl/main.c
+++ b/drivers/nfc/nfcmrvl/main.c
@@ -123,13 +123,14 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy,
memcpy(&priv->config, pdata, sizeof(*pdata));
- if (priv->config.reset_n_io) {
- rc = devm_gpio_request_one(dev,
- priv->config.reset_n_io,
- GPIOF_OUT_INIT_LOW,
- "nfcmrvl_reset_n");
- if (rc < 0)
+ if (gpio_is_valid(priv->config.reset_n_io)) {
+ rc = gpio_request_one(priv->config.reset_n_io,
+ GPIOF_OUT_INIT_LOW,
+ "nfcmrvl_reset_n");
+ if (rc < 0) {
+ priv->config.reset_n_io = -EINVAL;
nfc_err(dev, "failed to request reset_n io\n");
+ }
}
if (phy == NFCMRVL_PHY_SPI) {
@@ -154,7 +155,13 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy,
if (!priv->ndev) {
nfc_err(dev, "nci_allocate_device failed\n");
rc = -ENOMEM;
- goto error;
+ goto error_free_gpio;
+ }
+
+ rc = nfcmrvl_fw_dnld_init(priv);
+ if (rc) {
+ nfc_err(dev, "failed to initialize FW download %d\n", rc);
+ goto error_free_dev;
}
nci_set_drvdata(priv->ndev, priv);
@@ -162,24 +169,22 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy,
rc = nci_register_device(priv->ndev);
if (rc) {
nfc_err(dev, "nci_register_device failed %d\n", rc);
- goto error_free_dev;
+ goto error_fw_dnld_deinit;
}
/* Ensure that controller is powered off */
nfcmrvl_chip_halt(priv);
- rc = nfcmrvl_fw_dnld_init(priv);
- if (rc) {
- nfc_err(dev, "failed to initialize FW download %d\n", rc);
- goto error_free_dev;
- }
-
nfc_info(dev, "registered with nci successfully\n");
return priv;
+error_fw_dnld_deinit:
+ nfcmrvl_fw_dnld_deinit(priv);
error_free_dev:
nci_free_device(priv->ndev);
-error:
+error_free_gpio:
+ if (gpio_is_valid(priv->config.reset_n_io))
+ gpio_free(priv->config.reset_n_io);
kfree(priv);
return ERR_PTR(rc);
}
@@ -194,8 +199,8 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv)
nfcmrvl_fw_dnld_deinit(priv);
- if (priv->config.reset_n_io)
- devm_gpio_free(priv->dev, priv->config.reset_n_io);
+ if (gpio_is_valid(priv->config.reset_n_io))
+ gpio_free(priv->config.reset_n_io);
nci_unregister_device(ndev);
nci_free_device(ndev);
@@ -262,7 +267,6 @@ int nfcmrvl_parse_dt(struct device_node *node,
reset_n_io = of_get_named_gpio(node, "reset-n-io", 0);
if (reset_n_io < 0) {
pr_info("no reset-n-io config\n");
- reset_n_io = 0;
} else if (!gpio_is_valid(reset_n_io)) {
pr_err("invalid reset-n-io GPIO\n");
return reset_n_io;
diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c
index 83a99e38e7bd..91162f8e0366 100644
--- a/drivers/nfc/nfcmrvl/uart.c
+++ b/drivers/nfc/nfcmrvl/uart.c
@@ -84,6 +84,7 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node,
ret = nfcmrvl_parse_dt(matched_node, pdata);
if (ret < 0) {
pr_err("Failed to get generic entries\n");
+ of_node_put(matched_node);
return ret;
}
@@ -97,6 +98,8 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node,
else
pdata->break_control = 0;
+ of_node_put(matched_node);
+
return 0;
}
@@ -109,6 +112,7 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu)
struct nfcmrvl_private *priv;
struct nfcmrvl_platform_data *pdata = NULL;
struct nfcmrvl_platform_data config;
+ struct device *dev = nu->tty->dev;
/*
* Platform data cannot be used here since usually it is already used
@@ -116,9 +120,8 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu)
* and check if DT entries were added.
*/
- if (nu->tty->dev->parent && nu->tty->dev->parent->of_node)
- if (nfcmrvl_uart_parse_dt(nu->tty->dev->parent->of_node,
- &config) == 0)
+ if (dev && dev->parent && dev->parent->of_node)
+ if (nfcmrvl_uart_parse_dt(dev->parent->of_node, &config) == 0)
pdata = &config;
if (!pdata) {
@@ -131,7 +134,7 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu)
}
priv = nfcmrvl_nci_register_dev(NFCMRVL_PHY_UART, nu, &uart_ops,
- nu->tty->dev, pdata);
+ dev, pdata);
if (IS_ERR(priv))
return PTR_ERR(priv);
diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c
index 699aa9d16575..bd35eab652be 100644
--- a/drivers/nfc/nfcmrvl/usb.c
+++ b/drivers/nfc/nfcmrvl/usb.c
@@ -341,15 +341,13 @@ static int nfcmrvl_probe(struct usb_interface *intf,
init_usb_anchor(&drv_data->deferred);
priv = nfcmrvl_nci_register_dev(NFCMRVL_PHY_USB, drv_data, &usb_ops,
- &drv_data->udev->dev, &config);
+ &intf->dev, &config);
if (IS_ERR(priv))
return PTR_ERR(priv);
drv_data->priv = priv;
drv_data->priv->support_fw_dnld = false;
- priv->dev = &drv_data->udev->dev;
-
usb_set_intfdata(intf, drv_data);
return 0;
diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c
index a466e7978466..33449820e754 100644
--- a/drivers/nfc/nfcsim.c
+++ b/drivers/nfc/nfcsim.c
@@ -482,8 +482,10 @@ static int __init nfcsim_init(void)
exit_err:
pr_err("Failed to initialize nfcsim driver (%d)\n", rc);
- nfcsim_link_free(link0);
- nfcsim_link_free(link1);
+ if (link0)
+ nfcsim_link_free(link0);
+ if (link1)
+ nfcsim_link_free(link1);
return rc;
}
diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c
index fedde9d46ab6..4b14740edb67 100644
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c
@@ -904,7 +904,7 @@ static int pn544_hci_i2c_probe(struct i2c_client *client,
phy->i2c_dev = client;
i2c_set_clientdata(client, phy);
- r = acpi_dev_add_driver_gpios(ACPI_COMPANION(dev), acpi_pn544_gpios);
+ r = devm_acpi_dev_add_driver_gpios(dev, acpi_pn544_gpios);
if (r)
dev_dbg(dev, "Unable to add GPIO mapping table\n");
@@ -958,7 +958,6 @@ static int pn544_hci_i2c_remove(struct i2c_client *client)
if (phy->powered)
pn544_hci_i2c_disable(phy);
- acpi_dev_remove_driver_gpios(ACPI_COMPANION(&client->dev));
return 0;
}
diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c
index 9dfae0efa922..515f08d037fb 100644
--- a/drivers/nfc/st-nci/i2c.c
+++ b/drivers/nfc/st-nci/i2c.c
@@ -19,15 +19,12 @@
#include <linux/module.h>
#include <linux/i2c.h>
-#include <linux/gpio.h>
#include <linux/gpio/consumer.h>
-#include <linux/of_irq.h>
-#include <linux/of_gpio.h>
#include <linux/acpi.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/nfc.h>
-#include <linux/platform_data/st-nci.h>
+#include <linux/of.h>
#include "st-nci.h"
@@ -40,18 +37,16 @@
#define ST_NCI_I2C_MIN_SIZE 4 /* PCB(1) + NCI Packet header(3) */
#define ST_NCI_I2C_MAX_SIZE 250 /* req 4.2.1 */
+#define ST_NCI_DRIVER_NAME "st_nci"
#define ST_NCI_I2C_DRIVER_NAME "st_nci_i2c"
-#define ST_NCI_GPIO_NAME_RESET "reset"
-
struct st_nci_i2c_phy {
struct i2c_client *i2c_dev;
struct llt_ndlc *ndlc;
bool irq_active;
- unsigned int gpio_reset;
- unsigned int irq_polarity;
+ struct gpio_desc *gpiod_reset;
struct st_nci_se_status se_status;
};
@@ -60,9 +55,9 @@ static int st_nci_i2c_enable(void *phy_id)
{
struct st_nci_i2c_phy *phy = phy_id;
- gpio_set_value(phy->gpio_reset, 0);
+ gpiod_set_value(phy->gpiod_reset, 0);
usleep_range(10000, 15000);
- gpio_set_value(phy->gpio_reset, 1);
+ gpiod_set_value(phy->gpiod_reset, 1);
usleep_range(80000, 85000);
if (phy->ndlc->powered == 0 && phy->irq_active == 0) {
@@ -208,114 +203,18 @@ static struct nfc_phy_ops i2c_phy_ops = {
.disable = st_nci_i2c_disable,
};
-static int st_nci_i2c_acpi_request_resources(struct i2c_client *client)
-{
- struct st_nci_i2c_phy *phy = i2c_get_clientdata(client);
- struct gpio_desc *gpiod_reset;
- struct device *dev = &client->dev;
- u8 tmp;
-
- /* Get RESET GPIO from ACPI */
- gpiod_reset = devm_gpiod_get_index(dev, ST_NCI_GPIO_NAME_RESET, 1,
- GPIOD_OUT_HIGH);
- if (IS_ERR(gpiod_reset)) {
- nfc_err(dev, "Unable to get RESET GPIO\n");
- return -ENODEV;
- }
-
- phy->gpio_reset = desc_to_gpio(gpiod_reset);
-
- phy->irq_polarity = irq_get_trigger_type(client->irq);
-
- phy->se_status.is_ese_present = false;
- phy->se_status.is_uicc_present = false;
-
- if (device_property_present(dev, "ese-present")) {
- device_property_read_u8(dev, "ese-present", &tmp);
- phy->se_status.is_ese_present = tmp;
- }
-
- if (device_property_present(dev, "uicc-present")) {
- device_property_read_u8(dev, "uicc-present", &tmp);
- phy->se_status.is_uicc_present = tmp;
- }
-
- return 0;
-}
-
-static int st_nci_i2c_of_request_resources(struct i2c_client *client)
-{
- struct st_nci_i2c_phy *phy = i2c_get_clientdata(client);
- struct device_node *pp;
- int gpio;
- int r;
-
- pp = client->dev.of_node;
- if (!pp)
- return -ENODEV;
-
- /* Get GPIO from device tree */
- gpio = of_get_named_gpio(pp, "reset-gpios", 0);
- if (gpio < 0) {
- nfc_err(&client->dev,
- "Failed to retrieve reset-gpios from device tree\n");
- return gpio;
- }
-
- /* GPIO request and configuration */
- r = devm_gpio_request_one(&client->dev, gpio,
- GPIOF_OUT_INIT_HIGH, ST_NCI_GPIO_NAME_RESET);
- if (r) {
- nfc_err(&client->dev, "Failed to request reset pin\n");
- return r;
- }
- phy->gpio_reset = gpio;
-
- phy->irq_polarity = irq_get_trigger_type(client->irq);
-
- phy->se_status.is_ese_present =
- of_property_read_bool(pp, "ese-present");
- phy->se_status.is_uicc_present =
- of_property_read_bool(pp, "uicc-present");
-
- return 0;
-}
-
-static int st_nci_i2c_request_resources(struct i2c_client *client)
-{
- struct st_nci_nfc_platform_data *pdata;
- struct st_nci_i2c_phy *phy = i2c_get_clientdata(client);
- int r;
-
- pdata = client->dev.platform_data;
- if (pdata == NULL) {
- nfc_err(&client->dev, "No platform data\n");
- return -EINVAL;
- }
+static const struct acpi_gpio_params reset_gpios = { 1, 0, false };
- /* store for later use */
- phy->gpio_reset = pdata->gpio_reset;
- phy->irq_polarity = pdata->irq_polarity;
-
- r = devm_gpio_request_one(&client->dev,
- phy->gpio_reset, GPIOF_OUT_INIT_HIGH,
- ST_NCI_GPIO_NAME_RESET);
- if (r) {
- pr_err("%s : reset gpio_request failed\n", __FILE__);
- return r;
- }
-
- phy->se_status.is_ese_present = pdata->is_ese_present;
- phy->se_status.is_uicc_present = pdata->is_uicc_present;
-
- return 0;
-}
+static const struct acpi_gpio_mapping acpi_st_nci_gpios[] = {
+ { "reset-gpios", &reset_gpios, 1 },
+ {},
+};
static int st_nci_i2c_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
+ struct device *dev = &client->dev;
struct st_nci_i2c_phy *phy;
- struct st_nci_nfc_platform_data *pdata;
int r;
dev_dbg(&client->dev, "%s\n", __func__);
@@ -326,8 +225,7 @@ static int st_nci_i2c_probe(struct i2c_client *client,
return -ENODEV;
}
- phy = devm_kzalloc(&client->dev, sizeof(struct st_nci_i2c_phy),
- GFP_KERNEL);
+ phy = devm_kzalloc(dev, sizeof(struct st_nci_i2c_phy), GFP_KERNEL);
if (!phy)
return -ENOMEM;
@@ -335,32 +233,22 @@ static int st_nci_i2c_probe(struct i2c_client *client,
i2c_set_clientdata(client, phy);
- pdata = client->dev.platform_data;
- if (!pdata && client->dev.of_node) {
- r = st_nci_i2c_of_request_resources(client);
- if (r) {
- nfc_err(&client->dev, "No platform data\n");
- return r;
- }
- } else if (pdata) {
- r = st_nci_i2c_request_resources(client);
- if (r) {
- nfc_err(&client->dev,
- "Cannot get platform resources\n");
- return r;
- }
- } else if (ACPI_HANDLE(&client->dev)) {
- r = st_nci_i2c_acpi_request_resources(client);
- if (r) {
- nfc_err(&client->dev, "Cannot get ACPI data\n");
- return r;
- }
- } else {
- nfc_err(&client->dev,
- "st_nci platform resources not available\n");
+ r = devm_acpi_dev_add_driver_gpios(dev, acpi_st_nci_gpios);
+ if (r)
+ dev_dbg(dev, "Unable to add GPIO mapping table\n");
+
+ /* Get RESET GPIO */
+ phy->gpiod_reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
+ if (IS_ERR(phy->gpiod_reset)) {
+ nfc_err(dev, "Unable to get RESET GPIO\n");
return -ENODEV;
}
+ phy->se_status.is_ese_present =
+ device_property_read_bool(dev, "ese-present");
+ phy->se_status.is_uicc_present =
+ device_property_read_bool(dev, "uicc-present");
+
r = ndlc_probe(phy, &i2c_phy_ops, &client->dev,
ST_NCI_FRAME_HEADROOM, ST_NCI_FRAME_TAILROOM,
&phy->ndlc, &phy->se_status);
@@ -372,7 +260,7 @@ static int st_nci_i2c_probe(struct i2c_client *client,
phy->irq_active = true;
r = devm_request_threaded_irq(&client->dev, client->irq, NULL,
st_nci_irq_thread_fn,
- phy->irq_polarity | IRQF_ONESHOT,
+ IRQF_ONESHOT,
ST_NCI_DRIVER_NAME, phy);
if (r < 0)
nfc_err(&client->dev, "Unable to register IRQ handler\n");
diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c
index 89e341eba3eb..14705591b0fb 100644
--- a/drivers/nfc/st-nci/spi.c
+++ b/drivers/nfc/st-nci/spi.c
@@ -19,16 +19,13 @@
#include <linux/module.h>
#include <linux/spi/spi.h>
-#include <linux/gpio.h>
#include <linux/gpio/consumer.h>
-#include <linux/of_irq.h>
-#include <linux/of_gpio.h>
#include <linux/acpi.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/nfc.h>
+#include <linux/of.h>
#include <net/nfc/nci.h>
-#include <linux/platform_data/st-nci.h>
#include "st-nci.h"
@@ -41,18 +38,16 @@
#define ST_NCI_SPI_MIN_SIZE 4 /* PCB(1) + NCI Packet header(3) */
#define ST_NCI_SPI_MAX_SIZE 250 /* req 4.2.1 */
+#define ST_NCI_DRIVER_NAME "st_nci"
#define ST_NCI_SPI_DRIVER_NAME "st_nci_spi"
-#define ST_NCI_GPIO_NAME_RESET "reset"
-
struct st_nci_spi_phy {
struct spi_device *spi_dev;
struct llt_ndlc *ndlc;
bool irq_active;
- unsigned int gpio_reset;
- unsigned int irq_polarity;
+ struct gpio_desc *gpiod_reset;
struct st_nci_se_status se_status;
};
@@ -61,9 +56,9 @@ static int st_nci_spi_enable(void *phy_id)
{
struct st_nci_spi_phy *phy = phy_id;
- gpio_set_value(phy->gpio_reset, 0);
+ gpiod_set_value(phy->gpiod_reset, 0);
usleep_range(10000, 15000);
- gpio_set_value(phy->gpio_reset, 1);
+ gpiod_set_value(phy->gpiod_reset, 1);
usleep_range(80000, 85000);
if (phy->ndlc->powered == 0 && phy->irq_active == 0) {
@@ -223,113 +218,16 @@ static struct nfc_phy_ops spi_phy_ops = {
.disable = st_nci_spi_disable,
};
-static int st_nci_spi_acpi_request_resources(struct spi_device *spi_dev)
-{
- struct st_nci_spi_phy *phy = spi_get_drvdata(spi_dev);
- struct gpio_desc *gpiod_reset;
- struct device *dev = &spi_dev->dev;
- u8 tmp;
-
- /* Get RESET GPIO from ACPI */
- gpiod_reset = devm_gpiod_get_index(dev, ST_NCI_GPIO_NAME_RESET, 1,
- GPIOD_OUT_HIGH);
- if (IS_ERR(gpiod_reset)) {
- nfc_err(dev, "Unable to get RESET GPIO\n");
- return -ENODEV;
- }
-
- phy->gpio_reset = desc_to_gpio(gpiod_reset);
-
- phy->irq_polarity = irq_get_trigger_type(spi_dev->irq);
-
- phy->se_status.is_ese_present = false;
- phy->se_status.is_uicc_present = false;
-
- if (device_property_present(dev, "ese-present")) {
- device_property_read_u8(dev, "ese-present", &tmp);
- tmp = phy->se_status.is_ese_present;
- }
-
- if (device_property_present(dev, "uicc-present")) {
- device_property_read_u8(dev, "uicc-present", &tmp);
- tmp = phy->se_status.is_uicc_present;
- }
-
- return 0;
-}
-
-static int st_nci_spi_of_request_resources(struct spi_device *dev)
-{
- struct st_nci_spi_phy *phy = spi_get_drvdata(dev);
- struct device_node *pp;
- int gpio;
- int r;
-
- pp = dev->dev.of_node;
- if (!pp)
- return -ENODEV;
-
- /* Get GPIO from device tree */
- gpio = of_get_named_gpio(pp, "reset-gpios", 0);
- if (gpio < 0) {
- nfc_err(&dev->dev,
- "Failed to retrieve reset-gpios from device tree\n");
- return gpio;
- }
-
- /* GPIO request and configuration */
- r = devm_gpio_request_one(&dev->dev, gpio,
- GPIOF_OUT_INIT_HIGH, ST_NCI_GPIO_NAME_RESET);
- if (r) {
- nfc_err(&dev->dev, "Failed to request reset pin\n");
- return r;
- }
- phy->gpio_reset = gpio;
-
- phy->irq_polarity = irq_get_trigger_type(dev->irq);
+static const struct acpi_gpio_params reset_gpios = { 1, 0, false };
- phy->se_status.is_ese_present =
- of_property_read_bool(pp, "ese-present");
- phy->se_status.is_uicc_present =
- of_property_read_bool(pp, "uicc-present");
-
- return 0;
-}
-
-static int st_nci_spi_request_resources(struct spi_device *dev)
-{
- struct st_nci_nfc_platform_data *pdata;
- struct st_nci_spi_phy *phy = spi_get_drvdata(dev);
- int r;
-
- pdata = dev->dev.platform_data;
- if (pdata == NULL) {
- nfc_err(&dev->dev, "No platform data\n");
- return -EINVAL;
- }
-
- /* store for later use */
- phy->gpio_reset = pdata->gpio_reset;
- phy->irq_polarity = pdata->irq_polarity;
-
- r = devm_gpio_request_one(&dev->dev,
- phy->gpio_reset, GPIOF_OUT_INIT_HIGH,
- ST_NCI_GPIO_NAME_RESET);
- if (r) {
- pr_err("%s : reset gpio_request failed\n", __FILE__);
- return r;
- }
-
- phy->se_status.is_ese_present = pdata->is_ese_present;
- phy->se_status.is_uicc_present = pdata->is_uicc_present;
-
- return 0;
-}
+static const struct acpi_gpio_mapping acpi_st_nci_gpios[] = {
+ { "reset-gpios", &reset_gpios, 1 },
+ {},
+};
static int st_nci_spi_probe(struct spi_device *dev)
{
struct st_nci_spi_phy *phy;
- struct st_nci_nfc_platform_data *pdata;
int r;
dev_dbg(&dev->dev, "%s\n", __func__);
@@ -351,32 +249,22 @@ static int st_nci_spi_probe(struct spi_device *dev)
spi_set_drvdata(dev, phy);
- pdata = dev->dev.platform_data;
- if (!pdata && dev->dev.of_node) {
- r = st_nci_spi_of_request_resources(dev);
- if (r) {
- nfc_err(&dev->dev, "No platform data\n");
- return r;
- }
- } else if (pdata) {
- r = st_nci_spi_request_resources(dev);
- if (r) {
- nfc_err(&dev->dev,
- "Cannot get platform resources\n");
- return r;
- }
- } else if (ACPI_HANDLE(&dev->dev)) {
- r = st_nci_spi_acpi_request_resources(dev);
- if (r) {
- nfc_err(&dev->dev, "Cannot get ACPI data\n");
- return r;
- }
- } else {
- nfc_err(&dev->dev,
- "st_nci platform resources not available\n");
- return -ENODEV;
+ r = devm_acpi_dev_add_driver_gpios(&dev->dev, acpi_st_nci_gpios);
+ if (r)
+ dev_dbg(&dev->dev, "Unable to add GPIO mapping table\n");
+
+ /* Get RESET GPIO */
+ phy->gpiod_reset = devm_gpiod_get(&dev->dev, "reset", GPIOD_OUT_HIGH);
+ if (IS_ERR(phy->gpiod_reset)) {
+ nfc_err(&dev->dev, "Unable to get RESET GPIO\n");
+ return PTR_ERR(phy->gpiod_reset);
}
+ phy->se_status.is_ese_present =
+ device_property_read_bool(&dev->dev, "ese-present");
+ phy->se_status.is_uicc_present =
+ device_property_read_bool(&dev->dev, "uicc-present");
+
r = ndlc_probe(phy, &spi_phy_ops, &dev->dev,
ST_NCI_FRAME_HEADROOM, ST_NCI_FRAME_TAILROOM,
&phy->ndlc, &phy->se_status);
@@ -388,7 +276,7 @@ static int st_nci_spi_probe(struct spi_device *dev)
phy->irq_active = true;
r = devm_request_threaded_irq(&dev->dev, dev->irq, NULL,
st_nci_irq_thread_fn,
- phy->irq_polarity | IRQF_ONESHOT,
+ IRQF_ONESHOT,
ST_NCI_SPI_DRIVER_NAME, phy);
if (r < 0)
nfc_err(&dev->dev, "Unable to register IRQ handler\n");
diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
index 4bff76baa341..cd1f7bfa75eb 100644
--- a/drivers/nfc/st21nfca/i2c.c
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -61,8 +61,6 @@
#define ST21NFCA_HCI_DRIVER_NAME "st21nfca_hci"
#define ST21NFCA_HCI_I2C_DRIVER_NAME "st21nfca_hci_i2c"
-#define ST21NFCA_GPIO_NAME_EN "enable"
-
struct st21nfca_i2c_phy {
struct i2c_client *i2c_dev;
struct nfc_hci_dev *hdev;
@@ -501,41 +499,17 @@ static struct nfc_phy_ops i2c_phy_ops = {
.disable = st21nfca_hci_i2c_disable,
};
-static int st21nfca_hci_i2c_acpi_request_resources(struct i2c_client *client)
-{
- struct st21nfca_i2c_phy *phy = i2c_get_clientdata(client);
- struct device *dev = &client->dev;
-
- /* Get EN GPIO from ACPI */
- phy->gpiod_ena = devm_gpiod_get_index(dev, ST21NFCA_GPIO_NAME_EN, 1,
- GPIOD_OUT_LOW);
- if (IS_ERR(phy->gpiod_ena)) {
- nfc_err(dev, "Unable to get ENABLE GPIO\n");
- return PTR_ERR(phy->gpiod_ena);
- }
-
- return 0;
-}
-
-static int st21nfca_hci_i2c_of_request_resources(struct i2c_client *client)
-{
- struct st21nfca_i2c_phy *phy = i2c_get_clientdata(client);
- struct device *dev = &client->dev;
-
- /* Get GPIO from device tree */
- phy->gpiod_ena = devm_gpiod_get_index(dev, ST21NFCA_GPIO_NAME_EN, 0,
- GPIOD_OUT_HIGH);
- if (IS_ERR(phy->gpiod_ena)) {
- nfc_err(dev, "Failed to request enable pin\n");
- return PTR_ERR(phy->gpiod_ena);
- }
+static const struct acpi_gpio_params enable_gpios = { 1, 0, false };
- return 0;
-}
+static const struct acpi_gpio_mapping acpi_st21nfca_gpios[] = {
+ { "enable-gpios", &enable_gpios, 1 },
+ {},
+};
static int st21nfca_hci_i2c_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
+ struct device *dev = &client->dev;
struct st21nfca_i2c_phy *phy;
int r;
@@ -562,21 +536,15 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
mutex_init(&phy->phy_lock);
i2c_set_clientdata(client, phy);
- if (client->dev.of_node) {
- r = st21nfca_hci_i2c_of_request_resources(client);
- if (r) {
- nfc_err(&client->dev, "No platform data\n");
- return r;
- }
- } else if (ACPI_HANDLE(&client->dev)) {
- r = st21nfca_hci_i2c_acpi_request_resources(client);
- if (r) {
- nfc_err(&client->dev, "Cannot get ACPI data\n");
- return r;
- }
- } else {
- nfc_err(&client->dev, "st21nfca platform resources not available\n");
- return -ENODEV;
+ r = devm_acpi_dev_add_driver_gpios(dev, acpi_st21nfca_gpios);
+ if (r)
+ dev_dbg(dev, "Unable to add GPIO mapping table\n");
+
+ /* Get EN GPIO from resource provider */
+ phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW);
+ if (IS_ERR(phy->gpiod_ena)) {
+ nfc_err(dev, "Unable to get ENABLE GPIO\n");
+ return PTR_ERR(phy->gpiod_ena);
}
phy->se_status.is_ese_present =
diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c
index 2d1c8ca6e679..eee5cc1a9220 100644
--- a/drivers/nfc/trf7970a.c
+++ b/drivers/nfc/trf7970a.c
@@ -20,9 +20,8 @@
#include <linux/nfc.h>
#include <linux/skbuff.h>
#include <linux/delay.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/of.h>
-#include <linux/of_gpio.h>
#include <linux/spi/spi.h>
#include <linux/regulator/consumer.h>
@@ -123,11 +122,10 @@
NFC_PROTO_ISO14443_B_MASK | NFC_PROTO_FELICA_MASK | \
NFC_PROTO_ISO15693_MASK | NFC_PROTO_NFC_DEP_MASK)
-#define TRF7970A_AUTOSUSPEND_DELAY 30000 /* 30 seconds */
+#define TRF7970A_AUTOSUSPEND_DELAY 30000 /* 30 seconds */
#define TRF7970A_13MHZ_CLOCK_FREQUENCY 13560000
#define TRF7970A_27MHZ_CLOCK_FREQUENCY 27120000
-
#define TRF7970A_RX_SKB_ALLOC_SIZE 256
#define TRF7970A_FIFO_SIZE 127
@@ -152,7 +150,6 @@
*/
#define TRF7970A_QUIRK_IRQ_STATUS_READ BIT(0)
#define TRF7970A_QUIRK_EN2_MUST_STAY_LOW BIT(1)
-#define TRF7970A_QUIRK_T5T_RMB_EXTRA_BYTE BIT(2)
/* Direct commands */
#define TRF7970A_CMD_IDLE 0x00
@@ -295,7 +292,7 @@
#define TRF7970A_REG_IO_CTRL_AUTO_REG BIT(7)
/* IRQ Status Register Bits */
-#define TRF7970A_IRQ_STATUS_NORESP BIT(0) /* ISO15693 only */
+#define TRF7970A_IRQ_STATUS_NORESP BIT(0) /* ISO15693 only */
#define TRF7970A_IRQ_STATUS_NFC_COL_ERROR BIT(0)
#define TRF7970A_IRQ_STATUS_COL BIT(1)
#define TRF7970A_IRQ_STATUS_FRAMING_EOF_ERROR BIT(2)
@@ -451,16 +448,14 @@ struct trf7970a {
u8 md_rf_tech;
u8 tx_cmd;
bool issue_eof;
- bool adjust_resp_len;
- int en2_gpio;
- int en_gpio;
+ struct gpio_desc *en_gpiod;
+ struct gpio_desc *en2_gpiod;
struct mutex lock;
unsigned int timeout;
bool ignore_timeout;
struct delayed_work timeout_work;
};
-
static int trf7970a_cmd(struct trf7970a *trf, u8 opcode)
{
u8 cmd = TRF7970A_CMD_BIT_CTRL | TRF7970A_CMD_BIT_OPCODE(opcode);
@@ -471,7 +466,7 @@ static int trf7970a_cmd(struct trf7970a *trf, u8 opcode)
ret = spi_write(trf->spi, &cmd, 1);
if (ret)
dev_err(trf->dev, "%s - cmd: 0x%x, ret: %d\n", __func__, cmd,
- ret);
+ ret);
return ret;
}
@@ -483,14 +478,15 @@ static int trf7970a_read(struct trf7970a *trf, u8 reg, u8 *val)
ret = spi_write_then_read(trf->spi, &addr, 1, val, 1);
if (ret)
dev_err(trf->dev, "%s - addr: 0x%x, ret: %d\n", __func__, addr,
- ret);
+ ret);
dev_dbg(trf->dev, "read(0x%x): 0x%x\n", addr, *val);
return ret;
}
-static int trf7970a_read_cont(struct trf7970a *trf, u8 reg, u8 *buf, size_t len)
+static int trf7970a_read_cont(struct trf7970a *trf, u8 reg, u8 *buf,
+ size_t len)
{
u8 addr = reg | TRF7970A_CMD_BIT_RW | TRF7970A_CMD_BIT_CONTINUOUS;
struct spi_transfer t[2];
@@ -514,7 +510,7 @@ static int trf7970a_read_cont(struct trf7970a *trf, u8 reg, u8 *buf, size_t len)
ret = spi_sync(trf->spi, &m);
if (ret)
dev_err(trf->dev, "%s - addr: 0x%x, ret: %d\n", __func__, addr,
- ret);
+ ret);
return ret;
}
@@ -528,7 +524,7 @@ static int trf7970a_write(struct trf7970a *trf, u8 reg, u8 val)
ret = spi_write(trf->spi, buf, 2);
if (ret)
dev_err(trf->dev, "%s - write: 0x%x 0x%x, ret: %d\n", __func__,
- buf[0], buf[1], ret);
+ buf[0], buf[1], ret);
return ret;
}
@@ -550,7 +546,7 @@ static int trf7970a_read_irqstatus(struct trf7970a *trf, u8 *status)
if (ret)
dev_err(trf->dev, "%s - irqstatus: Status read failed: %d\n",
- __func__, ret);
+ __func__, ret);
else
*status = buf[0];
@@ -564,12 +560,12 @@ static int trf7970a_read_target_proto(struct trf7970a *trf, u8 *target_proto)
u8 addr;
addr = TRF79070A_NFC_TARGET_PROTOCOL | TRF7970A_CMD_BIT_RW |
- TRF7970A_CMD_BIT_CONTINUOUS;
+ TRF7970A_CMD_BIT_CONTINUOUS;
ret = spi_write_then_read(trf->spi, &addr, 1, buf, 2);
if (ret)
dev_err(trf->dev, "%s - target_proto: Read failed: %d\n",
- __func__, ret);
+ __func__, ret);
else
*target_proto = buf[0];
@@ -600,7 +596,7 @@ static int trf7970a_mode_detect(struct trf7970a *trf, u8 *rf_tech)
break;
default:
dev_dbg(trf->dev, "%s - mode_detect: target_proto: 0x%x\n",
- __func__, target_proto);
+ __func__, target_proto);
return -EIO;
}
@@ -616,8 +612,8 @@ static void trf7970a_send_upstream(struct trf7970a *trf)
if (trf->rx_skb && !IS_ERR(trf->rx_skb) && !trf->aborting)
print_hex_dump_debug("trf7970a rx data: ", DUMP_PREFIX_NONE,
- 16, 1, trf->rx_skb->data, trf->rx_skb->len,
- false);
+ 16, 1, trf->rx_skb->data, trf->rx_skb->len,
+ false);
trf->state = TRF7970A_ST_IDLE;
@@ -632,13 +628,6 @@ static void trf7970a_send_upstream(struct trf7970a *trf)
trf->aborting = false;
}
- if (trf->adjust_resp_len) {
- if (trf->rx_skb)
- skb_trim(trf->rx_skb, trf->rx_skb->len - 1);
-
- trf->adjust_resp_len = false;
- }
-
trf->cb(trf->ddev, trf->cb_arg, trf->rx_skb);
trf->rx_skb = NULL;
@@ -657,7 +646,8 @@ static void trf7970a_send_err_upstream(struct trf7970a *trf, int errno)
}
static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb,
- unsigned int len, u8 *prefix, unsigned int prefix_len)
+ unsigned int len, u8 *prefix,
+ unsigned int prefix_len)
{
struct spi_transfer t[2];
struct spi_message m;
@@ -665,7 +655,7 @@ static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb,
int ret;
print_hex_dump_debug("trf7970a tx data: ", DUMP_PREFIX_NONE,
- 16, 1, skb->data, len, false);
+ 16, 1, skb->data, len, false);
spi_message_init(&m);
@@ -682,7 +672,7 @@ static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb,
ret = spi_sync(trf->spi, &m);
if (ret) {
dev_err(trf->dev, "%s - Can't send tx data: %d\n", __func__,
- ret);
+ ret);
return ret;
}
@@ -706,7 +696,7 @@ static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb,
}
dev_dbg(trf->dev, "Setting timeout for %d ms, state: %d\n", timeout,
- trf->state);
+ trf->state);
schedule_delayed_work(&trf->timeout_work, msecs_to_jiffies(timeout));
@@ -774,9 +764,9 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status)
if (fifo_bytes > skb_tailroom(skb)) {
skb = skb_copy_expand(skb, skb_headroom(skb),
- max_t(int, fifo_bytes,
- TRF7970A_RX_SKB_ALLOC_SIZE),
- GFP_KERNEL);
+ max_t(int, fifo_bytes,
+ TRF7970A_RX_SKB_ALLOC_SIZE),
+ GFP_KERNEL);
if (!skb) {
trf7970a_send_err_upstream(trf, -ENOMEM);
return;
@@ -787,7 +777,7 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status)
}
ret = trf7970a_read_cont(trf, TRF7970A_FIFO_IO_REGISTER,
- skb_put(skb, fifo_bytes), fifo_bytes);
+ skb_put(skb, fifo_bytes), fifo_bytes);
if (ret) {
trf7970a_send_err_upstream(trf, ret);
return;
@@ -795,8 +785,7 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status)
/* If received Type 2 ACK/NACK, shift right 4 bits and pass up */
if ((trf->framing == NFC_DIGITAL_FRAMING_NFCA_T2T) && (skb->len == 1) &&
- (trf->special_fcn_reg1 ==
- TRF7970A_SPECIAL_FCN_REG1_4_BIT_RX)) {
+ (trf->special_fcn_reg1 == TRF7970A_SPECIAL_FCN_REG1_4_BIT_RX)) {
skb->data[0] >>= 4;
status = TRF7970A_IRQ_STATUS_SRX;
} else {
@@ -819,16 +808,16 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status)
}
no_rx_data:
- if (status == TRF7970A_IRQ_STATUS_SRX) { /* Receive complete */
+ if (status == TRF7970A_IRQ_STATUS_SRX) { /* Receive complete */
trf7970a_send_upstream(trf);
return;
}
dev_dbg(trf->dev, "Setting timeout for %d ms\n",
- TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT);
+ TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT);
schedule_delayed_work(&trf->timeout_work,
- msecs_to_jiffies(TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT));
+ msecs_to_jiffies(TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT));
}
static irqreturn_t trf7970a_irq(int irq, void *dev_id)
@@ -851,7 +840,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
}
dev_dbg(trf->dev, "IRQ - state: %d, status: 0x%x\n", trf->state,
- status);
+ status);
if (!status) {
mutex_unlock(&trf->lock);
@@ -876,7 +865,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
case TRF7970A_ST_WAIT_FOR_TX_FIFO:
if (status & TRF7970A_IRQ_STATUS_TX) {
trf->ignore_timeout =
- !cancel_delayed_work(&trf->timeout_work);
+ !cancel_delayed_work(&trf->timeout_work);
trf7970a_fill_fifo(trf);
} else {
trf7970a_send_err_upstream(trf, -EIO);
@@ -886,11 +875,11 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
case TRF7970A_ST_WAIT_FOR_RX_DATA_CONT:
if (status & TRF7970A_IRQ_STATUS_SRX) {
trf->ignore_timeout =
- !cancel_delayed_work(&trf->timeout_work);
+ !cancel_delayed_work(&trf->timeout_work);
trf7970a_drain_fifo(trf, status);
} else if (status & TRF7970A_IRQ_STATUS_FIFO) {
ret = trf7970a_read(trf, TRF7970A_FIFO_STATUS,
- &fifo_bytes);
+ &fifo_bytes);
fifo_bytes &= ~TRF7970A_FIFO_STATUS_OVERFLOW;
@@ -899,14 +888,14 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
else if (!fifo_bytes)
trf7970a_cmd(trf, TRF7970A_CMD_FIFO_RESET);
} else if ((status == TRF7970A_IRQ_STATUS_TX) ||
- (!trf->is_initiator &&
- (status == (TRF7970A_IRQ_STATUS_TX |
- TRF7970A_IRQ_STATUS_NFC_RF)))) {
+ (!trf->is_initiator &&
+ (status == (TRF7970A_IRQ_STATUS_TX |
+ TRF7970A_IRQ_STATUS_NFC_RF)))) {
trf7970a_cmd(trf, TRF7970A_CMD_FIFO_RESET);
if (!trf->timeout) {
- trf->ignore_timeout = !cancel_delayed_work(
- &trf->timeout_work);
+ trf->ignore_timeout =
+ !cancel_delayed_work(&trf->timeout_work);
trf->rx_skb = ERR_PTR(0);
trf7970a_send_upstream(trf);
break;
@@ -930,13 +919,13 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
break;
case NFC_DIGITAL_FRAMING_NFCA_ANTICOL_COMPLETE:
ret = trf7970a_write(trf,
- TRF7970A_SPECIAL_FCN_REG1,
- TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL);
+ TRF7970A_SPECIAL_FCN_REG1,
+ TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL);
if (ret)
goto err_unlock_exit;
trf->special_fcn_reg1 =
- TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL;
+ TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL;
break;
default:
break;
@@ -944,7 +933,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
if (iso_ctrl != trf->iso_ctrl) {
ret = trf7970a_write(trf, TRF7970A_ISO_CTRL,
- iso_ctrl);
+ iso_ctrl);
if (ret)
goto err_unlock_exit;
@@ -961,7 +950,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
case TRF7970A_ST_LISTENING:
if (status & TRF7970A_IRQ_STATUS_SRX) {
trf->ignore_timeout =
- !cancel_delayed_work(&trf->timeout_work);
+ !cancel_delayed_work(&trf->timeout_work);
trf7970a_drain_fifo(trf, status);
} else if (!(status & TRF7970A_IRQ_STATUS_NFC_RF)) {
trf7970a_send_err_upstream(trf, -EIO);
@@ -970,7 +959,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
case TRF7970A_ST_LISTENING_MD:
if (status & TRF7970A_IRQ_STATUS_SRX) {
trf->ignore_timeout =
- !cancel_delayed_work(&trf->timeout_work);
+ !cancel_delayed_work(&trf->timeout_work);
ret = trf7970a_mode_detect(trf, &trf->md_rf_tech);
if (ret) {
@@ -985,7 +974,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
break;
default:
dev_err(trf->dev, "%s - Driver in invalid state: %d\n",
- __func__, trf->state);
+ __func__, trf->state);
}
err_unlock_exit:
@@ -1010,19 +999,19 @@ static void trf7970a_issue_eof(struct trf7970a *trf)
trf->state = TRF7970A_ST_WAIT_FOR_RX_DATA;
dev_dbg(trf->dev, "Setting timeout for %d ms, state: %d\n",
- trf->timeout, trf->state);
+ trf->timeout, trf->state);
schedule_delayed_work(&trf->timeout_work,
- msecs_to_jiffies(trf->timeout));
+ msecs_to_jiffies(trf->timeout));
}
static void trf7970a_timeout_work_handler(struct work_struct *work)
{
struct trf7970a *trf = container_of(work, struct trf7970a,
- timeout_work.work);
+ timeout_work.work);
dev_dbg(trf->dev, "Timeout - state: %d, ignore_timeout: %d\n",
- trf->state, trf->ignore_timeout);
+ trf->state, trf->ignore_timeout);
mutex_lock(&trf->lock);
@@ -1053,7 +1042,7 @@ static int trf7970a_init(struct trf7970a *trf)
goto err_out;
ret = trf7970a_write(trf, TRF7970A_REG_IO_CTRL,
- trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1));
+ trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1));
if (ret)
goto err_out;
@@ -1066,13 +1055,13 @@ static int trf7970a_init(struct trf7970a *trf)
trf->chip_status_ctrl &= ~TRF7970A_CHIP_STATUS_RF_ON;
ret = trf7970a_write(trf, TRF7970A_MODULATOR_SYS_CLK_CTRL,
- trf->modulator_sys_clk_ctrl);
+ trf->modulator_sys_clk_ctrl);
if (ret)
goto err_out;
ret = trf7970a_write(trf, TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS,
- TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLH_96 |
- TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLL_32);
+ TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLH_96 |
+ TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLL_32);
if (ret)
goto err_out;
@@ -1093,7 +1082,7 @@ err_out:
static void trf7970a_switch_rf_off(struct trf7970a *trf)
{
if ((trf->state == TRF7970A_ST_PWR_OFF) ||
- (trf->state == TRF7970A_ST_RF_OFF))
+ (trf->state == TRF7970A_ST_RF_OFF))
return;
dev_dbg(trf->dev, "Switching rf off\n");
@@ -1117,9 +1106,9 @@ static int trf7970a_switch_rf_on(struct trf7970a *trf)
pm_runtime_get_sync(trf->dev);
- if (trf->state != TRF7970A_ST_RF_OFF) { /* Power on, RF off */
+ if (trf->state != TRF7970A_ST_RF_OFF) { /* Power on, RF off */
dev_err(trf->dev, "%s - Incorrect state: %d\n", __func__,
- trf->state);
+ trf->state);
return -EINVAL;
}
@@ -1154,7 +1143,7 @@ static int trf7970a_switch_rf(struct nfc_digital_dev *ddev, bool on)
break;
default:
dev_err(trf->dev, "%s - Invalid request: %d %d\n",
- __func__, trf->state, on);
+ __func__, trf->state, on);
trf7970a_switch_rf_off(trf);
ret = -EINVAL;
}
@@ -1165,7 +1154,7 @@ static int trf7970a_switch_rf(struct nfc_digital_dev *ddev, bool on)
break;
default:
dev_err(trf->dev, "%s - Invalid request: %d %d\n",
- __func__, trf->state, on);
+ __func__, trf->state, on);
ret = -EINVAL;
/* FALLTHROUGH */
case TRF7970A_ST_IDLE:
@@ -1190,36 +1179,36 @@ static int trf7970a_in_config_rf_tech(struct trf7970a *trf, int tech)
case NFC_DIGITAL_RF_TECH_106A:
trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_14443A_106;
trf->modulator_sys_clk_ctrl =
- (trf->modulator_sys_clk_ctrl & 0xf8) |
- TRF7970A_MODULATOR_DEPTH_OOK;
+ (trf->modulator_sys_clk_ctrl & 0xf8) |
+ TRF7970A_MODULATOR_DEPTH_OOK;
trf->guard_time = TRF7970A_GUARD_TIME_NFCA;
break;
case NFC_DIGITAL_RF_TECH_106B:
trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_14443B_106;
trf->modulator_sys_clk_ctrl =
- (trf->modulator_sys_clk_ctrl & 0xf8) |
- TRF7970A_MODULATOR_DEPTH_ASK10;
+ (trf->modulator_sys_clk_ctrl & 0xf8) |
+ TRF7970A_MODULATOR_DEPTH_ASK10;
trf->guard_time = TRF7970A_GUARD_TIME_NFCB;
break;
case NFC_DIGITAL_RF_TECH_212F:
trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_FELICA_212;
trf->modulator_sys_clk_ctrl =
- (trf->modulator_sys_clk_ctrl & 0xf8) |
- TRF7970A_MODULATOR_DEPTH_ASK10;
+ (trf->modulator_sys_clk_ctrl & 0xf8) |
+ TRF7970A_MODULATOR_DEPTH_ASK10;
trf->guard_time = TRF7970A_GUARD_TIME_NFCF;
break;
case NFC_DIGITAL_RF_TECH_424F:
trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_FELICA_424;
trf->modulator_sys_clk_ctrl =
- (trf->modulator_sys_clk_ctrl & 0xf8) |
- TRF7970A_MODULATOR_DEPTH_ASK10;
+ (trf->modulator_sys_clk_ctrl & 0xf8) |
+ TRF7970A_MODULATOR_DEPTH_ASK10;
trf->guard_time = TRF7970A_GUARD_TIME_NFCF;
break;
case NFC_DIGITAL_RF_TECH_ISO15693:
trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_15693_SGL_1OF4_2648;
trf->modulator_sys_clk_ctrl =
- (trf->modulator_sys_clk_ctrl & 0xf8) |
- TRF7970A_MODULATOR_DEPTH_OOK;
+ (trf->modulator_sys_clk_ctrl & 0xf8) |
+ TRF7970A_MODULATOR_DEPTH_OOK;
trf->guard_time = TRF7970A_GUARD_TIME_15693;
break;
default:
@@ -1246,7 +1235,8 @@ static int trf7970a_is_rf_field(struct trf7970a *trf, bool *is_rf_field)
u8 rssi;
ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL,
- trf->chip_status_ctrl | TRF7970A_CHIP_STATUS_REC_ON);
+ trf->chip_status_ctrl |
+ TRF7970A_CHIP_STATUS_REC_ON);
if (ret)
return ret;
@@ -1261,7 +1251,7 @@ static int trf7970a_is_rf_field(struct trf7970a *trf, bool *is_rf_field)
return ret;
ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL,
- trf->chip_status_ctrl);
+ trf->chip_status_ctrl);
if (ret)
return ret;
@@ -1328,15 +1318,15 @@ static int trf7970a_in_config_framing(struct trf7970a *trf, int framing)
trf->iso_ctrl = iso_ctrl;
ret = trf7970a_write(trf, TRF7970A_MODULATOR_SYS_CLK_CTRL,
- trf->modulator_sys_clk_ctrl);
+ trf->modulator_sys_clk_ctrl);
if (ret)
return ret;
}
if (!(trf->chip_status_ctrl & TRF7970A_CHIP_STATUS_RF_ON)) {
ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL,
- trf->chip_status_ctrl |
- TRF7970A_CHIP_STATUS_RF_ON);
+ trf->chip_status_ctrl |
+ TRF7970A_CHIP_STATUS_RF_ON);
if (ret)
return ret;
@@ -1349,7 +1339,7 @@ static int trf7970a_in_config_framing(struct trf7970a *trf, int framing)
}
static int trf7970a_in_configure_hw(struct nfc_digital_dev *ddev, int type,
- int param)
+ int param)
{
struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
int ret;
@@ -1361,7 +1351,7 @@ static int trf7970a_in_configure_hw(struct nfc_digital_dev *ddev, int type,
trf->is_initiator = true;
if ((trf->state == TRF7970A_ST_PWR_OFF) ||
- (trf->state == TRF7970A_ST_RF_OFF)) {
+ (trf->state == TRF7970A_ST_RF_OFF)) {
ret = trf7970a_switch_rf_on(trf);
if (ret)
goto err_unlock;
@@ -1419,7 +1409,7 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb)
* has to send an EOF in order to get a response.
*/
if ((trf->technology == NFC_DIGITAL_RF_TECH_106A) &&
- (trf->framing == NFC_DIGITAL_FRAMING_NFCA_T2T)) {
+ (trf->framing == NFC_DIGITAL_FRAMING_NFCA_T2T)) {
if (req[0] == NFC_T2T_CMD_READ)
special_fcn_reg1 = 0;
else
@@ -1427,7 +1417,7 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb)
if (special_fcn_reg1 != trf->special_fcn_reg1) {
ret = trf7970a_write(trf, TRF7970A_SPECIAL_FCN_REG1,
- special_fcn_reg1);
+ special_fcn_reg1);
if (ret)
return ret;
@@ -1447,7 +1437,7 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb)
iso_ctrl |= TRF7970A_ISO_CTRL_15693_SGL_1OF4_2648;
break;
case (ISO15693_REQ_FLAG_SUB_CARRIER |
- ISO15693_REQ_FLAG_DATA_RATE):
+ ISO15693_REQ_FLAG_DATA_RATE):
iso_ctrl |= TRF7970A_ISO_CTRL_15693_DBL_1OF4_2669;
break;
}
@@ -1460,23 +1450,18 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb)
trf->iso_ctrl = iso_ctrl;
}
- if (trf->framing == NFC_DIGITAL_FRAMING_ISO15693_T5T) {
- if (trf7970a_is_iso15693_write_or_lock(req[1]) &&
- (req[0] & ISO15693_REQ_FLAG_OPTION))
- trf->issue_eof = true;
- else if ((trf->quirks &
- TRF7970A_QUIRK_T5T_RMB_EXTRA_BYTE) &&
- (req[1] == ISO15693_CMD_READ_MULTIPLE_BLOCK))
- trf->adjust_resp_len = true;
- }
+ if ((trf->framing == NFC_DIGITAL_FRAMING_ISO15693_T5T) &&
+ trf7970a_is_iso15693_write_or_lock(req[1]) &&
+ (req[0] & ISO15693_REQ_FLAG_OPTION))
+ trf->issue_eof = true;
}
return 0;
}
static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
- struct sk_buff *skb, u16 timeout,
- nfc_digital_cmd_complete_t cb, void *arg)
+ struct sk_buff *skb, u16 timeout,
+ nfc_digital_cmd_complete_t cb, void *arg)
{
struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
u8 prefix[5];
@@ -1485,7 +1470,7 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
u8 status;
dev_dbg(trf->dev, "New request - state: %d, timeout: %d ms, len: %d\n",
- trf->state, timeout, skb->len);
+ trf->state, timeout, skb->len);
if (skb->len > TRF7970A_TX_MAX)
return -EINVAL;
@@ -1493,9 +1478,9 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
mutex_lock(&trf->lock);
if ((trf->state != TRF7970A_ST_IDLE) &&
- (trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) {
+ (trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) {
dev_err(trf->dev, "%s - Bogus state: %d\n", __func__,
- trf->state);
+ trf->state);
ret = -EIO;
goto out_err;
}
@@ -1509,7 +1494,7 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
if (timeout) {
trf->rx_skb = nfc_alloc_recv_skb(TRF7970A_RX_SKB_ALLOC_SIZE,
- GFP_KERNEL);
+ GFP_KERNEL);
if (!trf->rx_skb) {
dev_dbg(trf->dev, "Can't alloc rx_skb\n");
ret = -ENOMEM;
@@ -1546,14 +1531,14 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
* That totals 5 bytes.
*/
prefix[0] = TRF7970A_CMD_BIT_CTRL |
- TRF7970A_CMD_BIT_OPCODE(TRF7970A_CMD_FIFO_RESET);
+ TRF7970A_CMD_BIT_OPCODE(TRF7970A_CMD_FIFO_RESET);
prefix[1] = TRF7970A_CMD_BIT_CTRL |
- TRF7970A_CMD_BIT_OPCODE(trf->tx_cmd);
+ TRF7970A_CMD_BIT_OPCODE(trf->tx_cmd);
prefix[2] = TRF7970A_CMD_BIT_CONTINUOUS | TRF7970A_TX_LENGTH_BYTE1;
if (trf->framing == NFC_DIGITAL_FRAMING_NFCA_SHORT) {
prefix[3] = 0x00;
- prefix[4] = 0x0f; /* 7 bits */
+ prefix[4] = 0x0f; /* 7 bits */
} else {
prefix[3] = (len & 0xf00) >> 4;
prefix[3] |= ((len & 0xf0) >> 4);
@@ -1587,25 +1572,24 @@ static int trf7970a_tg_config_rf_tech(struct trf7970a *trf, int tech)
switch (tech) {
case NFC_DIGITAL_RF_TECH_106A:
trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_NFC_NFC_CE_MODE |
- TRF7970A_ISO_CTRL_NFC_CE |
- TRF7970A_ISO_CTRL_NFC_CE_14443A;
+ TRF7970A_ISO_CTRL_NFC_CE | TRF7970A_ISO_CTRL_NFC_CE_14443A;
trf->modulator_sys_clk_ctrl =
- (trf->modulator_sys_clk_ctrl & 0xf8) |
- TRF7970A_MODULATOR_DEPTH_OOK;
+ (trf->modulator_sys_clk_ctrl & 0xf8) |
+ TRF7970A_MODULATOR_DEPTH_OOK;
break;
case NFC_DIGITAL_RF_TECH_212F:
trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_NFC_NFC_CE_MODE |
- TRF7970A_ISO_CTRL_NFC_NFCF_212;
+ TRF7970A_ISO_CTRL_NFC_NFCF_212;
trf->modulator_sys_clk_ctrl =
- (trf->modulator_sys_clk_ctrl & 0xf8) |
- TRF7970A_MODULATOR_DEPTH_ASK10;
+ (trf->modulator_sys_clk_ctrl & 0xf8) |
+ TRF7970A_MODULATOR_DEPTH_ASK10;
break;
case NFC_DIGITAL_RF_TECH_424F:
trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_NFC_NFC_CE_MODE |
- TRF7970A_ISO_CTRL_NFC_NFCF_424;
+ TRF7970A_ISO_CTRL_NFC_NFCF_424;
trf->modulator_sys_clk_ctrl =
- (trf->modulator_sys_clk_ctrl & 0xf8) |
- TRF7970A_MODULATOR_DEPTH_ASK10;
+ (trf->modulator_sys_clk_ctrl & 0xf8) |
+ TRF7970A_MODULATOR_DEPTH_ASK10;
break;
default:
dev_dbg(trf->dev, "Unsupported rf technology: %d\n", tech);
@@ -1622,9 +1606,9 @@ static int trf7970a_tg_config_rf_tech(struct trf7970a *trf, int tech)
* here.
*/
if ((trf->framing == NFC_DIGITAL_FRAMING_NFC_DEP_ACTIVATED) &&
- (trf->iso_ctrl_tech != trf->iso_ctrl)) {
+ (trf->iso_ctrl_tech != trf->iso_ctrl)) {
ret = trf7970a_write(trf, TRF7970A_ISO_CTRL,
- trf->iso_ctrl_tech);
+ trf->iso_ctrl_tech);
trf->iso_ctrl = trf->iso_ctrl_tech;
}
@@ -1679,15 +1663,15 @@ static int trf7970a_tg_config_framing(struct trf7970a *trf, int framing)
trf->iso_ctrl = iso_ctrl;
ret = trf7970a_write(trf, TRF7970A_MODULATOR_SYS_CLK_CTRL,
- trf->modulator_sys_clk_ctrl);
+ trf->modulator_sys_clk_ctrl);
if (ret)
return ret;
}
if (!(trf->chip_status_ctrl & TRF7970A_CHIP_STATUS_RF_ON)) {
ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL,
- trf->chip_status_ctrl |
- TRF7970A_CHIP_STATUS_RF_ON);
+ trf->chip_status_ctrl |
+ TRF7970A_CHIP_STATUS_RF_ON);
if (ret)
return ret;
@@ -1698,7 +1682,7 @@ static int trf7970a_tg_config_framing(struct trf7970a *trf, int framing)
}
static int trf7970a_tg_configure_hw(struct nfc_digital_dev *ddev, int type,
- int param)
+ int param)
{
struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
int ret;
@@ -1710,7 +1694,7 @@ static int trf7970a_tg_configure_hw(struct nfc_digital_dev *ddev, int type,
trf->is_initiator = false;
if ((trf->state == TRF7970A_ST_PWR_OFF) ||
- (trf->state == TRF7970A_ST_RF_OFF)) {
+ (trf->state == TRF7970A_ST_RF_OFF)) {
ret = trf7970a_switch_rf_on(trf);
if (ret)
goto err_unlock;
@@ -1734,7 +1718,8 @@ err_unlock:
}
static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
- nfc_digital_cmd_complete_t cb, void *arg, bool mode_detect)
+ nfc_digital_cmd_complete_t cb, void *arg,
+ bool mode_detect)
{
struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
int ret;
@@ -1742,9 +1727,9 @@ static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
mutex_lock(&trf->lock);
if ((trf->state != TRF7970A_ST_IDLE) &&
- (trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) {
+ (trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) {
dev_err(trf->dev, "%s - Bogus state: %d\n", __func__,
- trf->state);
+ trf->state);
ret = -EIO;
goto out_err;
}
@@ -1757,7 +1742,7 @@ static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
}
trf->rx_skb = nfc_alloc_recv_skb(TRF7970A_RX_SKB_ALLOC_SIZE,
- GFP_KERNEL);
+ GFP_KERNEL);
if (!trf->rx_skb) {
dev_dbg(trf->dev, "Can't alloc rx_skb\n");
ret = -ENOMEM;
@@ -1765,25 +1750,25 @@ static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
}
ret = trf7970a_write(trf, TRF7970A_RX_SPECIAL_SETTINGS,
- TRF7970A_RX_SPECIAL_SETTINGS_HBT |
- TRF7970A_RX_SPECIAL_SETTINGS_M848 |
- TRF7970A_RX_SPECIAL_SETTINGS_C424 |
- TRF7970A_RX_SPECIAL_SETTINGS_C212);
+ TRF7970A_RX_SPECIAL_SETTINGS_HBT |
+ TRF7970A_RX_SPECIAL_SETTINGS_M848 |
+ TRF7970A_RX_SPECIAL_SETTINGS_C424 |
+ TRF7970A_RX_SPECIAL_SETTINGS_C212);
if (ret)
goto out_err;
ret = trf7970a_write(trf, TRF7970A_REG_IO_CTRL,
- trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1));
+ trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1));
if (ret)
goto out_err;
ret = trf7970a_write(trf, TRF7970A_NFC_LOW_FIELD_LEVEL,
- TRF7970A_NFC_LOW_FIELD_LEVEL_RFDET(0x3));
+ TRF7970A_NFC_LOW_FIELD_LEVEL_RFDET(0x3));
if (ret)
goto out_err;
ret = trf7970a_write(trf, TRF7970A_NFC_TARGET_LEVEL,
- TRF7970A_NFC_TARGET_LEVEL_RFDET(0x7));
+ TRF7970A_NFC_TARGET_LEVEL_RFDET(0x7));
if (ret)
goto out_err;
@@ -1808,32 +1793,33 @@ out_err:
}
static int trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
- nfc_digital_cmd_complete_t cb, void *arg)
+ nfc_digital_cmd_complete_t cb, void *arg)
{
struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
dev_dbg(trf->dev, "Listen - state: %d, timeout: %d ms\n",
- trf->state, timeout);
+ trf->state, timeout);
return _trf7970a_tg_listen(ddev, timeout, cb, arg, false);
}
static int trf7970a_tg_listen_md(struct nfc_digital_dev *ddev,
- u16 timeout, nfc_digital_cmd_complete_t cb, void *arg)
+ u16 timeout, nfc_digital_cmd_complete_t cb,
+ void *arg)
{
struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
int ret;
dev_dbg(trf->dev, "Listen MD - state: %d, timeout: %d ms\n",
- trf->state, timeout);
+ trf->state, timeout);
ret = trf7970a_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH,
- NFC_DIGITAL_RF_TECH_106A);
+ NFC_DIGITAL_RF_TECH_106A);
if (ret)
return ret;
ret = trf7970a_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
- NFC_DIGITAL_FRAMING_NFCA_NFC_DEP);
+ NFC_DIGITAL_FRAMING_NFCA_NFC_DEP);
if (ret)
return ret;
@@ -1845,7 +1831,7 @@ static int trf7970a_tg_get_rf_tech(struct nfc_digital_dev *ddev, u8 *rf_tech)
struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
dev_dbg(trf->dev, "Get RF Tech - state: %d, rf_tech: %d\n",
- trf->state, trf->md_rf_tech);
+ trf->state, trf->md_rf_tech);
*rf_tech = trf->md_rf_tech;
@@ -1908,14 +1894,13 @@ static int trf7970a_power_up(struct trf7970a *trf)
usleep_range(5000, 6000);
- if (!(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW)) {
- if (gpio_is_valid(trf->en2_gpio)) {
- gpio_set_value(trf->en2_gpio, 1);
- usleep_range(1000, 2000);
- }
+ if (trf->en2_gpiod &&
+ !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW)) {
+ gpiod_set_value_cansleep(trf->en2_gpiod, 1);
+ usleep_range(1000, 2000);
}
- gpio_set_value(trf->en_gpio, 1);
+ gpiod_set_value_cansleep(trf->en_gpiod, 1);
usleep_range(20000, 21000);
@@ -1935,18 +1920,19 @@ static int trf7970a_power_down(struct trf7970a *trf)
if (trf->state != TRF7970A_ST_RF_OFF) {
dev_dbg(trf->dev, "Can't power down - not RF_OFF state (%d)\n",
- trf->state);
+ trf->state);
return -EBUSY;
}
- gpio_set_value(trf->en_gpio, 0);
- if (gpio_is_valid(trf->en2_gpio))
- gpio_set_value(trf->en2_gpio, 0);
+ gpiod_set_value_cansleep(trf->en_gpiod, 0);
+
+ if (trf->en2_gpiod && !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW))
+ gpiod_set_value_cansleep(trf->en2_gpiod, 0);
ret = regulator_disable(trf->regulator);
if (ret)
dev_err(trf->dev, "%s - Can't disable VIN: %d\n", __func__,
- ret);
+ ret);
trf->state = TRF7970A_ST_PWR_OFF;
@@ -2003,12 +1989,6 @@ static int trf7970a_get_autosuspend_delay(struct device_node *np)
return autosuspend_delay;
}
-static int trf7970a_get_vin_voltage_override(struct device_node *np,
- u32 *vin_uvolts)
-{
- return of_property_read_u32(np, "vin-voltage-override", vin_uvolts);
-}
-
static int trf7970a_probe(struct spi_device *spi)
{
struct device_node *np = spi->dev.of_node;
@@ -2038,53 +2018,48 @@ static int trf7970a_probe(struct spi_device *spi)
return ret;
}
- if (of_property_read_bool(np, "t5t-rmb-extra-byte-quirk"))
- trf->quirks |= TRF7970A_QUIRK_T5T_RMB_EXTRA_BYTE;
-
if (of_property_read_bool(np, "irq-status-read-quirk"))
trf->quirks |= TRF7970A_QUIRK_IRQ_STATUS_READ;
- /* There are two enable pins - both must be present */
- trf->en_gpio = of_get_named_gpio(np, "ti,enable-gpios", 0);
- if (!gpio_is_valid(trf->en_gpio)) {
+ /* There are two enable pins - only EN must be present in the DT */
+ trf->en_gpiod = devm_gpiod_get_index(trf->dev, "ti,enable", 0,
+ GPIOD_OUT_LOW);
+ if (IS_ERR(trf->en_gpiod)) {
dev_err(trf->dev, "No EN GPIO property\n");
- return trf->en_gpio;
+ return PTR_ERR(trf->en_gpiod);
}
- ret = devm_gpio_request_one(trf->dev, trf->en_gpio,
- GPIOF_DIR_OUT | GPIOF_INIT_LOW, "trf7970a EN");
- if (ret) {
- dev_err(trf->dev, "Can't request EN GPIO: %d\n", ret);
- return ret;
- }
-
- trf->en2_gpio = of_get_named_gpio(np, "ti,enable-gpios", 1);
- if (!gpio_is_valid(trf->en2_gpio)) {
+ trf->en2_gpiod = devm_gpiod_get_index_optional(trf->dev, "ti,enable", 1,
+ GPIOD_OUT_LOW);
+ if (!trf->en2_gpiod) {
dev_info(trf->dev, "No EN2 GPIO property\n");
- } else {
- ret = devm_gpio_request_one(trf->dev, trf->en2_gpio,
- GPIOF_DIR_OUT | GPIOF_INIT_LOW, "trf7970a EN2");
- if (ret) {
- dev_err(trf->dev, "Can't request EN2 GPIO: %d\n", ret);
- return ret;
- }
+ } else if (IS_ERR(trf->en2_gpiod)) {
+ dev_err(trf->dev, "Error getting EN2 GPIO property: %ld\n",
+ PTR_ERR(trf->en2_gpiod));
+ return PTR_ERR(trf->en2_gpiod);
+ } else if (of_property_read_bool(np, "en2-rf-quirk")) {
+ trf->quirks |= TRF7970A_QUIRK_EN2_MUST_STAY_LOW;
}
of_property_read_u32(np, "clock-frequency", &clk_freq);
- if ((clk_freq != TRF7970A_27MHZ_CLOCK_FREQUENCY) ||
- (clk_freq != TRF7970A_13MHZ_CLOCK_FREQUENCY)) {
+ if ((clk_freq != TRF7970A_27MHZ_CLOCK_FREQUENCY) &&
+ (clk_freq != TRF7970A_13MHZ_CLOCK_FREQUENCY)) {
dev_err(trf->dev,
- "clock-frequency (%u Hz) unsupported\n",
- clk_freq);
+ "clock-frequency (%u Hz) unsupported\n", clk_freq);
return -EINVAL;
}
- if (of_property_read_bool(np, "en2-rf-quirk"))
- trf->quirks |= TRF7970A_QUIRK_EN2_MUST_STAY_LOW;
+ if (clk_freq == TRF7970A_27MHZ_CLOCK_FREQUENCY) {
+ trf->modulator_sys_clk_ctrl = TRF7970A_MODULATOR_27MHZ;
+ dev_dbg(trf->dev, "trf7970a configured for 27MHz crystal\n");
+ } else {
+ trf->modulator_sys_clk_ctrl = 0;
+ }
ret = devm_request_threaded_irq(trf->dev, spi->irq, NULL,
- trf7970a_irq, IRQF_TRIGGER_RISING | IRQF_ONESHOT,
- "trf7970a", trf);
+ trf7970a_irq,
+ IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+ "trf7970a", trf);
if (ret) {
dev_err(trf->dev, "Can't request IRQ#%d: %d\n", spi->irq, ret);
return ret;
@@ -2106,10 +2081,7 @@ static int trf7970a_probe(struct spi_device *spi)
goto err_destroy_lock;
}
- ret = trf7970a_get_vin_voltage_override(np, &uvolts);
- if (ret)
- uvolts = regulator_get_voltage(trf->regulator);
-
+ uvolts = regulator_get_voltage(trf->regulator);
if (uvolts > 4000000)
trf->chip_status_ctrl = TRF7970A_CHIP_STATUS_VRS5_3;
@@ -2132,9 +2104,10 @@ static int trf7970a_probe(struct spi_device *spi)
}
trf->ddev = nfc_digital_allocate_device(&trf7970a_nfc_ops,
- TRF7970A_SUPPORTED_PROTOCOLS,
- NFC_DIGITAL_DRV_CAPS_IN_CRC |
- NFC_DIGITAL_DRV_CAPS_TG_CRC, 0, 0);
+ TRF7970A_SUPPORTED_PROTOCOLS,
+ NFC_DIGITAL_DRV_CAPS_IN_CRC |
+ NFC_DIGITAL_DRV_CAPS_TG_CRC, 0,
+ 0);
if (!trf->ddev) {
dev_err(trf->dev, "Can't allocate NFC digital device\n");
ret = -ENOMEM;
@@ -2157,7 +2130,7 @@ static int trf7970a_probe(struct spi_device *spi)
ret = nfc_digital_register_device(trf->ddev);
if (ret) {
dev_err(trf->dev, "Can't register NFC digital device: %d\n",
- ret);
+ ret);
goto err_shutdown;
}
@@ -2266,29 +2239,31 @@ static int trf7970a_pm_runtime_resume(struct device *dev)
static const struct dev_pm_ops trf7970a_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(trf7970a_suspend, trf7970a_resume)
SET_RUNTIME_PM_OPS(trf7970a_pm_runtime_suspend,
- trf7970a_pm_runtime_resume, NULL)
+ trf7970a_pm_runtime_resume, NULL)
};
static const struct of_device_id trf7970a_of_match[] = {
- { .compatible = "ti,trf7970a", },
- { /* sentinel */ },
+ {.compatible = "ti,trf7970a",},
+ {},
};
+
MODULE_DEVICE_TABLE(of, trf7970a_of_match);
static const struct spi_device_id trf7970a_id_table[] = {
- { "trf7970a", 0 },
- { }
+ {"trf7970a", 0},
+ {}
};
+
MODULE_DEVICE_TABLE(spi, trf7970a_id_table);
static struct spi_driver trf7970a_spi_driver = {
.probe = trf7970a_probe,
.remove = trf7970a_remove,
.id_table = trf7970a_id_table,
- .driver = {
- .name = "trf7970a",
- .of_match_table = of_match_ptr(trf7970a_of_match),
- .pm = &trf7970a_pm_ops,
+ .driver = {
+ .name = "trf7970a",
+ .of_match_table = of_match_ptr(trf7970a_of_match),
+ .pm = &trf7970a_pm_ops,
},
};
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 951042a375d6..40c7581caeb0 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1805,7 +1805,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
if (pci_is_enabled(pdev)) {
u32 csts = readl(dev->bar + NVME_REG_CSTS);
- if (dev->ctrl.state == NVME_CTRL_LIVE)
+ if (dev->ctrl.state == NVME_CTRL_LIVE ||
+ dev->ctrl.state == NVME_CTRL_RESETTING)
nvme_start_freeze(&dev->ctrl);
dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
pdev->error_state != pci_channel_io_normal);
diff --git a/drivers/s390/net/ctcm_fsms.c b/drivers/s390/net/ctcm_fsms.c
index e9847ce3860d..570ae3b7adf6 100644
--- a/drivers/s390/net/ctcm_fsms.c
+++ b/drivers/s390/net/ctcm_fsms.c
@@ -217,7 +217,7 @@ void ctcm_purge_skb_queue(struct sk_buff_head *q)
CTCM_DBF_TEXT(TRACE, CTC_DBF_DEBUG, __func__);
while ((skb = skb_dequeue(q))) {
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
dev_kfree_skb_any(skb);
}
}
@@ -271,7 +271,7 @@ static void chx_txdone(fsm_instance *fi, int event, void *arg)
priv->stats.tx_bytes += 2;
first = 0;
}
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
dev_kfree_skb_irq(skb);
}
spin_lock(&ch->collect_lock);
@@ -297,7 +297,7 @@ static void chx_txdone(fsm_instance *fi, int event, void *arg)
skb_put(ch->trans_skb, skb->len), skb->len);
priv->stats.tx_packets++;
priv->stats.tx_bytes += skb->len - LL_HEADER_LENGTH;
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
dev_kfree_skb_irq(skb);
i++;
}
@@ -1248,7 +1248,7 @@ static void ctcmpc_chx_txdone(fsm_instance *fi, int event, void *arg)
priv->stats.tx_bytes += 2;
first = 0;
}
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
dev_kfree_skb_irq(skb);
}
spin_lock(&ch->collect_lock);
@@ -1298,7 +1298,7 @@ static void ctcmpc_chx_txdone(fsm_instance *fi, int event, void *arg)
data_space -= skb->len;
priv->stats.tx_packets++;
priv->stats.tx_bytes += skb->len;
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
dev_kfree_skb_any(skb);
peekskb = skb_peek(&ch->collect_queue);
if (peekskb->len > data_space)
@@ -1795,7 +1795,7 @@ static void ctcmpc_chx_send_sweep(fsm_instance *fsm, int event, void *arg)
fsm_event(grp->fsm, MPCG_EVENT_INOP, dev);
goto done;
} else {
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
skb_queue_tail(&wch->io_queue, skb);
}
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
index 99121352c57b..e8782a8619f7 100644
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c
@@ -483,7 +483,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
spin_unlock_irqrestore(&ch->collect_lock, saveflags);
return -EBUSY;
} else {
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
header.length = l;
header.type = be16_to_cpu(skb->protocol);
header.unused = 0;
@@ -500,7 +500,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
* Protect skb against beeing free'd by upper
* layers.
*/
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
ch->prof.txlen += skb->len;
header.length = skb->len + LL_HEADER_LENGTH;
header.type = be16_to_cpu(skb->protocol);
@@ -517,14 +517,14 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
if (hi) {
nskb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA);
if (!nskb) {
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
skb_pull(skb, LL_HEADER_LENGTH + 2);
ctcm_clear_busy(ch->netdev);
return -ENOMEM;
} else {
skb_put_data(nskb, skb->data, skb->len);
- atomic_inc(&nskb->users);
- atomic_dec(&skb->users);
+ refcount_inc(&nskb->users);
+ refcount_dec(&skb->users);
dev_kfree_skb_irq(skb);
skb = nskb;
}
@@ -542,7 +542,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
* Remove our header. It gets added
* again on retransmit.
*/
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
skb_pull(skb, LL_HEADER_LENGTH + 2);
ctcm_clear_busy(ch->netdev);
return -ENOMEM;
@@ -553,7 +553,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
ch->ccw[1].count = skb->len;
skb_copy_from_linear_data(skb,
skb_put(ch->trans_skb, skb->len), skb->len);
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
dev_kfree_skb_irq(skb);
ccw_idx = 0;
} else {
@@ -679,7 +679,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
if ((fsm_getstate(ch->fsm) != CTC_STATE_TXIDLE) || grp->in_sweep) {
spin_lock_irqsave(&ch->collect_lock, saveflags);
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
p_header = kmalloc(PDU_HEADER_LENGTH, gfp_type());
if (!p_header) {
@@ -716,7 +716,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
* Protect skb against beeing free'd by upper
* layers.
*/
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
/*
* IDAL support in CTCM is broken, so we have to
@@ -729,8 +729,8 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
goto nomem_exit;
} else {
skb_put_data(nskb, skb->data, skb->len);
- atomic_inc(&nskb->users);
- atomic_dec(&skb->users);
+ refcount_inc(&nskb->users);
+ refcount_dec(&skb->users);
dev_kfree_skb_irq(skb);
skb = nskb;
}
@@ -810,7 +810,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
ch->trans_skb->len = 0;
ch->ccw[1].count = skb->len;
skb_put_data(ch->trans_skb, skb->data, skb->len);
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
dev_kfree_skb_irq(skb);
ccw_idx = 0;
CTCM_PR_DBGDATA("%s(%s): trans_skb len: %04x\n"
@@ -855,7 +855,7 @@ nomem_exit:
"%s(%s): MEMORY allocation ERROR\n",
CTCM_FUNTAIL, ch->id);
rc = -ENOMEM;
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
dev_kfree_skb_any(skb);
fsm_event(priv->mpcg->fsm, MPCG_EVENT_INOP, dev);
done:
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 7db427c0a6a4..1579695f4e64 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -743,7 +743,7 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
conn->prof.tx_pending--;
if (single_flag) {
if ((skb = skb_dequeue(&conn->commit_queue))) {
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
if (privptr) {
privptr->stats.tx_packets++;
privptr->stats.tx_bytes +=
@@ -766,7 +766,7 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
txbytes += skb->len;
txpackets++;
stat_maxcq++;
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
dev_kfree_skb_any(skb);
}
if (conn->collect_len > conn->prof.maxmulti)
@@ -958,7 +958,7 @@ static void netiucv_purge_skb_queue(struct sk_buff_head *q)
struct sk_buff *skb;
while ((skb = skb_dequeue(q))) {
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
dev_kfree_skb_any(skb);
}
}
@@ -1176,7 +1176,7 @@ static int netiucv_transmit_skb(struct iucv_connection *conn,
IUCV_DBF_TEXT(data, 2,
"EBUSY from netiucv_transmit_skb\n");
} else {
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
skb_queue_tail(&conn->collect_queue, skb);
conn->collect_len += l;
rc = 0;
@@ -1245,7 +1245,7 @@ static int netiucv_transmit_skb(struct iucv_connection *conn,
} else {
if (copied)
dev_kfree_skb(skb);
- atomic_inc(&nskb->users);
+ refcount_inc(&nskb->users);
skb_queue_tail(&conn->commit_queue, nskb);
}
}
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 3b657d5b7e49..aec06e10b969 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1242,7 +1242,7 @@ static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf)
iucv->sk_txnotify(skb, TX_NOTIFY_GENERALERROR);
}
}
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
dev_kfree_skb_any(skb);
skb = skb_dequeue(&buf->skb_list);
}
@@ -3975,7 +3975,7 @@ static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
int flush_cnt = 0, hdr_len, large_send = 0;
buffer = buf->buffer;
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
skb_queue_tail(&buf->skb_list, skb);
/*check first on TSO ....*/
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index 2ee92aa90fe9..e937490d5d97 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -870,7 +870,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
QEDI_ERR(&qedi->dbg_ctx,
"Delayed or untracked cleanup response, itt=0x%x, tid=0x%x, cid=0x%x, task=%p\n",
protoitt, cqe->itid, qedi_conn->iscsi_conn_id, task);
- WARN_ON(1);
}
}
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index f46880315ba8..5f5a4ef2e529 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1499,11 +1499,9 @@ err_idx:
void qedi_clear_task_idx(struct qedi_ctx *qedi, int idx)
{
- if (!test_and_clear_bit(idx, qedi->task_idx_map)) {
+ if (!test_and_clear_bit(idx, qedi->task_idx_map))
QEDI_ERR(&qedi->dbg_ctx,
"FW task context, already cleared, tid=0x%x\n", idx);
- WARN_ON(1);
- }
}
void qedi_update_itt_map(struct qedi_ctx *qedi, u32 tid, u32 proto_itt,
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 0d8f81591bed..3fdca2cdd8da 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -1279,6 +1279,18 @@ iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr,
*/
if (dump_payload)
goto after_immediate_data;
+ /*
+ * Check for underflow case where both EDTL and immediate data payload
+ * exceed what is presented by CDB's TRANSFER LENGTH, and what has
+ * already been set in target_cmd_size_check() as se_cmd->data_length.
+ *
+ * For this special case, fail the command and dump the immediate data
+ * payload.
+ */
+ if (cmd->first_burst_len > cmd->se_cmd.data_length) {
+ cmd->sense_reason = TCM_INVALID_CDB_FIELD;
+ goto after_immediate_data;
+ }
immed_ret = iscsit_handle_immediate_data(cmd, hdr,
cmd->first_burst_len);
@@ -4423,8 +4435,11 @@ static void iscsit_logout_post_handler_closesession(
* always sleep waiting for RX/TX thread shutdown to complete
* within iscsit_close_connection().
*/
- if (!conn->conn_transport->rdma_shutdown)
+ if (!conn->conn_transport->rdma_shutdown) {
sleep = cmpxchg(&conn->tx_thread_active, true, false);
+ if (!sleep)
+ return;
+ }
atomic_set(&conn->conn_logout_remove, 0);
complete(&conn->conn_logout_comp);
@@ -4440,8 +4455,11 @@ static void iscsit_logout_post_handler_samecid(
{
int sleep = 1;
- if (!conn->conn_transport->rdma_shutdown)
+ if (!conn->conn_transport->rdma_shutdown) {
sleep = cmpxchg(&conn->tx_thread_active, true, false);
+ if (!sleep)
+ return;
+ }
atomic_set(&conn->conn_logout_remove, 0);
complete(&conn->conn_logout_comp);
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index 9ab7090f7c83..0912de7c0cf8 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -136,7 +136,7 @@ int init_se_kmem_caches(void);
void release_se_kmem_caches(void);
u32 scsi_get_new_index(scsi_index_t);
void transport_subsystem_check_init(void);
-void transport_cmd_finish_abort(struct se_cmd *, int);
+int transport_cmd_finish_abort(struct se_cmd *, int);
unsigned char *transport_dump_cmd_direction(struct se_cmd *);
void transport_dump_dev_state(struct se_device *, char *, int *);
void transport_dump_dev_info(struct se_device *, struct se_lun *,
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index dce1e1b47316..13f47bf4d16b 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -75,7 +75,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr)
kfree(tmr);
}
-static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
+static int core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
{
unsigned long flags;
bool remove = true, send_tas;
@@ -91,7 +91,7 @@ static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
transport_send_task_abort(cmd);
}
- transport_cmd_finish_abort(cmd, remove);
+ return transport_cmd_finish_abort(cmd, remove);
}
static int target_check_cdb_and_preempt(struct list_head *list,
@@ -184,8 +184,8 @@ void core_tmr_abort_task(
cancel_work_sync(&se_cmd->work);
transport_wait_for_tasks(se_cmd);
- transport_cmd_finish_abort(se_cmd, true);
- target_put_sess_cmd(se_cmd);
+ if (!transport_cmd_finish_abort(se_cmd, true))
+ target_put_sess_cmd(se_cmd);
printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for"
" ref_tag: %llu\n", ref_tag);
@@ -281,8 +281,8 @@ static void core_tmr_drain_tmr_list(
cancel_work_sync(&cmd->work);
transport_wait_for_tasks(cmd);
- transport_cmd_finish_abort(cmd, 1);
- target_put_sess_cmd(cmd);
+ if (!transport_cmd_finish_abort(cmd, 1))
+ target_put_sess_cmd(cmd);
}
}
@@ -380,8 +380,8 @@ static void core_tmr_drain_state_list(
cancel_work_sync(&cmd->work);
transport_wait_for_tasks(cmd);
- core_tmr_handle_tas_abort(cmd, tas);
- target_put_sess_cmd(cmd);
+ if (!core_tmr_handle_tas_abort(cmd, tas))
+ target_put_sess_cmd(cmd);
}
}
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 6025935036c9..f1b3a46bdcaf 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -651,9 +651,10 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd)
percpu_ref_put(&lun->lun_ref);
}
-void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
+int transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
{
bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF);
+ int ret = 0;
if (cmd->se_cmd_flags & SCF_SE_LUN_CMD)
transport_lun_remove_cmd(cmd);
@@ -665,9 +666,11 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
cmd->se_tfo->aborted_task(cmd);
if (transport_cmd_check_stop_to_fabric(cmd))
- return;
+ return 1;
if (remove && ack_kref)
- transport_put_cmd(cmd);
+ ret = transport_put_cmd(cmd);
+
+ return ret;
}
static void target_complete_failure_work(struct work_struct *work)
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 734cbf8d9676..dd9f1bebb5a3 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -344,7 +344,7 @@ static int autofs_dev_ioctl_fail(struct file *fp,
int status;
token = (autofs_wqt_t) param->fail.token;
- status = param->fail.status ? param->fail.status : -ENOENT;
+ status = param->fail.status < 0 ? param->fail.status : -ENOENT;
return autofs4_wait_release(sbi, token, status);
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 519599dddd36..0a7404ef9335 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -263,7 +263,10 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
kfree(vecs);
if (unlikely(bio.bi_error))
- return bio.bi_error;
+ ret = bio.bi_error;
+
+ bio_uninit(&bio);
+
return ret;
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0fd081bd2a2f..fcef70602b27 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3271,7 +3271,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
if (!is_sync_kiocb(iocb))
ctx->iocb = iocb;
- if (to->type & ITER_IOVEC)
+ if (to->type == ITER_IOVEC)
ctx->should_dirty = true;
rc = setup_aio_ctx_iter(ctx, to, READ);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index b08531977daa..3b147dc6af63 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -810,7 +810,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
if (!pages) {
pages = vmalloc(max_pages * sizeof(struct page *));
- if (!bv) {
+ if (!pages) {
kvfree(bv);
return -ENOMEM;
}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 27bc360c7ffd..a723df3e0197 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -849,8 +849,13 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid, __u16 search_flags,
struct cifs_search_info *srch_inf)
{
- return CIFSFindFirst(xid, tcon, path, cifs_sb,
- &fid->netfid, search_flags, srch_inf, true);
+ int rc;
+
+ rc = CIFSFindFirst(xid, tcon, path, cifs_sb,
+ &fid->netfid, search_flags, srch_inf, true);
+ if (rc)
+ cifs_dbg(FYI, "find first failed=%d\n", rc);
+ return rc;
}
static int
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c58691834eb2..7e48561abd29 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -982,7 +982,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
kfree(utf16_path);
if (rc) {
- cifs_dbg(VFS, "open dir failed\n");
+ cifs_dbg(FYI, "open dir failed rc=%d\n", rc);
return rc;
}
@@ -992,7 +992,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_query_directory(xid, tcon, fid->persistent_fid,
fid->volatile_fid, 0, srch_inf);
if (rc) {
- cifs_dbg(VFS, "query directory failed\n");
+ cifs_dbg(FYI, "query directory failed rc=%d\n", rc);
SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
}
return rc;
@@ -1809,7 +1809,8 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
sg = init_sg(rqst, sign);
if (!sg) {
- cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc);
+ cifs_dbg(VFS, "%s: Failed to init sg", __func__);
+ rc = -ENOMEM;
goto free_req;
}
@@ -1817,6 +1818,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
iv = kzalloc(iv_len, GFP_KERNEL);
if (!iv) {
cifs_dbg(VFS, "%s: Failed to alloc IV", __func__);
+ rc = -ENOMEM;
goto free_sg;
}
iv[0] = 3;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 3cb5c9e2d4e7..de50e749ff05 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -188,8 +188,6 @@ static int cifs_creation_time_get(struct dentry *dentry, struct inode *inode,
pcreatetime = (__u64 *)value;
*pcreatetime = CIFS_I(inode)->createtime;
return sizeof(__u64);
-
- return rc;
}
diff --git a/fs/dax.c b/fs/dax.c
index 2a6889b3585f..9187f3b07f3e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -859,6 +859,7 @@ int dax_writeback_mapping_range(struct address_space *mapping,
if (ret < 0)
goto out;
}
+ start_index = indices[pvec.nr - 1] + 1;
}
out:
put_dax(dax_dev);
diff --git a/fs/exec.c b/fs/exec.c
index 72934df68471..904199086490 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -220,8 +220,26 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
if (write) {
unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+ unsigned long ptr_size;
struct rlimit *rlim;
+ /*
+ * Since the stack will hold pointers to the strings, we
+ * must account for them as well.
+ *
+ * The size calculation is the entire vma while each arg page is
+ * built, so each time we get here it's calculating how far it
+ * is currently (rather than each call being just the newly
+ * added size from the arg page). As a result, we need to
+ * always add the entire size of the pointers, so that on the
+ * last call to get_arg_page() we'll actually have the entire
+ * correct size.
+ */
+ ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+ if (ptr_size > ULONG_MAX - size)
+ goto fail;
+ size += ptr_size;
+
acct_arg_size(bprm, size / PAGE_SIZE);
/*
@@ -239,13 +257,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
* to work from.
*/
rlim = current->signal->rlim;
- if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
- put_page(page);
- return NULL;
- }
+ if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4)
+ goto fail;
}
return page;
+
+fail:
+ put_page(page);
+ return NULL;
}
static void put_arg_page(struct page *page)
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c14758e08d73..390ac9c39c59 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -753,7 +753,6 @@ static void nfs4_callback_free_slot(struct nfs4_session *session,
* A single slot, so highest used slotid is either 0 or -1
*/
nfs4_free_slot(tbl, slot);
- nfs4_slot_tbl_drain_complete(tbl);
spin_unlock(&tbl->slot_tbl_lock);
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 32ccd7754f8a..2ac00bf4ecf1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1946,29 +1946,6 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
}
EXPORT_SYMBOL_GPL(nfs_link);
-static void
-nfs_complete_rename(struct rpc_task *task, struct nfs_renamedata *data)
-{
- struct dentry *old_dentry = data->old_dentry;
- struct dentry *new_dentry = data->new_dentry;
- struct inode *old_inode = d_inode(old_dentry);
- struct inode *new_inode = d_inode(new_dentry);
-
- nfs_mark_for_revalidate(old_inode);
-
- switch (task->tk_status) {
- case 0:
- if (new_inode != NULL)
- nfs_drop_nlink(new_inode);
- d_move(old_dentry, new_dentry);
- nfs_set_verifier(new_dentry,
- nfs_save_change_attribute(data->new_dir));
- break;
- case -ENOENT:
- nfs_dentry_handle_enoent(old_dentry);
- }
-}
-
/*
* RENAME
* FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -1999,7 +1976,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
{
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
- struct dentry *dentry = NULL;
+ struct dentry *dentry = NULL, *rehash = NULL;
struct rpc_task *task;
int error = -EBUSY;
@@ -2022,8 +1999,10 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
* To prevent any new references to the target during the
* rename, we unhash the dentry in advance.
*/
- if (!d_unhashed(new_dentry))
+ if (!d_unhashed(new_dentry)) {
d_drop(new_dentry);
+ rehash = new_dentry;
+ }
if (d_count(new_dentry) > 2) {
int err;
@@ -2040,6 +2019,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out;
new_dentry = dentry;
+ rehash = NULL;
new_inode = NULL;
}
}
@@ -2048,8 +2028,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_inode != NULL)
NFS_PROTO(new_inode)->return_delegation(new_inode);
- task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
- nfs_complete_rename);
+ task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
if (IS_ERR(task)) {
error = PTR_ERR(task);
goto out;
@@ -2059,9 +2038,27 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (error == 0)
error = task->tk_status;
rpc_put_task(task);
+ nfs_mark_for_revalidate(old_inode);
out:
+ if (rehash)
+ d_rehash(rehash);
trace_nfs_rename_exit(old_dir, old_dentry,
new_dir, new_dentry, error);
+ if (!error) {
+ if (new_inode != NULL)
+ nfs_drop_nlink(new_inode);
+ /*
+ * The d_move() should be here instead of in an async RPC completion
+ * handler because we need the proper locks to move the dentry. If
+ * we're interrupted by a signal, the async RPC completion handler
+ * should mark the directories for revalidation.
+ */
+ d_move(old_dentry, new_dentry);
+ nfs_set_verifier(new_dentry,
+ nfs_save_change_attribute(new_dir));
+ } else if (error == -ENOENT)
+ nfs_dentry_handle_enoent(old_dentry);
+
/* new dentry created? */
if (dentry)
dput(dentry);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c08c46a3b8cd..dbfa18900e25 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2589,7 +2589,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
/* Except MODE, it seems harmless of setting twice. */
if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE &&
- attrset[1] & FATTR4_WORD1_MODE)
+ (attrset[1] & FATTR4_WORD1_MODE ||
+ attrset[2] & FATTR4_WORD2_MODE_UMASK))
sattr->ia_valid &= ~ATTR_MODE;
if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)
@@ -8416,6 +8417,7 @@ static void nfs4_layoutget_release(void *calldata)
size_t max_pages = max_response_pages(server);
dprintk("--> %s\n", __func__);
+ nfs4_sequence_free_slot(&lgp->res.seq_res);
nfs4_free_pages(lgp->args.layout.pages, max_pages);
pnfs_put_layout_hdr(NFS_I(inode)->layout);
put_nfs_open_context(lgp->args.ctx);
@@ -8490,7 +8492,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
if (status == 0 && lgp->res.layoutp->len)
lseg = pnfs_layout_process(lgp);
- nfs4_sequence_free_slot(&lgp->res.seq_res);
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
if (status)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b34de036501b..cbf82b0d4467 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2134,6 +2134,8 @@ again:
put_rpccred(cred);
switch (status) {
case 0:
+ case -EINTR:
+ case -ERESTARTSYS:
break;
case -ETIMEDOUT:
if (clnt->cl_softrtry)
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 3b7c937a36b5..4689940a953c 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2591,6 +2591,10 @@ void ocfs2_inode_unlock_tracker(struct inode *inode,
struct ocfs2_lock_res *lockres;
lockres = &OCFS2_I(inode)->ip_inode_lockres;
+ /* had_lock means that the current process already took the cluster
+ * lock previously. If had_lock is 1, we have nothing to do here, and
+ * it will get unlocked where we got the lock.
+ */
if (!had_lock) {
ocfs2_remove_holder(lockres, oh);
ocfs2_inode_unlock(inode, ex);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3c5384d9b3a5..f70c3778d600 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1328,20 +1328,21 @@ static int ocfs2_xattr_get(struct inode *inode,
void *buffer,
size_t buffer_size)
{
- int ret;
+ int ret, had_lock;
struct buffer_head *di_bh = NULL;
+ struct ocfs2_lock_holder oh;
- ret = ocfs2_inode_lock(inode, &di_bh, 0);
- if (ret < 0) {
- mlog_errno(ret);
- return ret;
+ had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
+ if (had_lock < 0) {
+ mlog_errno(had_lock);
+ return had_lock;
}
down_read(&OCFS2_I(inode)->ip_xattr_sem);
ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
name, buffer, buffer_size);
up_read(&OCFS2_I(inode)->ip_xattr_sem);
- ocfs2_inode_unlock(inode, 0);
+ ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
brelse(di_bh);
@@ -3537,11 +3538,12 @@ int ocfs2_xattr_set(struct inode *inode,
{
struct buffer_head *di_bh = NULL;
struct ocfs2_dinode *di;
- int ret, credits, ref_meta = 0, ref_credits = 0;
+ int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct inode *tl_inode = osb->osb_tl_inode;
struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
struct ocfs2_refcount_tree *ref_tree = NULL;
+ struct ocfs2_lock_holder oh;
struct ocfs2_xattr_info xi = {
.xi_name_index = name_index,
@@ -3572,8 +3574,9 @@ int ocfs2_xattr_set(struct inode *inode,
return -ENOMEM;
}
- ret = ocfs2_inode_lock(inode, &di_bh, 1);
- if (ret < 0) {
+ had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
+ if (had_lock < 0) {
+ ret = had_lock;
mlog_errno(ret);
goto cleanup_nolock;
}
@@ -3670,7 +3673,7 @@ cleanup:
if (ret)
mlog_errno(ret);
}
- ocfs2_inode_unlock(inode, 1);
+ ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
cleanup_nolock:
brelse(di_bh);
brelse(xbs.xattr_bh);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 09af0f7cd55e..3b91faacc1ba 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1316,9 +1316,12 @@ xfs_vm_bmap(
* The swap code (ab-)uses ->bmap to get a block mapping and then
* bypasseѕ the file system for actual I/O. We really can't allow
* that on reflinks inodes, so we have to skip out here. And yes,
- * 0 is the magic code for a bmap error..
+ * 0 is the magic code for a bmap error.
+ *
+ * Since we don't pass back blockdev info, we can't return bmap
+ * information for rt files either.
*/
- if (xfs_is_reflink_inode(ip))
+ if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
return 0;
filemap_write_and_wait(mapping);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 197f3fffc9a7..408c7820e200 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -210,7 +210,8 @@ struct acpi_device_flags {
u32 of_compatible_ok:1;
u32 coherent_dma:1;
u32 cca_seen:1;
- u32 reserved:20;
+ u32 spi_i2c_slave:1;
+ u32 reserved:19;
};
/* File System */
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index c1da539f5e28..4d97a89da066 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -254,7 +254,7 @@ static inline void atm_return(struct atm_vcc *vcc,int truesize)
static inline int atm_may_send(struct atm_vcc *vcc,unsigned int size)
{
- return (size + atomic_read(&sk_atm(vcc)->sk_wmem_alloc)) <
+ return (size + refcount_read(&sk_atm(vcc)->sk_wmem_alloc)) <
sk_atm(vcc)->sk_sndbuf;
}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d1b04b0e99cf..a7e29fa0981f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -426,6 +426,7 @@ extern void bio_advance(struct bio *, unsigned);
extern void bio_init(struct bio *bio, struct bio_vec *table,
unsigned short max_vecs);
+extern void bio_uninit(struct bio *);
extern void bio_reset(struct bio *);
void bio_chain(struct bio *, struct bio *);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b74a3edcb3da..1ddd36bd2173 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -391,6 +391,8 @@ struct request_queue {
int nr_rqs[2]; /* # allocated [a]sync rqs */
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
+ atomic_t shared_hctx_restart;
+
struct blk_queue_stats *stats;
struct rq_wb *rq_wb;
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index c970a25d2a49..360c082e885c 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -7,6 +7,7 @@
struct sock;
struct cgroup;
struct sk_buff;
+struct bpf_sock_ops_kern;
#ifdef CONFIG_CGROUP_BPF
@@ -42,6 +43,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
int __cgroup_bpf_run_filter_sk(struct sock *sk,
enum bpf_attach_type type);
+int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
+ struct bpf_sock_ops_kern *sock_ops,
+ enum bpf_attach_type type);
+
/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
({ \
@@ -75,6 +80,18 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
__ret; \
})
+#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled && (sock_ops)->sk) { \
+ typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \
+ if (sk_fullsock(__sk)) \
+ __ret = __cgroup_bpf_run_filter_sock_ops(__sk, \
+ sock_ops, \
+ BPF_CGROUP_SOCK_OPS); \
+ } \
+ __ret; \
+})
#else
struct cgroup_bpf {};
@@ -85,6 +102,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#endif /* CONFIG_CGROUP_BPF */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index deca4e7f2845..b69e7a5869ff 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -36,6 +36,7 @@ struct bpf_map_ops {
int fd);
void (*map_fd_put_ptr)(void *ptr);
u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
+ u32 (*map_fd_sys_lookup_elem)(void *ptr);
};
struct bpf_map {
@@ -155,9 +156,14 @@ struct bpf_prog;
struct bpf_insn_access_aux {
enum bpf_reg_type reg_type;
int ctx_field_size;
- int converted_op_size;
};
+static inline void
+bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
+{
+ aux->ctx_field_size = size;
+}
+
struct bpf_verifier_ops {
/* return eBPF function prototype for verification */
const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
@@ -172,7 +178,7 @@ struct bpf_verifier_ops {
u32 (*convert_ctx_access)(enum bpf_access_type type,
const struct bpf_insn *src,
struct bpf_insn *dst,
- struct bpf_prog *prog);
+ struct bpf_prog *prog, u32 *target_size);
int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
};
@@ -288,9 +294,11 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags);
+int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
void bpf_fd_array_map_clear(struct bpf_map *map);
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags);
+int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
* forced to use 'long' read/writes to try to atomically copy long counters.
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 03bf223f18be..3d137c33d664 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -10,6 +10,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops)
#endif
#ifdef CONFIG_BPF_EVENTS
BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1fa26dc562ce..f1fc9baa3509 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -337,6 +337,22 @@ struct bpf_prog_aux;
bpf_size; \
})
+#define bpf_size_to_bytes(bpf_size) \
+({ \
+ int bytes = -EINVAL; \
+ \
+ if (bpf_size == BPF_B) \
+ bytes = sizeof(u8); \
+ else if (bpf_size == BPF_H) \
+ bytes = sizeof(u16); \
+ else if (bpf_size == BPF_W) \
+ bytes = sizeof(u32); \
+ else if (bpf_size == BPF_DW) \
+ bytes = sizeof(u64); \
+ \
+ bytes; \
+})
+
#define BPF_SIZEOF(type) \
({ \
const int __size = bytes_to_bpf_size(sizeof(type)); \
@@ -351,6 +367,13 @@ struct bpf_prog_aux;
__size; \
})
+#define BPF_LDST_BYTES(insn) \
+ ({ \
+ const int __size = bpf_size_to_bytes(BPF_SIZE(insn->code)); \
+ WARN_ON(__size < 0); \
+ __size; \
+ })
+
#define __BPF_MAP_0(m, v, ...) v
#define __BPF_MAP_1(m, v, t, a, ...) m(t, a)
#define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__)
@@ -401,6 +424,18 @@ struct bpf_prog_aux;
#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__)
#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__)
+#define bpf_ctx_range(TYPE, MEMBER) \
+ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
+#define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2) \
+ offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1
+
+#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \
+ ({ \
+ BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE)); \
+ *(PTR_SIZE) = (SIZE); \
+ offsetof(TYPE, MEMBER); \
+ })
+
#ifdef CONFIG_COMPAT
/* A struct sock_filter is architecture independent. */
struct compat_sock_fprog {
@@ -564,6 +599,18 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog)
return prog->type == BPF_PROG_TYPE_UNSPEC;
}
+static inline bool
+bpf_ctx_narrow_access_ok(u32 off, u32 size, const u32 size_default)
+{
+ bool off_ok;
+#ifdef __LITTLE_ENDIAN
+ off_ok = (off & (size_default - 1)) == 0;
+#else
+ off_ok = (off & (size_default - 1)) + size == size_default;
+#endif
+ return off_ok && size <= size_default && (size & (size - 1)) == 0;
+}
+
#define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
@@ -898,4 +945,13 @@ static inline int bpf_tell_extensions(void)
return SKF_AD_MAX;
}
+struct bpf_sock_ops_kern {
+ struct sock *sk;
+ u32 op;
+ union {
+ u32 reply;
+ u32 replylong[4];
+ };
+};
+
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 12f6fba6d21a..97caf1821de8 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -18,6 +18,7 @@
#include <linux/skbuff.h>
#include <linux/timer.h>
#include <linux/in.h>
+#include <linux/refcount.h>
#include <uapi/linux/igmp.h>
static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb)
@@ -84,7 +85,7 @@ struct ip_mc_list {
struct ip_mc_list __rcu *next_hash;
struct timer_list timer;
int users;
- atomic_t refcnt;
+ refcount_t refcnt;
spinlock_t lock;
char tm_running;
char reporter;
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index e7c04c4e4bcd..fb3f809e34e4 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -11,6 +11,7 @@
#include <linux/timer.h>
#include <linux/sysctl.h>
#include <linux/rtnetlink.h>
+#include <linux/refcount.h>
struct ipv4_devconf {
void *sysctl;
@@ -22,7 +23,7 @@ struct ipv4_devconf {
struct in_device {
struct net_device *dev;
- atomic_t refcnt;
+ refcount_t refcnt;
int dead;
struct in_ifaddr *ifa_list; /* IP ifaddr chain */
@@ -219,7 +220,7 @@ static inline struct in_device *in_dev_get(const struct net_device *dev)
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (in_dev)
- atomic_inc(&in_dev->refcnt);
+ refcount_inc(&in_dev->refcnt);
rcu_read_unlock();
return in_dev;
}
@@ -240,12 +241,12 @@ void in_dev_finish_destroy(struct in_device *idev);
static inline void in_dev_put(struct in_device *idev)
{
- if (atomic_dec_and_test(&idev->refcnt))
+ if (refcount_dec_and_test(&idev->refcnt))
in_dev_finish_destroy(idev);
}
-#define __in_dev_put(idev) atomic_dec(&(idev)->refcnt)
-#define in_dev_hold(idev) atomic_inc(&(idev)->refcnt)
+#define __in_dev_put(idev) refcount_dec(&(idev)->refcnt)
+#define in_dev_hold(idev) refcount_inc(&(idev)->refcnt)
#endif /* __KERNEL__ */
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 556e1c31b5d0..f31a0b5377e1 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1103,6 +1103,9 @@ enum mlx5_mcam_feature_groups {
#define MLX5_CAP_FPGA(mdev, cap) \
MLX5_GET(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap)
+#define MLX5_CAP64_FPGA(mdev, cap) \
+ MLX5_GET64(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap)
+
enum {
MLX5_CMD_STAT_OK = 0x0,
MLX5_CMD_STAT_INT_ERR = 0x1,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 750701b3b863..df6ce59a1f95 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -44,6 +44,7 @@
#include <linux/workqueue.h>
#include <linux/mempool.h>
#include <linux/interrupt.h>
+#include <linux/idr.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/doorbell.h>
@@ -110,6 +111,7 @@ enum {
MLX5_REG_DCBX_APP = 0x4021,
MLX5_REG_FPGA_CAP = 0x4022,
MLX5_REG_FPGA_CTRL = 0x4023,
+ MLX5_REG_FPGA_ACCESS_REG = 0x4024,
MLX5_REG_PCAP = 0x5001,
MLX5_REG_PMTU = 0x5003,
MLX5_REG_PTYS = 0x5004,
@@ -737,6 +739,14 @@ struct mlx5e_resources {
struct mlx5_sq_bfreg bfreg;
};
+#define MLX5_MAX_RESERVED_GIDS 8
+
+struct mlx5_rsvd_gids {
+ unsigned int start;
+ unsigned int count;
+ struct ida ida;
+};
+
struct mlx5_core_dev {
struct pci_dev *pdev;
/* sync pci state */
@@ -766,6 +776,10 @@ struct mlx5_core_dev {
atomic_t num_qps;
u32 issi;
struct mlx5e_resources mlx5e_res;
+ struct {
+ struct mlx5_rsvd_gids reserved_gids;
+ atomic_t roce_en;
+ } roce;
#ifdef CONFIG_MLX5_FPGA
struct mlx5_fpga_device *fpga;
#endif
@@ -932,6 +946,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev);
void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
+void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
struct mlx5_buf *buf, int node);
int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
@@ -1045,6 +1060,11 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
bool map_wc, bool fast_path);
void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
+unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev);
+int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
+ u8 roce_version, u8 roce_l3_type, const u8 *gid,
+ const u8 *mac, bool vlan, u16 vlan_id);
+
static inline int fw_initializing(struct mlx5_core_dev *dev)
{
return ioread32be(&dev->iseg->initializing) >> 31;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index d6b99d5d0f24..87869c04849a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -232,6 +232,11 @@ enum {
MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e,
MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940,
MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
+ MLX5_CMD_OP_FPGA_CREATE_QP = 0x960,
+ MLX5_CMD_OP_FPGA_MODIFY_QP = 0x961,
+ MLX5_CMD_OP_FPGA_QUERY_QP = 0x962,
+ MLX5_CMD_OP_FPGA_DESTROY_QP = 0x963,
+ MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS = 0x964,
MLX5_CMD_OP_MAX
};
@@ -600,7 +605,10 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
u8 tunnel_statless_gre[0x1];
u8 tunnel_stateless_vxlan[0x1];
- u8 reserved_at_20[0x20];
+ u8 swp[0x1];
+ u8 swp_csum[0x1];
+ u8 swp_lso[0x1];
+ u8 reserved_at_23[0x1d];
u8 reserved_at_40[0x10];
u8 lro_min_mss_size[0x10];
@@ -2433,7 +2441,8 @@ struct mlx5_ifc_sqc_bits {
u8 min_wqe_inline_mode[0x3];
u8 state[0x4];
u8 reg_umr[0x1];
- u8 reserved_at_d[0x13];
+ u8 allow_swp[0x1];
+ u8 reserved_at_e[0x12];
u8 reserved_at_20[0x8];
u8 user_index[0x18];
@@ -8304,6 +8313,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
struct mlx5_ifc_sltp_reg_bits sltp_reg;
struct mlx5_ifc_mtpps_reg_bits mtpps_reg;
struct mlx5_ifc_mtppse_reg_bits mtppse_reg;
+ struct mlx5_ifc_fpga_access_reg_bits fpga_access_reg;
struct mlx5_ifc_fpga_ctrl_bits fpga_ctrl_bits;
struct mlx5_ifc_fpga_cap_bits fpga_cap_bits;
struct mlx5_ifc_mcqi_reg_bits mcqi_reg;
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index 0032d10ac6cf..255a88d08078 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -32,6 +32,14 @@
#ifndef MLX5_IFC_FPGA_H
#define MLX5_IFC_FPGA_H
+enum {
+ MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX = 0x2c9,
+};
+
+enum {
+ MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC = 0x2,
+};
+
struct mlx5_ifc_fpga_shell_caps_bits {
u8 max_num_qps[0x10];
u8 reserved_at_10[0x8];
@@ -108,6 +116,15 @@ struct mlx5_ifc_fpga_cap_bits {
u8 reserved_at_500[0x300];
};
+enum {
+ MLX5_FPGA_CTRL_OPERATION_LOAD = 0x1,
+ MLX5_FPGA_CTRL_OPERATION_RESET = 0x2,
+ MLX5_FPGA_CTRL_OPERATION_FLASH_SELECT = 0x3,
+ MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON = 0x4,
+ MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF = 0x5,
+ MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX = 0x6,
+};
+
struct mlx5_ifc_fpga_ctrl_bits {
u8 reserved_at_0[0x8];
u8 operation[0x8];
@@ -141,4 +158,275 @@ struct mlx5_ifc_fpga_error_event_bits {
u8 reserved_at_60[0x80];
};
+#define MLX5_FPGA_ACCESS_REG_SIZE_MAX 64
+
+struct mlx5_ifc_fpga_access_reg_bits {
+ u8 reserved_at_0[0x20];
+
+ u8 reserved_at_20[0x10];
+ u8 size[0x10];
+
+ u8 address[0x40];
+
+ u8 data[0][0x8];
+};
+
+enum mlx5_ifc_fpga_qp_state {
+ MLX5_FPGA_QPC_STATE_INIT = 0x0,
+ MLX5_FPGA_QPC_STATE_ACTIVE = 0x1,
+ MLX5_FPGA_QPC_STATE_ERROR = 0x2,
+};
+
+enum mlx5_ifc_fpga_qp_type {
+ MLX5_FPGA_QPC_QP_TYPE_SHELL_QP = 0x0,
+ MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP = 0x1,
+};
+
+enum mlx5_ifc_fpga_qp_service_type {
+ MLX5_FPGA_QPC_ST_RC = 0x0,
+};
+
+struct mlx5_ifc_fpga_qpc_bits {
+ u8 state[0x4];
+ u8 reserved_at_4[0x1b];
+ u8 qp_type[0x1];
+
+ u8 reserved_at_20[0x4];
+ u8 st[0x4];
+ u8 reserved_at_28[0x10];
+ u8 traffic_class[0x8];
+
+ u8 ether_type[0x10];
+ u8 prio[0x3];
+ u8 dei[0x1];
+ u8 vid[0xc];
+
+ u8 reserved_at_60[0x20];
+
+ u8 reserved_at_80[0x8];
+ u8 next_rcv_psn[0x18];
+
+ u8 reserved_at_a0[0x8];
+ u8 next_send_psn[0x18];
+
+ u8 reserved_at_c0[0x10];
+ u8 pkey[0x10];
+
+ u8 reserved_at_e0[0x8];
+ u8 remote_qpn[0x18];
+
+ u8 reserved_at_100[0x15];
+ u8 rnr_retry[0x3];
+ u8 reserved_at_118[0x5];
+ u8 retry_count[0x3];
+
+ u8 reserved_at_120[0x20];
+
+ u8 reserved_at_140[0x10];
+ u8 remote_mac_47_32[0x10];
+
+ u8 remote_mac_31_0[0x20];
+
+ u8 remote_ip[16][0x8];
+
+ u8 reserved_at_200[0x40];
+
+ u8 reserved_at_240[0x10];
+ u8 fpga_mac_47_32[0x10];
+
+ u8 fpga_mac_31_0[0x20];
+
+ u8 fpga_ip[16][0x8];
+};
+
+struct mlx5_ifc_fpga_create_qp_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_create_qp_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x8];
+ u8 fpga_qpn[0x18];
+
+ u8 reserved_at_60[0x20];
+
+ struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_modify_qp_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x8];
+ u8 fpga_qpn[0x18];
+
+ u8 field_select[0x20];
+
+ struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_modify_qp_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_fpga_query_qp_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x8];
+ u8 fpga_qpn[0x18];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_fpga_query_qp_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_query_qp_counters_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 clear[0x1];
+ u8 reserved_at_41[0x7];
+ u8 fpga_qpn[0x18];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_fpga_query_qp_counters_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+
+ u8 rx_ack_packets[0x40];
+
+ u8 rx_send_packets[0x40];
+
+ u8 tx_ack_packets[0x40];
+
+ u8 tx_send_packets[0x40];
+
+ u8 rx_total_drop[0x40];
+
+ u8 reserved_at_1c0[0x1c0];
+};
+
+struct mlx5_ifc_fpga_destroy_qp_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x8];
+ u8 fpga_qpn[0x18];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_fpga_destroy_qp_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_ipsec_extended_cap_bits {
+ u8 encapsulation[0x20];
+
+ u8 reserved_0[0x15];
+ u8 ipv4_fragment[0x1];
+ u8 ipv6[0x1];
+ u8 esn[0x1];
+ u8 lso[0x1];
+ u8 transport_and_tunnel_mode[0x1];
+ u8 tunnel_mode[0x1];
+ u8 transport_mode[0x1];
+ u8 ah_esp[0x1];
+ u8 esp[0x1];
+ u8 ah[0x1];
+ u8 ipv4_options[0x1];
+
+ u8 auth_alg[0x20];
+
+ u8 enc_alg[0x20];
+
+ u8 sa_cap[0x20];
+
+ u8 reserved_1[0x10];
+ u8 number_of_ipsec_counters[0x10];
+
+ u8 ipsec_counters_addr_low[0x20];
+ u8 ipsec_counters_addr_high[0x20];
+};
+
+struct mlx5_ifc_ipsec_counters_bits {
+ u8 dec_in_packets[0x40];
+
+ u8 dec_out_packets[0x40];
+
+ u8 dec_bypass_packets[0x40];
+
+ u8 enc_in_packets[0x40];
+
+ u8 enc_out_packets[0x40];
+
+ u8 enc_bypass_packets[0x40];
+
+ u8 drop_dec_packets[0x40];
+
+ u8 failed_auth_dec_packets[0x40];
+
+ u8 drop_enc_packets[0x40];
+
+ u8 success_add_sa[0x40];
+
+ u8 fail_add_sa[0x40];
+
+ u8 success_delete_sa[0x40];
+
+ u8 fail_delete_sa[0x40];
+
+ u8 dropped_cmd[0x40];
+};
+
#endif /* MLX5_IFC_FPGA_H */
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 1f637f4d1265..6f41270d80c0 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -225,10 +225,20 @@ enum {
MLX5_ETH_WQE_INSERT_VLAN = 1 << 15,
};
+enum {
+ MLX5_ETH_WQE_SWP_INNER_L3_IPV6 = 1 << 0,
+ MLX5_ETH_WQE_SWP_INNER_L4_UDP = 1 << 1,
+ MLX5_ETH_WQE_SWP_OUTER_L3_IPV6 = 1 << 4,
+ MLX5_ETH_WQE_SWP_OUTER_L4_UDP = 1 << 5,
+};
+
struct mlx5_wqe_eth_seg {
- u8 rsvd0[4];
+ u8 swp_outer_l4_offset;
+ u8 swp_outer_l3_offset;
+ u8 swp_inner_l4_offset;
+ u8 swp_inner_l3_offset;
u8 cs_flags;
- u8 rsvd1;
+ u8 swp_flags;
__be16 mss;
__be32 rsvd2;
union {
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 996711d8a7b4..41d04e9d088a 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -1,7 +1,6 @@
#ifndef _NFNETLINK_H
#define _NFNETLINK_H
-
#include <linux/netlink.h>
#include <linux/capability.h>
#include <net/netlink.h>
@@ -10,13 +9,16 @@
struct nfnl_callback {
int (*call)(struct net *net, struct sock *nl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[]);
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack);
int (*call_rcu)(struct net *net, struct sock *nl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[]);
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack);
int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[]);
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack);
const struct nla_policy *policy; /* netlink attribute policy */
const u_int16_t attr_count; /* number of nlattr's */
};
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index e0cbf17af780..2c2a5514b0df 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -122,8 +122,6 @@ extern unsigned int ebt_do_table(struct sk_buff *skb,
#define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS))
/* Clear the bit in the hook mask that tells if the rule is on a base chain */
#define CLEAR_BASE_CHAIN_BIT (par->hook_mask &= ~(1 << NF_BR_NUMHOOKS))
-/* True if the target is not a standard target */
-#define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0)
static inline bool ebt_invalid_target(int target)
{
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 1828900c9411..27c0aaa22cb0 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -11,6 +11,7 @@
#include <linux/interrupt.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
+#include <linux/refcount.h>
union inet_addr {
__u32 all[4];
@@ -34,7 +35,7 @@ struct netpoll {
};
struct netpoll_info {
- atomic_t refcnt;
+ refcount_t refcnt;
struct semaphore dev_lock;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 1d8d70193782..2a9567bb8186 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -372,6 +372,7 @@ struct phy_c45_device_ids {
* has_fixups: Set to true if this phy has fixups/quirks.
* suspended: Set to true if this phy has been suspended successfully.
* sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal.
+ * loopback_enabled: Set to true if this phy has been put into loopback mode successfully.
* state: state of the PHY for management purposes
* dev_flags: Device-specific flags used by the PHY driver.
* link_timeout: The number of timer firings to wait before the
@@ -409,6 +410,7 @@ struct phy_device {
bool has_fixups;
bool suspended;
bool sysfs_links;
+ bool loopback_enabled;
enum phy_state state;
@@ -648,6 +650,7 @@ struct phy_driver {
int (*set_tunable)(struct phy_device *dev,
struct ethtool_tunable *tuna,
const void *data);
+ int (*set_loopback)(struct phy_device *dev, bool enable);
};
#define to_phy_driver(d) container_of(to_mdio_common_driver(d), \
struct phy_driver, mdiodrv)
@@ -793,6 +796,7 @@ void phy_device_remove(struct phy_device *phydev);
int phy_init_hw(struct phy_device *phydev);
int phy_suspend(struct phy_device *phydev);
int phy_resume(struct phy_device *phydev);
+int phy_loopback(struct phy_device *phydev, bool enable);
struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
phy_interface_t interface);
struct phy_device *phy_find_first(struct mii_bus *bus);
@@ -847,6 +851,7 @@ int genphy_update_link(struct phy_device *phydev);
int genphy_read_status(struct phy_device *phydev);
int genphy_suspend(struct phy_device *phydev);
int genphy_resume(struct phy_device *phydev);
+int genphy_loopback(struct phy_device *phydev, bool enable);
int genphy_soft_reset(struct phy_device *phydev);
static inline int genphy_no_soft_reset(struct phy_device *phydev)
{
diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h
index a6f9d633f5be..9e75ac8d19be 100644
--- a/include/linux/platform_data/nfcmrvl.h
+++ b/include/linux/platform_data/nfcmrvl.h
@@ -23,7 +23,7 @@ struct nfcmrvl_platform_data {
*/
/* GPIO that is wired to RESET_N signal */
- unsigned int reset_n_io;
+ int reset_n_io;
/* Tell if transport is muxed in HCI one */
unsigned int hci_muxed;
diff --git a/include/linux/platform_data/st-nci.h b/include/linux/platform_data/st-nci.h
deleted file mode 100644
index f6494b347c06..000000000000
--- a/include/linux/platform_data/st-nci.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Driver include for ST NCI NFC chip family.
- *
- * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _ST_NCI_H_
-#define _ST_NCI_H_
-
-#define ST_NCI_DRIVER_NAME "st_nci"
-
-struct st_nci_nfc_platform_data {
- unsigned int gpio_reset;
- unsigned int irq_polarity;
- bool is_ese_present;
- bool is_uicc_present;
-};
-
-#endif /* _ST_NCI_H_ */
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index a567cbf8c5b4..39e2a2ac2471 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -38,6 +38,8 @@
#include <linux/slab.h>
/* dma_addr_t manip */
+#define PTR_LO(x) ((u32)(((uintptr_t)(x)) & 0xffffffff))
+#define PTR_HI(x) ((u32)((((uintptr_t)(x)) >> 16) >> 16))
#define DMA_LO_LE(x) cpu_to_le32(lower_32_bits(x))
#define DMA_HI_LE(x) cpu_to_le32(upper_32_bits(x))
#define DMA_REGPAIR_LE(x, val) do { \
@@ -778,7 +780,7 @@ enum protocol_type {
PROTOCOLID_ROCE,
PROTOCOLID_CORE,
PROTOCOLID_ETH,
- PROTOCOLID_RESERVED4,
+ PROTOCOLID_IWARP,
PROTOCOLID_RESERVED5,
PROTOCOLID_PREROCE,
PROTOCOLID_COMMON,
diff --git a/include/linux/qed/iwarp_common.h b/include/linux/qed/iwarp_common.h
new file mode 100644
index 000000000000..b8b3e1cfae90
--- /dev/null
+++ b/include/linux/qed/iwarp_common.h
@@ -0,0 +1,53 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017 QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and /or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IWARP_COMMON__
+#define __IWARP_COMMON__
+#include <linux/qed/rdma_common.h>
+/************************/
+/* IWARP FW CONSTANTS */
+/************************/
+
+#define IWARP_ACTIVE_MODE 0
+#define IWARP_PASSIVE_MODE 1
+
+#define IWARP_SHARED_QUEUE_PAGE_SIZE (0x8000)
+#define IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET (0x4000)
+#define IWARP_SHARED_QUEUE_PAGE_RQ_PBL_MAX_SIZE (0x1000)
+#define IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET (0x5000)
+#define IWARP_SHARED_QUEUE_PAGE_SQ_PBL_MAX_SIZE (0x3000)
+
+#define IWARP_REQ_MAX_INLINE_DATA_SIZE (128)
+#define IWARP_REQ_MAX_SINGLE_SQ_WQE_SIZE (176)
+
+#define IWARP_MAX_QPS (64 * 1024)
+
+#endif /* __IWARP_COMMON__ */
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index 5958b45eb699..dd7a3b86bb9e 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -47,9 +47,10 @@ enum qed_ll2_conn_type {
QED_LL2_TYPE_FCOE,
QED_LL2_TYPE_ISCSI,
QED_LL2_TYPE_TEST,
- QED_LL2_TYPE_ISCSI_OOO,
+ QED_LL2_TYPE_OOO,
QED_LL2_TYPE_RESERVED2,
QED_LL2_TYPE_ROCE,
+ QED_LL2_TYPE_IWARP,
QED_LL2_TYPE_RESERVED3,
MAX_QED_LL2_RX_CONN_TYPE
};
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index ff9be01b5f53..4dd72ba210f5 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -470,6 +470,101 @@ struct qed_rdma_counters_out_params {
#define QED_ROCE_TX_HEAD_FAILURE (1)
#define QED_ROCE_TX_FRAG_FAILURE (2)
+enum qed_iwarp_event_type { /* event codes carried in qed_iwarp_cm_event_params.event */
+ QED_IWARP_EVENT_MPA_REQUEST, /* Passive side request received */
+ QED_IWARP_EVENT_PASSIVE_COMPLETE, /* ack on mpa response */
+ QED_IWARP_EVENT_ACTIVE_COMPLETE, /* Active side reply received */
+ QED_IWARP_EVENT_DISCONNECT,
+ QED_IWARP_EVENT_CLOSE,
+ QED_IWARP_EVENT_IRQ_FULL,
+ QED_IWARP_EVENT_RQ_EMPTY,
+ QED_IWARP_EVENT_LLP_TIMEOUT,
+ QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR,
+ QED_IWARP_EVENT_CQ_OVERFLOW,
+ QED_IWARP_EVENT_QP_CATASTROPHIC,
+ QED_IWARP_EVENT_ACTIVE_MPA_REPLY,
+ QED_IWARP_EVENT_LOCAL_ACCESS_ERROR,
+ QED_IWARP_EVENT_REMOTE_OPERATION_ERROR,
+ QED_IWARP_EVENT_TERMINATE_RECEIVED /* last enumerator; no explicit values, so numbering follows declaration order from 0 */
+};
+
+enum qed_tcp_ip_version { /* IP version selector used by qed_iwarp_cm_info and qed_iwarp_listen_in */
+ QED_TCP_IPV4,
+ QED_TCP_IPV6,
+};
+
+struct qed_iwarp_cm_info { /* connection parameters; embedded in qed_iwarp_connect_in and pointed to by qed_iwarp_cm_event_params */
+ enum qed_tcp_ip_version ip_version; /* QED_TCP_IPV4 or QED_TCP_IPV6 */
+ u32 remote_ip[4]; /* 4 x 32 bits = 128 bits; NOTE(review): IPv4 layout and byte order not shown here - confirm */
+ u32 local_ip[4]; /* same width as remote_ip */
+ u16 remote_port;
+ u16 local_port;
+ u16 vlan;
+ u8 ord; /* NOTE(review): presumably the iWARP outbound RDMA read depth (ORD) - confirm */
+ u8 ird; /* NOTE(review): presumably the iWARP inbound RDMA read depth (IRD) - confirm */
+ u16 private_data_len; /* NOTE(review): presumably the length of private_data in bytes - confirm */
+ const void *private_data; /* read-only pointer; ownership/lifetime not defined here */
+};
+
+struct qed_iwarp_cm_event_params { /* argument passed to an iwarp_event_handler callback */
+ enum qed_iwarp_event_type event; /* which event is being reported */
+ const struct qed_iwarp_cm_info *cm_info; /* read-only connection info for the event */
+ void *ep_context; /* To be passed to accept call */
+ int status; /* NOTE(review): presumably 0 on success / negative errno - confirm */
+};
+
+typedef int (*iwarp_event_handler) (void *context,
+ struct qed_iwarp_cm_event_params *event); /* callback type stored in the event_cb field of qed_iwarp_connect_in and qed_iwarp_listen_in */
+
+struct qed_iwarp_connect_in { /* input parameters of the iwarp_connect operation */
+ iwarp_event_handler event_cb; /* event callback for this connection */
+ void *cb_context; /* NOTE(review): presumably handed back to event_cb as its context argument - confirm */
+ struct qed_rdma_qp *qp;
+ struct qed_iwarp_cm_info cm_info; /* embedded by value, not a pointer */
+ u16 mss;
+ u8 remote_mac_addr[ETH_ALEN];
+ u8 local_mac_addr[ETH_ALEN];
+};
+
+struct qed_iwarp_connect_out { /* output parameters of the iwarp_connect operation */
+ void *ep_context; /* opaque endpoint context; NOTE(review): meaning to the caller not shown here - confirm */
+};
+
+struct qed_iwarp_listen_in { /* input parameters of the iwarp_create_listen operation */
+ iwarp_event_handler event_cb; /* event callback for this listener */
+ void *cb_context; /* passed to event_cb */
+ u32 max_backlog;
+ enum qed_tcp_ip_version ip_version; /* QED_TCP_IPV4 or QED_TCP_IPV6 */
+ u32 ip_addr[4]; /* 4 x 32 bits, same width as the addresses in qed_iwarp_cm_info */
+ u16 port;
+ u16 vlan;
+};
+
+struct qed_iwarp_listen_out { /* output parameters of the iwarp_create_listen operation */
+ void *handle; /* NOTE(review): presumably the handle later given to iwarp_destroy_listen() - confirm */
+};
+
+struct qed_iwarp_accept_in { /* input parameters of the iwarp_accept operation */
+ void *ep_context; /* the ep_context delivered in qed_iwarp_cm_event_params (documented there as passed to the accept call) */
+ void *cb_context;
+ struct qed_rdma_qp *qp;
+ const void *private_data; /* read-only pointer; ownership/lifetime not defined here */
+ u16 private_data_len; /* NOTE(review): presumably the length of private_data in bytes - confirm */
+ u8 ord; /* NOTE(review): presumably the iWARP outbound RDMA read depth (ORD) - confirm */
+ u8 ird; /* NOTE(review): presumably the iWARP inbound RDMA read depth (IRD) - confirm */
+};
+
+struct qed_iwarp_reject_in { /* input parameters of the iwarp_reject operation */
+ void *ep_context; /* NOTE(review): presumably the ep_context from the reported event, as for accept - confirm */
+ void *cb_context;
+ const void *private_data; /* read-only pointer; ownership/lifetime not defined here */
+ u16 private_data_len; /* NOTE(review): presumably the length of private_data in bytes - confirm */
+};
+
+struct qed_iwarp_send_rtr_in { /* input parameters of the iwarp_send_rtr operation */
+ void *ep_context; /* opaque endpoint context identifying the connection */
+};
+
struct qed_roce_ll2_header {
void *vaddr;
dma_addr_t baddr;
@@ -491,6 +586,7 @@ struct qed_roce_ll2_packet {
enum qed_rdma_type {
QED_RDMA_TYPE_ROCE,
+ QED_RDMA_TYPE_IWARP
};
struct qed_dev_rdma_info {
@@ -575,6 +671,24 @@ struct qed_rdma_ops {
int (*ll2_set_mac_filter)(struct qed_dev *cdev,
u8 *old_mac_address, u8 *new_mac_address);
+ int (*iwarp_connect)(void *rdma_cxt,
+ struct qed_iwarp_connect_in *iparams,
+ struct qed_iwarp_connect_out *oparams);
+
+ int (*iwarp_create_listen)(void *rdma_cxt,
+ struct qed_iwarp_listen_in *iparams,
+ struct qed_iwarp_listen_out *oparams);
+
+ int (*iwarp_accept)(void *rdma_cxt,
+ struct qed_iwarp_accept_in *iparams);
+
+ int (*iwarp_reject)(void *rdma_cxt,
+ struct qed_iwarp_reject_in *iparams);
+
+ int (*iwarp_destroy_listen)(void *rdma_cxt, void *handle);
+
+ int (*iwarp_send_rtr)(void *rdma_cxt,
+ struct qed_iwarp_send_rtr_in *iparams);
};
const struct qed_rdma_ops *qed_get_rdma_ops(void);
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 7a4804c4a593..99e866487e2f 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -57,12 +57,12 @@
#include <uapi/linux/sctp.h>
/* Section 3.1. SCTP Common Header Format */
-typedef struct sctphdr {
+struct sctphdr {
__be16 source;
__be16 dest;
__be32 vtag;
__le32 checksum;
-} sctp_sctphdr_t;
+};
static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
{
@@ -70,11 +70,11 @@ static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
}
/* Section 3.2. Chunk Field Descriptions. */
-typedef struct sctp_chunkhdr {
+struct sctp_chunkhdr {
__u8 type;
__u8 flags;
__be16 length;
-} sctp_chunkhdr_t;
+};
/* Section 3.2. Chunk Type Values.
@@ -82,7 +82,7 @@ typedef struct sctp_chunkhdr {
* Value field. It takes a value from 0 to 254. The value of 255 is
* reserved for future use as an extension field.
*/
-typedef enum {
+enum sctp_cid {
SCTP_CID_DATA = 0,
SCTP_CID_INIT = 1,
SCTP_CID_INIT_ACK = 2,
@@ -109,7 +109,7 @@ typedef enum {
SCTP_CID_ASCONF = 0xC1,
SCTP_CID_ASCONF_ACK = 0x80,
SCTP_CID_RECONF = 0x82,
-} sctp_cid_t; /* enum */
+}; /* enum */
/* Section 3.2
@@ -117,12 +117,12 @@ typedef enum {
* the action that must be taken if the processing endpoint does not
* recognize the Chunk Type.
*/
-typedef enum {
+enum {
SCTP_CID_ACTION_DISCARD = 0x00,
SCTP_CID_ACTION_DISCARD_ERR = 0x40,
SCTP_CID_ACTION_SKIP = 0x80,
SCTP_CID_ACTION_SKIP_ERR = 0xc0,
-} sctp_cid_action_t;
+};
enum { SCTP_CID_ACTION_MASK = 0xc0, };
@@ -162,12 +162,12 @@ enum { SCTP_CHUNK_FLAG_T = 0x01 };
* Section 3.2.1 Optional/Variable-length Parameter Format.
*/
-typedef struct sctp_paramhdr {
+struct sctp_paramhdr {
__be16 type;
__be16 length;
-} sctp_paramhdr_t;
+};
-typedef enum {
+enum sctp_param {
/* RFC 2960 Section 3.3.5 */
SCTP_PARAM_HEARTBEAT_INFO = cpu_to_be16(1),
@@ -207,7 +207,7 @@ typedef enum {
SCTP_PARAM_RESET_RESPONSE = cpu_to_be16(0x0010),
SCTP_PARAM_RESET_ADD_OUT_STREAMS = cpu_to_be16(0x0011),
SCTP_PARAM_RESET_ADD_IN_STREAMS = cpu_to_be16(0x0012),
-} sctp_param_t; /* enum */
+}; /* enum */
/* RFC 2960 Section 3.2.1
@@ -216,29 +216,29 @@ typedef enum {
* not recognize the Parameter Type.
*
*/
-typedef enum {
+enum {
SCTP_PARAM_ACTION_DISCARD = cpu_to_be16(0x0000),
SCTP_PARAM_ACTION_DISCARD_ERR = cpu_to_be16(0x4000),
SCTP_PARAM_ACTION_SKIP = cpu_to_be16(0x8000),
SCTP_PARAM_ACTION_SKIP_ERR = cpu_to_be16(0xc000),
-} sctp_param_action_t;
+};
enum { SCTP_PARAM_ACTION_MASK = cpu_to_be16(0xc000), };
/* RFC 2960 Section 3.3.1 Payload Data (DATA) (0) */
-typedef struct sctp_datahdr {
+struct sctp_datahdr {
__be32 tsn;
__be16 stream;
__be16 ssn;
__be32 ppid;
__u8 payload[0];
-} sctp_datahdr_t;
+};
-typedef struct sctp_data_chunk {
- sctp_chunkhdr_t chunk_hdr;
- sctp_datahdr_t data_hdr;
-} sctp_data_chunk_t;
+struct sctp_data_chunk {
+ struct sctp_chunkhdr chunk_hdr;
+ struct sctp_datahdr data_hdr;
+};
/* DATA Chunk Specific Flags */
enum {
@@ -257,54 +257,54 @@ enum { SCTP_DATA_FRAG_MASK = 0x03, };
* This chunk is used to initiate a SCTP association between two
* endpoints.
*/
-typedef struct sctp_inithdr {
+struct sctp_inithdr {
__be32 init_tag;
__be32 a_rwnd;
__be16 num_outbound_streams;
__be16 num_inbound_streams;
__be32 initial_tsn;
__u8 params[0];
-} sctp_inithdr_t;
+};
-typedef struct sctp_init_chunk {
- sctp_chunkhdr_t chunk_hdr;
- sctp_inithdr_t init_hdr;
-} sctp_init_chunk_t;
+struct sctp_init_chunk {
+ struct sctp_chunkhdr chunk_hdr;
+ struct sctp_inithdr init_hdr;
+};
/* Section 3.3.2.1. IPv4 Address Parameter (5) */
typedef struct sctp_ipv4addr_param {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
struct in_addr addr;
} sctp_ipv4addr_param_t;
/* Section 3.3.2.1. IPv6 Address Parameter (6) */
typedef struct sctp_ipv6addr_param {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
struct in6_addr addr;
} sctp_ipv6addr_param_t;
/* Section 3.3.2.1 Cookie Preservative (9) */
typedef struct sctp_cookie_preserve_param {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
__be32 lifespan_increment;
} sctp_cookie_preserve_param_t;
/* Section 3.3.2.1 Host Name Address (11) */
typedef struct sctp_hostname_param {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
uint8_t hostname[0];
} sctp_hostname_param_t;
/* Section 3.3.2.1 Supported Address Types (12) */
typedef struct sctp_supported_addrs_param {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
__be16 types[0];
} sctp_supported_addrs_param_t;
/* Appendix A. ECN Capable (32768) */
typedef struct sctp_ecn_capable_param {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
} sctp_ecn_capable_param_t;
/* ADDIP Section 3.2.6 Adaptation Layer Indication */
@@ -321,19 +321,19 @@ typedef struct sctp_supported_ext_param {
/* AUTH Section 3.1 Random */
typedef struct sctp_random_param {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
__u8 random_val[0];
} sctp_random_param_t;
/* AUTH Section 3.2 Chunk List */
typedef struct sctp_chunks_param {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
__u8 chunks[0];
} sctp_chunks_param_t;
/* AUTH Section 3.3 HMAC Algorithm */
typedef struct sctp_hmac_algo_param {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
__be16 hmac_ids[0];
} sctp_hmac_algo_param_t;
@@ -341,18 +341,18 @@ typedef struct sctp_hmac_algo_param {
* The INIT ACK chunk is used to acknowledge the initiation of an SCTP
* association.
*/
-typedef sctp_init_chunk_t sctp_initack_chunk_t;
+typedef struct sctp_init_chunk sctp_initack_chunk_t;
/* Section 3.3.3.1 State Cookie (7) */
typedef struct sctp_cookie_param {
- sctp_paramhdr_t p;
+ struct sctp_paramhdr p;
__u8 body[0];
} sctp_cookie_param_t;
/* Section 3.3.3.1 Unrecognized Parameters (8) */
typedef struct sctp_unrecognized_param {
- sctp_paramhdr_t param_hdr;
- sctp_paramhdr_t unrecognized;
+ struct sctp_paramhdr param_hdr;
+ struct sctp_paramhdr unrecognized;
} sctp_unrecognized_param_t;
@@ -386,7 +386,7 @@ typedef struct sctp_sackhdr {
} sctp_sackhdr_t;
typedef struct sctp_sack_chunk {
- sctp_chunkhdr_t chunk_hdr;
+ struct sctp_chunkhdr chunk_hdr;
sctp_sackhdr_t sack_hdr;
} sctp_sack_chunk_t;
@@ -399,11 +399,11 @@ typedef struct sctp_sack_chunk {
*/
typedef struct sctp_heartbeathdr {
- sctp_paramhdr_t info;
+ struct sctp_paramhdr info;
} sctp_heartbeathdr_t;
typedef struct sctp_heartbeat_chunk {
- sctp_chunkhdr_t chunk_hdr;
+ struct sctp_chunkhdr chunk_hdr;
sctp_heartbeathdr_t hb_hdr;
} sctp_heartbeat_chunk_t;
@@ -413,7 +413,7 @@ typedef struct sctp_heartbeat_chunk {
* chunk descriptor.
*/
typedef struct sctp_abort_chunk {
- sctp_chunkhdr_t uh;
+ struct sctp_chunkhdr uh;
} sctp_abort_chunk_t;
@@ -425,8 +425,8 @@ typedef struct sctp_shutdownhdr {
} sctp_shutdownhdr_t;
struct sctp_shutdown_chunk_t {
- sctp_chunkhdr_t chunk_hdr;
- sctp_shutdownhdr_t shutdown_hdr;
+ struct sctp_chunkhdr chunk_hdr;
+ sctp_shutdownhdr_t shutdown_hdr;
};
/* RFC 2960. Section 3.3.10 Operation Error (ERROR) (9) */
@@ -438,8 +438,8 @@ typedef struct sctp_errhdr {
} sctp_errhdr_t;
typedef struct sctp_operr_chunk {
- sctp_chunkhdr_t chunk_hdr;
- sctp_errhdr_t err_hdr;
+ struct sctp_chunkhdr chunk_hdr;
+ sctp_errhdr_t err_hdr;
} sctp_operr_chunk_t;
/* RFC 2960 3.3.10 - Operation Error
@@ -528,7 +528,7 @@ typedef struct sctp_ecnehdr {
} sctp_ecnehdr_t;
typedef struct sctp_ecne_chunk {
- sctp_chunkhdr_t chunk_hdr;
+ struct sctp_chunkhdr chunk_hdr;
sctp_ecnehdr_t ence_hdr;
} sctp_ecne_chunk_t;
@@ -540,7 +540,7 @@ typedef struct sctp_cwrhdr {
} sctp_cwrhdr_t;
typedef struct sctp_cwr_chunk {
- sctp_chunkhdr_t chunk_hdr;
+ struct sctp_chunkhdr chunk_hdr;
sctp_cwrhdr_t cwr_hdr;
} sctp_cwr_chunk_t;
@@ -639,7 +639,7 @@ struct sctp_fwdtsn_chunk {
* report status of ASCONF processing.
*/
typedef struct sctp_addip_param {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
__be32 crr_id;
} sctp_addip_param_t;
@@ -649,7 +649,7 @@ typedef struct sctp_addiphdr {
} sctp_addiphdr_t;
typedef struct sctp_addip_chunk {
- sctp_chunkhdr_t chunk_hdr;
+ struct sctp_chunkhdr chunk_hdr;
sctp_addiphdr_t addip_hdr;
} sctp_addip_chunk_t;
@@ -709,7 +709,7 @@ typedef struct sctp_authhdr {
} sctp_authhdr_t;
typedef struct sctp_auth_chunk {
- sctp_chunkhdr_t chunk_hdr;
+ struct sctp_chunkhdr chunk_hdr;
sctp_authhdr_t auth_hdr;
} sctp_auth_chunk_t;
@@ -719,12 +719,12 @@ struct sctp_infox {
};
struct sctp_reconf_chunk {
- sctp_chunkhdr_t chunk_hdr;
+ struct sctp_chunkhdr chunk_hdr;
__u8 params[0];
};
struct sctp_strreset_outreq {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
__u32 request_seq;
__u32 response_seq;
__u32 send_reset_at_tsn;
@@ -732,18 +732,18 @@ struct sctp_strreset_outreq {
};
struct sctp_strreset_inreq {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
__u32 request_seq;
__u16 list_of_streams[0];
};
struct sctp_strreset_tsnreq {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
__u32 request_seq;
};
struct sctp_strreset_addstrm {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
__u32 request_seq;
__u16 number_of_streams;
__u16 reserved;
@@ -760,13 +760,13 @@ enum {
};
struct sctp_strreset_resp {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
__u32 response_seq;
__u32 result;
};
struct sctp_strreset_resptsn {
- sctp_paramhdr_t param_hdr;
+ struct sctp_paramhdr param_hdr;
__u32 response_seq;
__u32 result;
__u32 senders_next_tsn;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a17e235639ae..3d3ceaac13b1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -252,7 +252,7 @@ struct nf_conntrack {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info {
- atomic_t use;
+ refcount_t use;
enum {
BRNF_PROTO_UNCHANGED,
BRNF_PROTO_8021Q,
@@ -761,7 +761,7 @@ struct sk_buff {
unsigned char *head,
*data;
unsigned int truesize;
- atomic_t users;
+ refcount_t users;
};
#ifdef __KERNEL__
@@ -872,9 +872,9 @@ static inline bool skb_unref(struct sk_buff *skb)
{
if (unlikely(!skb))
return false;
- if (likely(atomic_read(&skb->users) == 1))
+ if (likely(refcount_read(&skb->users) == 1))
smp_rmb();
- else if (likely(!atomic_dec_and_test(&skb->users)))
+ else if (likely(!refcount_dec_and_test(&skb->users)))
return false;
return true;
@@ -915,7 +915,7 @@ struct sk_buff_fclones {
struct sk_buff skb2;
- atomic_t fclone_ref;
+ refcount_t fclone_ref;
};
/**
@@ -935,7 +935,7 @@ static inline bool skb_fclone_busy(const struct sock *sk,
fclones = container_of(skb, struct sk_buff_fclones, skb1);
return skb->fclone == SKB_FCLONE_ORIG &&
- atomic_read(&fclones->fclone_ref) > 1 &&
+ refcount_read(&fclones->fclone_ref) > 1 &&
fclones->skb2.sk == sk;
}
@@ -1283,7 +1283,7 @@ static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list,
*/
static inline struct sk_buff *skb_get(struct sk_buff *skb)
{
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
return skb;
}
@@ -1384,7 +1384,7 @@ static inline void __skb_header_release(struct sk_buff *skb)
*/
static inline int skb_shared(const struct sk_buff *skb)
{
- return atomic_read(&skb->users) != 1;
+ return refcount_read(&skb->users) != 1;
}
/**
@@ -2206,6 +2206,11 @@ static inline int skb_mac_offset(const struct sk_buff *skb)
return skb_mac_header(skb) - skb->data;
}
+static inline u32 skb_mac_header_len(const struct sk_buff *skb)
+{
+ return skb->network_header - skb->mac_header; /* MAC header length = network-header offset minus MAC-header offset; no check here that either offset has been set */
+}
+
static inline int skb_mac_header_was_set(const struct sk_buff *skb)
{
return skb->mac_header != (typeof(skb->mac_header))~0U;
@@ -3589,13 +3594,13 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
{
- if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
+ if (nf_bridge && refcount_dec_and_test(&nf_bridge->use))
kfree(nf_bridge);
}
static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
{
if (nf_bridge)
- atomic_inc(&nf_bridge->use);
+ refcount_inc(&nf_bridge->use);
}
#endif /* CONFIG_BRIDGE_NETFILTER */
static inline void nf_reset(struct sk_buff *skb)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 07ef550c6627..93315d6b21a8 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -84,6 +84,7 @@ struct kmem_cache {
int red_left_pad; /* Left redzone padding size */
#ifdef CONFIG_SYSFS
struct kobject kobj; /* For sysfs */
+ struct work_struct kobj_remove_work;
#endif
#ifdef CONFIG_MEMCG
struct memcg_cache_params memcg_params;
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 110f4532188c..f7043ccca81c 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -29,7 +29,6 @@
*/
struct tk_read_base {
struct clocksource *clock;
- u64 (*read)(struct clocksource *cs);
u64 mask;
u64 cycle_last;
u32 mult;
@@ -58,7 +57,7 @@ struct tk_read_base {
* interval.
* @xtime_remainder: Shifted nano seconds left over when rounding
* @cycle_interval
- * @raw_interval: Raw nano seconds accumulated per NTP interval.
+ * @raw_interval: Shifted raw nano seconds accumulated per NTP interval.
* @ntp_error: Difference between accumulated time and NTP time in ntp
* shifted nano seconds.
* @ntp_error_shift: Shift conversion between clock shifted nano seconds and
@@ -100,7 +99,7 @@ struct timekeeper {
u64 cycle_interval;
u64 xtime_interval;
s64 xtime_remainder;
- u32 raw_interval;
+ u64 raw_interval;
/* The ntp_tick_length() value currently being used.
* This cached copy ensures we consistently apply the tick
* length for an entire tick, as ntp_tick_length may change
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 00d232406f18..021f7a88f52c 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -117,6 +117,9 @@ struct cdc_ncm_ctx {
u32 tx_curr_frame_num;
u32 rx_max;
u32 tx_max;
+ u32 tx_curr_size;
+ u32 tx_low_mem_max_cnt;
+ u32 tx_low_mem_val;
u32 max_datagram_size;
u16 tx_max_datagrams;
u16 tx_remainder;
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index fd60eccb59a6..3a385e4767f0 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -4,6 +4,7 @@
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/mutex.h>
+#include <linux/refcount.h>
#include <net/sock.h>
void unix_inflight(struct user_struct *user, struct file *fp);
@@ -21,7 +22,7 @@ extern spinlock_t unix_table_lock;
extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
struct unix_address {
- atomic_t refcnt;
+ refcount_t refcnt;
int len;
unsigned int hash;
struct sockaddr_un name[0];
diff --git a/include/net/arp.h b/include/net/arp.h
index 65619a2de6f4..17d90e4e8dc5 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -28,7 +28,7 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32
rcu_read_lock_bh();
n = __ipv4_neigh_lookup_noref(dev, key);
- if (n && !atomic_inc_not_zero(&n->refcnt))
+ if (n && !refcount_inc_not_zero(&n->refcnt))
n = NULL;
rcu_read_unlock_bh();
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 76c7300626d6..c487bfa2f479 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -5,6 +5,7 @@
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/fib_rules.h>
+#include <linux/refcount.h>
#include <net/flow.h>
#include <net/rtnetlink.h>
@@ -29,7 +30,7 @@ struct fib_rule {
struct fib_rule __rcu *ctarget;
struct net *fr_net;
- atomic_t refcnt;
+ refcount_t refcnt;
u32 pref;
int suppress_ifgroup;
int suppress_prefixlen;
@@ -103,12 +104,12 @@ struct fib_rules_ops {
static inline void fib_rule_get(struct fib_rule *rule)
{
- atomic_inc(&rule->refcnt);
+ refcount_inc(&rule->refcnt);
}
static inline void fib_rule_put(struct fib_rule *rule)
{
- if (atomic_dec_and_test(&rule->refcnt))
+ if (refcount_dec_and_test(&rule->refcnt))
kfree_rcu(rule, rcu);
}
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 975779d0e7b0..440c1e9d0623 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -50,7 +50,7 @@ struct inet_frag_queue {
spinlock_t lock;
struct timer_list timer;
struct hlist_node list;
- atomic_t refcnt;
+ refcount_t refcnt;
struct sk_buff *fragments;
struct sk_buff *fragments_tail;
ktime_t stamp;
@@ -129,7 +129,7 @@ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
{
- if (atomic_dec_and_test(&q->refcnt))
+ if (refcount_dec_and_test(&q->refcnt))
inet_frag_destroy(q, f);
}
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 1178931288cb..5026b1f08bb8 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -32,7 +32,7 @@
#include <net/tcp_states.h>
#include <net/netns/hash.h>
-#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <asm/byteorder.h>
/* This is for all connections with a full identity, no wildcards.
@@ -334,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net,
sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
dport, dif, &refcounted);
- if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
+ if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
}
@@ -359,7 +359,6 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
refcounted);
}
-u32 sk_ehashfn(const struct sock *sk);
u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
const struct in6_addr *faddr, const __be16 fport);
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 235c7811a86a..f2a215fc78e4 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -46,7 +46,7 @@ struct inet_peer {
struct rcu_head gc_rcu;
};
/*
- * Once inet_peer is queued for deletion (refcnt == -1), following field
+ * Once inet_peer is queued for deletion (refcnt == 0), following field
* is not available: rid
* We can share memory with rcu_head to help keep inet_peer small.
*/
@@ -60,7 +60,7 @@ struct inet_peer {
/* following fields might be frequently dirtied */
__u32 dtime; /* the time of last use of not referenced entries */
- atomic_t refcnt;
+ refcount_t refcnt;
};
struct inet_peer_base {
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 1036c902d2c9..31b1bb11ba3f 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -384,7 +384,7 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons
rcu_read_lock_bh();
n = __ipv6_neigh_lookup_noref(dev, pkey);
- if (n && !atomic_inc_not_zero(&n->refcnt))
+ if (n && !refcount_inc_not_zero(&n->refcnt))
n = NULL;
rcu_read_unlock_bh();
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 639b67564a7d..afc39e3a3f7c 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -17,6 +17,7 @@
*/
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rcupdate.h>
@@ -76,7 +77,7 @@ struct neigh_parms {
void *sysctl_table;
int dead;
- atomic_t refcnt;
+ refcount_t refcnt;
struct rcu_head rcu_head;
int reachable_time;
@@ -137,7 +138,7 @@ struct neighbour {
unsigned long confirmed;
unsigned long updated;
rwlock_t lock;
- atomic_t refcnt;
+ refcount_t refcnt;
struct sk_buff_head arp_queue;
unsigned int arp_queue_len_bytes;
struct timer_list timer;
@@ -395,12 +396,12 @@ void neigh_sysctl_unregister(struct neigh_parms *p);
static inline void __neigh_parms_put(struct neigh_parms *parms)
{
- atomic_dec(&parms->refcnt);
+ refcount_dec(&parms->refcnt);
}
static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms)
{
- atomic_inc(&parms->refcnt);
+ refcount_inc(&parms->refcnt);
return parms;
}
@@ -410,18 +411,18 @@ static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms)
static inline void neigh_release(struct neighbour *neigh)
{
- if (atomic_dec_and_test(&neigh->refcnt))
+ if (refcount_dec_and_test(&neigh->refcnt))
neigh_destroy(neigh);
}
static inline struct neighbour * neigh_clone(struct neighbour *neigh)
{
if (neigh)
- atomic_inc(&neigh->refcnt);
+ refcount_inc(&neigh->refcnt);
return neigh;
}
-#define neigh_hold(n) atomic_inc(&(n)->refcnt)
+#define neigh_hold(n) refcount_inc(&(n)->refcnt)
static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index fe80bb48ab1f..31a2b51bef2c 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -5,6 +5,7 @@
#define __NET_NET_NAMESPACE_H
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/workqueue.h>
#include <linux/list.h>
#include <linux/sysctl.h>
@@ -46,7 +47,7 @@ struct netns_ipvs;
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
struct net {
- atomic_t passive; /* To decided when the network
+ refcount_t passive; /* To decided when the network
* namespace should be freed.
*/
atomic_t count; /* To decided when the network
@@ -158,6 +159,7 @@ extern struct net init_net;
struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
struct net *old_net);
+void net_ns_barrier(void);
#else /* CONFIG_NET_NS */
#include <linux/sched.h>
#include <linux/nsproxy.h>
@@ -168,6 +170,8 @@ static inline struct net *copy_net_ns(unsigned long flags,
return ERR_PTR(-EINVAL);
return old_net;
}
+
+static inline void net_ns_barrier(void) {}
#endif /* CONFIG_NET_NS */
diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index 0b0c35c37125..925524ede6c8 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -8,7 +8,7 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC);
if (likely(skb->nf_bridge))
- atomic_set(&(skb->nf_bridge->use), 1);
+ refcount_set(&(skb->nf_bridge->use), 1);
return skb->nf_bridge;
}
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 8ece3612d0cd..48407569585d 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -225,9 +225,13 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
u32 seq);
/* Iterate over all conntracks: if iter returns true, it's deleted. */
-void nf_ct_iterate_cleanup(struct net *net,
- int (*iter)(struct nf_conn *i, void *data),
- void *data, u32 portid, int report);
+void nf_ct_iterate_cleanup_net(struct net *net,
+ int (*iter)(struct nf_conn *i, void *data),
+ void *data, u32 portid, int report);
+
+/* also set unconfirmed conntracks as dying. Only use in module exit path. */
+void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data),
+ void *data);
struct nf_conntrack_zone;
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index e01559b4d781..6d14b36e3a49 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -71,7 +71,7 @@ struct nf_conntrack_l3proto {
struct module *me;
};
-extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX];
+extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
#ifdef CONFIG_SYSCTL
/* Protocol pernet registration. */
@@ -100,7 +100,7 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
static inline struct nf_conntrack_l3proto *
__nf_ct_l3proto_find(u_int16_t l3proto)
{
- if (unlikely(l3proto >= AF_MAX))
+ if (unlikely(l3proto >= NFPROTO_NUMPROTO))
return &nf_conntrack_l3proto_generic;
return rcu_dereference(nf_ct_l3protos[l3proto]);
}
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 8a8bab8d7b15..bd5be0d691d5 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -281,6 +281,23 @@ struct nft_set_estimate {
enum nft_set_class space;
};
+/**
+ * struct nft_set_type - nf_tables set type
+ *
+ * @select_ops: function to select nft_set_ops
+ * @ops: default ops, used when no select_ops function is present
+ * @list: used internally
+ * @owner: module reference
+ */
+struct nft_set_type {
+ const struct nft_set_ops *(*select_ops)(const struct nft_ctx *,
+ const struct nft_set_desc *desc,
+ u32 flags);
+ const struct nft_set_ops *ops;
+ struct list_head list;
+ struct module *owner;
+};
+
struct nft_set_ext;
struct nft_expr;
@@ -297,8 +314,6 @@ struct nft_expr;
* @privsize: function to return size of set private data
* @init: initialize private data of new set instance
* @destroy: destroy private data of set instance
- * @list: nf_tables_set_ops list node
- * @owner: module reference
* @elemsize: element private size
* @features: features supported by the implementation
*/
@@ -336,7 +351,8 @@ struct nft_set_ops {
struct nft_set *set,
struct nft_set_iter *iter);
- unsigned int (*privsize)(const struct nlattr * const nla[]);
+ unsigned int (*privsize)(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc);
bool (*estimate)(const struct nft_set_desc *desc,
u32 features,
struct nft_set_estimate *est);
@@ -345,14 +361,13 @@ struct nft_set_ops {
const struct nlattr * const nla[]);
void (*destroy)(const struct nft_set *set);
- struct list_head list;
- struct module *owner;
unsigned int elemsize;
u32 features;
+ const struct nft_set_type *type;
};
-int nft_register_set(struct nft_set_ops *ops);
-void nft_unregister_set(struct nft_set_ops *ops);
+int nft_register_set(struct nft_set_type *type);
+void nft_unregister_set(struct nft_set_type *type);
/**
* struct nft_set - nf_tables set instance
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index efe98068880f..72d6435fc16c 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -37,7 +37,7 @@
#include <linux/in6.h>
#include <net/netlink.h>
#include <net/request_sock.h>
-#include <linux/atomic.h>
+#include <linux/refcount.h>
struct cipso_v4_doi;
struct calipso_doi;
@@ -136,7 +136,7 @@ struct netlbl_audit {
*
*/
struct netlbl_lsm_cache {
- atomic_t refcount;
+ refcount_t refcount;
void (*free) (const void *data);
void *data;
};
@@ -295,7 +295,7 @@ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags)
cache = kzalloc(sizeof(*cache), flags);
if (cache)
- atomic_set(&cache->refcount, 1);
+ refcount_set(&cache->refcount, 1);
return cache;
}
@@ -309,7 +309,7 @@ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags)
*/
static inline void netlbl_secattr_cache_free(struct netlbl_lsm_cache *cache)
{
- if (!atomic_dec_and_test(&cache->refcount))
+ if (!refcount_dec_and_test(&cache->refcount))
return;
if (cache->free)
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 53ced67c4ae9..23e22054aa60 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -19,6 +19,7 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/bug.h>
+#include <linux/refcount.h>
#include <net/sock.h>
@@ -89,7 +90,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
return NULL;
req->rsk_listener = NULL;
if (attach_listener) {
- if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) {
+ if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) {
kmem_cache_free(ops->slab, req);
return NULL;
}
@@ -100,7 +101,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
sk_node_init(&req_to_sk(req)->sk_node);
sk_tx_queue_clear(req_to_sk(req));
req->saved_syn = NULL;
- atomic_set(&req->rsk_refcnt, 0);
+ refcount_set(&req->rsk_refcnt, 0);
return req;
}
@@ -108,7 +109,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
static inline void reqsk_free(struct request_sock *req)
{
/* temporary debugging */
- WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0);
+ WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0);
req->rsk_ops->destructor(req);
if (req->rsk_listener)
@@ -119,7 +120,7 @@ static inline void reqsk_free(struct request_sock *req)
static inline void reqsk_put(struct request_sock *req)
{
- if (atomic_dec_and_test(&req->rsk_refcnt))
+ if (refcount_dec_and_test(&req->rsk_refcnt))
reqsk_free(req);
}
diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h
index 9b9fb122b31f..171244bd856f 100644
--- a/include/net/sctp/auth.h
+++ b/include/net/sctp/auth.h
@@ -97,8 +97,10 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
struct sctp_hmac_algo_param *hmacs);
int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc,
__be16 hmac_id);
-int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc);
-int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc);
+int sctp_auth_send_cid(enum sctp_cid chunk,
+ const struct sctp_association *asoc);
+int sctp_auth_recv_cid(enum sctp_cid chunk,
+ const struct sctp_association *asoc);
void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
struct sk_buff *skb,
struct sctp_auth_chunk *auth, gfp_t gfp);
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index d4a20d00461c..d4679e7a5ed5 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -132,7 +132,7 @@ typedef union {
struct sctp_association *asoc;
struct sctp_transport *transport;
struct sctp_bind_addr *bp;
- sctp_init_chunk_t *init;
+ struct sctp_init_chunk *init;
struct sctp_ulpevent *ulpevent;
struct sctp_packet *packet;
sctp_sackhdr_t *sackh;
@@ -173,7 +173,7 @@ SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk)
SCTP_ARG_CONSTRUCTOR(ASOC, struct sctp_association *, asoc)
SCTP_ARG_CONSTRUCTOR(TRANSPORT, struct sctp_transport *, transport)
SCTP_ARG_CONSTRUCTOR(BA, struct sctp_bind_addr *, bp)
-SCTP_ARG_CONSTRUCTOR(PEER_INIT, sctp_init_chunk_t *, init)
+SCTP_ARG_CONSTRUCTOR(PEER_INIT, struct sctp_init_chunk *, init)
SCTP_ARG_CONSTRUCTOR(ULPEVENT, struct sctp_ulpevent *, ulpevent)
SCTP_ARG_CONSTRUCTOR(PACKET, struct sctp_packet *, packet)
SCTP_ARG_CONSTRUCTOR(SACKH, sctp_sackhdr_t *, sackh)
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index b07a745ab69f..9b18044c551e 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -130,7 +130,7 @@ typedef enum {
*/
typedef union {
- sctp_cid_t chunk;
+ enum sctp_cid chunk;
sctp_event_timeout_t timeout;
sctp_event_other_t other;
sctp_event_primitive_t primitive;
@@ -141,7 +141,7 @@ static inline sctp_subtype_t \
SCTP_ST_## _name (_type _arg) \
{ sctp_subtype_t _retval; _retval._elt = _arg; return _retval; }
-SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, sctp_cid_t, chunk)
+SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk)
SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout)
SCTP_SUBTYPE_CONSTRUCTOR(OTHER, sctp_event_other_t, other)
SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive)
@@ -152,7 +152,7 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive)
/* Calculate the actual data size in a data chunk */
#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end)\
- (unsigned long)(c->chunk_hdr)\
- - sizeof(sctp_data_chunk_t)))
+ - sizeof(struct sctp_data_chunk)))
/* Internal error codes */
typedef enum {
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 069582ee5d7f..a9519a06a23b 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -470,7 +470,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member)
#define _sctp_walk_params(pos, chunk, end, member)\
for (pos.v = chunk->member;\
pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
- ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
+ ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\
pos.v += SCTP_PAD4(ntohs(pos.p->length)))
#define sctp_walk_errors(err, chunk_hdr)\
@@ -478,7 +478,7 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
#define _sctp_walk_errors(err, chunk_hdr, end)\
for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
- sizeof(sctp_chunkhdr_t));\
+ sizeof(struct sctp_chunkhdr));\
(void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
ntohs(err->length) >= sizeof(sctp_errhdr_t); \
err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length))))
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 47113f2c4b0a..860f378333b5 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -325,19 +325,17 @@ void sctp_generate_heartbeat_event(unsigned long peer);
void sctp_generate_reconf_event(unsigned long peer);
void sctp_generate_proto_unreach_event(unsigned long peer);
-void sctp_ootb_pkt_free(struct sctp_packet *);
+void sctp_ootb_pkt_free(struct sctp_packet *packet);
-struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *,
- const struct sctp_association *,
- struct sctp_chunk *,
+struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ struct sctp_chunk *chunk,
gfp_t gfp, int *err,
struct sctp_chunk **err_chk_p);
-int sctp_addip_addr_config(struct sctp_association *, sctp_param_t,
- struct sockaddr_storage*, int);
/* 3rd level prototypes */
-__u32 sctp_generate_tag(const struct sctp_endpoint *);
-__u32 sctp_generate_tsn(const struct sctp_endpoint *);
+__u32 sctp_generate_tag(const struct sctp_endpoint *ep);
+__u32 sctp_generate_tsn(const struct sctp_endpoint *ep);
/* Extern declarations for major data structures. */
extern sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES];
@@ -349,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk)
__u16 size;
size = ntohs(chunk->chunk_hdr->length);
- size -= sizeof(sctp_data_chunk_t);
+ size -= sizeof(struct sctp_data_chunk);
return size;
}
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e26763bfabd6..07c11fefa8c4 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -310,9 +310,10 @@ struct sctp_cookie {
__u32 adaptation_ind;
- __u8 auth_random[sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH];
+ __u8 auth_random[sizeof(struct sctp_paramhdr) +
+ SCTP_AUTH_RANDOM_LENGTH];
__u8 auth_hmacs[SCTP_AUTH_NUM_HMACS * sizeof(__u16) + 2];
- __u8 auth_chunks[sizeof(sctp_paramhdr_t) + SCTP_AUTH_MAX_CHUNKS];
+ __u8 auth_chunks[sizeof(struct sctp_paramhdr) + SCTP_AUTH_MAX_CHUNKS];
/* This is a shim for my peer's INIT packet, followed by
* a copy of the raw address list of the association.
@@ -1297,11 +1298,11 @@ int sctp_has_association(struct net *net, const union sctp_addr *laddr,
int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- sctp_cid_t, sctp_init_chunk_t *peer_init,
+ enum sctp_cid cid, struct sctp_init_chunk *peer_init,
struct sctp_chunk *chunk, struct sctp_chunk **err_chunk);
int sctp_process_init(struct sctp_association *, struct sctp_chunk *chunk,
const union sctp_addr *peer,
- sctp_init_chunk_t *init, gfp_t gfp);
+ struct sctp_init_chunk *init, gfp_t gfp);
__u32 sctp_generate_tag(const struct sctp_endpoint *);
__u32 sctp_generate_tsn(const struct sctp_endpoint *);
diff --git a/include/net/sock.h b/include/net/sock.h
index 00d09140e354..60200f4f4028 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -66,6 +66,7 @@
#include <linux/poll.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <net/dst.h>
#include <net/checksum.h>
#include <net/tcp_states.h>
@@ -219,7 +220,7 @@ struct sock_common {
u32 skc_tw_rcv_nxt; /* struct tcp_timewait_sock */
};
- atomic_t skc_refcnt;
+ refcount_t skc_refcnt;
/* private: */
int skc_dontcopy_end[0];
union {
@@ -390,7 +391,7 @@ struct sock {
/* ===== cache line for TX ===== */
int sk_wmem_queued;
- atomic_t sk_wmem_alloc;
+ refcount_t sk_wmem_alloc;
unsigned long sk_tsq_flags;
struct sk_buff *sk_send_head;
struct sk_buff_head sk_write_queue;
@@ -611,7 +612,7 @@ static inline bool __sk_del_node_init(struct sock *sk)
static __always_inline void sock_hold(struct sock *sk)
{
- atomic_inc(&sk->sk_refcnt);
+ refcount_inc(&sk->sk_refcnt);
}
/* Ungrab socket in the context, which assumes that socket refcnt
@@ -619,7 +620,7 @@ static __always_inline void sock_hold(struct sock *sk)
*/
static __always_inline void __sock_put(struct sock *sk)
{
- atomic_dec(&sk->sk_refcnt);
+ refcount_dec(&sk->sk_refcnt);
}
static inline bool sk_del_node_init(struct sock *sk)
@@ -628,7 +629,7 @@ static inline bool sk_del_node_init(struct sock *sk)
if (rc) {
/* paranoid for a while -acme */
- WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+ WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
__sock_put(sk);
}
return rc;
@@ -650,7 +651,7 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk)
if (rc) {
/* paranoid for a while -acme */
- WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+ WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
__sock_put(sk);
}
return rc;
@@ -1144,9 +1145,9 @@ static inline void sk_refcnt_debug_dec(struct sock *sk)
static inline void sk_refcnt_debug_release(const struct sock *sk)
{
- if (atomic_read(&sk->sk_refcnt) != 1)
+ if (refcount_read(&sk->sk_refcnt) != 1)
printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
- sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt));
+ sk->sk_prot->name, sk, refcount_read(&sk->sk_refcnt));
}
#else /* SOCK_REFCNT_DEBUG */
#define sk_refcnt_debug_inc(sk) do { } while (0)
@@ -1636,7 +1637,7 @@ void sock_init_data(struct socket *sock, struct sock *sk);
/* Ungrab socket and destroy it, if it was the last reference. */
static inline void sock_put(struct sock *sk)
{
- if (atomic_dec_and_test(&sk->sk_refcnt))
+ if (refcount_dec_and_test(&sk->sk_refcnt))
sk_free(sk);
}
/* Generic version of sock_put(), dealing with all sockets
@@ -1911,7 +1912,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
*/
static inline int sk_wmem_alloc_get(const struct sock *sk)
{
- return atomic_read(&sk->sk_wmem_alloc) - 1;
+ return refcount_read(&sk->sk_wmem_alloc) - 1;
}
/**
@@ -2055,7 +2056,7 @@ static inline unsigned long sock_wspace(struct sock *sk)
int amt = 0;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
- amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
+ amt = sk->sk_sndbuf - refcount_read(&sk->sk_wmem_alloc);
if (amt < 0)
amt = 0;
}
@@ -2136,7 +2137,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
*/
static inline bool sock_writeable(const struct sock *sk)
{
- return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
+ return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
}
static inline gfp_t gfp_any(void)
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index c784a6ac6ef1..8ae9e3b6392e 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -217,6 +217,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
bool switchdev_port_same_parent_id(struct net_device *a,
struct net_device *b);
+
+#define SWITCHDEV_SET_OPS(netdev, ops) ((netdev)->switchdev_ops = (ops))
#else
static inline void switchdev_deferred_process(void)
@@ -322,6 +324,8 @@ static inline bool switchdev_port_same_parent_id(struct net_device *a,
return false;
}
+#define SWITCHDEV_SET_OPS(netdev, ops) do {} while (0)
+
#endif
#endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d0751b79d99c..70483296157f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -46,6 +46,10 @@
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/bpf-cgroup.h>
+
extern struct inet_hashinfo tcp_hashinfo;
extern struct percpu_counter tcp_orphan_count;
@@ -1000,7 +1004,9 @@ void tcp_get_default_congestion_control(char *name);
void tcp_get_available_congestion_control(char *buf, size_t len);
void tcp_get_allowed_congestion_control(char *buf, size_t len);
int tcp_set_allowed_congestion_control(char *allowed);
-int tcp_set_congestion_control(struct sock *sk, const char *name);
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load);
+void tcp_reinit_congestion_control(struct sock *sk,
+ const struct tcp_congestion_ops *ca);
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
@@ -2021,4 +2027,62 @@ int tcp_set_ulp(struct sock *sk, const char *name);
void tcp_get_available_ulp(char *buf, size_t len);
void tcp_cleanup_ulp(struct sock *sk);
+/* Call BPF_SOCK_OPS program that returns an int. If the return value
+ * is < 0, then the BPF op failed (for example if the loaded BPF
+ * program does not support the chosen operation or there is no BPF
+ * program loaded).
+ */
+#ifdef CONFIG_BPF
+static inline int tcp_call_bpf(struct sock *sk, int op)
+{
+ struct bpf_sock_ops_kern sock_ops;
+ int ret;
+
+ if (sk_fullsock(sk))
+ sock_owned_by_me(sk);
+
+ memset(&sock_ops, 0, sizeof(sock_ops));
+ sock_ops.sk = sk;
+ sock_ops.op = op;
+
+ ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
+ if (ret == 0)
+ ret = sock_ops.reply;
+ else
+ ret = -1;
+ return ret;
+}
+#else
+static inline int tcp_call_bpf(struct sock *sk, int op)
+{
+ return -EPERM;
+}
+#endif
+
+static inline u32 tcp_timeout_init(struct sock *sk)
+{
+ int timeout;
+
+ timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT);
+
+ if (timeout <= 0)
+ timeout = TCP_TIMEOUT_INIT;
+ return timeout;
+}
+
+static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
+{
+ int rwnd;
+
+ rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT);
+
+ if (rwnd < 0)
+ rwnd = 0;
+ return rwnd;
+}
+
+static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
+{
+ return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
+}
#endif /* _TCP_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index 1468dbd0f09a..972ce4baab6b 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -302,6 +302,67 @@ struct sock *__udp6_lib_lookup(struct net *net,
struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport);
+/* UDP uses skb->dev_scratch to cache as much information as possible and avoid
+ * possibly multiple cache misses on dequeue()
+ */
+#if BITS_PER_LONG == 64
+
+/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on
+ * cold cache lines at recvmsg time.
+ * skb->len can be stored on 16 bits since the udp header has been already
+ * validated and pulled.
+ */
+struct udp_dev_scratch {
+ u32 truesize;
+ u16 len;
+ bool is_linear;
+ bool csum_unnecessary;
+};
+
+static inline unsigned int udp_skb_len(struct sk_buff *skb)
+{
+ return ((struct udp_dev_scratch *)&skb->dev_scratch)->len;
+}
+
+static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb)
+{
+ return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary;
+}
+
+static inline bool udp_skb_is_linear(struct sk_buff *skb)
+{
+ return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear;
+}
+
+#else
+static inline unsigned int udp_skb_len(struct sk_buff *skb)
+{
+ return skb->len;
+}
+
+static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb)
+{
+ return skb_csum_unnecessary(skb);
+}
+
+static inline bool udp_skb_is_linear(struct sk_buff *skb)
+{
+ return !skb_is_nonlinear(skb);
+}
+#endif
+
+static inline int copy_linear_skb(struct sk_buff *skb, int len, int off,
+ struct iov_iter *to)
+{
+ int n, copy = len - off;
+
+ n = copy_to_iter(skb->data + off, copy, to);
+ if (n == copy)
+ return 0;
+
+ return -EFAULT;
+}
+
/*
* SNMP statistics for UDP and UDP-Lite
*/
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index b816a0a6686e..326e8498b10e 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -221,9 +221,17 @@ struct vxlan_config {
bool no_share;
};
+struct vxlan_dev_node {
+ struct hlist_node hlist;
+ struct vxlan_dev *vxlan;
+};
+
/* Pseudo network device */
struct vxlan_dev {
- struct hlist_node hlist; /* vni hash table */
+ struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */
+#if IS_ENABLED(CONFIG_IPV6)
+ struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */
+#endif
struct list_head next; /* vxlan's per namespace list */
struct vxlan_sock __rcu *vn4_sock; /* listening socket for IPv4 */
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 01f5bc144ee5..01fa357e9a32 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1854,8 +1854,9 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
}
#endif
-#ifdef CONFIG_XFRM_OFFLOAD
void __net_init xfrm_dev_init(void);
+
+#ifdef CONFIG_XFRM_OFFLOAD
int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features);
int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
struct xfrm_user_offload *xuo);
@@ -1881,10 +1882,6 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
}
}
#else
-static inline void __net_init xfrm_dev_init(void)
-{
-}
-
static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
{
return 0;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f94b48b168dc..e99e3e6f8b37 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -120,12 +120,14 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LWT_IN,
BPF_PROG_TYPE_LWT_OUT,
BPF_PROG_TYPE_LWT_XMIT,
+ BPF_PROG_TYPE_SOCK_OPS,
};
enum bpf_attach_type {
BPF_CGROUP_INET_INGRESS,
BPF_CGROUP_INET_EGRESS,
BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_SOCK_OPS,
__MAX_BPF_ATTACH_TYPE
};
@@ -518,6 +520,25 @@ union bpf_attr {
* Set full skb->hash.
* @skb: pointer to skb
* @hash: hash to set
+ *
+ * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
+ * Calls setsockopt. Not all opts are available, only those with
+ * integer optvals plus TCP_CONGESTION.
+ * Supported levels: SOL_SOCKET and IPPROTO_TCP
+ * @bpf_socket: pointer to bpf_socket
+ * @level: SOL_SOCKET or IPPROTO_TCP
+ * @optname: option name
+ * @optval: pointer to option value
+ * @optlen: length of optval in bytes
+ * Return: 0 or negative error
+ *
+ * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
+ * Grow or shrink room in sk_buff.
+ * @skb: pointer to skb
+ * @len_diff: (signed) amount of room to grow/shrink
+ * @mode: operation mode (enum bpf_adj_room_mode)
+ * @flags: reserved for future use
+ * Return: 0 on success or negative error code
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -568,7 +589,9 @@ union bpf_attr {
FN(probe_read_str), \
FN(get_socket_cookie), \
FN(get_socket_uid), \
- FN(set_hash),
+ FN(set_hash), \
+ FN(setsockopt), \
+ FN(skb_adjust_room),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -618,6 +641,11 @@ enum bpf_func_id {
/* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+ BPF_ADJ_ROOM_NET,
+};
+
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
@@ -720,4 +748,56 @@ struct bpf_map_info {
__u32 map_flags;
} __attribute__((aligned(8)));
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * Some of these fields are in network (big-endian) byte order and may need
+ * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h).
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+ __u32 op;
+ union {
+ __u32 reply;
+ __u32 replylong[4];
+ };
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+};
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+ BPF_SOCK_OPS_VOID,
+ BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or
+ * -1 if default value should be used
+ */
+ BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized
+ * window (in packets) or -1 if default
+ * value should be used
+ */
+ BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an
+ * active connection is initialized
+ */
+ BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an
+ * active connection is
+ * established
+ */
+ BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a
+ * passive connection is
+ * established
+ */
+ BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
+ * needs ECN
+ */
+};
+
+#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
+#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index ced9d8b97426..6217ff8500a1 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -121,6 +121,7 @@ typedef __s32 sctp_assoc_t;
#define SCTP_RESET_STREAMS 119
#define SCTP_RESET_ASSOC 120
#define SCTP_ADD_STREAMS 121
+#define SCTP_SOCKOPT_PEELOFF_FLAGS 122
/* PR-SCTP policies */
#define SCTP_PR_SCTP_NONE 0x0000
@@ -978,6 +979,11 @@ typedef struct {
int sd;
} sctp_peeloff_arg_t;
+typedef struct {
+ sctp_peeloff_arg_t p_arg;
+ unsigned flags;
+} sctp_peeloff_flags_arg_t;
+
/*
* Peer Address Thresholds socket option
*/
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index ecb43542246e..d771a3872500 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -335,6 +335,26 @@ static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
}
/* only called from syscall */
+int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
+{
+ void **elem, *ptr;
+ int ret = 0;
+
+ if (!map->ops->map_fd_sys_lookup_elem)
+ return -ENOTSUPP;
+
+ rcu_read_lock();
+ elem = array_map_lookup_elem(map, key);
+ if (elem && (ptr = READ_ONCE(*elem)))
+ *value = map->ops->map_fd_sys_lookup_elem(ptr);
+ else
+ ret = -ENOENT;
+ rcu_read_unlock();
+
+ return ret;
+}
+
+/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags)
{
@@ -400,6 +420,11 @@ static void prog_fd_array_put_ptr(void *ptr)
bpf_prog_put(ptr);
}
+static u32 prog_fd_array_sys_lookup_elem(void *ptr)
+{
+ return ((struct bpf_prog *)ptr)->aux->id;
+}
+
/* decrement refcnt of all bpf_progs that are stored in this map */
void bpf_fd_array_map_clear(struct bpf_map *map)
{
@@ -418,6 +443,7 @@ const struct bpf_map_ops prog_array_map_ops = {
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = prog_fd_array_get_ptr,
.map_fd_put_ptr = prog_fd_array_put_ptr,
+ .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
};
static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
@@ -585,4 +611,5 @@ const struct bpf_map_ops array_of_maps_map_ops = {
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = bpf_map_fd_get_ptr,
.map_fd_put_ptr = bpf_map_fd_put_ptr,
+ .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
};
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ea6033cba947..546113430049 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -236,3 +236,40 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
+
+/**
+ * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
+ * @sk: socket to get cgroup from
+ * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
+ * sk with connection information (IP addresses, etc.) May not contain
+ * cgroup info if it is a req sock.
+ * @type: The type of program to be executed
+ *
+ * socket passed is expected to be of type INET or INET6.
+ *
+ * The program type passed in via @type must be suitable for sock_ops
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if an attached program was found
+ * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
+ struct bpf_sock_ops_kern *sock_ops,
+ enum bpf_attach_type type)
+{
+ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ struct bpf_prog *prog;
+ int ret = 0;
+
+
+ rcu_read_lock();
+
+ prog = rcu_dereference(cgrp->bpf.effective[type]);
+ if (prog)
+ ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM;
+
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 774069ca18a7..ad5f55922a13 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1297,7 +1297,9 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
*/
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{
- fp->bpf_func = interpreters[round_down(fp->aux->stack_depth, 32) / 32];
+ u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+
+ fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
/* eBPF JITs can rewrite the program in case constant
* blinding is active. However, in case of error during
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 004334ea13ba..4fb463172aa8 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1244,6 +1244,26 @@ static void fd_htab_map_free(struct bpf_map *map)
}
/* only called from syscall */
+int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
+{
+ void **ptr;
+ int ret = 0;
+
+ if (!map->ops->map_fd_sys_lookup_elem)
+ return -ENOTSUPP;
+
+ rcu_read_lock();
+ ptr = htab_map_lookup_elem(map, key);
+ if (ptr)
+ *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
+ else
+ ret = -ENOENT;
+ rcu_read_unlock();
+
+ return ret;
+}
+
+/* only called from syscall */
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags)
{
@@ -1305,4 +1325,5 @@ const struct bpf_map_ops htab_of_maps_map_ops = {
.map_delete_elem = htab_map_delete_elem,
.map_fd_get_ptr = bpf_map_fd_get_ptr,
.map_fd_put_ptr = bpf_map_fd_put_ptr,
+ .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
};
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 59bcdf821ae4..1da574612bea 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -95,3 +95,8 @@ void bpf_map_fd_put_ptr(void *ptr)
*/
bpf_map_put(ptr);
}
+
+u32 bpf_map_fd_sys_lookup_elem(void *ptr)
+{
+ return ((struct bpf_map *)ptr)->id;
+}
diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h
index 177fadb689dc..6183db9ec08c 100644
--- a/kernel/bpf/map_in_map.h
+++ b/kernel/bpf/map_in_map.h
@@ -19,5 +19,6 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file,
int ufd);
void bpf_map_fd_put_ptr(void *ptr);
+u32 bpf_map_fd_sys_lookup_elem(void *ptr);
#endif
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8942c820d620..18980472f5b0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -24,6 +24,13 @@
#include <linux/kernel.h>
#include <linux/idr.h>
+#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
+ (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
+ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
+ (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))
+
DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
@@ -209,10 +216,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
const struct bpf_map *map = filp->private_data;
const struct bpf_array *array;
u32 owner_prog_type = 0;
+ u32 owner_jited = 0;
if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
array = container_of(map, struct bpf_array, map);
owner_prog_type = array->owner_prog_type;
+ owner_jited = array->owner_jited;
}
seq_printf(m,
@@ -229,9 +238,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
map->map_flags,
map->pages * 1ULL << PAGE_SHIFT);
- if (owner_prog_type)
+ if (owner_prog_type) {
seq_printf(m, "owner_prog_type:\t%u\n",
owner_prog_type);
+ seq_printf(m, "owner_jited:\t%u\n",
+ owner_jited);
+ }
}
#endif
@@ -411,6 +423,8 @@ static int map_lookup_elem(union bpf_attr *attr)
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
value_size = round_up(map->value_size, 8) * num_possible_cpus();
+ else if (IS_FD_MAP(map))
+ value_size = sizeof(u32);
else
value_size = map->value_size;
@@ -426,9 +440,10 @@ static int map_lookup_elem(union bpf_attr *attr)
err = bpf_percpu_array_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
err = bpf_stackmap_copy(map, key, value);
- } else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
- map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
- err = -ENOTSUPP;
+ } else if (IS_FD_ARRAY(map)) {
+ err = bpf_fd_array_map_lookup_elem(map, key, value);
+ } else if (IS_FD_HASH(map)) {
+ err = bpf_fd_htab_map_lookup_elem(map, key, value);
} else {
rcu_read_lock();
ptr = map->ops->map_lookup_elem(map, key);
@@ -1069,6 +1084,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_INET_SOCK_CREATE:
ptype = BPF_PROG_TYPE_CGROUP_SOCK;
break;
+ case BPF_CGROUP_SOCK_OPS:
+ ptype = BPF_PROG_TYPE_SOCK_OPS;
+ break;
default:
return -EINVAL;
}
@@ -1109,6 +1127,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
case BPF_CGROUP_INET_SOCK_CREATE:
+ case BPF_CGROUP_SOCK_OPS:
cgrp = cgroup_get_from_fd(attr->target_fd);
if (IS_ERR(cgrp))
return PTR_ERR(cgrp);
@@ -1123,6 +1142,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
return ret;
}
+
#endif /* CONFIG_CGROUP_BPF */
#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 74ea96ea391b..6a86723c5b64 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -546,20 +546,6 @@ static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
return 0;
}
-static int bpf_size_to_bytes(int bpf_size)
-{
- if (bpf_size == BPF_W)
- return 4;
- else if (bpf_size == BPF_H)
- return 2;
- else if (bpf_size == BPF_B)
- return 1;
- else if (bpf_size == BPF_DW)
- return 8;
- else
- return -EINVAL;
-}
-
static bool is_spillable_regtype(enum bpf_reg_type type)
{
switch (type) {
@@ -761,7 +747,9 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
enum bpf_access_type t, enum bpf_reg_type *reg_type)
{
- struct bpf_insn_access_aux info = { .reg_type = *reg_type };
+ struct bpf_insn_access_aux info = {
+ .reg_type = *reg_type,
+ };
/* for analyzer ctx accesses are already validated and converted */
if (env->analyzer_ops)
@@ -769,25 +757,14 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
if (env->prog->aux->ops->is_valid_access &&
env->prog->aux->ops->is_valid_access(off, size, t, &info)) {
- /* a non zero info.ctx_field_size indicates:
- * . For this field, the prog type specific ctx conversion algorithm
- * only supports whole field access.
- * . This ctx access is a candiate for later verifier transformation
- * to load the whole field and then apply a mask to get correct result.
- * a non zero info.converted_op_size indicates perceived actual converted
- * value width in convert_ctx_access.
+ /* A non zero info.ctx_field_size indicates that this field is a
+ * candidate for later verifier transformation to load the whole
+ * field and then apply a mask when accessed with a narrower
+ * access than actual ctx access size. A zero info.ctx_field_size
+ * will only allow for whole field access and rejects any other
+ * type of narrower access.
*/
- if ((info.ctx_field_size && !info.converted_op_size) ||
- (!info.ctx_field_size && info.converted_op_size)) {
- verbose("verifier bug in is_valid_access prog type=%u off=%d size=%d\n",
- env->prog->type, off, size);
- return -EACCES;
- }
-
- if (info.ctx_field_size) {
- env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
- env->insn_aux_data[insn_idx].converted_op_size = info.converted_op_size;
- }
+ env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
*reg_type = info.reg_type;
/* remember the offset of last byte accessed in ctx */
@@ -1016,6 +993,11 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
if (err)
return err;
+ if (is_pointer_value(env, insn->src_reg)) {
+ verbose("R%d leaks addr into mem\n", insn->src_reg);
+ return -EACCES;
+ }
+
/* check whether atomic_add can read the memory */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_READ, -1);
@@ -1675,6 +1657,65 @@ static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
+static int evaluate_reg_imm_alu_unknown(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
+{
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+ struct bpf_reg_state *src_reg = &regs[insn->src_reg];
+ u8 opcode = BPF_OP(insn->code);
+ s64 imm_log2 = __ilog2_u64((long long)dst_reg->imm);
+
+ /* BPF_X code with src_reg->type UNKNOWN_VALUE here. */
+ if (src_reg->imm > 0 && dst_reg->imm) {
+ switch (opcode) {
+ case BPF_ADD:
+ /* dreg += sreg
+ * where both have zero upper bits. Adding them
+ * can make at most one more upper bit non-zero
+ * than in the larger value.
+ * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47)
+ * 0xffff (imm=48) + 0xffff = 0x1fffe (imm=47)
+ */
+ dst_reg->imm = min(src_reg->imm, 63 - imm_log2);
+ dst_reg->imm--;
+ break;
+ case BPF_AND:
+ /* dreg &= sreg
+ * AND can only clear bits: zero upper bits never shrink
+ * Ex. 0x00..00ffffff
+ * & 0x0f..ffffffff
+ * ----------------
+ * 0x00..00ffffff
+ */
+ dst_reg->imm = max(src_reg->imm, 63 - imm_log2);
+ break;
+ case BPF_OR:
+ /* dreg |= sreg
+ * OR can only set bits: zero upper bits never grow
+ * Ex. 0x00..00ffffff
+ * | 0x0f..ffffffff
+ * ----------------
+ * 0x0f..ffffffff
+ */
+ dst_reg->imm = min(src_reg->imm, 63 - imm_log2);
+ break;
+ case BPF_SUB:
+ case BPF_MUL:
+ case BPF_RSH:
+ case BPF_LSH:
+ /* These may be fleshed out later */
+ default:
+ mark_reg_unknown_value(regs, insn->dst_reg);
+ }
+ } else {
+ mark_reg_unknown_value(regs, insn->dst_reg);
+ }
+
+ dst_reg->type = UNKNOWN_VALUE;
+ return 0;
+}
+
static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
struct bpf_insn *insn)
{
@@ -1684,6 +1725,9 @@ static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
u8 opcode = BPF_OP(insn->code);
u64 dst_imm = dst_reg->imm;
+ if (BPF_SRC(insn->code) == BPF_X && src_reg->type == UNKNOWN_VALUE)
+ return evaluate_reg_imm_alu_unknown(env, insn);
+
/* dst_reg->type == CONST_IMM here. Simulate execution of insns
* containing ALU ops. Don't care about overflow or negative
* values, just add/sub/... them; registers are in u64.
@@ -3396,11 +3440,13 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
const struct bpf_verifier_ops *ops = env->prog->aux->ops;
+ int i, cnt, size, ctx_field_size, delta = 0;
const int insn_cnt = env->prog->len;
struct bpf_insn insn_buf[16], *insn;
struct bpf_prog *new_prog;
enum bpf_access_type type;
- int i, cnt, off, size, ctx_field_size, converted_op_size, is_narrower_load, delta = 0;
+ bool is_narrower_load;
+ u32 target_size;
if (ops->gen_prologue) {
cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
@@ -3440,39 +3486,50 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
continue;
- off = insn->off;
- size = bpf_size_to_bytes(BPF_SIZE(insn->code));
ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
- converted_op_size = env->insn_aux_data[i + delta].converted_op_size;
- is_narrower_load = type == BPF_READ && size < ctx_field_size;
+ size = BPF_LDST_BYTES(insn);
/* If the read access is a narrower load of the field,
* convert to a 4/8-byte load, to minimum program type specific
* convert_ctx_access changes. If conversion is successful,
* we will apply proper mask to the result.
*/
+ is_narrower_load = size < ctx_field_size;
if (is_narrower_load) {
- int size_code = BPF_H;
+ u32 off = insn->off;
+ u8 size_code;
+ if (type == BPF_WRITE) {
+ verbose("bpf verifier narrow ctx access misconfigured\n");
+ return -EINVAL;
+ }
+
+ size_code = BPF_H;
if (ctx_field_size == 4)
size_code = BPF_W;
else if (ctx_field_size == 8)
size_code = BPF_DW;
+
insn->off = off & ~(ctx_field_size - 1);
insn->code = BPF_LDX | BPF_MEM | size_code;
}
- cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog);
- if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+
+ target_size = 0;
+ cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
+ &target_size);
+ if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
+ (ctx_field_size && !target_size)) {
verbose("bpf verifier is misconfigured\n");
return -EINVAL;
}
- if (is_narrower_load && size < converted_op_size) {
+
+ if (is_narrower_load && size < target_size) {
if (ctx_field_size <= 4)
insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
- (1 << size * 8) - 1);
+ (1 << size * 8) - 1);
else
insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
- (1 << size * 8) - 1);
+ (1 << size * 8) - 1);
}
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 2831480c63a2..ee97196bb151 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -580,7 +580,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
int ret = -ENOMEM, max_order = 0;
if (!has_aux(event))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) {
/*
diff --git a/kernel/signal.c b/kernel/signal.c
index ca92bcfeb322..45b4c1ffe14e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -510,7 +510,8 @@ int unhandled_signal(struct task_struct *tsk, int sig)
return !tsk->ptrace;
}
-static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+static void collect_signal(int sig, struct sigpending *list, siginfo_t *info,
+ bool *resched_timer)
{
struct sigqueue *q, *first = NULL;
@@ -532,6 +533,12 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
still_pending:
list_del_init(&first->list);
copy_siginfo(info, &first->info);
+
+ *resched_timer =
+ (first->flags & SIGQUEUE_PREALLOC) &&
+ (info->si_code == SI_TIMER) &&
+ (info->si_sys_private);
+
__sigqueue_free(first);
} else {
/*
@@ -548,12 +555,12 @@ still_pending:
}
static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
- siginfo_t *info)
+ siginfo_t *info, bool *resched_timer)
{
int sig = next_signal(pending, mask);
if (sig)
- collect_signal(sig, pending, info);
+ collect_signal(sig, pending, info, resched_timer);
return sig;
}
@@ -565,15 +572,16 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
*/
int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
{
+ bool resched_timer = false;
int signr;
/* We only dequeue private signals from ourselves, we don't let
* signalfd steal them
*/
- signr = __dequeue_signal(&tsk->pending, mask, info);
+ signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer);
if (!signr) {
signr = __dequeue_signal(&tsk->signal->shared_pending,
- mask, info);
+ mask, info, &resched_timer);
#ifdef CONFIG_POSIX_TIMERS
/*
* itimer signal ?
@@ -621,7 +629,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
current->jobctl |= JOBCTL_STOP_DEQUEUED;
}
#ifdef CONFIG_POSIX_TIMERS
- if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
+ if (resched_timer) {
/*
* Release the siglock to ensure proper locking order
* of timer locks outside of siglocks. Note, we leave
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 9652bc57fd09..b602c48cb841 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -118,6 +118,26 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
tk->offs_boot = ktime_add(tk->offs_boot, delta);
}
+/*
+ * tk_clock_read - atomic clocksource read() helper
+ *
+ * This helper is necessary to use in the read paths because, while the
+ * seqlock ensures we don't return a bad value while structures are updated,
+ * it doesn't protect from potential crashes. There is the possibility that
+ * the tkr's clocksource may change between the read reference, and the
+ * clock reference passed to the read function. This can cause crashes if
+ * the wrong clocksource is passed to the wrong read function.
+ * This isn't necessary to use when holding the timekeeper_lock or doing
+ * a read of the fast-timekeeper tkrs (which is protected by its own locking
+ * and update logic).
+ */
+static inline u64 tk_clock_read(struct tk_read_base *tkr)
+{
+ struct clocksource *clock = READ_ONCE(tkr->clock);
+
+ return clock->read(clock);
+}
+
#ifdef CONFIG_DEBUG_TIMEKEEPING
#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
@@ -175,7 +195,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
*/
do {
seq = read_seqcount_begin(&tk_core.seq);
- now = tkr->read(tkr->clock);
+ now = tk_clock_read(tkr);
last = tkr->cycle_last;
mask = tkr->mask;
max = tkr->clock->max_cycles;
@@ -209,7 +229,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
u64 cycle_now, delta;
/* read clocksource */
- cycle_now = tkr->read(tkr->clock);
+ cycle_now = tk_clock_read(tkr);
/* calculate the delta since the last update_wall_time */
delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -238,12 +258,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
++tk->cs_was_changed_seq;
old_clock = tk->tkr_mono.clock;
tk->tkr_mono.clock = clock;
- tk->tkr_mono.read = clock->read;
tk->tkr_mono.mask = clock->mask;
- tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+ tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
tk->tkr_raw.clock = clock;
- tk->tkr_raw.read = clock->read;
tk->tkr_raw.mask = clock->mask;
tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
@@ -262,7 +280,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
/* Go back from cycles -> shifted ns */
tk->xtime_interval = interval * clock->mult;
tk->xtime_remainder = ntpinterval - tk->xtime_interval;
- tk->raw_interval = (interval * clock->mult) >> clock->shift;
+ tk->raw_interval = interval * clock->mult;
/* if changing clocks, convert xtime_nsec shift units */
if (old_clock) {
@@ -404,7 +422,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
now += timekeeping_delta_to_ns(tkr,
clocksource_delta(
- tkr->read(tkr->clock),
+ tk_clock_read(tkr),
tkr->cycle_last,
tkr->mask));
} while (read_seqcount_retry(&tkf->seq, seq));
@@ -461,6 +479,10 @@ static u64 dummy_clock_read(struct clocksource *cs)
return cycles_at_suspend;
}
+static struct clocksource dummy_clock = {
+ .read = dummy_clock_read,
+};
+
/**
* halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
* @tk: Timekeeper to snapshot.
@@ -477,13 +499,13 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
struct tk_read_base *tkr = &tk->tkr_mono;
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
- cycles_at_suspend = tkr->read(tkr->clock);
- tkr_dummy.read = dummy_clock_read;
+ cycles_at_suspend = tk_clock_read(tkr);
+ tkr_dummy.clock = &dummy_clock;
update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
tkr = &tk->tkr_raw;
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
- tkr_dummy.read = dummy_clock_read;
+ tkr_dummy.clock = &dummy_clock;
update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
}
@@ -649,11 +671,10 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
*/
static void timekeeping_forward_now(struct timekeeper *tk)
{
- struct clocksource *clock = tk->tkr_mono.clock;
u64 cycle_now, delta;
u64 nsec;
- cycle_now = tk->tkr_mono.read(clock);
+ cycle_now = tk_clock_read(&tk->tkr_mono);
delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;
@@ -929,8 +950,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
do {
seq = read_seqcount_begin(&tk_core.seq);
-
- now = tk->tkr_mono.read(tk->tkr_mono.clock);
+ now = tk_clock_read(&tk->tkr_mono);
systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
base_real = ktime_add(tk->tkr_mono.base,
@@ -1108,7 +1128,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
* Check whether the system counter value provided by the
* device driver is on the current timekeeping interval.
*/
- now = tk->tkr_mono.read(tk->tkr_mono.clock);
+ now = tk_clock_read(&tk->tkr_mono);
interval_start = tk->tkr_mono.cycle_last;
if (!cycle_between(interval_start, cycles, now)) {
clock_was_set_seq = tk->clock_was_set_seq;
@@ -1629,7 +1649,7 @@ void timekeeping_resume(void)
* The less preferred source will only be tried if there is no better
* usable source. The rtc part is handled separately in rtc core code.
*/
- cycle_now = tk->tkr_mono.read(clock);
+ cycle_now = tk_clock_read(&tk->tkr_mono);
if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
cycle_now > tk->tkr_mono.cycle_last) {
u64 nsec, cyc_delta;
@@ -1976,7 +1996,7 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
u32 shift, unsigned int *clock_set)
{
u64 interval = tk->cycle_interval << shift;
- u64 raw_nsecs;
+ u64 snsec_per_sec;
/* If the offset is smaller than a shifted interval, do nothing */
if (offset < interval)
@@ -1991,14 +2011,15 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
*clock_set |= accumulate_nsecs_to_secs(tk);
/* Accumulate raw time */
- raw_nsecs = (u64)tk->raw_interval << shift;
- raw_nsecs += tk->raw_time.tv_nsec;
- if (raw_nsecs >= NSEC_PER_SEC) {
- u64 raw_secs = raw_nsecs;
- raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
- tk->raw_time.tv_sec += raw_secs;
+ tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
+ tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
+ snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+ while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
+ tk->tkr_raw.xtime_nsec -= snsec_per_sec;
+ tk->raw_time.tv_sec++;
}
- tk->raw_time.tv_nsec = raw_nsecs;
+ tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
+ tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
/* Accumulate error between NTP and clock interval */
tk->ntp_error += tk->ntp_tick << shift;
@@ -2030,7 +2051,7 @@ void update_wall_time(void)
#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
offset = real_tk->cycle_interval;
#else
- offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+ offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
#endif
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 97c46b440cd6..37385193a608 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -122,8 +122,8 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
}
/*
- * limited trace_printk()
- * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
+ * Only limited trace_printk() conversion specifiers allowed:
+ * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
*/
BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
u64, arg2, u64, arg3)
@@ -198,7 +198,8 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
i++;
}
- if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x')
+ if (fmt[i] != 'i' && fmt[i] != 'd' &&
+ fmt[i] != 'u' && fmt[i] != 'x')
return -EINVAL;
fmt_cnt++;
}
@@ -583,7 +584,8 @@ const struct bpf_verifier_ops tracepoint_prog_ops = {
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
- int sample_period_off;
+ const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
+ sample_period);
if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
return false;
@@ -592,43 +594,35 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
if (off % size != 0)
return false;
- /* permit 1, 2, 4 byte narrower and 8 normal read access to sample_period */
- sample_period_off = offsetof(struct bpf_perf_event_data, sample_period);
- if (off >= sample_period_off && off < sample_period_off + sizeof(__u64)) {
- int allowed;
-
-#ifdef __LITTLE_ENDIAN
- allowed = (off & 0x7) == 0 && size <= 8 && (size & (size - 1)) == 0;
-#else
- allowed = ((off & 0x7) + size) == 8 && size <= 8 && (size & (size - 1)) == 0;
-#endif
- if (!allowed)
+ switch (off) {
+ case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
+ bpf_ctx_record_field_size(info, size_sp);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
return false;
- info->ctx_field_size = 8;
- info->converted_op_size = 8;
- } else {
+ break;
+ default:
if (size != sizeof(long))
return false;
}
+
return true;
}
static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
- struct bpf_prog *prog)
+ struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
switch (si->off) {
case offsetof(struct bpf_perf_event_data, sample_period):
- BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));
-
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
data), si->dst_reg, si->src_reg,
offsetof(struct bpf_perf_event_data_kern, data));
*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
- offsetof(struct perf_sample_data, period));
+ bpf_target_off(struct perf_sample_data, period, 8,
+ target_size));
break;
default:
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
diff --git a/lib/cmdline.c b/lib/cmdline.c
index 3c6432df7e63..4c0888c4a68d 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -23,14 +23,14 @@
* the values[M, M+1, ..., N] into the ints array in get_options.
*/
-static int get_range(char **str, int *pint)
+static int get_range(char **str, int *pint, int n)
{
int x, inc_counter, upper_range;
(*str)++;
upper_range = simple_strtol((*str), NULL, 0);
inc_counter = upper_range - *pint;
- for (x = *pint; x < upper_range; x++)
+ for (x = *pint; n && x < upper_range; x++, n--)
*pint++ = x;
return inc_counter;
}
@@ -97,7 +97,7 @@ char *get_options(const char *str, int nints, int *ints)
break;
if (res == 3) {
int range_nums;
- range_nums = get_range((char **)&str, ints + i);
+ range_nums = get_range((char **)&str, ints + i, nints - i);
if (range_nums < 0)
break;
/*
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 945fd1ca49b5..df4ebdb2b10a 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -652,7 +652,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
spin_unlock(ptl);
free_page_and_swap_cache(src_page);
}
- cond_resched();
}
}
diff --git a/mm/slub.c b/mm/slub.c
index 7449593fca72..8addc535bcdc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5625,6 +5625,28 @@ static char *create_unique_id(struct kmem_cache *s)
return name;
}
+static void sysfs_slab_remove_workfn(struct work_struct *work)
+{
+ struct kmem_cache *s =
+ container_of(work, struct kmem_cache, kobj_remove_work);
+
+ if (!s->kobj.state_in_sysfs)
+ /*
+ * For a memcg cache, this may be called during
+ * deactivation and again on shutdown. Remove only once.
+ * A cache is never shut down before deactivation is
+ * complete, so no need to worry about synchronization.
+ */
+ return;
+
+#ifdef CONFIG_MEMCG
+ kset_unregister(s->memcg_kset);
+#endif
+ kobject_uevent(&s->kobj, KOBJ_REMOVE);
+ kobject_del(&s->kobj);
+ kobject_put(&s->kobj);
+}
+
static int sysfs_slab_add(struct kmem_cache *s)
{
int err;
@@ -5632,6 +5654,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
struct kset *kset = cache_kset(s);
int unmergeable = slab_unmergeable(s);
+ INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
+
if (!kset) {
kobject_init(&s->kobj, &slab_ktype);
return 0;
@@ -5695,20 +5719,8 @@ static void sysfs_slab_remove(struct kmem_cache *s)
*/
return;
- if (!s->kobj.state_in_sysfs)
- /*
- * For a memcg cache, this may be called during
- * deactivation and again on shutdown. Remove only once.
- * A cache is never shut down before deactivation is
- * complete, so no need to worry about synchronization.
- */
- return;
-
-#ifdef CONFIG_MEMCG
- kset_unregister(s->memcg_kset);
-#endif
- kobject_uevent(&s->kobj, KOBJ_REMOVE);
- kobject_del(&s->kobj);
+ kobject_get(&s->kobj);
+ schedule_work(&s->kobj_remove_work);
}
void sysfs_slab_release(struct kmem_cache *s)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 34a1c3e46ed7..ecc97f74ab18 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -287,10 +287,21 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
if (p4d_none(*p4d))
return NULL;
pud = pud_offset(p4d, addr);
- if (pud_none(*pud))
+
+ /*
+ * Don't dereference bad PUD or PMD (below) entries. This will also
+ * identify huge mappings, which we may encounter on architectures
+ * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
+ * identified as vmalloc addresses by is_vmalloc_addr(), but are
+ * not [unambiguously] associated with a struct page, so there is
+ * no correct value to return for them.
+ */
+ WARN_ON_ONCE(pud_bad(*pud));
+ if (pud_none(*pud) || pud_bad(*pud))
return NULL;
pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
+ WARN_ON_ONCE(pmd_bad(*pmd));
+ if (pmd_none(*pmd) || pmd_bad(*pmd))
return NULL;
ptep = pte_offset_map(pmd, addr);
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index fca84e111c89..4e111196f902 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -252,7 +252,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
- atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
+ refcount_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
ATM_SKB(skb)->atm_options = atmvcc->atm_options;
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index a7e4018370b4..f271a7bcf5b2 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -137,11 +137,11 @@ static int neigh_check_cb(struct neighbour *n)
if (entry->vccs || time_before(jiffies, entry->expires))
return 0;
- if (atomic_read(&n->refcnt) > 1) {
+ if (refcount_read(&n->refcnt) > 1) {
struct sk_buff *skb;
pr_debug("destruction postponed with ref %d\n",
- atomic_read(&n->refcnt));
+ refcount_read(&n->refcnt));
while ((skb = skb_dequeue(&n->arp_queue)) != NULL)
dev_kfree_skb(skb);
@@ -381,7 +381,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
memcpy(here, llc_oui, sizeof(llc_oui));
((__be16 *) here)[3] = skb->protocol;
}
- atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+ refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
ATM_SKB(skb)->atm_options = vcc->atm_options;
entry->vccs->last_use = jiffies;
pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev);
@@ -767,7 +767,7 @@ static void atmarp_info(struct seq_file *seq, struct neighbour *n,
seq_printf(seq, "(resolving)\n");
else
seq_printf(seq, "(expired, ref %d)\n",
- atomic_read(&entry->neigh->refcnt));
+ refcount_read(&entry->neigh->refcnt));
} else if (!svc) {
seq_printf(seq, "%d.%d.%d\n",
clip_vcc->vcc->dev->number,
diff --git a/net/atm/common.c b/net/atm/common.c
index f06422f4108d..8a4f99114cd2 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -80,9 +80,9 @@ static void vcc_sock_destruct(struct sock *sk)
printk(KERN_DEBUG "%s: rmem leakage (%d bytes) detected.\n",
__func__, atomic_read(&sk->sk_rmem_alloc));
- if (atomic_read(&sk->sk_wmem_alloc))
+ if (refcount_read(&sk->sk_wmem_alloc))
printk(KERN_DEBUG "%s: wmem leakage (%d bytes) detected.\n",
- __func__, atomic_read(&sk->sk_wmem_alloc));
+ __func__, refcount_read(&sk->sk_wmem_alloc));
}
static void vcc_def_wakeup(struct sock *sk)
@@ -101,7 +101,7 @@ static inline int vcc_writable(struct sock *sk)
struct atm_vcc *vcc = atm_sk(sk);
return (vcc->qos.txtp.max_sdu +
- atomic_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf;
+ refcount_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf;
}
static void vcc_write_space(struct sock *sk)
@@ -156,7 +156,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family, i
memset(&vcc->local, 0, sizeof(struct sockaddr_atmsvc));
memset(&vcc->remote, 0, sizeof(struct sockaddr_atmsvc));
vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */
- atomic_set(&sk->sk_wmem_alloc, 1);
+ refcount_set(&sk->sk_wmem_alloc, 1);
atomic_set(&sk->sk_rmem_alloc, 0);
vcc->push = NULL;
vcc->pop = NULL;
@@ -630,7 +630,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
goto out;
}
pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
- atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+ refcount_add(skb->truesize, &sk->sk_wmem_alloc);
skb->dev = NULL; /* for paths shared with net_device interfaces */
ATM_SKB(skb)->atm_options = vcc->atm_options;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 09cfe87f0a44..75545717fa46 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -181,7 +181,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
ATM_SKB(skb)->vcc = vcc;
ATM_SKB(skb)->atm_options = vcc->atm_options;
- atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+ refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
if (vcc->send(vcc, skb) < 0) {
dev->stats.tx_dropped++;
return;
@@ -345,7 +345,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
int i;
char *tmp; /* FIXME */
- atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+ WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
mesg = (struct atmlec_msg *)skb->data;
tmp = skb->data;
tmp += sizeof(struct atmlec_msg);
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index a190800572bd..680a4b9095a1 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -555,7 +555,7 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
sizeof(struct llc_snap_hdr));
}
- atomic_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
+ refcount_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
ATM_SKB(skb)->atm_options = entry->shortcut->atm_options;
entry->shortcut->send(entry->shortcut, skb);
entry->packets_fwded++;
@@ -911,7 +911,7 @@ static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb)
struct mpoa_client *mpc = find_mpc_by_vcc(vcc);
struct k_message *mesg = (struct k_message *)skb->data;
- atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+ WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
if (mpc == NULL) {
pr_info("no mpc found\n");
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index c4e09846d1de..21d9d341a619 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -350,7 +350,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
return 1;
}
- atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
+ refcount_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev);
diff --git a/net/atm/proc.c b/net/atm/proc.c
index bbb6461a4b7f..27c9c01c537d 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -211,7 +211,7 @@ static void vcc_info(struct seq_file *seq, struct atm_vcc *vcc)
vcc->flags, sk->sk_err,
sk_wmem_alloc_get(sk), sk->sk_sndbuf,
sk_rmem_alloc_get(sk), sk->sk_rcvbuf,
- atomic_read(&sk->sk_refcnt));
+ refcount_read(&sk->sk_refcnt));
}
static void svc_info(struct seq_file *seq, struct atm_vcc *vcc)
diff --git a/net/atm/raw.c b/net/atm/raw.c
index 2e17e97a7a8b..821c0797553d 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -35,7 +35,7 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb)
pr_debug("(%d) %d -= %d\n",
vcc->vci, sk_wmem_alloc_get(sk), skb->truesize);
- atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+ WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
dev_kfree_skb_any(skb);
sk->sk_write_space(sk);
}
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index f640a99e14b8..983c3a21a133 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -67,7 +67,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb)
struct sock *sk;
msg = (struct atmsvc_msg *) skb->data;
- atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+ WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
vcc = *(struct atm_vcc **) &msg->vcc;
pr_debug("%d (0x%lx)\n", (int)msg->type, (unsigned long)vcc);
sk = sk_atm(vcc);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 8a8f77a247e6..91e3ba280706 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -657,7 +657,7 @@ static int bt_seq_show(struct seq_file *seq, void *v)
seq_printf(seq,
"%pK %-6d %-6u %-6u %-6u %-6lu %-6lu",
sk,
- atomic_read(&sk->sk_refcnt),
+ refcount_read(&sk->sk_refcnt),
sk_rmem_alloc_get(sk),
sk_wmem_alloc_get(sk),
from_kuid(seq_user_ns(seq), sock_i_uid(sk)),
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 9a40013da915..7b3965861013 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -481,16 +481,16 @@ static int bnep_session(void *arg)
struct net_device *dev = s->dev;
struct sock *sk = s->sock->sk;
struct sk_buff *skb;
- wait_queue_t wait;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
BT_DBG("");
set_user_nice(current, -15);
- init_waitqueue_entry(&wait, current);
add_wait_queue(sk_sleep(sk), &wait);
while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
+ /* Ensure session->terminate is updated */
+ smp_mb__before_atomic();
if (atomic_read(&s->terminate))
break;
@@ -512,9 +512,8 @@ static int bnep_session(void *arg)
break;
netif_wake_queue(dev);
- schedule();
+ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
- __set_current_state(TASK_RUNNING);
remove_wait_queue(sk_sleep(sk), &wait);
/* Cleanup session */
@@ -663,7 +662,7 @@ int bnep_del_connection(struct bnep_conndel_req *req)
s = __bnep_get_session(req->dst);
if (s) {
atomic_inc(&s->terminate);
- wake_up_process(s->task);
+ wake_up_interruptible(sk_sleep(s->sock->sk));
} else
err = -ENOENT;
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index f4c64ef01c24..7f26a5a19ff6 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -280,16 +280,16 @@ static int cmtp_session(void *arg)
struct cmtp_session *session = arg;
struct sock *sk = session->sock->sk;
struct sk_buff *skb;
- wait_queue_t wait;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
BT_DBG("session %p", session);
set_user_nice(current, -15);
- init_waitqueue_entry(&wait, current);
add_wait_queue(sk_sleep(sk), &wait);
while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
+ /* Ensure session->terminate is updated */
+ smp_mb__before_atomic();
if (atomic_read(&session->terminate))
break;
@@ -306,9 +306,8 @@ static int cmtp_session(void *arg)
cmtp_process_transmit(session);
- schedule();
+ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
- __set_current_state(TASK_RUNNING);
remove_wait_queue(sk_sleep(sk), &wait);
down_write(&cmtp_session_sem);
@@ -393,7 +392,7 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
err = cmtp_attach_device(session);
if (err < 0) {
atomic_inc(&session->terminate);
- wake_up_process(session->task);
+ wake_up_interruptible(sk_sleep(session->sock->sk));
up_write(&cmtp_session_sem);
return err;
}
@@ -431,7 +430,11 @@ int cmtp_del_connection(struct cmtp_conndel_req *req)
/* Stop session thread */
atomic_inc(&session->terminate);
- wake_up_process(session->task);
+
+ /* Ensure session->terminate is updated */
+ smp_mb__after_atomic();
+
+ wake_up_interruptible(sk_sleep(session->sock->sk));
} else
err = -ENOENT;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index d860e3cc23cf..6bc679cd3481 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3096,15 +3096,14 @@ int hci_register_dev(struct hci_dev *hdev)
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
- hdev->workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND |
- WQ_MEM_RECLAIM, 1, hdev->name);
+ hdev->workqueue = alloc_ordered_workqueue("%s", WQ_HIGHPRI, hdev->name);
if (!hdev->workqueue) {
error = -ENOMEM;
goto err;
}
- hdev->req_workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND |
- WQ_MEM_RECLAIM, 1, hdev->name);
+ hdev->req_workqueue = alloc_ordered_workqueue("%s", WQ_HIGHPRI,
+ hdev->name);
if (!hdev->req_workqueue) {
destroy_workqueue(hdev->workqueue);
error = -ENOMEM;
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 961f7f53e178..472b3907b1b0 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -36,6 +36,7 @@
#define VERSION "1.2"
static DECLARE_RWSEM(hidp_session_sem);
+static DECLARE_WAIT_QUEUE_HEAD(hidp_session_wq);
static LIST_HEAD(hidp_session_list);
static unsigned char hidp_keycode[256] = {
@@ -1068,12 +1069,12 @@ static int hidp_session_start_sync(struct hidp_session *session)
* Wake up session thread and notify it to stop. This is asynchronous and
* returns immediately. Call this whenever a runtime error occurs and you want
* the session to stop.
- * Note: wake_up_process() performs any necessary memory-barriers for us.
+ * Note: wake_up_interruptible() performs any necessary memory-barriers for us.
*/
static void hidp_session_terminate(struct hidp_session *session)
{
atomic_inc(&session->terminate);
- wake_up_process(session->task);
+ wake_up_interruptible(&hidp_session_wq);
}
/*
@@ -1180,7 +1181,9 @@ static void hidp_session_run(struct hidp_session *session)
struct sock *ctrl_sk = session->ctrl_sock->sk;
struct sock *intr_sk = session->intr_sock->sk;
struct sk_buff *skb;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ add_wait_queue(&hidp_session_wq, &wait);
for (;;) {
/*
* This thread can be woken up two ways:
@@ -1188,12 +1191,10 @@ static void hidp_session_run(struct hidp_session *session)
* session->terminate flag and wakes this thread up.
* - Via modifying the socket state of ctrl/intr_sock. This
* thread is woken up by ->sk_state_changed().
- *
- * Note: set_current_state() performs any necessary
- * memory-barriers for us.
*/
- set_current_state(TASK_INTERRUPTIBLE);
+ /* Ensure session->terminate is updated */
+ smp_mb__before_atomic();
if (atomic_read(&session->terminate))
break;
@@ -1227,11 +1228,22 @@ static void hidp_session_run(struct hidp_session *session)
hidp_process_transmit(session, &session->ctrl_transmit,
session->ctrl_sock);
- schedule();
+ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
+ remove_wait_queue(&hidp_session_wq, &wait);
atomic_inc(&session->terminate);
- set_current_state(TASK_RUNNING);
+
+ /* Ensure session->terminate is updated */
+ smp_mb__after_atomic();
+}
+
+/*
+ * Wait-queue callback used for the ctrl/intr socket wait entries of the
+ * session thread. Whatever wake-up arrives on those queues is forwarded to
+ * hidp_session_wq; the arguments are not inspected.
+ */
+static int hidp_session_wake_function(wait_queue_t *wait, unsigned int mode,
+				      int sync, void *key)
+{
+	wake_up_interruptible(&hidp_session_wq);
+
+	return 0;
}
/*
@@ -1244,7 +1256,8 @@ static void hidp_session_run(struct hidp_session *session)
static int hidp_session_thread(void *arg)
{
struct hidp_session *session = arg;
- wait_queue_t ctrl_wait, intr_wait;
+ DEFINE_WAIT_FUNC(ctrl_wait, hidp_session_wake_function);
+ DEFINE_WAIT_FUNC(intr_wait, hidp_session_wake_function);
BT_DBG("session %p", session);
@@ -1254,8 +1267,6 @@ static int hidp_session_thread(void *arg)
set_user_nice(current, -15);
hidp_set_timer(session);
- init_waitqueue_entry(&ctrl_wait, current);
- init_waitqueue_entry(&intr_wait, current);
add_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait);
add_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait);
/* This memory barrier is paired with wq_has_sleeper(). See
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 507b80d59dec..67a8642f57ea 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -87,7 +87,8 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
BT_DBG("sk %p", sk);
- if (!addr || addr->sa_family != AF_BLUETOOTH)
+ if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
+ addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
memset(&la, 0, sizeof(la));
@@ -181,7 +182,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
BT_DBG("sk %p", sk);
- if (!addr || alen < sizeof(addr->sa_family) ||
+ if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index ac3c650cb234..1aaccf637479 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -197,7 +197,7 @@ static void rfcomm_sock_kill(struct sock *sk)
if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
return;
- BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, atomic_read(&sk->sk_refcnt));
+ BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
/* Kill poor orphan */
bt_sock_unlink(&rfcomm_sk_list, sk);
@@ -339,7 +339,8 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr
struct sock *sk = sock->sk;
int len, err = 0;
- if (!addr || addr->sa_family != AF_BLUETOOTH)
+ if (!addr || addr_len < offsetofend(struct sockaddr, sa_family) ||
+ addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
memset(&sa, 0, sizeof(sa));
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 728e0c8dc8e7..795e920a3281 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -524,10 +524,8 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr,
BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr);
- if (!addr || addr->sa_family != AF_BLUETOOTH)
- return -EINVAL;
-
- if (addr_len < sizeof(struct sockaddr_sco))
+ if (!addr || addr_len < sizeof(struct sockaddr_sco) ||
+ addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
lock_sock(sk);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 067cf0313449..2261e5194c82 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -149,12 +149,12 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = skb->nf_bridge;
- if (atomic_read(&nf_bridge->use) > 1) {
+ if (refcount_read(&nf_bridge->use) > 1) {
struct nf_bridge_info *tmp = nf_bridge_alloc(skb);
if (tmp) {
memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info));
- atomic_set(&tmp->use, 1);
+ refcount_set(&tmp->use, 1);
}
nf_bridge_put(nf_bridge);
nf_bridge = tmp;
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 0b5dd607444c..723f25eed8ea 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -865,7 +865,7 @@ static struct attribute *bridge_attrs[] = {
NULL
};
-static struct attribute_group bridge_group = {
+static const struct attribute_group bridge_group = {
.name = SYSFS_BRIDGE_ATTR,
.attrs = bridge_attrs,
};
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index e0bb624c3845..dfc86a0199da 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -61,7 +61,7 @@ static int ebt_dnat_tg_check(const struct xt_tgchk_param *par)
(strcmp(par->table, "broute") != 0 ||
hook_mask & ~(1 << NF_BR_BROUTING)))
return -EINVAL;
- if (INVALID_TARGET)
+ if (ebt_invalid_target(info->target))
return -EINVAL;
return 0;
}
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 66697cbd0a8b..19f0f9592d32 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -44,7 +44,7 @@ static int ebt_mark_tg_check(const struct xt_tgchk_param *par)
tmp = info->target | ~EBT_VERDICT_BITS;
if (BASE_CHAIN && tmp == EBT_RETURN)
return -EINVAL;
- if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
+ if (ebt_invalid_target(tmp))
return -EINVAL;
tmp = info->target & ~EBT_VERDICT_BITS;
if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE &&
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 8d2a85e0594e..a7223eaf490b 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -47,7 +47,7 @@ static int ebt_redirect_tg_check(const struct xt_tgchk_param *par)
(strcmp(par->table, "broute") != 0 ||
hook_mask & ~(1 << NF_BR_BROUTING)))
return -EINVAL;
- if (INVALID_TARGET)
+ if (ebt_invalid_target(info->target))
return -EINVAL;
return 0;
}
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index e56ccd060d26..11cf9e9e9222 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -51,7 +51,7 @@ static int ebt_snat_tg_check(const struct xt_tgchk_param *par)
if (BASE_CHAIN && tmp == EBT_RETURN)
return -EINVAL;
- if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
+ if (ebt_invalid_target(tmp))
return -EINVAL;
tmp = info->target | EBT_VERDICT_BITS;
if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT)
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 7506b853a84d..632d5a416d97 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1013,7 +1013,7 @@ static const struct proto_ops caif_stream_ops = {
static void caif_sock_destructor(struct sock *sk)
{
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
- caif_assert(!atomic_read(&sk->sk_wmem_alloc));
+ caif_assert(!refcount_read(&sk->sk_wmem_alloc));
caif_assert(sk_unhashed(sk));
caif_assert(!sk->sk_socket);
if (!sock_flag(sk, SOCK_DEAD)) {
diff --git a/net/core/datagram.c b/net/core/datagram.c
index e5311a7c70da..454ec8923333 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -188,7 +188,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
}
}
*peeked = 1;
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
} else {
__skb_unlink(skb, queue);
if (destructor)
@@ -358,7 +358,7 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
spin_lock_bh(&sk_queue->lock);
if (skb == skb_peek(sk_queue)) {
__skb_unlink(skb, sk_queue);
- atomic_dec(&skb->users);
+ refcount_dec(&skb->users);
if (destructor)
destructor(sk, skb);
err = 0;
@@ -614,7 +614,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
skb->data_len += copied;
skb->len += copied;
skb->truesize += truesize;
- atomic_add(truesize, &skb->sk->sk_wmem_alloc);
+ refcount_add(truesize, &skb->sk->sk_wmem_alloc);
while (copied) {
int size = min_t(int, copied, PAGE_SIZE - start);
skb_fill_page_desc(skb, frag++, pages[n], start, size);
diff --git a/net/core/dev.c b/net/core/dev.c
index a91572aa73d5..7098fba52be1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1862,7 +1862,7 @@ static inline int deliver_skb(struct sk_buff *skb,
{
if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
return -ENOMEM;
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
@@ -2484,10 +2484,10 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
if (unlikely(!skb))
return;
- if (likely(atomic_read(&skb->users) == 1)) {
+ if (likely(refcount_read(&skb->users) == 1)) {
smp_rmb();
- atomic_set(&skb->users, 0);
- } else if (likely(!atomic_dec_and_test(&skb->users))) {
+ refcount_set(&skb->users, 0);
+ } else if (likely(!refcount_dec_and_test(&skb->users))) {
return;
}
get_kfree_skb_cb(skb)->reason = reason;
@@ -3955,7 +3955,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
clist = clist->next;
- WARN_ON(atomic_read(&skb->users));
+ WARN_ON(refcount_read(&skb->users));
if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
trace_consume_skb(skb);
else
@@ -4844,6 +4844,13 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
}
EXPORT_SYMBOL(gro_find_complete_by_type);
+/*
+ * Free an skb that GRO flagged with NAPI_GRO_FREE_STOLEN_HEAD: drop its dst
+ * and secpath references, then return the sk_buff itself to
+ * skbuff_head_cache.
+ */
+static void napi_skb_free_stolen_head(struct sk_buff *skb)
+{
+	/* Release the references still attached to the head. */
+	skb_dst_drop(skb);
+	secpath_reset(skb);
+
+	/* Only the sk_buff structure is handed back to its slab cache. */
+	kmem_cache_free(skbuff_head_cache, skb);
+}
+
static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
{
switch (ret) {
@@ -4857,13 +4864,10 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
break;
case GRO_MERGED_FREE:
- if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
- skb_dst_drop(skb);
- secpath_reset(skb);
- kmem_cache_free(skbuff_head_cache, skb);
- } else {
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ napi_skb_free_stolen_head(skb);
+ else
__kfree_skb(skb);
- }
break;
case GRO_HELD:
@@ -4935,10 +4939,16 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
break;
case GRO_DROP:
- case GRO_MERGED_FREE:
napi_reuse_skb(napi, skb);
break;
+ case GRO_MERGED_FREE:
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ napi_skb_free_stolen_head(skb);
+ else
+ napi_reuse_skb(napi, skb);
+ break;
+
case GRO_MERGED:
case GRO_CONSUMED:
break;
@@ -7825,7 +7835,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
{
#if BITS_PER_LONG == 64
BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
- memcpy(stats64, netdev_stats, sizeof(*stats64));
+ memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
/* zero out counters that only exist in rtnl_link_stats64 */
memset((char *)stats64 + sizeof(*netdev_stats), 0,
sizeof(*stats64) - sizeof(*netdev_stats));
@@ -7867,9 +7877,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
} else {
netdev_stats_to_stats64(storage, &dev->stats);
}
- storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
- storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
- storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
+ storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
+ storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
+ storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 3bba291c6c32..a0093e1b0235 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -46,7 +46,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
if (r == NULL)
return -ENOMEM;
- atomic_set(&r->refcnt, 1);
+ refcount_set(&r->refcnt, 1);
r->action = FR_ACT_TO_TBL;
r->pref = pref;
r->table = table;
@@ -283,7 +283,7 @@ jumped:
if (err != -EAGAIN) {
if ((arg->flags & FIB_LOOKUP_NOREF) ||
- likely(atomic_inc_not_zero(&rule->refcnt))) {
+ likely(refcount_inc_not_zero(&rule->refcnt))) {
arg->rule = rule;
goto out;
}
@@ -517,7 +517,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
last = r;
}
- fib_rule_get(rule);
+ refcount_set(&rule->refcnt, 1);
if (last)
list_add_rcu(&rule->list, &last->list);
diff --git a/net/core/filter.c b/net/core/filter.c
index b39c869d22e3..94169572d002 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -54,6 +54,7 @@
#include <net/dst.h>
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
+#include <net/tcp.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -2011,7 +2012,7 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
{
const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
- u32 off = skb->network_header - skb->mac_header;
+ u32 off = skb_mac_header_len(skb);
int ret;
ret = skb_cow(skb, len_diff);
@@ -2047,7 +2048,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
{
const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
- u32 off = skb->network_header - skb->mac_header;
+ u32 off = skb_mac_header_len(skb);
int ret;
ret = skb_unclone(skb, GFP_ATOMIC);
@@ -2153,6 +2154,124 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = {
.arg2_type = ARG_ANYTHING,
};
+/*
+ * Size of the fixed network header for the skb's protocol: struct iphdr for
+ * IPv4, struct ipv6hdr for IPv6. Any other protocol yields ~0U.
+ */
+static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
+{
+	const __be16 proto = skb->protocol;
+
+	if (proto == htons(ETH_P_IP))
+		return sizeof(struct iphdr);
+	if (proto == htons(ETH_P_IPV6))
+		return sizeof(struct ipv6hdr);
+
+	/* Neither IPv4 nor IPv6. */
+	return ~0U;
+}
+
+/*
+ * Push @len_diff bytes into @skb at offset (mac header length + base network
+ * header length) and, for GSO packets, adjust the GSO metadata accordingly.
+ *
+ * Returns 0 on success or the negative error from skb_cow() /
+ * bpf_skb_net_hdr_push().
+ */
+static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
+{
+	const u32 push_off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
+	struct skb_shared_info *shinfo;
+	int err;
+
+	err = skb_cow(skb, len_diff);
+	if (unlikely(err < 0))
+		return err;
+
+	err = bpf_skb_net_hdr_push(skb, push_off, len_diff);
+	if (unlikely(err < 0))
+		return err;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	shinfo = skb_shinfo(skb);
+	/* Due to header grow, MSS needs to be downgraded. */
+	shinfo->gso_size -= len_diff;
+	/* Header must be checked, and gso_segs recomputed. */
+	shinfo->gso_type |= SKB_GSO_DODGY;
+	shinfo->gso_segs = 0;
+
+	return 0;
+}
+
+/*
+ * Pop @len_diff bytes from @skb at offset (mac header length + base network
+ * header length) and, for GSO packets, adjust the GSO metadata accordingly.
+ *
+ * Returns 0 on success or the negative error from skb_unclone() /
+ * bpf_skb_net_hdr_pop().
+ */
+static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
+{
+	const u32 pop_off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
+	struct skb_shared_info *shinfo;
+	int err;
+
+	err = skb_unclone(skb, GFP_ATOMIC);
+	if (unlikely(err < 0))
+		return err;
+
+	err = bpf_skb_net_hdr_pop(skb, pop_off, len_diff);
+	if (unlikely(err < 0))
+		return err;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	shinfo = skb_shinfo(skb);
+	/* Due to header shrink, MSS can be upgraded. */
+	shinfo->gso_size += len_diff;
+	/* Header must be checked, and gso_segs recomputed. */
+	shinfo->gso_type |= SKB_GSO_DODGY;
+	shinfo->gso_segs = 0;
+
+	return 0;
+}
+
+/* Upper bound for skb->len: the device MTU plus its hard header length. */
+static u32 __bpf_skb_max_len(const struct sk_buff *skb)
+{
+	const struct net_device *dev = skb->dev;
+
+	return dev->mtu + dev->hard_header_len;
+}
+
+/*
+ * Grow (@len_diff > 0) or shrink (@len_diff < 0) the packet by |@len_diff|
+ * bytes at the end of the base IPv4/IPv6 header.
+ *
+ * Returns:
+ *   -EFAULT    if |@len_diff| exceeds 0xfff,
+ *   -ENOTSUPP  if the protocol is neither IPv4 nor IPv6, if a shrink would
+ *              remove the whole current length or leave less than the base
+ *              network header, or if a grow would exceed the maximum length
+ *              on a non-GSO skb,
+ *   otherwise the result of bpf_skb_net_shrink() / bpf_skb_net_grow()
+ *   (0 on success, negative errno on failure).
+ */
+static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
+{
+	bool trans_same = skb->transport_header == skb->network_header;
+	u32 len_cur, len_diff_abs = abs(len_diff);
+	u32 len_min = bpf_skb_net_base_len(skb);
+	u32 len_max = __bpf_skb_max_len(skb);
+	__be16 proto = skb->protocol;
+	bool shrink = len_diff < 0;
+	int ret;
+
+	if (unlikely(len_diff_abs > 0xfffU))
+		return -EFAULT;
+	if (unlikely(proto != htons(ETH_P_IP) &&
+		     proto != htons(ETH_P_IPV6)))
+		return -ENOTSUPP;
+
+	/*
+	 * Default to everything from the network header onwards; when a
+	 * distinct transport header is set, only the network header length
+	 * counts as the current length.
+	 */
+	len_cur = skb->len - skb_network_offset(skb);
+	if (skb_transport_header_was_set(skb) && !trans_same)
+		len_cur = skb_network_header_len(skb);
+	if ((shrink && (len_diff_abs >= len_cur ||
+			len_cur - len_diff_abs < len_min)) ||
+	    (!shrink && (skb->len + len_diff_abs > len_max &&
+			 !skb_is_gso(skb))))
+		return -ENOTSUPP;
+
+	ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
+		       bpf_skb_net_grow(skb, len_diff_abs);
+
+	bpf_compute_data_end(skb);
+	/* Propagate a failed grow/shrink instead of reporting success. */
+	return ret;
+}
+
+/*
+ * BPF helper: adjust packet room by @len_diff bytes. @flags must be zero and
+ * BPF_ADJ_ROOM_NET is the only @mode handled here.
+ *
+ * Returns -EINVAL for non-zero @flags, -ENOTSUPP for any other @mode, and
+ * otherwise whatever bpf_skb_adjust_net() returns.
+ */
+BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
+	   u32, mode, u64, flags)
+{
+	if (unlikely(flags))
+		return -EINVAL;
+	if (unlikely(mode != BPF_ADJ_ROOM_NET))
+		return -ENOTSUPP;
+
+	return bpf_skb_adjust_net(skb, len_diff);
+}
+
+/*
+ * Prototype for bpf_skb_adjust_room(): ctx pointer followed by three scalar
+ * arguments (len_diff, mode, flags); returns an integer. Not GPL-only.
+ */
+static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
+	.func		= bpf_skb_adjust_room,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+};
+
static u32 __bpf_skb_min_len(const struct sk_buff *skb)
{
u32 min_len = skb_network_offset(skb);
@@ -2165,11 +2284,6 @@ static u32 __bpf_skb_min_len(const struct sk_buff *skb)
return min_len;
}
-static u32 __bpf_skb_max_len(const struct sk_buff *skb)
-{
- return skb->dev->mtu + skb->dev->hard_header_len;
-}
-
static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
{
unsigned int old_len = skb->len;
@@ -2306,6 +2420,7 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_skb_change_proto ||
func == bpf_skb_change_head ||
func == bpf_skb_change_tail ||
+ func == bpf_skb_adjust_room ||
func == bpf_skb_pull_data ||
func == bpf_clone_redirect ||
func == bpf_l3_csum_replace ||
@@ -2672,6 +2787,109 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+/*
+ * bpf_setsockopt - set a restricted subset of socket options from a sock_ops
+ * BPF program.
+ * @bpf_sock: sock_ops context; the socket operated on is bpf_sock->sk
+ * @level:    SOL_SOCKET, or SOL_TCP (CONFIG_INET only, and only for sockets
+ *            whose protocol setsockopt handler is tcp_setsockopt)
+ * @optname:  option to set
+ * @optval:   pointer to the option value
+ * @optlen:   length of @optval in bytes
+ *
+ * Supported options:
+ *   SOL_SOCKET: SO_RCVBUF, SO_SNDBUF, SO_MAX_PACING_RATE, SO_PRIORITY,
+ *               SO_RCVLOWAT, SO_MARK (all take an int)
+ *   SOL_TCP:    TCP_CONGESTION (name string), TCP_BPF_IW,
+ *               TCP_BPF_SNDCWND_CLAMP (both take an int)
+ *
+ * Returns 0 on success and -EINVAL if the socket is not a full socket, the
+ * level/option is unsupported, @optlen does not match, or the value is
+ * rejected. For TCP_CONGESTION the result of tcp_set_congestion_control()
+ * is returned.
+ */
+BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
+	   int, level, int, optname, char *, optval, int, optlen)
+{
+	struct sock *sk = bpf_sock->sk;
+	int ret = 0;
+	int val;
+
+	if (!sk_fullsock(sk))
+		return -EINVAL;
+
+	if (level == SOL_SOCKET) {
+		if (optlen != sizeof(int))
+			return -EINVAL;
+		val = *((int *)optval);
+
+		/* Only some socketops are supported */
+		switch (optname) {
+		case SO_RCVBUF:
+			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+			sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+			break;
+		case SO_SNDBUF:
+			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+			sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+			break;
+		case SO_MAX_PACING_RATE:
+			sk->sk_max_pacing_rate = val;
+			sk->sk_pacing_rate = min(sk->sk_pacing_rate,
+						 sk->sk_max_pacing_rate);
+			break;
+		case SO_PRIORITY:
+			sk->sk_priority = val;
+			break;
+		case SO_RCVLOWAT:
+			if (val < 0)
+				val = INT_MAX;
+			sk->sk_rcvlowat = val ? : 1;
+			break;
+		case SO_MARK:
+			sk->sk_mark = val;
+			break;
+		default:
+			ret = -EINVAL;
+		}
+#ifdef CONFIG_INET
+	} else if (level == SOL_TCP &&
+		   sk->sk_prot->setsockopt == tcp_setsockopt) {
+		if (optname == TCP_CONGESTION) {
+			/*
+			 * Zero-filled so that a short, unterminated optval
+			 * cannot leave stack garbage between the copied bytes
+			 * and the final terminator.
+			 */
+			char name[TCP_CA_NAME_MAX] = { 0 };
+
+			strncpy(name, optval, min_t(long, optlen,
+						    TCP_CA_NAME_MAX-1));
+			name[TCP_CA_NAME_MAX-1] = 0;
+			ret = tcp_set_congestion_control(sk, name, false);
+			if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
+				/* replacing an existing ca */
+				tcp_reinit_congestion_control(sk,
+					inet_csk(sk)->icsk_ca_ops);
+		} else {
+			struct tcp_sock *tp = tcp_sk(sk);
+
+			if (optlen != sizeof(int))
+				return -EINVAL;
+
+			val = *((int *)optval);
+			/* Only some options are supported */
+			switch (optname) {
+			case TCP_BPF_IW:
+				if (val <= 0 || tp->data_segs_out > 0)
+					ret = -EINVAL;
+				else
+					tp->snd_cwnd = val;
+				break;
+			case TCP_BPF_SNDCWND_CLAMP:
+				if (val <= 0) {
+					ret = -EINVAL;
+				} else {
+					tp->snd_cwnd_clamp = val;
+					tp->snd_ssthresh = val;
+				}
+				/* Must not fall through into the -EINVAL default. */
+				break;
+			default:
+				ret = -EINVAL;
+			}
+		}
+#endif
+	} else {
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+/*
+ * Prototype for bpf_setsockopt(): ctx pointer, two scalars (level, optname),
+ * then a memory pointer with its constant size (optval, optlen); returns an
+ * integer. GPL-only.
+ */
+static const struct bpf_func_proto bpf_setsockopt_proto = {
+	.func		= bpf_setsockopt,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_MEM,
+	.arg5_type	= ARG_CONST_SIZE,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -2745,6 +2963,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_skb_change_proto_proto;
case BPF_FUNC_skb_change_type:
return &bpf_skb_change_type_proto;
+ case BPF_FUNC_skb_adjust_room:
+ return &bpf_skb_adjust_room_proto;
case BPF_FUNC_skb_change_tail:
return &bpf_skb_change_tail_proto;
case BPF_FUNC_skb_get_tunnel_key:
@@ -2823,6 +3043,17 @@ lwt_inout_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
+sock_ops_func_proto(enum bpf_func_id func_id)
+{
+	/*
+	 * Helper lookup for sock_ops programs: setsockopt is handled here,
+	 * every other id is delegated to the base helper set.
+	 */
+	if (func_id == BPF_FUNC_setsockopt)
+		return &bpf_setsockopt_proto;
+
+	return bpf_base_func_proto(func_id);
+}
+
+static const struct bpf_func_proto *
lwt_xmit_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -2857,38 +3088,11 @@ lwt_xmit_func_proto(enum bpf_func_id func_id)
}
}
-static void __set_access_aux_info(int off, struct bpf_insn_access_aux *info)
+static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
{
- info->ctx_field_size = 4;
- switch (off) {
- case offsetof(struct __sk_buff, pkt_type) ...
- offsetof(struct __sk_buff, pkt_type) + sizeof(__u32) - 1:
- case offsetof(struct __sk_buff, vlan_present) ...
- offsetof(struct __sk_buff, vlan_present) + sizeof(__u32) - 1:
- info->converted_op_size = 1;
- break;
- case offsetof(struct __sk_buff, queue_mapping) ...
- offsetof(struct __sk_buff, queue_mapping) + sizeof(__u32) - 1:
- case offsetof(struct __sk_buff, protocol) ...
- offsetof(struct __sk_buff, protocol) + sizeof(__u32) - 1:
- case offsetof(struct __sk_buff, vlan_tci) ...
- offsetof(struct __sk_buff, vlan_tci) + sizeof(__u32) - 1:
- case offsetof(struct __sk_buff, vlan_proto) ...
- offsetof(struct __sk_buff, vlan_proto) + sizeof(__u32) - 1:
- case offsetof(struct __sk_buff, tc_index) ...
- offsetof(struct __sk_buff, tc_index) + sizeof(__u32) - 1:
- case offsetof(struct __sk_buff, tc_classid) ...
- offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
- info->converted_op_size = 2;
- break;
- default:
- info->converted_op_size = 4;
- }
-}
+ const int size_default = sizeof(__u32);
-static bool __is_valid_access(int off, int size, enum bpf_access_type type,
- struct bpf_insn_access_aux *info)
-{
if (off < 0 || off >= sizeof(struct __sk_buff))
return false;
@@ -2897,40 +3101,24 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type,
return false;
switch (off) {
- case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
- if (off + size >
- offsetof(struct __sk_buff, cb[4]) + sizeof(__u32))
+ case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+ if (off + size > offsetofend(struct __sk_buff, cb[4]))
return false;
break;
- case offsetof(struct __sk_buff, data) ...
- offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
- if (size != sizeof(__u32))
+ case bpf_ctx_range(struct __sk_buff, data):
+ case bpf_ctx_range(struct __sk_buff, data_end):
+ if (size != size_default)
return false;
- info->reg_type = PTR_TO_PACKET;
- break;
- case offsetof(struct __sk_buff, data_end) ...
- offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
- if (size != sizeof(__u32))
- return false;
- info->reg_type = PTR_TO_PACKET_END;
break;
default:
+ /* Only narrow read access allowed for now. */
if (type == BPF_WRITE) {
- if (size != sizeof(__u32))
+ if (size != size_default)
return false;
} else {
- int allowed;
-
- /* permit narrower load for not cb/data/data_end fields */
-#ifdef __LITTLE_ENDIAN
- allowed = (off & 0x3) == 0 && size <= 4 && (size & (size - 1)) == 0;
-#else
- allowed = (off & 0x3) + size == 4 && size <= 4 && (size & (size - 1)) == 0;
-#endif
- if (!allowed)
+ bpf_ctx_record_field_size(info, size_default);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_default))
return false;
- __set_access_aux_info(off, info);
}
}
@@ -2942,26 +3130,22 @@ static bool sk_filter_is_valid_access(int off, int size,
struct bpf_insn_access_aux *info)
{
switch (off) {
- case offsetof(struct __sk_buff, tc_classid) ...
- offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
- case offsetof(struct __sk_buff, data) ...
- offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
- case offsetof(struct __sk_buff, data_end) ...
- offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
+ case bpf_ctx_range(struct __sk_buff, tc_classid):
+ case bpf_ctx_range(struct __sk_buff, data):
+ case bpf_ctx_range(struct __sk_buff, data_end):
return false;
}
if (type == BPF_WRITE) {
switch (off) {
- case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+ case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
break;
default:
return false;
}
}
- return __is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, info);
}
static bool lwt_is_valid_access(int off, int size,
@@ -2969,24 +3153,31 @@ static bool lwt_is_valid_access(int off, int size,
struct bpf_insn_access_aux *info)
{
switch (off) {
- case offsetof(struct __sk_buff, tc_classid) ...
- offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
+ case bpf_ctx_range(struct __sk_buff, tc_classid):
return false;
}
if (type == BPF_WRITE) {
switch (off) {
- case offsetof(struct __sk_buff, mark):
- case offsetof(struct __sk_buff, priority):
- case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+ case bpf_ctx_range(struct __sk_buff, mark):
+ case bpf_ctx_range(struct __sk_buff, priority):
+ case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
break;
default:
return false;
}
}
- return __is_valid_access(off, size, type, info);
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, data):
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case bpf_ctx_range(struct __sk_buff, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
+ return bpf_skb_is_valid_access(off, size, type, info);
}
static bool sock_filter_is_valid_access(int off, int size,
@@ -3058,19 +3249,27 @@ static bool tc_cls_act_is_valid_access(int off, int size,
{
if (type == BPF_WRITE) {
switch (off) {
- case offsetof(struct __sk_buff, mark):
- case offsetof(struct __sk_buff, tc_index):
- case offsetof(struct __sk_buff, priority):
- case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
- case offsetof(struct __sk_buff, tc_classid):
+ case bpf_ctx_range(struct __sk_buff, mark):
+ case bpf_ctx_range(struct __sk_buff, tc_index):
+ case bpf_ctx_range(struct __sk_buff, priority):
+ case bpf_ctx_range(struct __sk_buff, tc_classid):
+ case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
break;
default:
return false;
}
}
- return __is_valid_access(off, size, type, info);
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, data):
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case bpf_ctx_range(struct __sk_buff, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
+ return bpf_skb_is_valid_access(off, size, type, info);
}
static bool __is_valid_xdp_access(int off, int size)
@@ -3110,101 +3309,141 @@ void bpf_warn_invalid_xdp_action(u32 act)
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
+/*
+ * Basic bounds/shape check for a context access into struct bpf_sock_ops:
+ * the offset must lie inside the structure, be a multiple of the access
+ * size, and the access must be exactly 32 bits wide.
+ */
+static bool __is_valid_sock_ops_access(int off, int size)
+{
+	if (off < 0 || off >= sizeof(struct bpf_sock_ops))
+		return false;
+
+	/* The verifier guarantees that size > 0. */
+	return off % size == 0 && size == sizeof(__u32);
+}
+
+/*
+ * Context access check for sock_ops programs. Writes are confined to the
+ * offsets from bpf_sock_ops.op up to and including the start of
+ * replylong[3]; every access additionally has to pass
+ * __is_valid_sock_ops_access().
+ */
+static bool sock_ops_is_valid_access(int off, int size,
+				     enum bpf_access_type type,
+				     struct bpf_insn_access_aux *info)
+{
+	const int wr_first = offsetof(struct bpf_sock_ops, op);
+	const int wr_last = offsetof(struct bpf_sock_ops, replylong[3]);
+
+	if (type == BPF_WRITE && (off < wr_first || off > wr_last))
+		return false;
+
+	return __is_valid_sock_ops_access(off, size);
+}
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
- struct bpf_prog *prog)
+ struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
int off;
switch (si->off) {
case offsetof(struct __sk_buff, len):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
-
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, len));
+ bpf_target_off(struct sk_buff, len, 4,
+ target_size));
break;
case offsetof(struct __sk_buff, protocol):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
-
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, protocol));
+ bpf_target_off(struct sk_buff, protocol, 2,
+ target_size));
break;
case offsetof(struct __sk_buff, vlan_proto):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
-
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, vlan_proto));
+ bpf_target_off(struct sk_buff, vlan_proto, 2,
+ target_size));
break;
case offsetof(struct __sk_buff, priority):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
-
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, priority));
+ bpf_target_off(struct sk_buff, priority, 4,
+ target_size));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, priority));
+ bpf_target_off(struct sk_buff, priority, 4,
+ target_size));
break;
case offsetof(struct __sk_buff, ingress_ifindex):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4);
-
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, skb_iif));
+ bpf_target_off(struct sk_buff, skb_iif, 4,
+ target_size));
break;
case offsetof(struct __sk_buff, ifindex):
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
-
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
si->dst_reg, si->src_reg,
offsetof(struct sk_buff, dev));
*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
- offsetof(struct net_device, ifindex));
+ bpf_target_off(struct net_device, ifindex, 4,
+ target_size));
break;
case offsetof(struct __sk_buff, hash):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
-
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, hash));
+ bpf_target_off(struct sk_buff, hash, 4,
+ target_size));
break;
case offsetof(struct __sk_buff, mark):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
-
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, mark));
+ bpf_target_off(struct sk_buff, mark, 4,
+ target_size));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, mark));
+ bpf_target_off(struct sk_buff, mark, 4,
+ target_size));
break;
case offsetof(struct __sk_buff, pkt_type):
- return convert_skb_access(SKF_AD_PKTTYPE, si->dst_reg,
- si->src_reg, insn);
+ *target_size = 1;
+ *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
+ PKT_TYPE_OFFSET());
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
+#ifdef __BIG_ENDIAN_BITFIELD
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
+#endif
+ break;
case offsetof(struct __sk_buff, queue_mapping):
- return convert_skb_access(SKF_AD_QUEUE, si->dst_reg,
- si->src_reg, insn);
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct sk_buff, queue_mapping, 2,
+ target_size));
+ break;
case offsetof(struct __sk_buff, vlan_present):
- return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
- si->dst_reg, si->src_reg, insn);
-
case offsetof(struct __sk_buff, vlan_tci):
- return convert_skb_access(SKF_AD_VLAN_TAG,
- si->dst_reg, si->src_reg, insn);
+ BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct sk_buff, vlan_tci, 2,
+ target_size));
+ if (si->off == offsetof(struct __sk_buff, vlan_tci)) {
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg,
+ ~VLAN_TAG_PRESENT);
+ } else {
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 12);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
+ }
+ break;
case offsetof(struct __sk_buff, cb[0]) ...
- offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+ offsetofend(struct __sk_buff, cb[4]) - 1:
BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
offsetof(struct qdisc_skb_cb, data)) %
@@ -3230,6 +3469,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
off -= offsetof(struct __sk_buff, tc_classid);
off += offsetof(struct sk_buff, cb);
off += offsetof(struct qdisc_skb_cb, tc_classid);
+ *target_size = 2;
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
si->src_reg, off);
@@ -3255,14 +3495,14 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, tc_index):
#ifdef CONFIG_NET_SCHED
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
-
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, tc_index));
+ bpf_target_off(struct sk_buff, tc_index, 2,
+ target_size));
else
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, tc_index));
+ bpf_target_off(struct sk_buff, tc_index, 2,
+ target_size));
#else
if (type == BPF_WRITE)
*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
@@ -3273,10 +3513,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, napi_id):
#if defined(CONFIG_NET_RX_BUSY_POLL)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4);
-
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, napi_id));
+ bpf_target_off(struct sk_buff, napi_id, 4,
+ target_size));
*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#else
@@ -3291,7 +3530,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
- struct bpf_prog *prog)
+ struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
@@ -3335,22 +3574,22 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
- struct bpf_prog *prog)
+ struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
switch (si->off) {
case offsetof(struct __sk_buff, ifindex):
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
-
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
si->dst_reg, si->src_reg,
offsetof(struct sk_buff, dev));
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
- offsetof(struct net_device, ifindex));
+ bpf_target_off(struct net_device, ifindex, 4,
+ target_size));
break;
default:
- return bpf_convert_ctx_access(type, si, insn_buf, prog);
+ return bpf_convert_ctx_access(type, si, insn_buf, prog,
+ target_size);
}
return insn - insn_buf;
@@ -3359,7 +3598,7 @@ static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
static u32 xdp_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
- struct bpf_prog *prog)
+ struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
@@ -3379,6 +3618,139 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+/* Rewrite one load/store on the program-visible struct bpf_sock_ops context
+ * into instructions that access struct bpf_sock_ops_kern, or the struct
+ * sock_common fields reached through its sk pointer.
+ *
+ * @type: BPF_WRITE selects a store; only consulted for the op..replylong range
+ * @si: instruction being rewritten; its off, dst_reg and src_reg are read
+ * @insn_buf: output buffer that receives the generated instructions
+ * @prog: not referenced in this function
+ * @target_size: not referenced in this function
+ *
+ * Returns the number of instructions written to @insn_buf; this is 0 when
+ * si->off matches none of the cases below, since the switch has no default.
+ */
+static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+ int off;
+
+ switch (si->off) {
+ /* op, reply and replylong: stored directly in bpf_sock_ops_kern.
+ * NOTE(review): the single rebase on op assumes these fields have the
+ * same relative layout in both structs; the BUILD_BUG_ONs below only
+ * compare field sizes - confirm against the struct definitions.
+ */
+ case offsetof(struct bpf_sock_ops, op) ...
+ offsetof(struct bpf_sock_ops, replylong[3]):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
+ FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
+ BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
+ FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
+ BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
+ FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
+ /* Translate the user-visible offset into the kernel struct offset. */
+ off = si->off;
+ off -= offsetof(struct bpf_sock_ops, op);
+ off += offsetof(struct bpf_sock_ops_kern, op);
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ off);
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ off);
+ break;
+
+ /* The remaining fields are read in two steps: load the sk pointer from
+ * bpf_sock_ops_kern into dst_reg, then load the sock_common field
+ * through dst_reg.
+ */
+ case offsetof(struct bpf_sock_ops, family):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common, skc_family));
+ break;
+
+ case offsetof(struct bpf_sock_ops, remote_ip4):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common, skc_daddr));
+ break;
+
+ case offsetof(struct bpf_sock_ops, local_ip4):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common,
+ skc_rcv_saddr));
+ break;
+
+ /* IPv6 addresses: off becomes the byte offset inside the remote_ip6[]
+ * array and is added to the offset of skc_v6_daddr.s6_addr32[0].
+ * Without CONFIG_IPV6 the access is replaced by loading constant 0.
+ */
+ case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
+ offsetof(struct bpf_sock_ops, remote_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ skc_v6_daddr.s6_addr32[0]) != 4);
+
+ off = si->off;
+ off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common,
+ skc_v6_daddr.s6_addr32[0]) +
+ off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+
+ /* Same scheme as remote_ip6, reading skc_v6_rcv_saddr instead. */
+ case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
+ offsetof(struct bpf_sock_ops, local_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0]) != 4);
+
+ off = si->off;
+ off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0]) +
+ off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+
+ case offsetof(struct bpf_sock_ops, remote_port):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common, skc_dport));
+ /* The 16-bit value is shifted into the upper half of the 32-bit
+ * result unless __BIG_ENDIAN_BITFIELD is defined.
+ * NOTE(review): presumably this positions the port bytes as the
+ * program-visible 32-bit field expects - confirm the intent.
+ */
+#ifndef __BIG_ENDIAN_BITFIELD
+ *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
+#endif
+ break;
+
+ case offsetof(struct bpf_sock_ops, local_port):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common, skc_num));
+ break;
+ }
+ /* Number of instructions emitted into insn_buf. */
+ return insn - insn_buf;
+}
+
const struct bpf_verifier_ops sk_filter_prog_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
@@ -3428,6 +3800,12 @@ const struct bpf_verifier_ops cg_sock_prog_ops = {
.convert_ctx_access = sock_filter_convert_ctx_access,
};
+/* Verifier callbacks for sock_ops programs: helper lookup, context access
+ * validation and context access rewriting.
+ */
+const struct bpf_verifier_ops sock_ops_prog_ops = {
+ .get_func_proto = sock_ops_func_proto,
+ .is_valid_access = sock_ops_is_valid_access,
+ .convert_ctx_access = sock_ops_convert_ctx_access,
+};
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index dadb5eef91c3..e31fc11a8000 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -124,7 +124,7 @@ static bool neigh_del(struct neighbour *n, __u8 state,
bool retval = false;
write_lock(&n->lock);
- if (atomic_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
+ if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
struct neighbour *neigh;
neigh = rcu_dereference_protected(n->next,
@@ -254,7 +254,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
neigh_del_timer(n);
n->dead = 1;
- if (atomic_read(&n->refcnt) != 1) {
+ if (refcount_read(&n->refcnt) != 1) {
/* The most unpleasant situation.
We must destroy neighbour entry,
but someone still uses it.
@@ -335,7 +335,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
NEIGH_CACHE_STAT_INC(tbl, allocs);
n->tbl = tbl;
- atomic_set(&n->refcnt, 1);
+ refcount_set(&n->refcnt, 1);
n->dead = 1;
out:
return n;
@@ -444,7 +444,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
rcu_read_lock_bh();
n = __neigh_lookup_noref(tbl, pkey, dev);
if (n) {
- if (!atomic_inc_not_zero(&n->refcnt))
+ if (!refcount_inc_not_zero(&n->refcnt))
n = NULL;
NEIGH_CACHE_STAT_INC(tbl, hits);
}
@@ -473,7 +473,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
n = rcu_dereference_bh(n->next)) {
if (!memcmp(n->primary_key, pkey, key_len) &&
net_eq(dev_net(n->dev), net)) {
- if (!atomic_inc_not_zero(&n->refcnt))
+ if (!refcount_inc_not_zero(&n->refcnt))
n = NULL;
NEIGH_CACHE_STAT_INC(tbl, hits);
break;
@@ -709,7 +709,7 @@ static void neigh_parms_destroy(struct neigh_parms *parms);
static inline void neigh_parms_put(struct neigh_parms *parms)
{
- if (atomic_dec_and_test(&parms->refcnt))
+ if (refcount_dec_and_test(&parms->refcnt))
neigh_parms_destroy(parms);
}
@@ -821,7 +821,7 @@ static void neigh_periodic_work(struct work_struct *work)
if (time_before(n->used, n->confirmed))
n->used = n->confirmed;
- if (atomic_read(&n->refcnt) == 1 &&
+ if (refcount_read(&n->refcnt) == 1 &&
(state == NUD_FAILED ||
time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
*np = n->next;
@@ -1479,7 +1479,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
if (p) {
p->tbl = tbl;
- atomic_set(&p->refcnt, 1);
+ refcount_set(&p->refcnt, 1);
p->reachable_time =
neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
dev_hold(dev);
@@ -1542,7 +1542,7 @@ void neigh_table_init(int index, struct neigh_table *tbl)
INIT_LIST_HEAD(&tbl->parms_list);
list_add(&tbl->parms.list, &tbl->parms_list);
write_pnet(&tbl->parms.net, &init_net);
- atomic_set(&tbl->parms.refcnt, 1);
+ refcount_set(&tbl->parms.refcnt, 1);
tbl->parms.reachable_time =
neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
@@ -1796,7 +1796,7 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
if ((parms->dev &&
nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
- nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
+ nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
/* approximative value for deprecated QUEUE_LEN (in packets) */
@@ -2234,7 +2234,7 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
- ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
+ ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
read_unlock_bh(&neigh->lock);
if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 58e6cc70500d..b4f9922b6f23 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -626,7 +626,7 @@ static struct attribute *netstat_attrs[] = {
};
-static struct attribute_group netstat_group = {
+static const struct attribute_group netstat_group = {
.name = "statistics",
.attrs = netstat_attrs,
};
@@ -636,7 +636,7 @@ static struct attribute *wireless_attrs[] = {
NULL
};
-static struct attribute_group wireless_group = {
+static const struct attribute_group wireless_group = {
.name = "wireless",
.attrs = wireless_attrs,
};
@@ -1204,7 +1204,7 @@ static struct attribute *dql_attrs[] = {
NULL
};
-static struct attribute_group dql_group = {
+static const struct attribute_group dql_group = {
.name = "byte_queue_limits",
.attrs = dql_attrs,
};
@@ -1448,7 +1448,7 @@ static void *net_grab_current_ns(void)
struct net *ns = current->nsproxy->net_ns;
#ifdef CONFIG_NET_NS
if (ns)
- atomic_inc(&ns->passive);
+ refcount_inc(&ns->passive);
#endif
return ns;
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2178db8e47cd..8726d051f31d 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -284,7 +284,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
LIST_HEAD(net_exit_list);
atomic_set(&net->count, 1);
- atomic_set(&net->passive, 1);
+ refcount_set(&net->passive, 1);
net->dev_base_seq = 1;
net->user_ns = user_ns;
idr_init(&net->netns_ids);
@@ -380,7 +380,7 @@ static void net_free(struct net *net)
void net_drop_ns(void *p)
{
struct net *ns = p;
- if (ns && atomic_dec_and_test(&ns->passive))
+ if (ns && refcount_dec_and_test(&ns->passive))
net_free(ns);
}
@@ -501,6 +501,23 @@ static void cleanup_net(struct work_struct *work)
net_drop_ns(net);
}
}
+
+/**
+ * net_ns_barrier - wait until concurrent net_cleanup_work is done
+ *
+ * cleanup_net runs from work queue and will first remove namespaces
+ * from the global list, then run net exit functions.
+ *
+ * Call this in module exit path to make sure that all netns
+ * ->exit ops have been invoked before the function is removed.
+ *
+ * Takes no arguments and returns nothing; its only action is to acquire
+ * and release net_mutex.
+ */
+void net_ns_barrier(void)
+{
+ /* Acquiring net_mutex blocks until any current holder has released
+ * it; the lock is dropped again immediately.
+ * NOTE(review): this relies on cleanup_net holding net_mutex while it
+ * runs the exit ops, which is not visible in this hunk - confirm.
+ */
+ mutex_lock(&net_mutex);
+ mutex_unlock(&net_mutex);
+}
+EXPORT_SYMBOL(net_ns_barrier);
+
static DECLARE_WORK(net_cleanup_work, cleanup_net);
void __put_net(struct net *net)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 37c1e34ddd85..d3408a693166 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -277,7 +277,7 @@ static void zap_completion_queue(void)
struct sk_buff *skb = clist;
clist = clist->next;
if (!skb_irq_freeable(skb)) {
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
dev_kfree_skb_any(skb); /* put this one back */
} else {
__kfree_skb(skb);
@@ -309,7 +309,7 @@ repeat:
return NULL;
}
- atomic_set(&skb->users, 1);
+ refcount_set(&skb->users, 1);
skb_reserve(skb, reserve);
return skb;
}
@@ -632,7 +632,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
skb_queue_head_init(&npinfo->txq);
INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
- atomic_set(&npinfo->refcnt, 1);
+ refcount_set(&npinfo->refcnt, 1);
ops = np->dev->netdev_ops;
if (ops->ndo_netpoll_setup) {
@@ -642,7 +642,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
}
} else {
npinfo = rtnl_dereference(ndev->npinfo);
- atomic_inc(&npinfo->refcnt);
+ refcount_inc(&npinfo->refcnt);
}
npinfo->netpoll = np;
@@ -821,7 +821,7 @@ void __netpoll_cleanup(struct netpoll *np)
synchronize_srcu(&netpoll_srcu);
- if (atomic_dec_and_test(&npinfo->refcnt)) {
+ if (refcount_dec_and_test(&npinfo->refcnt)) {
const struct net_device_ops *ops;
ops = np->dev->netdev_ops;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2dd42c5b0366..6e1e10ff433a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3363,7 +3363,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
{
ktime_t idle_start = ktime_get();
- while (atomic_read(&(pkt_dev->skb->users)) != 1) {
+ while (refcount_read(&(pkt_dev->skb->users)) != 1) {
if (signal_pending(current))
break;
@@ -3420,7 +3420,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) {
skb = pkt_dev->skb;
skb->protocol = eth_type_trans(skb, skb->dev);
- atomic_add(burst, &skb->users);
+ refcount_add(burst, &skb->users);
local_bh_disable();
do {
ret = netif_receive_skb(skb);
@@ -3428,11 +3428,11 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->errors++;
pkt_dev->sofar++;
pkt_dev->seq_num++;
- if (atomic_read(&skb->users) != burst) {
+ if (refcount_read(&skb->users) != burst) {
/* skb was queued by rps/rfs or taps,
* so cannot reuse this skb
*/
- atomic_sub(burst - 1, &skb->users);
+ WARN_ON(refcount_sub_and_test(burst - 1, &skb->users));
/* get out of the loop and wait
* until skb is consumed
*/
@@ -3446,7 +3446,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
goto out; /* Skips xmit_mode M_START_XMIT */
} else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {
local_bh_disable();
- atomic_inc(&pkt_dev->skb->users);
+ refcount_inc(&pkt_dev->skb->users);
ret = dev_queue_xmit(pkt_dev->skb);
switch (ret) {
@@ -3487,7 +3487,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->last_ok = 0;
goto unlock;
}
- atomic_add(burst, &pkt_dev->skb->users);
+ refcount_add(burst, &pkt_dev->skb->users);
xmit_more:
ret = netdev_start_xmit(pkt_dev->skb, odev, txq, --burst > 0);
@@ -3513,11 +3513,11 @@ xmit_more:
/* fallthru */
case NETDEV_TX_BUSY:
/* Retry it next time */
- atomic_dec(&(pkt_dev->skb->users));
+ refcount_dec(&(pkt_dev->skb->users));
pkt_dev->last_ok = 0;
}
if (unlikely(burst))
- atomic_sub(burst, &pkt_dev->skb->users);
+ WARN_ON(refcount_sub_and_test(burst, &pkt_dev->skb->users));
unlock:
HARD_TX_UNLOCK(odev, txq);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ed51de525a88..d1ba90980be1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -649,7 +649,7 @@ int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int g
NETLINK_CB(skb).dst_group = group;
if (echo)
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
if (echo)
err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f75897a33fa4..8b11341ed69a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -176,7 +176,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
memset(skb, 0, offsetof(struct sk_buff, tail));
skb->head = NULL;
skb->truesize = sizeof(struct sk_buff);
- atomic_set(&skb->users, 1);
+ refcount_set(&skb->users, 1);
skb->mac_header = (typeof(skb->mac_header))~0U;
out:
@@ -247,7 +247,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
/* Account for allocated memory : skb + skb->head */
skb->truesize = SKB_TRUESIZE(size);
skb->pfmemalloc = pfmemalloc;
- atomic_set(&skb->users, 1);
+ refcount_set(&skb->users, 1);
skb->head = data;
skb->data = data;
skb_reset_tail_pointer(skb);
@@ -268,7 +268,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
skb->fclone = SKB_FCLONE_ORIG;
- atomic_set(&fclones->fclone_ref, 1);
+ refcount_set(&fclones->fclone_ref, 1);
fclones->skb2.fclone = SKB_FCLONE_CLONE;
}
@@ -314,7 +314,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
memset(skb, 0, offsetof(struct sk_buff, tail));
skb->truesize = SKB_TRUESIZE(size);
- atomic_set(&skb->users, 1);
+ refcount_set(&skb->users, 1);
skb->head = data;
skb->data = data;
skb_reset_tail_pointer(skb);
@@ -629,7 +629,7 @@ static void kfree_skbmem(struct sk_buff *skb)
* This test would have no chance to be true for the clone,
* while here, branch prediction will be good.
*/
- if (atomic_read(&fclones->fclone_ref) == 1)
+ if (refcount_read(&fclones->fclone_ref) == 1)
goto fastpath;
break;
@@ -637,7 +637,7 @@ static void kfree_skbmem(struct sk_buff *skb)
fclones = container_of(skb, struct sk_buff_fclones, skb2);
break;
}
- if (!atomic_dec_and_test(&fclones->fclone_ref))
+ if (!refcount_dec_and_test(&fclones->fclone_ref))
return;
fastpath:
kmem_cache_free(skbuff_fclone_cache, fclones);
@@ -915,7 +915,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
C(head_frag);
C(data);
C(truesize);
- atomic_set(&n->users, 1);
+ refcount_set(&n->users, 1);
atomic_inc(&(skb_shinfo(skb)->dataref));
skb->cloned = 1;
@@ -1027,9 +1027,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
return NULL;
if (skb->fclone == SKB_FCLONE_ORIG &&
- atomic_read(&fclones->fclone_ref) == 1) {
+ refcount_read(&fclones->fclone_ref) == 1) {
n = &fclones->skb2;
- atomic_set(&fclones->fclone_ref, 2);
+ refcount_set(&fclones->fclone_ref, 2);
} else {
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
@@ -3024,7 +3024,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
get_page(pfrag->page);
skb->truesize += copy;
- atomic_add(copy, &sk->sk_wmem_alloc);
+ refcount_add(copy, &sk->sk_wmem_alloc);
skb->len += copy;
skb->data_len += copy;
offset += copy;
@@ -3844,7 +3844,7 @@ struct sk_buff *skb_clone_sk(struct sk_buff *skb)
struct sock *sk = skb->sk;
struct sk_buff *clone;
- if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt))
+ if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
return NULL;
clone = skb_clone(skb, GFP_ATOMIC);
@@ -3915,7 +3915,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
/* Take a reference to prevent skb_orphan() from freeing the socket,
* but only if the socket refcount is not zero.
*/
- if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+ if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
*skb_hwtstamps(skb) = *hwtstamps;
__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
sock_put(sk);
@@ -3997,7 +3997,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
/* Take a reference to prevent skb_orphan() from freeing the socket,
* but only if the socket refcount is not zero.
*/
- if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+ if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
err = sock_queue_err_skb(sk, skb);
sock_put(sk);
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 6f4b090241c1..ba0ef6a7dbaf 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1528,7 +1528,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
if (likely(sk->sk_net_refcnt))
get_net(net);
sock_net_set(sk, net);
- atomic_set(&sk->sk_wmem_alloc, 1);
+ refcount_set(&sk->sk_wmem_alloc, 1);
mem_cgroup_sk_alloc(sk);
cgroup_sk_alloc(&sk->sk_cgrp_data);
@@ -1552,7 +1552,7 @@ static void __sk_destruct(struct rcu_head *head)
sk->sk_destruct(sk);
filter = rcu_dereference_check(sk->sk_filter,
- atomic_read(&sk->sk_wmem_alloc) == 0);
+ refcount_read(&sk->sk_wmem_alloc) == 0);
if (filter) {
sk_filter_uncharge(sk, filter);
RCU_INIT_POINTER(sk->sk_filter, NULL);
@@ -1602,7 +1602,7 @@ void sk_free(struct sock *sk)
* some packets are still in some tx queue.
* If not null, sock_wfree() will call __sk_free(sk) later
*/
- if (atomic_dec_and_test(&sk->sk_wmem_alloc))
+ if (refcount_dec_and_test(&sk->sk_wmem_alloc))
__sk_free(sk);
}
EXPORT_SYMBOL(sk_free);
@@ -1659,7 +1659,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
/*
* sk_wmem_alloc set to one (see sk_free() and sock_wfree())
*/
- atomic_set(&newsk->sk_wmem_alloc, 1);
+ refcount_set(&newsk->sk_wmem_alloc, 1);
atomic_set(&newsk->sk_omem_alloc, 0);
sk_init_common(newsk);
@@ -1708,7 +1708,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
* (Documentation/RCU/rculist_nulls.txt for details)
*/
smp_wmb();
- atomic_set(&newsk->sk_refcnt, 2);
+ refcount_set(&newsk->sk_refcnt, 2);
/*
* Increment the counter in the same struct proto as the master
@@ -1787,7 +1787,7 @@ void sock_wfree(struct sk_buff *skb)
* Keep a reference on sk_wmem_alloc, this will be released
* after sk_write_space() call
*/
- atomic_sub(len - 1, &sk->sk_wmem_alloc);
+ WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
sk->sk_write_space(sk);
len = 1;
}
@@ -1795,7 +1795,7 @@ void sock_wfree(struct sk_buff *skb)
* if sk_wmem_alloc reaches 0, we must finish what sk_free()
* could not do because of in-flight packets
*/
- if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
+ if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);
@@ -1807,7 +1807,7 @@ void __sock_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
- if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
+ if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
__sk_free(sk);
}
@@ -1829,7 +1829,7 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
* is enough to guarantee sk_free() wont free this sock until
* all in-flight packets are completed
*/
- atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+ refcount_add(skb->truesize, &sk->sk_wmem_alloc);
}
EXPORT_SYMBOL(skb_set_owner_w);
@@ -1851,8 +1851,8 @@ void skb_orphan_partial(struct sk_buff *skb)
) {
struct sock *sk = skb->sk;
- if (atomic_inc_not_zero(&sk->sk_refcnt)) {
- atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+ if (refcount_inc_not_zero(&sk->sk_refcnt)) {
+ WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
skb->destructor = sock_efree;
}
} else {
@@ -1912,7 +1912,7 @@ EXPORT_SYMBOL(sock_i_ino);
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
gfp_t priority)
{
- if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+ if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
struct sk_buff *skb = alloc_skb(size, priority);
if (skb) {
skb_set_owner_w(skb, sk);
@@ -1987,7 +1987,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
break;
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
+ if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
break;
if (sk->sk_shutdown & SEND_SHUTDOWN)
break;
@@ -2310,7 +2310,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
if (sk->sk_type == SOCK_STREAM) {
if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
return 1;
- } else if (atomic_read(&sk->sk_wmem_alloc) <
+ } else if (refcount_read(&sk->sk_wmem_alloc) <
prot->sysctl_wmem[0])
return 1;
}
@@ -2577,7 +2577,7 @@ static void sock_def_write_space(struct sock *sk)
/* Do not wake up a writer until he can make "significant"
* progress. --DaveM
*/
- if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+ if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
@@ -2687,7 +2687,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
* (Documentation/RCU/rculist_nulls.txt for details)
*/
smp_wmb();
- atomic_set(&sk->sk_refcnt, 1);
+ refcount_set(&sk->sk_refcnt, 1);
atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 4fccc0c37fbd..c376af5bfdfb 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -353,7 +353,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
ireq->pktopts = skb;
}
ireq->ir_iif = sk->sk_bound_dev_if;
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index eeb5fc561f80..21dedf6fd0f7 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -559,7 +559,7 @@ static inline void dn_neigh_format_entry(struct seq_file *seq,
(dn->flags&DN_NDFLAG_R2) ? "2" : "-",
(dn->flags&DN_NDFLAG_P3) ? "3" : "-",
dn->n.nud_state,
- atomic_read(&dn->n.refcnt),
+ refcount_read(&dn->n.refcnt),
dn->blksize,
(dn->n.dev) ? dn->n.dev->name : "?");
read_unlock(&n->lock);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 58925b6597de..76c2077c3f5b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -150,7 +150,7 @@ void inet_sock_destruct(struct sock *sk)
}
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
- WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+ WARN_ON(refcount_read(&sk->sk_wmem_alloc));
WARN_ON(sk->sk_wmem_queued);
WARN_ON(sk->sk_forward_alloc);
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index ae206163c273..c2044775ae7d 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -265,7 +265,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
entry->key_len == key_len &&
memcmp(entry->key, key, key_len) == 0) {
entry->activity += 1;
- atomic_inc(&entry->lsm_data->refcount);
+ refcount_inc(&entry->lsm_data->refcount);
secattr->cache = entry->lsm_data;
secattr->flags |= NETLBL_SECATTR_CACHE;
secattr->type = NETLBL_NLTYPE_CIPSOV4;
@@ -332,7 +332,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr,
}
entry->key_len = cipso_ptr_len;
entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len);
- atomic_inc(&secattr->cache->refcount);
+ refcount_inc(&secattr->cache->refcount);
entry->lsm_data = secattr->cache;
bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a7dd088d5fc9..38d9af9b917c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -252,7 +252,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
/* Reference in_dev->dev */
dev_hold(dev);
/* Account for reference dev->ip_ptr (below) */
- in_dev_hold(in_dev);
+ refcount_set(&in_dev->refcnt, 1);
err = devinet_sysctl_register(in_dev);
if (err) {
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1f18b4650253..0cbee0a666ff 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -307,7 +307,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
skb->data_len += tailen;
skb->truesize += tailen;
if (sk)
- atomic_add(tailen, &sk->sk_wmem_alloc);
+ refcount_add(tailen, &sk->sk_wmem_alloc);
goto out;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c4032302d7cd..28f14afd0dd3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -173,7 +173,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
static void ip_ma_put(struct ip_mc_list *im)
{
- if (atomic_dec_and_test(&im->refcnt)) {
+ if (refcount_dec_and_test(&im->refcnt)) {
in_dev_put(im->interface);
kfree_rcu(im, rcu);
}
@@ -199,7 +199,7 @@ static void igmp_stop_timer(struct ip_mc_list *im)
{
spin_lock_bh(&im->lock);
if (del_timer(&im->timer))
- atomic_dec(&im->refcnt);
+ refcount_dec(&im->refcnt);
im->tm_running = 0;
im->reporter = 0;
im->unsolicit_count = 0;
@@ -213,7 +213,7 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
im->tm_running = 1;
if (!mod_timer(&im->timer, jiffies+tv+2))
- atomic_inc(&im->refcnt);
+ refcount_inc(&im->refcnt);
}
static void igmp_gq_start_timer(struct in_device *in_dev)
@@ -249,7 +249,7 @@ static void igmp_mod_timer(struct ip_mc_list *im, int max_delay)
spin_unlock_bh(&im->lock);
return;
}
- atomic_dec(&im->refcnt);
+ refcount_dec(&im->refcnt);
}
igmp_start_timer(im, max_delay);
spin_unlock_bh(&im->lock);
@@ -1374,7 +1374,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
/* initial mode is (EX, empty) */
im->sfmode = MCAST_EXCLUDE;
im->sfcount[MCAST_EXCLUDE] = 1;
- atomic_set(&im->refcnt, 1);
+ refcount_set(&im->refcnt, 1);
spin_lock_init(&im->lock);
#ifdef CONFIG_IP_MULTICAST
setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a3fa1a5b6d98..4089c013cb03 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -756,7 +756,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
* are committed to memory and refcnt initialized.
*/
smp_wmb();
- atomic_set(&req->rsk_refcnt, 2 + 1);
+ refcount_set(&req->rsk_refcnt, 2 + 1);
}
void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index b5e9317eaf9e..96e95e83cc61 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -276,11 +276,11 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
{
if (del_timer(&fq->timer))
- atomic_dec(&fq->refcnt);
+ refcount_dec(&fq->refcnt);
if (!(fq->flags & INET_FRAG_COMPLETE)) {
fq_unlink(fq, f);
- atomic_dec(&fq->refcnt);
+ refcount_dec(&fq->refcnt);
}
}
EXPORT_SYMBOL(inet_frag_kill);
@@ -329,7 +329,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
*/
hlist_for_each_entry(qp, &hb->chain, list) {
if (qp->net == nf && f->match(qp, arg)) {
- atomic_inc(&qp->refcnt);
+ refcount_inc(&qp->refcnt);
spin_unlock(&hb->chain_lock);
qp_in->flags |= INET_FRAG_COMPLETE;
inet_frag_put(qp_in, f);
@@ -339,9 +339,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
#endif
qp = qp_in;
if (!mod_timer(&qp->timer, jiffies + nf->timeout))
- atomic_inc(&qp->refcnt);
+ refcount_inc(&qp->refcnt);
- atomic_inc(&qp->refcnt);
+ refcount_inc(&qp->refcnt);
hlist_add_head(&qp->list, &hb->chain);
spin_unlock(&hb->chain_lock);
@@ -370,7 +370,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);
- atomic_set(&q->refcnt, 1);
+ refcount_set(&q->refcnt, 1);
return q;
}
@@ -405,7 +405,7 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
spin_lock(&hb->chain_lock);
hlist_for_each_entry(q, &hb->chain, list) {
if (q->net == nf && f->match(q, key)) {
- atomic_inc(&q->refcnt);
+ refcount_inc(&q->refcnt);
spin_unlock(&hb->chain_lock);
return q;
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e9a59d2d91d4..2e3389d614d1 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -43,7 +43,7 @@ static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
/* This function handles inet_sock, but also timewait and request sockets
* for IPv4/IPv6.
*/
-u32 sk_ehashfn(const struct sock *sk)
+static u32 sk_ehashfn(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6 &&
@@ -246,7 +246,7 @@ EXPORT_SYMBOL_GPL(__inet_lookup_listener);
/* All sockets share common refcount, but have different destructors */
void sock_gen_put(struct sock *sk)
{
- if (!atomic_dec_and_test(&sk->sk_refcnt))
+ if (!refcount_dec_and_test(&sk->sk_refcnt))
return;
if (sk->sk_state == TCP_TIME_WAIT)
@@ -287,7 +287,7 @@ begin:
continue;
if (likely(INET_MATCH(sk, net, acookie,
saddr, daddr, ports, dif))) {
- if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
+ if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
if (unlikely(!INET_MATCH(sk, net, acookie,
saddr, daddr, ports, dif))) {
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index f8aff2c71cde..5b039159e67a 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -76,7 +76,7 @@ void inet_twsk_free(struct inet_timewait_sock *tw)
void inet_twsk_put(struct inet_timewait_sock *tw)
{
- if (atomic_dec_and_test(&tw->tw_refcnt))
+ if (refcount_dec_and_test(&tw->tw_refcnt))
inet_twsk_free(tw);
}
EXPORT_SYMBOL_GPL(inet_twsk_put);
@@ -131,7 +131,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
* We can use atomic_set() because prior spin_lock()/spin_unlock()
* committed into memory all tw fields.
*/
- atomic_set(&tw->tw_refcnt, 4);
+ refcount_set(&tw->tw_refcnt, 4);
inet_twsk_add_node_rcu(tw, &ehead->chain);
/* Step 3: Remove SK from hash chain */
@@ -195,7 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
* to a non null value before everything is setup for this
* timewait socket.
*/
- atomic_set(&tw->tw_refcnt, 0);
+ refcount_set(&tw->tw_refcnt, 0);
__module_get(tw->tw_prot->owner);
}
@@ -278,7 +278,7 @@ restart:
atomic_read(&twsk_net(tw)->count))
continue;
- if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt)))
+ if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
continue;
if (unlikely((tw->tw_family != family) ||
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 86fa45809540..c5a117cc6619 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -115,7 +115,7 @@ static void inetpeer_gc_worker(struct work_struct *work)
n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
- if (!atomic_read(&p->refcnt)) {
+ if (refcount_read(&p->refcnt) == 1) {
list_del(&p->gc_list);
kmem_cache_free(peer_cachep, p);
}
@@ -202,10 +202,11 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
int cmp = inetpeer_addr_cmp(daddr, &u->daddr);
if (cmp == 0) {
/* Before taking a reference, check if this entry was
- * deleted (refcnt=-1)
+ * deleted (refcnt=0)
*/
- if (!atomic_add_unless(&u->refcnt, 1, -1))
+ if (!refcount_inc_not_zero(&u->refcnt)) {
u = NULL;
+ }
return u;
}
if (cmp == -1)
@@ -382,11 +383,10 @@ static int inet_peer_gc(struct inet_peer_base *base,
while (stackptr > stack) {
stackptr--;
p = rcu_deref_locked(**stackptr, base);
- if (atomic_read(&p->refcnt) == 0) {
+ if (refcount_read(&p->refcnt) == 1) {
smp_rmb();
delta = (__u32)jiffies - p->dtime;
- if (delta >= ttl &&
- atomic_cmpxchg(&p->refcnt, 0, -1) == 0) {
+ if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) {
p->gc_next = gchead;
gchead = p;
}
@@ -432,7 +432,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
relookup:
p = lookup(daddr, stack, base);
if (p != peer_avl_empty) {
- atomic_inc(&p->refcnt);
+ refcount_inc(&p->refcnt);
write_sequnlock_bh(&base->lock);
return p;
}
@@ -444,7 +444,7 @@ relookup:
p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
if (p) {
p->daddr = *daddr;
- atomic_set(&p->refcnt, 1);
+ refcount_set(&p->refcnt, 2);
atomic_set(&p->rid, 0);
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
p->rate_tokens = 0;
@@ -468,7 +468,7 @@ void inet_putpeer(struct inet_peer *p)
{
p->dtime = (__u32)jiffies;
smp_mb__before_atomic();
- atomic_dec(&p->refcnt);
+ refcount_dec(&p->refcnt);
}
EXPORT_SYMBOL_GPL(inet_putpeer);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b3cdeec85f1f..9a8cfac503dc 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -312,7 +312,7 @@ static int ip_frag_reinit(struct ipq *qp)
unsigned int sum_truesize = 0;
if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
- atomic_inc(&qp->q.refcnt);
+ refcount_inc(&qp->q.refcnt);
return -ETIMEDOUT;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7a3fd25e8913..2e61e2af251a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -964,7 +964,8 @@ static int __ip_append_data(struct sock *sk,
csummode = CHECKSUM_PARTIAL;
cork->length += length;
- if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
+ if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
+ (skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
(sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
@@ -1036,7 +1037,7 @@ alloc_new_skb:
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
- if (atomic_read(&sk->sk_wmem_alloc) <=
+ if (refcount_read(&sk->sk_wmem_alloc) <=
2 * sk->sk_sndbuf)
skb = sock_wmalloc(sk,
alloclen + hh_len + 15, 1,
@@ -1144,7 +1145,7 @@ alloc_new_skb:
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- atomic_add(copy, &sk->sk_wmem_alloc);
+ refcount_add(copy, &sk->sk_wmem_alloc);
}
offset += copy;
length -= copy;
@@ -1368,7 +1369,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
skb->len += len;
skb->data_len += len;
skb->truesize += len;
- atomic_add(len, &sk->sk_wmem_alloc);
+ refcount_add(len, &sk->sk_wmem_alloc);
offset += len;
size -= len;
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a1d521be612b..bb909f1d7537 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2406,6 +2406,67 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
}
+/*
+ * Handler registered for RTM_GETROUTE on RTNL_FAMILY_IPMR: look up a single
+ * multicast forwarding cache entry and unicast it back to the requester.
+ *
+ * The (source, group) pair comes from RTA_SRC/RTA_DST and the table from
+ * RTA_TABLE.  A missing address attribute is treated as 0; a missing or
+ * zero table id selects RT_TABLE_DEFAULT.
+ *
+ * Returns the rtnl_unicast() result once the reply has been built, or a
+ * negative errno: the nlmsg_parse() error, -ENOENT when the table or the
+ * cache entry does not exist, -ENOBUFS when the reply skb cannot be
+ * allocated, or the ipmr_fill_mroute() error.
+ */
+static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+			     struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct nlattr *tb[RTA_MAX + 1];
+	struct sk_buff *skb = NULL;
+	struct mfc_cache *cache;
+	struct mr_table *mrt;
+	struct rtmsg *rtm;
+	__be32 src, grp;
+	u32 tableid;
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
+			  rtm_ipv4_policy, extack);
+	if (err < 0)
+		goto errout;
+
+	rtm = nlmsg_data(nlh);
+
+	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
+	grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
+	tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
+
+	/* ipmr_get_table() reports a missing table as NULL, not as an
+	 * ERR_PTR(), so IS_ERR() would let NULL through to
+	 * ipmr_cache_find() below.
+	 */
+	mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
+	if (!mrt) {
+		err = -ENOENT;
+		goto errout_free;
+	}
+
+	/* entries are added/deleted only under RTNL */
+	rcu_read_lock();
+	cache = ipmr_cache_find(mrt, src, grp);
+	rcu_read_unlock();
+	if (!cache) {
+		err = -ENOENT;
+		goto errout_free;
+	}
+
+	skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL);
+	if (!skb) {
+		err = -ENOBUFS;
+		goto errout_free;
+	}
+
+	err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
+			       nlh->nlmsg_seq, cache,
+			       RTM_NEWROUTE, 0);
+	if (err < 0)
+		goto errout_free;
+
+	/* the reply skb is handed to rtnl_unicast(); it is not freed here */
+	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+
+errout:
+	return err;
+
+errout_free:
+	/* skb is still NULL on the early failure paths; kfree_skb(NULL) is a no-op */
+	kfree_skb(skb);
+	goto errout;
+}
+
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
@@ -3053,7 +3114,7 @@ int __init ip_mr_init(void)
}
#endif
rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
- NULL, ipmr_rtm_dumproute, NULL);
+ ipmr_rtm_getroute, ipmr_rtm_dumproute, NULL);
rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
ipmr_rtm_route, NULL, NULL);
rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 038f293c2376..7d72decb80f9 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -47,7 +47,7 @@ struct clusterip_config {
__be32 clusterip; /* the IP address */
u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
- struct net_device *dev; /* device */
+ int ifindex; /* device ifindex */
u_int16_t num_total_nodes; /* total number of nodes */
unsigned long local_nodes; /* node number array */
@@ -57,6 +57,9 @@ struct clusterip_config {
enum clusterip_hashmode hash_mode; /* which hashing mode */
u_int32_t hash_initval; /* hash initialization */
struct rcu_head rcu;
+
+ char ifname[IFNAMSIZ]; /* device ifname */
+ struct notifier_block notifier; /* refresh c->ifindex in it */
};
#ifdef CONFIG_PROC_FS
@@ -98,9 +101,8 @@ clusterip_config_put(struct clusterip_config *c)
* entry(rule) is removed, remove the config from lists, but don't free it
* yet, since proc-files could still be holding references */
static inline void
-clusterip_config_entry_put(struct clusterip_config *c)
+clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
{
- struct net *net = dev_net(c->dev);
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
local_bh_disable();
@@ -109,8 +111,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
spin_unlock(&cn->lock);
local_bh_enable();
- dev_mc_del(c->dev, c->clustermac);
- dev_put(c->dev);
+ unregister_netdevice_notifier(&c->notifier);
/* In case anyone still accesses the file, the open/close
* functions are also incrementing the refcount on their own,
@@ -170,19 +171,55 @@ clusterip_config_init_nodelist(struct clusterip_config *c,
set_bit(i->local_nodes[n] - 1, &c->local_nodes);
}
-static struct clusterip_config *
-clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
- struct net_device *dev)
+static int
+clusterip_netdev_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
{
- struct net *net = dev_net(dev);
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct clusterip_config *c;
+
+ c = container_of(this, struct clusterip_config, notifier);
+ switch (event) {
+ case NETDEV_REGISTER:
+ if (!strcmp(dev->name, c->ifname)) {
+ c->ifindex = dev->ifindex;
+ dev_mc_add(dev, c->clustermac);
+ }
+ break;
+ case NETDEV_UNREGISTER:
+ if (dev->ifindex == c->ifindex) {
+ dev_mc_del(dev, c->clustermac);
+ c->ifindex = -1;
+ }
+ break;
+ case NETDEV_CHANGENAME:
+ if (!strcmp(dev->name, c->ifname)) {
+ c->ifindex = dev->ifindex;
+ dev_mc_add(dev, c->clustermac);
+ } else if (dev->ifindex == c->ifindex) {
+ dev_mc_del(dev, c->clustermac);
+ c->ifindex = -1;
+ }
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct clusterip_config *
+clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
+ __be32 ip, const char *iniface)
+{
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_config *c;
+ int err;
c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
return ERR_PTR(-ENOMEM);
- c->dev = dev;
+ strcpy(c->ifname, iniface);
+ c->ifindex = -1;
c->clusterip = ip;
memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
c->num_total_nodes = i->num_total_nodes;
@@ -213,17 +250,27 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
cn->procdir,
&clusterip_proc_fops, c);
if (!c->pde) {
- spin_lock_bh(&cn->lock);
- list_del_rcu(&c->list);
- spin_unlock_bh(&cn->lock);
- kfree(c);
-
- return ERR_PTR(-ENOMEM);
+ err = -ENOMEM;
+ goto err;
}
}
#endif
- return c;
+ c->notifier.notifier_call = clusterip_netdev_event;
+ err = register_netdevice_notifier(&c->notifier);
+ if (!err)
+ return c;
+
+#ifdef CONFIG_PROC_FS
+ proc_remove(c->pde);
+err:
+#endif
+ spin_lock_bh(&cn->lock);
+ list_del_rcu(&c->list);
+ spin_unlock_bh(&cn->lock);
+ kfree(c);
+
+ return ERR_PTR(err);
}
#ifdef CONFIG_PROC_FS
@@ -425,14 +472,13 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
e->ip.iniface);
return -ENOENT;
}
+ dev_put(dev);
- config = clusterip_config_init(cipinfo,
- e->ip.dst.s_addr, dev);
- if (IS_ERR(config)) {
- dev_put(dev);
+ config = clusterip_config_init(par->net, cipinfo,
+ e->ip.dst.s_addr,
+ e->ip.iniface);
+ if (IS_ERR(config))
return PTR_ERR(config);
- }
- dev_mc_add(config->dev, config->clustermac);
}
}
cipinfo->config = config;
@@ -458,7 +504,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
/* if no more entries are referencing the config, remove it
* from the list and destroy the proc entry */
- clusterip_config_entry_put(cipinfo->config);
+ clusterip_config_entry_put(par->net, cipinfo->config);
clusterip_config_put(cipinfo->config);
@@ -558,10 +604,9 @@ arp_mangle(void *priv,
* addresses on different interfacs. However, in the CLUSTERIP case
* this wouldn't work, since we didn't subscribe the mcast group on
* other interfaces */
- if (c->dev != state->out) {
- pr_debug("not mangling arp reply on different "
- "interface: cip'%s'-skb'%s'\n",
- c->dev->name, state->out->name);
+ if (c->ifindex != state->out->ifindex) {
+ pr_debug("not mangling arp reply on different interface: cip'%d'-skb'%d'\n",
+ c->ifindex, state->out->ifindex);
clusterip_config_put(c);
return NF_ACCEPT;
}
@@ -743,14 +788,20 @@ static const struct file_operations clusterip_proc_fops = {
static int clusterip_net_init(struct net *net)
{
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ int ret;
INIT_LIST_HEAD(&cn->configs);
spin_lock_init(&cn->lock);
+ ret = nf_register_net_hook(net, &cip_arp_ops);
+ if (ret < 0)
+ return ret;
+
#ifdef CONFIG_PROC_FS
cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
if (!cn->procdir) {
+ nf_unregister_net_hook(net, &cip_arp_ops);
pr_err("Unable to proc dir entry\n");
return -ENOMEM;
}
@@ -765,6 +816,7 @@ static void clusterip_net_exit(struct net *net)
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
proc_remove(cn->procdir);
#endif
+ nf_unregister_net_hook(net, &cip_arp_ops);
}
static struct pernet_operations clusterip_net_ops = {
@@ -786,17 +838,11 @@ static int __init clusterip_tg_init(void)
if (ret < 0)
goto cleanup_subsys;
- ret = nf_register_hook(&cip_arp_ops);
- if (ret < 0)
- goto cleanup_target;
-
pr_info("ClusterIP Version %s loaded successfully\n",
CLUSTERIP_VERSION);
return 0;
-cleanup_target:
- xt_unregister_target(&clusterip_tg_reg);
cleanup_subsys:
unregister_pernet_subsys(&clusterip_net_ops);
return ret;
@@ -806,7 +852,6 @@ static void __exit clusterip_tg_exit(void)
{
pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
- nf_unregister_hook(&cip_arp_ops);
xt_unregister_target(&clusterip_tg_reg);
unregister_pernet_subsys(&clusterip_net_ops);
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index dc1dea15c1b4..f39037fca923 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -98,8 +98,8 @@ static int masq_device_event(struct notifier_block *this,
*/
NF_CT_ASSERT(dev->ifindex != 0);
- nf_ct_iterate_cleanup(net, device_cmp,
- (void *)(long)dev->ifindex, 0, 0);
+ nf_ct_iterate_cleanup_net(net, device_cmp,
+ (void *)(long)dev->ifindex, 0, 0);
}
return NOTIFY_DONE;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index ccfbce13a633..b8f0db54b197 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -290,7 +290,7 @@ void ping_close(struct sock *sk, long timeout)
{
pr_debug("ping_close(sk=%p,sk->num=%u)\n",
inet_sk(sk), inet_sk(sk)->inet_num);
- pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter);
+ pr_debug("isk->refcnt = %d\n", refcount_read(&sk->sk_refcnt));
sk_common_release(sk);
}
@@ -1127,7 +1127,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f,
0, 0L, 0,
from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
0, sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp,
+ refcount_read(&sp->sk_refcnt), sp,
atomic_read(&sp->sk_drops));
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bdffad875691..b0bb5d0a30bd 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -1063,7 +1063,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
0, 0L, 0,
from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
0, sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
+ refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
}
static int raw_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 7835bb4a1fab..0905cf04c2a4 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -213,7 +213,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
NULL, &own_req);
if (child) {
- atomic_set(&req->rsk_refcnt, 1);
+ refcount_set(&req->rsk_refcnt, 1);
tcp_sk(child)->tsoffset = tsoff;
sock_rps_save_rxhash(child, skb);
inet_csk_reqsk_queue_add(sk, req, child);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 058f509ca98e..71ce33decd97 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -664,7 +664,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
return skb->len < size_goal &&
sysctl_tcp_autocorking &&
skb != tcp_write_queue_head(sk) &&
- atomic_read(&sk->sk_wmem_alloc) > skb->truesize;
+ refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
}
static void tcp_push(struct sock *sk, int flags, int mss_now,
@@ -692,7 +692,7 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
/* It is possible TX completion already happened
* before we set TSQ_THROTTLED.
*/
- if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize)
+ if (refcount_read(&sk->sk_wmem_alloc) > skb->truesize)
return;
}
@@ -2350,6 +2350,8 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_init_send_head(sk);
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
+ dst_release(sk->sk_rx_dst);
+ sk->sk_rx_dst = NULL;
tcp_saved_syn_free(tp);
/* Clean up fastopen related fields */
@@ -2479,7 +2481,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
name[val] = 0;
lock_sock(sk);
- err = tcp_set_congestion_control(sk, name);
+ err = tcp_set_congestion_control(sk, name, true);
release_sock(sk);
return err;
}
@@ -3062,6 +3064,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
if (get_user(len, optlen))
return -EFAULT;
len = min_t(unsigned int, len, TCP_ULP_NAME_MAX);
+ if (!icsk->icsk_ulp_ops) {
+ if (put_user(0, optlen))
+ return -EFAULT;
+ return 0;
+ }
if (put_user(len, optlen))
return -EFAULT;
if (copy_to_user(optval, icsk->icsk_ulp_ops->name, len))
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 324c9bcc5456..fde983f6376b 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk)
INET_ECN_dontxmit(sk);
}
-static void tcp_reinit_congestion_control(struct sock *sk,
- const struct tcp_congestion_ops *ca)
+void tcp_reinit_congestion_control(struct sock *sk,
+ const struct tcp_congestion_ops *ca)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -333,8 +333,12 @@ out:
return ret;
}
-/* Change congestion control for socket */
-int tcp_set_congestion_control(struct sock *sk, const char *name)
+/* Change congestion control for socket. If load is false, then it is the
+ * responsibility of the caller to call tcp_init_congestion_control or
+ * tcp_reinit_congestion_control (if the current congestion control was
+ * already initialized).
+ */
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_congestion_ops *ca;
@@ -344,21 +348,29 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
return -EPERM;
rcu_read_lock();
- ca = __tcp_ca_find_autoload(name);
+ if (!load)
+ ca = tcp_ca_find(name);
+ else
+ ca = __tcp_ca_find_autoload(name);
/* No change asking for existing value */
if (ca == icsk->icsk_ca_ops) {
icsk->icsk_ca_setsockopt = 1;
goto out;
}
- if (!ca)
+ if (!ca) {
err = -ENOENT;
- else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
- ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
+ } else if (!load) {
+ icsk->icsk_ca_ops = ca;
+ if (!try_module_get(ca->owner))
+ err = -EBUSY;
+ } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
+ ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
err = -EPERM;
- else if (!try_module_get(ca->owner))
+ } else if (!try_module_get(ca->owner)) {
err = -EBUSY;
- else
+ } else {
tcp_reinit_congestion_control(sk, ca);
+ }
out:
rcu_read_unlock();
return err;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 4af82b914dd4..ce9c7fef200f 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -214,13 +214,14 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
TCP_TIMEOUT_INIT, TCP_RTO_MAX);
- atomic_set(&req->rsk_refcnt, 2);
+ refcount_set(&req->rsk_refcnt, 2);
/* Now finish processing the fastopen child socket. */
inet_csk(child)->icsk_af_ops->rebuild_header(child);
tcp_init_congestion_control(child);
tcp_mtup_init(child);
tcp_init_metrics(child);
+ tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
tcp_init_buffer_space(child);
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2ab7e2fa9bb9..2920e0cb09f8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5571,7 +5571,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
icsk->icsk_af_ops->rebuild_header(sk);
tcp_init_metrics(sk);
-
+ tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
tcp_init_congestion_control(sk);
/* Prevent spurious tcp_cwnd_restart() on first data
@@ -5977,6 +5977,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
} else {
/* Make sure socket is routed, for correct metrics. */
icsk->icsk_af_ops->rebuild_header(sk);
+ tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
tcp_init_congestion_control(sk);
tcp_mtup_init(sk);
@@ -6190,7 +6191,8 @@ static void tcp_ecn_create_request(struct request_sock *req,
ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
- (ecn_ok_dst & DST_FEATURE_ECN_CA))
+ (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
+ tcp_bpf_ca_needs_ecn((struct sock *)req))
inet_rsk(req)->ecn_ok = 1;
}
@@ -6406,7 +6408,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
} else {
tcp_rsk(req)->tfo_listener = false;
if (!want_cookie)
- inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ inet_csk_reqsk_queue_hash_add(sk, req,
+ tcp_timeout_init((struct sock *)req));
af_ops->send_synack(sk, dst, &fl, req, &foc,
!want_cookie ? TCP_SYNACK_NORMAL :
TCP_SYNACK_COOKIE);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d774bcd9a54b..6ec6900eb300 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2323,7 +2323,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
icsk->icsk_probes_out,
sock_i_ino(sk),
- atomic_read(&sk->sk_refcnt), sk,
+ refcount_read(&sk->sk_refcnt), sk,
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
@@ -2349,7 +2349,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
- atomic_read(&tw->tw_refcnt), tw);
+ refcount_read(&tw->tw_refcnt), tw);
}
#define TMPSZ 150
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d30ee31e94eb..0ff83c1637d8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -351,6 +351,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
int full_space = tcp_full_space(sk_listener);
u32 window_clamp;
__u8 rcv_wscale;
+ u32 rcv_wnd;
int mss;
mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
@@ -363,6 +364,12 @@ void tcp_openreq_init_rwin(struct request_sock *req,
(req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
req->rsk_window_clamp = full_space;
+ rcv_wnd = tcp_rwnd_init_bpf((struct sock *)req);
+ if (rcv_wnd == 0)
+ rcv_wnd = dst_metric(dst, RTAX_INITRWND);
+ else if (full_space < rcv_wnd * mss)
+ full_space = rcv_wnd * mss;
+
/* tcp_full_space because it is guaranteed to be the first packet */
tcp_select_initial_window(full_space,
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -370,7 +377,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
&req->rsk_window_clamp,
ireq->wscale_ok,
&rcv_wscale,
- dst_metric(dst, RTAX_INITRWND));
+ rcv_wnd);
ireq->rcv_wscale = rcv_wscale;
}
EXPORT_SYMBOL(tcp_openreq_init_rwin);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index bc68da38ea86..11f69bbf9307 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -152,7 +152,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
swap(gso_skb->sk, skb->sk);
swap(gso_skb->destructor, skb->destructor);
sum_truesize += skb->truesize;
- atomic_add(sum_truesize - gso_skb->truesize,
+ refcount_add(sum_truesize - gso_skb->truesize,
&skb->sk->sk_wmem_alloc);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9a9c395b6235..4d36f0b093e6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -316,7 +316,8 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
if (!(tp->ecn_flags & TCP_ECN_OK))
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
- else if (tcp_ca_needs_ecn(sk))
+ else if (tcp_ca_needs_ecn(sk) ||
+ tcp_bpf_ca_needs_ecn(sk))
INET_ECN_xmit(sk);
}
@@ -324,8 +325,9 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
- tcp_ca_needs_ecn(sk);
+ tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
if (!use_ecn) {
const struct dst_entry *dst = __sk_dst_get(sk);
@@ -339,7 +341,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
if (use_ecn) {
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
tp->ecn_flags = TCP_ECN_OK;
- if (tcp_ca_needs_ecn(sk))
+ if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
INET_ECN_xmit(sk);
}
}
@@ -861,12 +863,11 @@ void tcp_wfree(struct sk_buff *skb)
struct sock *sk = skb->sk;
struct tcp_sock *tp = tcp_sk(sk);
unsigned long flags, nval, oval;
- int wmem;
/* Keep one reference on sk_wmem_alloc.
* Will be released by sk_free() from here or tcp_tasklet_func()
*/
- wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc);
+ WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc));
/* If this softirq is serviced by ksoftirqd, we are likely under stress.
* Wait until our queues (qdisc + devices) are drained.
@@ -875,7 +876,7 @@ void tcp_wfree(struct sk_buff *skb)
* - chance for incoming ACK (processed by another cpu maybe)
* to migrate this flow (skb->ooo_okay will be eventually set)
*/
- if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
+ if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
goto out;
for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
@@ -925,7 +926,7 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
if (nval != oval)
continue;
- if (!atomic_inc_not_zero(&sk->sk_wmem_alloc))
+ if (!refcount_inc_not_zero(&sk->sk_wmem_alloc))
break;
/* queue this socket to tasklet queue */
tsq = this_cpu_ptr(&tsq_tasklet);
@@ -1045,7 +1046,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb->sk = sk;
skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
skb_set_hash_from_sk(skb, sk);
- atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+ refcount_add(skb->truesize, &sk->sk_wmem_alloc);
skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
@@ -2176,7 +2177,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
limit <<= factor;
- if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+ if (refcount_read(&sk->sk_wmem_alloc) > limit) {
/* Always send the 1st or 2nd skb in write queue.
* No need to wait for TX completion to call us back,
* after softirq/tasklet schedule.
@@ -2192,7 +2193,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
* test again the condition.
*/
smp_mb__after_atomic();
- if (atomic_read(&sk->sk_wmem_alloc) > limit)
+ if (refcount_read(&sk->sk_wmem_alloc) > limit)
return true;
}
return false;
@@ -2812,7 +2813,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
/* Do not sent more than we queued. 1/4 is reserved for possible
* copying overhead: fragmentation, tunneling, mangling etc.
*/
- if (atomic_read(&sk->sk_wmem_alloc) >
+ if (refcount_read(&sk->sk_wmem_alloc) >
min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
sk->sk_sndbuf))
return -EAGAIN;
@@ -3267,6 +3268,7 @@ static void tcp_connect_init(struct sock *sk)
const struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
__u8 rcv_wscale;
+ u32 rcv_wnd;
/* We'll fix this up when we get a response from the other end.
* See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
@@ -3300,13 +3302,17 @@ static void tcp_connect_init(struct sock *sk)
(tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
tp->window_clamp = tcp_full_space(sk);
+ rcv_wnd = tcp_rwnd_init_bpf(sk);
+ if (rcv_wnd == 0)
+ rcv_wnd = dst_metric(dst, RTAX_INITRWND);
+
tcp_select_initial_window(tcp_full_space(sk),
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
&rcv_wscale,
- dst_metric(dst, RTAX_INITRWND));
+ rcv_wnd);
tp->rx_opt.rcv_wscale = rcv_wscale;
tp->rcv_ssthresh = tp->rcv_wnd;
@@ -3327,7 +3333,7 @@ static void tcp_connect_init(struct sock *sk)
tp->rcv_wup = tp->rcv_nxt;
tp->copied_seq = tp->rcv_nxt;
- inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
+ inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
inet_csk(sk)->icsk_retransmits = 0;
tcp_clear_retrans(tp);
}
@@ -3440,6 +3446,7 @@ int tcp_connect(struct sock *sk)
struct sk_buff *buff;
int err;
+ tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB);
tcp_connect_init(sk);
if (unlikely(tp->repair)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 47c7aa0501af..25294d43e147 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -577,7 +577,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
dif, &udp_table, NULL);
- if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
}
@@ -1163,24 +1163,7 @@ out:
return ret;
}
-/* Copy as much information as possible into skb->dev_scratch to avoid
- * possibly multiple cache miss on dequeue();
- */
#if BITS_PER_LONG == 64
-
-/* we can store multiple info here: truesize, len and the bit needed to
- * compute skb_csum_unnecessary will be on cold cache lines at recvmsg
- * time.
- * skb->len can be stored on 16 bits since the udp header has been already
- * validated and pulled.
- */
-struct udp_dev_scratch {
- u32 truesize;
- u16 len;
- bool is_linear;
- bool csum_unnecessary;
-};
-
static void udp_set_dev_scratch(struct sk_buff *skb)
{
struct udp_dev_scratch *scratch;
@@ -1197,22 +1180,6 @@ static int udp_skb_truesize(struct sk_buff *skb)
{
return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize;
}
-
-static unsigned int udp_skb_len(struct sk_buff *skb)
-{
- return ((struct udp_dev_scratch *)&skb->dev_scratch)->len;
-}
-
-static bool udp_skb_csum_unnecessary(struct sk_buff *skb)
-{
- return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary;
-}
-
-static bool udp_skb_is_linear(struct sk_buff *skb)
-{
- return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear;
-}
-
#else
static void udp_set_dev_scratch(struct sk_buff *skb)
{
@@ -1223,21 +1190,6 @@ static int udp_skb_truesize(struct sk_buff *skb)
{
return skb->dev_scratch;
}
-
-static unsigned int udp_skb_len(struct sk_buff *skb)
-{
- return skb->len;
-}
-
-static bool udp_skb_csum_unnecessary(struct sk_buff *skb)
-{
- return skb_csum_unnecessary(skb);
-}
-
-static bool udp_skb_is_linear(struct sk_buff *skb)
-{
- return !skb_is_nonlinear(skb);
-}
#endif
/* fully reclaim rmem/fwd memory allocated for skb */
@@ -1598,18 +1550,6 @@ busy_check:
}
EXPORT_SYMBOL_GPL(__skb_recv_udp);
-static int copy_linear_skb(struct sk_buff *skb, int len, int off,
- struct iov_iter *to)
-{
- int n, copy = len - off;
-
- n = copy_to_iter(skb->data + off, copy, to);
- if (n == copy)
- return 0;
-
- return -EFAULT;
-}
-
/*
* This should be easy, if there is something there we
* return it, otherwise we block.
@@ -2302,7 +2242,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
uh->source, iph->saddr, dif);
}
- if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2))
+ if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
return;
skb->sk = sk;
@@ -2751,7 +2691,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
0, 0L, 0,
from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
0, sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp,
+ refcount_read(&sp->sk_refcnt), sp,
atomic_read(&sp->sk_drops));
}
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 9a89c10a55f0..4515836d2a3a 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -55,7 +55,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
req->id.idiag_dport,
req->id.idiag_if, tbl, NULL);
#endif
- if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
rcu_read_unlock();
err = -ENOENT;
@@ -206,7 +206,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
return -EINVAL;
}
- if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
rcu_read_unlock();
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a885ffcf0973..114fb64cf176 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1927,15 +1927,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
if (dad_failed)
ifp->flags |= IFA_F_DADFAILED;
- if (ifp->flags&IFA_F_PERMANENT) {
- spin_lock_bh(&ifp->lock);
- addrconf_del_dad_work(ifp);
- ifp->flags |= IFA_F_TENTATIVE;
- spin_unlock_bh(&ifp->lock);
- if (dad_failed)
- ipv6_ifa_notify(0, ifp);
- in6_ifa_put(ifp);
- } else if (ifp->flags&IFA_F_TEMPORARY) {
+ if (ifp->flags&IFA_F_TEMPORARY) {
struct inet6_ifaddr *ifpub;
spin_lock_bh(&ifp->lock);
ifpub = ifp->ifpub;
@@ -1948,6 +1940,14 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_unlock_bh(&ifp->lock);
}
ipv6_del_addr(ifp);
+ } else if (ifp->flags&IFA_F_PERMANENT || !dad_failed) {
+ spin_lock_bh(&ifp->lock);
+ addrconf_del_dad_work(ifp);
+ ifp->flags |= IFA_F_TENTATIVE;
+ spin_unlock_bh(&ifp->lock);
+ if (dad_failed)
+ ipv6_ifa_notify(0, ifp);
+ in6_ifa_put(ifp);
} else {
ipv6_del_addr(ifp);
}
@@ -3384,6 +3384,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct netdev_notifier_changeupper_info *info;
struct inet6_dev *idev = __in6_dev_get(dev);
+ struct net *net = dev_net(dev);
int run_pending = 0;
int err;
@@ -3399,7 +3400,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
case NETDEV_CHANGEMTU:
/* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */
if (dev->mtu < IPV6_MIN_MTU) {
- addrconf_ifdown(dev, 1);
+ addrconf_ifdown(dev, dev != net->loopback_dev);
break;
}
@@ -3515,7 +3516,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
* IPV6_MIN_MTU stop IPv6 on this interface.
*/
if (dev->mtu < IPV6_MIN_MTU)
- addrconf_ifdown(dev, 1);
+ addrconf_ifdown(dev, dev != net->loopback_dev);
}
break;
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 8d772fea1dde..44067521e7cd 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -227,7 +227,7 @@ static int calipso_cache_check(const unsigned char *key,
entry->key_len == key_len &&
memcmp(entry->key, key, key_len) == 0) {
entry->activity += 1;
- atomic_inc(&entry->lsm_data->refcount);
+ refcount_inc(&entry->lsm_data->refcount);
secattr->cache = entry->lsm_data;
secattr->flags |= NETLBL_SECATTR_CACHE;
secattr->type = NETLBL_NLTYPE_CALIPSO;
@@ -296,7 +296,7 @@ static int calipso_cache_add(const unsigned char *calipso_ptr,
}
entry->key_len = calipso_ptr_len;
entry->hash = calipso_map_cache_hash(calipso_ptr, calipso_ptr_len);
- atomic_inc(&secattr->cache->refcount);
+ refcount_inc(&secattr->cache->refcount);
entry->lsm_data = secattr->cache;
bkt = entry->hash & (CALIPSO_CACHE_BUCKETS - 1);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index e011122ebd43..a1f918713006 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -250,8 +250,14 @@ ipv4_connected:
*/
err = ip6_datagram_dst_update(sk, true);
- if (err)
+ if (err) {
+ /* Reset daddr and dport so that udp_v6_early_demux()
+ * fails to find this socket
+ */
+ memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
+ inet->inet_dport = 0;
goto out;
+ }
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
@@ -1035,6 +1041,6 @@ void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
0,
sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp,
+ refcount_read(&sp->sk_refcnt), sp,
atomic_read(&sp->sk_drops));
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 71faffdd55d9..9ed35473dcb5 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -275,7 +275,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
skb->data_len += tailen;
skb->truesize += tailen;
if (sk)
- atomic_add(tailen, &sk->sk_wmem_alloc);
+ refcount_add(tailen, &sk->sk_wmem_alloc);
goto out;
}
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index d950d43ba255..f02f131f6435 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -30,6 +30,25 @@
#include <net/ipv6.h>
#include <linux/icmpv6.h>
+static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
+{
+ int off = sizeof(struct ipv6hdr);
+ struct ipv6_opt_hdr *exthdr;
+
+ if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+ return offsetof(struct ipv6hdr, nexthdr);
+
+ while (off < nhlen) {
+ exthdr = (void *)ipv6_hdr + off;
+ if (exthdr->nexthdr == NEXTHDR_ESP)
+ return off;
+
+ off += ipv6_optlen(exthdr);
+ }
+
+ return 0;
+}
+
static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
@@ -38,6 +57,7 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
struct xfrm_state *x;
__be32 seq;
__be32 spi;
+ int nhoff;
int err;
skb_pull(skb, offset);
@@ -72,6 +92,11 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
xo->flags |= XFRM_GRO;
+ nhoff = esp6_nexthdr_esp_offset(ipv6_hdr(skb), offset);
+ if (!nhoff)
+ goto out;
+
+ IP6CB(skb)->nhoff = nhoff;
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index d0900918a19e..b13b8f93079d 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -75,7 +75,7 @@ begin:
continue;
if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
continue;
- if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
+ if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
@@ -172,7 +172,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
ntohs(dport), dif, &refcounted);
- if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
+ if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5baa6fab4b97..1422d6c08377 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1385,7 +1385,7 @@ emsgsize:
*/
cork->length += length;
- if ((((length + fragheaderlen) > mtu) ||
+ if ((((length + (skb ? skb->len : headersize)) > mtu) ||
(skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
@@ -1472,7 +1472,7 @@ alloc_new_skb:
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
- if (atomic_read(&sk->sk_wmem_alloc) <=
+ if (refcount_read(&sk->sk_wmem_alloc) <=
2 * sk->sk_sndbuf)
skb = sock_wmalloc(sk,
alloclen + hh_len, 1,
@@ -1581,7 +1581,7 @@ alloc_new_skb:
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- atomic_add(copy, &sk->sk_wmem_alloc);
+ refcount_add(copy, &sk->sk_wmem_alloc);
}
offset += copy;
length -= copy;
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 2297c9f073ba..d7b679037bae 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -75,8 +75,8 @@ static int masq_device_event(struct notifier_block *this,
struct net *net = dev_net(dev);
if (event == NETDEV_DOWN)
- nf_ct_iterate_cleanup(net, device_cmp,
- (void *)(long)dev->ifindex, 0, 0);
+ nf_ct_iterate_cleanup_net(net, device_cmp,
+ (void *)(long)dev->ifindex, 0, 0);
return NOTIFY_DONE;
}
@@ -99,7 +99,7 @@ static void iterate_cleanup_work(struct work_struct *work)
w = container_of(work, struct masq_dev_work, work);
index = w->ifindex;
- nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0);
+ nf_ct_iterate_cleanup_net(w->net, device_cmp, (void *)index, 0, 0);
put_net(w->net);
kfree(w);
@@ -110,12 +110,12 @@ static void iterate_cleanup_work(struct work_struct *work)
/* ipv6 inet notifier is an atomic notifier, i.e. we cannot
* schedule.
*
- * Unfortunately, nf_ct_iterate_cleanup can run for a long
+ * Unfortunately, nf_ct_iterate_cleanup_net can run for a long
* time if there are lots of conntracks and the system
* handles high softirq load, so it frequently calls cond_resched
* while iterating the conntrack table.
*
- * So we defer nf_ct_iterate_cleanup walk to the system workqueue.
+ * So we defer nf_ct_iterate_cleanup_net walk to the system workqueue.
*
* As we can have 'a lot' of inet_events (depending on amount
* of ipv6 addresses being deleted), we also need to add an upper
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 2e4490076061..0488a24c2a44 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3730,7 +3730,11 @@ static int ip6_route_dev_notify(struct notifier_block *this,
net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
- } else if (event == NETDEV_UNREGISTER) {
+ } else if (event == NETDEV_UNREGISTER &&
+ dev->reg_state != NETREG_UNREGISTERED) {
+ /* NETDEV_UNREGISTER could be fired for multiple times by
+ * netdev_wait_allrefs(). Make sure we only call this once.
+ */
in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e9958b1398cb..ac912bb21747 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -305,7 +305,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
* we try harder to allocate.
*/
kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
- kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
+ kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
NULL;
rcu_read_lock();
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 2f7e99af67db..7b75b0620730 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -194,7 +194,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
ireq->pktopts = skb;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f85cbfc183d6..2521690d62d6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -734,7 +734,7 @@ static void tcp_v6_init_req(struct request_sock *req,
np->rxopt.bits.rxinfo ||
np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
np->rxopt.bits.rxohlim || np->repflow)) {
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
ireq->pktopts = skb;
}
}
@@ -1809,7 +1809,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
icsk->icsk_probes_out,
sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp,
+ refcount_read(&sp->sk_refcnt), sp,
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
@@ -1842,7 +1842,7 @@ static void get_timewait6_sock(struct seq_file *seq,
dest->s6_addr32[2], dest->s6_addr32[3], destp,
tw->tw_substate, 0, 0,
3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
- atomic_read(&tw->tw_refcnt), tw);
+ refcount_read(&tw->tw_refcnt), tw);
}
static int tcp6_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d1d728805729..4a3e65626e8b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -325,7 +325,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be
sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport,
dif, &udp_table, NULL);
- if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
}
@@ -362,7 +362,7 @@ try_again:
if (!skb)
return err;
- ulen = skb->len;
+ ulen = udp_skb_len(skb);
copied = len;
if (copied > ulen - off)
copied = ulen - off;
@@ -379,14 +379,18 @@ try_again:
if (copied < ulen || peeking ||
(is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
- checksum_valid = !udp_lib_checksum_complete(skb);
+ checksum_valid = udp_skb_csum_unnecessary(skb) ||
+ !__udp_lib_checksum_complete(skb);
if (!checksum_valid)
goto csum_copy_err;
}
- if (checksum_valid || skb_csum_unnecessary(skb))
- err = skb_copy_datagram_msg(skb, off, msg, copied);
- else {
+ if (checksum_valid || udp_skb_csum_unnecessary(skb)) {
+ if (udp_skb_is_linear(skb))
+ err = copy_linear_skb(skb, copied, off, &msg->msg_iter);
+ else
+ err = skb_copy_datagram_msg(skb, off, msg, copied);
+ } else {
err = skb_copy_and_csum_datagram_msg(skb, off, msg);
if (err == -EINVAL)
goto csum_copy_err;
@@ -881,7 +885,8 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
struct sock *sk;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- if (INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+ if (sk->sk_state == TCP_ESTABLISHED &&
+ INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
return sk;
/* Only check first socket in chain */
break;
@@ -911,7 +916,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
else
return;
- if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2))
+ if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
return;
skb->sk = sk;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 08a807b29298..3ef5d913e7a3 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -43,8 +43,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
return 1;
#endif
- ipv6_hdr(skb)->payload_len = htons(skb->len);
__skb_push(skb, skb->data - skb_network_header(skb));
+ ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
if (xo && (xo->flags & XFRM_GRO)) {
skb_mac_header_rebuild(skb);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index ac033e413bc5..148533169b1d 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -402,7 +402,7 @@ static void iucv_sock_destruct(struct sock *sk)
}
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
- WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+ WARN_ON(refcount_read(&sk->sk_wmem_alloc));
WARN_ON(sk->sk_wmem_queued);
WARN_ON(sk->sk_forward_alloc);
}
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index bf75c9231cca..c343ac60bf50 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -162,7 +162,7 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
psock->sk->sk_receive_queue.qlen,
atomic_read(&psock->sk->sk_rmem_alloc),
psock->sk->sk_write_queue.qlen,
- atomic_read(&psock->sk->sk_wmem_alloc));
+ refcount_read(&psock->sk->sk_wmem_alloc));
if (psock->done)
seq_puts(seq, "Done ");
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ce9b8565d825..edcf1d0f82c8 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -109,7 +109,7 @@ static void pfkey_sock_destruct(struct sock *sk)
}
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
- WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+ WARN_ON(refcount_read(&sk->sk_wmem_alloc));
atomic_dec(&net_pfkey->socks_nr);
}
@@ -203,11 +203,11 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
sock_hold(sk);
if (*skb2 == NULL) {
- if (atomic_read(&skb->users) != 1) {
+ if (refcount_read(&skb->users) != 1) {
*skb2 = skb_clone(skb, allocation);
} else {
*skb2 = skb;
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
}
}
if (*skb2 != NULL) {
@@ -1150,6 +1150,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
goto out;
}
+ err = -ENOBUFS;
key = ext_hdrs[SADB_EXT_KEY_AUTH - 1];
if (sa->sadb_sa_auth) {
int keysize = 0;
@@ -1161,8 +1162,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
if (key)
keysize = (key->sadb_key_bits + 7) / 8;
x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL);
- if (!x->aalg)
+ if (!x->aalg) {
+ err = -ENOMEM;
goto out;
+ }
strcpy(x->aalg->alg_name, a->name);
x->aalg->alg_key_len = 0;
if (key) {
@@ -1181,8 +1184,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
goto out;
}
x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL);
- if (!x->calg)
+ if (!x->calg) {
+ err = -ENOMEM;
goto out;
+ }
strcpy(x->calg->alg_name, a->name);
x->props.calgo = sa->sadb_sa_encrypt;
} else {
@@ -1196,8 +1201,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
if (key)
keysize = (key->sadb_key_bits + 7) / 8;
x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL);
- if (!x->ealg)
+ if (!x->ealg) {
+ err = -ENOMEM;
goto out;
+ }
strcpy(x->ealg->alg_name, a->name);
x->ealg->alg_key_len = 0;
if (key) {
@@ -1242,8 +1249,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
struct xfrm_encap_tmpl *natt;
x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
- if (!x->encap)
+ if (!x->encap) {
+ err = -ENOMEM;
goto out;
+ }
natt = x->encap;
n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1];
@@ -2742,6 +2751,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
int err, err2;
err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
+ if (!err)
+ xfrm_garbage_collect(net);
err2 = unicast_flush_resp(sk, hdr);
if (err || err2) {
if (err == -ESRCH) /* empty table - old silent behavior */
@@ -3728,7 +3739,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v)
else
seq_printf(f, "%pK %-6d %-6u %-6u %-6u %-6lu\n",
s,
- atomic_read(&s->sk_refcnt),
+ refcount_read(&s->sk_refcnt),
sk_rmem_alloc_get(s),
sk_wmem_alloc_get(s),
from_kuid_munged(seq_user_ns(f), sock_i_uid(s)),
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index d100aed3d06f..98a005d0d04a 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -144,9 +144,8 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
tunnel->encap == L2TP_ENCAPTYPE_IP ? "IP" :
"");
seq_printf(m, " %d sessions, refcnt %d/%d\n", session_count,
- tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0,
+ tunnel->sock ? refcount_read(&tunnel->sock->sk_refcnt) : 0,
atomic_read(&tunnel->ref_count));
-
seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n",
tunnel->debug,
atomic_long_read(&tunnel->stats.tx_packets),
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 9b02c13d258b..5e91b47f0d2a 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -507,7 +507,7 @@ again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
if (llc_estab_match(sap, daddr, laddr, rc)) {
/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
- if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
+ if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
!llc_estab_match(sap, daddr, laddr, rc))) {
@@ -566,7 +566,7 @@ again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
if (llc_listener_match(sap, laddr, rc)) {
/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
- if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
+ if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
!llc_listener_match(sap, laddr, rc))) {
@@ -973,9 +973,9 @@ void llc_sk_free(struct sock *sk)
skb_queue_purge(&sk->sk_write_queue);
skb_queue_purge(&llc->pdu_unack_q);
#ifdef LLC_REFCNT_DEBUG
- if (atomic_read(&sk->sk_refcnt) != 1) {
+ if (refcount_read(&sk->sk_refcnt) != 1) {
printk(KERN_DEBUG "Destruction of LLC sock %p delayed in %s, cnt=%d\n",
- sk, __func__, atomic_read(&sk->sk_refcnt));
+ sk, __func__, refcount_read(&sk->sk_refcnt));
printk(KERN_DEBUG "%d LLC sockets are still alive\n",
atomic_read(&llc_sock_nr));
} else {
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 63b6ab056370..d90928f50226 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -329,7 +329,7 @@ again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
if (llc_dgram_match(sap, laddr, rc)) {
/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
- if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
+ if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
!llc_dgram_match(sap, laddr, rc))) {
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index c9b78e7b342f..913380919301 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -70,10 +70,9 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o
# nf_tables
-nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o
-nf_tables-objs += nft_immediate.o nft_cmp.o nft_range.o
-nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
-nf_tables-objs += nft_lookup.o nft_dynset.o
+nf_tables-objs := nf_tables_core.o nf_tables_api.o nf_tables_trace.o \
+ nft_immediate.o nft_cmp.o nft_range.o nft_bitwise.o \
+ nft_byteorder.o nft_payload.o nft_lookup.o nft_dynset.o
obj-$(CONFIG_NF_TABLES) += nf_tables.o
obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index ba6a5516dc7c..e495b5e484b1 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -841,14 +841,16 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
static int ip_set_create(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
{
struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set, *clash = NULL;
@@ -989,7 +991,8 @@ ip_set_destroy_set(struct ip_set *set)
static int ip_set_destroy(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
{
struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *s;
@@ -1067,7 +1070,8 @@ ip_set_flush_set(struct ip_set *set)
static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
{
struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *s;
@@ -1106,7 +1110,8 @@ ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
static int ip_set_rename(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
{
struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set, *s;
@@ -1155,7 +1160,8 @@ out:
static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
{
struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *from, *to;
@@ -1428,7 +1434,8 @@ out:
static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
{
if (unlikely(protocol_failed(attr)))
return -IPSET_ERR_PROTOCOL;
@@ -1513,7 +1520,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
{
struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set;
@@ -1567,7 +1575,8 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
{
struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set;
@@ -1621,7 +1630,8 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
{
struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set;
@@ -1656,7 +1666,8 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
static int ip_set_header(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
{
struct ip_set_net *inst = ip_set_pernet(net);
const struct ip_set *set;
@@ -1712,7 +1723,8 @@ static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
@@ -1770,7 +1782,8 @@ ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
static int ip_set_protocol(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 42c3e3ba1b94..3f09cdb42562 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -38,8 +38,8 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
break;
}
case IPPROTO_SCTP: {
- sctp_sctphdr_t _sh;
- const sctp_sctphdr_t *sh;
+ struct sctphdr _sh;
+ const struct sctphdr *sh;
sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh);
if (!sh)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index ad99c1ceea6f..e31956b58aba 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1037,9 +1037,9 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
*/
static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len)
{
- sctp_chunkhdr_t *sch, schunk;
- sch = skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t),
- sizeof(schunk), &schunk);
+ struct sctp_chunkhdr *sch, schunk;
+ sch = skb_header_pointer(skb, nh_len + sizeof(struct sctphdr),
+ sizeof(schunk), &schunk);
if (sch == NULL)
return 0;
if (sch->type == SCTP_CID_ABORT)
@@ -1070,9 +1070,9 @@ static inline bool is_new_conn(const struct sk_buff *skb,
return th->syn;
}
case IPPROTO_SCTP: {
- sctp_chunkhdr_t *sch, schunk;
+ struct sctp_chunkhdr *sch, schunk;
- sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
+ sch = skb_header_pointer(skb, iph->len + sizeof(struct sctphdr),
sizeof(schunk), &schunk);
if (sch == NULL)
return false;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 56f8e4b204ff..3ffad4adaddf 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -15,16 +15,15 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
struct ip_vs_iphdr *iph)
{
struct ip_vs_service *svc;
- sctp_chunkhdr_t _schunkh, *sch;
- sctp_sctphdr_t *sh, _sctph;
+ struct sctp_chunkhdr _schunkh, *sch;
+ struct sctphdr *sh, _sctph;
__be16 _ports[2], *ports = NULL;
if (likely(!ip_vs_iph_icmp(iph))) {
sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
if (sh) {
- sch = skb_header_pointer(
- skb, iph->len + sizeof(sctp_sctphdr_t),
- sizeof(_schunkh), &_schunkh);
+ sch = skb_header_pointer(skb, iph->len + sizeof(_sctph),
+ sizeof(_schunkh), &_schunkh);
if (sch && (sch->type == SCTP_CID_INIT ||
sysctl_sloppy_sctp(ipvs)))
ports = &sh->source;
@@ -77,7 +76,7 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 1;
}
-static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
+static void sctp_nat_csum(struct sk_buff *skb, struct sctphdr *sctph,
unsigned int sctphoff)
{
sctph->checksum = sctp_compute_cksum(skb, sctphoff);
@@ -88,7 +87,7 @@ static int
sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
{
- sctp_sctphdr_t *sctph;
+ struct sctphdr *sctph;
unsigned int sctphoff = iph->len;
bool payload_csum = false;
@@ -135,7 +134,7 @@ static int
sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
{
- sctp_sctphdr_t *sctph;
+ struct sctphdr *sctph;
unsigned int sctphoff = iph->len;
bool payload_csum = false;
@@ -378,7 +377,7 @@ static inline void
set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
int direction, const struct sk_buff *skb)
{
- sctp_chunkhdr_t _sctpch, *sch;
+ struct sctp_chunkhdr _sctpch, *sch;
unsigned char chunk_type;
int event, next_state;
int ihl, cofs;
@@ -389,7 +388,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
ihl = ip_hdrlen(skb);
#endif
- cofs = ihl + sizeof(sctp_sctphdr_t);
+ cofs = ihl + sizeof(struct sctphdr);
sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
if (sch == NULL)
return;
@@ -410,7 +409,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
(sch->type == SCTP_CID_COOKIE_ACK)) {
int clen = ntohs(sch->length);
- if (clen >= sizeof(sctp_chunkhdr_t)) {
+ if (clen >= sizeof(_sctpch)) {
sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
sizeof(_sctpch), &_sctpch);
if (sch && sch->type == SCTP_CID_ABORT)
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 03d2ccffa9fa..20edd589fe06 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -197,8 +197,8 @@ static void __exit nf_conntrack_amanda_fini(void)
{
int i;
- nf_conntrack_helper_unregister(&amanda_helper[0]);
- nf_conntrack_helper_unregister(&amanda_helper[1]);
+ nf_conntrack_helpers_unregister(amanda_helper,
+ ARRAY_SIZE(amanda_helper));
for (i = 0; i < ARRAY_SIZE(search); i++)
textsearch_destroy(search[i].ts);
}
@@ -218,16 +218,12 @@ static int __init nf_conntrack_amanda_init(void)
goto err1;
}
}
- ret = nf_conntrack_helper_register(&amanda_helper[0]);
+ ret = nf_conntrack_helpers_register(amanda_helper,
+ ARRAY_SIZE(amanda_helper));
if (ret < 0)
goto err1;
- ret = nf_conntrack_helper_register(&amanda_helper[1]);
- if (ret < 0)
- goto err2;
return 0;
-err2:
- nf_conntrack_helper_unregister(&amanda_helper[0]);
err1:
while (--i >= 0)
textsearch_destroy(search[i].ts);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e847dbaa0c6b..9979f46c81dc 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1586,13 +1586,12 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
/* Bring out ya dead! */
static struct nf_conn *
-get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
+get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
void *data, unsigned int *bucket)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct hlist_nulls_node *n;
- int cpu;
spinlock_t *lockp;
for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
@@ -1604,8 +1603,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue;
ct = nf_ct_tuplehash_to_ctrack(h);
- if (net_eq(nf_ct_net(ct), net) &&
- iter(ct, data))
+ if (iter(ct, data))
goto found;
}
}
@@ -1614,51 +1612,150 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
cond_resched();
}
+ return NULL;
+found:
+ atomic_inc(&ct->ct_general.use);
+ spin_unlock(lockp);
+ local_bh_enable();
+ return ct;
+}
+
+static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
+ void *data, u32 portid, int report)
+{
+ unsigned int bucket = 0, sequence;
+ struct nf_conn *ct;
+
+ might_sleep();
+
+ for (;;) {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+
+ while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+ /* Time to push up daises... */
+
+ nf_ct_delete(ct, portid, report);
+ nf_ct_put(ct);
+ cond_resched();
+ }
+
+ if (!read_seqcount_retry(&nf_conntrack_generation, sequence))
+ break;
+ bucket = 0;
+ }
+}
+
+struct iter_data {
+ int (*iter)(struct nf_conn *i, void *data);
+ void *data;
+ struct net *net;
+};
+
+static int iter_net_only(struct nf_conn *i, void *data)
+{
+ struct iter_data *d = data;
+
+ if (!net_eq(d->net, nf_ct_net(i)))
+ return 0;
+
+ return d->iter(i, d->data);
+}
+
+static void
+__nf_ct_unconfirmed_destroy(struct net *net)
+{
+ int cpu;
+
for_each_possible_cpu(cpu) {
- struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_node *n;
+ struct ct_pcpu *pcpu;
+
+ pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
spin_lock_bh(&pcpu->lock);
hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
+ struct nf_conn *ct;
+
ct = nf_ct_tuplehash_to_ctrack(h);
- if (iter(ct, data))
- set_bit(IPS_DYING_BIT, &ct->status);
+
+ /* we cannot call iter() on unconfirmed list, the
+ * owning cpu can reallocate ct->ext at any time.
+ */
+ set_bit(IPS_DYING_BIT, &ct->status);
}
spin_unlock_bh(&pcpu->lock);
cond_resched();
}
- return NULL;
-found:
- atomic_inc(&ct->ct_general.use);
- spin_unlock(lockp);
- local_bh_enable();
- return ct;
}
-void nf_ct_iterate_cleanup(struct net *net,
- int (*iter)(struct nf_conn *i, void *data),
- void *data, u32 portid, int report)
+void nf_ct_iterate_cleanup_net(struct net *net,
+ int (*iter)(struct nf_conn *i, void *data),
+ void *data, u32 portid, int report)
{
- struct nf_conn *ct;
- unsigned int bucket = 0;
+ struct iter_data d;
might_sleep();
if (atomic_read(&net->ct.count) == 0)
return;
- while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
- /* Time to push up daises... */
+ __nf_ct_unconfirmed_destroy(net);
- nf_ct_delete(ct, portid, report);
- nf_ct_put(ct);
- cond_resched();
+ d.iter = iter;
+ d.data = data;
+ d.net = net;
+
+ synchronize_net();
+
+ nf_ct_iterate_cleanup(iter_net_only, &d, portid, report);
+}
+EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
+
+/**
+ * nf_ct_iterate_destroy - destroy unconfirmed conntracks and iterate table
+ * @iter: callback to invoke for each conntrack
+ * @data: data to pass to @iter
+ *
+ * Like nf_ct_iterate_cleanup, but first marks conntracks on the
+ * unconfirmed list as dying (so they will not be inserted into
+ * main table).
+ *
+ * Can only be called in module exit path.
+ */
+void
+nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
+{
+ struct net *net;
+
+ rtnl_lock();
+ for_each_net(net) {
+ if (atomic_read(&net->ct.count) == 0)
+ continue;
+ __nf_ct_unconfirmed_destroy(net);
}
+ rtnl_unlock();
+
+ /* Need to wait for netns cleanup worker to finish, if its
+ * running -- it might have deleted a net namespace from
+ * the global list, so our __nf_ct_unconfirmed_destroy() might
+ * not have affected all namespaces.
+ */
+ net_ns_barrier();
+
+ /* a conntrack could have been unlinked from unconfirmed list
+ * before we grabbed pcpu lock in __nf_ct_unconfirmed_destroy().
+ * This makes sure its inserted into conntrack table.
+ */
+ synchronize_net();
+
+ nf_ct_iterate_cleanup(iter, data, 0, 0);
}
-EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
+EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy);
static int kill_all(struct nf_conn *i, void *data)
{
- return 1;
+ return net_eq(nf_ct_net(i), data);
}
void nf_ct_free_hashtable(void *hash, unsigned int size)
@@ -1723,7 +1820,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
i_see_dead_people:
busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) {
- nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
+ nf_ct_iterate_cleanup(kill_all, net, 0, 0);
if (atomic_read(&net->ct.count) != 0)
busy = 1;
}
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 3bcdc718484e..f71f0d2558fd 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1815,14 +1815,44 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = {
},
};
+static int __init h323_helper_init(void)
+{
+ int ret;
+
+ ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245);
+ if (ret < 0)
+ return ret;
+ ret = nf_conntrack_helpers_register(nf_conntrack_helper_q931,
+ ARRAY_SIZE(nf_conntrack_helper_q931));
+ if (ret < 0)
+ goto err1;
+ ret = nf_conntrack_helpers_register(nf_conntrack_helper_ras,
+ ARRAY_SIZE(nf_conntrack_helper_ras));
+ if (ret < 0)
+ goto err2;
+
+ return 0;
+err2:
+ nf_conntrack_helpers_unregister(nf_conntrack_helper_q931,
+ ARRAY_SIZE(nf_conntrack_helper_q931));
+err1:
+ nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
+ return ret;
+}
+
+static void __exit h323_helper_exit(void)
+{
+ nf_conntrack_helpers_unregister(nf_conntrack_helper_ras,
+ ARRAY_SIZE(nf_conntrack_helper_ras));
+ nf_conntrack_helpers_unregister(nf_conntrack_helper_q931,
+ ARRAY_SIZE(nf_conntrack_helper_q931));
+ nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
+}
+
/****************************************************************************/
static void __exit nf_conntrack_h323_fini(void)
{
- nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[1]);
- nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]);
- nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]);
- nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]);
- nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
+ h323_helper_exit();
kfree(h323_buffer);
pr_debug("nf_ct_h323: fini\n");
}
@@ -1837,32 +1867,11 @@ static int __init nf_conntrack_h323_init(void)
h323_buffer = kmalloc(65536, GFP_KERNEL);
if (!h323_buffer)
return -ENOMEM;
- ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245);
+ ret = h323_helper_init();
if (ret < 0)
goto err1;
- ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]);
- if (ret < 0)
- goto err2;
- ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]);
- if (ret < 0)
- goto err3;
- ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]);
- if (ret < 0)
- goto err4;
- ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]);
- if (ret < 0)
- goto err5;
pr_debug("nf_ct_h323: init success\n");
return 0;
-
-err5:
- nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]);
-err4:
- nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]);
-err3:
- nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]);
-err2:
- nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
err1:
kfree(h323_buffer);
return ret;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 7f6100ca63be..9129bb3b5153 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -285,16 +285,16 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
/* appropriate ct lock protecting must be taken by caller */
-static inline int unhelp(struct nf_conntrack_tuple_hash *i,
- const struct nf_conntrack_helper *me)
+static int unhelp(struct nf_conn *ct, void *me)
{
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
struct nf_conn_help *help = nfct_help(ct);
if (help && rcu_dereference_raw(help->helper) == me) {
nf_conntrack_event(IPCT_HELPER, ct);
RCU_INIT_POINTER(help->helper, NULL);
}
+
+ /* We are not intended to delete this conntrack. */
return 0;
}
@@ -437,33 +437,10 @@ out:
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
-static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
- struct net *net)
-{
- struct nf_conntrack_tuple_hash *h;
- const struct hlist_nulls_node *nn;
- int cpu;
-
- /* Get rid of expecteds, set helpers to NULL. */
- for_each_possible_cpu(cpu) {
- struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
-
- spin_lock_bh(&pcpu->lock);
- hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
- unhelp(h, me);
- spin_unlock_bh(&pcpu->lock);
- }
-}
-
void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
{
- struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_expect *exp;
const struct hlist_node *next;
- const struct hlist_nulls_node *nn;
- unsigned int last_hsize;
- spinlock_t *lock;
- struct net *net;
unsigned int i;
mutex_lock(&nf_ct_helper_mutex);
@@ -491,26 +468,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
}
spin_unlock_bh(&nf_conntrack_expect_lock);
- rtnl_lock();
- for_each_net(net)
- __nf_conntrack_helper_unregister(me, net);
- rtnl_unlock();
-
- local_bh_disable();
-restart:
- last_hsize = nf_conntrack_htable_size;
- for (i = 0; i < last_hsize; i++) {
- lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS];
- nf_conntrack_lock(lock);
- if (last_hsize != nf_conntrack_htable_size) {
- spin_unlock(lock);
- goto restart;
- }
- hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
- unhelp(h, me);
- spin_unlock(lock);
- }
- local_bh_enable();
+ nf_ct_iterate_destroy(unhelp, me);
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index a8be9b72e6cd..7999e70c3bfb 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -636,11 +636,11 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
if (events & (1 << IPCT_DESTROY)) {
type = IPCTNL_MSG_CT_DELETE;
group = NFNLGRP_CONNTRACK_DESTROY;
- } else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) {
+ } else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) {
type = IPCTNL_MSG_CT_NEW;
flags = NLM_F_CREATE|NLM_F_EXCL;
group = NFNLGRP_CONNTRACK_NEW;
- } else if (events) {
+ } else if (events) {
type = IPCTNL_MSG_CT_NEW;
group = NFNLGRP_CONNTRACK_UPDATE;
} else
@@ -1122,8 +1122,8 @@ static int ctnetlink_flush_conntrack(struct net *net,
return PTR_ERR(filter);
}
- nf_ct_iterate_cleanup(net, ctnetlink_filter_match, filter,
- portid, report);
+ nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter,
+ portid, report);
kfree(filter);
return 0;
@@ -1132,7 +1132,8 @@ static int ctnetlink_flush_conntrack(struct net *net,
static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
@@ -1184,7 +1185,8 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
@@ -1345,7 +1347,8 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
static int ctnetlink_get_ct_dying(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -1367,7 +1370,8 @@ ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -1906,7 +1910,8 @@ err1:
static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
struct nf_conntrack_tuple otuple, rtuple;
struct nf_conntrack_tuple_hash *h = NULL;
@@ -2071,7 +2076,8 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
static int ctnetlink_stat_ct_cpu(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -2116,7 +2122,8 @@ nlmsg_failure:
static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb2;
int err;
@@ -2778,7 +2785,8 @@ out:
static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
int err;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -2822,7 +2830,8 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
@@ -2834,7 +2843,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
if (cda[CTA_EXPECT_MASTER])
- return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda);
+ return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda,
+ extack);
else {
struct netlink_dump_control c = {
.dump = ctnetlink_exp_dump_table,
@@ -2902,7 +2912,8 @@ out:
static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
@@ -3190,7 +3201,8 @@ err_ct:
static int ctnetlink_new_expect(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
@@ -3296,7 +3308,8 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 2de6c1fe3261..1dcad229c3cc 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -28,8 +28,8 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
-static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly;
-struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly;
+static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
+struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_l3protos);
static DEFINE_MUTEX(nf_ct_proto_mutex);
@@ -68,7 +68,7 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
struct nf_conntrack_l4proto *
__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
{
- if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
+ if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
return &nf_conntrack_l4proto_generic;
return rcu_dereference(nf_ct_protos[l3proto][l4proto]);
@@ -212,7 +212,7 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
int ret = 0;
struct nf_conntrack_l3proto *old;
- if (proto->l3proto >= AF_MAX)
+ if (proto->l3proto >= NFPROTO_NUMPROTO)
return -EBUSY;
if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
@@ -254,7 +254,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
{
- BUG_ON(proto->l3proto >= AF_MAX);
+ BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
mutex_lock(&nf_ct_proto_mutex);
BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
@@ -265,6 +265,8 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
mutex_unlock(&nf_ct_proto_mutex);
synchronize_rcu();
+ /* Remove all contrack entries for this protocol */
+ nf_ct_iterate_destroy(kill_l3proto, proto);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
@@ -280,9 +282,6 @@ void nf_ct_l3proto_pernet_unregister(struct net *net,
*/
if (proto->net_ns_put)
proto->net_ns_put(net);
-
- /* Remove all contrack entries for this protocol */
- nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
@@ -342,7 +341,7 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
- if (l4proto->l3proto >= PF_MAX)
+ if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos))
return -EBUSY;
if ((l4proto->to_nlattr && !l4proto->nlattr_size) ||
@@ -421,17 +420,23 @@ out:
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
-void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+
{
- BUG_ON(l4proto->l3proto >= PF_MAX);
+ BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));
- mutex_lock(&nf_ct_proto_mutex);
BUG_ON(rcu_dereference_protected(
nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
lockdep_is_held(&nf_ct_proto_mutex)
) != l4proto);
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
&nf_conntrack_l4proto_generic);
+}
+
+void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+{
+ mutex_lock(&nf_ct_proto_mutex);
+ __nf_ct_l4proto_unregister_one(l4proto);
mutex_unlock(&nf_ct_proto_mutex);
synchronize_rcu();
@@ -448,9 +453,6 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net,
pn->users--;
nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
-
- /* Remove all contrack entries for this protocol */
- nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
@@ -500,8 +502,14 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
unsigned int num_proto)
{
+ mutex_lock(&nf_ct_proto_mutex);
while (num_proto-- != 0)
- nf_ct_l4proto_unregister_one(l4proto[num_proto]);
+ __nf_ct_l4proto_unregister_one(l4proto[num_proto]);
+ mutex_unlock(&nf_ct_proto_mutex);
+
+ synchronize_net();
+ /* Remove all contrack entries for this protocol */
+ nf_ct_iterate_destroy(kill_l4proto, l4proto);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
@@ -548,7 +556,7 @@ void nf_conntrack_proto_pernet_fini(struct net *net)
int nf_conntrack_proto_init(void)
{
unsigned int i;
- for (i = 0; i < AF_MAX; i++)
+ for (i = 0; i < NFPROTO_NUMPROTO; i++)
rcu_assign_pointer(nf_ct_l3protos[i],
&nf_conntrack_l3proto_generic);
return 0;
@@ -558,6 +566,6 @@ void nf_conntrack_proto_fini(void)
{
unsigned int i;
/* free l3proto protocol tables */
- for (i = 0; i < PF_MAX; i++)
+ for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
kfree(nf_ct_protos[i]);
}
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 1c5b14a6cab3..31c6c8ee9d5d 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -190,7 +190,7 @@ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
}
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
-for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0; \
+for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
(offset) < (skb)->len && \
((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \
(offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
@@ -202,7 +202,7 @@ static int do_basic_checks(struct nf_conn *ct,
unsigned long *map)
{
u_int32_t offset, count;
- sctp_chunkhdr_t _sch, *sch;
+ struct sctp_chunkhdr _sch, *sch;
int flag;
flag = 0;
@@ -395,9 +395,9 @@ static int sctp_packet(struct nf_conn *ct,
/* If it is an INIT or an INIT ACK note down the vtag */
if (sch->type == SCTP_CID_INIT ||
sch->type == SCTP_CID_INIT_ACK) {
- sctp_inithdr_t _inithdr, *ih;
+ struct sctp_inithdr _inithdr, *ih;
- ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
+ ih = skb_header_pointer(skb, offset + sizeof(_sch),
sizeof(_inithdr), &_inithdr);
if (ih == NULL)
goto out_unlock;
@@ -471,23 +471,20 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
/* Copy the vtag into the state info */
if (sch->type == SCTP_CID_INIT) {
- if (sh->vtag == 0) {
- sctp_inithdr_t _inithdr, *ih;
+ struct sctp_inithdr _inithdr, *ih;
+ /* Sec 8.5.1 (A) */
+ if (sh->vtag)
+ return false;
- ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
- sizeof(_inithdr), &_inithdr);
- if (ih == NULL)
- return false;
+ ih = skb_header_pointer(skb, offset + sizeof(_sch),
+ sizeof(_inithdr), &_inithdr);
+ if (!ih)
+ return false;
- pr_debug("Setting vtag %x for new conn\n",
- ih->init_tag);
+ pr_debug("Setting vtag %x for new conn\n",
+ ih->init_tag);
- ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
- ih->init_tag;
- } else {
- /* Sec 8.5.1 (A) */
- return false;
- }
+ ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
} else if (sch->type == SCTP_CID_HEARTBEAT) {
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index c9d7f95768ab..f4a566e67213 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -13,6 +13,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_dup_netdev.h>
static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
{
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 6c72922d20ca..832c5a08d9a5 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -582,12 +582,8 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
.l3proto = l3proto,
.l4proto = l4proto,
};
- struct net *net;
- rtnl_lock();
- for_each_net(net)
- nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
- rtnl_unlock();
+ nf_ct_iterate_destroy(nf_nat_proto_remove, &clean);
}
static void nf_nat_l3proto_clean(u8 l3proto)
@@ -595,13 +591,8 @@ static void nf_nat_l3proto_clean(u8 l3proto)
struct nf_nat_proto_clean clean = {
.l3proto = l3proto,
};
- struct net *net;
- rtnl_lock();
-
- for_each_net(net)
- nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
- rtnl_unlock();
+ nf_ct_iterate_destroy(nf_nat_proto_remove, &clean);
}
/* Protocol registration. */
@@ -822,17 +813,6 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
}
#endif
-static void __net_exit nf_nat_net_exit(struct net *net)
-{
- struct nf_nat_proto_clean clean = {};
-
- nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0);
-}
-
-static struct pernet_operations nf_nat_net_ops = {
- .exit = nf_nat_net_exit,
-};
-
static struct nf_ct_helper_expectfn follow_master_nat = {
.name = "nat-follow-master",
.expectfn = nf_nat_follow_master,
@@ -853,10 +833,6 @@ static int __init nf_nat_init(void)
return ret;
}
- ret = register_pernet_subsys(&nf_nat_net_ops);
- if (ret < 0)
- goto cleanup_extend;
-
nf_ct_helper_expectfn_register(&follow_master_nat);
BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
@@ -867,18 +843,15 @@ static int __init nf_nat_init(void)
RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session);
#endif
return 0;
-
- cleanup_extend:
- rhltable_destroy(&nf_nat_bysource_table);
- nf_ct_extend_unregister(&nat_extend);
- return ret;
}
static void __exit nf_nat_cleanup(void)
{
+ struct nf_nat_proto_clean clean = {};
unsigned int i;
- unregister_pernet_subsys(&nf_nat_net_ops);
+ nf_ct_iterate_destroy(nf_nat_proto_clean, &clean);
+
nf_ct_extend_unregister(&nat_extend);
nf_ct_helper_expectfn_unregister(&follow_master_nat);
RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index 804e8a0ab36e..c57ee3240b1d 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -32,7 +32,7 @@ sctp_manip_pkt(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- sctp_sctphdr_t *hdr;
+ struct sctphdr *hdr;
int hdrsize = 8;
/* This could be an inner header returned in imcp packet; in such
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index da314be0c048..7843efa33c59 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -13,6 +13,7 @@
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
+#include <linux/vmalloc.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
@@ -386,7 +387,7 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table)
return ++table->hgenerator;
}
-static const struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX];
+static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
static const struct nf_chain_type *
__nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
@@ -534,7 +535,8 @@ done:
static int nf_tables_gettable(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
@@ -677,7 +679,8 @@ err:
static int nf_tables_newtable(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
@@ -830,7 +833,8 @@ out:
static int nf_tables_deltable(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
@@ -869,6 +873,9 @@ int nft_register_chain_type(const struct nf_chain_type *ctype)
{
int err = 0;
+ if (WARN_ON(ctype->family >= NFPROTO_NUMPROTO))
+ return -EINVAL;
+
nfnl_lock(NFNL_SUBSYS_NFTABLES);
if (chain_type[ctype->family][ctype->type] != NULL) {
err = -EBUSY;
@@ -1123,7 +1130,8 @@ done:
static int nf_tables_getchain(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
@@ -1319,7 +1327,8 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nlattr * uninitialized_var(name);
@@ -1557,7 +1566,8 @@ err1:
static int nf_tables_delchain(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
@@ -2038,7 +2048,8 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb)
static int nf_tables_getrule(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
@@ -2131,7 +2142,8 @@ static struct nft_expr_info *info;
static int nf_tables_newrule(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
@@ -2313,7 +2325,8 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
static int nf_tables_delrule(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
@@ -2377,64 +2390,77 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
* Sets
*/
-static LIST_HEAD(nf_tables_set_ops);
+static LIST_HEAD(nf_tables_set_types);
-int nft_register_set(struct nft_set_ops *ops)
+int nft_register_set(struct nft_set_type *type)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_add_tail_rcu(&ops->list, &nf_tables_set_ops);
+ list_add_tail_rcu(&type->list, &nf_tables_set_types);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
EXPORT_SYMBOL_GPL(nft_register_set);
-void nft_unregister_set(struct nft_set_ops *ops)
+void nft_unregister_set(struct nft_set_type *type)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_del_rcu(&ops->list);
+ list_del_rcu(&type->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_set);
+#define NFT_SET_FEATURES (NFT_SET_INTERVAL | NFT_SET_MAP | \
+ NFT_SET_TIMEOUT | NFT_SET_OBJECT)
+
+static bool nft_set_ops_candidate(const struct nft_set_ops *ops, u32 flags)
+{
+ return (flags & ops->features) == (flags & NFT_SET_FEATURES);
+}
+
/*
* Select a set implementation based on the data characteristics and the
* given policy. The total memory use might not be known if no size is
* given, in that case the amount of memory per element is used.
*/
static const struct nft_set_ops *
-nft_select_set_ops(const struct nlattr * const nla[],
+nft_select_set_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const nla[],
const struct nft_set_desc *desc,
enum nft_set_policies policy)
{
const struct nft_set_ops *ops, *bops;
struct nft_set_estimate est, best;
- u32 features;
+ const struct nft_set_type *type;
+ u32 flags = 0;
#ifdef CONFIG_MODULES
- if (list_empty(&nf_tables_set_ops)) {
+ if (list_empty(&nf_tables_set_types)) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
request_module("nft-set");
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- if (!list_empty(&nf_tables_set_ops))
+ if (!list_empty(&nf_tables_set_types))
return ERR_PTR(-EAGAIN);
}
#endif
- features = 0;
- if (nla[NFTA_SET_FLAGS] != NULL) {
- features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
- features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT |
- NFT_SET_OBJECT;
- }
+ if (nla[NFTA_SET_FLAGS] != NULL)
+ flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
bops = NULL;
best.size = ~0;
best.lookup = ~0;
best.space = ~0;
- list_for_each_entry(ops, &nf_tables_set_ops, list) {
- if ((ops->features & features) != features)
+ list_for_each_entry(type, &nf_tables_set_types, list) {
+ if (!type->select_ops)
+ ops = type->ops;
+ else
+ ops = type->select_ops(ctx, desc, flags);
+ if (!ops)
+ continue;
+
+ if (!nft_set_ops_candidate(ops, flags))
continue;
- if (!ops->estimate(desc, features, &est))
+ if (!ops->estimate(desc, flags, &est))
continue;
switch (policy) {
@@ -2465,10 +2491,10 @@ nft_select_set_ops(const struct nlattr * const nla[],
break;
}
- if (!try_module_get(ops->owner))
+ if (!try_module_get(type->owner))
continue;
if (bops != NULL)
- module_put(bops->owner);
+ module_put(bops->type->owner);
bops = ops;
best = est;
@@ -2816,7 +2842,8 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb)
static int nf_tables_getset(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
u8 genmask = nft_genmask_cur(net);
const struct nft_set *set;
@@ -2892,7 +2919,8 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
static int nf_tables_newset(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
@@ -3029,7 +3057,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
if (!(nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
- ops = nft_select_set_ops(nla, &desc, policy);
+ ops = nft_select_set_ops(&ctx, nla, &desc, policy);
if (IS_ERR(ops))
return PTR_ERR(ops);
@@ -3039,12 +3067,13 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
size = 0;
if (ops->privsize != NULL)
- size = ops->privsize(nla);
+ size = ops->privsize(nla, &desc);
- err = -ENOMEM;
- set = kzalloc(sizeof(*set) + size + udlen, GFP_KERNEL);
- if (set == NULL)
+ set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL);
+ if (!set) {
+ err = -ENOMEM;
goto err1;
+ }
nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
err = nf_tables_set_alloc_name(&ctx, set, name);
@@ -3087,17 +3116,17 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
err3:
ops->destroy(set);
err2:
- kfree(set);
+ kvfree(set);
err1:
- module_put(ops->owner);
+ module_put(ops->type->owner);
return err;
}
static void nft_set_destroy(struct nft_set *set)
{
set->ops->destroy(set);
- module_put(set->ops->owner);
- kfree(set);
+ module_put(set->ops->type->owner);
+ kvfree(set);
}
static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
@@ -3109,7 +3138,8 @@ static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set
static int nf_tables_delset(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
@@ -3469,7 +3499,8 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb)
static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
u8 genmask = nft_genmask_cur(net);
const struct nft_set *set;
@@ -3870,7 +3901,8 @@ err1:
static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
u8 genmask = nft_genmask_next(net);
const struct nlattr *attr;
@@ -4067,7 +4099,8 @@ err1:
static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
u8 genmask = nft_genmask_next(net);
const struct nlattr *attr;
@@ -4277,7 +4310,8 @@ static const struct nft_object_type *nft_obj_type_get(u32 objtype)
static int nf_tables_newobj(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nft_object_type *type;
@@ -4471,7 +4505,8 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
static int nf_tables_getobj(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
@@ -4549,8 +4584,9 @@ static void nft_obj_destroy(struct nft_object *obj)
}
static int nf_tables_delobj(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
@@ -4680,7 +4716,8 @@ err:
static int nf_tables_getgen(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb2;
int err;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 80f5ecf2c3d7..92b05e188fd1 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -201,7 +201,8 @@ replay:
if (nc->call_rcu) {
err = nc->call_rcu(net, net->nfnl, skb, nlh,
- (const struct nlattr **)cda);
+ (const struct nlattr **)cda,
+ extack);
rcu_read_unlock();
} else {
rcu_read_unlock();
@@ -211,7 +212,8 @@ replay:
err = -EAGAIN;
else if (nc->call)
err = nc->call(net, net->nfnl, skb, nlh,
- (const struct nlattr **)cda);
+ (const struct nlattr **)cda,
+ extack);
else
err = -EINVAL;
nfnl_unlock(subsys_id);
@@ -226,9 +228,11 @@ struct nfnl_err {
struct list_head head;
struct nlmsghdr *nlh;
int err;
+ struct netlink_ext_ack extack;
};
-static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err)
+static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err,
+ const struct netlink_ext_ack *extack)
{
struct nfnl_err *nfnl_err;
@@ -238,6 +242,7 @@ static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err)
nfnl_err->nlh = nlh;
nfnl_err->err = err;
+ nfnl_err->extack = *extack;
list_add_tail(&nfnl_err->head, list);
return 0;
@@ -262,7 +267,8 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
struct nfnl_err *nfnl_err, *next;
list_for_each_entry_safe(nfnl_err, next, err_list, head) {
- netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, NULL);
+ netlink_ack(skb, nfnl_err->nlh, nfnl_err->err,
+ &nfnl_err->extack);
nfnl_err_del(nfnl_err);
}
}
@@ -280,6 +286,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
const struct nfnetlink_subsystem *ss;
const struct nfnl_callback *nc;
+ struct netlink_ext_ack extack;
LIST_HEAD(err_list);
u32 status;
int err;
@@ -325,6 +332,7 @@ replay:
while (skb->len >= nlmsg_total_size(0)) {
int msglen, type;
+ memset(&extack, 0, sizeof(extack));
nlh = nlmsg_hdr(skb);
err = 0;
@@ -384,7 +392,8 @@ replay:
if (nc->call_batch) {
err = nc->call_batch(net, net->nfnl, skb, nlh,
- (const struct nlattr **)cda);
+ (const struct nlattr **)cda,
+ &extack);
}
/* The lock was released to autoload some module, we
@@ -402,7 +411,7 @@ ack:
* processed, this avoids that the same error is
* reported several times when replaying the batch.
*/
- if (nfnl_err_add(&err_list, nlh, err) < 0) {
+ if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
/* We failed to enqueue an error, reset the
* list of errors and send OOM to userspace
* pointing to the batch header.
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 9898fb4d0512..c45e6d4358ab 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -49,7 +49,8 @@ struct nfacct_filter {
static int nfnl_acct_new(struct net *net, struct sock *nfnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[])
+ const struct nlattr * const tb[],
+ struct netlink_ext_ack *extack)
{
struct nf_acct *nfacct, *matching = NULL;
char *acct_name;
@@ -264,7 +265,8 @@ nfacct_filter_alloc(const struct nlattr * const attr)
static int nfnl_acct_get(struct net *net, struct sock *nfnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[])
+ const struct nlattr * const tb[],
+ struct netlink_ext_ack *extack)
{
int ret = -ENOENT;
struct nf_acct *cur;
@@ -343,7 +345,8 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
static int nfnl_acct_del(struct net *net, struct sock *nfnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[])
+ const struct nlattr * const tb[],
+ struct netlink_ext_ack *extack)
{
struct nf_acct *cur, *tmp;
int ret = -ENOENT;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index be678a323598..41628b393673 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -398,7 +398,8 @@ nfnl_cthelper_update(const struct nlattr * const tb[],
static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[])
+ const struct nlattr * const tb[],
+ struct netlink_ext_ack *extack)
{
const char *helper_name;
struct nf_conntrack_helper *cur, *helper = NULL;
@@ -599,7 +600,8 @@ out:
static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[])
+ const struct nlattr * const tb[],
+ struct netlink_ext_ack *extack)
{
int ret = -ENOENT;
struct nf_conntrack_helper *cur;
@@ -666,7 +668,8 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[])
+ const struct nlattr * const tb[],
+ struct netlink_ext_ack *extack)
{
char *helper_name = NULL;
struct nf_conntrack_helper *cur;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index a3e7bb54d96a..400e9ae97153 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -69,7 +69,8 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
__u16 l3num;
__u8 l4num;
@@ -239,7 +240,8 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
int ret = -ENOENT;
char *name;
@@ -287,49 +289,20 @@ static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
return ret;
}
-static void untimeout(struct nf_conntrack_tuple_hash *i,
- struct ctnl_timeout *timeout)
+static int untimeout(struct nf_conn *ct, void *timeout)
{
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
if (timeout_ext && (!timeout || timeout_ext->timeout == timeout))
RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+
+ /* We are not intended to delete this conntrack. */
+ return 0;
}
static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
{
- struct nf_conntrack_tuple_hash *h;
- const struct hlist_nulls_node *nn;
- unsigned int last_hsize;
- spinlock_t *lock;
- int i, cpu;
-
- for_each_possible_cpu(cpu) {
- struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
-
- spin_lock_bh(&pcpu->lock);
- hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
- untimeout(h, timeout);
- spin_unlock_bh(&pcpu->lock);
- }
-
- local_bh_disable();
-restart:
- last_hsize = nf_conntrack_htable_size;
- for (i = 0; i < last_hsize; i++) {
- lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS];
- nf_conntrack_lock(lock);
- if (last_hsize != nf_conntrack_htable_size) {
- spin_unlock(lock);
- goto restart;
- }
-
- hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
- untimeout(h, timeout);
- spin_unlock(lock);
- }
- local_bh_enable();
+ nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0);
}
/* try to delete object, fail if it is still in use. */
@@ -355,7 +328,8 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
struct ctnl_timeout *cur, *tmp;
int ret = -ENOENT;
@@ -386,7 +360,8 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
static int cttimeout_default_set(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
__u16 l3num;
__u8 l4num;
@@ -475,7 +450,8 @@ nla_put_failure:
static int cttimeout_default_get(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+ const struct nlattr * const cda[],
+ struct netlink_ext_ack *extack)
{
__u16 l3num;
__u8 l4num;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 94ec0d0765a8..c684ba95dbb4 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -795,7 +795,8 @@ static struct notifier_block nfulnl_rtnl_notifier = {
static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+ const struct nlattr * const nfqa[],
+ struct netlink_ext_ack *extack)
{
return -ENOTSUPP;
}
@@ -818,7 +819,8 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = {
static int nfulnl_recv_config(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nfula[])
+ const struct nlattr * const nfula[],
+ struct netlink_ext_ack *extack)
{
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int16_t group_num = ntohs(nfmsg->res_id);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 1b17a1b445a3..16fa04086880 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1032,7 +1032,8 @@ static int nfq_id_after(unsigned int id, unsigned int max)
static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+ const struct nlattr * const nfqa[],
+ struct netlink_ext_ack *extack)
{
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nf_queue_entry *entry, *tmp;
@@ -1136,7 +1137,8 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry,
static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+ const struct nlattr * const nfqa[],
+ struct netlink_ext_ack *extack)
{
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int16_t queue_num = ntohs(nfmsg->res_id);
@@ -1200,7 +1202,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+ const struct nlattr * const nfqa[],
+ struct netlink_ext_ack *extack)
{
return -ENOTSUPP;
}
@@ -1217,7 +1220,8 @@ static const struct nf_queue_handler nfqh = {
static int nfqnl_recv_config(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+ const struct nlattr * const nfqa[],
+ struct netlink_ext_ack *extack)
{
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int16_t queue_num = ntohs(nfmsg->res_id);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index f753ec69f790..f5a7cb68694e 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -530,7 +530,8 @@ nla_put_failure:
static int nfnl_compat_get(struct net *net, struct sock *nfnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[])
+ const struct nlattr * const tb[],
+ struct netlink_ext_ack *extack)
{
int ret = 0, target;
struct nfgenmsg *nfmsg;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index d3eb640bc784..c7383d8f88d0 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -23,9 +23,9 @@ struct nft_rt {
enum nft_registers dreg:8;
};
-void nft_rt_get_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static void nft_rt_get_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
const struct nft_rt *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
@@ -72,9 +72,9 @@ const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
[NFTA_RT_KEY] = { .type = NLA_U32 },
};
-int nft_rt_get_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_rt_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_rt *priv = nft_expr_priv(expr);
unsigned int len;
@@ -103,8 +103,8 @@ int nft_rt_get_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, len);
}
-int nft_rt_get_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+static int nft_rt_get_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
const struct nft_rt *priv = nft_expr_priv(expr);
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index b988162b5b15..734989c40579 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -236,7 +236,8 @@ static inline u32 nft_bitmap_total_size(u32 klen)
return sizeof(struct nft_bitmap) + nft_bitmap_size(klen);
}
-static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[])
+static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc)
{
u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
@@ -278,7 +279,9 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
+static struct nft_set_type nft_bitmap_type;
static struct nft_set_ops nft_bitmap_ops __read_mostly = {
+ .type = &nft_bitmap_type,
.privsize = nft_bitmap_privsize,
.elemsize = offsetof(struct nft_bitmap_elem, ext),
.estimate = nft_bitmap_estimate,
@@ -291,17 +294,21 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = {
.activate = nft_bitmap_activate,
.lookup = nft_bitmap_lookup,
.walk = nft_bitmap_walk,
+};
+
+static struct nft_set_type nft_bitmap_type __read_mostly = {
+ .ops = &nft_bitmap_ops,
.owner = THIS_MODULE,
};
static int __init nft_bitmap_module_init(void)
{
- return nft_register_set(&nft_bitmap_ops);
+ return nft_register_set(&nft_bitmap_type);
}
static void __exit nft_bitmap_module_exit(void)
{
- nft_unregister_set(&nft_bitmap_ops);
+ nft_unregister_set(&nft_bitmap_type);
}
module_init(nft_bitmap_module_init);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 3d3a6df4ce70..0fa01d772c5e 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -22,45 +22,43 @@
#include <net/netfilter/nf_tables.h>
/* We target a hash table size of 4, element hint is 75% of final size */
-#define NFT_HASH_ELEMENT_HINT 3
+#define NFT_RHASH_ELEMENT_HINT 3
-struct nft_hash {
+struct nft_rhash {
struct rhashtable ht;
struct delayed_work gc_work;
};
-struct nft_hash_elem {
+struct nft_rhash_elem {
struct rhash_head node;
struct nft_set_ext ext;
};
-struct nft_hash_cmp_arg {
+struct nft_rhash_cmp_arg {
const struct nft_set *set;
const u32 *key;
u8 genmask;
};
-static const struct rhashtable_params nft_hash_params;
-
-static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
+static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
{
- const struct nft_hash_cmp_arg *arg = data;
+ const struct nft_rhash_cmp_arg *arg = data;
return jhash(arg->key, len, seed);
}
-static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
+static inline u32 nft_rhash_obj(const void *data, u32 len, u32 seed)
{
- const struct nft_hash_elem *he = data;
+ const struct nft_rhash_elem *he = data;
return jhash(nft_set_ext_key(&he->ext), len, seed);
}
-static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
- const void *ptr)
+static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
{
- const struct nft_hash_cmp_arg *x = arg->key;
- const struct nft_hash_elem *he = ptr;
+ const struct nft_rhash_cmp_arg *x = arg->key;
+ const struct nft_rhash_elem *he = ptr;
if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
return 1;
@@ -71,41 +69,49 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
return 0;
}
-static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+static const struct rhashtable_params nft_rhash_params = {
+ .head_offset = offsetof(struct nft_rhash_elem, node),
+ .hashfn = nft_rhash_key,
+ .obj_hashfn = nft_rhash_obj,
+ .obj_cmpfn = nft_rhash_cmp,
+ .automatic_shrinking = true,
+};
+
+static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
{
- struct nft_hash *priv = nft_set_priv(set);
- const struct nft_hash_elem *he;
- struct nft_hash_cmp_arg arg = {
+ struct nft_rhash *priv = nft_set_priv(set);
+ const struct nft_rhash_elem *he;
+ struct nft_rhash_cmp_arg arg = {
.genmask = nft_genmask_cur(net),
.set = set,
.key = key,
};
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
*ext = &he->ext;
return !!he;
}
-static bool nft_hash_update(struct nft_set *set, const u32 *key,
- void *(*new)(struct nft_set *,
- const struct nft_expr *,
- struct nft_regs *regs),
- const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_set_ext **ext)
+static bool nft_rhash_update(struct nft_set *set, const u32 *key,
+ void *(*new)(struct nft_set *,
+ const struct nft_expr *,
+ struct nft_regs *regs),
+ const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_set_ext **ext)
{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he, *prev;
- struct nft_hash_cmp_arg arg = {
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he, *prev;
+ struct nft_rhash_cmp_arg arg = {
.genmask = NFT_GENMASK_ANY,
.set = set,
.key = key,
};
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
goto out;
@@ -114,7 +120,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key,
goto err1;
prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
- nft_hash_params);
+ nft_rhash_params);
if (IS_ERR(prev))
goto err2;
@@ -134,21 +140,21 @@ err1:
return false;
}
-static int nft_hash_insert(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem,
- struct nft_set_ext **ext)
+static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **ext)
{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he = elem->priv;
- struct nft_hash_cmp_arg arg = {
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he = elem->priv;
+ struct nft_rhash_cmp_arg arg = {
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
};
- struct nft_hash_elem *prev;
+ struct nft_rhash_elem *prev;
prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
- nft_hash_params);
+ nft_rhash_params);
if (IS_ERR(prev))
return PTR_ERR(prev);
if (prev) {
@@ -158,19 +164,19 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set,
return 0;
}
-static void nft_hash_activate(const struct net *net, const struct nft_set *set,
- const struct nft_set_elem *elem)
+static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
- struct nft_hash_elem *he = elem->priv;
+ struct nft_rhash_elem *he = elem->priv;
nft_set_elem_change_active(net, set, &he->ext);
nft_set_elem_clear_busy(&he->ext);
}
-static bool nft_hash_flush(const struct net *net,
- const struct nft_set *set, void *priv)
+static bool nft_rhash_flush(const struct net *net,
+ const struct nft_set *set, void *priv)
{
- struct nft_hash_elem *he = priv;
+ struct nft_rhash_elem *he = priv;
if (!nft_set_elem_mark_busy(&he->ext) ||
!nft_is_active(net, &he->ext)) {
@@ -180,22 +186,22 @@ static bool nft_hash_flush(const struct net *net,
return false;
}
-static void *nft_hash_deactivate(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static void *nft_rhash_deactivate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
- struct nft_hash_cmp_arg arg = {
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he;
+ struct nft_rhash_cmp_arg arg = {
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
};
rcu_read_lock();
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
if (he != NULL &&
- !nft_hash_flush(net, set, he))
+ !nft_rhash_flush(net, set, he))
he = NULL;
rcu_read_unlock();
@@ -203,21 +209,21 @@ static void *nft_hash_deactivate(const struct net *net,
return he;
}
-static void nft_hash_remove(const struct net *net,
- const struct nft_set *set,
- const struct nft_set_elem *elem)
+static void nft_rhash_remove(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he = elem->priv;
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he = elem->priv;
- rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
}
-static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
- struct nft_set_iter *iter)
+static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_iter *iter)
{
- struct nft_hash *priv = nft_set_priv(set);
- struct nft_hash_elem *he;
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_elem *he;
struct rhashtable_iter hti;
struct nft_set_elem elem;
int err;
@@ -266,16 +272,16 @@ out:
rhashtable_walk_exit(&hti);
}
-static void nft_hash_gc(struct work_struct *work)
+static void nft_rhash_gc(struct work_struct *work)
{
struct nft_set *set;
- struct nft_hash_elem *he;
- struct nft_hash *priv;
+ struct nft_rhash_elem *he;
+ struct nft_rhash *priv;
struct nft_set_gc_batch *gcb = NULL;
struct rhashtable_iter hti;
int err;
- priv = container_of(work, struct nft_hash, gc_work.work);
+ priv = container_of(work, struct nft_rhash, gc_work.work);
set = nft_set_container_of(priv);
err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
@@ -301,7 +307,7 @@ static void nft_hash_gc(struct work_struct *work)
gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
if (gcb == NULL)
goto out;
- rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
atomic_dec(&set->nelems);
nft_set_gc_batch_add(gcb, he);
}
@@ -315,82 +321,290 @@ schedule:
nft_set_gc_interval(set));
}
-static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
+static unsigned int nft_rhash_privsize(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc)
{
- return sizeof(struct nft_hash);
+ return sizeof(struct nft_rhash);
}
-static const struct rhashtable_params nft_hash_params = {
- .head_offset = offsetof(struct nft_hash_elem, node),
- .hashfn = nft_hash_key,
- .obj_hashfn = nft_hash_obj,
- .obj_cmpfn = nft_hash_cmp,
- .automatic_shrinking = true,
-};
-
-static int nft_hash_init(const struct nft_set *set,
- const struct nft_set_desc *desc,
- const struct nlattr * const tb[])
+static int nft_rhash_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ const struct nlattr * const tb[])
{
- struct nft_hash *priv = nft_set_priv(set);
- struct rhashtable_params params = nft_hash_params;
+ struct nft_rhash *priv = nft_set_priv(set);
+ struct rhashtable_params params = nft_rhash_params;
int err;
- params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
+ params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT;
params.key_len = set->klen;
err = rhashtable_init(&priv->ht, &params);
if (err < 0)
return err;
- INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+ INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
if (set->flags & NFT_SET_TIMEOUT)
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
return 0;
}
-static void nft_hash_elem_destroy(void *ptr, void *arg)
+static void nft_rhash_elem_destroy(void *ptr, void *arg)
{
nft_set_elem_destroy(arg, ptr, true);
}
-static void nft_hash_destroy(const struct nft_set *set)
+static void nft_rhash_destroy(const struct nft_set *set)
{
- struct nft_hash *priv = nft_set_priv(set);
+ struct nft_rhash *priv = nft_set_priv(set);
cancel_delayed_work_sync(&priv->gc_work);
- rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
+ rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
(void *)set);
}
-static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
- struct nft_set_estimate *est)
+static u32 nft_hash_buckets(u32 size)
{
- unsigned int esize;
+ return roundup_pow_of_two(size * 4 / 3);
+}
- esize = sizeof(struct nft_hash_elem);
- if (desc->size) {
- est->size = sizeof(struct nft_hash) +
- roundup_pow_of_two(desc->size * 4 / 3) *
- sizeof(struct nft_hash_elem *) +
- desc->size * esize;
- } else {
- /* Resizing happens when the load drops below 30% or goes
- * above 75%. The average of 52.5% load (approximated by 50%)
- * is used for the size estimation of the hash buckets,
- * meaning we calculate two buckets per element.
- */
- est->size = esize + 2 * sizeof(struct nft_hash_elem *);
+/* Estimate callback of the resizable hash backend: reports a size of ~0
+ * (all bits set) instead of a byte count, together with the O(1) lookup and
+ * O(n) space classes. Always returns true.
+ */
+static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features,
+			       struct nft_set_estimate *est)
+{
+	est->space  = NFT_SET_CLASS_O_N;
+	est->lookup = NFT_SET_CLASS_O_1;
+	est->size   = ~0;
+
+	return true;
+}
+
+struct nft_hash { /* private data of the fixed-size hash set backend */
+ u32 seed; /* initval passed to jhash(), randomized in nft_hash_init() */
+ u32 buckets; /* number of entries in table[]; range for reciprocal_scale() */
+ struct hlist_head table[]; /* bucket heads, sized by nft_hash_privsize() */
+};
+
+struct nft_hash_elem { /* one element of a fixed-size hash set */
+ struct hlist_node node; /* linkage into one nft_hash table[] bucket */
+ struct nft_set_ext ext; /* element extensions; its offset is the ops' elemsize */
+};
+
+/* Generic lookup for fixed-size hash sets.
+ *
+ * Hashes the klen-byte key into a bucket and scans that bucket under RCU
+ * for an element with an identical key that is active in the current
+ * generation. On a hit, *ext is set to the element's extension area and
+ * true is returned; otherwise *ext is left untouched and false is returned.
+ */
+static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
+			    const u32 *key, const struct nft_set_ext **ext)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	u8 genmask = nft_genmask_cur(net);
+	const struct nft_hash_elem *he;
+	u32 bucket;
+
+	bucket = reciprocal_scale(jhash(key, set->klen, priv->seed),
+				  priv->buckets);
+	hlist_for_each_entry_rcu(he, &priv->table[bucket], node) {
+		if (memcmp(nft_set_ext_key(&he->ext), key, set->klen))
+			continue;
+		if (!nft_set_elem_active(&he->ext, genmask))
+			continue;
+
+		*ext = &he->ext;
+		return true;
+	}
+	return false;
+}
+
+/* Read a key as a native integer. nft_hash_select_ops() only selects the
+ * fast lookup for 2- or 4-byte keys, so any klen other than 4 is read as
+ * a 16-bit value.
+ */
+static inline u32 nft_hash_key(const u32 *key, u32 klen)
+{
+	return klen == 4 ? *key : *(u16 *)key;
+}
+
+/* Lookup for fixed-size sets whose key is a single 2- or 4-byte integer:
+ * candidates are compared as plain integers instead of with memcmp().
+ *
+ * The bucket is derived with jhash() over the raw klen key bytes, which is
+ * exactly how nft_hash_insert() chooses the bucket when linking an element.
+ * jhash_1word() must not be used for this: it always mixes in a length of
+ * 4 and hashes the value rather than the bytes, so it disagrees with
+ * jhash() for 2-byte keys and for 4-byte keys on big-endian hosts, which
+ * leaves inserted elements unreachable.
+ *
+ * Returns true and stores the element's extension area in *ext when an
+ * element active in the current generation matches, false otherwise.
+ */
+static bool nft_hash_lookup_fast(const struct net *net,
+				 const struct nft_set *set,
+				 const u32 *key, const struct nft_set_ext **ext)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	u8 genmask = nft_genmask_cur(net);
+	const struct nft_hash_elem *he;
+	u32 hash, k1, k2;
+
+	k1 = nft_hash_key(key, set->klen);
+	/* Must match the bucket computation of the insertion path. */
+	hash = jhash(key, set->klen, priv->seed);
+	hash = reciprocal_scale(hash, priv->buckets);
+	hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
+		k2 = nft_hash_key(nft_set_ext_key(&he->ext)->data, set->klen);
+		if (k1 == k2 &&
+		    nft_set_elem_active(&he->ext, genmask)) {
+			*ext = &he->ext;
+			return true;
+		}
+	}
+	return false;
+}
+
+/* Link a new element into its bucket.
+ *
+ * The bucket is scanned first: if an element with the same key is already
+ * active in the next generation, *ext is pointed at that element and
+ * -EEXIST is returned without inserting. Otherwise the new element is added
+ * at the head of the bucket and 0 is returned.
+ */
+static int nft_hash_insert(const struct net *net, const struct nft_set *set,
+			   const struct nft_set_elem *elem,
+			   struct nft_set_ext **ext)
+{
+	struct nft_hash_elem *this = elem->priv, *he;
+	struct nft_hash *priv = nft_set_priv(set);
+	const void *new_key = nft_set_ext_key(&this->ext);
+	u8 genmask = nft_genmask_next(net);
+	u32 bucket;
+
+	bucket = reciprocal_scale(jhash(new_key, set->klen, priv->seed),
+				  priv->buckets);
+	hlist_for_each_entry(he, &priv->table[bucket], node) {
+		if (memcmp(new_key, nft_set_ext_key(&he->ext), set->klen))
+			continue;
+		if (!nft_set_elem_active(&he->ext, genmask))
+			continue;
+
+		*ext = &he->ext;
+		return -EEXIST;
+	}
+	hlist_add_head_rcu(&this->node, &priv->table[bucket]);
+	return 0;
+}
+
+/* Activate callback: flips the element's active state via
+ * nft_set_elem_change_active().
+ */
+static void nft_hash_activate(const struct net *net, const struct nft_set *set,
+			      const struct nft_set_elem *elem)
+{
+	struct nft_hash_elem *entry = elem->priv;
+
+	nft_set_elem_change_active(net, set, &entry->ext);
+}
+
+/* Flush callback: flips the active state of the element passed as @priv
+ * and unconditionally reports success.
+ */
+static bool nft_hash_flush(const struct net *net,
+			   const struct nft_set *set, void *priv)
+{
+	struct nft_hash_elem *entry = priv;
+
+	nft_set_elem_change_active(net, set, &entry->ext);
+	return true;
+}
+
+/* Deactivate callback: find the stored element that matches the key in
+ * @elem and is active in the next generation, flip its active state via
+ * nft_set_elem_change_active() and return it.
+ *
+ * Returns the matching stored element, or NULL if the bucket holds no
+ * active element with that key.
+ */
+static void *nft_hash_deactivate(const struct net *net,
+				 const struct nft_set *set,
+				 const struct nft_set_elem *elem)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *this = elem->priv, *he;
+	u8 genmask = nft_genmask_next(net);
+	u32 hash;
+
+	hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed);
+	hash = reciprocal_scale(hash, priv->buckets);
+	hlist_for_each_entry(he, &priv->table[hash], node) {
+		/* Compare the key of the entry being visited (not of the
+		 * element we were handed), and require it to be active as
+		 * well: otherwise the first entry of the bucket would always
+		 * be picked, whatever its key.
+		 */
+		if (!memcmp(nft_set_ext_key(&he->ext), &elem->key.val,
+			    set->klen) &&
+		    nft_set_elem_active(&he->ext, genmask)) {
+			nft_set_elem_change_active(net, set, &he->ext);
+			return he;
+		}
 	}
+	return NULL;
+}
+
+/* Remove callback: unlinks the element from its bucket list with the RCU
+ * list primitive. The element itself is not freed here.
+ */
+static void nft_hash_remove(const struct net *net,
+			    const struct nft_set *set,
+			    const struct nft_set_elem *elem)
+{
+	struct nft_hash_elem *entry = elem->priv;
+
+	hlist_del_rcu(&entry->node);
+}
+
+/* Walk callback: visit every bucket in order and call iter->fn() on each
+ * element that is active for iter->genmask, once iter->skip elements have
+ * been passed over. iter->count is advanced for every element seen,
+ * including skipped and inactive ones. The walk stops as soon as the
+ * callback returns a negative value, which is left in iter->err.
+ */
+static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
+			  struct nft_set_iter *iter)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he;
+	struct nft_set_elem elem;
+	int i;
+
+	for (i = 0; i < priv->buckets; i++) {
+		hlist_for_each_entry_rcu(he, &priv->table[i], node) {
+			if (iter->count >= iter->skip &&
+			    nft_set_elem_active(&he->ext, iter->genmask)) {
+				elem.priv = he;
+
+				iter->err = iter->fn(ctx, set, iter, &elem);
+				if (iter->err < 0)
+					return;
+			}
+			iter->count++;
+		}
+	}
+}
+
+/* Size of the private area of a fixed-size hash set: the header plus one
+ * list head per bucket, the bucket count being derived from desc->size.
+ */
+static unsigned int nft_hash_privsize(const struct nlattr * const nla[],
+				      const struct nft_set_desc *desc)
+{
+	u32 nbuckets = nft_hash_buckets(desc->size);
+
+	return sizeof(struct nft_hash) + nbuckets * sizeof(struct hlist_head);
+}
+/* Init callback: pick a random hash seed and record the bucket count
+ * derived from desc->size. Always returns 0.
+ */
+static int nft_hash_init(const struct nft_set *set,
+			 const struct nft_set_desc *desc,
+			 const struct nlattr * const tb[])
+{
+	struct nft_hash *priv = nft_set_priv(set);
+
+	get_random_bytes(&priv->seed, sizeof(priv->seed));
+	priv->buckets = nft_hash_buckets(desc->size);
+
+	return 0;
+}
+
+/* Destroy callback: empty every bucket, unlinking each element and handing
+ * it to nft_set_elem_destroy().
+ */
+static void nft_hash_destroy(const struct nft_set *set)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *entry;
+	struct hlist_node *tmp;
+	int bucket;
+
+	for (bucket = 0; bucket < priv->buckets; bucket++) {
+		/* _safe variant: the current entry is unlinked in the body */
+		hlist_for_each_entry_safe(entry, tmp, &priv->table[bucket],
+					  node) {
+			hlist_del_rcu(&entry->node);
+			nft_set_elem_destroy(set, entry, true);
+		}
+	}
+}
+
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ est->size = sizeof(struct nft_hash) +
+ nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
+ desc->size * sizeof(struct nft_hash_elem);
est->lookup = NFT_SET_CLASS_O_1;
est->space = NFT_SET_CLASS_O_N;
return true;
}
+static struct nft_set_type nft_hash_type;
+/* Operations of the resizable (rhashtable based) hash set; the only hash
+ * variant here that provides ->update and advertises NFT_SET_TIMEOUT.
+ */
+static struct nft_set_ops nft_rhash_ops __read_mostly = {
+	.type		= &nft_hash_type,
+	.features	= NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
+	/* sizing */
+	.privsize	= nft_rhash_privsize,
+	.elemsize	= offsetof(struct nft_rhash_elem, ext),
+	.estimate	= nft_rhash_estimate,
+	/* set lifetime */
+	.init		= nft_rhash_init,
+	.destroy	= nft_rhash_destroy,
+	/* element management */
+	.insert		= nft_rhash_insert,
+	.activate	= nft_rhash_activate,
+	.deactivate	= nft_rhash_deactivate,
+	.flush		= nft_rhash_flush,
+	.remove		= nft_rhash_remove,
+	.walk		= nft_rhash_walk,
+	/* lookup and update */
+	.lookup		= nft_rhash_lookup,
+	.update		= nft_rhash_update,
+};
+
static struct nft_set_ops nft_hash_ops __read_mostly = {
+ .type = &nft_hash_type,
.privsize = nft_hash_privsize,
.elemsize = offsetof(struct nft_hash_elem, ext),
.estimate = nft_hash_estimate,
@@ -402,20 +616,57 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
.flush = nft_hash_flush,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
- .update = nft_hash_update,
.walk = nft_hash_walk,
- .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
+ .features = NFT_SET_MAP | NFT_SET_OBJECT,
+};
+
+/* Operations of the fixed-size hash set for integer-sized keys: identical
+ * to nft_hash_ops except that ->lookup is nft_hash_lookup_fast().
+ */
+static struct nft_set_ops nft_hash_fast_ops __read_mostly = {
+	.type		= &nft_hash_type,
+	.features	= NFT_SET_MAP | NFT_SET_OBJECT,
+	/* sizing */
+	.privsize	= nft_hash_privsize,
+	.elemsize	= offsetof(struct nft_hash_elem, ext),
+	.estimate	= nft_hash_estimate,
+	/* set lifetime */
+	.init		= nft_hash_init,
+	.destroy	= nft_hash_destroy,
+	/* element management */
+	.insert		= nft_hash_insert,
+	.activate	= nft_hash_activate,
+	.deactivate	= nft_hash_deactivate,
+	.flush		= nft_hash_flush,
+	.remove		= nft_hash_remove,
+	.walk		= nft_hash_walk,
+	/* lookup */
+	.lookup		= nft_hash_lookup_fast,
+};
+
+/* Choose the hash implementation for a new set:
+ *  - no size given in the description: resizable rhashtable backend;
+ *  - size given and key length of 2 or 4 bytes: fixed-size, fast lookup;
+ *  - size given, any other key length: fixed-size, generic lookup.
+ */
+static const struct nft_set_ops *
+nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
+		    u32 flags)
+{
+	if (!desc->size)
+		return &nft_rhash_ops;
+
+	if (desc->klen == 2 || desc->klen == 4)
+		return &nft_hash_fast_ops;
+
+	return &nft_hash_ops;
+}
+
+static struct nft_set_type nft_hash_type __read_mostly = {
+ .select_ops = nft_hash_select_ops,
.owner = THIS_MODULE,
};
static int __init nft_hash_module_init(void)
{
- return nft_register_set(&nft_hash_ops);
+ return nft_register_set(&nft_hash_type);
}
static void __exit nft_hash_module_exit(void)
{
- nft_unregister_set(&nft_hash_ops);
+ nft_unregister_set(&nft_hash_type);
}
module_init(nft_hash_module_init);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index fbdbaa00dd5f..bce5382f1d49 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -251,7 +251,8 @@ cont:
read_unlock_bh(&priv->lock);
}
-static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
+static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc)
{
return sizeof(struct nft_rbtree);
}
@@ -283,13 +284,11 @@ static void nft_rbtree_destroy(const struct nft_set *set)
static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
struct nft_set_estimate *est)
{
- unsigned int nsize;
-
- nsize = sizeof(struct nft_rbtree_elem);
if (desc->size)
- est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
+ est->size = sizeof(struct nft_rbtree) +
+ desc->size * sizeof(struct nft_rbtree_elem);
else
- est->size = nsize;
+ est->size = ~0;
est->lookup = NFT_SET_CLASS_O_LOG_N;
est->space = NFT_SET_CLASS_O_N;
@@ -297,7 +296,9 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
+static struct nft_set_type nft_rbtree_type;
static struct nft_set_ops nft_rbtree_ops __read_mostly = {
+ .type = &nft_rbtree_type,
.privsize = nft_rbtree_privsize,
.elemsize = offsetof(struct nft_rbtree_elem, ext),
.estimate = nft_rbtree_estimate,
@@ -311,17 +312,21 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
+};
+
+static struct nft_set_type nft_rbtree_type __read_mostly = {
+ .ops = &nft_rbtree_ops,
.owner = THIS_MODULE,
};
static int __init nft_rbtree_module_init(void)
{
- return nft_register_set(&nft_rbtree_ops);
+ return nft_register_set(&nft_rbtree_type);
}
static void __exit nft_rbtree_module_exit(void)
{
- nft_unregister_set(&nft_rbtree_ops);
+ nft_unregister_set(&nft_rbtree_type);
}
module_init(nft_rbtree_module_init);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index df7f1df00330..d767e35fff6b 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -127,7 +127,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
daddr, dport,
in->ifindex);
- if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
@@ -197,7 +197,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
daddr, ntohs(dport),
in->ifindex);
- if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index c05fefcec238..71cfa9551d08 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -63,7 +63,8 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = {
static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const osf_attrs[])
+ const struct nlattr * const osf_attrs[],
+ struct netlink_ext_ack *extack)
{
struct xt_osf_user_finger *f;
struct xt_osf_finger *kf = NULL, *sf;
@@ -107,7 +108,8 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const osf_attrs[])
+ const struct nlattr * const osf_attrs[],
+ struct netlink_ext_ack *extack)
{
struct xt_osf_user_finger *f;
struct xt_osf_finger *sf;
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 4dedb96d1a06..2d2fa1d53ea6 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -42,8 +42,8 @@ match_packet(const struct sk_buff *skb,
bool *hotdrop)
{
u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)];
- const sctp_chunkhdr_t *sch;
- sctp_chunkhdr_t _sch;
+ const struct sctp_chunkhdr *sch;
+ struct sctp_chunkhdr _sch;
int chunk_match_type = info->chunk_match_type;
const struct xt_sctp_flag_info *flag_info = info->flag_info;
int flag_count = info->flag_count;
@@ -118,8 +118,8 @@ static bool
sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_sctp_info *info = par->matchinfo;
- const sctp_sctphdr_t *sh;
- sctp_sctphdr_t _sh;
+ const struct sctphdr *sh;
+ struct sctphdr _sh;
if (par->fragoff != 0) {
pr_debug("Dropping non-first fragment.. FIXME\n");
@@ -136,13 +136,13 @@ sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
return SCCHECK(ntohs(sh->source) >= info->spts[0]
&& ntohs(sh->source) <= info->spts[1],
- XT_SCTP_SRC_PORTS, info->flags, info->invflags)
- && SCCHECK(ntohs(sh->dest) >= info->dpts[0]
+ XT_SCTP_SRC_PORTS, info->flags, info->invflags) &&
+ SCCHECK(ntohs(sh->dest) >= info->dpts[0]
&& ntohs(sh->dest) <= info->dpts[1],
- XT_SCTP_DEST_PORTS, info->flags, info->invflags)
- && SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t),
- info, &par->hotdrop),
- XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
+ XT_SCTP_DEST_PORTS, info->flags, info->invflags) &&
+ SCCHECK(match_packet(skb, par->thoff + sizeof(_sh),
+ info, &par->hotdrop),
+ XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
}
static int sctp_mt_check(const struct xt_mtchk_param *par)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index a88745e4b7df..5acee49db90b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -372,7 +372,7 @@ static void netlink_sock_destruct(struct sock *sk)
}
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
- WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+ WARN_ON(refcount_read(&sk->sk_wmem_alloc));
WARN_ON(nlk_sk(sk)->groups);
}
@@ -575,7 +575,7 @@ static void netlink_remove(struct sock *sk)
table = &nl_table[sk->sk_protocol];
if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node,
netlink_rhashtable_params)) {
- WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+ WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
__sock_put(sk);
}
@@ -691,7 +691,7 @@ static void deferred_put_nlk_sk(struct rcu_head *head)
struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
struct sock *sk = &nlk->sk;
- if (!atomic_dec_and_test(&sk->sk_refcnt))
+ if (!refcount_dec_and_test(&sk->sk_refcnt))
return;
if (nlk->cb_running && nlk->cb.done) {
@@ -1848,7 +1848,7 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
}
if (dst_group) {
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
}
err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
@@ -2226,7 +2226,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
struct netlink_sock *nlk;
int ret;
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
if (sk == NULL) {
@@ -2431,7 +2431,7 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
int exclude_portid = 0;
if (report) {
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
exclude_portid = portid;
}
@@ -2568,7 +2568,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
sk_rmem_alloc_get(s),
sk_wmem_alloc_get(s),
nlk->cb_running,
- atomic_read(&s->sk_refcnt),
+ refcount_read(&s->sk_refcnt),
atomic_read(&s->sk_drops),
sock_i_ino(s)
);
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 122bb81da918..5cf33df888c3 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -982,6 +982,8 @@ static void nfc_release(struct device *d)
kfree(se);
}
+ ida_simple_remove(&nfc_index_ida, dev->idx);
+
kfree(dev);
}
@@ -1056,6 +1058,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
int tx_headroom, int tx_tailroom)
{
struct nfc_dev *dev;
+ int rc;
if (!ops->start_poll || !ops->stop_poll || !ops->activate_target ||
!ops->deactivate_target || !ops->im_transceive)
@@ -1068,6 +1071,15 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
if (!dev)
return NULL;
+ rc = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL);
+ if (rc < 0)
+ goto err_free_dev;
+ dev->idx = rc;
+
+ dev->dev.class = &nfc_class;
+ dev_set_name(&dev->dev, "nfc%d", dev->idx);
+ device_initialize(&dev->dev);
+
dev->ops = ops;
dev->supported_protocols = supported_protocols;
dev->tx_headroom = tx_headroom;
@@ -1090,6 +1102,11 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
}
return dev;
+
+err_free_dev:
+ kfree(dev);
+
+ return ERR_PTR(rc);
}
EXPORT_SYMBOL(nfc_allocate_device);
@@ -1104,14 +1121,6 @@ int nfc_register_device(struct nfc_dev *dev)
pr_debug("dev_name=%s\n", dev_name(&dev->dev));
- dev->idx = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL);
- if (dev->idx < 0)
- return dev->idx;
-
- dev->dev.class = &nfc_class;
- dev_set_name(&dev->dev, "nfc%d", dev->idx);
- device_initialize(&dev->dev);
-
mutex_lock(&nfc_devlist_mutex);
nfc_devlist_generation++;
rc = device_add(&dev->dev);
@@ -1149,12 +1158,10 @@ EXPORT_SYMBOL(nfc_register_device);
*/
void nfc_unregister_device(struct nfc_dev *dev)
{
- int rc, id;
+ int rc;
pr_debug("dev_name=%s\n", dev_name(&dev->dev));
- id = dev->idx;
-
if (dev->rfkill) {
rfkill_unregister(dev->rfkill);
rfkill_destroy(dev->rfkill);
@@ -1179,8 +1186,6 @@ void nfc_unregister_device(struct nfc_dev *dev)
nfc_devlist_generation++;
device_del(&dev->dev);
mutex_unlock(&nfc_devlist_mutex);
-
- ida_simple_remove(&nfc_index_ida, id);
}
EXPORT_SYMBOL(nfc_unregister_device);
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index ebeace7a8278..de6dd37d04c7 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -240,7 +240,7 @@ int digital_send_cmd(struct nfc_digital_dev *ddev, u8 cmd_type,
{
struct digital_cmd *cmd;
- cmd = kzalloc(sizeof(struct digital_cmd), GFP_KERNEL);
+ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
@@ -287,7 +287,7 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech)
{
struct digital_tg_mdaa_params *params;
- params = kzalloc(sizeof(struct digital_tg_mdaa_params), GFP_KERNEL);
+ params = kzalloc(sizeof(*params), GFP_KERNEL);
if (!params)
return -ENOMEM;
@@ -706,11 +706,9 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target,
struct digital_data_exch *data_exch;
int rc;
- data_exch = kzalloc(sizeof(struct digital_data_exch), GFP_KERNEL);
- if (!data_exch) {
- pr_err("Failed to allocate data_exch struct\n");
+ data_exch = kzalloc(sizeof(*data_exch), GFP_KERNEL);
+ if (!data_exch)
return -ENOMEM;
- }
data_exch->cb = cb;
data_exch->cb_context = cb_context;
@@ -764,7 +762,7 @@ struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops,
!ops->switch_rf || (ops->tg_listen_md && !ops->tg_get_rf_tech))
return NULL;
- ddev = kzalloc(sizeof(struct nfc_digital_dev), GFP_KERNEL);
+ ddev = kzalloc(sizeof(*ddev), GFP_KERNEL);
if (!ddev)
return NULL;
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index 74ccc2dd79d0..4f9a973988b2 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -151,7 +151,7 @@ static const u8 digital_payload_bits_map[4] = {
* 0 <= wt <= 14 (given by the target by the TO field of ATR_RES response)
*/
#define DIGITAL_NFC_DEP_IN_MAX_WT 14
-#define DIGITAL_NFC_DEP_TG_MAX_WT 8
+#define DIGITAL_NFC_DEP_TG_MAX_WT 14
static const u16 digital_rwt_map[DIGITAL_NFC_DEP_IN_MAX_WT + 1] = {
100, 101, 101, 102, 105,
110, 119, 139, 177, 255,
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index 3cc3448da524..2021d1d58a75 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -27,6 +27,7 @@
#define DIGITAL_SDD_RES_CT 0x88
#define DIGITAL_SDD_RES_LEN 5
+#define DIGITAL_SEL_RES_LEN 1
#define DIGITAL_SEL_RES_NFCID1_COMPLETE(sel_res) (!((sel_res) & 0x04))
#define DIGITAL_SEL_RES_IS_T2T(sel_res) (!((sel_res) & 0x60))
@@ -299,7 +300,7 @@ static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg,
}
}
- if (!resp->len) {
+ if (resp->len != DIGITAL_SEL_RES_LEN) {
rc = -EIO;
goto exit;
}
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 2ffb18e73df6..fb7afcaa3004 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -77,7 +77,8 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
struct sockaddr_nfc_llcp llcp_addr;
int len, ret = 0;
- if (!addr || addr->sa_family != AF_NFC)
+ if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
+ addr->sa_family != AF_NFC)
return -EINVAL;
pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family);
@@ -151,7 +152,8 @@ static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr,
struct sockaddr_nfc_llcp llcp_addr;
int len, ret = 0;
- if (!addr || addr->sa_family != AF_NFC)
+ if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
+ addr->sa_family != AF_NFC)
return -EINVAL;
pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family);
@@ -662,8 +664,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags);
- if (!addr || len < sizeof(struct sockaddr_nfc) ||
- addr->sa_family != AF_NFC)
+ if (!addr || len < sizeof(*addr) || addr->sa_family != AF_NFC)
return -EINVAL;
if (addr->service_name_len == 0 && addr->dsap == 0)
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index a3dac34cf790..c25e9b4179c3 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -73,11 +73,10 @@ int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type,
if (conn_info->dest_type == dest_type) {
if (!params)
return conn_info->conn_id;
- if (conn_info) {
- if (params->id == conn_info->dest_params->id &&
- params->protocol == conn_info->dest_params->protocol)
- return conn_info->conn_id;
- }
+
+ if (params->id == conn_info->dest_params->id &&
+ params->protocol == conn_info->dest_params->protocol)
+ return conn_info->conn_id;
}
}
@@ -1173,8 +1172,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops,
return ndev;
free_nfc:
- kfree(ndev->nfc_dev);
-
+ nfc_free_device(ndev->nfc_dev);
free_nci:
kfree(ndev);
return NULL;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 6b0850e63e09..b251fb936a27 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -907,7 +907,9 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info)
u32 device_idx, target_idx, protocol;
int rc;
- if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+ !info->attrs[NFC_ATTR_TARGET_INDEX] ||
+ !info->attrs[NFC_ATTR_PROTOCOLS])
return -EINVAL;
device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d772e9a4b4f8..45fe8c8a884d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1090,6 +1090,58 @@ static struct sw_flow_actions *get_flow_actions(struct net *net,
return acts;
}
+/* Match initialisation and action parsing are factored out of the caller
+ * to avoid a "-Wframe-larger-than=1024" warning: the flow mask is only
+ * needed while the actions are parsed, so keeping it on this function's
+ * stack saves stack space in the caller.
+ *
+ * Without a key attribute the match is left untouched, and the caller must
+ * not use it. If an actions attribute is present, the parsed actions are
+ * stored in *acts; a key attribute is mandatory in that case and -EINVAL
+ * is returned when it is missing.
+ *
+ * On every path that may have initialised the match, match->mask is reset
+ * to NULL before returning, so that the match never leaves this function
+ * with a dangling reference to the on-stack mask.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int ovs_nla_init_match_and_action(struct net *net,
+					 struct sw_flow_match *match,
+					 struct sw_flow_key *key,
+					 struct nlattr **a,
+					 struct sw_flow_actions **acts,
+					 bool log)
+{
+	struct nlattr *actions_attr = a[OVS_FLOW_ATTR_ACTIONS];
+	struct nlattr *key_attr = a[OVS_FLOW_ATTR_KEY];
+	struct sw_flow_mask mask;
+	int error = 0;
+
+	if (key_attr) {
+		ovs_match_init(match, key, true, &mask);
+		error = ovs_nla_get_match(net, match, key_attr,
+					  a[OVS_FLOW_ATTR_MASK], log);
+		if (error)
+			goto out;
+	}
+
+	if (!actions_attr)
+		goto out;
+
+	if (!key_attr) {
+		OVS_NLERR(log,
+			  "Flow key attribute not present in set flow.");
+		return -EINVAL;
+	}
+
+	*acts = get_flow_actions(net, actions_attr, key, &mask, log);
+	if (IS_ERR(*acts))
+		error = PTR_ERR(*acts);
+
+	/* On success, error is still 0 here. */
+out:
+	match->mask = NULL;
+	return error;
+}
+
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
@@ -1097,7 +1149,6 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct ovs_header *ovs_header = info->userhdr;
struct sw_flow_key key;
struct sw_flow *flow;
- struct sw_flow_mask mask;
struct sk_buff *reply = NULL;
struct datapath *dp;
struct sw_flow_actions *old_acts = NULL, *acts = NULL;
@@ -1109,34 +1160,18 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
bool ufid_present;
ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
- if (a[OVS_FLOW_ATTR_KEY]) {
- ovs_match_init(&match, &key, true, &mask);
- error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
- a[OVS_FLOW_ATTR_MASK], log);
- } else if (!ufid_present) {
+ if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
OVS_NLERR(log,
"Flow set message rejected, Key attribute missing.");
- error = -EINVAL;
+ return -EINVAL;
}
+
+ error = ovs_nla_init_match_and_action(net, &match, &key, a,
+ &acts, log);
if (error)
goto error;
- /* Validate actions. */
- if (a[OVS_FLOW_ATTR_ACTIONS]) {
- if (!a[OVS_FLOW_ATTR_KEY]) {
- OVS_NLERR(log,
- "Flow key attribute not present in set flow.");
- error = -EINVAL;
- goto error;
- }
-
- acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
- &mask, log);
- if (IS_ERR(acts)) {
- error = PTR_ERR(acts);
- goto error;
- }
-
+ if (acts) {
/* Can allocate before locking if have acts. */
reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
ufid_flags);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f9349a495caf..e3beb28203eb 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1317,7 +1317,7 @@ static void packet_sock_destruct(struct sock *sk)
skb_queue_purge(&sk->sk_error_queue);
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
- WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+ WARN_ON(refcount_read(&sk->sk_wmem_alloc));
if (!sock_flag(sk, SOCK_DEAD)) {
pr_err("Attempt to release alive packet socket: %p\n", sk);
@@ -1739,7 +1739,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
match->flags = flags;
INIT_LIST_HEAD(&match->list);
spin_lock_init(&match->lock);
- atomic_set(&match->sk_ref, 0);
+ refcount_set(&match->sk_ref, 0);
fanout_init_data(match);
match->prot_hook.type = po->prot_hook.type;
match->prot_hook.dev = po->prot_hook.dev;
@@ -1753,10 +1753,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
match->prot_hook.type == po->prot_hook.type &&
match->prot_hook.dev == po->prot_hook.dev) {
err = -ENOSPC;
- if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
+ if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
__dev_remove_pack(&po->prot_hook);
po->fanout = match;
- atomic_inc(&match->sk_ref);
+ refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
__fanout_link(sk, po);
err = 0;
}
@@ -1785,7 +1785,7 @@ static struct packet_fanout *fanout_release(struct sock *sk)
if (f) {
po->fanout = NULL;
- if (atomic_dec_and_test(&f->sk_ref))
+ if (refcount_dec_and_test(&f->sk_ref))
list_del(&f->list);
else
f = NULL;
@@ -2523,7 +2523,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->data_len = to_write;
skb->len += to_write;
skb->truesize += to_write;
- atomic_add(to_write, &po->sk.sk_wmem_alloc);
+ refcount_add(to_write, &po->sk.sk_wmem_alloc);
while (likely(to_write)) {
nr_frags = skb_shinfo(skb)->nr_frags;
@@ -4495,7 +4495,7 @@ static int packet_seq_show(struct seq_file *seq, void *v)
seq_printf(seq,
"%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
s,
- atomic_read(&s->sk_refcnt),
+ refcount_read(&s->sk_refcnt),
s->sk_type,
ntohs(po->num),
po->ifindex,
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 9ee46314b7d7..94d1d405a116 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -1,6 +1,8 @@
#ifndef __PACKET_INTERNAL_H__
#define __PACKET_INTERNAL_H__
+#include <linux/refcount.h>
+
struct packet_mclist {
struct packet_mclist *next;
int ifindex;
@@ -86,7 +88,7 @@ struct packet_fanout {
struct list_head list;
struct sock *arr[PACKET_FANOUT_MAX];
spinlock_t lock;
- atomic_t sk_ref;
+ refcount_t sk_ref;
struct packet_type prot_hook ____cacheline_aligned_in_smp;
};
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 64634e3ec2fc..1b050dd17393 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -360,7 +360,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
return POLLHUP;
if (sk->sk_state == TCP_ESTABLISHED &&
- atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
+ refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
atomic_read(&pn->tx_credits))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
@@ -614,7 +614,7 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v)
sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk),
from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
sock_i_ino(sk),
- atomic_read(&sk->sk_refcnt), sk,
+ refcount_read(&sk->sk_refcnt), sk,
atomic_read(&sk->sk_drops));
}
seq_pad(seq, '\n');
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 52d11d7725c8..0d8616aa5bad 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -202,7 +202,7 @@ void rds_tcp_write_space(struct sock *sk)
tc->t_last_seen_una = rds_tcp_snd_una(tc);
rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
- if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+ if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
out:
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 58ae0db52ea1..a2ad4482376f 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -53,7 +53,7 @@ static void rxrpc_sock_destructor(struct sock *);
*/
static inline int rxrpc_writable(struct sock *sk)
{
- return atomic_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf;
+ return refcount_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf;
}
/*
@@ -730,7 +730,7 @@ static void rxrpc_sock_destructor(struct sock *sk)
rxrpc_purge_queue(&sk->sk_receive_queue);
- WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+ WARN_ON(refcount_read(&sk->sk_wmem_alloc));
WARN_ON(!sk_unhashed(sk));
WARN_ON(sk->sk_socket);
@@ -747,7 +747,7 @@ static int rxrpc_release_sock(struct sock *sk)
{
struct rxrpc_sock *rx = rxrpc_sk(sk);
- _enter("%p{%d,%d}", sk, sk->sk_state, atomic_read(&sk->sk_refcnt));
+ _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
/* declare the socket closed for business */
sock_orphan(sk);
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 67b02c45271b..b8985d01876a 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -27,7 +27,7 @@ void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
{
const void *here = __builtin_return_address(0);
int n = atomic_inc_return(select_skb_count(op));
- trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+ trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
}
/*
@@ -38,7 +38,7 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
const void *here = __builtin_return_address(0);
if (skb) {
int n = atomic_read(select_skb_count(op));
- trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+ trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
}
}
@@ -49,7 +49,7 @@ void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
{
const void *here = __builtin_return_address(0);
int n = atomic_inc_return(select_skb_count(op));
- trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+ trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
skb_get(skb);
}
@@ -63,7 +63,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
int n;
CHECK_SLAB_OKAY(&skb->users);
n = atomic_dec_return(select_skb_count(op));
- trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+ trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
kfree_skb(skb);
}
}
@@ -78,7 +78,7 @@ void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
int n;
CHECK_SLAB_OKAY(&skb->users);
n = atomic_dec_return(select_skb_count(op));
- trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+ trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
kfree_skb(skb);
}
}
@@ -93,7 +93,7 @@ void rxrpc_purge_queue(struct sk_buff_head *list)
while ((skb = skb_dequeue((list))) != NULL) {
int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged));
trace_rxrpc_skb(skb, rxrpc_skb_rx_purged,
- atomic_read(&skb->users), n, here);
+ refcount_read(&skb->users), n, here);
kfree_skb(skb);
}
}
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index eb0e9bab54c1..d6e97115500b 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -340,7 +340,7 @@ META_COLLECTOR(int_sk_refcnt)
*err = -1;
return;
}
- dst->value = atomic_read(&skb->sk->sk_refcnt);
+ dst->value = refcount_read(&skb->sk->sk_refcnt);
}
META_COLLECTOR(int_sk_rcvbuf)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 5d95401bbc02..43b94c7b69bd 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1019,7 +1019,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
return sch;
}
/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
- ops->destroy(sch);
+ if (ops->destroy)
+ ops->destroy(sch);
err_out3:
dev_put(dev);
kfree((char *) sch - sch->padded);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index de162592eee0..572fe2584e48 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -498,7 +498,7 @@ static void sch_atm_dequeue(unsigned long data)
ATM_SKB(skb)->vcc = flow->vcc;
memcpy(skb_push(skb, flow->hdr_len), flow->hdr,
flow->hdr_len);
- atomic_add(skb->truesize,
+ refcount_add(skb->truesize,
&sk_atm(flow->vcc)->sk_wmem_alloc);
/* atm.atm_options are already set by atm_tc_enqueue */
flow->vcc->send(flow->vcc, skb);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 757be416f778..fa4f530ab7e1 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -71,7 +71,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
{
struct net *net = sock_net(sk);
struct sctp_sock *sp;
- sctp_paramhdr_t *p;
+ struct sctp_paramhdr *p;
int i;
/* Retrieve the SCTP per socket area. */
@@ -284,9 +284,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
ntohs(ep->auth_chunk_list->param_hdr.length));
/* Get the AUTH random number for this association */
- p = (sctp_paramhdr_t *)asoc->c.auth_random;
+ p = (struct sctp_paramhdr *)asoc->c.auth_random;
p->type = SCTP_PARAM_RANDOM;
- p->length = htons(sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH);
+ p->length = htons(sizeof(*p) + SCTP_AUTH_RANDOM_LENGTH);
get_random_bytes(p+1, SCTP_AUTH_RANDOM_LENGTH);
return asoc;
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index f99d4855d3de..8ffa5985cd6e 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -538,7 +538,8 @@ struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc)
if (!hmacs)
return NULL;
- n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1;
+ n_elt = (ntohs(hmacs->param_hdr.length) -
+ sizeof(struct sctp_paramhdr)) >> 1;
for (i = 0; i < n_elt; i++) {
id = ntohs(hmacs->hmac_ids[i]);
@@ -589,7 +590,8 @@ int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc,
return 0;
hmacs = (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs;
- n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1;
+ n_elt = (ntohs(hmacs->param_hdr.length) -
+ sizeof(struct sctp_paramhdr)) >> 1;
return __sctp_auth_find_hmacid(hmacs->hmac_ids, n_elt, hmac_id);
}
@@ -612,8 +614,8 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
if (asoc->default_hmac_id)
return;
- n_params = (ntohs(hmacs->param_hdr.length)
- - sizeof(sctp_paramhdr_t)) >> 1;
+ n_params = (ntohs(hmacs->param_hdr.length) -
+ sizeof(struct sctp_paramhdr)) >> 1;
ep = asoc->ep;
for (i = 0; i < n_params; i++) {
id = ntohs(hmacs->hmac_ids[i]);
@@ -632,7 +634,7 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
/* Check to see if the given chunk is supposed to be authenticated */
-static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
+static int __sctp_auth_cid(enum sctp_cid chunk, struct sctp_chunks_param *param)
{
unsigned short len;
int found = 0;
@@ -641,7 +643,7 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
if (!param || param->param_hdr.length == 0)
return 0;
- len = ntohs(param->param_hdr.length) - sizeof(sctp_paramhdr_t);
+ len = ntohs(param->param_hdr.length) - sizeof(struct sctp_paramhdr);
/* SCTP-AUTH, Section 3.2
* The chunk types for INIT, INIT-ACK, SHUTDOWN-COMPLETE and AUTH
@@ -668,7 +670,7 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
}
/* Check if peer requested that this chunk is authenticated */
-int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
+int sctp_auth_send_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
{
if (!asoc)
return 0;
@@ -680,7 +682,7 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
}
/* Check if we requested that peer authenticate this chunk. */
-int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
+int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
{
if (!asoc)
return 0;
@@ -775,7 +777,7 @@ int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id)
/* Check if we can add this chunk to the array */
param_len = ntohs(p->param_hdr.length);
- nchunks = param_len - sizeof(sctp_paramhdr_t);
+ nchunks = param_len - sizeof(struct sctp_paramhdr);
if (nchunks == SCTP_NUM_CHUNK_TYPES)
return -EINVAL;
@@ -812,9 +814,11 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
return -EINVAL;
for (i = 0; i < hmacs->shmac_num_idents; i++)
- ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
- ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
- hmacs->shmac_num_idents * sizeof(__u16));
+ ep->auth_hmacs_list->hmac_ids[i] =
+ htons(hmacs->shmac_idents[i]);
+ ep->auth_hmacs_list->param_hdr.length =
+ htons(sizeof(struct sctp_paramhdr) +
+ hmacs->shmac_num_idents * sizeof(__u16));
return 0;
}
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 3dcd0ecf3d99..efbc31877804 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -90,12 +90,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
*/
auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO;
auth_hmacs->param_hdr.length =
- htons(sizeof(sctp_paramhdr_t) + 2);
+ htons(sizeof(struct sctp_paramhdr) + 2);
auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1);
/* Initialize the CHUNKS parameter */
auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS;
- auth_chunks->param_hdr.length = htons(sizeof(sctp_paramhdr_t));
+ auth_chunks->param_hdr.length =
+ htons(sizeof(struct sctp_paramhdr));
/* If the Add-IP functionality is enabled, we must
* authenticate, ASCONF and ASCONF-ACK chunks
@@ -104,7 +105,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
auth_chunks->chunks[0] = SCTP_CID_ASCONF;
auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK;
auth_chunks->param_hdr.length =
- htons(sizeof(sctp_paramhdr_t) + 2);
+ htons(sizeof(struct sctp_paramhdr) + 2);
}
}
@@ -268,16 +269,14 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
memset(ep->secret_key, 0, sizeof(ep->secret_key));
- /* Give up our hold on the sock. */
sk = ep->base.sk;
- if (sk != NULL) {
- /* Remove and free the port */
- if (sctp_sk(sk)->bind_hash)
- sctp_put_port(sk);
+ /* Remove and free the port */
+ if (sctp_sk(sk)->bind_hash)
+ sctp_put_port(sk);
- sctp_sk(sk)->ep = NULL;
- sock_put(sk);
- }
+ sctp_sk(sk)->ep = NULL;
+ /* Give up our hold on the sock */
+ sock_put(sk);
kfree(ep);
SCTP_DBG_OBJCNT_DEC(ep);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index ba9ad32fc447..41eb2ec10460 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -663,19 +663,19 @@ out_unlock:
*/
static int sctp_rcv_ootb(struct sk_buff *skb)
{
- sctp_chunkhdr_t *ch, _ch;
+ struct sctp_chunkhdr *ch, _ch;
int ch_end, offset = 0;
/* Scan through all the chunks in the packet. */
do {
/* Make sure we have at least the header there */
- if (offset + sizeof(sctp_chunkhdr_t) > skb->len)
+ if (offset + sizeof(_ch) > skb->len)
break;
ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch);
/* Break out if chunk length is less then minimal. */
- if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+ if (ntohs(ch->length) < sizeof(_ch))
break;
ch_end = offset + SCTP_PAD4(ntohs(ch->length));
@@ -1051,7 +1051,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
union sctp_addr *paddr = &addr;
struct sctphdr *sh = sctp_hdr(skb);
union sctp_params params;
- sctp_init_chunk_t *init;
+ struct sctp_init_chunk *init;
struct sctp_af *af;
/*
@@ -1070,7 +1070,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
/* Find the start of the TLVs and the end of the chunk. This is
* the region we search for address parameters.
*/
- init = (sctp_init_chunk_t *)skb->data;
+ init = (struct sctp_init_chunk *)skb->data;
/* Walk the parameters looking for embedded addresses. */
sctp_walk_params(params, init, init_hdr.params) {
@@ -1106,7 +1106,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
*/
static struct sctp_association *__sctp_rcv_asconf_lookup(
struct net *net,
- sctp_chunkhdr_t *ch,
+ struct sctp_chunkhdr *ch,
const union sctp_addr *laddr,
__be16 peer_port,
struct sctp_transport **transportp)
@@ -1144,7 +1144,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
struct sctp_transport **transportp)
{
struct sctp_association *asoc = NULL;
- sctp_chunkhdr_t *ch;
+ struct sctp_chunkhdr *ch;
int have_auth = 0;
unsigned int chunk_num = 1;
__u8 *ch_end;
@@ -1152,10 +1152,10 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
/* Walk through the chunks looking for AUTH or ASCONF chunks
* to help us find the association.
*/
- ch = (sctp_chunkhdr_t *) skb->data;
+ ch = (struct sctp_chunkhdr *)skb->data;
do {
/* Break out if chunk length is less then minimal. */
- if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+ if (ntohs(ch->length) < sizeof(*ch))
break;
ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
@@ -1192,7 +1192,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
if (asoc)
break;
- ch = (sctp_chunkhdr_t *) ch_end;
+ ch = (struct sctp_chunkhdr *)ch_end;
chunk_num++;
} while (ch_end < skb_tail_pointer(skb));
@@ -1210,7 +1210,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
const union sctp_addr *laddr,
struct sctp_transport **transportp)
{
- sctp_chunkhdr_t *ch;
+ struct sctp_chunkhdr *ch;
/* We do not allow GSO frames here as we need to linearize and
* then cannot guarantee frame boundaries. This shouldn't be an
@@ -1220,7 +1220,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
return NULL;
- ch = (sctp_chunkhdr_t *) skb->data;
+ ch = (struct sctp_chunkhdr *)skb->data;
/* The code below will attempt to walk the chunk and extract
* parameter information. Before we do that, we need to verify
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index f731de3e8428..48392552ee7c 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -99,7 +99,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk)
struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue)
{
struct sctp_chunk *chunk;
- sctp_chunkhdr_t *ch = NULL;
+ struct sctp_chunkhdr *ch = NULL;
chunk = queue->in_progress;
/* If there is no more chunks in this packet, say so */
@@ -108,7 +108,7 @@ struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue)
chunk->pdiscard)
return NULL;
- ch = (sctp_chunkhdr_t *)chunk->chunk_end;
+ ch = (struct sctp_chunkhdr *)chunk->chunk_end;
return ch;
}
@@ -122,7 +122,7 @@ struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue)
struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
{
struct sctp_chunk *chunk;
- sctp_chunkhdr_t *ch = NULL;
+ struct sctp_chunkhdr *ch = NULL;
/* The assumption is that we are safe to process the chunks
* at this time.
@@ -151,7 +151,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
chunk = queue->in_progress = NULL;
} else {
/* Nothing to do. Next chunk in the packet, please. */
- ch = (sctp_chunkhdr_t *) chunk->chunk_end;
+ ch = (struct sctp_chunkhdr *)chunk->chunk_end;
/* Force chunk->skb->data to chunk->chunk_end. */
skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data);
/* We are guaranteed to pull a SCTP header. */
@@ -195,7 +195,7 @@ next_chunk:
new_skb:
/* This is the first chunk in the packet. */
- ch = (sctp_chunkhdr_t *) chunk->skb->data;
+ ch = (struct sctp_chunkhdr *)chunk->skb->data;
chunk->singleton = 1;
chunk->data_accepted = 0;
chunk->pdiscard = 0;
@@ -214,11 +214,10 @@ new_skb:
chunk->chunk_hdr = ch;
chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
- skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
+ skb_pull(chunk->skb, sizeof(*ch));
chunk->subh.v = NULL; /* Subheader is no longer valid. */
- if (chunk->chunk_end + sizeof(sctp_chunkhdr_t) <
- skb_tail_pointer(chunk->skb)) {
+ if (chunk->chunk_end + sizeof(*ch) < skb_tail_pointer(chunk->skb)) {
/* This is not a singleton */
chunk->singleton = 0;
} else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 89cee1482d35..9d8504985744 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -402,7 +402,7 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
* therefore only reserve a single byte to keep socket around until
* the packet has been transmitted.
*/
- atomic_inc(&sk->sk_wmem_alloc);
+ refcount_inc(&sk->sk_wmem_alloc);
}
static int sctp_packet_pack(struct sctp_packet *packet,
@@ -723,8 +723,8 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
/* Check whether this chunk and all the rest of pending data will fit
* or delay in hopes of bundling a full sized packet.
*/
- if (chunk->skb->len + q->out_qlen >
- transport->pathmtu - packet->overhead - sizeof(sctp_data_chunk_t) - 4)
+ if (chunk->skb->len + q->out_qlen > transport->pathmtu -
+ packet->overhead - sizeof(struct sctp_data_chunk) - 4)
/* Enough data queued to fill a packet */
return SCTP_XMIT_OK;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 20299df163b9..e8762702a313 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1102,7 +1102,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
"illegal chunk", ntohl(chunk->subh.data_hdr->tsn),
chunk->skb ? chunk->skb->head : NULL, chunk->skb ?
- atomic_read(&chunk->skb->users) : -1);
+ refcount_read(&chunk->skb->users) : -1);
/* Add the chunk to the packet. */
status = sctp_packet_transmit_chunk(packet, chunk, 0, gfp);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 8e34db56bc1d..26b4be6b4172 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -363,7 +363,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
assoc->stream.outcnt, assoc->max_retrans,
assoc->init_retries, assoc->shutdown_retries,
assoc->rtx_data_chunks,
- atomic_read(&sk->sk_wmem_alloc),
+ refcount_read(&sk->sk_wmem_alloc),
sk->sk_wmem_queued,
sk->sk_sndbuf,
sk->sk_rcvbuf);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 4b1967997c16..3af4dd024ec0 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -217,7 +217,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
{
struct net *net = sock_net(asoc->base.sk);
struct sctp_endpoint *ep = asoc->ep;
- sctp_inithdr_t init;
+ struct sctp_inithdr init;
union sctp_params addrs;
size_t chunksize;
struct sctp_chunk *retval = NULL;
@@ -229,7 +229,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
sctp_supported_ext_param_t ext_param;
int num_ext = 0;
__u8 extensions[3];
- sctp_paramhdr_t *auth_chunks = NULL,
+ struct sctp_paramhdr *auth_chunks = NULL,
*auth_hmacs = NULL;
/* RFC 2960 3.3.2 Initiation (INIT) (1)
@@ -286,14 +286,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
chunksize += sizeof(asoc->c.auth_random);
/* Add HMACS parameter length if any were defined */
- auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
+ auth_hmacs = (struct sctp_paramhdr *)asoc->c.auth_hmacs;
if (auth_hmacs->length)
chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
else
auth_hmacs = NULL;
/* Add CHUNKS parameter length */
- auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
+ auth_chunks = (struct sctp_paramhdr *)asoc->c.auth_chunks;
if (auth_chunks->length)
chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
else
@@ -385,7 +385,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
gfp_t gfp, int unkparam_len)
{
- sctp_inithdr_t initack;
+ struct sctp_inithdr initack;
struct sctp_chunk *retval;
union sctp_params addrs;
struct sctp_sock *sp;
@@ -397,7 +397,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
sctp_supported_ext_param_t ext_param;
int num_ext = 0;
__u8 extensions[3];
- sctp_paramhdr_t *auth_chunks = NULL,
+ struct sctp_paramhdr *auth_chunks = NULL,
*auth_hmacs = NULL,
*auth_random = NULL;
@@ -448,16 +448,16 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
chunksize += sizeof(aiparam);
if (asoc->peer.auth_capable) {
- auth_random = (sctp_paramhdr_t *)asoc->c.auth_random;
+ auth_random = (struct sctp_paramhdr *)asoc->c.auth_random;
chunksize += ntohs(auth_random->length);
- auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
+ auth_hmacs = (struct sctp_paramhdr *)asoc->c.auth_hmacs;
if (auth_hmacs->length)
chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
else
auth_hmacs = NULL;
- auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
+ auth_chunks = (struct sctp_paramhdr *)asoc->c.auth_chunks;
if (auth_chunks->length)
chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
else
@@ -1085,18 +1085,18 @@ struct sctp_chunk *sctp_make_abort_violation(
struct sctp_chunk *retval;
struct sctp_paramhdr phdr;
- retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen
- + sizeof(sctp_paramhdr_t));
+ retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen +
+ sizeof(phdr));
if (!retval)
goto end;
- sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, paylen
- + sizeof(sctp_paramhdr_t));
+ sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, paylen +
+ sizeof(phdr));
phdr.type = htons(chunk->chunk_hdr->type);
phdr.length = chunk->chunk_hdr->length;
sctp_addto_chunk(retval, paylen, payload);
- sctp_addto_param(retval, sizeof(sctp_paramhdr_t), &phdr);
+ sctp_addto_param(retval, sizeof(phdr), &phdr);
end:
return retval;
@@ -1110,16 +1110,16 @@ struct sctp_chunk *sctp_make_violation_paramlen(
struct sctp_chunk *retval;
static const char error[] = "The following parameter had invalid length:";
size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) +
- sizeof(sctp_paramhdr_t);
+ sizeof(*param);
retval = sctp_make_abort(asoc, chunk, payload_len);
if (!retval)
goto nodata;
sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION,
- sizeof(error) + sizeof(sctp_paramhdr_t));
+ sizeof(error) + sizeof(*param));
sctp_addto_chunk(retval, sizeof(error), error);
- sctp_addto_param(retval, sizeof(sctp_paramhdr_t), param);
+ sctp_addto_param(retval, sizeof(*param), param);
nodata:
return retval;
@@ -1379,20 +1379,20 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
gfp_t gfp)
{
struct sctp_chunk *retval;
- sctp_chunkhdr_t *chunk_hdr;
+ struct sctp_chunkhdr *chunk_hdr;
struct sk_buff *skb;
struct sock *sk;
/* No need to allocate LL here, as this is only a chunk. */
- skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp);
+ skb = alloc_skb(SCTP_PAD4(sizeof(*chunk_hdr) + paylen), gfp);
if (!skb)
goto nodata;
/* Make room for the chunk header. */
- chunk_hdr = skb_put(skb, sizeof(sctp_chunkhdr_t));
+ chunk_hdr = (struct sctp_chunkhdr *)skb_put(skb, sizeof(*chunk_hdr));
chunk_hdr->type = type;
chunk_hdr->flags = flags;
- chunk_hdr->length = htons(sizeof(sctp_chunkhdr_t));
+ chunk_hdr->length = htons(sizeof(*chunk_hdr));
sk = asoc ? asoc->base.sk : NULL;
retval = sctp_chunkify(skb, asoc, sk, gfp);
@@ -1402,7 +1402,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
}
retval->chunk_hdr = chunk_hdr;
- retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(struct sctp_chunkhdr);
+ retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(*chunk_hdr);
/* Determine if the chunk needs to be authenticated */
if (sctp_auth_send_cid(type, asoc))
@@ -1614,7 +1614,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
/* Header size is static data prior to the actual cookie, including
* any padding.
*/
- headersize = sizeof(sctp_paramhdr_t) +
+ headersize = sizeof(struct sctp_paramhdr) +
(sizeof(struct sctp_signed_cookie) -
sizeof(struct sctp_cookie));
bodysize = sizeof(struct sctp_cookie)
@@ -1710,7 +1710,7 @@ struct sctp_association *sctp_unpack_cookie(
/* Header size is static data prior to the actual cookie, including
* any padding.
*/
- headersize = sizeof(sctp_chunkhdr_t) +
+ headersize = sizeof(struct sctp_chunkhdr) +
(sizeof(struct sctp_signed_cookie) -
sizeof(struct sctp_cookie));
bodysize = ntohs(chunk->chunk_hdr->length) - headersize;
@@ -1882,7 +1882,7 @@ struct __sctp_missing {
* Report a missing mandatory parameter.
*/
static int sctp_process_missing_param(const struct sctp_association *asoc,
- sctp_param_t paramtype,
+ enum sctp_param paramtype,
struct sctp_chunk *chunk,
struct sctp_chunk **errp)
{
@@ -1975,7 +1975,7 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
static int sctp_verify_ext_param(struct net *net, union sctp_params param)
{
- __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+ __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
int have_auth = 0;
int have_asconf = 0;
int i;
@@ -2010,7 +2010,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
union sctp_params param)
{
struct net *net = sock_net(asoc->base.sk);
- __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+ __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
int i;
for (i = 0; i < num_ext; i++) {
@@ -2123,7 +2123,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
union sctp_params param,
- sctp_cid_t cid,
+ enum sctp_cid cid,
struct sctp_chunk *chunk,
struct sctp_chunk **err_chunk)
{
@@ -2180,7 +2180,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
* cause 'Protocol Violation'.
*/
if (SCTP_AUTH_RANDOM_LENGTH !=
- ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) {
+ ntohs(param.p->length) - sizeof(struct sctp_paramhdr)) {
sctp_process_inv_paramlength(asoc, param.p,
chunk, err_chunk);
retval = SCTP_IERROR_ABORT;
@@ -2208,7 +2208,8 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
goto fallthrough;
hmacs = (struct sctp_hmac_algo_param *)param.p;
- n_elt = (ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) >> 1;
+ n_elt = (ntohs(param.p->length) -
+ sizeof(struct sctp_paramhdr)) >> 1;
/* SCTP-AUTH: Section 6.1
* The HMAC algorithm based on SHA-1 MUST be supported and
@@ -2240,9 +2241,9 @@ fallthrough:
/* Verify the INIT packet before we process it. */
int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
- const struct sctp_association *asoc, sctp_cid_t cid,
- sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk,
- struct sctp_chunk **errp)
+ const struct sctp_association *asoc, enum sctp_cid cid,
+ struct sctp_init_chunk *peer_init,
+ struct sctp_chunk *chunk, struct sctp_chunk **errp)
{
union sctp_params param;
bool has_cookie = false;
@@ -2306,7 +2307,7 @@ int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
*/
int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
const union sctp_addr *peer_addr,
- sctp_init_chunk_t *peer_init, gfp_t gfp)
+ struct sctp_init_chunk *peer_init, gfp_t gfp)
{
struct net *net = sock_net(asoc->base.sk);
union sctp_params param;
@@ -2565,7 +2566,7 @@ do_addr_param:
asoc->peer.ipv4_address = 1;
/* Cycle through address types; avoid divide by 0. */
- sat = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+ sat = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
if (sat)
sat /= sizeof(__u16);
@@ -2592,7 +2593,7 @@ do_addr_param:
case SCTP_PARAM_STATE_COOKIE:
asoc->peer.cookie_len =
- ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+ ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
asoc->peer.cookie = param.cookie->body;
break;
@@ -3176,7 +3177,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
return false;
length = ntohs(param.addip->param_hdr.length);
if (length < sizeof(sctp_addip_param_t) +
- sizeof(sctp_paramhdr_t))
+ sizeof(**errp))
return false;
break;
case SCTP_PARAM_SUCCESS_REPORT:
@@ -3218,7 +3219,8 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
int chunk_len;
__u32 serial;
- chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
+ chunk_len = ntohs(asconf->chunk_hdr->length) -
+ sizeof(struct sctp_chunkhdr);
hdr = (sctp_addiphdr_t *)asconf->skb->data;
serial = ntohl(hdr->serial);
@@ -3364,7 +3366,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
err_code = SCTP_ERROR_REQ_REFUSED;
asconf_ack_len = ntohs(asconf_ack->chunk_hdr->length) -
- sizeof(sctp_chunkhdr_t);
+ sizeof(struct sctp_chunkhdr);
/* Skip the addiphdr from the asconf_ack chunk and store a pointer to
* the first asconf_ack parameter.
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index dfe1fcb520ba..d6e5e9e0fd6d 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -647,7 +647,7 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
struct sctp_association *asoc,
struct sctp_chunk *chunk,
- sctp_init_chunk_t *peer_init,
+ struct sctp_init_chunk *peer_init,
gfp_t gfp)
{
int error;
@@ -955,9 +955,10 @@ static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds,
switch (err_hdr->cause) {
case SCTP_ERROR_UNKNOWN_CHUNK:
{
- sctp_chunkhdr_t *unk_chunk_hdr;
+ struct sctp_chunkhdr *unk_chunk_hdr;
- unk_chunk_hdr = (sctp_chunkhdr_t *)err_hdr->variable;
+ unk_chunk_hdr = (struct sctp_chunkhdr *)
+ err_hdr->variable;
switch (unk_chunk_hdr->type) {
/* ADDIP 4.1 A9) If the peer responds to an ASCONF with
* an ERROR chunk reporting that it did not recognized
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8feff96a5bef..b2a74c3823ee 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -235,7 +235,7 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net,
return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN_COMPLETE chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -345,7 +345,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
* error, but since we don't have an association, we'll
* just discard the packet.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* If the INIT is coming toward a closing socket, we'll send back
@@ -360,7 +360,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
- (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+ (struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
&err_chunk)) {
/* This chunk contains fatal error. It is to be discarded.
* Send an ABORT, with causes if there is any.
@@ -368,9 +368,9 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
if (err_chunk) {
packet = sctp_abort_pkt_new(net, ep, asoc, arg,
(__u8 *)(err_chunk->chunk_hdr) +
- sizeof(sctp_chunkhdr_t),
+ sizeof(struct sctp_chunkhdr),
ntohs(err_chunk->chunk_hdr->length) -
- sizeof(sctp_chunkhdr_t));
+ sizeof(struct sctp_chunkhdr));
sctp_chunk_free(err_chunk);
@@ -389,10 +389,10 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
}
/* Grab the INIT header. */
- chunk->subh.init_hdr = (sctp_inithdr_t *)chunk->skb->data;
+ chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data;
/* Tag the variable length parameters. */
- chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+ chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
new_asoc = sctp_make_temp_asoc(ep, chunk, GFP_ATOMIC);
if (!new_asoc)
@@ -405,7 +405,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
/* The call, sctp_process_init(), can fail on memory allocation. */
if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk),
- (sctp_init_chunk_t *)chunk->chunk_hdr,
+ (struct sctp_init_chunk *)chunk->chunk_hdr,
GFP_ATOMIC))
goto nomem_init;
@@ -417,7 +417,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
len = 0;
if (err_chunk)
len = ntohs(err_chunk->chunk_hdr->length) -
- sizeof(sctp_chunkhdr_t);
+ sizeof(struct sctp_chunkhdr);
repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len);
if (!repl)
@@ -437,7 +437,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
*/
unk_param = (sctp_unrecognized_param_t *)
((__u8 *)(err_chunk->chunk_hdr) +
- sizeof(sctp_chunkhdr_t));
+ sizeof(struct sctp_chunkhdr));
/* Replace the cause code with the "Unrecognized parameter"
* parameter type.
*/
@@ -503,7 +503,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
sctp_cmd_seq_t *commands)
{
struct sctp_chunk *chunk = arg;
- sctp_init_chunk_t *initchunk;
+ struct sctp_init_chunk *initchunk;
struct sctp_chunk *err_chunk;
struct sctp_packet *packet;
@@ -522,12 +522,12 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Grab the INIT header. */
- chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
+ chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data;
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
- (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+ (struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
&err_chunk)) {
sctp_error_t error = SCTP_ERROR_NO_RESOURCE;
@@ -540,9 +540,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
if (err_chunk) {
packet = sctp_abort_pkt_new(net, ep, asoc, arg,
(__u8 *)(err_chunk->chunk_hdr) +
- sizeof(sctp_chunkhdr_t),
+ sizeof(struct sctp_chunkhdr),
ntohs(err_chunk->chunk_hdr->length) -
- sizeof(sctp_chunkhdr_t));
+ sizeof(struct sctp_chunkhdr));
sctp_chunk_free(err_chunk);
@@ -576,9 +576,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
/* Tag the variable length parameters. Note that we never
* convert the parameters in an INIT chunk.
*/
- chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+ chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
- initchunk = (sctp_init_chunk_t *) chunk->chunk_hdr;
+ initchunk = (struct sctp_init_chunk *)chunk->chunk_hdr;
sctp_add_cmd_sf(commands, SCTP_CMD_PEER_INIT,
SCTP_PEER_INIT(initchunk));
@@ -653,7 +653,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
{
struct sctp_chunk *chunk = arg;
struct sctp_association *new_asoc;
- sctp_init_chunk_t *peer_init;
+ struct sctp_init_chunk *peer_init;
struct sctp_chunk *repl;
struct sctp_ulpevent *ev, *ai_ev = NULL;
int error = 0;
@@ -673,7 +673,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
* chunk header. More detailed verification is done
* in sctp_unpack_cookie().
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* If the endpoint is not listening or if the number of associations
@@ -691,7 +691,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
chunk->subh.cookie_hdr =
(struct sctp_signed_cookie *)chunk->skb->data;
if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
- sizeof(sctp_chunkhdr_t)))
+ sizeof(struct sctp_chunkhdr)))
goto nomem;
/* 5.1 D) Upon reception of the COOKIE ECHO chunk, Endpoint
@@ -770,9 +770,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
auth.skb = chunk->auth_chunk;
auth.asoc = chunk->asoc;
auth.sctp_hdr = chunk->sctp_hdr;
- auth.chunk_hdr = skb_push(chunk->auth_chunk,
- sizeof(sctp_chunkhdr_t));
- skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t));
+ auth.chunk_hdr = (struct sctp_chunkhdr *)
+ skb_push(chunk->auth_chunk,
+ sizeof(struct sctp_chunkhdr));
+ skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr));
auth.transport = chunk->transport;
ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth);
@@ -886,7 +887,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
/* Verify that the chunk length for the COOKIE-ACK is OK.
* If we don't do this, any bundled chunks may be junked.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -1080,7 +1081,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
void *arg,
sctp_cmd_seq_t *commands)
{
- sctp_paramhdr_t *param_hdr;
+ struct sctp_paramhdr *param_hdr;
struct sctp_chunk *chunk = arg;
struct sctp_chunk *reply;
size_t paylen = 0;
@@ -1097,9 +1098,9 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
* respond with a HEARTBEAT ACK that contains the Heartbeat
* Information field copied from the received HEARTBEAT chunk.
*/
- chunk->subh.hb_hdr = (sctp_heartbeathdr_t *) chunk->skb->data;
- param_hdr = (sctp_paramhdr_t *) chunk->subh.hb_hdr;
- paylen = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
+ chunk->subh.hb_hdr = (sctp_heartbeathdr_t *)chunk->skb->data;
+ param_hdr = (struct sctp_paramhdr *)chunk->subh.hb_hdr;
+ paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr);
if (ntohs(param_hdr->length) > paylen)
return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
@@ -1164,7 +1165,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the HEARTBEAT-ACK chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t) +
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr) +
sizeof(sctp_sender_hb_info_t)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -1449,19 +1450,19 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
* In this case, we generate a protocol violation since we have
* an association established.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Grab the INIT header. */
- chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
+ chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data;
/* Tag the variable length parameters. */
- chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+ chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
- (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+ (struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
&err_chunk)) {
/* This chunk contains fatal error. It is to be discarded.
* Send an ABORT, with causes if there is any.
@@ -1469,9 +1470,9 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
if (err_chunk) {
packet = sctp_abort_pkt_new(net, ep, asoc, arg,
(__u8 *)(err_chunk->chunk_hdr) +
- sizeof(sctp_chunkhdr_t),
+ sizeof(struct sctp_chunkhdr),
ntohs(err_chunk->chunk_hdr->length) -
- sizeof(sctp_chunkhdr_t));
+ sizeof(struct sctp_chunkhdr));
if (packet) {
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
@@ -1508,7 +1509,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
* place (local tie-tag and per tie-tag) within the state cookie.
*/
if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk),
- (sctp_init_chunk_t *)chunk->chunk_hdr,
+ (struct sctp_init_chunk *)chunk->chunk_hdr,
GFP_ATOMIC))
goto nomem;
@@ -1535,7 +1536,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
len = 0;
if (err_chunk) {
len = ntohs(err_chunk->chunk_hdr->length) -
- sizeof(sctp_chunkhdr_t);
+ sizeof(struct sctp_chunkhdr);
}
repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len);
@@ -1556,7 +1557,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
*/
unk_param = (sctp_unrecognized_param_t *)
((__u8 *)(err_chunk->chunk_hdr) +
- sizeof(sctp_chunkhdr_t));
+ sizeof(struct sctp_chunkhdr));
/* Replace the cause code with the "Unrecognized parameter"
* parameter type.
*/
@@ -1729,7 +1730,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
sctp_cmd_seq_t *commands,
struct sctp_association *new_asoc)
{
- sctp_init_chunk_t *peer_init;
+ struct sctp_init_chunk *peer_init;
struct sctp_ulpevent *ev;
struct sctp_chunk *repl;
struct sctp_chunk *err;
@@ -1844,7 +1845,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net,
sctp_cmd_seq_t *commands,
struct sctp_association *new_asoc)
{
- sctp_init_chunk_t *peer_init;
+ struct sctp_init_chunk *peer_init;
struct sctp_chunk *repl;
/* new_asoc is a brand-new association, so these are not yet
@@ -2044,7 +2045,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
* enough for the chunk header. Cookie length verification is
* done later.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -2053,7 +2054,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
*/
chunk->subh.cookie_hdr = (struct sctp_signed_cookie *)chunk->skb->data;
if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
- sizeof(sctp_chunkhdr_t)))
+ sizeof(struct sctp_chunkhdr)))
goto nomem;
/* In RFC 2960 5.2.4 3, if both Verification Tags in the State Cookie
@@ -2806,7 +2807,7 @@ sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net,
struct sctp_chunk *reply;
/* Make sure that the chunk has a valid length */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -2989,7 +2990,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -3009,7 +3010,8 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
return SCTP_DISPOSITION_ABORT;
case SCTP_IERROR_PROTO_VIOLATION:
return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
- (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
+ (u8 *)chunk->subh.data_hdr,
+ sizeof(struct sctp_datahdr));
default:
BUG();
}
@@ -3107,7 +3109,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -3123,7 +3125,8 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
return SCTP_DISPOSITION_ABORT;
case SCTP_IERROR_PROTO_VIOLATION:
return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
- (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
+ (u8 *)chunk->subh.data_hdr,
+ sizeof(struct sctp_datahdr));
default:
BUG();
}
@@ -3358,7 +3361,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* 10.2 H) SHUTDOWN COMPLETE notification
@@ -3435,7 +3438,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
{
struct sctp_chunk *chunk = arg;
struct sk_buff *skb = chunk->skb;
- sctp_chunkhdr_t *ch;
+ struct sctp_chunkhdr *ch;
sctp_errhdr_t *err;
__u8 *ch_end;
int ootb_shut_ack = 0;
@@ -3443,10 +3446,10 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
- ch = (sctp_chunkhdr_t *) chunk->chunk_hdr;
+ ch = (struct sctp_chunkhdr *)chunk->chunk_hdr;
do {
/* Report violation if the chunk is less then minimal */
- if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+ if (ntohs(ch->length) < sizeof(*ch))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -3487,7 +3490,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
}
}
- ch = (sctp_chunkhdr_t *) ch_end;
+ ch = (struct sctp_chunkhdr *)ch_end;
} while (ch_end < skb_tail_pointer(skb));
if (ootb_shut_ack)
@@ -3560,7 +3563,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
/* If the chunk length is invalid, we don't want to process
* the reset of the packet.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* We need to discard the rest of the packet to prevent
@@ -3591,7 +3594,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
struct sctp_chunk *chunk = arg;
/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -4256,7 +4259,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
{
struct sctp_chunk *unk_chunk = arg;
struct sctp_chunk *err_chunk;
- sctp_chunkhdr_t *hdr;
+ struct sctp_chunkhdr *hdr;
pr_debug("%s: processing unknown chunk id:%d\n", __func__, type.chunk);
@@ -4267,7 +4270,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
* Since we don't know the chunk type, we use a general
* chunkhdr structure to make a comparison.
*/
- if (!sctp_chunk_length_valid(unk_chunk, sizeof(sctp_chunkhdr_t)))
+ if (!sctp_chunk_length_valid(unk_chunk, sizeof(*hdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -4340,7 +4343,7 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
* Since we don't know the chunk type, we use a general
* chunkhdr structure to make a comparison.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -4405,7 +4408,7 @@ sctp_disposition_t sctp_sf_violation(struct net *net,
struct sctp_chunk *chunk = arg;
/* Make sure that the chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -6121,9 +6124,9 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
switch (chunk->chunk_hdr->type) {
case SCTP_CID_INIT:
{
- sctp_init_chunk_t *init;
+ struct sctp_init_chunk *init;
- init = (sctp_init_chunk_t *)chunk->chunk_hdr;
+ init = (struct sctp_init_chunk *)chunk->chunk_hdr;
vtag = ntohl(init->init_hdr.init_tag);
break;
}
@@ -6196,7 +6199,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands)
{
- sctp_datahdr_t *data_hdr;
+ struct sctp_datahdr *data_hdr;
struct sctp_chunk *err;
size_t datalen;
sctp_verb_t deliver;
@@ -6209,8 +6212,9 @@ static int sctp_eat_data(const struct sctp_association *asoc,
u16 sid;
u8 ordered = 0;
- data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data;
- skb_pull(chunk->skb, sizeof(sctp_datahdr_t));
+ data_hdr = (struct sctp_datahdr *)chunk->skb->data;
+ chunk->subh.data_hdr = data_hdr;
+ skb_pull(chunk->skb, sizeof(*data_hdr));
tsn = ntohl(data_hdr->tsn);
pr_debug("%s: TSN 0x%x\n", __func__, tsn);
@@ -6258,7 +6262,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* Actually, allow a little bit of overflow (up to a MTU).
*/
datalen = ntohs(chunk->chunk_hdr->length);
- datalen -= sizeof(sctp_data_chunk_t);
+ datalen -= sizeof(struct sctp_data_chunk);
deliver = SCTP_CMD_CHUNK_ULP;
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 419b18ebb056..3e958c1c4b95 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -53,7 +53,7 @@ static const sctp_sm_table_entry_t
timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES];
static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
- sctp_cid_t cid,
+ enum sctp_cid cid,
sctp_state_t state);
@@ -968,7 +968,7 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S
};
static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
- sctp_cid_t cid,
+ enum sctp_cid cid,
sctp_state_t state)
{
if (state > SCTP_STATE_MAX)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7b6e20eb9451..1db478e34520 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -164,7 +164,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
sizeof(struct sk_buff) +
sizeof(struct sctp_chunk);
- atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
+ refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
sk->sk_wmem_queued += chunk->skb->truesize;
sk_mem_charge(sk, chunk->skb->truesize);
}
@@ -4933,11 +4933,47 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
}
EXPORT_SYMBOL(sctp_do_peeloff);
+static int sctp_getsockopt_peeloff_common(struct sock *sk, sctp_peeloff_arg_t *peeloff,
+ struct file **newfile, unsigned flags)
+{
+ struct socket *newsock;
+ int retval;
+
+ retval = sctp_do_peeloff(sk, peeloff->associd, &newsock);
+ if (retval < 0)
+ goto out;
+
+ /* Map the socket to an unused fd that can be returned to the user. */
+ retval = get_unused_fd_flags(flags & SOCK_CLOEXEC);
+ if (retval < 0) {
+ sock_release(newsock);
+ goto out;
+ }
+
+ *newfile = sock_alloc_file(newsock, 0, NULL);
+ if (IS_ERR(*newfile)) {
+ put_unused_fd(retval);
+ sock_release(newsock);
+ retval = PTR_ERR(*newfile);
+ *newfile = NULL;
+ return retval;
+ }
+
+ pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk,
+ retval);
+
+ peeloff->sd = retval;
+
+ if (flags & SOCK_NONBLOCK)
+ (*newfile)->f_flags |= O_NONBLOCK;
+out:
+ return retval;
+}
+
static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval, int __user *optlen)
{
sctp_peeloff_arg_t peeloff;
- struct socket *newsock;
- struct file *newfile;
+ struct file *newfile = NULL;
int retval = 0;
if (len < sizeof(sctp_peeloff_arg_t))
@@ -4946,26 +4982,44 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
if (copy_from_user(&peeloff, optval, len))
return -EFAULT;
- retval = sctp_do_peeloff(sk, peeloff.associd, &newsock);
+ retval = sctp_getsockopt_peeloff_common(sk, &peeloff, &newfile, 0);
if (retval < 0)
goto out;
- /* Map the socket to an unused fd that can be returned to the user. */
- retval = get_unused_fd_flags(0);
- if (retval < 0) {
- sock_release(newsock);
- goto out;
+ /* Return the fd mapped to the new socket. */
+ if (put_user(len, optlen)) {
+ fput(newfile);
+ put_unused_fd(retval);
+ return -EFAULT;
}
- newfile = sock_alloc_file(newsock, 0, NULL);
- if (IS_ERR(newfile)) {
+ if (copy_to_user(optval, &peeloff, len)) {
+ fput(newfile);
put_unused_fd(retval);
- sock_release(newsock);
- return PTR_ERR(newfile);
+ return -EFAULT;
}
+ fd_install(retval, newfile);
+out:
+ return retval;
+}
- pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk,
- retval);
+static int sctp_getsockopt_peeloff_flags(struct sock *sk, int len,
+ char __user *optval, int __user *optlen)
+{
+ sctp_peeloff_flags_arg_t peeloff;
+ struct file *newfile = NULL;
+ int retval = 0;
+
+ if (len < sizeof(sctp_peeloff_flags_arg_t))
+ return -EINVAL;
+ len = sizeof(sctp_peeloff_flags_arg_t);
+ if (copy_from_user(&peeloff, optval, len))
+ return -EFAULT;
+
+ retval = sctp_getsockopt_peeloff_common(sk, &peeloff.p_arg,
+ &newfile, peeloff.flags);
+ if (retval < 0)
+ goto out;
/* Return the fd mapped to the new socket. */
if (put_user(len, optlen)) {
@@ -4973,7 +5027,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
put_unused_fd(retval);
return -EFAULT;
}
- peeloff.sd = retval;
+
if (copy_to_user(optval, &peeloff, len)) {
fput(newfile);
put_unused_fd(retval);
@@ -6033,7 +6087,8 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
return -EACCES;
hmacs = ep->auth_hmacs_list;
- data_len = ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t);
+ data_len = ntohs(hmacs->param_hdr.length) -
+ sizeof(struct sctp_paramhdr);
if (len < sizeof(struct sctp_hmacalgo) + data_len)
return -EINVAL;
@@ -6117,7 +6172,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
goto num;
/* See if the user provided enough room for all the data */
- num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t);
+ num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr);
if (len < num_chunks)
return -EINVAL;
@@ -6165,7 +6220,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
if (!ch)
goto num;
- num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t);
+ num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr);
if (len < sizeof(struct sctp_authchunks) + num_chunks)
return -EINVAL;
@@ -6758,6 +6813,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_SOCKOPT_PEELOFF:
retval = sctp_getsockopt_peeloff(sk, len, optval, optlen);
break;
+ case SCTP_SOCKOPT_PEELOFF_FLAGS:
+ retval = sctp_getsockopt_peeloff_flags(sk, len, optval, optlen);
+ break;
case SCTP_PEER_ADDR_PARAMS:
retval = sctp_getsockopt_peer_addr_params(sk, len, optval,
optlen);
@@ -7563,7 +7621,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
if (flags & MSG_PEEK) {
skb = skb_peek(&sk->sk_receive_queue);
if (skb)
- atomic_inc(&skb->users);
+ refcount_inc(&skb->users);
} else {
skb = __skb_dequeue(&sk->sk_receive_queue);
}
@@ -7684,7 +7742,7 @@ static void sctp_wfree(struct sk_buff *skb)
sizeof(struct sk_buff) +
sizeof(struct sctp_chunk);
- atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
+ WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc));
/*
* This undoes what is done via sctp_set_owner_w and sk_mem_charge
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 82e6d40052a8..63ea15503714 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -304,7 +304,7 @@ out:
return retval;
}
-static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param(
+static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param(
struct sctp_association *asoc, __u32 resp_seq,
__be16 type)
{
@@ -749,7 +749,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
struct sctp_strreset_resp *resp = param.v;
struct sctp_transport *t;
__u16 i, nums, flags = 0;
- sctp_paramhdr_t *req;
+ struct sctp_paramhdr *req;
__u32 result;
req = sctp_chunk_lookup_strreset_param(asoc, resp->response_seq, 0);
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 17854fb0e512..5f86c5062a98 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -158,7 +158,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change(
/* Trim the buffer to the right length. */
skb_trim(skb, sizeof(struct sctp_assoc_change) +
ntohs(chunk->chunk_hdr->length) -
- sizeof(sctp_chunkhdr_t));
+ sizeof(struct sctp_chunkhdr));
} else {
event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change),
MSG_NOTIFICATION, gfp);
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 25f7e4140566..0225d62a869f 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1090,7 +1090,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
if (chunk) {
needed = ntohs(chunk->chunk_hdr->length);
- needed -= sizeof(sctp_data_chunk_t);
+ needed -= sizeof(struct sctp_data_chunk);
} else
needed = SCTP_DEFAULT_MAXWINDOW;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 1b92b72e812f..101e3597338f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2313,7 +2313,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk)
struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
- WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+ WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
__sock_put(sk);
}
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 1a0c961f4ffe..b9ee766054f6 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -212,7 +212,7 @@ EXPORT_SYMBOL_GPL(unix_peer_get);
static inline void unix_release_addr(struct unix_address *addr)
{
- if (atomic_dec_and_test(&addr->refcnt))
+ if (refcount_dec_and_test(&addr->refcnt))
kfree(addr);
}
@@ -442,7 +442,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
static int unix_writable(const struct sock *sk)
{
return sk->sk_state != TCP_LISTEN &&
- (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
+ (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}
static void unix_write_space(struct sock *sk)
@@ -487,7 +487,7 @@ static void unix_sock_destructor(struct sock *sk)
skb_queue_purge(&sk->sk_receive_queue);
- WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+ WARN_ON(refcount_read(&sk->sk_wmem_alloc));
WARN_ON(!sk_unhashed(sk));
WARN_ON(sk->sk_socket);
if (!sock_flag(sk, SOCK_DEAD)) {
@@ -864,7 +864,7 @@ static int unix_autobind(struct socket *sock)
goto out;
addr->name->sun_family = AF_UNIX;
- atomic_set(&addr->refcnt, 1);
+ refcount_set(&addr->refcnt, 1);
retry:
addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
@@ -1040,7 +1040,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
memcpy(addr->name, sunaddr, addr_len);
addr->len = addr_len;
addr->hash = hash ^ sk->sk_type;
- atomic_set(&addr->refcnt, 1);
+ refcount_set(&addr->refcnt, 1);
if (sun_path[0]) {
addr->hash = UNIX_HASH_SIZE;
@@ -1335,7 +1335,7 @@ restart:
/* copy address information from listening to new sock*/
if (otheru->addr) {
- atomic_inc(&otheru->addr->refcnt);
+ refcount_inc(&otheru->addr->refcnt);
newu->addr = otheru->addr;
}
if (otheru->path.dentry) {
@@ -2033,7 +2033,7 @@ alloc_skb:
skb->len += size;
skb->data_len += size;
skb->truesize += size;
- atomic_add(size, &sk->sk_wmem_alloc);
+ refcount_add(size, &sk->sk_wmem_alloc);
if (newskb) {
err = unix_scm_to_skb(&scm, skb, false);
@@ -2847,7 +2847,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
s,
- atomic_read(&s->sk_refcnt),
+ refcount_read(&s->sk_refcnt),
0,
s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
s->sk_type,
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index abf81b329dc1..55b2ac300995 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -4,8 +4,7 @@
obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
xfrm_input.o xfrm_output.o \
- xfrm_sysctl.o xfrm_replay.o
-obj-$(CONFIG_XFRM_OFFLOAD) += xfrm_device.o
+ xfrm_sysctl.o xfrm_replay.o xfrm_device.o
obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
obj-$(CONFIG_XFRM_USER) += xfrm_user.o
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 6d4a60d1bf19..5f7e8bfa0c2d 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -22,6 +22,7 @@
#include <net/xfrm.h>
#include <linux/notifier.h>
+#ifdef CONFIG_XFRM_OFFLOAD
int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
{
int err;
@@ -137,6 +138,7 @@ ok:
return true;
}
EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok);
+#endif
static int xfrm_dev_register(struct net_device *dev)
{
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index a3dc7ab0b7ed..4706df612170 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1006,10 +1006,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
err = -ESRCH;
out:
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
-
- if (cnt)
- xfrm_garbage_collect(net);
-
return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6197c7231bc7..2be4c6af008a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2027,6 +2027,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
return err;
}
+ xfrm_garbage_collect(net);
c.data.type = type;
c.event = nlh->nlmsg_type;
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index e7ec9b8539a5..9c650589e80f 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -36,6 +36,7 @@ hostprogs-y += lwt_len_hist
hostprogs-y += xdp_tx_iptunnel
hostprogs-y += test_map_in_map
hostprogs-y += per_socket_stats_example
+hostprogs-y += load_sock_ops
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o
@@ -52,6 +53,7 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o
tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o
tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o
tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o
+load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o
test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o
@@ -111,6 +113,12 @@ always += lwt_len_hist_kern.o
always += xdp_tx_iptunnel_kern.o
always += test_map_in_map_kern.o
always += cookie_uid_helper_example.o
+always += tcp_synrto_kern.o
+always += tcp_rwnd_kern.o
+always += tcp_bufs_kern.o
+always += tcp_cong_kern.o
+always += tcp_iw_kern.o
+always += tcp_clamp_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -130,6 +138,7 @@ HOSTLOADLIBES_tracex4 += -lelf -lrt
HOSTLOADLIBES_tracex5 += -lelf
HOSTLOADLIBES_tracex6 += -lelf
HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
+HOSTLOADLIBES_load_sock_ops += -lelf
HOSTLOADLIBES_test_probe_write_user += -lelf
HOSTLOADLIBES_trace_output += -lelf -lrt
HOSTLOADLIBES_lathist += -lelf
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index f4840b8bb8f9..d50ac342dc92 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -60,6 +60,9 @@ static unsigned long long (*bpf_get_prandom_u32)(void) =
(void *) BPF_FUNC_get_prandom_u32;
static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
(void *) BPF_FUNC_xdp_adjust_head;
+static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
+ int optlen) =
+ (void *) BPF_FUNC_setsockopt;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index a91c57dd8571..a4be7cfa6519 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -64,6 +64,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
+ bool is_sockops = strncmp(event, "sockops", 7) == 0;
size_t insns_cnt = size / sizeof(struct bpf_insn);
enum bpf_prog_type prog_type;
char buf[256];
@@ -89,6 +90,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_CGROUP_SKB;
} else if (is_cgroup_sk) {
prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+ } else if (is_sockops) {
+ prog_type = BPF_PROG_TYPE_SOCK_OPS;
} else {
printf("Unknown event '%s'\n", event);
return -1;
@@ -106,8 +109,11 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
return 0;
- if (is_socket) {
- event += 6;
+ if (is_socket || is_sockops) {
+ if (is_socket)
+ event += 6;
+ else
+ event += 7;
if (*event != '/')
return 0;
event++;
@@ -560,7 +566,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
memcmp(shname, "xdp", 3) == 0 ||
memcmp(shname, "perf_event", 10) == 0 ||
memcmp(shname, "socket", 6) == 0 ||
- memcmp(shname, "cgroup/", 7) == 0)
+ memcmp(shname, "cgroup/", 7) == 0 ||
+ memcmp(shname, "sockops", 7) == 0)
load_and_attach(shname, data->d_buf, data->d_size);
}
diff --git a/samples/bpf/load_sock_ops.c b/samples/bpf/load_sock_ops.c
new file mode 100644
index 000000000000..e5da6cf71a3e
--- /dev/null
+++ b/samples/bpf/load_sock_ops.c
@@ -0,0 +1,97 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/unistd.h>
+
+static void usage(char *pname)
+{
+ printf("USAGE:\n %s [-l] <cg-path> <prog filename>\n", pname);
+ printf("\tLoad and attach a sock_ops program to the specified "
+ "cgroup\n");
+ printf("\tIf \"-l\" is used, the program will continue to run\n");
+ printf("\tprinting the BPF log buffer\n");
+ printf("\tIf the specified filename does not end in \".o\", it\n");
+ printf("\tappends \"_kern.o\" to the name\n");
+ printf("\n");
+ printf(" %s -r <cg-path>\n", pname);
+ printf("\tDetaches the currently attached sock_ops program\n");
+ printf("\tfrom the specified cgroup\n");
+ printf("\n");
+ exit(1);
+}
+
+int main(int argc, char **argv)
+{
+ int logFlag = 0;
+ int error = 0;
+ char *cg_path;
+ char fn[500];
+ char *prog;
+ int cg_fd;
+
+ if (argc < 3)
+ usage(argv[0]);
+
+ if (!strcmp(argv[1], "-r")) {
+ cg_path = argv[2];
+ cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
+ error = bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+ if (error) {
+ printf("ERROR: bpf_prog_detach: %d (%s)\n",
+ error, strerror(errno));
+ return 2;
+ }
+ return 0;
+ } else if (!strcmp(argv[1], "-h")) {
+ usage(argv[0]);
+ } else if (!strcmp(argv[1], "-l")) {
+ logFlag = 1;
+ if (argc < 4)
+ usage(argv[0]);
+ }
+
+ prog = argv[argc - 1];
+ cg_path = argv[argc - 2];
+ if (strlen(prog) > 480) {
+ fprintf(stderr, "ERROR: program name too long (> 480 chars)\n");
+ return 3;
+ }
+ cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
+
+ if (!strcmp(prog + strlen(prog)-2, ".o"))
+ strcpy(fn, prog);
+ else
+ sprintf(fn, "%s_kern.o", prog);
+ if (logFlag)
+ printf("loading bpf file:%s\n", fn);
+ if (load_bpf_file(fn)) {
+ printf("ERROR: load_bpf_file failed for: %s\n", fn);
+ printf("%s", bpf_log_buf);
+ return 4;
+ }
+ if (logFlag)
+ printf("TCP BPF Loaded %s\n", fn);
+
+ error = bpf_prog_attach(prog_fd[0], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+ if (error) {
+ printf("ERROR: bpf_prog_attach: %d (%s)\n",
+ error, strerror(errno));
+ return 5;
+ } else if (logFlag) {
+ read_trace_pipe();
+ }
+
+ return error;
+}
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index b5524d417eb5..877ecf8fc5ac 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -8,6 +8,10 @@
#include <arpa/inet.h>
#include <sys/resource.h>
+#define PARSE_IP 3
+#define PARSE_IP_PROG_FD (prog_fd[0])
+#define PROG_ARRAY_FD (map_fd[0])
+
struct bpf_flow_keys {
__be32 src;
__be32 dst;
@@ -28,7 +32,9 @@ int main(int argc, char **argv)
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
char filename[256];
FILE *f;
- int i, sock;
+ int i, sock, err, id, key = PARSE_IP;
+ struct bpf_prog_info info = {};
+ uint32_t info_len = sizeof(info);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
setrlimit(RLIMIT_MEMLOCK, &r);
@@ -38,6 +44,13 @@ int main(int argc, char **argv)
return 1;
}
+ /* Test fd array lookup which returns the id of the bpf_prog */
+ err = bpf_obj_get_info_by_fd(PARSE_IP_PROG_FD, &info, &info_len);
+ assert(!err);
+ err = bpf_map_lookup_elem(PROG_ARRAY_FD, &key, &id);
+ assert(!err);
+ assert(id == info.id);
+
sock = open_raw_sock("lo");
assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4],
diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c
new file mode 100644
index 000000000000..ee83bbabd17c
--- /dev/null
+++ b/samples/bpf/tcp_bufs_kern.c
@@ -0,0 +1,86 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set initial receive window to 40 packets and send
+ * and receive buffers to 1.5MB. This would usually be done after
+ * doing appropriate checks that indicate the hosts are far enough
+ * away (i.e. large RTT).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sockops")
+int bpf_bufs(struct bpf_sock_ops *skops)
+{
+ int bufsize = 1500000;
+ int rwnd_init = 40;
+ int rv = 0;
+ int op;
+
+ /* For testing purposes, only execute rest of BPF program
+ * if either port number is 55601
+ */
+ if (bpf_ntohl(skops->remote_port) != 55601 &&
+ skops->local_port != 55601)
+ return -1;
+
+ op = (int) skops->op;
+
+#ifdef DEBUG
+ bpf_printk("Returning %d\n", rv);
+#endif
+
+ /* Usually there would be a check to ensure the hosts are far
+ * from each other so it makes sense to increase buffer sizes
+ */
+ switch (op) {
+ case BPF_SOCK_OPS_RWND_INIT:
+ rv = rwnd_init;
+ break;
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ /* Set sndbuf and rcvbuf of active connections */
+ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
+ sizeof(bufsize));
+ rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+ &bufsize, sizeof(bufsize));
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ /* Nothing to do */
+ break;
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ /* Set sndbuf and rcvbuf of passive connections */
+ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
+ sizeof(bufsize));
+ rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+ &bufsize, sizeof(bufsize));
+ break;
+ default:
+ rv = -1;
+ }
+#ifdef DEBUG
+ bpf_printk("Returning %d\n", rv);
+#endif
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c
new file mode 100644
index 000000000000..d68eadd9ca2d
--- /dev/null
+++ b/samples/bpf/tcp_clamp_kern.c
@@ -0,0 +1,102 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Sample BPF program to set send and receive buffers to 150KB, sndcwnd clamp
+ * to 100 packets and SYN and SYN_ACK RTOs to 10ms when both hosts are within
+ * the same datacenter. For this example, we assume they are within the same
+ * datacenter when the first 5.5 bytes of their IPv6 addresses are the same.
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sockops")
+int bpf_clamp(struct bpf_sock_ops *skops)
+{
+ int bufsize = 150000;
+ int to_init = 10;
+ int clamp = 100;
+ int rv = 0;
+ int op;
+
+ /* For testing purposes, only execute rest of BPF program
+ * if either port number is 55601
+ */
+ if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601)
+ return -1;
+
+ op = (int) skops->op;
+
+#ifdef DEBUG
+ bpf_printk("BPF command: %d\n", op);
+#endif
+
+ /* Check that both hosts are within same datacenter. For this example
+ * it is the case when the first 5.5 bytes of their IPv6 addresses are
+ * the same.
+ */
+ if (skops->family == AF_INET6 &&
+ skops->local_ip6[0] == skops->remote_ip6[0] &&
+ (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
+ (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) {
+ switch (op) {
+ case BPF_SOCK_OPS_TIMEOUT_INIT:
+ rv = to_init;
+ break;
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ /* Set sndbuf and rcvbuf of active connections */
+ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF,
+ &bufsize, sizeof(bufsize));
+ rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
+ SO_RCVBUF, &bufsize,
+ sizeof(bufsize));
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ rv = bpf_setsockopt(skops, SOL_TCP,
+ TCP_BPF_SNDCWND_CLAMP,
+ &clamp, sizeof(clamp));
+ break;
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ /* Set sndbuf and rcvbuf of passive connections */
+ rv = bpf_setsockopt(skops, SOL_TCP,
+ TCP_BPF_SNDCWND_CLAMP,
+ &clamp, sizeof(clamp));
+ rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
+ SO_SNDBUF, &bufsize,
+ sizeof(bufsize));
+ rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
+ SO_RCVBUF, &bufsize,
+ sizeof(bufsize));
+ break;
+ default:
+ rv = -1;
+ }
+ } else {
+ rv = -1;
+ }
+#ifdef DEBUG
+ bpf_printk("Returning %d\n", rv);
+#endif
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c
new file mode 100644
index 000000000000..dac15bce1fa9
--- /dev/null
+++ b/samples/bpf/tcp_cong_kern.c
@@ -0,0 +1,83 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set congestion control to dctcp when both hosts are
+ * in the same datacenter (as determined by IPv6 prefix).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sockops")
+int bpf_cong(struct bpf_sock_ops *skops)
+{
+ char cong[] = "dctcp";
+ int rv = 0;
+ int op;
+
+ /* For testing purposes, only execute rest of BPF program
+ * if either port number is 55601
+ */
+ if (bpf_ntohl(skops->remote_port) != 55601 &&
+ skops->local_port != 55601)
+ return -1;
+
+ op = (int) skops->op;
+
+#ifdef DEBUG
+ bpf_printk("BPF command: %d\n", op);
+#endif
+
+ /* Check if both hosts are in the same datacenter. For this
+ * example they are if the 1st 5.5 bytes in the IPv6 address
+ * are the same.
+ */
+ if (skops->family == AF_INET6 &&
+ skops->local_ip6[0] == skops->remote_ip6[0] &&
+ (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
+ (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) {
+ switch (op) {
+ case BPF_SOCK_OPS_NEEDS_ECN:
+ rv = 1;
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
+ cong, sizeof(cong));
+ break;
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
+ cong, sizeof(cong));
+ break;
+ default:
+ rv = -1;
+ }
+ } else {
+ rv = -1;
+ }
+#ifdef DEBUG
+ bpf_printk("Returning %d\n", rv);
+#endif
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c
new file mode 100644
index 000000000000..23c5122ef819
--- /dev/null
+++ b/samples/bpf/tcp_iw_kern.c
@@ -0,0 +1,88 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set initial congestion window and initial receive
+ * window to 40 packets and send and receive buffers to 1.5MB. This
+ * would usually be done after doing appropriate checks that indicate
+ * the hosts are far enough away (i.e. large RTT).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sockops")
+int bpf_iw(struct bpf_sock_ops *skops)
+{
+ int bufsize = 1500000;
+ int rwnd_init = 40;
+ int iw = 40;
+ int rv = 0;
+ int op;
+
+ /* For testing purposes, only execute rest of BPF program
+ * if either port number is 55601
+ */
+ if (bpf_ntohl(skops->remote_port) != 55601 &&
+ skops->local_port != 55601)
+ return -1;
+
+ op = (int) skops->op;
+
+#ifdef DEBUG
+ bpf_printk("BPF command: %d\n", op);
+#endif
+
+ /* Usually there would be a check to ensure the hosts are far
+ * from each other so it makes sense to increase buffer sizes
+ */
+ switch (op) {
+ case BPF_SOCK_OPS_RWND_INIT:
+ rv = rwnd_init;
+ break;
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ /* Set sndbuf and rcvbuf of active connections */
+ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
+ sizeof(bufsize));
+ rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+ &bufsize, sizeof(bufsize));
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_IW, &iw,
+ sizeof(iw));
+ break;
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ /* Set sndbuf and rcvbuf of passive connections */
+ rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
+ sizeof(bufsize));
+ rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+ &bufsize, sizeof(bufsize));
+ break;
+ default:
+ rv = -1;
+ }
+#ifdef DEBUG
+ bpf_printk("Returning %d\n", rv);
+#endif
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c
new file mode 100644
index 000000000000..3f2a228f81ce
--- /dev/null
+++ b/samples/bpf/tcp_rwnd_kern.c
@@ -0,0 +1,69 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set initial receive window to 40 packets when using IPv6
+ * and the first 5.5 bytes of the IPv6 addresses are not the same (in this
+ * example that means both hosts are not in the same datacenter).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sockops")
+int bpf_rwnd(struct bpf_sock_ops *skops)
+{
+ int rv = -1;
+ int op;
+
+ /* For testing purposes, only execute rest of BPF program
+ * if either port number is 55601
+ */
+ if (bpf_ntohl(skops->remote_port) !=
+ 55601 && skops->local_port != 55601)
+ return -1;
+
+ op = (int) skops->op;
+
+#ifdef DEBUG
+ bpf_printk("BPF command: %d\n", op);
+#endif
+
+ /* Check for RWND_INIT operation and IPv6 addresses */
+ if (op == BPF_SOCK_OPS_RWND_INIT &&
+ skops->family == AF_INET6) {
+
+ /* If the first 5.5 bytes of the IPv6 address are not the same
+ * then both hosts are not in the same datacenter
+ * so use a larger initial advertized window (40 packets)
+ */
+ if (skops->local_ip6[0] != skops->remote_ip6[0] ||
+ (bpf_ntohl(skops->local_ip6[1]) & 0xfffff000) !=
+ (bpf_ntohl(skops->remote_ip6[1]) & 0xfffff000))
+ rv = 40;
+ }
+#ifdef DEBUG
+ bpf_printk("Returning %d\n", rv);
+#endif
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c
new file mode 100644
index 000000000000..3c3fc83d81cb
--- /dev/null
+++ b/samples/bpf/tcp_synrto_kern.c
@@ -0,0 +1,69 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set SYN and SYN-ACK RTOs to 10ms when using IPv6 addresses
+ * and the first 5.5 bytes of the IPv6 addresses are the same (in this example
+ * that means both hosts are in the same datacenter).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sockops")
+int bpf_synrto(struct bpf_sock_ops *skops)
+{
+ int rv = -1;
+ int op;
+
+ /* For testing purposes, only execute rest of BPF program
+ * if either port number is 55601
+ */
+ if (bpf_ntohl(skops->remote_port) != 55601 &&
+ skops->local_port != 55601)
+ return -1;
+
+ op = (int) skops->op;
+
+#ifdef DEBUG
+ bpf_printk("BPF command: %d\n", op);
+#endif
+
+ /* Check for TIMEOUT_INIT operation and IPv6 addresses */
+ if (op == BPF_SOCK_OPS_TIMEOUT_INIT &&
+ skops->family == AF_INET6) {
+
+ /* If the first 5.5 bytes of the IPv6 address are the same
+ * then both hosts are in the same datacenter
+ * so use an RTO of 10ms
+ */
+ if (skops->local_ip6[0] == skops->remote_ip6[0] &&
+ (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
+ (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000))
+ rv = 10;
+ }
+#ifdef DEBUG
+ bpf_printk("Returning %d\n", rv);
+#endif
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c
index f62fdc2bd428..1aca18539d8d 100644
--- a/samples/bpf/test_map_in_map_user.c
+++ b/samples/bpf/test_map_in_map_user.c
@@ -32,6 +32,20 @@ static const char * const test_names[] = {
#define NR_TESTS (sizeof(test_names) / sizeof(*test_names))
+static void check_map_id(int inner_map_fd, int map_in_map_fd, uint32_t key)
+{
+ struct bpf_map_info info = {};
+ uint32_t info_len = sizeof(info);
+ int ret, id;
+
+ ret = bpf_obj_get_info_by_fd(inner_map_fd, &info, &info_len);
+ assert(!ret);
+
+ ret = bpf_map_lookup_elem(map_in_map_fd, &key, &id);
+ assert(!ret);
+ assert(id == info.id);
+}
+
static void populate_map(uint32_t port_key, int magic_result)
{
int ret;
@@ -45,12 +59,15 @@ static void populate_map(uint32_t port_key, int magic_result)
ret = bpf_map_update_elem(A_OF_PORT_A, &port_key, &PORT_A, BPF_ANY);
assert(!ret);
+ check_map_id(PORT_A, A_OF_PORT_A, port_key);
ret = bpf_map_update_elem(H_OF_PORT_A, &port_key, &PORT_A, BPF_NOEXIST);
assert(!ret);
+ check_map_id(PORT_A, H_OF_PORT_A, port_key);
ret = bpf_map_update_elem(H_OF_PORT_H, &port_key, &PORT_H, BPF_NOEXIST);
assert(!ret);
+ check_map_id(PORT_H, H_OF_PORT_H, port_key);
}
static void test_map_in_map(void)
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index ce753a408c56..c583a1e1bd3c 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -14,7 +14,15 @@ __headers:
include scripts/Kbuild.include
srcdir := $(srctree)/$(obj)
-subdirs := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.))
+
+# When make is run under a fakechroot environment, the function
+# $(wildcard $(srcdir)/*/.) doesn't only return directories, but also regular
+# files. So, we are using a combination of sort/dir/wildcard which works
+# with fakechroot.
+subdirs := $(patsubst $(srcdir)/%/,%,\
+ $(filter-out $(srcdir)/,\
+ $(sort $(dir $(wildcard $(srcdir)/*/)))))
+
# caller may set destination dir (when installing to asm/)
_dst := $(if $(dst),$(dst),$(obj))
diff --git a/scripts/genksyms/genksyms.h b/scripts/genksyms/genksyms.h
index 3bffdcaaa274..b724a0290c75 100644
--- a/scripts/genksyms/genksyms.h
+++ b/scripts/genksyms/genksyms.h
@@ -75,7 +75,7 @@ struct string_list *copy_list_range(struct string_list *start,
int yylex(void);
int yyparse(void);
-void error_with_pos(const char *, ...);
+void error_with_pos(const char *, ...) __attribute__ ((format(printf, 1, 2)));
/*----------------------------------------------------------------------*/
#define xmalloc(size) ({ void *__ptr = malloc(size); \
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 90a091b6ae4d..eb8144643b78 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -196,7 +196,7 @@ clean-files += config.pot linux.pot
# Check that we have the required ncurses stuff installed for lxdialog (menuconfig)
PHONY += $(obj)/dochecklxdialog
-$(addprefix $(obj)/,$(lxdialog)): $(obj)/dochecklxdialog
+$(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/dochecklxdialog
$(obj)/dochecklxdialog:
$(Q)$(CONFIG_SHELL) $(check-lxdialog) -check $(HOSTCC) $(HOST_EXTRACFLAGS) $(HOSTLOADLIBES_mconf)
diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c
index a9bc5334a478..003114779815 100644
--- a/scripts/kconfig/nconf.c
+++ b/scripts/kconfig/nconf.c
@@ -271,7 +271,7 @@ static struct mitem k_menu_items[MAX_MENU_ITEMS];
static int items_num;
static int global_exit;
/* the currently selected button */
-const char *current_instructions = menu_instructions;
+static const char *current_instructions = menu_instructions;
static char *dialog_input_result;
static int dialog_input_result_len;
@@ -305,7 +305,7 @@ struct function_keys {
};
static const int function_keys_num = 9;
-struct function_keys function_keys[] = {
+static struct function_keys function_keys[] = {
{
.key_str = "F1",
.func = "Help",
@@ -508,7 +508,7 @@ static int get_mext_match(const char *match_str, match_f flag)
index = (index + items_num) % items_num;
while (true) {
char *str = k_menu_items[index].str;
- if (strcasestr(str, match_str) != 0)
+ if (strcasestr(str, match_str) != NULL)
return index;
if (flag == FIND_NEXT_MATCH_UP ||
flag == MATCH_TINKER_PATTERN_UP)
@@ -1067,7 +1067,7 @@ static int do_match(int key, struct match_state *state, int *ans)
static void conf(struct menu *menu)
{
- struct menu *submenu = 0;
+ struct menu *submenu = NULL;
const char *prompt = menu_get_prompt(menu);
struct symbol *sym;
int res;
@@ -1234,7 +1234,7 @@ static void show_help(struct menu *menu)
static void conf_choice(struct menu *menu)
{
const char *prompt = _(menu_get_prompt(menu));
- struct menu *child = 0;
+ struct menu *child = NULL;
struct symbol *active;
int selected_index = 0;
int last_top_row = 0;
@@ -1456,7 +1456,7 @@ static void conf_save(void)
}
}
-void setup_windows(void)
+static void setup_windows(void)
{
int lines, columns;
diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c
index 4b2f44c20caf..a64b1c31253e 100644
--- a/scripts/kconfig/nconf.gui.c
+++ b/scripts/kconfig/nconf.gui.c
@@ -129,7 +129,7 @@ static void no_colors_theme(void)
mkattrn(FUNCTION_TEXT, A_REVERSE);
}
-void set_colors()
+void set_colors(void)
{
start_color();
use_default_colors();
@@ -192,7 +192,7 @@ const char *get_line(const char *text, int line_no)
int lines = 0;
if (!text)
- return 0;
+ return NULL;
for (i = 0; text[i] != '\0' && lines < line_no; i++)
if (text[i] == '\n')
diff --git a/scripts/tags.sh b/scripts/tags.sh
index d661f2f3ef61..d23dcbf17457 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -106,6 +106,7 @@ all_compiled_sources()
case "$i" in
*.[cS])
j=${i/\.[cS]/\.o}
+ j="${j#$tree}"
if [ -e $j ]; then
echo $i
fi
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 5088d4b8db22..009e6c98754e 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -2492,7 +2492,7 @@ static int pcm_chmap_ctl_get(struct snd_kcontrol *kcontrol,
struct snd_pcm_substream *substream;
const struct snd_pcm_chmap_elem *map;
- if (snd_BUG_ON(!info->chmap))
+ if (!info->chmap)
return -EINVAL;
substream = snd_pcm_chmap_substream(info, idx);
if (!substream)
@@ -2524,7 +2524,7 @@ static int pcm_chmap_ctl_tlv(struct snd_kcontrol *kcontrol, int op_flag,
unsigned int __user *dst;
int c, count = 0;
- if (snd_BUG_ON(!info->chmap))
+ if (!info->chmap)
return -EINVAL;
if (size < 8)
return -ENOMEM;
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index 9e6f54f8c45d..1e26854b3425 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -682,7 +682,9 @@ static void out_stream_callback(struct fw_iso_context *context, u32 tstamp,
cycle = increment_cycle_count(cycle, 1);
if (s->handle_packet(s, 0, cycle, i) < 0) {
s->packet_index = -1;
- amdtp_stream_pcm_abort(s);
+ if (in_interrupt())
+ amdtp_stream_pcm_abort(s);
+ WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
return;
}
}
@@ -734,7 +736,9 @@ static void in_stream_callback(struct fw_iso_context *context, u32 tstamp,
/* Queueing error or detecting invalid payload. */
if (i < packets) {
s->packet_index = -1;
- amdtp_stream_pcm_abort(s);
+ if (in_interrupt())
+ amdtp_stream_pcm_abort(s);
+ WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
return;
}
diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h
index 7e8831722821..ea1a91e99875 100644
--- a/sound/firewire/amdtp-stream.h
+++ b/sound/firewire/amdtp-stream.h
@@ -135,7 +135,7 @@ struct amdtp_stream {
/* For a PCM substream processing. */
struct snd_pcm_substream *pcm;
struct tasklet_struct period_tasklet;
- unsigned int pcm_buffer_pointer;
+ snd_pcm_uframes_t pcm_buffer_pointer;
unsigned int pcm_period_pointer;
/* To wait for first packet. */
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 1770f085c2a6..01eb1dc7b5b3 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -370,10 +370,12 @@ enum {
#define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71)
#define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0)
#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+#define IS_BXT_T(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x1a98)
#define IS_GLK(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x3198)
-#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \
- IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci) || \
- IS_GLK(pci)
+#define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348)
+#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci) || \
+ IS_BXT_T(pci) || IS_KBL(pci) || IS_KBL_LP(pci) || \
+ IS_KBL_H(pci) || IS_GLK(pci) || IS_CFL(pci))
static char *driver_short_names[] = {
[AZX_DRIVER_ICH] = "HDA Intel",
@@ -2378,6 +2380,9 @@ static const struct pci_device_id azx_ids[] = {
/* Kabylake-H */
{ PCI_DEVICE(0x8086, 0xa2f0),
.driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
+ /* Coffeelake */
+ { PCI_DEVICE(0x8086, 0xa348),
+ .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE},
/* Broxton-P(Apollolake) */
{ PCI_DEVICE(0x8086, 0x5a98),
.driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f94b48b168dc..ce2988be4f0e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -120,12 +120,14 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LWT_IN,
BPF_PROG_TYPE_LWT_OUT,
BPF_PROG_TYPE_LWT_XMIT,
+ BPF_PROG_TYPE_SOCK_OPS,
};
enum bpf_attach_type {
BPF_CGROUP_INET_INGRESS,
BPF_CGROUP_INET_EGRESS,
BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_SOCK_OPS,
__MAX_BPF_ATTACH_TYPE
};
@@ -518,6 +520,25 @@ union bpf_attr {
* Set full skb->hash.
* @skb: pointer to skb
* @hash: hash to set
+ *
+ * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
+ * Calls setsockopt. Not all opts are available, only those with
+ * integer optvals plus TCP_CONGESTION.
+ * Supported levels: SOL_SOCKET and IPPROTO_TCP
+ * @bpf_socket: pointer to bpf_socket
+ * @level: SOL_SOCKET or IPPROTO_TCP
+ * @optname: option name
+ * @optval: pointer to option value
+ * @optlen: length of optval in bytes
+ * Return: 0 or negative error
+ *
+ * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
+ * Grow or shrink room in sk_buff.
+ * @skb: pointer to skb
+ * @len_diff: (signed) amount of room to grow/shrink
+ * @mode: operation mode (enum bpf_adj_room_mode)
+ * @flags: reserved for future use
+ * Return: 0 on success or negative error code
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -568,7 +589,9 @@ union bpf_attr {
FN(probe_read_str), \
FN(get_socket_cookie), \
FN(get_socket_uid), \
- FN(set_hash),
+ FN(set_hash), \
+ FN(setsockopt), \
+ FN(skb_adjust_room),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -618,6 +641,11 @@ enum bpf_func_id {
/* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+ BPF_ADJ_ROOM_NET_OPTS,
+};
+
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
@@ -720,4 +748,54 @@ struct bpf_map_info {
__u32 map_flags;
} __attribute__((aligned(8)));
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+ __u32 op;
+ union {
+ __u32 reply;
+ __u32 replylong[4];
+ };
+ __u32 family;
+ __u32 remote_ip4;
+ __u32 local_ip4;
+ __u32 remote_ip6[4];
+ __u32 local_ip6[4];
+ __u32 remote_port;
+ __u32 local_port;
+};
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+ BPF_SOCK_OPS_VOID,
+ BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or
+ * -1 if default value should be used
+ */
+ BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized
+ * window (in packets) or -1 if default
+ * value should be used
+ */
+ BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an
+ * active connection is initialized
+ */
+ BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an
+ * active connection is
+ * established
+ */
+ BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a
+ * passive connection is
+ * established
+ */
+ BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
+ * needs ECN
+ */
+};
+
+#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
+#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 84e7e698411e..a2670e9d652d 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -619,7 +619,7 @@ static int post_process_probe_trace_point(struct probe_trace_point *tp,
struct map *map, unsigned long offs)
{
struct symbol *sym;
- u64 addr = tp->address + tp->offset - offs;
+ u64 addr = tp->address - offs;
sym = map__find_symbol(map, addr);
if (!sym)
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index c0af0195432f..404aec520812 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2658,6 +2658,171 @@ static struct bpf_test tests[] = {
.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
+ "direct packet access: test18 (imm += pkt_ptr, 1)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_IMM(BPF_REG_0, 8),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test19 (imm += pkt_ptr, 2)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ BPF_MOV64_IMM(BPF_REG_4, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+ BPF_STX_MEM(BPF_B, BPF_REG_4, BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test20 (x += pkt_ptr, 1)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "direct packet access: test21 (x += pkt_ptr, 2)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9),
+ BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0xffff),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "direct packet access: test22 (x += pkt_ptr, 3)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_3, -16),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 11),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
+ BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+ BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 48),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_STX_MEM(BPF_H, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "direct packet access: test23 (x += pkt_ptr, 4)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_0, 31),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "cannot add integer value with 47 upper zero bits to ptr_to_packet",
+ },
+ {
+ "direct packet access: test24 (x += pkt_ptr, 5)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_0, 64),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
"helper access to packet: test1, valid packet_ptr range",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -3767,6 +3932,72 @@ static struct bpf_test tests[] = {
.errstr = "invalid bpf_context access",
},
{
+ "leak pointer into ctx 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 2 },
+ .errstr_unpriv = "R2 leaks addr into mem",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "leak pointer into ctx 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 leaks addr into mem",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "leak pointer into ctx 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 1 },
+ .errstr_unpriv = "R2 leaks addr into ctx",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "leak pointer into map val",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr_unpriv = "R6 leaks addr into mem",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
"helper access to map: full range",
.insns = {
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),