aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/index.rst1
-rw-r--r--Documentation/bpf/prog_cgroup_sockopt.rst93
-rw-r--r--Documentation/devicetree/bindings/net/dsa/qca8k.txt6
-rw-r--r--Documentation/networking/af_xdp.rst16
-rw-r--r--Documentation/networking/device_drivers/aquantia/atlantic.txt439
-rw-r--r--Documentation/networking/device_drivers/google/gve.rst123
-rw-r--r--Documentation/networking/device_drivers/index.rst1
-rw-r--r--Documentation/networking/device_drivers/mellanox/mlx5.rst19
-rw-r--r--Documentation/networking/ip-sysctl.txt4
-rw-r--r--MAINTAINERS23
-rw-r--r--Makefile2
-rw-r--r--arch/arm/boot/dts/gemini-dlink-dir-685.dts2
-rw-r--r--arch/arm/boot/dts/gemini-dlink-dns-313.dts2
-rw-r--r--arch/arm/boot/dts/imx6ul.dtsi8
-rw-r--r--arch/arm/boot/dts/meson8.dtsi5
-rw-r--r--arch/arm/boot/dts/meson8b.dtsi11
-rw-r--r--arch/arm/mach-omap2/prm3xxx.c2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi18
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/csky/kernel/signal.c5
-rw-r--r--arch/parisc/kernel/module.c4
-rw-r--r--arch/powerpc/include/asm/page.h7
-rw-r--r--arch/powerpc/kernel/head_32.S1
-rw-r--r--arch/powerpc/kernel/head_booke.h10
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S2
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c1
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S15
-rw-r--r--arch/powerpc/mm/book3s64/mmu_context.c55
-rw-r--r--arch/powerpc/mm/mem.c3
-rw-r--r--arch/powerpc/platforms/powermac/Kconfig1
-rw-r--r--drivers/auxdisplay/cfag12864bfb.c5
-rw-r--r--drivers/auxdisplay/ht16k33.c4
-rw-r--r--drivers/clk/clk.c2
-rw-r--r--drivers/clk/meson/g12a.c4
-rw-r--r--drivers/clk/meson/g12a.h2
-rw-r--r--drivers/clk/meson/meson8b.c10
-rw-r--r--drivers/clk/socfpga/clk-s10.c4
-rw-r--r--drivers/clk/tegra/clk-tegra210.c2
-rw-r--r--drivers/clk/ti/clkctrl.c7
-rw-r--r--drivers/hid/hid-ids.h3
-rw-r--r--drivers/hid/hid-logitech-dj.c4
-rw-r--r--drivers/hid/hid-multitouch.c4
-rw-r--r--drivers/hid/hid-quirks.c1
-rw-r--r--drivers/hid/hid-uclogic-core.c2
-rw-r--r--drivers/hid/hid-uclogic-params.c2
-rw-r--r--drivers/hid/intel-ish-hid/ishtp-fw-loader.c2
-rw-r--r--drivers/hid/intel-ish-hid/ishtp-hid-client.c4
-rw-r--r--drivers/hid/intel-ish-hid/ishtp/bus.c15
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c13
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c18
-rw-r--r--drivers/infiniband/hw/mlx5/flow.c13
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c24
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.h2
-rw-r--r--drivers/infiniband/hw/mlx5/main.c79
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h3
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c10
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c33
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c2
-rw-r--r--drivers/iommu/intel-iommu.c7
-rw-r--r--drivers/md/dm-init.c10
-rw-r--r--drivers/md/dm-log-writes.c23
-rw-r--r--drivers/md/dm-table.c2
-rw-r--r--drivers/md/dm-verity-target.c4
-rw-r--r--drivers/mfd/stmfx.c12
-rw-r--r--drivers/mtd/nand/raw/nand_base.c3
-rw-r--r--drivers/mtd/spi-nor/spi-nor.c119
-rw-r--r--drivers/net/bonding/bond_main.c35
-rw-r--r--drivers/net/bonding/bond_netlink.c14
-rw-r--r--drivers/net/bonding/bond_options.c71
-rw-r--r--drivers/net/bonding/bond_procfs.c2
-rw-r--r--drivers/net/bonding/bond_sysfs.c13
-rw-r--r--drivers/net/can/softing/softing_main.c4
-rw-r--r--drivers/net/dsa/microchip/Kconfig1
-rw-r--r--drivers/net/dsa/microchip/ksz9477.c163
-rw-r--r--drivers/net/dsa/microchip/ksz9477_spi.c114
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c12
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h169
-rw-r--r--drivers/net/dsa/microchip/ksz_priv.h23
-rw-r--r--drivers/net/dsa/microchip/ksz_spi.h69
-rw-r--r--drivers/net/dsa/qca8k.c15
-rw-r--r--drivers/net/dsa/qca8k.h2
-rw-r--r--drivers/net/dsa/sja1105/Makefile2
-rw-r--r--drivers/net/dsa/sja1105/sja1105_dynamic_config.c154
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c283
-rw-r--r--drivers/net/dsa/sja1105/sja1105_ptp.c13
-rw-r--r--drivers/net/dsa/sja1105/sja1105_spi.c2
-rw-r--r--drivers/net/dsa/sja1105/sja1105_static_config.c12
-rw-r--r--drivers/net/dsa/sja1105/sja1105_static_config.h3
-rw-r--r--drivers/net/ethernet/Kconfig1
-rw-r--r--drivers/net/ethernet/Makefile1
-rw-r--r--drivers/net/ethernet/allwinner/sun4i-emac.c1
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_cfg.h7
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c2
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h2
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_filters.c12
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_filters.h2
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_main.c34
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_nic.c29
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_nic.h3
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ring.c4
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ring.h9
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c2
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c81
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h7
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c16
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h5
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h18
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/ver.h5
-rw-r--r--drivers/net/ethernet/atheros/atl1c/atl1c_main.c2
-rw-r--r--drivers/net/ethernet/broadcom/Kconfig1
-rw-r--r--drivers/net/ethernet/broadcom/bcm63xx_enet.c1
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.c20
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.h4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c12
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c6
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c9
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c18
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.h4
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c2
-rw-r--r--drivers/net/ethernet/calxeda/xgmac.c4
-rw-r--r--drivers/net/ethernet/emulex/benet/be_ethtool.c28
-rw-r--r--drivers/net/ethernet/google/Kconfig27
-rw-r--r--drivers/net/ethernet/google/Makefile5
-rw-r--r--drivers/net/ethernet/google/gve/Makefile4
-rw-r--r--drivers/net/ethernet/google/gve/gve.h459
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.c387
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.h217
-rw-r--r--drivers/net/ethernet/google/gve/gve_desc.h113
-rw-r--r--drivers/net/ethernet/google/gve/gve_ethtool.c245
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c1230
-rw-r--r--drivers/net/ethernet/google/gve/gve_register.h27
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx.c446
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx.c584
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c75
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.h22
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c58
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h10
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c158
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c25
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h3
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c57
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h14
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c49
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c3
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_ethtool.c47
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h2
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h15
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_main.c29
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_port.c30
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_port.h13
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_rx.c13
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_tx.c17
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000_main.c6
-rw-r--r--drivers/net/ethernet/intel/e1000e/80003es2lan.c2
-rw-r--r--drivers/net/ethernet/intel/e1000e/82571.c2
-rw-r--r--drivers/net/ethernet/intel/e1000e/defines.h3
-rw-r--r--drivers/net/ethernet/intel/e1000e/e1000.h5
-rw-r--r--drivers/net/ethernet/intel/e1000e/ethtool.c14
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c20
-rw-r--r--drivers/net/ethernet/intel/e1000e/mac.c2
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c90
-rw-r--r--drivers/net/ethernet/intel/e1000e/nvm.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h30
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_common.c3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c5
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c596
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_prototype.h4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ptp.c3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c15
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.c12
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_osdep.h10
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_txrx.c6
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_virtchnl.c37
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sched.c4
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_regs.h2
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c75
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c1
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h1
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c14
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c15
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ethtool.c10
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c3
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_path.c81
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c7
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.h129
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c60
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h283
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c108
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h118
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h208
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c231
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c192
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c223
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c111
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c267
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c93
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h97
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c459
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dim.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c803
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c104
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c143
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c98
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c54
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c502
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c235
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h74
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c736
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h75
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c76
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c231
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c72
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c65
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mr.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c334
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/rdma.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c52
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Kconfig1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.h19
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h345
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c371
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c869
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h146
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/switchx2.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/trap.h6
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c59
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.h57
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/match.c103
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c133
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net.h2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ptp.c8
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c50
-rw-r--r--drivers/net/ethernet/sis/sis900.c28
-rw-r--r--drivers/net/ethernet/socionext/Kconfig1
-rw-r--r--drivers/net/ethernet/socionext/netsec.c481
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Kconfig2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h9
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c13
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h16
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c14
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c27
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c190
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c13
-rw-r--r--drivers/net/ethernet/sun/niu.c2
-rw-r--r--drivers/net/loopback.c78
-rw-r--r--drivers/net/ppp/ppp_mppe.c1
-rw-r--r--drivers/net/team/team.c2
-rw-r--r--drivers/net/usb/qmi_wwan.c2
-rw-r--r--drivers/net/veth.c60
-rw-r--r--drivers/net/vrf.c2
-rw-r--r--drivers/nfc/st-nci/i2c.c2
-rw-r--r--drivers/pci/p2pdma.c4
-rw-r--r--drivers/s390/net/qeth_core.h94
-rw-r--r--drivers/s390/net/qeth_core_main.c517
-rw-r--r--drivers/s390/net/qeth_core_mpc.h49
-rw-r--r--drivers/s390/net/qeth_l2_main.c201
-rw-r--r--drivers/s390/net/qeth_l3_main.c210
-rw-r--r--drivers/scsi/qedi/qedi_main.c3
-rw-r--r--drivers/scsi/qedi/qedi_version.h6
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c2
-rw-r--r--drivers/scsi/ufs/ufshcd-pltfrm.c11
-rw-r--r--fs/afs/callback.c4
-rw-r--r--fs/afs/inode.c31
-rw-r--r--fs/afs/internal.h8
-rw-r--r--fs/afs/volume.c1
-rw-r--r--fs/proc/base.c3
-rw-r--r--include/dt-bindings/clock/g12a-clkc.h2
-rw-r--r--include/linux/avf/virtchnl.h4
-rw-r--r--include/linux/bpf-cgroup.h45
-rw-r--r--include/linux/bpf.h2
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/dim.h366
-rw-r--r--include/linux/filter.h13
-rw-r--r--include/linux/intel-ish-client-if.h1
-rw-r--r--include/linux/list.h14
-rw-r--r--include/linux/mlx5/accel.h2
-rw-r--r--include/linux/mlx5/cq.h6
-rw-r--r--include/linux/mlx5/device.h20
-rw-r--r--include/linux/mlx5/driver.h45
-rw-r--r--include/linux/mlx5/eq.h25
-rw-r--r--include/linux/mlx5/eswitch.h40
-rw-r--r--include/linux/mlx5/fs.h18
-rw-r--r--include/linux/mlx5/mlx5_ifc.h361
-rw-r--r--include/linux/mlx5/qp.h12
-rw-r--r--include/linux/mlx5/vport.h7
-rw-r--r--include/linux/mtd/spi-nor.h3
-rw-r--r--include/linux/net_dim.h418
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--include/linux/netlink.h9
-rw-r--r--include/linux/skbuff.h8
-rw-r--r--include/net/bond_options.h1
-rw-r--r--include/net/bonding.h1
-rw-r--r--include/net/devlink.h6
-rw-r--r--include/net/inet_common.h1
-rw-r--r--include/net/ip6_route.h4
-rw-r--r--include/net/ipv6.h7
-rw-r--r--include/net/page_pool.h9
-rw-r--r--include/net/pkt_cls.h2
-rw-r--r--include/net/route.h1
-rw-r--r--include/net/sch_generic.h2
-rw-r--r--include/net/tcp.h8
-rw-r--r--include/net/tls.h15
-rw-r--r--include/net/xdp_sock.h27
-rw-r--r--include/net/xfrm.h53
-rw-r--r--include/trace/events/xdp.h34
-rw-r--r--include/uapi/linux/batadv_packet.h8
-rw-r--r--include/uapi/linux/bpf.h33
-rw-r--r--include/uapi/linux/if_link.h1
-rw-r--r--include/uapi/linux/if_xdp.h8
-rw-r--r--include/uapi/linux/pkt_sched.h9
-rw-r--r--kernel/bpf/cgroup.c352
-rw-r--r--kernel/bpf/core.c10
-rw-r--r--kernel/bpf/cpumap.c105
-rw-r--r--kernel/bpf/devmap.c112
-rw-r--r--kernel/bpf/syscall.c19
-rw-r--r--kernel/bpf/verifier.c136
-rw-r--r--kernel/bpf/xskmap.c3
-rw-r--r--kernel/fork.c58
-rw-r--r--kernel/trace/bpf_trace.c27
-rw-r--r--lib/Kconfig8
-rw-r--r--lib/Kconfig.debug9
-rw-r--r--lib/Makefile2
-rw-r--r--lib/dim/Makefile9
-rw-r--r--lib/dim/dim.c83
-rw-r--r--lib/dim/net_dim.c190
-rw-r--r--lib/test_blackhole_dev.c100
-rw-r--r--net/batman-adv/bat_algo.h7
-rw-r--r--net/batman-adv/bat_v.c3
-rw-r--r--net/batman-adv/bat_v_elp.h4
-rw-r--r--net/batman-adv/bat_v_ogm.h3
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h9
-rw-r--r--net/batman-adv/debugfs.c99
-rw-r--r--net/batman-adv/debugfs.h9
-rw-r--r--net/batman-adv/distributed-arp-table.h7
-rw-r--r--net/batman-adv/fragmentation.h3
-rw-r--r--net/batman-adv/gateway_client.h9
-rw-r--r--net/batman-adv/gateway_common.c1
-rw-r--r--net/batman-adv/gateway_common.h3
-rw-r--r--net/batman-adv/hard-interface.c7
-rw-r--r--net/batman-adv/hard-interface.h5
-rw-r--r--net/batman-adv/hash.h3
-rw-r--r--net/batman-adv/icmp_socket.c20
-rw-r--r--net/batman-adv/icmp_socket.h5
-rw-r--r--net/batman-adv/log.c17
-rw-r--r--net/batman-adv/log.h1
-rw-r--r--net/batman-adv/main.h12
-rw-r--r--net/batman-adv/multicast.c1080
-rw-r--r--net/batman-adv/multicast.h6
-rw-r--r--net/batman-adv/netlink.c4
-rw-r--r--net/batman-adv/netlink.h3
-rw-r--r--net/batman-adv/network-coding.c29
-rw-r--r--net/batman-adv/network-coding.h14
-rw-r--r--net/batman-adv/originator.c4
-rw-r--r--net/batman-adv/originator.h7
-rw-r--r--net/batman-adv/routing.h3
-rw-r--r--net/batman-adv/send.h3
-rw-r--r--net/batman-adv/soft-interface.c6
-rw-r--r--net/batman-adv/soft-interface.h7
-rw-r--r--net/batman-adv/sysfs.c1
-rw-r--r--net/batman-adv/sysfs.h5
-rw-r--r--net/batman-adv/tp_meter.c1
-rw-r--r--net/batman-adv/tp_meter.h3
-rw-r--r--net/batman-adv/translation-table.h9
-rw-r--r--net/batman-adv/tvlv.h3
-rw-r--r--net/batman-adv/types.h69
-rw-r--r--net/bluetooth/6lowpan.c4
-rw-r--r--net/bluetooth/hci_conn.c18
-rw-r--r--net/bluetooth/l2cap_core.c33
-rw-r--r--net/core/dev.c4
-rw-r--r--net/core/devlink.c6
-rw-r--r--net/core/dst.c2
-rw-r--r--net/core/ethtool.c24
-rw-r--r--net/core/filter.c269
-rw-r--r--net/core/link_watch.c13
-rw-r--r--net/core/xdp.c2
-rw-r--r--net/ipv4/af_inet.c31
-rw-r--r--net/ipv4/ah4.c3
-rw-r--r--net/ipv4/devinet.c3
-rw-r--r--net/ipv4/esp4.c30
-rw-r--r--net/ipv4/esp4_offload.c4
-rw-r--r--net/ipv4/fib_trie.c22
-rw-r--r--net/ipv4/gre_demux.c2
-rw-r--r--net/ipv4/ip_output.c25
-rw-r--r--net/ipv4/ipcomp.c3
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c48
-rw-r--r--net/ipv4/tcp_input.c4
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv4/xfrm4_state.c45
-rw-r--r--net/ipv4/xfrm4_tunnel.c3
-rw-r--r--net/ipv6/af_inet6.c43
-rw-r--r--net/ipv6/ah6.c4
-rw-r--r--net/ipv6/esp6.c23
-rw-r--r--net/ipv6/esp6_offload.c4
-rw-r--r--net/ipv6/icmp.c3
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ipcomp6.c3
-rw-r--r--net/ipv6/mip6.c6
-rw-r--r--net/ipv6/route.c125
-rw-r--r--net/ipv6/sysctl_net_ipv6.c6
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/ipv6/udp.c3
-rw-r--r--net/ipv6/xfrm6_state.c137
-rw-r--r--net/netfilter/nf_flow_table_ip.c2
-rw-r--r--net/packet/af_packet.c23
-rw-r--r--net/packet/internal.h1
-rw-r--r--net/rxrpc/output.c3
-rw-r--r--net/sched/act_mirred.c23
-rw-r--r--net/sched/em_ipt.c48
-rw-r--r--net/sched/sch_cbs.c9
-rw-r--r--net/sched/sch_etf.c10
-rw-r--r--net/sched/sch_taprio.c421
-rw-r--r--net/sctp/endpointola.c8
-rw-r--r--net/smc/af_smc.c78
-rw-r--r--net/smc/smc_core.c3
-rw-r--r--net/socket.c53
-rw-r--r--net/tipc/core.c12
-rw-r--r--net/tipc/link.c9
-rw-r--r--net/tipc/netlink_compat.c18
-rw-r--r--net/tipc/udp_media.c85
-rw-r--r--net/tls/tls_main.c3
-rw-r--r--net/xdp/xsk.c36
-rw-r--r--net/xdp/xsk_queue.h14
-rw-r--r--net/xfrm/xfrm_device.c5
-rw-r--r--net/xfrm/xfrm_input.c25
-rw-r--r--net/xfrm/xfrm_interface.c6
-rw-r--r--net/xfrm/xfrm_policy.c2
-rw-r--r--net/xfrm/xfrm_state.c437
-rw-r--r--samples/bpf/Makefile3
-rwxr-xr-xsamples/bpf/do_hbm_test.sh22
-rw-r--r--samples/bpf/hbm.c18
-rw-r--r--samples/bpf/hbm_edt_kern.c168
-rw-r--r--samples/bpf/hbm_kern.h40
-rw-r--r--samples/bpf/ibumad_kern.c18
-rw-r--r--samples/bpf/tcp_bpf.readme2
-rw-r--r--samples/bpf/tcp_dumpstats_kern.c68
-rw-r--r--samples/bpf/xdp_adjust_tail_user.c12
-rw-r--r--samples/bpf/xdp_redirect_map_user.c15
-rw-r--r--samples/bpf/xdp_redirect_user.c15
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c12
-rw-r--r--samples/bpf/xdpsock_user.c44
-rw-r--r--samples/pidfd/pidfd-metadata.c8
-rw-r--r--samples/pktgen/README.rst1
-rw-r--r--samples/pktgen/functions.sh34
-rw-r--r--samples/pktgen/parameters.sh7
-rwxr-xr-xsamples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh11
-rwxr-xr-xsamples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh11
-rwxr-xr-xsamples/pktgen/pktgen_sample01_simple.sh11
-rwxr-xr-xsamples/pktgen/pktgen_sample02_multiqueue.sh11
-rwxr-xr-xsamples/pktgen/pktgen_sample03_burst_single_flow.sh11
-rwxr-xr-xsamples/pktgen/pktgen_sample04_many_flows.sh11
-rwxr-xr-xsamples/pktgen/pktgen_sample05_flow_per_thread.sh12
-rwxr-xr-xsamples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh11
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst7
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst3
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool9
-rw-r--r--tools/bpf/bpftool/cgroup.c5
-rw-r--r--tools/bpf/bpftool/main.h1
-rw-r--r--tools/bpf/bpftool/prog.c3
-rw-r--r--tools/include/uapi/linux/bpf.h26
-rw-r--r--tools/include/uapi/linux/if_link.h1
-rw-r--r--tools/include/uapi/linux/if_xdp.h8
-rw-r--r--tools/lib/bpf/libbpf.c23
-rw-r--r--tools/lib/bpf/libbpf_probes.c1
-rw-r--r--tools/lib/bpf/xsk.c15
-rw-r--r--tools/lib/bpf/xsk.h2
-rw-r--r--tools/testing/selftests/bpf/.gitignore3
-rw-r--r--tools/testing/selftests/bpf/Makefile10
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf.h9
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_multi.c71
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c111
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h36
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_rtt.c61
-rw-r--r--tools/testing/selftests/bpf/progs/test_jhash.h3
-rw-r--r--tools/testing/selftests/bpf/progs/test_seg6_loop.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_verif_scale2.c2
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_map.c31
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_tx.c12
-rw-r--r--tools/testing/selftests/bpf/test_section_names.c10
-rw-r--r--tools/testing/selftests/bpf/test_sockopt.c1021
-rw-r--r--tools/testing/selftests/bpf/test_sockopt_multi.c374
-rw-r--r--tools/testing/selftests/bpf/test_sockopt_sk.c211
-rw-r--r--tools/testing/selftests/bpf/test_tcp_rtt.c254
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_veth.sh118
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/config1
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh36
-rwxr-xr-xtools/testing/selftests/net/test_blackhole_dev.sh11
-rw-r--r--tools/testing/selftests/powerpc/mm/.gitignore3
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile4
-rw-r--r--tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c87
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json94
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json276
552 files changed, 23852 insertions, 6051 deletions
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index d3fe4cac0c90..801a6ed3f2e5 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -42,6 +42,7 @@ Program types
.. toctree::
:maxdepth: 1
+ prog_cgroup_sockopt
prog_cgroup_sysctl
prog_flow_dissector
diff --git a/Documentation/bpf/prog_cgroup_sockopt.rst b/Documentation/bpf/prog_cgroup_sockopt.rst
new file mode 100644
index 000000000000..c47d974629ae
--- /dev/null
+++ b/Documentation/bpf/prog_cgroup_sockopt.rst
@@ -0,0 +1,93 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+BPF_PROG_TYPE_CGROUP_SOCKOPT
+============================
+
+``BPF_PROG_TYPE_CGROUP_SOCKOPT`` program type can be attached to two
+cgroup hooks:
+
+* ``BPF_CGROUP_GETSOCKOPT`` - called every time process executes ``getsockopt``
+ system call.
+* ``BPF_CGROUP_SETSOCKOPT`` - called every time process executes ``setsockopt``
+ system call.
+
+The context (``struct bpf_sockopt``) has associated socket (``sk``) and
+all input arguments: ``level``, ``optname``, ``optval`` and ``optlen``.
+
+BPF_CGROUP_SETSOCKOPT
+=====================
+
+``BPF_CGROUP_SETSOCKOPT`` is triggered *before* the kernel handling of
+sockopt and it has writable context: it can modify the supplied arguments
+before passing them down to the kernel. This hook has access to the cgroup
+and socket local storage.
+
+If BPF program sets ``optlen`` to -1, the control will be returned
+back to the userspace after all other BPF programs in the cgroup
+chain finish (i.e. kernel ``setsockopt`` handling will *not* be executed).
+
+Note, that ``optlen`` can not be increased beyond the user-supplied
+value. It can only be decreased or set to -1. Any other value will
+trigger ``EFAULT``.
+
+Return Type
+-----------
+
+* ``0`` - reject the syscall, ``EPERM`` will be returned to the userspace.
+* ``1`` - success, continue with next BPF program in the cgroup chain.
+
+BPF_CGROUP_GETSOCKOPT
+=====================
+
+``BPF_CGROUP_GETSOCKOPT`` is triggered *after* the kernel handing of
+sockopt. The BPF hook can observe ``optval``, ``optlen`` and ``retval``
+if it's interested in whatever kernel has returned. BPF hook can override
+the values above, adjust ``optlen`` and reset ``retval`` to 0. If ``optlen``
+has been increased above initial ``getsockopt`` value (i.e. userspace
+buffer is too small), ``EFAULT`` is returned.
+
+This hook has access to the cgroup and socket local storage.
+
+Note, that the only acceptable value to set to ``retval`` is 0 and the
+original value that the kernel returned. Any other value will trigger
+``EFAULT``.
+
+Return Type
+-----------
+
+* ``0`` - reject the syscall, ``EPERM`` will be returned to the userspace.
+* ``1`` - success: copy ``optval`` and ``optlen`` to userspace, return
+ ``retval`` from the syscall (note that this can be overwritten by
+ the BPF program from the parent cgroup).
+
+Cgroup Inheritance
+==================
+
+Suppose, there is the following cgroup hierarchy where each cgroup
+has ``BPF_CGROUP_GETSOCKOPT`` attached at each level with
+``BPF_F_ALLOW_MULTI`` flag::
+
+ A (root, parent)
+ \
+ B (child)
+
+When the application calls ``getsockopt`` syscall from the cgroup B,
+the programs are executed from the bottom up: B, A. First program
+(B) sees the result of kernel's ``getsockopt``. It can optionally
+adjust ``optval``, ``optlen`` and reset ``retval`` to 0. After that
+control will be passed to the second (A) program which will see the
+same context as B including any potential modifications.
+
+Same for ``BPF_CGROUP_SETSOCKOPT``: if the program is attached to
+A and B, the trigger order is B, then A. If B does any changes
+to the input arguments (``level``, ``optname``, ``optval``, ``optlen``),
+then the next program in the chain (A) will see those changes,
+*not* the original input ``setsockopt`` arguments. The potentially
+modified values will be then passed down to the kernel.
+
+Example
+=======
+
+See ``tools/testing/selftests/bpf/progs/sockopt_sk.c`` for an example
+of BPF program that handles socket options.
diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
index 93a7469e70d4..ccbc6d89325d 100644
--- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt
+++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
@@ -9,6 +9,10 @@ Required properties:
- #size-cells: must be 0
- #address-cells: must be 1
+Optional properties:
+
+- reset-gpios: GPIO to be used to reset the whole device
+
Subnodes:
The integrated switch subnode should be specified according to the binding
@@ -66,6 +70,7 @@ for the external mdio-bus configuration:
#address-cells = <1>;
#size-cells = <0>;
+ reset-gpios = <&gpio 42 GPIO_ACTIVE_LOW>;
reg = <0x10>;
ports {
@@ -123,6 +128,7 @@ for the internal master mdio-bus configuration:
#address-cells = <1>;
#size-cells = <0>;
+ reset-gpios = <&gpio 42 GPIO_ACTIVE_LOW>;
reg = <0x10>;
ports {
diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index 50bccbf68308..eeedc2e826aa 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -220,7 +220,21 @@ Usage
In order to use AF_XDP sockets there are two parts needed. The
user-space application and the XDP program. For a complete setup and
usage example, please refer to the sample application. The user-space
-side is xdpsock_user.c and the XDP side xdpsock_kern.c.
+side is xdpsock_user.c and the XDP side is part of libbpf.
+
+The XDP code sample included in tools/lib/bpf/xsk.c is the following::
+
+ SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
+ {
+ int index = ctx->rx_queue_index;
+
+ // A set entry here means that the correspnding queue_id
+ // has an active AF_XDP socket bound to it.
+ if (bpf_map_lookup_elem(&xsks_map, &index))
+ return bpf_redirect_map(&xsks_map, index, 0);
+
+ return XDP_PASS;
+ }
Naive ring dequeue and enqueue could look like this::
diff --git a/Documentation/networking/device_drivers/aquantia/atlantic.txt b/Documentation/networking/device_drivers/aquantia/atlantic.txt
new file mode 100644
index 000000000000..d235cbaeccc6
--- /dev/null
+++ b/Documentation/networking/device_drivers/aquantia/atlantic.txt
@@ -0,0 +1,439 @@
+aQuantia AQtion Driver for the aQuantia Multi-Gigabit PCI Express Family of
+Ethernet Adapters
+=============================================================================
+
+Contents
+========
+
+- Identifying Your Adapter
+- Configuration
+- Supported ethtool options
+- Command Line Parameters
+- Config file parameters
+- Support
+- License
+
+Identifying Your Adapter
+========================
+
+The driver in this release is compatible with AQC-100, AQC-107, AQC-108 based ethernet adapters.
+
+
+SFP+ Devices (for AQC-100 based adapters)
+----------------------------------
+
+This release tested with passive Direct Attach Cables (DAC) and SFP+/LC Optical Transceiver.
+
+Configuration
+=========================
+ Viewing Link Messages
+ ---------------------
+ Link messages will not be displayed to the console if the distribution is
+ restricting system messages. In order to see network driver link messages on
+ your console, set dmesg to eight by entering the following:
+
+ dmesg -n 8
+
+ NOTE: This setting is not saved across reboots.
+
+ Jumbo Frames
+ ------------
+ The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
+ enabled by changing the MTU to a value larger than the default of 1500.
+ The maximum value for the MTU is 16000. Use the `ip` command to
+ increase the MTU size. For example:
+
+ ip link set mtu 16000 dev enp1s0
+
+ ethtool
+ -------
+ The driver utilizes the ethtool interface for driver configuration and
+ diagnostics, as well as displaying statistical information. The latest
+ ethtool version is required for this functionality.
+
+ NAPI
+ ----
+ NAPI (Rx polling mode) is supported in the atlantic driver.
+
+Supported ethtool options
+============================
+ Viewing adapter settings
+ ---------------------
+ ethtool <ethX>
+
+ Output example:
+
+ Settings for enp1s0:
+ Supported ports: [ TP ]
+ Supported link modes: 100baseT/Full
+ 1000baseT/Full
+ 10000baseT/Full
+ 2500baseT/Full
+ 5000baseT/Full
+ Supported pause frame use: Symmetric
+ Supports auto-negotiation: Yes
+ Supported FEC modes: Not reported
+ Advertised link modes: 100baseT/Full
+ 1000baseT/Full
+ 10000baseT/Full
+ 2500baseT/Full
+ 5000baseT/Full
+ Advertised pause frame use: Symmetric
+ Advertised auto-negotiation: Yes
+ Advertised FEC modes: Not reported
+ Speed: 10000Mb/s
+ Duplex: Full
+ Port: Twisted Pair
+ PHYAD: 0
+ Transceiver: internal
+ Auto-negotiation: on
+ MDI-X: Unknown
+ Supports Wake-on: g
+ Wake-on: d
+ Link detected: yes
+
+ ---
+ Note: AQrate speeds (2.5/5 Gb/s) will be displayed only with linux kernels > 4.10.
+ But you can still use these speeds:
+ ethtool -s eth0 autoneg off speed 2500
+
+ Viewing adapter information
+ ---------------------
+ ethtool -i <ethX>
+
+ Output example:
+
+ driver: atlantic
+ version: 5.2.0-050200rc5-generic-kern
+ firmware-version: 3.1.78
+ expansion-rom-version:
+ bus-info: 0000:01:00.0
+ supports-statistics: yes
+ supports-test: no
+ supports-eeprom-access: no
+ supports-register-dump: yes
+ supports-priv-flags: no
+
+
+ Viewing Ethernet adapter statistics:
+ ---------------------
+ ethtool -S <ethX>
+
+ Output example:
+ NIC statistics:
+ InPackets: 13238607
+ InUCast: 13293852
+ InMCast: 52
+ InBCast: 3
+ InErrors: 0
+ OutPackets: 23703019
+ OutUCast: 23704941
+ OutMCast: 67
+ OutBCast: 11
+ InUCastOctects: 213182760
+ OutUCastOctects: 22698443
+ InMCastOctects: 6600
+ OutMCastOctects: 8776
+ InBCastOctects: 192
+ OutBCastOctects: 704
+ InOctects: 2131839552
+ OutOctects: 226938073
+ InPacketsDma: 95532300
+ OutPacketsDma: 59503397
+ InOctetsDma: 1137102462
+ OutOctetsDma: 2394339518
+ InDroppedDma: 0
+ Queue[0] InPackets: 23567131
+ Queue[0] OutPackets: 20070028
+ Queue[0] InJumboPackets: 0
+ Queue[0] InLroPackets: 0
+ Queue[0] InErrors: 0
+ Queue[1] InPackets: 45428967
+ Queue[1] OutPackets: 11306178
+ Queue[1] InJumboPackets: 0
+ Queue[1] InLroPackets: 0
+ Queue[1] InErrors: 0
+ Queue[2] InPackets: 3187011
+ Queue[2] OutPackets: 13080381
+ Queue[2] InJumboPackets: 0
+ Queue[2] InLroPackets: 0
+ Queue[2] InErrors: 0
+ Queue[3] InPackets: 23349136
+ Queue[3] OutPackets: 15046810
+ Queue[3] InJumboPackets: 0
+ Queue[3] InLroPackets: 0
+ Queue[3] InErrors: 0
+
+ Interrupt coalescing support
+ ---------------------------------
+ ITR mode, TX/RX coalescing timings could be viewed with:
+
+ ethtool -c <ethX>
+
+ and changed with:
+
+ ethtool -C <ethX> tx-usecs <usecs> rx-usecs <usecs>
+
+ To disable coalescing:
+
+ ethtool -C <ethX> tx-usecs 0 rx-usecs 0 tx-max-frames 1 tx-max-frames 1
+
+ Wake on LAN support
+ ---------------------------------
+
+ WOL support by magic packet:
+
+ ethtool -s <ethX> wol g
+
+ To disable WOL:
+
+ ethtool -s <ethX> wol d
+
+ Set and check the driver message level
+ ---------------------------------
+
+ Set message level
+
+ ethtool -s <ethX> msglvl <level>
+
+ Level values:
+
+ 0x0001 - general driver status.
+ 0x0002 - hardware probing.
+ 0x0004 - link state.
+ 0x0008 - periodic status check.
+ 0x0010 - interface being brought down.
+ 0x0020 - interface being brought up.
+ 0x0040 - receive error.
+ 0x0080 - transmit error.
+ 0x0200 - interrupt handling.
+ 0x0400 - transmit completion.
+ 0x0800 - receive completion.
+ 0x1000 - packet contents.
+ 0x2000 - hardware status.
+ 0x4000 - Wake-on-LAN status.
+
+ By default, the level of debugging messages is set 0x0001(general driver status).
+
+ Check message level
+
+ ethtool <ethX> | grep "Current message level"
+
+ If you want to disable the output of messages
+
+ ethtool -s <ethX> msglvl 0
+
+ RX flow rules (ntuple filters)
+ ---------------------------------
+ There are separate rules supported, that applies in that order:
+ 1. 16 VLAN ID rules
+ 2. 16 L2 EtherType rules
+ 3. 8 L3/L4 5-Tuple rules
+
+
+ The driver utilizes the ethtool interface for configuring ntuple filters,
+ via "ethtool -N <device> <filter>".
+
+ To enable or disable the RX flow rules:
+
+ ethtool -K ethX ntuple <on|off>
+
+ When disabling ntuple filters, all the user programed filters are
+ flushed from the driver cache and hardware. All needed filters must
+ be re-added when ntuple is re-enabled.
+
+ Because of the fixed order of the rules, the location of filters is also fixed:
+ - Locations 0 - 15 for VLAN ID filters
+ - Locations 16 - 31 for L2 EtherType filters
+ - Locations 32 - 39 for L3/L4 5-tuple filters (locations 32, 36 for IPv6)
+
+ The L3/L4 5-tuple (protocol, source and destination IP address, source and
+ destination TCP/UDP/SCTP port) is compared against 8 filters. For IPv4, up to
+ 8 source and destination addresses can be matched. For IPv6, up to 2 pairs of
+ addresses can be supported. Source and destination ports are only compared for
+ TCP/UDP/SCTP packets.
+
+ To add a filter that directs packet to queue 5, use <-N|-U|--config-nfc|--config-ntuple> switch:
+
+ ethtool -N <ethX> flow-type udp4 src-ip 10.0.0.1 dst-ip 10.0.0.2 src-port 2000 dst-port 2001 action 5 <loc 32>
+
+ - action is the queue number.
+ - loc is the rule number.
+
+ For "flow-type ip4|udp4|tcp4|sctp4|ip6|udp6|tcp6|sctp6" you must set the loc
+ number within 32 - 39.
+ For "flow-type ip4|udp4|tcp4|sctp4|ip6|udp6|tcp6|sctp6" you can set 8 rules
+ for traffic IPv4 or you can set 2 rules for traffic IPv6. Loc number traffic
+ IPv6 is 32 and 36.
+ At the moment you can not use IPv4 and IPv6 filters at the same time.
+
+ Example filter for IPv6 filter traffic:
+
+ sudo ethtool -N <ethX> flow-type tcp6 src-ip 2001:db8:0:f101::1 dst-ip 2001:db8:0:f101::2 action 1 loc 32
+ sudo ethtool -N <ethX> flow-type ip6 src-ip 2001:db8:0:f101::2 dst-ip 2001:db8:0:f101::5 action -1 loc 36
+
+ Example filter for IPv4 filter traffic:
+
+ sudo ethtool -N <ethX> flow-type udp4 src-ip 10.0.0.4 dst-ip 10.0.0.7 src-port 2000 dst-port 2001 loc 32
+ sudo ethtool -N <ethX> flow-type tcp4 src-ip 10.0.0.3 dst-ip 10.0.0.9 src-port 2000 dst-port 2001 loc 33
+ sudo ethtool -N <ethX> flow-type ip4 src-ip 10.0.0.6 dst-ip 10.0.0.4 loc 34
+
+ If you set action -1, then all traffic corresponding to the filter will be discarded.
+ The maximum value action is 31.
+
+
+ The VLAN filter (VLAN id) is compared against 16 filters.
+ VLAN id must be accompanied by mask 0xF000. That is to distinguish VLAN filter
+ from L2 Ethertype filter with UserPriority since both User Priority and VLAN ID
+ are passed in the same 'vlan' parameter.
+
+ To add a filter that directs packets from VLAN 2001 to queue 5:
+ ethtool -N <ethX> flow-type ip4 vlan 2001 m 0xF000 action 1 loc 0
+
+
+ L2 EtherType filters allows filter packet by EtherType field or both EtherType
+ and User Priority (PCP) field of 802.1Q.
+ UserPriority (vlan) parameter must be accompanied by mask 0x1FFF. That is to
+ distinguish VLAN filter from L2 Ethertype filter with UserPriority since both
+ User Priority and VLAN ID are passed in the same 'vlan' parameter.
+
+ To add a filter that directs IP4 packess of priority 3 to queue 3:
+ ethtool -N <ethX> flow-type ether proto 0x800 vlan 0x600 m 0x1FFF action 3 loc 16
+
+
+ To see the list of filters currently present:
+
+ ethtool <-u|-n|--show-nfc|--show-ntuple> <ethX>
+
+ Rules may be deleted from the table itself. This is done using:
+
+ sudo ethtool <-N|-U|--config-nfc|--config-ntuple> <ethX> delete <loc>
+
+ - loc is the rule number to be deleted.
+
+ Rx filters is an interface to load the filter table that funnels all flow
+ into queue 0 unless an alternative queue is specified using "action". In that
+ case, any flow that matches the filter criteria will be directed to the
+ appropriate queue. RX filters is supported on all kernels 2.6.30 and later.
+
+ RSS for UDP
+ ---------------------------------
+ Currently, NIC does not support RSS for fragmented IP packets, which leads to
+ incorrect working of RSS for fragmented UDP traffic. To disable RSS for UDP the
+ RX Flow L3/L4 rule may be used.
+
+ Example:
+ ethtool -N eth0 flow-type udp4 action 0 loc 32
+
+Command Line Parameters
+=======================
+The following command line parameters are available on atlantic driver:
+
+aq_itr -Interrupt throttling mode
+----------------------------------------
+Accepted values: 0, 1, 0xFFFF
+Default value: 0xFFFF
+0 - Disable interrupt throttling.
+1 - Enable interrupt throttling and use specified tx and rx rates.
+0xFFFF - Auto throttling mode. Driver will choose the best RX and TX
+ interrupt throtting settings based on link speed.
+
+aq_itr_tx - TX interrupt throttle rate
+----------------------------------------
+Accepted values: 0 - 0x1FF
+Default value: 0
+TX side throttling in microseconds. Adapter will setup maximum interrupt delay
+to this value. Minimum interrupt delay will be a half of this value
+
+aq_itr_rx - RX interrupt throttle rate
+----------------------------------------
+Accepted values: 0 - 0x1FF
+Default value: 0
+RX side throttling in microseconds. Adapter will setup maximum interrupt delay
+to this value. Minimum interrupt delay will be a half of this value
+
+Note: ITR settings could be changed in runtime by ethtool -c means (see below)
+
+Config file parameters
+=======================
+For some fine tuning and performance optimizations,
+some parameters can be changed in the {source_dir}/aq_cfg.h file.
+
+AQ_CFG_RX_PAGEORDER
+----------------------------------------
+Default value: 0
+RX page order override. Thats a power of 2 number of RX pages allocated for
+each descriptor. Received descriptor size is still limited by AQ_CFG_RX_FRAME_MAX.
+Increasing pageorder makes page reuse better (actual on iommu enabled systems).
+
+AQ_CFG_RX_REFILL_THRES
+----------------------------------------
+Default value: 32
+RX refill threshold. RX path will not refill freed descriptors until the
+specified number of free descriptors is observed. Larger values may help
+better page reuse but may lead to packet drops as well.
+
+AQ_CFG_VECS_DEF
+------------------------------------------------------------
+Number of queues
+Valid Range: 0 - 8 (up to AQ_CFG_VECS_MAX)
+Default value: 8
+Notice this value will be capped by the number of cores available on the system.
+
+AQ_CFG_IS_RSS_DEF
+------------------------------------------------------------
+Enable/disable Receive Side Scaling
+
+This feature allows the adapter to distribute receive processing
+across multiple CPU-cores and to prevent from overloading a single CPU core.
+
+Valid values
+0 - disabled
+1 - enabled
+
+Default value: 1
+
+AQ_CFG_NUM_RSS_QUEUES_DEF
+------------------------------------------------------------
+Number of queues for Receive Side Scaling
+Valid Range: 0 - 8 (up to AQ_CFG_VECS_DEF)
+
+Default value: AQ_CFG_VECS_DEF
+
+AQ_CFG_IS_LRO_DEF
+------------------------------------------------------------
+Enable/disable Large Receive Offload
+
+This offload enables the adapter to coalesce multiple TCP segments and indicate
+them as a single coalesced unit to the OS networking subsystem.
+The system consumes less energy but it also introduces more latency in packets processing.
+
+Valid values
+0 - disabled
+1 - enabled
+
+Default value: 1
+
+AQ_CFG_TX_CLEAN_BUDGET
+----------------------------------------
+Maximum descriptors to cleanup on TX at once.
+Default value: 256
+
+After the aq_cfg.h file changed the driver must be rebuilt to take effect.
+
+Support
+=======
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related
+to the issue to [email protected]
+
+License
+=======
+
+aQuantia Corporation Network Driver
+Copyright(c) 2014 - 2019 aQuantia Corporation.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms and conditions of the GNU General Public License,
+version 2, as published by the Free Software Foundation.
diff --git a/Documentation/networking/device_drivers/google/gve.rst b/Documentation/networking/device_drivers/google/gve.rst
new file mode 100644
index 000000000000..793693cef6e3
--- /dev/null
+++ b/Documentation/networking/device_drivers/google/gve.rst
@@ -0,0 +1,123 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+==============================================================
+Linux kernel driver for Compute Engine Virtual Ethernet (gve):
+==============================================================
+
+Supported Hardware
+===================
+The GVE driver binds to a single PCI device id used by the virtual
+Ethernet device found in some Compute Engine VMs.
+
++--------------+----------+---------+
+|Field | Value | Comments|
++==============+==========+=========+
+|Vendor ID | `0x1AE0` | Google |
++--------------+----------+---------+
+|Device ID | `0x0042` | |
++--------------+----------+---------+
+|Sub-vendor ID | `0x1AE0` | Google |
++--------------+----------+---------+
+|Sub-device ID | `0x0058` | |
++--------------+----------+---------+
+|Revision ID | `0x0` | |
++--------------+----------+---------+
+|Device Class | `0x200` | Ethernet|
++--------------+----------+---------+
+
+PCI Bars
+========
+The gVNIC PCI device exposes three 32-bit memory BARS:
+- Bar0 - Device configuration and status registers.
+- Bar1 - MSI-X vector table
+- Bar2 - IRQ, RX and TX doorbells
+
+Device Interactions
+===================
+The driver interacts with the device in the following ways:
+ - Registers
+ - A block of MMIO registers
+ - See gve_register.h for more detail
+ - Admin Queue
+ - See description below
+ - Reset
+ - At any time the device can be reset
+ - Interrupts
+ - See supported interrupts below
+ - Transmit and Receive Queues
+ - See description below
+
+Registers
+---------
+All registers are MMIO and big endian.
+
+The registers are used for initializing and configuring the device as well as
+querying device status in response to management interrupts.
+
+Admin Queue (AQ)
+----------------
+The Admin Queue is a PAGE_SIZE memory block, treated as an array of AQ
+commands, used by the driver to issue commands to the device and set up
+resources.The driver and the device maintain a count of how many commands
+have been submitted and executed. To issue AQ commands, the driver must do
+the following (with proper locking):
+
+1) Copy new commands into next available slots in the AQ array
+2) Increment its counter by he number of new commands
+3) Write the counter into the GVE_ADMIN_QUEUE_DOORBELL register
+4) Poll the ADMIN_QUEUE_EVENT_COUNTER register until it equals
+ the value written to the doorbell, or until a timeout.
+
+The device will update the status field in each AQ command reported as
+executed through the ADMIN_QUEUE_EVENT_COUNTER register.
+
+Device Resets
+-------------
+A device reset is triggered by writing 0x0 to the AQ PFN register.
+This causes the device to release all resources allocated by the
+driver, including the AQ itself.
+
+Interrupts
+----------
+The following interrupts are supported by the driver:
+
+Management Interrupt
+~~~~~~~~~~~~~~~~~~~~
+The management interrupt is used by the device to tell the driver to
+look at the GVE_DEVICE_STATUS register.
+
+The handler for the management irq simply queues the service task in
+the workqueue to check the register and acks the irq.
+
+Notification Block Interrupts
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The notification block interrupts are used to tell the driver to poll
+the queues associated with that interrupt.
+
+The handler for these irqs schedule the napi for that block to run
+and poll the queues.
+
+Traffic Queues
+--------------
+gVNIC's queues are composed of a descriptor ring and a buffer and are
+assigned to a notification block.
+
+The descriptor rings are power-of-two-sized ring buffers consisting of
+fixed-size descriptors. They advance their head pointer using a __be32
+doorbell located in Bar2. The tail pointers are advanced by consuming
+descriptors in-order and updating a __be32 counter. Both the doorbell
+and the counter overflow to zero.
+
+Each queue's buffers must be registered in advance with the device as a
+queue page list, and packet data can only be put in those pages.
+
+Transmit
+~~~~~~~~
+gve maps the buffers for transmit rings into a FIFO and copies the packets
+into the FIFO before sending them to the NIC.
+
+Receive
+~~~~~~~
+The buffers for receive rings are put into a data ring that is the same
+length as the descriptor ring and the head and tail pointers advance over
+the rings together.
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 24598d5f8ffa..2b7fefe72351 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -21,6 +21,7 @@ Contents:
intel/i40e
intel/iavf
intel/ice
+ google/gve
mellanox/mlx5
.. only:: subproject
diff --git a/Documentation/networking/device_drivers/mellanox/mlx5.rst b/Documentation/networking/device_drivers/mellanox/mlx5.rst
index 4eeef2df912f..214325897732 100644
--- a/Documentation/networking/device_drivers/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/mellanox/mlx5.rst
@@ -10,6 +10,7 @@ Contents
========
- `Enabling the driver and kconfig options`_
+- `Devlink info`_
- `Devlink health reporters`_
Enabling the driver and kconfig options
@@ -101,6 +102,24 @@ Enabling the driver and kconfig options
- CONFIG_VXLAN: When chosen, mlx5 vxaln support will be enabled.
- CONFIG_MLXFW: When chosen, mlx5 firmware flashing support will be enabled (via devlink and ethtool).
+Devlink info
+============
+
+The devlink info reports the running and stored firmware versions on device.
+It also prints the device PSID which represents the HCA board type ID.
+
+User command example::
+
+ $ devlink dev info pci/0000:00:06.0
+ pci/0000:00:06.0:
+ driver mlx5_core
+ versions:
+ fixed:
+ fw.psid MT_0000000009
+ running:
+ fw.version 16.26.0100
+ stored:
+ fw.version 16.26.0100
Devlink health reporters
========================
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index e0d8a96e2c67..f0e6d1f53485 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1452,7 +1452,7 @@ flowlabel_reflect - INTEGER
environments. See RFC 7690 and:
https://tools.ietf.org/html/draft-wang-6man-flow-label-reflection-01
- This is a mask of two bits.
+ This is a bitmask.
1: enabled for established flows
Note that this prevents automatic flowlabel changes, as done
@@ -1463,6 +1463,8 @@ flowlabel_reflect - INTEGER
If set, a RST packet sent in response to a SYN packet on a closed
port will reflect the incoming flow label.
+ 4: enabled for ICMPv6 echo reply messages.
+
Default: 0
fib_multipath_hash_policy - INTEGER
diff --git a/MAINTAINERS b/MAINTAINERS
index a75f8478b872..449e7cdb3303 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1140,6 +1140,15 @@ L: [email protected]
S: Maintained
F: drivers/media/i2c/aptina-pll.*
+AQUANTIA ETHERNET DRIVER (atlantic)
+M: Igor Russkikh <[email protected]>
+S: Supported
+W: http://www.aquantia.com
+Q: http://patchwork.ozlabs.org/project/netdev/list/
+F: drivers/net/ethernet/aquantia/atlantic/
+F: Documentation/networking/device_drivers/aquantia/atlantic.txt
+
ARC FRAMEBUFFER DRIVER
M: Jaya Kumar <[email protected]>
S: Maintained
@@ -3122,6 +3131,7 @@ F: arch/arm/mach-bcm/
BROADCOM BCM2835 ARM ARCHITECTURE
M: Eric Anholt <[email protected]>
M: Stefan Wahren <[email protected]>
L: [email protected] (moderated for non-subscribers)
L: [email protected] (moderated for non-subscribers)
T: git git://github.com/anholt/linux
@@ -3151,6 +3161,7 @@ F: arch/arm/boot/dts/bcm953012*
BROADCOM BCM53573 ARM ARCHITECTURE
M: RafaÅ‚ MiÅ‚ecki <[email protected]>
S: Maintained
F: arch/arm/boot/dts/bcm53573*
@@ -5593,7 +5604,8 @@ F: include/linux/dynamic_debug.h
DYNAMIC INTERRUPT MODERATION
M: Tal Gilboa <[email protected]>
S: Maintained
-F: include/linux/net_dim.h
+F: include/linux/dim.h
+F: lib/dim/
DZ DECSTATION DZ11 SERIAL DRIVER
M: "Maciej W. Rozycki" <[email protected]>
@@ -6708,6 +6720,15 @@ L: [email protected]
S: Maintained
F: drivers/input/touchscreen/goodix.c
+GOOGLE ETHERNET DRIVERS
+M: Catherine Sullivan <[email protected]>
+R: Sagi Shahar <[email protected]>
+R: Jon Olson <[email protected]>
+S: Supported
+F: Documentation/networking/device_drivers/google/gve.txt
+F: drivers/net/ethernet/google
+
GPD POCKET FAN DRIVER
M: Hans de Goede <[email protected]>
diff --git a/Makefile b/Makefile
index 9514dac2660a..7a7c17eb0cbf 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 2
SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
NAME = Golden Lions
# *DOCUMENTATION*
diff --git a/arch/arm/boot/dts/gemini-dlink-dir-685.dts b/arch/arm/boot/dts/gemini-dlink-dir-685.dts
index cfbfbc91a1e1..3613f05f8a80 100644
--- a/arch/arm/boot/dts/gemini-dlink-dir-685.dts
+++ b/arch/arm/boot/dts/gemini-dlink-dir-685.dts
@@ -20,7 +20,7 @@
};
chosen {
- bootargs = "console=ttyS0,19200n8 root=/dev/sda1 rw rootwait";
+ bootargs = "console=ttyS0,19200n8 root=/dev/sda1 rw rootwait consoleblank=300";
stdout-path = "uart0:19200n8";
};
diff --git a/arch/arm/boot/dts/gemini-dlink-dns-313.dts b/arch/arm/boot/dts/gemini-dlink-dns-313.dts
index b12504e10f0b..360642a02a48 100644
--- a/arch/arm/boot/dts/gemini-dlink-dns-313.dts
+++ b/arch/arm/boot/dts/gemini-dlink-dns-313.dts
@@ -11,7 +11,7 @@
/ {
model = "D-Link DNS-313 1-Bay Network Storage Enclosure";
- compatible = "dlink,dir-313", "cortina,gemini";
+ compatible = "dlink,dns-313", "cortina,gemini";
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi
index bbf010c73336..a7f6d1d58e20 100644
--- a/arch/arm/boot/dts/imx6ul.dtsi
+++ b/arch/arm/boot/dts/imx6ul.dtsi
@@ -358,7 +358,7 @@
pwm1: pwm@2080000 {
compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
reg = <0x02080000 0x4000>;
- interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX6UL_CLK_PWM1>,
<&clks IMX6UL_CLK_PWM1>;
clock-names = "ipg", "per";
@@ -369,7 +369,7 @@
pwm2: pwm@2084000 {
compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
reg = <0x02084000 0x4000>;
- interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX6UL_CLK_PWM2>,
<&clks IMX6UL_CLK_PWM2>;
clock-names = "ipg", "per";
@@ -380,7 +380,7 @@
pwm3: pwm@2088000 {
compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
reg = <0x02088000 0x4000>;
- interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX6UL_CLK_PWM3>,
<&clks IMX6UL_CLK_PWM3>;
clock-names = "ipg", "per";
@@ -391,7 +391,7 @@
pwm4: pwm@208c000 {
compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
reg = <0x0208c000 0x4000>;
- interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX6UL_CLK_PWM4>,
<&clks IMX6UL_CLK_PWM4>;
clock-names = "ipg", "per";
diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi
index 7ef442462ea4..40c11b6b217a 100644
--- a/arch/arm/boot/dts/meson8.dtsi
+++ b/arch/arm/boot/dts/meson8.dtsi
@@ -248,8 +248,8 @@
<GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>,
@@ -264,7 +264,6 @@
clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>;
clock-names = "bus", "core";
operating-points-v2 = <&gpu_opp_table>;
- switch-delay = <0xffff>;
};
};
}; /* end of / */
diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi
index 800cd65fc50a..ec67f49116d9 100644
--- a/arch/arm/boot/dts/meson8b.dtsi
+++ b/arch/arm/boot/dts/meson8b.dtsi
@@ -163,23 +163,23 @@
opp-255000000 {
opp-hz = /bits/ 64 <255000000>;
- opp-microvolt = <1150000>;
+ opp-microvolt = <1100000>;
};
opp-364300000 {
opp-hz = /bits/ 64 <364300000>;
- opp-microvolt = <1150000>;
+ opp-microvolt = <1100000>;
};
opp-425000000 {
opp-hz = /bits/ 64 <425000000>;
- opp-microvolt = <1150000>;
+ opp-microvolt = <1100000>;
};
opp-510000000 {
opp-hz = /bits/ 64 <510000000>;
- opp-microvolt = <1150000>;
+ opp-microvolt = <1100000>;
};
opp-637500000 {
opp-hz = /bits/ 64 <637500000>;
- opp-microvolt = <1150000>;
+ opp-microvolt = <1100000>;
turbo-mode;
};
};
@@ -229,7 +229,6 @@
clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>;
clock-names = "bus", "core";
operating-points-v2 = <&gpu_opp_table>;
- switch-delay = <0xffff>;
};
};
}; /* end of / */
diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c
index fd4a3bf27993..1b442b128569 100644
--- a/arch/arm/mach-omap2/prm3xxx.c
+++ b/arch/arm/mach-omap2/prm3xxx.c
@@ -430,7 +430,7 @@ static void omap3_prm_reconfigure_io_chain(void)
* registers, and omap3xxx_prm_reconfigure_io_chain() must be called.
* No return value.
*/
-static void __init omap3xxx_prm_enable_io_wakeup(void)
+static void omap3xxx_prm_enable_io_wakeup(void)
{
if (prm_features & PRM_HAS_IO_WAKEUP)
omap2_prm_set_mod_reg_bits(OMAP3430_EN_IO_MASK, WKUP_MOD,
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index 4cdf84c63320..22a1c74dddf3 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -28,7 +28,7 @@
enable-method = "psci";
clocks = <&clockgen 1 0>;
next-level-cache = <&l2>;
- cpu-idle-states = <&CPU_PH20>;
+ cpu-idle-states = <&CPU_PW20>;
};
cpu1: cpu@1 {
@@ -38,7 +38,7 @@
enable-method = "psci";
clocks = <&clockgen 1 0>;
next-level-cache = <&l2>;
- cpu-idle-states = <&CPU_PH20>;
+ cpu-idle-states = <&CPU_PW20>;
};
l2: l2-cache {
@@ -53,13 +53,13 @@
*/
entry-method = "arm,psci";
- CPU_PH20: cpu-ph20 {
- compatible = "arm,idle-state";
- idle-state-name = "PH20";
- arm,psci-suspend-param = <0x00010000>;
- entry-latency-us = <1000>;
- exit-latency-us = <1000>;
- min-residency-us = <3000>;
+ CPU_PW20: cpu-pw20 {
+ compatible = "arm,idle-state";
+ idle-state-name = "PW20";
+ arm,psci-suspend-param = <0x0>;
+ entry-latency-us = <2000>;
+ exit-latency-us = <2000>;
+ min-residency-us = <6000>;
};
};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 4d583514258c..6bca5b082ea4 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -613,6 +613,7 @@ CONFIG_RTC_DRV_TEGRA=y
CONFIG_RTC_DRV_IMX_SC=m
CONFIG_RTC_DRV_XGENE=y
CONFIG_DMADEVICES=y
+CONFIG_FSL_EDMA=y
CONFIG_DMA_BCM2835=m
CONFIG_K3_DMA=y
CONFIG_MV_XOR=y
diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c
index 04a43cfd4e09..d47a3381aad8 100644
--- a/arch/csky/kernel/signal.c
+++ b/arch/csky/kernel/signal.c
@@ -39,6 +39,11 @@ static int save_fpu_state(struct sigcontext __user *sc)
#endif
struct rt_sigframe {
+ /*
+ * pad[3] is compatible with the same struct defined in
+ * gcc/libgcc/config/csky/linux-unwind.h
+ */
+ int pad[3];
struct siginfo info;
struct ucontext uc;
};
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index f241ded9239b..1f0f29a289d3 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -786,6 +786,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
/* 32-bit PC relative address */
*loc = val - dot - 8 + addend;
break;
+ case R_PARISC_PCREL64:
+ /* 64-bit PC relative address */
+ *loc64 = val - dot - 8 + addend;
+ break;
case R_PARISC_DIR64:
/* 64-bit effective address */
*loc64 = val + addend;
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index b8286a2013b4..0d52f57fca04 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -319,6 +319,13 @@ struct vm_area_struct;
#endif /* __ASSEMBLY__ */
#include <asm/slice.h>
+/*
+ * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks.
+ */
+#ifdef CONFIG_PPC32
+#define ARCH_ZONE_DMA_BITS 30
+#else
#define ARCH_ZONE_DMA_BITS 31
+#endif
#endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 1d5f1bd0dacd..f255e22184b4 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -752,6 +752,7 @@ __secondary_start:
stw r0,0(r3)
/* load up the MMU */
+ bl load_segment_registers
bl load_up_mmu
/* ptr to phys current thread */
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index bfeb469e8106..2ae635df9026 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -83,7 +83,7 @@ END_BTB_FLUSH_SECTION
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
-.macro SYSCALL_ENTRY trapno intno
+.macro SYSCALL_ENTRY trapno intno srr1
mfspr r10, SPRN_SPRG_THREAD
#ifdef CONFIG_KVM_BOOKE_HV
BEGIN_FTR_SECTION
@@ -94,7 +94,7 @@ BEGIN_FTR_SECTION
mfspr r11, SPRN_SRR1
mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */
bf 3, 1975f
- b kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1
+ b kvmppc_handler_\intno\()_\srr1
1975:
mr r12, r13
lwz r13, THREAD_NORMSAVE(2)(r10)
@@ -145,9 +145,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
tophys(r11,r11)
addi r11,r11,global_dbcr0@l
#ifdef CONFIG_SMP
- lwz r9,TASK_CPU(r2)
- slwi r9,r9,3
- add r11,r11,r9
+ lwz r10, TASK_CPU(r2)
+ slwi r10, r10, 3
+ add r11, r11, r10
#endif
lwz r12,0(r11)
mtspr SPRN_DBCR0,r12
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 0bf4651380f3..adf0505dbe02 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -409,7 +409,7 @@ interrupt_base:
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- SYSCALL_ENTRY 0xc00 SYSCALL
+ SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 41f93dbcd29f..cb05ccc8bc6a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -830,6 +830,7 @@ static void flush_guest_tlb(struct kvm *kvm)
}
}
asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
}
void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index d885a5831daa..337e64468d78 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2500,17 +2500,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
LOAD_REG_ADDR(r11, dawr_force_enable)
lbz r11, 0(r11)
cmpdi r11, 0
+ bne 3f
li r3, H_HARDWARE
- beqlr
+ blr
+3:
/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
rlwimi r5, r4, 2, DAWRX_WT
clrrdi r4, r4, 3
std r4, VCPU_DAWR(r3)
std r5, VCPU_DAWRX(r3)
+ /*
+ * If came in through the real mode hcall handler then it is necessary
+ * to write the registers since the return path won't. Otherwise it is
+ * sufficient to store then in the vcpu struct as they will be loaded
+ * next time the vcpu is run.
+ */
+ mfmsr r6
+ andi. r6, r6, MSR_DR /* in real mode? */
+ bne 4f
mtspr SPRN_DAWR, r4
mtspr SPRN_DAWRX, r5
- li r3, 0
+4: li r3, 0
blr
_GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index bb70391401f7..794404d50a85 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -50,20 +50,52 @@ EXPORT_SYMBOL_GPL(hash__alloc_context_id);
void slb_setup_new_exec(void);
+static int realloc_context_ids(mm_context_t *ctx)
+{
+ int i, id;
+
+ /*
+ * id 0 (aka. ctx->id) is special, we always allocate a new one, even if
+ * there wasn't one allocated previously (which happens in the exec
+ * case where ctx is newly allocated).
+ *
+ * We have to be a bit careful here. We must keep the existing ids in
+ * the array, so that we can test if they're non-zero to decide if we
+ * need to allocate a new one. However in case of error we must free the
+ * ids we've allocated but *not* any of the existing ones (or risk a
+ * UAF). That's why we decrement i at the start of the error handling
+ * loop, to skip the id that we just tested but couldn't reallocate.
+ */
+ for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
+ if (i == 0 || ctx->extended_id[i]) {
+ id = hash__alloc_context_id();
+ if (id < 0)
+ goto error;
+
+ ctx->extended_id[i] = id;
+ }
+ }
+
+ /* The caller expects us to return id */
+ return ctx->id;
+
+error:
+ for (i--; i >= 0; i--) {
+ if (ctx->extended_id[i])
+ ida_free(&mmu_context_ida, ctx->extended_id[i]);
+ }
+
+ return id;
+}
+
static int hash__init_new_context(struct mm_struct *mm)
{
int index;
- index = hash__alloc_context_id();
- if (index < 0)
- return index;
-
mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
GFP_KERNEL);
- if (!mm->context.hash_context) {
- ida_free(&mmu_context_ida, index);
+ if (!mm->context.hash_context)
return -ENOMEM;
- }
/*
* The old code would re-promote on fork, we don't do that when using
@@ -91,13 +123,20 @@ static int hash__init_new_context(struct mm_struct *mm)
mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
GFP_KERNEL);
if (!mm->context.hash_context->spt) {
- ida_free(&mmu_context_ida, index);
kfree(mm->context.hash_context);
return -ENOMEM;
}
}
#endif
+ }
+ index = realloc_context_ids(&mm->context);
+ if (index < 0) {
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+ kfree(mm->context.hash_context->spt);
+#endif
+ kfree(mm->context.hash_context);
+ return index;
}
pkey_mm_init(mm);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index cba29131bccc..2540d3b2588c 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -248,7 +248,8 @@ void __init paging_init(void)
(long int)((top_of_ram - total_ram) >> 20));
#ifdef CONFIG_ZONE_DMA
- max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffffffUL >> PAGE_SHIFT);
+ max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
+ ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT);
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index f834a19ed772..c02d8c503b29 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -7,6 +7,7 @@ config PPC_PMAC
select PPC_INDIRECT_PCI if PPC32
select PPC_MPC106 if PPC32
select PPC_NATIVE
+ select ZONE_DMA if PPC32
default y
config PPC_PMAC64
diff --git a/drivers/auxdisplay/cfag12864bfb.c b/drivers/auxdisplay/cfag12864bfb.c
index 40c8a552a478..4074886b7bc8 100644
--- a/drivers/auxdisplay/cfag12864bfb.c
+++ b/drivers/auxdisplay/cfag12864bfb.c
@@ -52,8 +52,9 @@ static const struct fb_var_screeninfo cfag12864bfb_var = {
static int cfag12864bfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
{
- return vm_insert_page(vma, vma->vm_start,
- virt_to_page(cfag12864b_buffer));
+ struct page *pages = virt_to_page(cfag12864b_buffer);
+
+ return vm_map_pages_zero(vma, &pages, 1);
}
static struct fb_ops cfag12864bfb_ops = {
diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c
index 21393ec3b9a4..9c0bb771751d 100644
--- a/drivers/auxdisplay/ht16k33.c
+++ b/drivers/auxdisplay/ht16k33.c
@@ -223,9 +223,9 @@ static const struct backlight_ops ht16k33_bl_ops = {
static int ht16k33_mmap(struct fb_info *info, struct vm_area_struct *vma)
{
struct ht16k33_priv *priv = info->par;
+ struct page *pages = virt_to_page(priv->fbdev.buffer);
- return vm_insert_page(vma, vma->vm_start,
- virt_to_page(priv->fbdev.buffer));
+ return vm_map_pages_zero(vma, &pages, 1);
}
static struct fb_ops ht16k33_fb_ops = {
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index aa51756fd4d6..87b410d6e51d 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -368,7 +368,7 @@ static struct clk_core *clk_core_get(struct clk_core *core, u8 p_index)
const char *dev_id = dev ? dev_name(dev) : NULL;
struct device_node *np = core->of_node;
- if (np && index >= 0)
+ if (np && (name || index >= 0))
hw = of_clk_get_hw(np, index, name);
/*
diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c
index 739f64fdf1e3..206fafd299ea 100644
--- a/drivers/clk/meson/g12a.c
+++ b/drivers/clk/meson/g12a.c
@@ -2734,8 +2734,8 @@ static struct clk_hw_onecell_data g12a_hw_onecell_data = {
[CLKID_MALI_1_DIV] = &g12a_mali_1_div.hw,
[CLKID_MALI_1] = &g12a_mali_1.hw,
[CLKID_MALI] = &g12a_mali.hw,
- [CLKID_MPLL_5OM_DIV] = &g12a_mpll_50m_div.hw,
- [CLKID_MPLL_5OM] = &g12a_mpll_50m.hw,
+ [CLKID_MPLL_50M_DIV] = &g12a_mpll_50m_div.hw,
+ [CLKID_MPLL_50M] = &g12a_mpll_50m.hw,
[CLKID_SYS_PLL_DIV16_EN] = &g12a_sys_pll_div16_en.hw,
[CLKID_SYS_PLL_DIV16] = &g12a_sys_pll_div16.hw,
[CLKID_CPU_CLK_DYN0_SEL] = &g12a_cpu_clk_premux0.hw,
diff --git a/drivers/clk/meson/g12a.h b/drivers/clk/meson/g12a.h
index 39c41af70804..bcc05cd9882f 100644
--- a/drivers/clk/meson/g12a.h
+++ b/drivers/clk/meson/g12a.h
@@ -166,7 +166,7 @@
#define CLKID_HDMI_DIV 167
#define CLKID_MALI_0_DIV 170
#define CLKID_MALI_1_DIV 173
-#define CLKID_MPLL_5OM_DIV 176
+#define CLKID_MPLL_50M_DIV 176
#define CLKID_SYS_PLL_DIV16_EN 178
#define CLKID_SYS_PLL_DIV16 179
#define CLKID_CPU_CLK_DYN0_SEL 180
diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c
index 37cf0f01bb5d..62cd3a7f1f65 100644
--- a/drivers/clk/meson/meson8b.c
+++ b/drivers/clk/meson/meson8b.c
@@ -1761,7 +1761,7 @@ static struct clk_regmap meson8m2_gp_pll = {
},
};
-static const char * const mmeson8b_vpu_0_1_parent_names[] = {
+static const char * const meson8b_vpu_0_1_parent_names[] = {
"fclk_div4", "fclk_div3", "fclk_div5", "fclk_div7"
};
@@ -1778,8 +1778,8 @@ static struct clk_regmap meson8b_vpu_0_sel = {
.hw.init = &(struct clk_init_data){
.name = "vpu_0_sel",
.ops = &clk_regmap_mux_ops,
- .parent_names = mmeson8b_vpu_0_1_parent_names,
- .num_parents = ARRAY_SIZE(mmeson8b_vpu_0_1_parent_names),
+ .parent_names = meson8b_vpu_0_1_parent_names,
+ .num_parents = ARRAY_SIZE(meson8b_vpu_0_1_parent_names),
.flags = CLK_SET_RATE_PARENT,
},
};
@@ -1837,8 +1837,8 @@ static struct clk_regmap meson8b_vpu_1_sel = {
.hw.init = &(struct clk_init_data){
.name = "vpu_1_sel",
.ops = &clk_regmap_mux_ops,
- .parent_names = mmeson8b_vpu_0_1_parent_names,
- .num_parents = ARRAY_SIZE(mmeson8b_vpu_0_1_parent_names),
+ .parent_names = meson8b_vpu_0_1_parent_names,
+ .num_parents = ARRAY_SIZE(meson8b_vpu_0_1_parent_names),
.flags = CLK_SET_RATE_PARENT,
},
};
diff --git a/drivers/clk/socfpga/clk-s10.c b/drivers/clk/socfpga/clk-s10.c
index 8281dfbf38c2..5bed36e12951 100644
--- a/drivers/clk/socfpga/clk-s10.c
+++ b/drivers/clk/socfpga/clk-s10.c
@@ -103,9 +103,9 @@ static const struct stratix10_perip_cnt_clock s10_main_perip_cnt_clks[] = {
{ STRATIX10_NOC_CLK, "noc_clk", NULL, noc_mux, ARRAY_SIZE(noc_mux),
0, 0, 0, 0x3C, 1},
{ STRATIX10_EMAC_A_FREE_CLK, "emaca_free_clk", NULL, emaca_free_mux, ARRAY_SIZE(emaca_free_mux),
- 0, 0, 4, 0xB0, 0},
+ 0, 0, 2, 0xB0, 0},
{ STRATIX10_EMAC_B_FREE_CLK, "emacb_free_clk", NULL, emacb_free_mux, ARRAY_SIZE(emacb_free_mux),
- 0, 0, 4, 0xB0, 1},
+ 0, 0, 2, 0xB0, 1},
{ STRATIX10_EMAC_PTP_FREE_CLK, "emac_ptp_free_clk", NULL, emac_ptp_free_mux,
ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 4, 0xB0, 2},
{ STRATIX10_GPIO_DB_FREE_CLK, "gpio_db_free_clk", NULL, gpio_db_free_mux,
diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index e1ba62d2b1a0..ac1d27a8c650 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -3366,6 +3366,8 @@ static struct tegra_clk_init_table init_table[] __initdata = {
{ TEGRA210_CLK_I2S3_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 },
{ TEGRA210_CLK_I2S4_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 },
{ TEGRA210_CLK_VIMCLK_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 },
+ { TEGRA210_CLK_HDA, TEGRA210_CLK_PLL_P, 51000000, 0 },
+ { TEGRA210_CLK_HDA2CODEC_2X, TEGRA210_CLK_PLL_P, 48000000, 0 },
/* This MUST be the last entry. */
{ TEGRA210_CLK_CLK_MAX, TEGRA210_CLK_CLK_MAX, 0, 0 },
};
diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c
index 8e834317c97d..975995eea15c 100644
--- a/drivers/clk/ti/clkctrl.c
+++ b/drivers/clk/ti/clkctrl.c
@@ -229,6 +229,7 @@ static struct clk_hw *_ti_omap4_clkctrl_xlate(struct of_phandle_args *clkspec,
{
struct omap_clkctrl_provider *provider = data;
struct omap_clkctrl_clk *entry;
+ bool found = false;
if (clkspec->args_count != 2)
return ERR_PTR(-EINVAL);
@@ -238,11 +239,13 @@ static struct clk_hw *_ti_omap4_clkctrl_xlate(struct of_phandle_args *clkspec,
list_for_each_entry(entry, &provider->clocks, node) {
if (entry->reg_offset == clkspec->args[0] &&
- entry->bit_offset == clkspec->args[1])
+ entry->bit_offset == clkspec->args[1]) {
+ found = true;
break;
+ }
}
- if (!entry)
+ if (!found)
return ERR_PTR(-EINVAL);
return entry->clk;
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index eac0c54c5970..b032d3899fa3 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -80,6 +80,7 @@
#define HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP 0x1220
#define HID_DEVICE_ID_ALPS_U1 0x1215
#define HID_DEVICE_ID_ALPS_T4_BTNLESS 0x120C
+#define HID_DEVICE_ID_ALPS_1222 0x1222
#define USB_VENDOR_ID_AMI 0x046b
@@ -269,6 +270,7 @@
#define USB_DEVICE_ID_CHICONY_MULTI_TOUCH 0xb19d
#define USB_DEVICE_ID_CHICONY_WIRELESS 0x0618
#define USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE 0x1053
+#define USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE2 0x0939
#define USB_DEVICE_ID_CHICONY_WIRELESS2 0x1123
#define USB_DEVICE_ID_ASUS_AK1D 0x1125
#define USB_DEVICE_ID_CHICONY_TOSHIBA_WT10A 0x1408
@@ -569,6 +571,7 @@
#define USB_VENDOR_ID_HUION 0x256c
#define USB_DEVICE_ID_HUION_TABLET 0x006e
+#define USB_DEVICE_ID_HUION_HS64 0x006d
#define USB_VENDOR_ID_IBM 0x04b3
#define USB_DEVICE_ID_IBM_SCROLLPOINT_III 0x3100
diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index e564bff86515..bfcf2ee58d14 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -30,6 +30,7 @@
#define REPORT_ID_HIDPP_SHORT 0x10
#define REPORT_ID_HIDPP_LONG 0x11
+#define REPORT_ID_HIDPP_VERY_LONG 0x12
#define HIDPP_REPORT_SHORT_LENGTH 7
#define HIDPP_REPORT_LONG_LENGTH 20
@@ -1242,7 +1243,8 @@ static int logi_dj_ll_raw_request(struct hid_device *hid,
int ret;
if ((buf[0] == REPORT_ID_HIDPP_SHORT) ||
- (buf[0] == REPORT_ID_HIDPP_LONG)) {
+ (buf[0] == REPORT_ID_HIDPP_LONG) ||
+ (buf[0] == REPORT_ID_HIDPP_VERY_LONG)) {
if (count < 2)
return -EINVAL;
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 5df5dd56ecc8..b603c14d043b 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -1776,6 +1776,10 @@ static const struct hid_device_id mt_devices[] = {
HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
USB_VENDOR_ID_ALPS_JP,
HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP) },
+ { .driver_data = MT_CLS_WIN_8_DUAL,
+ HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
+ USB_VENDOR_ID_ALPS_JP,
+ HID_DEVICE_ID_ALPS_1222) },
/* Lenovo X1 TAB Gen 2 */
{ .driver_data = MT_CLS_WIN_8_DUAL,
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index e5ca6fe2ca57..671a285724f9 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -42,6 +42,7 @@ static const struct hid_device_id hid_quirks[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_UC100KM), HID_QUIRK_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_MULTI_TOUCH), HID_QUIRK_MULTI_INPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE2), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS), HID_QUIRK_MULTI_INPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHIC, USB_DEVICE_ID_CHIC_GAMEPAD), HID_QUIRK_BADPAD },
{ HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_3AXIS_5BUTTON_STICK), HID_QUIRK_NOGET },
diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c
index 8fe02d81265d..914fb527ae7a 100644
--- a/drivers/hid/hid-uclogic-core.c
+++ b/drivers/hid/hid-uclogic-core.c
@@ -369,6 +369,8 @@ static const struct hid_device_id uclogic_devices[] = {
USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) },
{ HID_USB_DEVICE(USB_VENDOR_ID_HUION,
USB_DEVICE_ID_HUION_TABLET) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HUION,
+ USB_DEVICE_ID_HUION_HS64) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC,
USB_DEVICE_ID_HUION_TABLET) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC,
diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c
index 0187c9f8fc22..273d784fff66 100644
--- a/drivers/hid/hid-uclogic-params.c
+++ b/drivers/hid/hid-uclogic-params.c
@@ -977,6 +977,8 @@ int uclogic_params_init(struct uclogic_params *params,
/* FALL THROUGH */
case VID_PID(USB_VENDOR_ID_HUION,
USB_DEVICE_ID_HUION_TABLET):
+ case VID_PID(USB_VENDOR_ID_HUION,
+ USB_DEVICE_ID_HUION_HS64):
case VID_PID(USB_VENDOR_ID_UCLOGIC,
USB_DEVICE_ID_HUION_TABLET):
case VID_PID(USB_VENDOR_ID_UCLOGIC,
diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
index 22ba21457035..aa2dbed30fc3 100644
--- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
+++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
@@ -816,9 +816,9 @@ static int load_fw_from_host(struct ishtp_cl_data *client_data)
goto end_err_fw_release;
release_firmware(fw);
- kfree(filename);
dev_info(cl_data_to_dev(client_data), "ISH firmware %s loaded\n",
filename);
+ kfree(filename);
return 0;
end_err_fw_release:
diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
index c0487b34d2cf..6ba944b40fdb 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c
+++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
@@ -891,7 +891,7 @@ static int hid_ishtp_cl_reset(struct ishtp_cl_device *cl_device)
*/
static int hid_ishtp_cl_suspend(struct device *device)
{
- struct ishtp_cl_device *cl_device = dev_get_drvdata(device);
+ struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device);
struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device);
struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl);
@@ -912,7 +912,7 @@ static int hid_ishtp_cl_suspend(struct device *device)
*/
static int hid_ishtp_cl_resume(struct device *device)
{
- struct ishtp_cl_device *cl_device = dev_get_drvdata(device);
+ struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device);
struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device);
struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl);
diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c
index 794e700d65f7..c47c3328a0f4 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -471,7 +471,6 @@ static struct ishtp_cl_device *ishtp_bus_add_device(struct ishtp_device *dev,
}
ishtp_device_ready = true;
- dev_set_drvdata(&device->dev, device);
return device;
}
@@ -640,6 +639,20 @@ void *ishtp_get_drvdata(struct ishtp_cl_device *cl_device)
EXPORT_SYMBOL(ishtp_get_drvdata);
/**
+ * ishtp_dev_to_cl_device() - get ishtp_cl_device instance from device instance
+ * @device: device instance
+ *
+ * Get ish_cl_device instance which embeds device instance in it.
+ *
+ * Return: pointer to ishtp_cl_device instance
+ */
+struct ishtp_cl_device *ishtp_dev_to_cl_device(struct device *device)
+{
+ return to_ishtp_cl_device(device);
+}
+EXPORT_SYMBOL(ishtp_dev_to_cl_device);
+
+/**
* ishtp_bus_new_client() - Create a new client
* @dev: ISHTP device instance
*
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 2e2e65f00257..4efbbd2fce0c 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -37,7 +37,7 @@
#include "mlx5_ib.h"
#include "srq.h"
-static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
+static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe)
{
struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
@@ -522,9 +522,9 @@ repoll:
case MLX5_CQE_SIG_ERR:
sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
- read_lock(&dev->mdev->priv.mkey_table.lock);
- mmkey = __mlx5_mr_lookup(dev->mdev,
- mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
+ xa_lock(&dev->mdev->priv.mkey_table);
+ mmkey = xa_load(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
mr = to_mibmr(mmkey);
get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
mr->sig->sig_err_exists = true;
@@ -537,7 +537,7 @@ repoll:
mr->sig->err_item.expected,
mr->sig->err_item.actual);
- read_unlock(&dev->mdev->priv.mkey_table.lock);
+ xa_unlock(&dev->mdev->priv.mkey_table);
goto repoll;
}
@@ -891,6 +891,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
int entries = attr->cqe;
int vector = attr->comp_vector;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
struct mlx5_ib_cq *cq;
int uninitialized_var(index);
int uninitialized_var(inlen);
@@ -958,7 +959,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
MLX5_SET(cqc, cqc, oi, 1);
- err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
+ err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out));
if (err)
goto err_cqb;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 80b42d069328..931f587dfb8f 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1043,13 +1043,10 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
struct mlx5_ib_dev *dev,
void *in, void *out)
{
- struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
- unsigned long flags;
struct mlx5_core_mkey *mkey;
void *mkc;
u8 key;
- int err;
mkey = &devx_mr->mmkey;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
@@ -1062,11 +1059,8 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
mkey->pd = MLX5_GET(mkc, mkc, pd);
devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
- write_lock_irqsave(&table->lock, flags);
- err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key),
- mkey);
- write_unlock_irqrestore(&table->lock, flags);
- return err;
+ return xa_err(xa_store(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL));
}
static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
@@ -1117,12 +1111,8 @@ static void devx_free_indirect_mkey(struct rcu_head *rcu)
*/
static void devx_cleanup_mkey(struct devx_obj *obj)
{
- struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table;
- unsigned long flags;
-
- write_lock_irqsave(&table->lock, flags);
- radix_tree_delete(&table->tree, mlx5_base_mkey(obj->devx_mr.mmkey.key));
- write_unlock_irqrestore(&table->lock, flags);
+ xa_erase(&obj->mdev->priv.mkey_table,
+ mlx5_base_mkey(obj->devx_mr.mmkey.key));
}
static int devx_obj_cleanup(struct ib_uobject *uobject,
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index 1fc302d41a53..b8841355fcd5 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -65,11 +65,12 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
struct uverbs_attr_bundle *attrs)
{
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
+ struct mlx5_flow_context flow_context = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_ib_flow_handler *flow_handler;
struct mlx5_ib_flow_matcher *fs_matcher;
struct ib_uobject **arr_flow_actions;
struct ib_uflow_resources *uflow_res;
+ struct mlx5_flow_act flow_act = {};
void *devx_obj;
int dest_id, dest_type;
void *cmd_in;
@@ -172,17 +173,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
arr_flow_actions[i]->object);
}
- ret = uverbs_copy_from(&flow_act.flow_tag, attrs,
+ ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
MLX5_IB_ATTR_CREATE_FLOW_TAG);
if (!ret) {
- if (flow_act.flow_tag >= BIT(24)) {
+ if (flow_context.flow_tag >= BIT(24)) {
ret = -EINVAL;
goto err_out;
}
- flow_act.flags |= FLOW_ACT_HAS_TAG;
+ flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
}
- flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act,
+ flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher,
+ &flow_context,
+ &flow_act,
counter_id,
cmd_in, inlen,
dest_id, dest_type);
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index aa9acebfcc23..74ce9249e75a 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -14,9 +14,10 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
int vport_index;
ibdev = mlx5_ib_get_uplink_ibdev(dev->priv.eswitch);
- vport_index = ibdev->free_port++;
+ vport_index = rep->vport_index;
ibdev->port[vport_index].rep = rep;
+ rep->rep_data[REP_IB].priv = ibdev;
write_lock(&ibdev->port[vport_index].roce.netdev_lock);
ibdev->port[vport_index].roce.netdev =
mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
@@ -28,7 +29,7 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
- int num_ports = MLX5_TOTAL_VPORTS(dev);
+ int num_ports = mlx5_eswitch_get_total_vports(dev);
const struct mlx5_ib_profile *profile;
struct mlx5_ib_dev *ibdev;
int vport_index;
@@ -50,7 +51,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
}
ibdev->is_rep = true;
- vport_index = ibdev->free_port++;
+ vport_index = rep->vport_index;
ibdev->port[vport_index].rep = rep;
ibdev->port[vport_index].roce.netdev =
mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
@@ -68,15 +69,18 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
- struct mlx5_ib_dev *dev;
-
- if (!rep->rep_data[REP_IB].priv ||
- rep->vport != MLX5_VPORT_UPLINK)
- return;
+ struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
+ struct mlx5_ib_port *port;
- dev = mlx5_ib_rep_to_dev(rep);
- __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ port = &dev->port[rep->vport_index];
+ write_lock(&port->roce.netdev_lock);
+ port->roce.netdev = NULL;
+ write_unlock(&port->roce.netdev_lock);
rep->rep_data[REP_IB].priv = NULL;
+ port->rep = NULL;
+
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}
static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
index 7a917e6d5c09..de43b423bafc 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.h
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -28,7 +28,7 @@ struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
#else /* CONFIG_MLX5_ESWITCH */
static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
{
- return SRIOV_NONE;
+ return MLX5_ESWITCH_NONE;
}
static inline
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 340290b883fe..ba312bf59c7a 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2666,11 +2666,15 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
}
}
-static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
- u32 *match_v, const union ib_flow_spec *ib_spec,
+static int parse_flow_attr(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_spec *spec,
+ const union ib_flow_spec *ib_spec,
const struct ib_flow_attr *flow_attr,
struct mlx5_flow_act *action, u32 prev_type)
{
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
+ u32 *match_c = spec->match_criteria;
+ u32 *match_v = spec->match_value;
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
@@ -2989,8 +2993,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
if (ib_spec->flow_tag.tag_id >= BIT(24))
return -EINVAL;
- action->flow_tag = ib_spec->flow_tag.tag_id;
- action->flags |= FLOW_ACT_HAS_TAG;
+ flow_context->flow_tag = ib_spec->flow_tag.tag_id;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
break;
case IB_FLOW_SPEC_ACTION_DROP:
if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
@@ -3084,7 +3088,8 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
return VALID_SPEC_NA;
return is_crypto && is_ipsec &&
- (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ?
+ (!egress || (!is_drop &&
+ !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
VALID_SPEC_VALID : VALID_SPEC_INVALID;
}
@@ -3464,6 +3469,37 @@ free:
return ret;
}
+static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw,
+ rep->vport));
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+}
+
static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
const struct ib_flow_attr *flow_attr,
@@ -3473,7 +3509,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
+ struct mlx5_flow_act flow_act = {};
struct mlx5_flow_spec *spec;
struct mlx5_flow_destination dest_arr[2] = {};
struct mlx5_flow_destination *rule_dst = dest_arr;
@@ -3504,8 +3540,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- err = parse_flow_attr(dev->mdev, spec->match_criteria,
- spec->match_value,
+ err = parse_flow_attr(dev->mdev, spec,
ib_flow, flow_attr, &flow_act,
prev_type);
if (err < 0)
@@ -3519,19 +3554,15 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
set_underlay_qp(dev, spec, underlay_qpn);
if (dev->is_rep) {
- void *misc;
+ struct mlx5_eswitch_rep *rep;
- if (!dev->port[flow_attr->port - 1].rep) {
+ rep = dev->port[flow_attr->port - 1].rep;
+ if (!rep) {
err = -EINVAL;
goto free;
}
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port,
- dev->port[flow_attr->port - 1].rep->vport);
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ mlx5_ib_set_rule_source_port(dev, spec, rep);
}
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
@@ -3572,11 +3603,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
- if ((flow_act.flags & FLOW_ACT_HAS_TAG) &&
+ if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
(flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
- flow_act.flow_tag, flow_attr->type);
+ spec->flow_context.flow_tag, flow_attr->type);
err = -EINVAL;
goto free;
}
@@ -3947,6 +3978,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct mlx5_flow_destination *dst,
struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
struct mlx5_flow_act *flow_act,
void *cmd_in, int inlen,
int dst_num)
@@ -3969,6 +4001,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
fs_matcher->mask_len);
spec->match_criteria_enable = fs_matcher->match_criteria_enable;
+ spec->flow_context = *flow_context;
handler->rule = mlx5_add_flow_rules(ft, spec,
flow_act, dst, dst_num);
@@ -4033,6 +4066,7 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
struct mlx5_ib_flow_handler *
mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
struct mlx5_flow_act *flow_act,
u32 counter_id,
void *cmd_in, int inlen, int dest_id,
@@ -4085,7 +4119,8 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
dst_num++;
}
- handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
+ handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
+ flow_context, flow_act,
cmd_in, inlen, dst_num);
if (IS_ERR(handler)) {
@@ -4457,7 +4492,7 @@ static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
* lock/unlock above locks Now need to arm all involved CQs.
*/
list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
- mcq->comp(mcq);
+ mcq->comp(mcq, NULL);
}
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
@@ -6779,7 +6814,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
printk_once(KERN_INFO "%s", mlx5_version);
if (MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) {
if (!mlx5_core_mp_enabled(mdev))
mlx5_ib_register_vport_reps(mdev);
return mdev;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 40eb8be482e4..ee73dc122d28 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -920,6 +920,7 @@ struct mlx5_ib_lb_state {
};
struct mlx5_ib_pf_eq {
+ struct notifier_block irq_nb;
struct mlx5_ib_dev *dev;
struct mlx5_eq *core;
struct work_struct work;
@@ -977,7 +978,6 @@ struct mlx5_ib_dev {
u16 devx_whitelist_uid;
struct mlx5_srq_table srq_table;
struct mlx5_async_ctx async_ctx;
- int free_port;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1316,6 +1316,7 @@ extern const struct uapi_definition mlx5_ib_devx_defs[];
extern const struct uapi_definition mlx5_ib_flow_defs[];
struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
struct mlx5_flow_act *flow_act, u32 counter_id,
void *cmd_in, int inlen, int dest_id, int dest_type);
bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 5f09699fab98..83b452d977d4 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -130,7 +130,7 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
struct mlx5_cache_ent *ent = &cache->ent[c];
u8 key;
unsigned long flags;
- struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
+ struct xarray *mkeys = &dev->mdev->priv.mkey_table;
int err;
spin_lock_irqsave(&ent->lock, flags);
@@ -158,12 +158,12 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
ent->size++;
spin_unlock_irqrestore(&ent->lock, flags);
- write_lock_irqsave(&table->lock, flags);
- err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
- &mr->mmkey);
+ xa_lock_irqsave(mkeys, flags);
+ err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_ATOMIC));
+ xa_unlock_irqrestore(mkeys, flags);
if (err)
pr_err("Error inserting to mkey tree. 0x%x\n", -err);
- write_unlock_irqrestore(&table->lock, flags);
if (!completion_done(&ent->compl))
complete(&ent->compl);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 91507a2e9290..831c450b271a 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -768,7 +768,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
bcnt -= *bytes_committed;
next_mr:
- mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key));
+ mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key));
if (!mkey_is_eq(mmkey, key)) {
mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
ret = -EFAULT;
@@ -1488,9 +1488,11 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
mlx5_eq_update_ci(eq->core, cc, 1);
}
-static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr)
+static int mlx5_ib_eq_pf_int(struct notifier_block *nb, unsigned long type,
+ void *data)
{
- struct mlx5_ib_pf_eq *eq = eq_ptr;
+ struct mlx5_ib_pf_eq *eq =
+ container_of(nb, struct mlx5_ib_pf_eq, irq_nb);
unsigned long flags;
if (spin_trylock_irqsave(&eq->lock, flags)) {
@@ -1553,20 +1555,26 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
goto err_mempool;
}
+ eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
param = (struct mlx5_eq_param) {
- .index = MLX5_EQ_PFAULT_IDX,
- .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
+ .irq_index = 0,
.nent = MLX5_IB_NUM_PF_EQE,
- .context = eq,
- .handler = mlx5_ib_eq_pf_int
};
- eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", &param);
+ param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
+ eq->core = mlx5_eq_create_generic(dev->mdev, &param);
if (IS_ERR(eq->core)) {
err = PTR_ERR(eq->core);
goto err_wq;
}
+ err = mlx5_eq_enable(dev->mdev, eq->core, &eq->irq_nb);
+ if (err) {
+ mlx5_ib_err(dev, "failed to enable odp EQ %d\n", err);
+ goto err_eq;
+ }
return 0;
+err_eq:
+ mlx5_eq_destroy_generic(dev->mdev, eq->core);
err_wq:
destroy_workqueue(eq->wq);
err_mempool:
@@ -1579,6 +1587,7 @@ mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
{
int err;
+ mlx5_eq_disable(dev->mdev, eq->core, &eq->irq_nb);
err = mlx5_eq_destroy_generic(dev->mdev, eq->core);
cancel_work_sync(&eq->work);
destroy_workqueue(eq->wq);
@@ -1677,8 +1686,8 @@ static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev,
struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mr *mr;
- mmkey = __mlx5_mr_lookup(dev->mdev,
- mlx5_base_mkey(sg_list[i].lkey));
+ mmkey = xa_load(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(sg_list[i].lkey));
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
atomic_dec(&mr->num_pending_prefetch);
}
@@ -1697,8 +1706,8 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd,
struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mr *mr;
- mmkey = __mlx5_mr_lookup(dev->mdev,
- mlx5_base_mkey(sg_list[i].lkey));
+ mmkey = xa_load(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(sg_list[i].lkey));
if (!mmkey || mmkey->key != sg_list[i].lkey) {
ret = false;
break;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index f6623c77443a..768c7e81f688 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -6297,7 +6297,7 @@ static void handle_drain_completion(struct ib_cq *cq,
/* Run the CQ handler - this makes sure that the drain WR will
* be processed if wasn't processed yet.
*/
- mcq->mcq.comp(&mcq->mcq);
+ mcq->mcq.comp(&mcq->mcq, NULL);
}
wait_for_completion(&sdrain->done);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 56297298d6ee..162b3236e72c 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2504,7 +2504,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
}
- spin_lock(&iommu->lock);
spin_lock_irqsave(&device_domain_lock, flags);
if (dev)
found = find_domain(dev);
@@ -2520,16 +2519,17 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (found) {
spin_unlock_irqrestore(&device_domain_lock, flags);
- spin_unlock(&iommu->lock);
free_devinfo_mem(info);
/* Caller must free the original domain */
return found;
}
+ spin_lock(&iommu->lock);
ret = domain_attach_iommu(domain, iommu);
+ spin_unlock(&iommu->lock);
+
if (ret) {
spin_unlock_irqrestore(&device_domain_lock, flags);
- spin_unlock(&iommu->lock);
free_devinfo_mem(info);
return NULL;
}
@@ -2539,7 +2539,6 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (dev)
dev->archdata.iommu = info;
spin_unlock_irqrestore(&device_domain_lock, flags);
- spin_unlock(&iommu->lock);
/* PASID table is mandatory for a PCI device in scalable mode. */
if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
index 352e803f566e..728733a514c7 100644
--- a/drivers/md/dm-init.c
+++ b/drivers/md/dm-init.c
@@ -140,8 +140,8 @@ static char __init *dm_parse_table_entry(struct dm_device *dev, char *str)
return ERR_PTR(-EINVAL);
}
/* target_args */
- dev->target_args_array[n] = kstrndup(field[3], GFP_KERNEL,
- DM_MAX_STR_SIZE);
+ dev->target_args_array[n] = kstrndup(field[3], DM_MAX_STR_SIZE,
+ GFP_KERNEL);
if (!dev->target_args_array[n])
return ERR_PTR(-ENOMEM);
@@ -272,10 +272,10 @@ static int __init dm_init_init(void)
return 0;
if (strlen(create) >= DM_MAX_STR_SIZE) {
- DMERR("Argument is too big. Limit is %d\n", DM_MAX_STR_SIZE);
+ DMERR("Argument is too big. Limit is %d", DM_MAX_STR_SIZE);
return -EINVAL;
}
- str = kstrndup(create, GFP_KERNEL, DM_MAX_STR_SIZE);
+ str = kstrndup(create, DM_MAX_STR_SIZE, GFP_KERNEL);
if (!str)
return -ENOMEM;
@@ -283,7 +283,7 @@ static int __init dm_init_init(void)
if (r)
goto out;
- DMINFO("waiting for all devices to be available before creating mapped devices\n");
+ DMINFO("waiting for all devices to be available before creating mapped devices");
wait_for_device_probe();
list_for_each_entry(dev, &devices, list) {
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 9ea2b0291f20..e549392e0ea5 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -60,6 +60,7 @@
#define WRITE_LOG_VERSION 1ULL
#define WRITE_LOG_MAGIC 0x6a736677736872ULL
+#define WRITE_LOG_SUPER_SECTOR 0
/*
* The disk format for this is braindead simple.
@@ -115,6 +116,7 @@ struct log_writes_c {
struct list_head logging_blocks;
wait_queue_head_t wait;
struct task_struct *log_kthread;
+ struct completion super_done;
};
struct pending_block {
@@ -180,6 +182,14 @@ static void log_end_io(struct bio *bio)
bio_put(bio);
}
+static void log_end_super(struct bio *bio)
+{
+ struct log_writes_c *lc = bio->bi_private;
+
+ complete(&lc->super_done);
+ log_end_io(bio);
+}
+
/*
* Meant to be called if there is an error, it will free all the pages
* associated with the block.
@@ -215,7 +225,8 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, lc->logdev->bdev);
- bio->bi_end_io = log_end_io;
+ bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ?
+ log_end_super : log_end_io;
bio->bi_private = lc;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
@@ -418,11 +429,18 @@ static int log_super(struct log_writes_c *lc)
super.nr_entries = cpu_to_le64(lc->logged_entries);
super.sectorsize = cpu_to_le32(lc->sectorsize);
- if (write_metadata(lc, &super, sizeof(super), NULL, 0, 0)) {
+ if (write_metadata(lc, &super, sizeof(super), NULL, 0,
+ WRITE_LOG_SUPER_SECTOR)) {
DMERR("Couldn't write super");
return -1;
}
+ /*
+ * Super sector should be writen in-order, otherwise the
+ * nr_entries could be rewritten incorrectly by an old bio.
+ */
+ wait_for_completion_io(&lc->super_done);
+
return 0;
}
@@ -531,6 +549,7 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
INIT_LIST_HEAD(&lc->unflushed_blocks);
INIT_LIST_HEAD(&lc->logging_blocks);
init_waitqueue_head(&lc->wait);
+ init_completion(&lc->super_done);
atomic_set(&lc->io_blocks, 0);
atomic_set(&lc->pending_blocks, 0);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 350cf0451456..ec8b27e20de3 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -561,7 +561,7 @@ static char **realloc_argv(unsigned *size, char **old_argv)
gfp = GFP_NOIO;
}
argv = kmalloc_array(new_size, sizeof(*argv), gfp);
- if (argv) {
+ if (argv && old_argv) {
memcpy(argv, old_argv, *size * sizeof(*argv));
*size = new_size;
}
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 720d06531aa3..ea24ff0612e3 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -235,8 +235,8 @@ static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
BUG();
}
- DMERR("%s: %s block %llu is corrupted", v->data_dev->name, type_str,
- block);
+ DMERR_LIMIT("%s: %s block %llu is corrupted", v->data_dev->name,
+ type_str, block);
if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS)
DMERR("%s: reached maximum errors", v->data_dev->name);
diff --git a/drivers/mfd/stmfx.c b/drivers/mfd/stmfx.c
index fe8efba2d45f..857991cb3cbb 100644
--- a/drivers/mfd/stmfx.c
+++ b/drivers/mfd/stmfx.c
@@ -204,12 +204,11 @@ static struct irq_chip stmfx_irq_chip = {
static irqreturn_t stmfx_irq_handler(int irq, void *data)
{
struct stmfx *stmfx = data;
- unsigned long n, pending;
- u32 ack;
- int ret;
+ unsigned long bits;
+ u32 pending, ack;
+ int n, ret;
- ret = regmap_read(stmfx->map, STMFX_REG_IRQ_PENDING,
- (u32 *)&pending);
+ ret = regmap_read(stmfx->map, STMFX_REG_IRQ_PENDING, &pending);
if (ret)
return IRQ_NONE;
@@ -224,7 +223,8 @@ static irqreturn_t stmfx_irq_handler(int irq, void *data)
return IRQ_NONE;
}
- for_each_set_bit(n, &pending, STMFX_REG_IRQ_SRC_MAX)
+ bits = pending;
+ for_each_set_bit(n, &bits, STMFX_REG_IRQ_SRC_MAX)
handle_nested_irq(irq_find_mapping(stmfx->irq_domain, n));
return IRQ_HANDLED;
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index b5b68aa16eb3..6eb131292eb2 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -4662,7 +4662,6 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
memorg = nanddev_get_memorg(&chip->base);
memorg->planes_per_lun = 1;
memorg->luns_per_target = 1;
- memorg->ntargets = 1;
/*
* Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx)
@@ -5027,6 +5026,8 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips,
if (ret)
return ret;
+ memorg->ntargets = maxchips;
+
/* Read the flash type */
ret = nand_detect(chip, table);
if (ret) {
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 73172d7f512b..0c2ec1c21434 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1636,6 +1636,95 @@ static int sr2_bit7_quad_enable(struct spi_nor *nor)
return 0;
}
+/**
+ * spi_nor_clear_sr_bp() - clear the Status Register Block Protection bits.
+ * @nor: pointer to a 'struct spi_nor'
+ *
+ * Read-modify-write function that clears the Block Protection bits from the
+ * Status Register without affecting other bits.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_clear_sr_bp(struct spi_nor *nor)
+{
+ int ret;
+ u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
+
+ ret = read_sr(nor);
+ if (ret < 0) {
+ dev_err(nor->dev, "error while reading status register\n");
+ return ret;
+ }
+
+ write_enable(nor);
+
+ ret = write_sr(nor, ret & ~mask);
+ if (ret) {
+ dev_err(nor->dev, "write to status register failed\n");
+ return ret;
+ }
+
+ ret = spi_nor_wait_till_ready(nor);
+ if (ret)
+ dev_err(nor->dev, "timeout while writing status register\n");
+ return ret;
+}
+
+/**
+ * spi_nor_spansion_clear_sr_bp() - clear the Status Register Block Protection
+ * bits on spansion flashes.
+ * @nor: pointer to a 'struct spi_nor'
+ *
+ * Read-modify-write function that clears the Block Protection bits from the
+ * Status Register without affecting other bits. The function is tightly
+ * coupled with the spansion_quad_enable() function. Both assume that the Write
+ * Register with 16 bits, together with the Read Configuration Register (35h)
+ * instructions are supported.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_spansion_clear_sr_bp(struct spi_nor *nor)
+{
+ int ret;
+ u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
+ u8 sr_cr[2] = {0};
+
+ /* Check current Quad Enable bit value. */
+ ret = read_cr(nor);
+ if (ret < 0) {
+ dev_err(nor->dev,
+ "error while reading configuration register\n");
+ return ret;
+ }
+
+ /*
+ * When the configuration register Quad Enable bit is one, only the
+ * Write Status (01h) command with two data bytes may be used.
+ */
+ if (ret & CR_QUAD_EN_SPAN) {
+ sr_cr[1] = ret;
+
+ ret = read_sr(nor);
+ if (ret < 0) {
+ dev_err(nor->dev,
+ "error while reading status register\n");
+ return ret;
+ }
+ sr_cr[0] = ret & ~mask;
+
+ ret = write_sr_cr(nor, sr_cr);
+ if (ret)
+ dev_err(nor->dev, "16-bit write register failed\n");
+ return ret;
+ }
+
+ /*
+ * If the Quad Enable bit is zero, use the Write Status (01h) command
+ * with one data byte.
+ */
+ return spi_nor_clear_sr_bp(nor);
+}
+
/* Used when the "_ext_id" is two bytes at most */
#define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags) \
.id = { \
@@ -3660,6 +3749,8 @@ static int spi_nor_init_params(struct spi_nor *nor,
default:
/* Kept only for backward compatibility purpose. */
params->quad_enable = spansion_quad_enable;
+ if (nor->clear_sr_bp)
+ nor->clear_sr_bp = spi_nor_spansion_clear_sr_bp;
break;
}
@@ -3912,17 +4003,13 @@ static int spi_nor_init(struct spi_nor *nor)
{
int err;
- /*
- * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up
- * with the software protection bits set
- */
- if (JEDEC_MFR(nor->info) == SNOR_MFR_ATMEL ||
- JEDEC_MFR(nor->info) == SNOR_MFR_INTEL ||
- JEDEC_MFR(nor->info) == SNOR_MFR_SST ||
- nor->info->flags & SPI_NOR_HAS_LOCK) {
- write_enable(nor);
- write_sr(nor, 0);
- spi_nor_wait_till_ready(nor);
+ if (nor->clear_sr_bp) {
+ err = nor->clear_sr_bp(nor);
+ if (err) {
+ dev_err(nor->dev,
+ "fail to clear block protection bits\n");
+ return err;
+ }
}
if (nor->quad_enable) {
@@ -4047,6 +4134,16 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
if (info->flags & SPI_S3AN)
nor->flags |= SNOR_F_READY_XSR_RDY;
+ /*
+ * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up
+ * with the software protection bits set.
+ */
+ if (JEDEC_MFR(nor->info) == SNOR_MFR_ATMEL ||
+ JEDEC_MFR(nor->info) == SNOR_MFR_INTEL ||
+ JEDEC_MFR(nor->info) == SNOR_MFR_SST ||
+ nor->info->flags & SPI_NOR_HAS_LOCK)
+ nor->clear_sr_bp = spi_nor_clear_sr_bp;
+
/* Parse the Serial Flash Discoverable Parameters table. */
ret = spi_nor_init_params(nor, &params);
if (ret)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 4f5b3baf04c3..302499ae05e6 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -796,6 +796,8 @@ static bool bond_should_notify_peers(struct bonding *bond)
slave ? slave->dev->name : "NULL");
if (!slave || !bond->send_peer_notif ||
+ bond->send_peer_notif %
+ max(1, bond->params.peer_notif_delay) != 0 ||
!netif_carrier_ok(bond->dev) ||
test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
return false;
@@ -886,15 +888,18 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
if (netif_running(bond->dev)) {
bond->send_peer_notif =
- bond->params.num_peer_notif;
+ bond->params.num_peer_notif *
+ max(1, bond->params.peer_notif_delay);
should_notify_peers =
bond_should_notify_peers(bond);
}
call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
- if (should_notify_peers)
+ if (should_notify_peers) {
+ bond->send_peer_notif--;
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
bond->dev);
+ }
}
}
@@ -937,7 +942,7 @@ void bond_select_active_slave(struct bonding *bond)
return;
if (netif_carrier_ok(bond->dev))
- slave_info(bond->dev, best_slave->dev, "active interface up!\n");
+ netdev_info(bond->dev, "active interface up!\n");
else
netdev_info(bond->dev, "now running without any active interface!\n");
}
@@ -2279,6 +2284,7 @@ static void bond_mii_monitor(struct work_struct *work)
struct bonding *bond = container_of(work, struct bonding,
mii_work.work);
bool should_notify_peers = false;
+ bool commit;
unsigned long delay;
struct slave *slave;
struct list_head *iter;
@@ -2289,12 +2295,19 @@ static void bond_mii_monitor(struct work_struct *work)
goto re_arm;
rcu_read_lock();
-
should_notify_peers = bond_should_notify_peers(bond);
-
- if (bond_miimon_inspect(bond)) {
+ commit = !!bond_miimon_inspect(bond);
+ if (bond->send_peer_notif) {
+ rcu_read_unlock();
+ if (rtnl_trylock()) {
+ bond->send_peer_notif--;
+ rtnl_unlock();
+ }
+ } else {
rcu_read_unlock();
+ }
+ if (commit) {
/* Race avoidance with bond_close cancel of workqueue */
if (!rtnl_trylock()) {
delay = 1;
@@ -2308,8 +2321,7 @@ static void bond_mii_monitor(struct work_struct *work)
bond_miimon_commit(bond);
rtnl_unlock(); /* might sleep, hold no other locks */
- } else
- rcu_read_unlock();
+ }
re_arm:
if (bond->params.miimon)
@@ -3065,10 +3077,6 @@ static int bond_master_netdev_event(unsigned long event,
case NETDEV_REGISTER:
bond_create_proc_entry(event_bond);
break;
- case NETDEV_NOTIFY_PEERS:
- if (event_bond->send_peer_notif)
- event_bond->send_peer_notif--;
- break;
default:
break;
}
@@ -4304,12 +4312,12 @@ void bond_setup(struct net_device *bond_dev)
bond_dev->features |= NETIF_F_NETNS_LOCAL;
bond_dev->hw_features = BOND_VLAN_FEATURES |
- NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER;
bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
bond_dev->features |= bond_dev->hw_features;
+ bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
}
/* Destroy a bonding device.
@@ -4691,6 +4699,7 @@ static int bond_check_params(struct bond_params *params)
params->arp_all_targets = arp_all_targets_value;
params->updelay = updelay;
params->downdelay = downdelay;
+ params->peer_notif_delay = 0;
params->use_carrier = use_carrier;
params->lacp_fast = lacp_fast;
params->primary[0] = 0;
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index b24cce48ae35..a259860a7208 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -108,6 +108,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
[IFLA_BOND_AD_ACTOR_SYSTEM] = { .type = NLA_BINARY,
.len = ETH_ALEN },
[IFLA_BOND_TLB_DYNAMIC_LB] = { .type = NLA_U8 },
+ [IFLA_BOND_PEER_NOTIF_DELAY] = { .type = NLA_U32 },
};
static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = {
@@ -215,6 +216,14 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[],
if (err)
return err;
}
+ if (data[IFLA_BOND_PEER_NOTIF_DELAY]) {
+ int delay = nla_get_u32(data[IFLA_BOND_PEER_NOTIF_DELAY]);
+
+ bond_opt_initval(&newval, delay);
+ err = __bond_opt_set(bond, BOND_OPT_PEER_NOTIF_DELAY, &newval);
+ if (err)
+ return err;
+ }
if (data[IFLA_BOND_USE_CARRIER]) {
int use_carrier = nla_get_u8(data[IFLA_BOND_USE_CARRIER]);
@@ -494,6 +503,7 @@ static size_t bond_get_size(const struct net_device *bond_dev)
nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_USER_PORT_KEY */
nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_ACTOR_SYSTEM */
nla_total_size(sizeof(u8)) + /* IFLA_BOND_TLB_DYNAMIC_LB */
+ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PEER_NOTIF_DELAY */
0;
}
@@ -536,6 +546,10 @@ static int bond_fill_info(struct sk_buff *skb,
bond->params.downdelay * bond->params.miimon))
goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_BOND_PEER_NOTIF_DELAY,
+ bond->params.downdelay * bond->params.miimon))
+ goto nla_put_failure;
+
if (nla_put_u8(skb, IFLA_BOND_USE_CARRIER, bond->params.use_carrier))
goto nla_put_failure;
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 0d852fe9da7c..ddb3916d3506 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -24,6 +24,8 @@ static int bond_option_updelay_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_downdelay_set(struct bonding *bond,
const struct bond_opt_value *newval);
+static int bond_option_peer_notif_delay_set(struct bonding *bond,
+ const struct bond_opt_value *newval);
static int bond_option_use_carrier_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_arp_interval_set(struct bonding *bond,
@@ -424,6 +426,13 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
.desc = "Number of peer notifications to send on failover event",
.values = bond_num_peer_notif_tbl,
.set = bond_option_num_peer_notif_set
+ },
+ [BOND_OPT_PEER_NOTIF_DELAY] = {
+ .id = BOND_OPT_PEER_NOTIF_DELAY,
+ .name = "peer_notif_delay",
+ .desc = "Delay between each peer notification on failover event, in milliseconds",
+ .values = bond_intmax_tbl,
+ .set = bond_option_peer_notif_delay_set
}
};
@@ -841,6 +850,9 @@ static int bond_option_miimon_set(struct bonding *bond,
if (bond->params.downdelay)
netdev_dbg(bond->dev, "Note: Updating downdelay (to %d) since it is a multiple of the miimon value\n",
bond->params.downdelay * bond->params.miimon);
+ if (bond->params.peer_notif_delay)
+ netdev_dbg(bond->dev, "Note: Updating peer_notif_delay (to %d) since it is a multiple of the miimon value\n",
+ bond->params.peer_notif_delay * bond->params.miimon);
if (newval->value && bond->params.arp_interval) {
netdev_dbg(bond->dev, "MII monitoring cannot be used with ARP monitoring - disabling ARP monitoring...\n");
bond->params.arp_interval = 0;
@@ -864,52 +876,59 @@ static int bond_option_miimon_set(struct bonding *bond,
return 0;
}
-/* Set up and down delays. These must be multiples of the
- * MII monitoring value, and are stored internally as the multiplier.
- * Thus, we must translate to MS for the real world.
+/* Set up, down and peer notification delays. These must be multiples
+ * of the MII monitoring value, and are stored internally as the
+ * multiplier. Thus, we must translate to MS for the real world.
*/
-static int bond_option_updelay_set(struct bonding *bond,
- const struct bond_opt_value *newval)
+static int _bond_option_delay_set(struct bonding *bond,
+ const struct bond_opt_value *newval,
+ const char *name,
+ int *target)
{
int value = newval->value;
if (!bond->params.miimon) {
- netdev_err(bond->dev, "Unable to set up delay as MII monitoring is disabled\n");
+ netdev_err(bond->dev, "Unable to set %s as MII monitoring is disabled\n",
+ name);
return -EPERM;
}
if ((value % bond->params.miimon) != 0) {
- netdev_warn(bond->dev, "up delay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n",
+ netdev_warn(bond->dev,
+ "%s (%d) is not a multiple of miimon (%d), value rounded to %d ms\n",
+ name,
value, bond->params.miimon,
(value / bond->params.miimon) *
bond->params.miimon);
}
- bond->params.updelay = value / bond->params.miimon;
- netdev_dbg(bond->dev, "Setting up delay to %d\n",
- bond->params.updelay * bond->params.miimon);
+ *target = value / bond->params.miimon;
+ netdev_dbg(bond->dev, "Setting %s to %d\n",
+ name,
+ *target * bond->params.miimon);
return 0;
}
+static int bond_option_updelay_set(struct bonding *bond,
+ const struct bond_opt_value *newval)
+{
+ return _bond_option_delay_set(bond, newval, "up delay",
+ &bond->params.updelay);
+}
+
static int bond_option_downdelay_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
- int value = newval->value;
-
- if (!bond->params.miimon) {
- netdev_err(bond->dev, "Unable to set down delay as MII monitoring is disabled\n");
- return -EPERM;
- }
- if ((value % bond->params.miimon) != 0) {
- netdev_warn(bond->dev, "down delay (%d) is not a multiple of miimon (%d), delay rounded to %d ms\n",
- value, bond->params.miimon,
- (value / bond->params.miimon) *
- bond->params.miimon);
- }
- bond->params.downdelay = value / bond->params.miimon;
- netdev_dbg(bond->dev, "Setting down delay to %d\n",
- bond->params.downdelay * bond->params.miimon);
+ return _bond_option_delay_set(bond, newval, "down delay",
+ &bond->params.downdelay);
+}
- return 0;
+static int bond_option_peer_notif_delay_set(struct bonding *bond,
+ const struct bond_opt_value *newval)
+{
+ int ret = _bond_option_delay_set(bond, newval,
+ "peer notification delay",
+ &bond->params.peer_notif_delay);
+ return ret;
}
static int bond_option_use_carrier_set(struct bonding *bond,
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 9f7d83e827c3..fd5c9cbe45b1 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -104,6 +104,8 @@ static void bond_info_show_master(struct seq_file *seq)
bond->params.updelay * bond->params.miimon);
seq_printf(seq, "Down Delay (ms): %d\n",
bond->params.downdelay * bond->params.miimon);
+ seq_printf(seq, "Peer Notification Delay (ms): %d\n",
+ bond->params.peer_notif_delay * bond->params.miimon);
/* ARP information */
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 94214eaf53c5..2d615a93685e 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -327,6 +327,18 @@ static ssize_t bonding_show_updelay(struct device *d,
static DEVICE_ATTR(updelay, 0644,
bonding_show_updelay, bonding_sysfs_store_option);
+static ssize_t bonding_show_peer_notif_delay(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct bonding *bond = to_bond(d);
+
+ return sprintf(buf, "%d\n",
+ bond->params.peer_notif_delay * bond->params.miimon);
+}
+static DEVICE_ATTR(peer_notif_delay, 0644,
+ bonding_show_peer_notif_delay, bonding_sysfs_store_option);
+
/* Show the LACP interval. */
static ssize_t bonding_show_lacp(struct device *d,
struct device_attribute *attr,
@@ -718,6 +730,7 @@ static struct attribute *per_bond_attrs[] = {
&dev_attr_arp_ip_target.attr,
&dev_attr_downdelay.attr,
&dev_attr_updelay.attr,
+ &dev_attr_peer_notif_delay.attr,
&dev_attr_lacp_rate.attr,
&dev_attr_ad_select.attr,
&dev_attr_xmit_hash_policy.attr,
diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c
index 68bb58a57f3b..8242fb287cbb 100644
--- a/drivers/net/can/softing/softing_main.c
+++ b/drivers/net/can/softing/softing_main.c
@@ -683,7 +683,7 @@ static void softing_netdev_cleanup(struct net_device *netdev)
static ssize_t show_##name(struct device *dev, \
struct device_attribute *attr, char *buf) \
{ \
- struct softing *card = platform_get_drvdata(to_platform_device(dev)); \
+ struct softing *card = dev_get_drvdata(dev); \
return sprintf(buf, "%u\n", card->member); \
} \
static DEVICE_ATTR(name, 0444, show_##name, NULL)
@@ -692,7 +692,7 @@ static DEVICE_ATTR(name, 0444, show_##name, NULL)
static ssize_t show_##name(struct device *dev, \
struct device_attribute *attr, char *buf) \
{ \
- struct softing *card = platform_get_drvdata(to_platform_device(dev)); \
+ struct softing *card = dev_get_drvdata(dev); \
return sprintf(buf, "%s\n", card->member); \
} \
static DEVICE_ATTR(name, 0444, show_##name, NULL)
diff --git a/drivers/net/dsa/microchip/Kconfig b/drivers/net/dsa/microchip/Kconfig
index 2c3a6751bdaf..fe0a13b79c4b 100644
--- a/drivers/net/dsa/microchip/Kconfig
+++ b/drivers/net/dsa/microchip/Kconfig
@@ -13,5 +13,6 @@ menuconfig NET_DSA_MICROCHIP_KSZ9477
config NET_DSA_MICROCHIP_KSZ9477_SPI
tristate "KSZ9477 series SPI connected switch driver"
depends on NET_DSA_MICROCHIP_KSZ9477 && SPI
+ select REGMAP_SPI
help
Select to enable support for registering switches configured through SPI.
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 508380f80875..a8c97f7a79b7 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -65,51 +65,36 @@ static const struct {
{ 0x83, "tx_discards" },
};
-static void ksz9477_cfg32(struct ksz_device *dev, u32 addr, u32 bits, bool set)
+static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set)
{
- u32 data;
+ regmap_update_bits(dev->regmap[0], addr, bits, set ? bits : 0);
+}
- ksz_read32(dev, addr, &data);
- if (set)
- data |= bits;
- else
- data &= ~bits;
- ksz_write32(dev, addr, data);
+static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits,
+ bool set)
+{
+ regmap_update_bits(dev->regmap[0], PORT_CTRL_ADDR(port, offset),
+ bits, set ? bits : 0);
+}
+
+static void ksz9477_cfg32(struct ksz_device *dev, u32 addr, u32 bits, bool set)
+{
+ regmap_update_bits(dev->regmap[2], addr, bits, set ? bits : 0);
}
static void ksz9477_port_cfg32(struct ksz_device *dev, int port, int offset,
u32 bits, bool set)
{
- u32 addr;
- u32 data;
-
- addr = PORT_CTRL_ADDR(port, offset);
- ksz_read32(dev, addr, &data);
-
- if (set)
- data |= bits;
- else
- data &= ~bits;
-
- ksz_write32(dev, addr, data);
+ regmap_update_bits(dev->regmap[2], PORT_CTRL_ADDR(port, offset),
+ bits, set ? bits : 0);
}
-static int ksz9477_wait_vlan_ctrl_ready(struct ksz_device *dev, u32 waiton,
- int timeout)
+static int ksz9477_wait_vlan_ctrl_ready(struct ksz_device *dev)
{
- u8 data;
+ unsigned int val;
- do {
- ksz_read8(dev, REG_SW_VLAN_CTRL, &data);
- if (!(data & waiton))
- break;
- usleep_range(1, 10);
- } while (timeout-- > 0);
-
- if (timeout <= 0)
- return -ETIMEDOUT;
-
- return 0;
+ return regmap_read_poll_timeout(dev->regmap[0], REG_SW_VLAN_CTRL,
+ val, !(val & VLAN_START), 10, 1000);
}
static int ksz9477_get_vlan_table(struct ksz_device *dev, u16 vid,
@@ -123,8 +108,8 @@ static int ksz9477_get_vlan_table(struct ksz_device *dev, u16 vid,
ksz_write8(dev, REG_SW_VLAN_CTRL, VLAN_READ | VLAN_START);
/* wait to be cleared */
- ret = ksz9477_wait_vlan_ctrl_ready(dev, VLAN_START, 1000);
- if (ret < 0) {
+ ret = ksz9477_wait_vlan_ctrl_ready(dev);
+ if (ret) {
dev_dbg(dev->dev, "Failed to read vlan table\n");
goto exit;
}
@@ -156,8 +141,8 @@ static int ksz9477_set_vlan_table(struct ksz_device *dev, u16 vid,
ksz_write8(dev, REG_SW_VLAN_CTRL, VLAN_START | VLAN_WRITE);
/* wait to be cleared */
- ret = ksz9477_wait_vlan_ctrl_ready(dev, VLAN_START, 1000);
- if (ret < 0) {
+ ret = ksz9477_wait_vlan_ctrl_ready(dev);
+ if (ret) {
dev_dbg(dev->dev, "Failed to write vlan table\n");
goto exit;
}
@@ -191,55 +176,35 @@ static void ksz9477_write_table(struct ksz_device *dev, u32 *table)
ksz_write32(dev, REG_SW_ALU_VAL_D, table[3]);
}
-static int ksz9477_wait_alu_ready(struct ksz_device *dev, u32 waiton,
- int timeout)
+static int ksz9477_wait_alu_ready(struct ksz_device *dev)
{
- u32 data;
-
- do {
- ksz_read32(dev, REG_SW_ALU_CTRL__4, &data);
- if (!(data & waiton))
- break;
- usleep_range(1, 10);
- } while (timeout-- > 0);
-
- if (timeout <= 0)
- return -ETIMEDOUT;
+ unsigned int val;
- return 0;
+ return regmap_read_poll_timeout(dev->regmap[2], REG_SW_ALU_CTRL__4,
+ val, !(val & ALU_START), 10, 1000);
}
-static int ksz9477_wait_alu_sta_ready(struct ksz_device *dev, u32 waiton,
- int timeout)
+static int ksz9477_wait_alu_sta_ready(struct ksz_device *dev)
{
- u32 data;
-
- do {
- ksz_read32(dev, REG_SW_ALU_STAT_CTRL__4, &data);
- if (!(data & waiton))
- break;
- usleep_range(1, 10);
- } while (timeout-- > 0);
-
- if (timeout <= 0)
- return -ETIMEDOUT;
+ unsigned int val;
- return 0;
+ return regmap_read_poll_timeout(dev->regmap[2],
+ REG_SW_ALU_STAT_CTRL__4,
+ val, !(val & ALU_STAT_START),
+ 10, 1000);
}
static int ksz9477_reset_switch(struct ksz_device *dev)
{
u8 data8;
- u16 data16;
u32 data32;
/* reset switch */
ksz_cfg(dev, REG_SW_OPERATION, SW_RESET, true);
/* turn off SPI DO Edge select */
- ksz_read8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, &data8);
- data8 &= ~SPI_AUTO_EDGE_DETECTION;
- ksz_write8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, data8);
+ regmap_update_bits(dev->regmap[0], REG_SW_GLOBAL_SERIAL_CTRL_0,
+ SPI_AUTO_EDGE_DETECTION, 0);
/* default configuration */
ksz_read8(dev, REG_SW_LUE_CTRL_1, &data8);
@@ -253,10 +218,10 @@ static int ksz9477_reset_switch(struct ksz_device *dev)
ksz_read32(dev, REG_SW_PORT_INT_STATUS__4, &data32);
/* set broadcast storm protection 10% rate */
- ksz_read16(dev, REG_SW_MAC_CTRL_2, &data16);
- data16 &= ~BROADCAST_STORM_RATE;
- data16 |= (BROADCAST_STORM_VALUE * BROADCAST_STORM_PROT_RATE) / 100;
- ksz_write16(dev, REG_SW_MAC_CTRL_2, data16);
+ regmap_update_bits(dev->regmap[1], REG_SW_MAC_CTRL_2,
+ BROADCAST_STORM_RATE,
+ (BROADCAST_STORM_VALUE *
+ BROADCAST_STORM_PROT_RATE) / 100);
if (dev->synclko_125)
ksz_write8(dev, REG_SW_GLOBAL_OUTPUT_CTRL__1,
@@ -268,12 +233,8 @@ static int ksz9477_reset_switch(struct ksz_device *dev)
static void ksz9477_r_mib_cnt(struct ksz_device *dev, int port, u16 addr,
u64 *cnt)
{
- struct ksz_poll_ctx ctx = {
- .dev = dev,
- .port = port,
- .offset = REG_PORT_MIB_CTRL_STAT__4,
- };
struct ksz_port *p = &dev->ports[port];
+ unsigned int val;
u32 data;
int ret;
@@ -283,11 +244,11 @@ static void ksz9477_r_mib_cnt(struct ksz_device *dev, int port, u16 addr,
data |= (addr << MIB_COUNTER_INDEX_S);
ksz_pwrite32(dev, port, REG_PORT_MIB_CTRL_STAT__4, data);
- ret = readx_poll_timeout(ksz_pread32_poll, &ctx, data,
- !(data & MIB_COUNTER_READ), 10, 1000);
-
+ ret = regmap_read_poll_timeout(dev->regmap[2],
+ PORT_CTRL_ADDR(port, REG_PORT_MIB_CTRL_STAT__4),
+ val, !(val & MIB_COUNTER_READ), 10, 1000);
/* failed to read MIB. get out of loop */
- if (ret < 0) {
+ if (ret) {
dev_dbg(dev->dev, "Failed to get MIB\n");
return;
}
@@ -522,10 +483,10 @@ static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port)
{
u8 data;
- ksz_read8(dev, REG_SW_LUE_CTRL_2, &data);
- data &= ~(SW_FLUSH_OPTION_M << SW_FLUSH_OPTION_S);
- data |= (SW_FLUSH_OPTION_DYN_MAC << SW_FLUSH_OPTION_S);
- ksz_write8(dev, REG_SW_LUE_CTRL_2, data);
+ regmap_update_bits(dev->regmap[0], REG_SW_LUE_CTRL_2,
+ SW_FLUSH_OPTION_M << SW_FLUSH_OPTION_S,
+ SW_FLUSH_OPTION_DYN_MAC << SW_FLUSH_OPTION_S);
+
if (port < dev->mib_port_cnt) {
/* flush individual port */
ksz_pread8(dev, port, P_STP_CTRL, &data);
@@ -652,8 +613,8 @@ static int ksz9477_port_fdb_add(struct dsa_switch *ds, int port,
ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_READ | ALU_START);
/* wait to be finished */
- ret = ksz9477_wait_alu_ready(dev, ALU_START, 1000);
- if (ret < 0) {
+ ret = ksz9477_wait_alu_ready(dev);
+ if (ret) {
dev_dbg(dev->dev, "Failed to read ALU\n");
goto exit;
}
@@ -676,8 +637,8 @@ static int ksz9477_port_fdb_add(struct dsa_switch *ds, int port,
ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_WRITE | ALU_START);
/* wait to be finished */
- ret = ksz9477_wait_alu_ready(dev, ALU_START, 1000);
- if (ret < 0)
+ ret = ksz9477_wait_alu_ready(dev);
+ if (ret)
dev_dbg(dev->dev, "Failed to write ALU\n");
exit:
@@ -709,8 +670,8 @@ static int ksz9477_port_fdb_del(struct dsa_switch *ds, int port,
ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_READ | ALU_START);
/* wait to be finished */
- ret = ksz9477_wait_alu_ready(dev, ALU_START, 1000);
- if (ret < 0) {
+ ret = ksz9477_wait_alu_ready(dev);
+ if (ret) {
dev_dbg(dev->dev, "Failed to read ALU\n");
goto exit;
}
@@ -743,8 +704,8 @@ static int ksz9477_port_fdb_del(struct dsa_switch *ds, int port,
ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_WRITE | ALU_START);
/* wait to be finished */
- ret = ksz9477_wait_alu_ready(dev, ALU_START, 1000);
- if (ret < 0)
+ ret = ksz9477_wait_alu_ready(dev);
+ if (ret)
dev_dbg(dev->dev, "Failed to write ALU\n");
exit:
@@ -850,7 +811,7 @@ static void ksz9477_port_mdb_add(struct dsa_switch *ds, int port,
ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
/* wait to be finished */
- if (ksz9477_wait_alu_sta_ready(dev, ALU_STAT_START, 1000) < 0) {
+ if (ksz9477_wait_alu_sta_ready(dev)) {
dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
goto exit;
}
@@ -891,7 +852,7 @@ static void ksz9477_port_mdb_add(struct dsa_switch *ds, int port,
ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
/* wait to be finished */
- if (ksz9477_wait_alu_sta_ready(dev, ALU_STAT_START, 1000) < 0)
+ if (ksz9477_wait_alu_sta_ready(dev))
dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
exit:
@@ -921,8 +882,8 @@ static int ksz9477_port_mdb_del(struct dsa_switch *ds, int port,
ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
/* wait to be finished */
- ret = ksz9477_wait_alu_sta_ready(dev, ALU_STAT_START, 1000);
- if (ret < 0) {
+ ret = ksz9477_wait_alu_sta_ready(dev);
+ if (ret) {
dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
goto exit;
}
@@ -963,8 +924,8 @@ static int ksz9477_port_mdb_del(struct dsa_switch *ds, int port,
ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
/* wait to be finished */
- ret = ksz9477_wait_alu_sta_ready(dev, ALU_STAT_START, 1000);
- if (ret < 0)
+ ret = ksz9477_wait_alu_sta_ready(dev);
+ if (ret)
dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
exit:
diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c
index 75178624d3f5..5a9e27b337a8 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -10,119 +10,43 @@
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/regmap.h>
#include <linux/spi/spi.h>
#include "ksz_priv.h"
-#include "ksz_spi.h"
-
-/* SPI frame opcodes */
-#define KS_SPIOP_RD 3
-#define KS_SPIOP_WR 2
+#include "ksz_common.h"
#define SPI_ADDR_SHIFT 24
-#define SPI_ADDR_MASK (BIT(SPI_ADDR_SHIFT) - 1)
+#define SPI_ADDR_ALIGN 3
#define SPI_TURNAROUND_SHIFT 5
-/* Enough to read all switch port registers. */
-#define SPI_TX_BUF_LEN 0x100
-
-static int ksz9477_spi_read_reg(struct spi_device *spi, u32 reg, u8 *val,
- unsigned int len)
-{
- u32 txbuf;
- int ret;
-
- txbuf = reg & SPI_ADDR_MASK;
- txbuf |= KS_SPIOP_RD << SPI_ADDR_SHIFT;
- txbuf <<= SPI_TURNAROUND_SHIFT;
- txbuf = cpu_to_be32(txbuf);
-
- ret = spi_write_then_read(spi, &txbuf, 4, val, len);
- return ret;
-}
-
-static int ksz9477_spi_write_reg(struct spi_device *spi, u32 reg, u8 *val,
- unsigned int len)
-{
- u32 *txbuf = (u32 *)val;
-
- *txbuf = reg & SPI_ADDR_MASK;
- *txbuf |= (KS_SPIOP_WR << SPI_ADDR_SHIFT);
- *txbuf <<= SPI_TURNAROUND_SHIFT;
- *txbuf = cpu_to_be32(*txbuf);
-
- return spi_write(spi, txbuf, 4 + len);
-}
-
-static int ksz_spi_read(struct ksz_device *dev, u32 reg, u8 *data,
- unsigned int len)
-{
- struct spi_device *spi = dev->priv;
-
- return ksz9477_spi_read_reg(spi, reg, data, len);
-}
-
-static int ksz_spi_write(struct ksz_device *dev, u32 reg, void *data,
- unsigned int len)
-{
- struct spi_device *spi = dev->priv;
-
- if (len > SPI_TX_BUF_LEN)
- len = SPI_TX_BUF_LEN;
- memcpy(&dev->txbuf[4], data, len);
- return ksz9477_spi_write_reg(spi, reg, dev->txbuf, len);
-}
-
-static int ksz_spi_read24(struct ksz_device *dev, u32 reg, u32 *val)
-{
- int ret;
-
- *val = 0;
- ret = ksz_spi_read(dev, reg, (u8 *)val, 3);
- if (!ret) {
- *val = be32_to_cpu(*val);
- /* convert to 24bit */
- *val >>= 8;
- }
-
- return ret;
-}
-
-static int ksz_spi_write24(struct ksz_device *dev, u32 reg, u32 value)
-{
- /* make it to big endian 24bit from MSB */
- value <<= 8;
- value = cpu_to_be32(value);
- return ksz_spi_write(dev, reg, &value, 3);
-}
-
-static const struct ksz_io_ops ksz9477_spi_ops = {
- .read8 = ksz_spi_read8,
- .read16 = ksz_spi_read16,
- .read24 = ksz_spi_read24,
- .read32 = ksz_spi_read32,
- .write8 = ksz_spi_write8,
- .write16 = ksz_spi_write16,
- .write24 = ksz_spi_write24,
- .write32 = ksz_spi_write32,
- .get = ksz_spi_get,
- .set = ksz_spi_set,
-};
+KSZ_REGMAP_TABLE(ksz9477, 32, SPI_ADDR_SHIFT,
+ SPI_TURNAROUND_SHIFT, SPI_ADDR_ALIGN);
static int ksz9477_spi_probe(struct spi_device *spi)
{
struct ksz_device *dev;
- int ret;
+ int i, ret;
- dev = ksz_switch_alloc(&spi->dev, &ksz9477_spi_ops, spi);
+ dev = ksz_switch_alloc(&spi->dev, spi);
if (!dev)
return -ENOMEM;
+ for (i = 0; i < ARRAY_SIZE(ksz9477_regmap_config); i++) {
+ dev->regmap[i] = devm_regmap_init_spi(spi,
+ &ksz9477_regmap_config[i]);
+ if (IS_ERR(dev->regmap[i])) {
+ ret = PTR_ERR(dev->regmap[i]);
+ dev_err(&spi->dev,
+ "Failed to initialize regmap%i: %d\n",
+ ksz9477_regmap_config[i].val_bits, ret);
+ return ret;
+ }
+ }
+
if (spi->dev.platform_data)
dev->pdata = spi->dev.platform_data;
- dev->txbuf = devm_kzalloc(dev->dev, 4 + SPI_TX_BUF_LEN, GFP_KERNEL);
-
ret = ksz9477_switch_register(dev);
/* Main DSA driver may not be started yet. */
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 4f6648d5ac8b..a3d2d67894bd 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -396,9 +396,7 @@ void ksz_disable_port(struct dsa_switch *ds, int port)
}
EXPORT_SYMBOL_GPL(ksz_disable_port);
-struct ksz_device *ksz_switch_alloc(struct device *base,
- const struct ksz_io_ops *ops,
- void *priv)
+struct ksz_device *ksz_switch_alloc(struct device *base, void *priv)
{
struct dsa_switch *ds;
struct ksz_device *swdev;
@@ -416,7 +414,6 @@ struct ksz_device *ksz_switch_alloc(struct device *base,
swdev->ds = ds;
swdev->priv = priv;
- swdev->ops = ops;
return swdev;
}
@@ -436,13 +433,12 @@ int ksz_switch_register(struct ksz_device *dev,
return PTR_ERR(dev->reset_gpio);
if (dev->reset_gpio) {
- gpiod_set_value(dev->reset_gpio, 1);
+ gpiod_set_value_cansleep(dev->reset_gpio, 1);
mdelay(10);
- gpiod_set_value(dev->reset_gpio, 0);
+ gpiod_set_value_cansleep(dev->reset_gpio, 0);
}
mutex_init(&dev->dev_mutex);
- mutex_init(&dev->reg_mutex);
mutex_init(&dev->stats_mutex);
mutex_init(&dev->alu_mutex);
mutex_init(&dev->vlan_mutex);
@@ -489,7 +485,7 @@ void ksz_switch_remove(struct ksz_device *dev)
dsa_unregister_switch(dev->ds);
if (dev->reset_gpio)
- gpiod_set_value(dev->reset_gpio, 1);
+ gpiod_set_value_cansleep(dev->reset_gpio, 1);
}
EXPORT_SYMBOL(ksz_switch_remove);
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index 21cd794e18f1..ee7096d8af07 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -7,6 +7,8 @@
#ifndef __KSZ_COMMON_H
#define __KSZ_COMMON_H
+#include <linux/regmap.h>
+
void ksz_port_cleanup(struct ksz_device *dev, int port);
void ksz_update_port_member(struct ksz_device *dev, int port);
void ksz_init_mib_timer(struct ksz_device *dev);
@@ -41,114 +43,44 @@ void ksz_disable_port(struct dsa_switch *ds, int port);
static inline int ksz_read8(struct ksz_device *dev, u32 reg, u8 *val)
{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->read8(dev, reg, val);
- mutex_unlock(&dev->reg_mutex);
+ unsigned int value;
+ int ret = regmap_read(dev->regmap[0], reg, &value);
+ *val = value;
return ret;
}
static inline int ksz_read16(struct ksz_device *dev, u32 reg, u16 *val)
{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->read16(dev, reg, val);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int ksz_read24(struct ksz_device *dev, u32 reg, u32 *val)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->read24(dev, reg, val);
- mutex_unlock(&dev->reg_mutex);
+ unsigned int value;
+ int ret = regmap_read(dev->regmap[1], reg, &value);
+ *val = value;
return ret;
}
static inline int ksz_read32(struct ksz_device *dev, u32 reg, u32 *val)
{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->read32(dev, reg, val);
- mutex_unlock(&dev->reg_mutex);
+ unsigned int value;
+ int ret = regmap_read(dev->regmap[2], reg, &value);
+ *val = value;
return ret;
}
static inline int ksz_write8(struct ksz_device *dev, u32 reg, u8 value)
{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->write8(dev, reg, value);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
+ return regmap_write(dev->regmap[0], reg, value);
}
static inline int ksz_write16(struct ksz_device *dev, u32 reg, u16 value)
{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->write16(dev, reg, value);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int ksz_write24(struct ksz_device *dev, u32 reg, u32 value)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->write24(dev, reg, value);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
+ return regmap_write(dev->regmap[1], reg, value);
}
static inline int ksz_write32(struct ksz_device *dev, u32 reg, u32 value)
{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->write32(dev, reg, value);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int ksz_get(struct ksz_device *dev, u32 reg, void *data,
- size_t len)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->get(dev, reg, data, len);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int ksz_set(struct ksz_device *dev, u32 reg, void *data,
- size_t len)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->set(dev, reg, data, len);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
+ return regmap_write(dev->regmap[2], reg, value);
}
static inline void ksz_pread8(struct ksz_device *dev, int port, int offset,
@@ -187,47 +119,36 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset,
ksz_write32(dev, dev->dev_ops->get_port_addr(port, offset), data);
}
-static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set)
-{
- u8 data;
-
- ksz_read8(dev, addr, &data);
- if (set)
- data |= bits;
- else
- data &= ~bits;
- ksz_write8(dev, addr, data);
-}
-
-static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits,
- bool set)
-{
- u32 addr;
- u8 data;
-
- addr = dev->dev_ops->get_port_addr(port, offset);
- ksz_read8(dev, addr, &data);
-
- if (set)
- data |= bits;
- else
- data &= ~bits;
-
- ksz_write8(dev, addr, data);
-}
-
-struct ksz_poll_ctx {
- struct ksz_device *dev;
- int port;
- int offset;
-};
-
-static inline u32 ksz_pread32_poll(struct ksz_poll_ctx *ctx)
-{
- u32 data;
-
- ksz_pread32(ctx->dev, ctx->port, ctx->offset, &data);
- return data;
-}
+/* Regmap tables generation */
+#define KSZ_SPI_OP_RD 3
+#define KSZ_SPI_OP_WR 2
+
+#define KSZ_SPI_OP_FLAG_MASK(opcode, swp, regbits, regpad) \
+ swab##swp((opcode) << ((regbits) + (regpad)))
+
+#define KSZ_REGMAP_ENTRY(width, swp, regbits, regpad, regalign) \
+ { \
+ .val_bits = (width), \
+ .reg_stride = (width) / 8, \
+ .reg_bits = (regbits) + (regalign), \
+ .pad_bits = (regpad), \
+ .max_register = BIT(regbits) - 1, \
+ .cache_type = REGCACHE_NONE, \
+ .read_flag_mask = \
+ KSZ_SPI_OP_FLAG_MASK(KSZ_SPI_OP_RD, swp, \
+ regbits, regpad), \
+ .write_flag_mask = \
+ KSZ_SPI_OP_FLAG_MASK(KSZ_SPI_OP_WR, swp, \
+ regbits, regpad), \
+ .reg_format_endian = REGMAP_ENDIAN_BIG, \
+ .val_format_endian = REGMAP_ENDIAN_BIG \
+ }
+
+#define KSZ_REGMAP_TABLE(ksz, swp, regbits, regpad, regalign) \
+ static const struct regmap_config ksz##_regmap_config[] = { \
+ KSZ_REGMAP_ENTRY(8, swp, (regbits), (regpad), (regalign)), \
+ KSZ_REGMAP_ENTRY(16, swp, (regbits), (regpad), (regalign)), \
+ KSZ_REGMAP_ENTRY(32, swp, (regbits), (regpad), (regalign)), \
+ }
#endif
diff --git a/drivers/net/dsa/microchip/ksz_priv.h b/drivers/net/dsa/microchip/ksz_priv.h
index c615d2a81dd5..beacf0e40f42 100644
--- a/drivers/net/dsa/microchip/ksz_priv.h
+++ b/drivers/net/dsa/microchip/ksz_priv.h
@@ -14,8 +14,6 @@
#include <linux/etherdevice.h>
#include <net/dsa.h>
-struct ksz_io_ops;
-
struct vlan_table {
u32 table[3];
};
@@ -49,14 +47,13 @@ struct ksz_device {
const char *name;
struct mutex dev_mutex; /* device access */
- struct mutex reg_mutex; /* register access */
struct mutex stats_mutex; /* status access */
struct mutex alu_mutex; /* ALU access */
struct mutex vlan_mutex; /* vlan access */
- const struct ksz_io_ops *ops;
const struct ksz_dev_ops *dev_ops;
struct device *dev;
+ struct regmap *regmap[3];
void *priv;
@@ -82,8 +79,6 @@ struct ksz_device {
struct vlan_table *vlan_cache;
- u8 *txbuf;
-
struct ksz_port *ports;
struct timer_list mib_read_timer;
struct work_struct mib_read;
@@ -102,19 +97,6 @@ struct ksz_device {
u16 port_mask;
};
-struct ksz_io_ops {
- int (*read8)(struct ksz_device *dev, u32 reg, u8 *value);
- int (*read16)(struct ksz_device *dev, u32 reg, u16 *value);
- int (*read24)(struct ksz_device *dev, u32 reg, u32 *value);
- int (*read32)(struct ksz_device *dev, u32 reg, u32 *value);
- int (*write8)(struct ksz_device *dev, u32 reg, u8 value);
- int (*write16)(struct ksz_device *dev, u32 reg, u16 value);
- int (*write24)(struct ksz_device *dev, u32 reg, u32 value);
- int (*write32)(struct ksz_device *dev, u32 reg, u32 value);
- int (*get)(struct ksz_device *dev, u32 reg, void *data, size_t len);
- int (*set)(struct ksz_device *dev, u32 reg, void *data, size_t len);
-};
-
struct alu_struct {
/* entry 1 */
u8 is_static:1;
@@ -163,8 +145,7 @@ struct ksz_dev_ops {
void (*exit)(struct ksz_device *dev);
};
-struct ksz_device *ksz_switch_alloc(struct device *base,
- const struct ksz_io_ops *ops, void *priv);
+struct ksz_device *ksz_switch_alloc(struct device *base, void *priv);
int ksz_switch_register(struct ksz_device *dev,
const struct ksz_dev_ops *ops);
void ksz_switch_remove(struct ksz_device *dev);
diff --git a/drivers/net/dsa/microchip/ksz_spi.h b/drivers/net/dsa/microchip/ksz_spi.h
deleted file mode 100644
index 427811bd60b3..000000000000
--- a/drivers/net/dsa/microchip/ksz_spi.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Microchip KSZ series SPI access common header
- *
- * Copyright (C) 2017-2018 Microchip Technology Inc.
- * Tristram Ha <[email protected]>
- */
-
-#ifndef __KSZ_SPI_H
-#define __KSZ_SPI_H
-
-/* Chip dependent SPI access */
-static int ksz_spi_read(struct ksz_device *dev, u32 reg, u8 *data,
- unsigned int len);
-static int ksz_spi_write(struct ksz_device *dev, u32 reg, void *data,
- unsigned int len);
-
-static int ksz_spi_read8(struct ksz_device *dev, u32 reg, u8 *val)
-{
- return ksz_spi_read(dev, reg, val, 1);
-}
-
-static int ksz_spi_read16(struct ksz_device *dev, u32 reg, u16 *val)
-{
- int ret = ksz_spi_read(dev, reg, (u8 *)val, 2);
-
- if (!ret)
- *val = be16_to_cpu(*val);
-
- return ret;
-}
-
-static int ksz_spi_read32(struct ksz_device *dev, u32 reg, u32 *val)
-{
- int ret = ksz_spi_read(dev, reg, (u8 *)val, 4);
-
- if (!ret)
- *val = be32_to_cpu(*val);
-
- return ret;
-}
-
-static int ksz_spi_write8(struct ksz_device *dev, u32 reg, u8 value)
-{
- return ksz_spi_write(dev, reg, &value, 1);
-}
-
-static int ksz_spi_write16(struct ksz_device *dev, u32 reg, u16 value)
-{
- value = cpu_to_be16(value);
- return ksz_spi_write(dev, reg, &value, 2);
-}
-
-static int ksz_spi_write32(struct ksz_device *dev, u32 reg, u32 value)
-{
- value = cpu_to_be32(value);
- return ksz_spi_write(dev, reg, &value, 4);
-}
-
-static int ksz_spi_get(struct ksz_device *dev, u32 reg, void *data, size_t len)
-{
- return ksz_spi_read(dev, reg, data, len);
-}
-
-static int ksz_spi_set(struct ksz_device *dev, u32 reg, void *data, size_t len)
-{
- return ksz_spi_write(dev, reg, data, len);
-}
-
-#endif
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index c4fa400efdcc..27709f866c23 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -14,6 +14,7 @@
#include <linux/of_platform.h>
#include <linux/if_bridge.h>
#include <linux/mdio.h>
+#include <linux/gpio.h>
#include <linux/etherdevice.h>
#include "qca8k.h"
@@ -1046,6 +1047,20 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
priv->bus = mdiodev->bus;
priv->dev = &mdiodev->dev;
+ priv->reset_gpio = devm_gpiod_get_optional(priv->dev, "reset",
+ GPIOD_ASIS);
+ if (IS_ERR(priv->reset_gpio))
+ return PTR_ERR(priv->reset_gpio);
+
+ if (priv->reset_gpio) {
+ gpiod_set_value_cansleep(priv->reset_gpio, 1);
+ /* The active low duration must be greater than 10 ms
+ * and checkpatch.pl wants 20 ms.
+ */
+ msleep(20);
+ gpiod_set_value_cansleep(priv->reset_gpio, 0);
+ }
+
/* read the switches ID register */
id = qca8k_read(priv, QCA8K_REG_MASK_CTRL);
id >>= QCA8K_MASK_CTRL_ID_S;
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index 91557433ce2f..42d6ea24eb14 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -10,6 +10,7 @@
#include <linux/delay.h>
#include <linux/regmap.h>
+#include <linux/gpio.h>
#define QCA8K_NUM_PORTS 7
@@ -174,6 +175,7 @@ struct qca8k_priv {
struct mutex reg_mutex;
struct device *dev;
struct dsa_switch_ops ops;
+ struct gpio_desc *reset_gpio;
};
struct qca8k_mib_desc {
diff --git a/drivers/net/dsa/sja1105/Makefile b/drivers/net/dsa/sja1105/Makefile
index 9a22f68b39e9..4483113e6259 100644
--- a/drivers/net/dsa/sja1105/Makefile
+++ b/drivers/net/dsa/sja1105/Makefile
@@ -10,5 +10,5 @@ sja1105-objs := \
sja1105_dynamic_config.o \
ifdef CONFIG_NET_DSA_SJA1105_PTP
-obj-$(CONFIG_NET_DSA_SJA1105) += sja1105_ptp.o
+sja1105-objs += sja1105_ptp.o
endif
diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index 56c83b9d52e4..6bfb1696a6f2 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -3,6 +3,98 @@
*/
#include "sja1105.h"
+/* In the dynamic configuration interface, the switch exposes a register-like
+ * view of some of the static configuration tables.
+ * Many times the field organization of the dynamic tables is abbreviated (not
+ * all fields are dynamically reconfigurable) and different from the static
+ * ones, but the key reason for having it is that we can spare a switch reset
+ * for settings that can be changed dynamically.
+ *
+ * This file creates a per-switch-family abstraction called
+ * struct sja1105_dynamic_table_ops and two operations that work with it:
+ * - sja1105_dynamic_config_write
+ * - sja1105_dynamic_config_read
+ *
+ * Compared to the struct sja1105_table_ops from sja1105_static_config.c,
+ * the dynamic accessors work with a compound buffer:
+ *
+ * packed_buf
+ *
+ * |
+ * V
+ * +-----------------------------------------+------------------+
+ * | ENTRY BUFFER | COMMAND BUFFER |
+ * +-----------------------------------------+------------------+
+ *
+ * <----------------------- packed_size ------------------------>
+ *
+ * The ENTRY BUFFER may or may not have the same layout, or size, as its static
+ * configuration table entry counterpart. When it does, the same packing
+ * function is reused (bar exceptional cases - see
+ * sja1105pqrs_dyn_l2_lookup_entry_packing).
+ *
+ * The reason for the COMMAND BUFFER being at the end is to be able to send
+ * a dynamic write command through a single SPI burst. By the time the switch
+ * reacts to the command, the ENTRY BUFFER is already populated with the data
+ * sent by the core.
+ *
+ * The COMMAND BUFFER is always SJA1105_SIZE_DYN_CMD bytes (one 32-bit word) in
+ * size.
+ *
+ * Sometimes the ENTRY BUFFER does not really exist (when the number of fields
+ * that can be reconfigured is small), then the switch repurposes some of the
+ * unused 32 bits of the COMMAND BUFFER to hold ENTRY data.
+ *
+ * The key members of struct sja1105_dynamic_table_ops are:
+ * - .entry_packing: A function that deals with packing an ENTRY structure
+ * into an SPI buffer, or retrieving an ENTRY structure
+ * from one.
+ * The @packed_buf pointer it's given does always point to
+ * the ENTRY portion of the buffer.
+ * - .cmd_packing: A function that deals with packing/unpacking the COMMAND
+ * structure to/from the SPI buffer.
+ * It is given the same @packed_buf pointer as .entry_packing,
+ * so most of the time, the @packed_buf points *behind* the
+ * COMMAND offset inside the buffer.
+ * To access the COMMAND portion of the buffer, the function
+ * knows its correct offset.
+ * Giving both functions the same pointer is handy because in
+ * extreme cases (see sja1105pqrs_dyn_l2_lookup_entry_packing)
+ * the .entry_packing is able to jump to the COMMAND portion,
+ * or vice-versa (sja1105pqrs_l2_lookup_cmd_packing).
+ * - .access: A bitmap of:
+ * OP_READ: Set if the hardware manual marks the ENTRY portion of the
+ * dynamic configuration table buffer as R (readable) after
+ * an SPI read command (the switch will populate the buffer).
+ * OP_WRITE: Set if the manual marks the ENTRY portion of the dynamic
+ * table buffer as W (writable) after an SPI write command
+ * (the switch will read the fields provided in the buffer).
+ * OP_DEL: Set if the manual says the VALIDENT bit is supported in the
+ * COMMAND portion of this dynamic config buffer (i.e. the
+ * specified entry can be invalidated through a SPI write
+ * command).
+ * OP_SEARCH: Set if the manual says that the index of an entry can
+ * be retrieved in the COMMAND portion of the buffer based
+ * on its ENTRY portion, as a result of a SPI write command.
+ * Only the TCAM-based FDB table on SJA1105 P/Q/R/S supports
+ * this.
+ * - .max_entry_count: The number of entries, counting from zero, that can be
+ * reconfigured through the dynamic interface. If a static
+ * table can be reconfigured at all dynamically, this
+ * number always matches the maximum number of supported
+ * static entries.
+ * - .packed_size: The length in bytes of the compound ENTRY + COMMAND BUFFER.
+ * Note that sometimes the compound buffer may contain holes in
+ * it (see sja1105_vlan_lookup_cmd_packing). The @packed_buf is
+ * contiguous however, so @packed_size includes any unused
+ * bytes.
+ * - .addr: The base SPI address at which the buffer must be written to the
+ * switch's memory. When looking at the hardware manual, this must
+ * always match the lowest documented address for the ENTRY, and not
+ * that of the COMMAND, since the other 32-bit words will follow along
+ * at the correct addresses.
+ */
+
#define SJA1105_SIZE_DYN_CMD 4
#define SJA1105ET_SIZE_MAC_CONFIG_DYN_ENTRY \
@@ -57,13 +149,11 @@ sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
{
u8 *p = buf + SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY;
const int size = SJA1105_SIZE_DYN_CMD;
- u64 lockeds = 0;
u64 hostcmd;
sja1105_packing(p, &cmd->valid, 31, 31, size, op);
sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op);
sja1105_packing(p, &cmd->errors, 29, 29, size, op);
- sja1105_packing(p, &lockeds, 28, 28, size, op);
sja1105_packing(p, &cmd->valident, 27, 27, size, op);
/* VALIDENT is supposed to indicate "keep or not", but in SJA1105 E/T,
@@ -113,6 +203,64 @@ sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY, op);
}
+/* The switch is so retarded that it makes our command/entry abstraction
+ * crumble apart.
+ *
+ * On P/Q/R/S, the switch tries to say whether a FDB entry
+ * is statically programmed or dynamically learned via a flag called LOCKEDS.
+ * The hardware manual says about this fiels:
+ *
+ * On write will specify the format of ENTRY.
+ * On read the flag will be found cleared at times the VALID flag is found
+ * set. The flag will also be found cleared in response to a read having the
+ * MGMTROUTE flag set. In response to a read with the MGMTROUTE flag
+ * cleared, the flag be set if the most recent access operated on an entry
+ * that was either loaded by configuration or through dynamic reconfiguration
+ * (as opposed to automatically learned entries).
+ *
+ * The trouble with this flag is that it's part of the *command* to access the
+ * dynamic interface, and not part of the *entry* retrieved from it.
+ * Otherwise said, for a sja1105_dynamic_config_read, LOCKEDS is supposed to be
+ * an output from the switch into the command buffer, and for a
+ * sja1105_dynamic_config_write, the switch treats LOCKEDS as an input
+ * (hence we can write either static, or automatically learned entries, from
+ * the core).
+ * But the manual contradicts itself in the last phrase where it says that on
+ * read, LOCKEDS will be set to 1 for all FDB entries written through the
+ * dynamic interface (therefore, the value of LOCKEDS from the
+ * sja1105_dynamic_config_write is not really used for anything, it'll store a
+ * 1 anyway).
+ * This means you can't really write a FDB entry with LOCKEDS=0 (automatically
+ * learned) into the switch, which kind of makes sense.
+ * As for reading through the dynamic interface, it doesn't make too much sense
+ * to put LOCKEDS into the command, since the switch will inevitably have to
+ * ignore it (otherwise a command would be like "read the FDB entry 123, but
+ * only if it's dynamically learned" <- well how am I supposed to know?) and
+ * just use it as an output buffer for its findings. But guess what... that's
+ * what the entry buffer is for!
+ * Unfortunately, what really breaks this abstraction is the fact that it
+ * wasn't designed having the fact in mind that the switch can output
+ * entry-related data as writeback through the command buffer.
+ * However, whether a FDB entry is statically or dynamically learned *is* part
+ * of the entry and not the command data, no matter what the switch thinks.
+ * In order to do that, we'll need to wrap around the
+ * sja1105pqrs_l2_lookup_entry_packing from sja1105_static_config.c, and take
+ * a peek outside of the caller-supplied @buf (the entry buffer), to reach the
+ * command buffer.
+ */
+static size_t
+sja1105pqrs_dyn_l2_lookup_entry_packing(void *buf, void *entry_ptr,
+ enum packing_op op)
+{
+ struct sja1105_l2_lookup_entry *entry = entry_ptr;
+ u8 *cmd = buf + SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY;
+ const int size = SJA1105_SIZE_DYN_CMD;
+
+ sja1105_packing(cmd, &entry->lockeds, 28, 28, size, op);
+
+ return sja1105pqrs_l2_lookup_entry_packing(buf, entry_ptr, op);
+}
+
static void
sja1105et_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
enum packing_op op)
@@ -393,7 +541,7 @@ struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = {
/* SJA1105P/Q/R/S: Second generation */
struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = {
[BLK_IDX_L2_LOOKUP] = {
- .entry_packing = sja1105pqrs_l2_lookup_entry_packing,
+ .entry_packing = sja1105pqrs_dyn_l2_lookup_entry_packing,
.cmd_packing = sja1105pqrs_l2_lookup_cmd_packing,
.access = (OP_READ | OP_WRITE | OP_DEL | OP_SEARCH),
.max_entry_count = SJA1105_MAX_L2_LOOKUP_COUNT,
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 9395e8f5f790..32bf3a7cc3b6 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -80,7 +80,7 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
.maxage = 0xFF,
/* Internal VLAN (pvid) to apply to untagged ingress */
.vlanprio = 0,
- .vlanid = 0,
+ .vlanid = 1,
.ing_mirr = false,
.egr_mirr = false,
/* Don't drop traffic with other EtherType than ETH_P_IP */
@@ -203,6 +203,7 @@ static int sja1105_init_static_fdb(struct sja1105_private *priv)
static int sja1105_init_l2_lookup_params(struct sja1105_private *priv)
{
struct sja1105_table *table;
+ u64 max_fdb_entries = SJA1105_MAX_L2_LOOKUP_COUNT / SJA1105_NUM_PORTS;
struct sja1105_l2_lookup_params_entry default_l2_lookup_params = {
/* Learned FDB entries are forgotten after 300 seconds */
.maxage = SJA1105_AGEING_TIME_MS(300000),
@@ -210,6 +211,8 @@ static int sja1105_init_l2_lookup_params(struct sja1105_private *priv)
.dyn_tbsz = SJA1105ET_FDB_BIN_SIZE,
/* And the P/Q/R/S equivalent setting: */
.start_dynspc = 0,
+ .maxaddrp = {max_fdb_entries, max_fdb_entries, max_fdb_entries,
+ max_fdb_entries, max_fdb_entries, },
/* 2^8 + 2^5 + 2^3 + 2^2 + 2^1 + 1 in Koopman notation */
.poly = 0x97,
/* This selects between Independent VLAN Learning (IVL) and
@@ -264,20 +267,15 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
.vmemb_port = 0,
.vlan_bc = 0,
.tag_port = 0,
- .vlanid = 0,
+ .vlanid = 1,
};
int i;
table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
- /* The static VLAN table will only contain the initial pvid of 0.
+ /* The static VLAN table will only contain the initial pvid of 1.
* All other VLANs are to be configured through dynamic entries,
* and kept in the static configuration table as backing memory.
- * The pvid of 0 is sufficient to pass traffic while the ports are
- * standalone and when vlan_filtering is disabled. When filtering
- * gets enabled, the switchdev core sets up the VLAN ID 1 and sets
- * it as the new pvid. Actually 'pvid 1' still comes up in 'bridge
- * vlan' even when vlan_filtering is off, but it has no effect.
*/
if (table->entry_count) {
kfree(table->entries);
@@ -291,7 +289,7 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
table->entry_count = 1;
- /* VLAN ID 0: all DT-defined ports are members; no restrictions on
+ /* VLAN 1: all DT-defined ports are members; no restrictions on
* forwarding; always transmit priority-tagged frames as untagged.
*/
for (i = 0; i < SJA1105_NUM_PORTS; i++) {
@@ -717,7 +715,13 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
switch (speed_mbps) {
case SPEED_UNKNOWN:
- /* No speed update requested */
+ /* PHYLINK called sja1105_mac_config() to inform us about
+ * the state->interface, but AN has not completed and the
+ * speed is not yet valid. UM10944.pdf says that setting
+ * SJA1105_SPEED_AUTO at runtime disables the port, so that is
+ * ok for power consumption in case AN will never complete -
+ * otherwise PHYLINK should come back with a new update.
+ */
speed = SJA1105_SPEED_AUTO;
break;
case SPEED_10:
@@ -762,14 +766,50 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
return sja1105_clocking_setup_port(priv, port);
}
+/* The SJA1105 MAC programming model is through the static config (the xMII
+ * Mode table cannot be dynamically reconfigured), and we have to program
+ * that early (earlier than PHYLINK calls us, anyway).
+ * So just error out in case the connected PHY attempts to change the initial
+ * system interface MII protocol from what is defined in the DT, at least for
+ * now.
+ */
+static bool sja1105_phy_mode_mismatch(struct sja1105_private *priv, int port,
+ phy_interface_t interface)
+{
+ struct sja1105_xmii_params_entry *mii;
+ sja1105_phy_interface_t phy_mode;
+
+ mii = priv->static_config.tables[BLK_IDX_XMII_PARAMS].entries;
+ phy_mode = mii->xmii_mode[port];
+
+ switch (interface) {
+ case PHY_INTERFACE_MODE_MII:
+ return (phy_mode != XMII_MODE_MII);
+ case PHY_INTERFACE_MODE_RMII:
+ return (phy_mode != XMII_MODE_RMII);
+ case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ case PHY_INTERFACE_MODE_RGMII_RXID:
+ case PHY_INTERFACE_MODE_RGMII_TXID:
+ return (phy_mode != XMII_MODE_RGMII);
+ default:
+ return true;
+ }
+}
+
static void sja1105_mac_config(struct dsa_switch *ds, int port,
unsigned int link_an_mode,
const struct phylink_link_state *state)
{
struct sja1105_private *priv = ds->priv;
- if (!state->link)
+ if (sja1105_phy_mode_mismatch(priv, port, state->interface))
+ return;
+
+ if (link_an_mode == MLO_AN_INBAND) {
+ dev_err(ds->dev, "In-band AN not supported!\n");
return;
+ }
sja1105_adjust_port_config(priv, port, state->speed);
}
@@ -803,6 +843,16 @@ static void sja1105_phylink_validate(struct dsa_switch *ds, int port,
mii = priv->static_config.tables[BLK_IDX_XMII_PARAMS].entries;
+ /* include/linux/phylink.h says:
+ * When @state->interface is %PHY_INTERFACE_MODE_NA, phylink
+ * expects the MAC driver to return all supported link modes.
+ */
+ if (state->interface != PHY_INTERFACE_MODE_NA &&
+ sja1105_phy_mode_mismatch(priv, port, state->interface)) {
+ bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ return;
+ }
+
/* The MAC does not support pause frames, and also doesn't
* support half-duplex traffic modes.
*/
@@ -818,6 +868,77 @@ static void sja1105_phylink_validate(struct dsa_switch *ds, int port,
__ETHTOOL_LINK_MODE_MASK_NBITS);
}
+static int
+sja1105_find_static_fdb_entry(struct sja1105_private *priv, int port,
+ const struct sja1105_l2_lookup_entry *requested)
+{
+ struct sja1105_l2_lookup_entry *l2_lookup;
+ struct sja1105_table *table;
+ int i;
+
+ table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP];
+ l2_lookup = table->entries;
+
+ for (i = 0; i < table->entry_count; i++)
+ if (l2_lookup[i].macaddr == requested->macaddr &&
+ l2_lookup[i].vlanid == requested->vlanid &&
+ l2_lookup[i].destports & BIT(port))
+ return i;
+
+ return -1;
+}
+
+/* We want FDB entries added statically through the bridge command to persist
+ * across switch resets, which are a common thing during normal SJA1105
+ * operation. So we have to back them up in the static configuration tables
+ * and hence apply them on next static config upload... yay!
+ */
+static int
+sja1105_static_fdb_change(struct sja1105_private *priv, int port,
+ const struct sja1105_l2_lookup_entry *requested,
+ bool keep)
+{
+ struct sja1105_l2_lookup_entry *l2_lookup;
+ struct sja1105_table *table;
+ int rc, match;
+
+ table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP];
+
+ match = sja1105_find_static_fdb_entry(priv, port, requested);
+ if (match < 0) {
+ /* Can't delete a missing entry. */
+ if (!keep)
+ return 0;
+
+ /* No match => new entry */
+ rc = sja1105_table_resize(table, table->entry_count + 1);
+ if (rc)
+ return rc;
+
+ match = table->entry_count - 1;
+ }
+
+ /* Assign pointer after the resize (it may be new memory) */
+ l2_lookup = table->entries;
+
+ /* We have a match.
+ * If the job was to add this FDB entry, it's already done (mostly
+ * anyway, since the port forwarding mask may have changed, case in
+ * which we update it).
+ * Otherwise we have to delete it.
+ */
+ if (keep) {
+ l2_lookup[match] = *requested;
+ return 0;
+ }
+
+ /* To remove, the strategy is to overwrite the element with
+ * the last one, and then reduce the array size by 1
+ */
+ l2_lookup[match] = l2_lookup[table->entry_count - 1];
+ return sja1105_table_resize(table, table->entry_count - 1);
+}
+
/* First-generation switches have a 4-way set associative TCAM that
* holds the FDB entries. An FDB index spans from 0 to 1023 and is comprised of
* a "bin" (grouping of 4 entries) and a "way" (an entry within a bin).
@@ -868,7 +989,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port,
struct sja1105_private *priv = ds->priv;
struct device *dev = ds->dev;
int last_unused = -1;
- int bin, way;
+ int bin, way, rc;
bin = sja1105et_fdb_hash(priv, addr, vid);
@@ -912,9 +1033,13 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port,
}
l2_lookup.index = sja1105et_fdb_index(bin, way);
- return sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
- l2_lookup.index, &l2_lookup,
- true);
+ rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
+ l2_lookup.index, &l2_lookup,
+ true);
+ if (rc < 0)
+ return rc;
+
+ return sja1105_static_fdb_change(priv, port, &l2_lookup, true);
}
int sja1105et_fdb_del(struct dsa_switch *ds, int port,
@@ -922,7 +1047,7 @@ int sja1105et_fdb_del(struct dsa_switch *ds, int port,
{
struct sja1105_l2_lookup_entry l2_lookup = {0};
struct sja1105_private *priv = ds->priv;
- int index, bin, way;
+ int index, bin, way, rc;
bool keep;
bin = sja1105et_fdb_hash(priv, addr, vid);
@@ -944,8 +1069,12 @@ int sja1105et_fdb_del(struct dsa_switch *ds, int port,
else
keep = false;
- return sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
- index, &l2_lookup, keep);
+ rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
+ index, &l2_lookup, keep);
+ if (rc < 0)
+ return rc;
+
+ return sja1105_static_fdb_change(priv, port, &l2_lookup, keep);
}
int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
@@ -993,12 +1122,17 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
dev_err(ds->dev, "FDB is full, cannot add entry.\n");
return -EINVAL;
}
+ l2_lookup.lockeds = true;
l2_lookup.index = i;
skip_finding_an_index:
- return sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
- l2_lookup.index, &l2_lookup,
- true);
+ rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
+ l2_lookup.index, &l2_lookup,
+ true);
+ if (rc < 0)
+ return rc;
+
+ return sja1105_static_fdb_change(priv, port, &l2_lookup, true);
}
int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port,
@@ -1032,52 +1166,72 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port,
else
keep = false;
- return sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
- l2_lookup.index, &l2_lookup, keep);
+ rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
+ l2_lookup.index, &l2_lookup, keep);
+ if (rc < 0)
+ return rc;
+
+ return sja1105_static_fdb_change(priv, port, &l2_lookup, keep);
}
static int sja1105_fdb_add(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid)
{
struct sja1105_private *priv = ds->priv;
- int rc;
+ u16 rx_vid, tx_vid;
+ int rc, i;
+
+ if (dsa_port_is_vlan_filtering(&ds->ports[port]))
+ return priv->info->fdb_add_cmd(ds, port, addr, vid);
/* Since we make use of VLANs even when the bridge core doesn't tell us
* to, translate these FDB entries into the correct dsa_8021q ones.
+ * The basic idea (also repeats for removal below) is:
+ * - Each of the other front-panel ports needs to be able to forward a
+ * pvid-tagged (aka tagged with their rx_vid) frame that matches this
+ * DMAC.
+ * - The CPU port (aka the tx_vid of this port) needs to be able to
+ * send a frame matching this DMAC to the specified port.
+ * For a better picture see net/dsa/tag_8021q.c.
*/
- if (!dsa_port_is_vlan_filtering(&ds->ports[port])) {
- unsigned int upstream = dsa_upstream_port(priv->ds, port);
- u16 tx_vid = dsa_8021q_tx_vid(ds, port);
- u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+ for (i = 0; i < SJA1105_NUM_PORTS; i++) {
+ if (i == port)
+ continue;
+ if (i == dsa_upstream_port(priv->ds, port))
+ continue;
- rc = priv->info->fdb_add_cmd(ds, port, addr, tx_vid);
+ rx_vid = dsa_8021q_rx_vid(ds, i);
+ rc = priv->info->fdb_add_cmd(ds, port, addr, rx_vid);
if (rc < 0)
return rc;
- return priv->info->fdb_add_cmd(ds, upstream, addr, rx_vid);
}
- return priv->info->fdb_add_cmd(ds, port, addr, vid);
+ tx_vid = dsa_8021q_tx_vid(ds, port);
+ return priv->info->fdb_add_cmd(ds, port, addr, tx_vid);
}
static int sja1105_fdb_del(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid)
{
struct sja1105_private *priv = ds->priv;
- int rc;
+ u16 rx_vid, tx_vid;
+ int rc, i;
- /* Since we make use of VLANs even when the bridge core doesn't tell us
- * to, translate these FDB entries into the correct dsa_8021q ones.
- */
- if (!dsa_port_is_vlan_filtering(&ds->ports[port])) {
- unsigned int upstream = dsa_upstream_port(priv->ds, port);
- u16 tx_vid = dsa_8021q_tx_vid(ds, port);
- u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+ if (dsa_port_is_vlan_filtering(&ds->ports[port]))
+ return priv->info->fdb_del_cmd(ds, port, addr, vid);
- rc = priv->info->fdb_del_cmd(ds, port, addr, tx_vid);
+ for (i = 0; i < SJA1105_NUM_PORTS; i++) {
+ if (i == port)
+ continue;
+ if (i == dsa_upstream_port(priv->ds, port))
+ continue;
+
+ rx_vid = dsa_8021q_rx_vid(ds, i);
+ rc = priv->info->fdb_del_cmd(ds, port, addr, rx_vid);
if (rc < 0)
return rc;
- return priv->info->fdb_del_cmd(ds, upstream, addr, rx_vid);
}
- return priv->info->fdb_del_cmd(ds, port, addr, vid);
+ tx_vid = dsa_8021q_tx_vid(ds, port);
+ return priv->info->fdb_del_cmd(ds, port, addr, tx_vid);
}
static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
@@ -1085,8 +1239,12 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
{
struct sja1105_private *priv = ds->priv;
struct device *dev = ds->dev;
+ u16 rx_vid, tx_vid;
int i;
+ rx_vid = dsa_8021q_rx_vid(ds, port);
+ tx_vid = dsa_8021q_tx_vid(ds, port);
+
for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) {
struct sja1105_l2_lookup_entry l2_lookup = {0};
u8 macaddr[ETH_ALEN];
@@ -1112,15 +1270,40 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
continue;
u64_to_ether_addr(l2_lookup.macaddr, macaddr);
- /* We need to hide the dsa_8021q VLAN from the user.
- * Convert the TX VID into the pvid that is active in
- * standalone and non-vlan_filtering modes, aka 1.
- * The RX VID is applied on the CPU port, which is not seen by
- * the bridge core anyway, so there's nothing to hide.
+ /* On SJA1105 E/T, the switch doesn't implement the LOCKEDS
+ * bit, so it doesn't tell us whether a FDB entry is static
+ * or not.
+ * But, of course, we can find out - we're the ones who added
+ * it in the first place.
*/
- if (!dsa_port_is_vlan_filtering(&ds->ports[port]))
- l2_lookup.vlanid = 1;
- cb(macaddr, l2_lookup.vlanid, false, data);
+ if (priv->info->device_id == SJA1105E_DEVICE_ID ||
+ priv->info->device_id == SJA1105T_DEVICE_ID) {
+ int match;
+
+ match = sja1105_find_static_fdb_entry(priv, port,
+ &l2_lookup);
+ l2_lookup.lockeds = (match >= 0);
+ }
+
+ /* We need to hide the dsa_8021q VLANs from the user. This
+ * basically means hiding the duplicates and only showing
+ * the pvid that is supposed to be active in standalone and
+ * non-vlan_filtering modes (aka 1).
+ * - For statically added FDB entries (bridge fdb add), we
+ * can convert the TX VID (coming from the CPU port) into the
+ * pvid and ignore the RX VIDs of the other ports.
+ * - For dynamically learned FDB entries, a single entry with
+ * no duplicates is learned - that which has the real port's
+ * pvid, aka RX VID.
+ */
+ if (!dsa_port_is_vlan_filtering(&ds->ports[port])) {
+ if (l2_lookup.vlanid == tx_vid ||
+ l2_lookup.vlanid == rx_vid)
+ l2_lookup.vlanid = 1;
+ else
+ continue;
+ }
+ cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data);
}
return 0;
}
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index 3041cf9d5856..d19cfdf681af 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -77,7 +77,6 @@ int sja1105_get_ts_info(struct dsa_switch *ds, int port,
info->phc_index = ptp_clock_index(priv->clock);
return 0;
}
-EXPORT_SYMBOL_GPL(sja1105_get_ts_info);
int sja1105et_ptp_cmd(const void *ctx, const void *data)
{
@@ -95,7 +94,6 @@ int sja1105et_ptp_cmd(const void *ctx, const void *data)
return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->ptp_control,
buf, SJA1105_SIZE_PTP_CMD);
}
-EXPORT_SYMBOL_GPL(sja1105et_ptp_cmd);
int sja1105pqrs_ptp_cmd(const void *ctx, const void *data)
{
@@ -113,7 +111,6 @@ int sja1105pqrs_ptp_cmd(const void *ctx, const void *data)
return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->ptp_control,
buf, SJA1105_SIZE_PTP_CMD);
}
-EXPORT_SYMBOL_GPL(sja1105pqrs_ptp_cmd);
/* The switch returns partial timestamps (24 bits for SJA1105 E/T, which wrap
* around in 0.135 seconds, and 32 bits for P/Q/R/S, wrapping around in 34.35
@@ -146,7 +143,6 @@ u64 sja1105_tstamp_reconstruct(struct sja1105_private *priv, u64 now,
return ts_reconstructed;
}
-EXPORT_SYMBOL_GPL(sja1105_tstamp_reconstruct);
/* Reads the SPI interface for an egress timestamp generated by the switch
* for frames sent using management routes.
@@ -219,7 +215,6 @@ int sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts)
return 0;
}
-EXPORT_SYMBOL_GPL(sja1105_ptpegr_ts_poll);
int sja1105_ptp_reset(struct sja1105_private *priv)
{
@@ -240,7 +235,6 @@ int sja1105_ptp_reset(struct sja1105_private *priv)
return rc;
}
-EXPORT_SYMBOL_GPL(sja1105_ptp_reset);
static int sja1105_ptp_gettime(struct ptp_clock_info *ptp,
struct timespec64 *ts)
@@ -387,18 +381,13 @@ int sja1105_ptp_clock_register(struct sja1105_private *priv)
return sja1105_ptp_reset(priv);
}
-EXPORT_SYMBOL_GPL(sja1105_ptp_clock_register);
void sja1105_ptp_clock_unregister(struct sja1105_private *priv)
{
if (IS_ERR_OR_NULL(priv->clock))
return;
+ cancel_delayed_work_sync(&priv->refresh_work);
ptp_clock_unregister(priv->clock);
priv->clock = NULL;
}
-EXPORT_SYMBOL_GPL(sja1105_ptp_clock_unregister);
-
-MODULE_AUTHOR("Vladimir Oltean <[email protected]>");
-MODULE_DESCRIPTION("SJA1105 PHC Driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index f7e51debb930..84dc603138cf 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -100,7 +100,6 @@ int sja1105_spi_send_packed_buf(const struct sja1105_private *priv,
return 0;
}
-EXPORT_SYMBOL_GPL(sja1105_spi_send_packed_buf);
/* If @rw is:
* - SPI_WRITE: creates and sends an SPI write message at absolute
@@ -136,7 +135,6 @@ int sja1105_spi_send_int(const struct sja1105_private *priv,
return rc;
}
-EXPORT_SYMBOL_GPL(sja1105_spi_send_int);
/* Should be used if a @packed_buf larger than SJA1105_SIZE_SPI_MSG_MAXLEN
* must be sent/received. Splitting the buffer into chunks and assembling
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index 58f273eaf1ea..b31c737dc560 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -35,7 +35,6 @@ void sja1105_pack(void *buf, const u64 *val, int start, int end, size_t len)
}
dump_stack();
}
-EXPORT_SYMBOL_GPL(sja1105_pack);
void sja1105_unpack(const void *buf, u64 *val, int start, int end, size_t len)
{
@@ -53,7 +52,6 @@ void sja1105_unpack(const void *buf, u64 *val, int start, int end, size_t len)
start, end);
dump_stack();
}
-EXPORT_SYMBOL_GPL(sja1105_unpack);
void sja1105_packing(void *buf, u64 *val, int start, int end,
size_t len, enum packing_op op)
@@ -76,7 +74,6 @@ void sja1105_packing(void *buf, u64 *val, int start, int end,
}
dump_stack();
}
-EXPORT_SYMBOL_GPL(sja1105_packing);
/* Little-endian Ethernet CRC32 of data packed as big-endian u32 words */
u32 sja1105_crc32(const void *buf, size_t len)
@@ -233,11 +230,20 @@ sja1105pqrs_l2_lookup_params_entry_packing(void *buf, void *entry_ptr,
{
const size_t size = SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_ENTRY;
struct sja1105_l2_lookup_params_entry *entry = entry_ptr;
+ int offset, i;
+ for (i = 0, offset = 58; i < 5; i++, offset += 11)
+ sja1105_packing(buf, &entry->maxaddrp[i],
+ offset + 10, offset + 0, size, op);
sja1105_packing(buf, &entry->maxage, 57, 43, size, op);
+ sja1105_packing(buf, &entry->start_dynspc, 42, 33, size, op);
+ sja1105_packing(buf, &entry->drpnolearn, 32, 28, size, op);
sja1105_packing(buf, &entry->shared_learn, 27, 27, size, op);
sja1105_packing(buf, &entry->no_enf_hostprt, 26, 26, size, op);
sja1105_packing(buf, &entry->no_mgmt_learn, 25, 25, size, op);
+ sja1105_packing(buf, &entry->use_static, 24, 24, size, op);
+ sja1105_packing(buf, &entry->owr_dyn, 23, 23, size, op);
+ sja1105_packing(buf, &entry->learn_once, 22, 22, size, op);
return size;
}
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index a9586d0b4b3b..684465fc0882 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -132,7 +132,7 @@ struct sja1105_l2_lookup_entry {
u64 mask_vlanid;
u64 mask_macaddr;
u64 iotag;
- bool lockeds;
+ u64 lockeds;
union {
/* LOCKEDS=1: Static FDB entries */
struct {
@@ -151,6 +151,7 @@ struct sja1105_l2_lookup_entry {
};
struct sja1105_l2_lookup_params_entry {
+ u64 maxaddrp[5]; /* P/Q/R/S only */
u64 start_dynspc; /* P/Q/R/S only */
u64 drpnolearn; /* P/Q/R/S only */
u64 use_static; /* P/Q/R/S only */
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index fe115b7caba0..93a2d4deb27c 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -76,6 +76,7 @@ source "drivers/net/ethernet/ezchip/Kconfig"
source "drivers/net/ethernet/faraday/Kconfig"
source "drivers/net/ethernet/freescale/Kconfig"
source "drivers/net/ethernet/fujitsu/Kconfig"
+source "drivers/net/ethernet/google/Kconfig"
source "drivers/net/ethernet/hisilicon/Kconfig"
source "drivers/net/ethernet/hp/Kconfig"
source "drivers/net/ethernet/huawei/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 7b5bf9682066..fb9155cffcff 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_NET_VENDOR_EZCHIP) += ezchip/
obj-$(CONFIG_NET_VENDOR_FARADAY) += faraday/
obj-$(CONFIG_NET_VENDOR_FREESCALE) += freescale/
obj-$(CONFIG_NET_VENDOR_FUJITSU) += fujitsu/
+obj-$(CONFIG_NET_VENDOR_GOOGLE) += google/
obj-$(CONFIG_NET_VENDOR_HISILICON) += hisilicon/
obj-$(CONFIG_NET_VENDOR_HP) += hp/
obj-$(CONFIG_NET_VENDOR_HUAWEI) += huawei/
diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c
index 9e06dff619c3..6253e5ed6e16 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.c
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.c
@@ -818,7 +818,6 @@ static int emac_probe(struct platform_device *pdev)
SET_NETDEV_DEV(ndev, &pdev->dev);
db = netdev_priv(ndev);
- memset(db, 0, sizeof(*db));
db->dev = &pdev->dev;
db->ndev = ndev;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
index 173be45463ee..02f1b70c4e25 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
@@ -9,6 +9,8 @@
#ifndef AQ_CFG_H
#define AQ_CFG_H
+#include <generated/utsrelease.h>
+
#define AQ_CFG_VECS_DEF 8U
#define AQ_CFG_TCS_DEF 1U
@@ -86,10 +88,7 @@
#define AQ_CFG_DRV_AUTHOR "aQuantia"
#define AQ_CFG_DRV_DESC "aQuantia Corporation(R) Network Driver"
#define AQ_CFG_DRV_NAME "atlantic"
-#define AQ_CFG_DRV_VERSION __stringify(NIC_MAJOR_DRIVER_VERSION)"."\
- __stringify(NIC_MINOR_DRIVER_VERSION)"."\
- __stringify(NIC_BUILD_DRIVER_VERSION)"."\
- __stringify(NIC_REVISION_DRIVER_VERSION) \
+#define AQ_CFG_DRV_VERSION UTS_RELEASE \
AQ_CFG_DRV_VERSION_SUFFIX
#endif /* AQ_CFG_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c
index adad6a7acabe..6da65099047d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2014-2019 aQuantia Corporation. */
/* File aq_drvinfo.c: Definition of common code for firmware info in sys.*/
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h
index 41fbb1358068..23a0487893a7 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2014-2017 aQuantia Corporation. */
/* File aq_drvinfo.h: Declaration of common code for firmware info in sys.*/
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
index 18bc035da850..440690b18734 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2014-2017 aQuantia Corporation. */
/* File aq_filters.c: RX filters related functions. */
@@ -843,9 +843,14 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic)
return err;
if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) {
- if (hweight < AQ_VLAN_MAX_FILTERS)
- err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, true);
+ if (hweight < AQ_VLAN_MAX_FILTERS && hweight > 0) {
+ err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw,
+ !(aq_nic->packet_filter & IFF_PROMISC));
+ aq_nic->aq_nic_cfg.is_vlan_force_promisc = false;
+ } else {
/* otherwise left in promiscue mode */
+ aq_nic->aq_nic_cfg.is_vlan_force_promisc = true;
+ }
}
return err;
@@ -866,6 +871,7 @@ int aq_filters_vlan_offload_off(struct aq_nic_s *aq_nic)
if (unlikely(!aq_hw_ops->hw_filter_vlan_ctrl))
return -EOPNOTSUPP;
+ aq_nic->aq_nic_cfg.is_vlan_force_promisc = true;
err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, false);
if (err)
return err;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.h b/drivers/net/ethernet/aquantia/atlantic/aq_filters.h
index c6a08c6585d5..122e06c88a33 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2014-2017 aQuantia Corporation. */
/* File aq_filters.h: RX filters related functions. */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
index 5315df5ff6f8..100722ad5c2d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -108,11 +108,16 @@ err_exit:
static int aq_ndev_set_features(struct net_device *ndev,
netdev_features_t features)
{
+ bool is_vlan_rx_strip = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
+ bool is_vlan_tx_insert = !!(features & NETIF_F_HW_VLAN_CTAG_TX);
struct aq_nic_s *aq_nic = netdev_priv(ndev);
- struct aq_nic_cfg_s *aq_cfg = aq_nic_get_cfg(aq_nic);
+ bool need_ndev_restart = false;
+ struct aq_nic_cfg_s *aq_cfg;
bool is_lro = false;
int err = 0;
+ aq_cfg = aq_nic_get_cfg(aq_nic);
+
if (!(features & NETIF_F_NTUPLE)) {
if (aq_nic->ndev->features & NETIF_F_NTUPLE) {
err = aq_clear_rxnfc_all_rules(aq_nic);
@@ -135,17 +140,32 @@ static int aq_ndev_set_features(struct net_device *ndev,
if (aq_cfg->is_lro != is_lro) {
aq_cfg->is_lro = is_lro;
-
- if (netif_running(ndev)) {
- aq_ndev_close(ndev);
- aq_ndev_open(ndev);
- }
+ need_ndev_restart = true;
}
}
- if ((aq_nic->ndev->features ^ features) & NETIF_F_RXCSUM)
+
+ if ((aq_nic->ndev->features ^ features) & NETIF_F_RXCSUM) {
err = aq_nic->aq_hw_ops->hw_set_offload(aq_nic->aq_hw,
aq_cfg);
+ if (unlikely(err))
+ goto err_exit;
+ }
+
+ if (aq_cfg->is_vlan_rx_strip != is_vlan_rx_strip) {
+ aq_cfg->is_vlan_rx_strip = is_vlan_rx_strip;
+ need_ndev_restart = true;
+ }
+ if (aq_cfg->is_vlan_tx_insert != is_vlan_tx_insert) {
+ aq_cfg->is_vlan_tx_insert = is_vlan_tx_insert;
+ need_ndev_restart = true;
+ }
+
+ if (need_ndev_restart && netif_running(ndev)) {
+ aq_ndev_close(ndev);
+ aq_ndev_open(ndev);
+ }
+
err_exit:
return err;
}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 0da5e161ec5d..e1392766e21e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -126,6 +126,9 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk;
cfg->features = cfg->aq_hw_caps->hw_features;
+ cfg->is_vlan_rx_strip = !!(cfg->features & NETIF_F_HW_VLAN_CTAG_RX);
+ cfg->is_vlan_tx_insert = !!(cfg->features & NETIF_F_HW_VLAN_CTAG_TX);
+ cfg->is_vlan_force_promisc = true;
}
static int aq_nic_update_link_status(struct aq_nic_s *self)
@@ -285,7 +288,8 @@ void aq_nic_ndev_init(struct aq_nic_s *self)
self->ndev->hw_features |= aq_hw_caps->hw_features;
self->ndev->features = aq_hw_caps->hw_features;
self->ndev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
- NETIF_F_RXHASH | NETIF_F_SG | NETIF_F_LRO;
+ NETIF_F_RXHASH | NETIF_F_SG |
+ NETIF_F_LRO | NETIF_F_TSO;
self->ndev->priv_flags = aq_hw_caps->hw_priv_flags;
self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
@@ -426,26 +430,37 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self,
unsigned int dx = ring->sw_tail;
struct aq_ring_buff_s *first = NULL;
struct aq_ring_buff_s *dx_buff = &ring->buff_ring[dx];
+ bool need_context_tag = false;
+
+ dx_buff->flags = 0U;
if (unlikely(skb_is_gso(skb))) {
- dx_buff->flags = 0U;
+ dx_buff->mss = skb_shinfo(skb)->gso_size;
+ dx_buff->is_gso = 1U;
dx_buff->len_pkt = skb->len;
dx_buff->len_l2 = ETH_HLEN;
dx_buff->len_l3 = ip_hdrlen(skb);
dx_buff->len_l4 = tcp_hdrlen(skb);
- dx_buff->mss = skb_shinfo(skb)->gso_size;
- dx_buff->is_txc = 1U;
dx_buff->eop_index = 0xffffU;
-
dx_buff->is_ipv6 =
(ip_hdr(skb)->version == 6) ? 1U : 0U;
+ need_context_tag = true;
+ }
+
+ if (self->aq_nic_cfg.is_vlan_tx_insert && skb_vlan_tag_present(skb)) {
+ dx_buff->vlan_tx_tag = skb_vlan_tag_get(skb);
+ dx_buff->len_pkt = skb->len;
+ dx_buff->is_vlan = 1U;
+ need_context_tag = true;
+ }
+ if (need_context_tag) {
dx = aq_ring_next_dx(ring, dx);
dx_buff = &ring->buff_ring[dx];
+ dx_buff->flags = 0U;
++ret;
}
- dx_buff->flags = 0U;
dx_buff->len = skb_headlen(skb);
dx_buff->pa = dma_map_single(aq_nic_get_dev(self),
skb->data,
@@ -534,7 +549,7 @@ mapping_error:
--ret, dx = aq_ring_next_dx(ring, dx)) {
dx_buff = &ring->buff_ring[dx];
- if (!dx_buff->is_txc && dx_buff->pa) {
+ if (!dx_buff->is_gso && !dx_buff->is_vlan && dx_buff->pa) {
if (unlikely(dx_buff->is_sop)) {
dma_unmap_single(aq_nic_get_dev(self),
dx_buff->pa,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index eb2e3c7c36f9..255b54a6ae07 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -35,6 +35,9 @@ struct aq_nic_cfg_s {
u32 flow_control;
u32 link_speed_msk;
u32 wol;
+ u8 is_vlan_rx_strip;
+ u8 is_vlan_tx_insert;
+ bool is_vlan_force_promisc;
u16 is_mc_list_enabled;
u16 mc_list_count;
bool is_autoneg;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 2a7b91ed17c5..3901d7994ca1 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -409,6 +409,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
}
}
+ if (buff->is_vlan)
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ buff->vlan_rx_tag);
+
skb->protocol = eth_type_trans(skb, ndev);
aq_rx_checksum(self, buff, skb);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
index 6bd67210d0b7..47abd09d06c2 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
@@ -27,7 +27,7 @@ struct aq_rxpage {
* +----------+----------+----------+-----------
* 4/8bytes|len pkt |len pkt | | skb
* +----------+----------+----------+-----------
- * 4/8bytes|is_txc |len,flags |len |len,is_eop
+ * 4/8bytes|is_gso |len,flags |len |len,is_eop
* +----------+----------+----------+-----------
*
* This aq_ring_buff_s doesn't have endianness dependency.
@@ -44,6 +44,7 @@ struct __packed aq_ring_buff_s {
u8 is_hash_l4;
u8 rsvd1;
struct aq_rxpage rxdata;
+ u16 vlan_rx_tag;
};
/* EOP */
struct {
@@ -59,6 +60,7 @@ struct __packed aq_ring_buff_s {
u8 is_ipv6:1;
u8 rsvd2:7;
u32 len_pkt;
+ u16 vlan_tx_tag;
};
};
union {
@@ -70,11 +72,12 @@ struct __packed aq_ring_buff_s {
u32 is_cso_err:1;
u32 is_sop:1;
u32 is_eop:1;
- u32 is_txc:1;
+ u32 is_gso:1;
u32 is_mapped:1;
u32 is_cleaned:1;
u32 is_error:1;
- u32 rsvd3:6;
+ u32 is_vlan:1;
+ u32 rsvd3:5;
u16 eop_index;
u16 rsvd4;
};
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 0f140a9fe404..359a4d387185 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -451,7 +451,7 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self,
buff = &ring->buff_ring[ring->sw_tail];
- if (buff->is_txc) {
+ if (buff->is_gso) {
txd->ctl |= (buff->len_l3 << 31) |
(buff->len_l2 << 24) |
HW_ATL_A0_TXD_CTL_CMD_TCP |
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 1c7593d54035..30f7fc4c97ff 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -40,7 +40,9 @@
NETIF_F_TSO | \
NETIF_F_LRO | \
NETIF_F_NTUPLE | \
- NETIF_F_HW_VLAN_CTAG_FILTER, \
+ NETIF_F_HW_VLAN_CTAG_FILTER | \
+ NETIF_F_HW_VLAN_CTAG_RX | \
+ NETIF_F_HW_VLAN_CTAG_TX, \
.hw_priv_flags = IFF_UNICAST_FLT, \
.flow_control = true, \
.mtu = HW_ATL_B0_MTU_JUMBO, \
@@ -245,6 +247,9 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
/* LSO offloads*/
hw_atl_tdm_large_send_offload_en_set(self, 0xFFFFFFFFU);
+ /* Outer VLAN tag offload */
+ hw_atl_rpo_outer_vlan_tag_mode_set(self, 1U);
+
/* LRO offloads */
{
unsigned int val = (8U < HW_ATL_B0_LRO_RXD_MAX) ? 0x3U :
@@ -487,6 +492,7 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
unsigned int buff_pa_len = 0U;
unsigned int pkt_len = 0U;
unsigned int frag_count = 0U;
+ bool is_vlan = false;
bool is_gso = false;
buff = &ring->buff_ring[ring->sw_tail];
@@ -501,36 +507,44 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
buff = &ring->buff_ring[ring->sw_tail];
- if (buff->is_txc) {
+ if (buff->is_gso) {
+ txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_TCP;
+ txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC;
txd->ctl |= (buff->len_l3 << 31) |
- (buff->len_l2 << 24) |
- HW_ATL_B0_TXD_CTL_CMD_TCP |
- HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC;
- txd->ctl2 |= (buff->mss << 16) |
- (buff->len_l4 << 8) |
- (buff->len_l3 >> 1);
+ (buff->len_l2 << 24);
+ txd->ctl2 |= (buff->mss << 16);
+ is_gso = true;
pkt_len -= (buff->len_l4 +
buff->len_l3 +
buff->len_l2);
- is_gso = true;
-
if (buff->is_ipv6)
txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_IPV6;
- } else {
+ txd->ctl2 |= (buff->len_l4 << 8) |
+ (buff->len_l3 >> 1);
+ }
+ if (buff->is_vlan) {
+ txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC;
+ txd->ctl |= buff->vlan_tx_tag << 4;
+ is_vlan = true;
+ }
+ if (!buff->is_gso && !buff->is_vlan) {
buff_pa_len = buff->len;
txd->buf_addr = buff->pa;
txd->ctl |= (HW_ATL_B0_TXD_CTL_BLEN &
((u32)buff_pa_len << 4));
txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXD;
+
/* PAY_LEN */
txd->ctl2 |= HW_ATL_B0_TXD_CTL2_LEN & (pkt_len << 14);
- if (is_gso) {
- txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_LSO;
+ if (is_gso || is_vlan) {
+ /* enable tx context */
txd->ctl2 |= HW_ATL_B0_TXD_CTL2_CTX_EN;
}
+ if (is_gso)
+ txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_LSO;
/* Tx checksum offloads */
if (buff->is_ip_cso)
@@ -539,13 +553,16 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
if (buff->is_udp_cso || buff->is_tcp_cso)
txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_TUCSO;
+ if (is_vlan)
+ txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_VLAN;
+
if (unlikely(buff->is_eop)) {
txd->ctl |= HW_ATL_B0_TXD_CTL_EOP;
txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_WB;
is_gso = false;
+ is_vlan = false;
}
}
-
ring->sw_tail = aq_ring_next_dx(ring, ring->sw_tail);
}
@@ -559,6 +576,7 @@ static int hw_atl_b0_hw_ring_rx_init(struct aq_hw_s *self,
{
u32 dma_desc_addr_lsw = (u32)aq_ring->dx_ring_pa;
u32 dma_desc_addr_msw = (u32)(((u64)aq_ring->dx_ring_pa) >> 32);
+ u32 vlan_rx_stripping = self->aq_nic_cfg->is_vlan_rx_strip;
hw_atl_rdm_rx_desc_en_set(self, false, aq_ring->idx);
@@ -578,7 +596,8 @@ static int hw_atl_b0_hw_ring_rx_init(struct aq_hw_s *self,
hw_atl_rdm_rx_desc_head_buff_size_set(self, 0U, aq_ring->idx);
hw_atl_rdm_rx_desc_head_splitting_set(self, 0U, aq_ring->idx);
- hw_atl_rpo_rx_desc_vlan_stripping_set(self, 0U, aq_ring->idx);
+ hw_atl_rpo_rx_desc_vlan_stripping_set(self, !!vlan_rx_stripping,
+ aq_ring->idx);
/* Rx ring set mode */
@@ -681,11 +700,15 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self,
buff = &ring->buff_ring[ring->hw_head];
+ buff->flags = 0U;
+ buff->is_hash_l4 = 0U;
+
rx_stat = (0x0000003CU & rxd_wb->status) >> 2;
is_rx_check_sum_enabled = (rxd_wb->type >> 19) & 0x3U;
- pkt_type = 0xFFU & (rxd_wb->type >> 4);
+ pkt_type = (rxd_wb->type & HW_ATL_B0_RXD_WB_STAT_PKTTYPE) >>
+ HW_ATL_B0_RXD_WB_STAT_PKTTYPE_SHIFT;
if (is_rx_check_sum_enabled & BIT(0) &&
(0x0U == (pkt_type & 0x3U)))
@@ -706,6 +729,13 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self,
buff->is_cso_err = 0U;
}
+ if (self->aq_nic_cfg->is_vlan_rx_strip &&
+ ((pkt_type & HW_ATL_B0_RXD_WB_PKTTYPE_VLAN) ||
+ (pkt_type & HW_ATL_B0_RXD_WB_PKTTYPE_VLAN_DOUBLE))) {
+ buff->is_vlan = 1;
+ buff->vlan_rx_tag = le16_to_cpu(rxd_wb->vlan);
+ }
+
if ((rx_stat & BIT(0)) || rxd_wb->type & 0x1000U) {
/* MAC error or DMA error */
buff->is_error = 1U;
@@ -778,8 +808,15 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,
unsigned int packet_filter)
{
unsigned int i = 0U;
+ struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
+
+ hw_atl_rpfl2promiscuous_mode_en_set(self,
+ IS_FILTER_ENABLED(IFF_PROMISC));
+
+ hw_atl_rpf_vlan_prom_mode_en_set(self,
+ IS_FILTER_ENABLED(IFF_PROMISC) ||
+ cfg->is_vlan_force_promisc);
- hw_atl_rpfl2promiscuous_mode_en_set(self, IS_FILTER_ENABLED(IFF_PROMISC));
hw_atl_rpfl2multicast_flr_en_set(self,
IS_FILTER_ENABLED(IFF_ALLMULTI), 0);
@@ -788,13 +825,13 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,
hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST));
- self->aq_nic_cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST);
+ cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST);
for (i = HW_ATL_B0_MAC_MIN; i < HW_ATL_B0_MAC_MAX; ++i)
hw_atl_rpfl2_uc_flr_en_set(self,
- (self->aq_nic_cfg->is_mc_list_enabled &&
- (i <= self->aq_nic_cfg->mc_list_count)) ?
- 1U : 0U, i);
+ (cfg->is_mc_list_enabled &&
+ (i <= cfg->mc_list_count)) ?
+ 1U : 0U, i);
return aq_hw_err_from_flags(self);
}
@@ -1086,7 +1123,7 @@ static int hw_atl_b0_hw_vlan_set(struct aq_hw_s *self,
static int hw_atl_b0_hw_vlan_ctrl(struct aq_hw_s *self, bool enable)
{
/* set promisc in case of disabing the vland filter */
- hw_atl_rpf_vlan_prom_mode_en_set(self, !!!enable);
+ hw_atl_rpf_vlan_prom_mode_en_set(self, !enable);
return aq_hw_err_from_flags(self);
}
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
index e4ba2ccf9830..808d8cd4252a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
@@ -107,10 +107,17 @@
#define HW_ATL_B0_RXD_NCEA0 (0x1)
#define HW_ATL_B0_RXD_WB_STAT_RSSTYPE (0x0000000F)
+#define HW_ATL_B0_RXD_WB_STAT_RSSTYPE_SHIFT (0x0)
#define HW_ATL_B0_RXD_WB_STAT_PKTTYPE (0x00000FF0)
+#define HW_ATL_B0_RXD_WB_STAT_PKTTYPE_SHIFT (0x4)
#define HW_ATL_B0_RXD_WB_STAT_RXCTRL (0x00180000)
+#define HW_ATL_B0_RXD_WB_STAT_RXCTRL_SHIFT (0x13)
#define HW_ATL_B0_RXD_WB_STAT_SPLHDR (0x00200000)
#define HW_ATL_B0_RXD_WB_STAT_HDRLEN (0xFFC00000)
+#define HW_ATL_B0_RXD_WB_STAT_HDRLEN_SHIFT (0x16)
+
+#define HW_ATL_B0_RXD_WB_PKTTYPE_VLAN BIT(5)
+#define HW_ATL_B0_RXD_WB_PKTTYPE_VLAN_DOUBLE BIT(6)
#define HW_ATL_B0_RXD_WB_STAT2_DD (0x0001)
#define HW_ATL_B0_RXD_WB_STAT2_EOP (0x0002)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
index 451529069f28..1149812ae463 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
@@ -1004,6 +1004,22 @@ void hw_atl_rpo_rx_desc_vlan_stripping_set(struct aq_hw_s *aq_hw,
rx_desc_vlan_stripping);
}
+void hw_atl_rpo_outer_vlan_tag_mode_set(void *context,
+ u32 outervlantagmode)
+{
+ aq_hw_write_reg_bit(context, HW_ATL_RPO_OUTER_VL_INS_MODE_ADR,
+ HW_ATL_RPO_OUTER_VL_INS_MODE_MSK,
+ HW_ATL_RPO_OUTER_VL_INS_MODE_SHIFT,
+ outervlantagmode);
+}
+
+u32 hw_atl_rpo_outer_vlan_tag_mode_get(void *context)
+{
+ return aq_hw_read_reg_bit(context, HW_ATL_RPO_OUTER_VL_INS_MODE_ADR,
+ HW_ATL_RPO_OUTER_VL_INS_MODE_MSK,
+ HW_ATL_RPO_OUTER_VL_INS_MODE_SHIFT);
+}
+
void hw_atl_rpo_tcp_udp_crc_offload_en_set(struct aq_hw_s *aq_hw,
u32 tcp_udp_crc_offload_en)
{
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
index 34b42ce43512..0c37abbabca5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
@@ -488,6 +488,11 @@ void hw_atl_rpo_rx_desc_vlan_stripping_set(struct aq_hw_s *aq_hw,
u32 rx_desc_vlan_stripping,
u32 descriptor);
+void hw_atl_rpo_outer_vlan_tag_mode_set(void *context,
+ u32 outervlantagmode);
+
+u32 hw_atl_rpo_outer_vlan_tag_mode_get(void *context);
+
/* set tcp/udp checksum offload enable */
void hw_atl_rpo_tcp_udp_crc_offload_en_set(struct aq_hw_s *aq_hw,
u32 tcp_udp_crc_offload_en);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
index fc1446f737bb..c3febcdfa92e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
@@ -1383,6 +1383,24 @@
/* default value of bitfield l4_chk_en */
#define HW_ATL_RPOL4CHK_EN_DEFAULT 0x0
+/* RX outer_vl_ins_mode Bitfield Definitions
+ * Preprocessor definitions for the bitfield "outer_vl_ins_mode".
+ * PORT="pif_rpo_outer_vl_mode_i"
+ */
+
+/* Register address for bitfield outer_vl_ins_mode */
+#define HW_ATL_RPO_OUTER_VL_INS_MODE_ADR 0x00005580
+/* Bitmask for bitfield outer_vl_ins_mode */
+#define HW_ATL_RPO_OUTER_VL_INS_MODE_MSK 0x00000004
+/* Inverted bitmask for bitfield outer_vl_ins_mode */
+#define HW_ATL_RPO_OUTER_VL_INS_MODE_MSKN 0xFFFFFFFB
+/* Lower bit position of bitfield outer_vl_ins_mode */
+#define HW_ATL_RPO_OUTER_VL_INS_MODE_SHIFT 2
+/* Width of bitfield outer_vl_ins_mode */
+#define HW_ATL_RPO_OUTER_VL_INS_MODE_WIDTH 1
+/* Default value of bitfield outer_vl_ins_mode */
+#define HW_ATL_RPO_OUTER_VL_INS_MODE_DEFAULT 0x0
+
/* rx reg_res_dsbl bitfield definitions
* preprocessor definitions for the bitfield "reg_res_dsbl".
* port="pif_rx_reg_res_dsbl_i"
diff --git a/drivers/net/ethernet/aquantia/atlantic/ver.h b/drivers/net/ethernet/aquantia/atlantic/ver.h
index 23374bffa92b..597654b51e01 100644
--- a/drivers/net/ethernet/aquantia/atlantic/ver.h
+++ b/drivers/net/ethernet/aquantia/atlantic/ver.h
@@ -7,11 +7,6 @@
#ifndef VER_H
#define VER_H
-#define NIC_MAJOR_DRIVER_VERSION 2
-#define NIC_MINOR_DRIVER_VERSION 0
-#define NIC_BUILD_DRIVER_VERSION 4
-#define NIC_REVISION_DRIVER_VERSION 0
-
#define AQ_CFG_DRV_VERSION_SUFFIX "-kern"
#endif /* VER_H */
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 25bf085324b8..be7f9cebb675 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2201,7 +2201,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
struct net_device *netdev)
{
struct atl1c_adapter *adapter = netdev_priv(netdev);
- u16 tpd_req = 1;
+ u16 tpd_req;
struct atl1c_tpd_desc *tpd;
enum atl1c_trans_queue type = atl1c_trans_normal;
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index b123509d385f..2e4a8c7237ef 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -8,6 +8,7 @@ config NET_VENDOR_BROADCOM
default y
depends on (SSB_POSSIBLE && HAS_DMA) || PCI || BCM63XX || \
SIBYTE_SB1xxx_SOC
+ select DIMLIB
---help---
If you have a network (Ethernet) chipset belonging to this class,
say Y.
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 85e610210477..291e4afd4a1a 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -2659,7 +2659,6 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
if (!dev)
return -ENOMEM;
priv = netdev_priv(dev);
- memset(priv, 0, sizeof(*priv));
/* initialize default and fetch platform data */
priv->enet_is_sw = true;
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index cae9b77ff44b..b9c5cea8db16 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -609,7 +609,7 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *ec)
{
struct bcm_sysport_priv *priv = netdev_priv(dev);
- struct net_dim_cq_moder moder;
+ struct dim_cq_moder moder;
u32 usecs, pkts;
unsigned int i;
@@ -992,7 +992,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
{
struct bcm_sysport_priv *priv =
container_of(napi, struct bcm_sysport_priv, napi);
- struct net_dim_sample dim_sample;
+ struct dim_sample dim_sample;
unsigned int work_done = 0;
work_done = bcm_sysport_desc_rx(priv, budget);
@@ -1016,8 +1016,8 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
}
if (priv->dim.use_dim) {
- net_dim_sample(priv->dim.event_ctr, priv->dim.packets,
- priv->dim.bytes, &dim_sample);
+ dim_update_sample(priv->dim.event_ctr, priv->dim.packets,
+ priv->dim.bytes, &dim_sample);
net_dim(&priv->dim.dim, dim_sample);
}
@@ -1087,16 +1087,16 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv)
static void bcm_sysport_dim_work(struct work_struct *work)
{
- struct net_dim *dim = container_of(work, struct net_dim, work);
+ struct dim *dim = container_of(work, struct dim, work);
struct bcm_sysport_net_dim *ndim =
container_of(dim, struct bcm_sysport_net_dim, dim);
struct bcm_sysport_priv *priv =
container_of(ndim, struct bcm_sysport_priv, dim);
- struct net_dim_cq_moder cur_profile =
- net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
+ struct dim_cq_moder cur_profile = net_dim_get_rx_moderation(dim->mode,
+ dim->profile_ix);
bcm_sysport_set_rx_coalesce(priv, cur_profile.usec, cur_profile.pkts);
- dim->state = NET_DIM_START_MEASURE;
+ dim->state = DIM_START_MEASURE;
}
/* RX and misc interrupt routine */
@@ -1437,7 +1437,7 @@ static void bcm_sysport_init_dim(struct bcm_sysport_priv *priv,
struct bcm_sysport_net_dim *dim = &priv->dim;
INIT_WORK(&dim->dim.work, cb);
- dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ dim->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
dim->event_ctr = 0;
dim->packets = 0;
dim->bytes = 0;
@@ -1446,7 +1446,7 @@ static void bcm_sysport_init_dim(struct bcm_sysport_priv *priv,
static void bcm_sysport_init_rx_coalesce(struct bcm_sysport_priv *priv)
{
struct bcm_sysport_net_dim *dim = &priv->dim;
- struct net_dim_cq_moder moder;
+ struct dim_cq_moder moder;
u32 usecs, pkts;
usecs = priv->rx_coalesce_usecs;
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
index 86193931203a..6d80735fbc7f 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.h
+++ b/drivers/net/ethernet/broadcom/bcmsysport.h
@@ -11,7 +11,7 @@
#include <linux/bitmap.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
-#include <linux/net_dim.h>
+#include <linux/dim.h>
/* Receive/transmit descriptor format */
#define DESC_ADDR_HI_STATUS_LEN 0x00
@@ -702,7 +702,7 @@ struct bcm_sysport_net_dim {
u16 event_ctr;
unsigned long packets;
unsigned long bytes;
- struct net_dim dim;
+ struct dim dim;
};
/* Software view of the TX ring */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index f758b2e0591f..b7b62273c955 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2130,12 +2130,12 @@ static int bnxt_poll(struct napi_struct *napi, int budget)
}
}
if (bp->flags & BNXT_FLAG_DIM) {
- struct net_dim_sample dim_sample;
+ struct dim_sample dim_sample;
- net_dim_sample(cpr->event_ctr,
- cpr->rx_packets,
- cpr->rx_bytes,
- &dim_sample);
+ dim_update_sample(cpr->event_ctr,
+ cpr->rx_packets,
+ cpr->rx_bytes,
+ &dim_sample);
net_dim(&cpr->dim, dim_sample);
}
return work_done;
@@ -7813,7 +7813,7 @@ static void bnxt_enable_napi(struct bnxt *bp)
if (bp->bnapi[i]->rx_ring) {
INIT_WORK(&cpr->dim.work, bnxt_dim_work);
- cpr->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
napi_enable(&bp->bnapi[i]->napi);
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index be438d82f939..4b3ae92a082b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -24,7 +24,7 @@
#include <net/devlink.h>
#include <net/dst_metadata.h>
#include <net/xdp.h>
-#include <linux/net_dim.h>
+#include <linux/dim.h>
struct tx_bd {
__le32 tx_bd_len_flags_type;
@@ -810,7 +810,7 @@ struct bnxt_cp_ring_info {
u64 rx_bytes;
u64 event_ctr;
- struct net_dim dim;
+ struct dim dim;
union {
struct tx_cmp *cp_desc_ring[MAX_CP_PAGES];
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c
index 94e208e9789f..61393f351a77 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c
@@ -11,7 +11,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include "bnxt_hsi.h"
-#include <linux/net_dim.h>
+#include <linux/dim.h>
#include "bnxt.h"
#include "bnxt_debugfs.h"
@@ -21,7 +21,7 @@ static ssize_t debugfs_dim_read(struct file *filep,
char __user *buffer,
size_t count, loff_t *ppos)
{
- struct net_dim *dim = filep->private_data;
+ struct dim *dim = filep->private_data;
int len;
char *buf;
@@ -61,7 +61,7 @@ static const struct file_operations debugfs_dim_fops = {
.read = debugfs_dim_read,
};
-static struct dentry *debugfs_dim_ring_init(struct net_dim *dim, int ring_idx,
+static struct dentry *debugfs_dim_ring_init(struct dim *dim, int ring_idx,
struct dentry *dd)
{
static char qname[16];
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c
index afa97c8bb081..6f6576dc417a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c
@@ -7,26 +7,25 @@
* the Free Software Foundation.
*/
-#include <linux/net_dim.h>
+#include <linux/dim.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
void bnxt_dim_work(struct work_struct *work)
{
- struct net_dim *dim = container_of(work, struct net_dim,
- work);
+ struct dim *dim = container_of(work, struct dim, work);
struct bnxt_cp_ring_info *cpr = container_of(dim,
struct bnxt_cp_ring_info,
dim);
struct bnxt_napi *bnapi = container_of(cpr,
struct bnxt_napi,
cp_ring);
- struct net_dim_cq_moder cur_moder =
+ struct dim_cq_moder cur_moder =
net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
cpr->rx_ring_coal.coal_ticks = cur_moder.usec;
cpr->rx_ring_coal.coal_bufs = cur_moder.pkts;
bnxt_hwrm_set_ring_coal(bnapi->bp, bnapi);
- dim->state = NET_DIM_START_MEASURE;
+ dim->state = DIM_START_MEASURE;
}
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 41b50e6570ea..34466b827dde 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -640,7 +640,7 @@ static void bcmgenet_set_rx_coalesce(struct bcmgenet_rx_ring *ring,
static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring,
struct ethtool_coalesce *ec)
{
- struct net_dim_cq_moder moder;
+ struct dim_cq_moder moder;
u32 usecs, pkts;
ring->rx_coalesce_usecs = ec->rx_coalesce_usecs;
@@ -1895,7 +1895,7 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
{
struct bcmgenet_rx_ring *ring = container_of(napi,
struct bcmgenet_rx_ring, napi);
- struct net_dim_sample dim_sample;
+ struct dim_sample dim_sample;
unsigned int work_done;
work_done = bcmgenet_desc_rx(ring, budget);
@@ -1906,8 +1906,8 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
}
if (ring->dim.use_dim) {
- net_dim_sample(ring->dim.event_ctr, ring->dim.packets,
- ring->dim.bytes, &dim_sample);
+ dim_update_sample(ring->dim.event_ctr, ring->dim.packets,
+ ring->dim.bytes, &dim_sample);
net_dim(&ring->dim.dim, dim_sample);
}
@@ -1916,16 +1916,16 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
static void bcmgenet_dim_work(struct work_struct *work)
{
- struct net_dim *dim = container_of(work, struct net_dim, work);
+ struct dim *dim = container_of(work, struct dim, work);
struct bcmgenet_net_dim *ndim =
container_of(dim, struct bcmgenet_net_dim, dim);
struct bcmgenet_rx_ring *ring =
container_of(ndim, struct bcmgenet_rx_ring, dim);
- struct net_dim_cq_moder cur_profile =
+ struct dim_cq_moder cur_profile =
net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
bcmgenet_set_rx_coalesce(ring, cur_profile.usec, cur_profile.pkts);
- dim->state = NET_DIM_START_MEASURE;
+ dim->state = DIM_START_MEASURE;
}
/* Assign skb to RX DMA descriptor. */
@@ -2082,7 +2082,7 @@ static void bcmgenet_init_dim(struct bcmgenet_rx_ring *ring,
struct bcmgenet_net_dim *dim = &ring->dim;
INIT_WORK(&dim->dim.work, cb);
- dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ dim->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
dim->event_ctr = 0;
dim->packets = 0;
dim->bytes = 0;
@@ -2091,7 +2091,7 @@ static void bcmgenet_init_dim(struct bcmgenet_rx_ring *ring,
static void bcmgenet_init_rx_coalesce(struct bcmgenet_rx_ring *ring)
{
struct bcmgenet_net_dim *dim = &ring->dim;
- struct net_dim_cq_moder moder;
+ struct dim_cq_moder moder;
u32 usecs, pkts;
usecs = ring->rx_coalesce_usecs;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 9ad835aee1bc..4a8fc03d82fd 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -13,7 +13,7 @@
#include <linux/mii.h>
#include <linux/if_vlan.h>
#include <linux/phy.h>
-#include <linux/net_dim.h>
+#include <linux/dim.h>
/* total number of Buffer Descriptors, same for Rx/Tx */
#define TOTAL_DESC 256
@@ -578,7 +578,7 @@ struct bcmgenet_net_dim {
u16 event_ctr;
unsigned long packets;
unsigned long bytes;
- struct net_dim dim;
+ struct dim dim;
};
struct bcmgenet_rx_ring {
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 5d41e41a889c..a27d32f69de9 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -4303,7 +4303,7 @@ static int macb_probe(struct platform_device *pdev)
if (PTR_ERR(mac) == -EPROBE_DEFER) {
err = -EPROBE_DEFER;
goto err_out_free_netdev;
- } else if (!IS_ERR(mac)) {
+ } else if (!IS_ERR_OR_NULL(mac)) {
ether_addr_copy(bp->dev->dev_addr, mac);
} else {
macb_get_hwaddr(bp);
diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index 11d4e91ea754..99f49d059414 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -1855,7 +1855,7 @@ static void xgmac_pmt(void __iomem *ioaddr, unsigned long mode)
static int xgmac_suspend(struct device *dev)
{
- struct net_device *ndev = platform_get_drvdata(to_platform_device(dev));
+ struct net_device *ndev = dev_get_drvdata(dev);
struct xgmac_priv *priv = netdev_priv(ndev);
u32 value;
@@ -1881,7 +1881,7 @@ static int xgmac_suspend(struct device *dev)
static int xgmac_resume(struct device *dev)
{
- struct net_device *ndev = platform_get_drvdata(to_platform_device(dev));
+ struct net_device *ndev = dev_get_drvdata(dev);
struct xgmac_priv *priv = netdev_priv(ndev);
void __iomem *ioaddr = priv->base;
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index 8a6785173228..492f8769ac12 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -891,7 +891,7 @@ static void be_self_test(struct net_device *netdev, struct ethtool_test *test,
u64 *data)
{
struct be_adapter *adapter = netdev_priv(netdev);
- int status;
+ int status, cnt;
u8 link_status = 0;
if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
@@ -902,6 +902,9 @@ static void be_self_test(struct net_device *netdev, struct ethtool_test *test,
memset(data, 0, sizeof(u64) * ETHTOOL_TESTS_NUM);
+ /* check link status before offline tests */
+ link_status = netif_carrier_ok(netdev);
+
if (test->flags & ETH_TEST_FL_OFFLINE) {
if (be_loopback_test(adapter, BE_MAC_LOOPBACK, &data[0]) != 0)
test->flags |= ETH_TEST_FL_FAILED;
@@ -922,13 +925,26 @@ static void be_self_test(struct net_device *netdev, struct ethtool_test *test,
test->flags |= ETH_TEST_FL_FAILED;
}
- status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
- if (status) {
- test->flags |= ETH_TEST_FL_FAILED;
- data[4] = -1;
- } else if (!link_status) {
+ /* link status was down prior to test */
+ if (!link_status) {
test->flags |= ETH_TEST_FL_FAILED;
data[4] = 1;
+ return;
+ }
+
+ for (cnt = 10; cnt; cnt--) {
+ status = be_cmd_link_status_query(adapter, NULL, &link_status,
+ 0);
+ if (status) {
+ test->flags |= ETH_TEST_FL_FAILED;
+ data[4] = -1;
+ break;
+ }
+
+ if (link_status)
+ break;
+
+ msleep_interruptible(500);
}
}
diff --git a/drivers/net/ethernet/google/Kconfig b/drivers/net/ethernet/google/Kconfig
new file mode 100644
index 000000000000..b8f04d052fda
--- /dev/null
+++ b/drivers/net/ethernet/google/Kconfig
@@ -0,0 +1,27 @@
+#
+# Google network device configuration
+#
+
+config NET_VENDOR_GOOGLE
+ bool "Google Devices"
+ default y
+ help
+ If you have a network (Ethernet) device belonging to this class, say Y.
+
+ Note that the answer to this question doesn't directly affect the
+ kernel: saying N will just cause the configurator to skip all
+ the questions about Google devices. If you say Y, you will be asked
+ for your specific device in the following questions.
+
+if NET_VENDOR_GOOGLE
+
+config GVE
+ tristate "Google Virtual NIC (gVNIC) support"
+ depends on PCI_MSI
+ help
+ This driver supports Google Virtual NIC (gVNIC)"
+
+ To compile this driver as a module, choose M here.
+ The module will be called gve.
+
+endif #NET_VENDOR_GOOGLE
diff --git a/drivers/net/ethernet/google/Makefile b/drivers/net/ethernet/google/Makefile
new file mode 100644
index 000000000000..402cc3ba1639
--- /dev/null
+++ b/drivers/net/ethernet/google/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Google network device drivers.
+#
+
+obj-$(CONFIG_GVE) += gve/
diff --git a/drivers/net/ethernet/google/gve/Makefile b/drivers/net/ethernet/google/gve/Makefile
new file mode 100644
index 000000000000..3354ce40eb97
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/Makefile
@@ -0,0 +1,4 @@
+# Makefile for the Google virtual Ethernet (gve) driver
+
+obj-$(CONFIG_GVE) += gve.o
+gve-objs := gve_main.o gve_tx.o gve_rx.o gve_ethtool.o gve_adminq.o
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
new file mode 100644
index 000000000000..92372dc43be8
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -0,0 +1,459 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#ifndef _GVE_H_
+#define _GVE_H_
+
+#include <linux/dma-mapping.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/u64_stats_sync.h>
+#include "gve_desc.h"
+
+#ifndef PCI_VENDOR_ID_GOOGLE
+#define PCI_VENDOR_ID_GOOGLE 0x1ae0
+#endif
+
+#define PCI_DEV_ID_GVNIC 0x0042
+
+#define GVE_REGISTER_BAR 0
+#define GVE_DOORBELL_BAR 2
+
+/* Driver can alloc up to 2 segments for the header and 2 for the payload. */
+#define GVE_TX_MAX_IOVEC 4
+/* 1 for management, 1 for rx, 1 for tx */
+#define GVE_MIN_MSIX 3
+
+/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
+struct gve_rx_desc_queue {
+ struct gve_rx_desc *desc_ring; /* the descriptor ring */
+ dma_addr_t bus; /* the bus for the desc_ring */
+ u32 cnt; /* free-running total number of completed packets */
+ u32 fill_cnt; /* free-running total number of descriptors posted */
+ u32 mask; /* masks the cnt to the size of the ring */
+ u8 seqno; /* the next expected seqno for this desc*/
+};
+
+/* The page info for a single slot in the RX data queue */
+struct gve_rx_slot_page_info {
+ struct page *page;
+ void *page_address;
+ u32 page_offset; /* offset to write to in page */
+};
+
+/* A list of pages registered with the device during setup and used by a queue
+ * as buffers
+ */
+struct gve_queue_page_list {
+ u32 id; /* unique id */
+ u32 num_entries;
+ struct page **pages; /* list of num_entries pages */
+ dma_addr_t *page_buses; /* the dma addrs of the pages */
+};
+
+/* Each slot in the data ring has a 1:1 mapping to a slot in the desc ring */
+struct gve_rx_data_queue {
+ struct gve_rx_data_slot *data_ring; /* read by NIC */
+ dma_addr_t data_bus; /* dma mapping of the slots */
+ struct gve_rx_slot_page_info *page_info; /* page info of the buffers */
+ struct gve_queue_page_list *qpl; /* qpl assigned to this queue */
+ u32 mask; /* masks the cnt to the size of the ring */
+ u32 cnt; /* free-running total number of completed packets */
+};
+
+struct gve_priv;
+
+/* An RX ring that contains a power-of-two sized desc and data ring. */
+struct gve_rx_ring {
+ struct gve_priv *gve;
+ struct gve_rx_desc_queue desc;
+ struct gve_rx_data_queue data;
+ u64 rbytes; /* free-running bytes received */
+ u64 rpackets; /* free-running packets received */
+ u32 q_num; /* queue index */
+ u32 ntfy_id; /* notification block index */
+ struct gve_queue_resources *q_resources; /* head and tail pointer idx */
+ dma_addr_t q_resources_bus; /* dma address for the queue resources */
+ struct u64_stats_sync statss; /* sync stats for 32bit archs */
+};
+
+/* A TX desc ring entry */
+union gve_tx_desc {
+ struct gve_tx_pkt_desc pkt; /* first desc for a packet */
+ struct gve_tx_seg_desc seg; /* subsequent descs for a packet */
+};
+
+/* Tracks the memory in the fifo occupied by a segment of a packet */
+struct gve_tx_iovec {
+ u32 iov_offset; /* offset into this segment */
+ u32 iov_len; /* length */
+ u32 iov_padding; /* padding associated with this segment */
+};
+
+/* Tracks the memory in the fifo occupied by the skb. Mapped 1:1 to a desc
+ * ring entry but only used for a pkt_desc not a seg_desc
+ */
+struct gve_tx_buffer_state {
+ struct sk_buff *skb; /* skb for this pkt */
+ struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
+};
+
+/* A TX buffer - each queue has one */
+struct gve_tx_fifo {
+ void *base; /* address of base of FIFO */
+ u32 size; /* total size */
+ atomic_t available; /* how much space is still available */
+ u32 head; /* offset to write at */
+ struct gve_queue_page_list *qpl; /* QPL mapped into this FIFO */
+};
+
+/* A TX ring that contains a power-of-two sized desc ring and a FIFO buffer */
+struct gve_tx_ring {
+ /* Cacheline 0 -- Accessed & dirtied during transmit */
+ struct gve_tx_fifo tx_fifo;
+ u32 req; /* driver tracked head pointer */
+ u32 done; /* driver tracked tail pointer */
+
+ /* Cacheline 1 -- Accessed & dirtied during gve_clean_tx_done */
+ __be32 last_nic_done ____cacheline_aligned; /* NIC tail pointer */
+ u64 pkt_done; /* free-running - total packets completed */
+ u64 bytes_done; /* free-running - total bytes completed */
+
+ /* Cacheline 2 -- Read-mostly fields */
+ union gve_tx_desc *desc ____cacheline_aligned;
+ struct gve_tx_buffer_state *info; /* Maps 1:1 to a desc */
+ struct netdev_queue *netdev_txq;
+ struct gve_queue_resources *q_resources; /* head and tail pointer idx */
+ u32 mask; /* masks req and done down to queue size */
+
+ /* Slow-path fields */
+ u32 q_num ____cacheline_aligned; /* queue idx */
+ u32 stop_queue; /* count of queue stops */
+ u32 wake_queue; /* count of queue wakes */
+ u32 ntfy_id; /* notification block index */
+ dma_addr_t bus; /* dma address of the descr ring */
+ dma_addr_t q_resources_bus; /* dma address of the queue resources */
+ struct u64_stats_sync statss; /* sync stats for 32bit archs */
+} ____cacheline_aligned;
+
+/* Wraps the info for one irq including the napi struct and the queues
+ * associated with that irq.
+ */
+struct gve_notify_block {
+ __be32 irq_db_index; /* idx into Bar2 - set by device, must be 1st */
+ char name[IFNAMSIZ + 16]; /* name registered with the kernel */
+ struct napi_struct napi; /* kernel napi struct for this block */
+ struct gve_priv *priv;
+ struct gve_tx_ring *tx; /* tx rings on this block */
+ struct gve_rx_ring *rx; /* rx rings on this block */
+} ____cacheline_aligned;
+
+/* Tracks allowed and current queue settings */
+struct gve_queue_config {
+ u16 max_queues;
+ u16 num_queues; /* current */
+};
+
+/* Tracks the available and used qpl IDs */
+struct gve_qpl_config {
+ u32 qpl_map_size; /* map memory size */
+ unsigned long *qpl_id_map; /* bitmap of used qpl ids */
+};
+
+struct gve_priv {
+ struct net_device *dev;
+ struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */
+ struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */
+ struct gve_queue_page_list *qpls; /* array of num qpls */
+ struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */
+ dma_addr_t ntfy_block_bus;
+ struct msix_entry *msix_vectors; /* array of num_ntfy_blks + 1 */
+ char mgmt_msix_name[IFNAMSIZ + 16];
+ u32 mgmt_msix_idx;
+ __be32 *counter_array; /* array of num_event_counters */
+ dma_addr_t counter_array_bus;
+
+ u16 num_event_counters;
+ u16 tx_desc_cnt; /* num desc per ring */
+ u16 rx_desc_cnt; /* num desc per ring */
+ u16 tx_pages_per_qpl; /* tx buffer length */
+ u16 rx_pages_per_qpl; /* rx buffer length */
+ u64 max_registered_pages;
+ u64 num_registered_pages; /* num pages registered with NIC */
+ u32 rx_copybreak; /* copy packets smaller than this */
+ u16 default_num_queues; /* default num queues to set up */
+
+ struct gve_queue_config tx_cfg;
+ struct gve_queue_config rx_cfg;
+ struct gve_qpl_config qpl_cfg; /* map used QPL ids */
+ u32 num_ntfy_blks; /* spilt between TX and RX so must be even */
+
+ struct gve_registers __iomem *reg_bar0; /* see gve_register.h */
+ __be32 __iomem *db_bar2; /* "array" of doorbells */
+ u32 msg_enable; /* level for netif* netdev print macros */
+ struct pci_dev *pdev;
+
+ /* metrics */
+ u32 tx_timeo_cnt;
+
+ /* Admin queue - see gve_adminq.h*/
+ union gve_adminq_command *adminq;
+ dma_addr_t adminq_bus_addr;
+ u32 adminq_mask; /* masks prod_cnt to adminq size */
+ u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */
+
+ struct workqueue_struct *gve_wq;
+ struct work_struct service_task;
+ unsigned long service_task_flags;
+ unsigned long state_flags;
+};
+
+enum gve_service_task_flags {
+ GVE_PRIV_FLAGS_DO_RESET = BIT(1),
+ GVE_PRIV_FLAGS_RESET_IN_PROGRESS = BIT(2),
+ GVE_PRIV_FLAGS_PROBE_IN_PROGRESS = BIT(3),
+};
+
+enum gve_state_flags {
+ GVE_PRIV_FLAGS_ADMIN_QUEUE_OK = BIT(1),
+ GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK = BIT(2),
+ GVE_PRIV_FLAGS_DEVICE_RINGS_OK = BIT(3),
+ GVE_PRIV_FLAGS_NAPI_ENABLED = BIT(4),
+};
+
+static inline bool gve_get_do_reset(struct gve_priv *priv)
+{
+ return test_bit(GVE_PRIV_FLAGS_DO_RESET, &priv->service_task_flags);
+}
+
+static inline void gve_set_do_reset(struct gve_priv *priv)
+{
+ set_bit(GVE_PRIV_FLAGS_DO_RESET, &priv->service_task_flags);
+}
+
+static inline void gve_clear_do_reset(struct gve_priv *priv)
+{
+ clear_bit(GVE_PRIV_FLAGS_DO_RESET, &priv->service_task_flags);
+}
+
+static inline bool gve_get_reset_in_progress(struct gve_priv *priv)
+{
+ return test_bit(GVE_PRIV_FLAGS_RESET_IN_PROGRESS,
+ &priv->service_task_flags);
+}
+
+static inline void gve_set_reset_in_progress(struct gve_priv *priv)
+{
+ set_bit(GVE_PRIV_FLAGS_RESET_IN_PROGRESS, &priv->service_task_flags);
+}
+
+static inline void gve_clear_reset_in_progress(struct gve_priv *priv)
+{
+ clear_bit(GVE_PRIV_FLAGS_RESET_IN_PROGRESS, &priv->service_task_flags);
+}
+
+static inline bool gve_get_probe_in_progress(struct gve_priv *priv)
+{
+ return test_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS,
+ &priv->service_task_flags);
+}
+
+static inline void gve_set_probe_in_progress(struct gve_priv *priv)
+{
+ set_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS, &priv->service_task_flags);
+}
+
+static inline void gve_clear_probe_in_progress(struct gve_priv *priv)
+{
+ clear_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS, &priv->service_task_flags);
+}
+
+static inline bool gve_get_admin_queue_ok(struct gve_priv *priv)
+{
+ return test_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags);
+}
+
+static inline void gve_set_admin_queue_ok(struct gve_priv *priv)
+{
+ set_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags);
+}
+
+static inline void gve_clear_admin_queue_ok(struct gve_priv *priv)
+{
+ clear_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags);
+}
+
+static inline bool gve_get_device_resources_ok(struct gve_priv *priv)
+{
+ return test_bit(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK, &priv->state_flags);
+}
+
+static inline void gve_set_device_resources_ok(struct gve_priv *priv)
+{
+ set_bit(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK, &priv->state_flags);
+}
+
+static inline void gve_clear_device_resources_ok(struct gve_priv *priv)
+{
+ clear_bit(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK, &priv->state_flags);
+}
+
+static inline bool gve_get_device_rings_ok(struct gve_priv *priv)
+{
+ return test_bit(GVE_PRIV_FLAGS_DEVICE_RINGS_OK, &priv->state_flags);
+}
+
+static inline void gve_set_device_rings_ok(struct gve_priv *priv)
+{
+ set_bit(GVE_PRIV_FLAGS_DEVICE_RINGS_OK, &priv->state_flags);
+}
+
+static inline void gve_clear_device_rings_ok(struct gve_priv *priv)
+{
+ clear_bit(GVE_PRIV_FLAGS_DEVICE_RINGS_OK, &priv->state_flags);
+}
+
+static inline bool gve_get_napi_enabled(struct gve_priv *priv)
+{
+ return test_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags);
+}
+
+static inline void gve_set_napi_enabled(struct gve_priv *priv)
+{
+ set_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags);
+}
+
+static inline void gve_clear_napi_enabled(struct gve_priv *priv)
+{
+ clear_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags);
+}
+
+/* Returns the address of the ntfy_blocks irq doorbell
+ */
+static inline __be32 __iomem *gve_irq_doorbell(struct gve_priv *priv,
+ struct gve_notify_block *block)
+{
+ return &priv->db_bar2[be32_to_cpu(block->irq_db_index)];
+}
+
+/* Returns the index into ntfy_blocks of the given tx ring's block
+ */
+static inline u32 gve_tx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
+{
+ return queue_idx;
+}
+
+/* Returns the index into ntfy_blocks of the given rx ring's block
+ */
+static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
+{
+ return (priv->num_ntfy_blks / 2) + queue_idx;
+}
+
+/* Returns the number of tx queue page lists
+ */
+static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
+{
+ return priv->tx_cfg.num_queues;
+}
+
+/* Returns the number of rx queue page lists
+ */
+static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
+{
+ return priv->rx_cfg.num_queues;
+}
+
+/* Returns a pointer to the next available tx qpl in the list of qpls
+ */
+static inline
+struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv)
+{
+ int id = find_first_zero_bit(priv->qpl_cfg.qpl_id_map,
+ priv->qpl_cfg.qpl_map_size);
+
+ /* we are out of tx qpls */
+ if (id >= gve_num_tx_qpls(priv))
+ return NULL;
+
+ set_bit(id, priv->qpl_cfg.qpl_id_map);
+ return &priv->qpls[id];
+}
+
+/* Returns a pointer to the next available rx qpl in the list of qpls
+ */
+static inline
+struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv)
+{
+ int id = find_next_zero_bit(priv->qpl_cfg.qpl_id_map,
+ priv->qpl_cfg.qpl_map_size,
+ gve_num_tx_qpls(priv));
+
+ /* we are out of rx qpls */
+ if (id == priv->qpl_cfg.qpl_map_size)
+ return NULL;
+
+ set_bit(id, priv->qpl_cfg.qpl_id_map);
+ return &priv->qpls[id];
+}
+
+/* Unassigns the qpl with the given id
+ */
+static inline void gve_unassign_qpl(struct gve_priv *priv, int id)
+{
+ clear_bit(id, priv->qpl_cfg.qpl_id_map);
+}
+
+/* Returns the correct dma direction for tx and rx qpls
+ */
+static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv,
+ int id)
+{
+ if (id < gve_num_tx_qpls(priv))
+ return DMA_TO_DEVICE;
+ else
+ return DMA_FROM_DEVICE;
+}
+
+/* Returns true if the max mtu allows page recycling */
+static inline bool gve_can_recycle_pages(struct net_device *dev)
+{
+ /* We can't recycle the pages if we can't fit a packet into half a
+ * page.
+ */
+ return dev->max_mtu <= PAGE_SIZE / 2;
+}
+
+/* buffers */
+int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
+ enum dma_data_direction);
+void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
+ enum dma_data_direction);
+/* tx handling */
+netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev);
+bool gve_tx_poll(struct gve_notify_block *block, int budget);
+int gve_tx_alloc_rings(struct gve_priv *priv);
+void gve_tx_free_rings(struct gve_priv *priv);
+__be32 gve_tx_load_event_counter(struct gve_priv *priv,
+ struct gve_tx_ring *tx);
+/* rx handling */
+void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx);
+bool gve_rx_poll(struct gve_notify_block *block, int budget);
+int gve_rx_alloc_rings(struct gve_priv *priv);
+void gve_rx_free_rings(struct gve_priv *priv);
+bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
+ netdev_features_t feat);
+/* Reset */
+void gve_schedule_reset(struct gve_priv *priv);
+int gve_reset(struct gve_priv *priv, bool attempt_teardown);
+int gve_adjust_queues(struct gve_priv *priv,
+ struct gve_queue_config new_rx_config,
+ struct gve_queue_config new_tx_config);
+/* exported by ethtool.c */
+extern const struct ethtool_ops gve_ethtool_ops;
+/* needed by ethtool */
+extern const char gve_version_str[];
+#endif /* _GVE_H_ */
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
new file mode 100644
index 000000000000..c3ba7baf0107
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/pci.h>
+#include "gve.h"
+#include "gve_adminq.h"
+#include "gve_register.h"
+
+#define GVE_MAX_ADMINQ_RELEASE_CHECK 500
+#define GVE_ADMINQ_SLEEP_LEN 20
+#define GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK 100
+
+int gve_adminq_alloc(struct device *dev, struct gve_priv *priv)
+{
+ priv->adminq = dma_alloc_coherent(dev, PAGE_SIZE,
+ &priv->adminq_bus_addr, GFP_KERNEL);
+ if (unlikely(!priv->adminq))
+ return -ENOMEM;
+
+ priv->adminq_mask = (PAGE_SIZE / sizeof(union gve_adminq_command)) - 1;
+ priv->adminq_prod_cnt = 0;
+
+ /* Setup Admin queue with the device */
+ iowrite32be(priv->adminq_bus_addr / PAGE_SIZE,
+ &priv->reg_bar0->adminq_pfn);
+
+ gve_set_admin_queue_ok(priv);
+ return 0;
+}
+
+void gve_adminq_release(struct gve_priv *priv)
+{
+ int i = 0;
+
+ /* Tell the device the adminq is leaving */
+ iowrite32be(0x0, &priv->reg_bar0->adminq_pfn);
+ while (ioread32be(&priv->reg_bar0->adminq_pfn)) {
+ /* If this is reached the device is unrecoverable and still
+ * holding memory. Continue looping to avoid memory corruption,
+ * but WARN so it is visible what is going on.
+ */
+ if (i == GVE_MAX_ADMINQ_RELEASE_CHECK)
+ WARN(1, "Unrecoverable platform error!");
+ i++;
+ msleep(GVE_ADMINQ_SLEEP_LEN);
+ }
+ gve_clear_device_rings_ok(priv);
+ gve_clear_device_resources_ok(priv);
+ gve_clear_admin_queue_ok(priv);
+}
+
+void gve_adminq_free(struct device *dev, struct gve_priv *priv)
+{
+ if (!gve_get_admin_queue_ok(priv))
+ return;
+ gve_adminq_release(priv);
+ dma_free_coherent(dev, PAGE_SIZE, priv->adminq, priv->adminq_bus_addr);
+ gve_clear_admin_queue_ok(priv);
+}
+
+static void gve_adminq_kick_cmd(struct gve_priv *priv, u32 prod_cnt)
+{
+ iowrite32be(prod_cnt, &priv->reg_bar0->adminq_doorbell);
+}
+
+static bool gve_adminq_wait_for_cmd(struct gve_priv *priv, u32 prod_cnt)
+{
+ int i;
+
+ for (i = 0; i < GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK; i++) {
+ if (ioread32be(&priv->reg_bar0->adminq_event_counter)
+ == prod_cnt)
+ return true;
+ msleep(GVE_ADMINQ_SLEEP_LEN);
+ }
+
+ return false;
+}
+
+static int gve_adminq_parse_err(struct device *dev, u32 status)
+{
+ if (status != GVE_ADMINQ_COMMAND_PASSED &&
+ status != GVE_ADMINQ_COMMAND_UNSET)
+ dev_err(dev, "AQ command failed with status %d\n", status);
+
+ switch (status) {
+ case GVE_ADMINQ_COMMAND_PASSED:
+ return 0;
+ case GVE_ADMINQ_COMMAND_UNSET:
+ dev_err(dev, "parse_aq_err: err and status both unset, this should not be possible.\n");
+ return -EINVAL;
+ case GVE_ADMINQ_COMMAND_ERROR_ABORTED:
+ case GVE_ADMINQ_COMMAND_ERROR_CANCELLED:
+ case GVE_ADMINQ_COMMAND_ERROR_DATALOSS:
+ case GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION:
+ case GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE:
+ return -EAGAIN;
+ case GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS:
+ case GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR:
+ case GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT:
+ case GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND:
+ case GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE:
+ case GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR:
+ return -EINVAL;
+ case GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED:
+ return -ETIME;
+ case GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED:
+ case GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED:
+ return -EACCES;
+ case GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED:
+ return -ENOMEM;
+ case GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED:
+ return -ENOTSUPP;
+ default:
+ dev_err(dev, "parse_aq_err: unknown status code %d\n", status);
+ return -EINVAL;
+ }
+}
+
+/* This function is not threadsafe - the caller is responsible for any
+ * necessary locks.
+ */
+int gve_adminq_execute_cmd(struct gve_priv *priv,
+ union gve_adminq_command *cmd_orig)
+{
+ union gve_adminq_command *cmd;
+ u32 status = 0;
+ u32 prod_cnt;
+
+ cmd = &priv->adminq[priv->adminq_prod_cnt & priv->adminq_mask];
+ priv->adminq_prod_cnt++;
+ prod_cnt = priv->adminq_prod_cnt;
+
+ memcpy(cmd, cmd_orig, sizeof(*cmd_orig));
+
+ gve_adminq_kick_cmd(priv, prod_cnt);
+ if (!gve_adminq_wait_for_cmd(priv, prod_cnt)) {
+ dev_err(&priv->pdev->dev, "AQ command timed out, need to reset AQ\n");
+ return -ENOTRECOVERABLE;
+ }
+
+ memcpy(cmd_orig, cmd, sizeof(*cmd));
+ status = be32_to_cpu(READ_ONCE(cmd->status));
+ return gve_adminq_parse_err(&priv->pdev->dev, status);
+}
+
+/* The device specifies that the management vector can either be the first irq
+ * or the last irq. ntfy_blk_msix_base_idx indicates the first irq assigned to
+ * the ntfy blks. It if is 0 then the management vector is last, if it is 1 then
+ * the management vector is first.
+ *
+ * gve arranges the msix vectors so that the management vector is last.
+ */
+#define GVE_NTFY_BLK_BASE_MSIX_IDX 0
+int gve_adminq_configure_device_resources(struct gve_priv *priv,
+ dma_addr_t counter_array_bus_addr,
+ u32 num_counters,
+ dma_addr_t db_array_bus_addr,
+ u32 num_ntfy_blks)
+{
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES);
+ cmd.configure_device_resources =
+ (struct gve_adminq_configure_device_resources) {
+ .counter_array = cpu_to_be64(counter_array_bus_addr),
+ .num_counters = cpu_to_be32(num_counters),
+ .irq_db_addr = cpu_to_be64(db_array_bus_addr),
+ .num_irq_dbs = cpu_to_be32(num_ntfy_blks),
+ .irq_db_stride = cpu_to_be32(sizeof(priv->ntfy_blocks[0])),
+ .ntfy_blk_msix_base_idx =
+ cpu_to_be32(GVE_NTFY_BLK_BASE_MSIX_IDX),
+ };
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_deconfigure_device_resources(struct gve_priv *priv)
+{
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES);
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
+{
+ struct gve_tx_ring *tx = &priv->tx[queue_index];
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE);
+ cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) {
+ .queue_id = cpu_to_be32(queue_index),
+ .reserved = 0,
+ .queue_resources_addr = cpu_to_be64(tx->q_resources_bus),
+ .tx_ring_addr = cpu_to_be64(tx->bus),
+ .queue_page_list_id = cpu_to_be32(tx->tx_fifo.qpl->id),
+ .ntfy_id = cpu_to_be32(tx->ntfy_id),
+ };
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
+{
+ struct gve_rx_ring *rx = &priv->rx[queue_index];
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
+ cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
+ .queue_id = cpu_to_be32(queue_index),
+ .index = cpu_to_be32(queue_index),
+ .reserved = 0,
+ .ntfy_id = cpu_to_be32(rx->ntfy_id),
+ .queue_resources_addr = cpu_to_be64(rx->q_resources_bus),
+ .rx_desc_ring_addr = cpu_to_be64(rx->desc.bus),
+ .rx_data_ring_addr = cpu_to_be64(rx->data.data_bus),
+ .queue_page_list_id = cpu_to_be32(rx->data.qpl->id),
+ };
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index)
+{
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_TX_QUEUE);
+ cmd.destroy_tx_queue = (struct gve_adminq_destroy_tx_queue) {
+ .queue_id = cpu_to_be32(queue_index),
+ };
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index)
+{
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE);
+ cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) {
+ .queue_id = cpu_to_be32(queue_index),
+ };
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_describe_device(struct gve_priv *priv)
+{
+ struct gve_device_descriptor *descriptor;
+ union gve_adminq_command cmd;
+ dma_addr_t descriptor_bus;
+ int err = 0;
+ u8 *mac;
+ u16 mtu;
+
+ memset(&cmd, 0, sizeof(cmd));
+ descriptor = dma_alloc_coherent(&priv->pdev->dev, PAGE_SIZE,
+ &descriptor_bus, GFP_KERNEL);
+ if (!descriptor)
+ return -ENOMEM;
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESCRIBE_DEVICE);
+ cmd.describe_device.device_descriptor_addr =
+ cpu_to_be64(descriptor_bus);
+ cmd.describe_device.device_descriptor_version =
+ cpu_to_be32(GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION);
+ cmd.describe_device.available_length = cpu_to_be32(PAGE_SIZE);
+
+ err = gve_adminq_execute_cmd(priv, &cmd);
+ if (err)
+ goto free_device_descriptor;
+
+ priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
+ if (priv->tx_desc_cnt * sizeof(priv->tx->desc[0]) < PAGE_SIZE) {
+ netif_err(priv, drv, priv->dev, "Tx desc count %d too low\n",
+ priv->tx_desc_cnt);
+ err = -EINVAL;
+ goto free_device_descriptor;
+ }
+ priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
+ if (priv->rx_desc_cnt * sizeof(priv->rx->desc.desc_ring[0])
+ < PAGE_SIZE ||
+ priv->rx_desc_cnt * sizeof(priv->rx->data.data_ring[0])
+ < PAGE_SIZE) {
+ netif_err(priv, drv, priv->dev, "Rx desc count %d too low\n",
+ priv->rx_desc_cnt);
+ err = -EINVAL;
+ goto free_device_descriptor;
+ }
+ priv->max_registered_pages =
+ be64_to_cpu(descriptor->max_registered_pages);
+ mtu = be16_to_cpu(descriptor->mtu);
+ if (mtu < ETH_MIN_MTU) {
+ netif_err(priv, drv, priv->dev, "MTU %d below minimum MTU\n",
+ mtu);
+ err = -EINVAL;
+ goto free_device_descriptor;
+ }
+ priv->dev->max_mtu = mtu;
+ priv->num_event_counters = be16_to_cpu(descriptor->counters);
+ ether_addr_copy(priv->dev->dev_addr, descriptor->mac);
+ mac = descriptor->mac;
+ netif_info(priv, drv, priv->dev, "MAC addr: %pM\n", mac);
+ priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl);
+ priv->rx_pages_per_qpl = be16_to_cpu(descriptor->rx_pages_per_qpl);
+ if (priv->rx_pages_per_qpl < priv->rx_desc_cnt) {
+ netif_err(priv, drv, priv->dev, "rx_pages_per_qpl cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n",
+ priv->rx_pages_per_qpl);
+ priv->rx_desc_cnt = priv->rx_pages_per_qpl;
+ }
+ priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
+
+free_device_descriptor:
+ dma_free_coherent(&priv->pdev->dev, sizeof(*descriptor), descriptor,
+ descriptor_bus);
+ return err;
+}
+
+int gve_adminq_register_page_list(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl)
+{
+ struct device *hdev = &priv->pdev->dev;
+ u32 num_entries = qpl->num_entries;
+ u32 size = num_entries * sizeof(qpl->page_buses[0]);
+ union gve_adminq_command cmd;
+ dma_addr_t page_list_bus;
+ __be64 *page_list;
+ int err;
+ int i;
+
+ memset(&cmd, 0, sizeof(cmd));
+ page_list = dma_alloc_coherent(hdev, size, &page_list_bus, GFP_KERNEL);
+ if (!page_list)
+ return -ENOMEM;
+
+ for (i = 0; i < num_entries; i++)
+ page_list[i] = cpu_to_be64(qpl->page_buses[i]);
+
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_REGISTER_PAGE_LIST);
+ cmd.reg_page_list = (struct gve_adminq_register_page_list) {
+ .page_list_id = cpu_to_be32(qpl->id),
+ .num_pages = cpu_to_be32(num_entries),
+ .page_address_list_addr = cpu_to_be64(page_list_bus),
+ };
+
+ err = gve_adminq_execute_cmd(priv, &cmd);
+ dma_free_coherent(hdev, size, page_list, page_list_bus);
+ return err;
+}
+
+int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id)
+{
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_UNREGISTER_PAGE_LIST);
+ cmd.unreg_page_list = (struct gve_adminq_unregister_page_list) {
+ .page_list_id = cpu_to_be32(page_list_id),
+ };
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
+int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu)
+{
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_SET_DRIVER_PARAMETER);
+ cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) {
+ .parameter_type = cpu_to_be32(GVE_SET_PARAM_MTU),
+ .parameter_value = cpu_to_be64(mtu),
+ };
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
new file mode 100644
index 000000000000..4dfa06edc0f8
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#ifndef _GVE_ADMINQ_H
+#define _GVE_ADMINQ_H
+
+#include <linux/build_bug.h>
+
+/* Admin queue opcodes */
+enum gve_adminq_opcodes {
+ GVE_ADMINQ_DESCRIBE_DEVICE = 0x1,
+ GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES = 0x2,
+ GVE_ADMINQ_REGISTER_PAGE_LIST = 0x3,
+ GVE_ADMINQ_UNREGISTER_PAGE_LIST = 0x4,
+ GVE_ADMINQ_CREATE_TX_QUEUE = 0x5,
+ GVE_ADMINQ_CREATE_RX_QUEUE = 0x6,
+ GVE_ADMINQ_DESTROY_TX_QUEUE = 0x7,
+ GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8,
+ GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9,
+ GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB,
+};
+
+/* Admin queue status codes */
+enum gve_adminq_statuses {
+ GVE_ADMINQ_COMMAND_UNSET = 0x0,
+ GVE_ADMINQ_COMMAND_PASSED = 0x1,
+ GVE_ADMINQ_COMMAND_ERROR_ABORTED = 0xFFFFFFF0,
+ GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS = 0xFFFFFFF1,
+ GVE_ADMINQ_COMMAND_ERROR_CANCELLED = 0xFFFFFFF2,
+ GVE_ADMINQ_COMMAND_ERROR_DATALOSS = 0xFFFFFFF3,
+ GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED = 0xFFFFFFF4,
+ GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION = 0xFFFFFFF5,
+ GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR = 0xFFFFFFF6,
+ GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT = 0xFFFFFFF7,
+ GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND = 0xFFFFFFF8,
+ GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE = 0xFFFFFFF9,
+ GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED = 0xFFFFFFFA,
+ GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED = 0xFFFFFFFB,
+ GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED = 0xFFFFFFFC,
+ GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE = 0xFFFFFFFD,
+ GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED = 0xFFFFFFFE,
+ GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR = 0xFFFFFFFF,
+};
+
+#define GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION 1
+
+/* All AdminQ command structs should be naturally packed. The static_assert
+ * calls make sure this is the case at compile time.
+ */
+
+struct gve_adminq_describe_device {
+ __be64 device_descriptor_addr;
+ __be32 device_descriptor_version;
+ __be32 available_length;
+};
+
+static_assert(sizeof(struct gve_adminq_describe_device) == 16);
+
+struct gve_device_descriptor {
+ __be64 max_registered_pages;
+ __be16 reserved1;
+ __be16 tx_queue_entries;
+ __be16 rx_queue_entries;
+ __be16 default_num_queues;
+ __be16 mtu;
+ __be16 counters;
+ __be16 tx_pages_per_qpl;
+ __be16 rx_pages_per_qpl;
+ u8 mac[ETH_ALEN];
+ __be16 num_device_options;
+ __be16 total_length;
+ u8 reserved2[6];
+};
+
+static_assert(sizeof(struct gve_device_descriptor) == 40);
+
+struct device_option {
+ __be32 option_id;
+ __be32 option_length;
+};
+
+static_assert(sizeof(struct device_option) == 8);
+
+struct gve_adminq_configure_device_resources {
+ __be64 counter_array;
+ __be64 irq_db_addr;
+ __be32 num_counters;
+ __be32 num_irq_dbs;
+ __be32 irq_db_stride;
+ __be32 ntfy_blk_msix_base_idx;
+};
+
+static_assert(sizeof(struct gve_adminq_configure_device_resources) == 32);
+
+struct gve_adminq_register_page_list {
+ __be32 page_list_id;
+ __be32 num_pages;
+ __be64 page_address_list_addr;
+};
+
+static_assert(sizeof(struct gve_adminq_register_page_list) == 16);
+
+struct gve_adminq_unregister_page_list {
+ __be32 page_list_id;
+};
+
+static_assert(sizeof(struct gve_adminq_unregister_page_list) == 4);
+
+struct gve_adminq_create_tx_queue {
+ __be32 queue_id;
+ __be32 reserved;
+ __be64 queue_resources_addr;
+ __be64 tx_ring_addr;
+ __be32 queue_page_list_id;
+ __be32 ntfy_id;
+};
+
+static_assert(sizeof(struct gve_adminq_create_tx_queue) == 32);
+
+struct gve_adminq_create_rx_queue {
+ __be32 queue_id;
+ __be32 index;
+ __be32 reserved;
+ __be32 ntfy_id;
+ __be64 queue_resources_addr;
+ __be64 rx_desc_ring_addr;
+ __be64 rx_data_ring_addr;
+ __be32 queue_page_list_id;
+ u8 padding[4];
+};
+
+static_assert(sizeof(struct gve_adminq_create_rx_queue) == 48);
+
+/* Queue resources that are shared with the device */
+struct gve_queue_resources {
+ union {
+ struct {
+ __be32 db_index; /* Device -> Guest */
+ __be32 counter_index; /* Device -> Guest */
+ };
+ u8 reserved[64];
+ };
+};
+
+static_assert(sizeof(struct gve_queue_resources) == 64);
+
+struct gve_adminq_destroy_tx_queue {
+ __be32 queue_id;
+};
+
+static_assert(sizeof(struct gve_adminq_destroy_tx_queue) == 4);
+
+struct gve_adminq_destroy_rx_queue {
+ __be32 queue_id;
+};
+
+static_assert(sizeof(struct gve_adminq_destroy_rx_queue) == 4);
+
+/* GVE Set Driver Parameter Types */
+enum gve_set_driver_param_types {
+ GVE_SET_PARAM_MTU = 0x1,
+};
+
+struct gve_adminq_set_driver_parameter {
+ __be32 parameter_type;
+ u8 reserved[4];
+ __be64 parameter_value;
+};
+
+static_assert(sizeof(struct gve_adminq_set_driver_parameter) == 16);
+
+union gve_adminq_command {
+ struct {
+ __be32 opcode;
+ __be32 status;
+ union {
+ struct gve_adminq_configure_device_resources
+ configure_device_resources;
+ struct gve_adminq_create_tx_queue create_tx_queue;
+ struct gve_adminq_create_rx_queue create_rx_queue;
+ struct gve_adminq_destroy_tx_queue destroy_tx_queue;
+ struct gve_adminq_destroy_rx_queue destroy_rx_queue;
+ struct gve_adminq_describe_device describe_device;
+ struct gve_adminq_register_page_list reg_page_list;
+ struct gve_adminq_unregister_page_list unreg_page_list;
+ struct gve_adminq_set_driver_parameter set_driver_param;
+ };
+ };
+ u8 reserved[64];
+};
+
+static_assert(sizeof(union gve_adminq_command) == 64);
+
+int gve_adminq_alloc(struct device *dev, struct gve_priv *priv);
+void gve_adminq_free(struct device *dev, struct gve_priv *priv);
+void gve_adminq_release(struct gve_priv *priv);
+int gve_adminq_execute_cmd(struct gve_priv *priv,
+ union gve_adminq_command *cmd_orig);
+int gve_adminq_describe_device(struct gve_priv *priv);
+int gve_adminq_configure_device_resources(struct gve_priv *priv,
+ dma_addr_t counter_array_bus_addr,
+ u32 num_counters,
+ dma_addr_t db_array_bus_addr,
+ u32 num_ntfy_blks);
+int gve_adminq_deconfigure_device_resources(struct gve_priv *priv);
+int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_id);
+int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_id);
+int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_id);
+int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_id);
+int gve_adminq_register_page_list(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl);
+int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id);
+int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu);
+#endif /* _GVE_ADMINQ_H */
diff --git a/drivers/net/ethernet/google/gve/gve_desc.h b/drivers/net/ethernet/google/gve/gve_desc.h
new file mode 100644
index 000000000000..54779871d52e
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_desc.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+/* GVE Transmit Descriptor formats */
+
+#ifndef _GVE_DESC_H_
+#define _GVE_DESC_H_
+
+#include <linux/build_bug.h>
+
+/* A note on seg_addrs
+ *
+ * Base addresses encoded in seg_addr are not assumed to be physical
+ * addresses. The ring format assumes these come from some linear address
+ * space. This could be physical memory, kernel virtual memory, user virtual
+ * memory. gVNIC uses lists of registered pages. Each queue is assumed
+ * to be associated with a single such linear address space to ensure a
+ * consistent meaning for seg_addrs posted to its rings.
+ */
+
+struct gve_tx_pkt_desc {
+ u8 type_flags; /* desc type is lower 4 bits, flags upper */
+ u8 l4_csum_offset; /* relative offset of L4 csum word */
+ u8 l4_hdr_offset; /* Offset of start of L4 headers in packet */
+ u8 desc_cnt; /* Total descriptors for this packet */
+ __be16 len; /* Total length of this packet (in bytes) */
+ __be16 seg_len; /* Length of this descriptor's segment */
+ __be64 seg_addr; /* Base address (see note) of this segment */
+} __packed;
+
+struct gve_tx_seg_desc {
+ u8 type_flags; /* type is lower 4 bits, flags upper */
+ u8 l3_offset; /* TSO: 2 byte units to start of IPH */
+ __be16 reserved;
+ __be16 mss; /* TSO MSS */
+ __be16 seg_len;
+ __be64 seg_addr;
+} __packed;
+
+/* GVE Transmit Descriptor Types */
+#define GVE_TXD_STD (0x0 << 4) /* Std with Host Address */
+#define GVE_TXD_TSO (0x1 << 4) /* TSO with Host Address */
+#define GVE_TXD_SEG (0x2 << 4) /* Seg with Host Address */
+
+/* GVE Transmit Descriptor Flags for Std Pkts */
+#define GVE_TXF_L4CSUM BIT(0) /* Need csum offload */
+#define GVE_TXF_TSTAMP BIT(2) /* Timestamp required */
+
+/* GVE Transmit Descriptor Flags for TSO Segs */
+#define GVE_TXSF_IPV6 BIT(1) /* IPv6 TSO */
+
+/* GVE Receive Packet Descriptor */
+/* The start of an ethernet packet comes 2 bytes into the rx buffer.
+ * gVNIC adds this padding so that both the DMA and the L3/4 protocol header
+ * access is aligned.
+ */
+#define GVE_RX_PAD 2
+
+struct gve_rx_desc {
+ u8 padding[48];
+ __be32 rss_hash; /* Receive-side scaling hash (Toeplitz for gVNIC) */
+ __be16 mss;
+ __be16 reserved; /* Reserved to zero */
+ u8 hdr_len; /* Header length (L2-L4) including padding */
+ u8 hdr_off; /* 64-byte-scaled offset into RX_DATA entry */
+ __sum16 csum; /* 1's-complement partial checksum of L3+ bytes */
+ __be16 len; /* Length of the received packet */
+ __be16 flags_seq; /* Flags [15:3] and sequence number [2:0] (1-7) */
+} __packed;
+static_assert(sizeof(struct gve_rx_desc) == 64);
+
+/* As with the Tx ring format, the qpl_offset entries below are offsets into an
+ * ordered list of registered pages.
+ */
+struct gve_rx_data_slot {
+ /* byte offset into the rx registered segment of this slot */
+ __be64 qpl_offset;
+};
+
+/* GVE Recive Packet Descriptor Seq No */
+#define GVE_SEQNO(x) (be16_to_cpu(x) & 0x7)
+
+/* GVE Recive Packet Descriptor Flags */
+#define GVE_RXFLG(x) cpu_to_be16(1 << (3 + (x)))
+#define GVE_RXF_FRAG GVE_RXFLG(3) /* IP Fragment */
+#define GVE_RXF_IPV4 GVE_RXFLG(4) /* IPv4 */
+#define GVE_RXF_IPV6 GVE_RXFLG(5) /* IPv6 */
+#define GVE_RXF_TCP GVE_RXFLG(6) /* TCP Packet */
+#define GVE_RXF_UDP GVE_RXFLG(7) /* UDP Packet */
+#define GVE_RXF_ERR GVE_RXFLG(8) /* Packet Error Detected */
+
+/* GVE IRQ */
+#define GVE_IRQ_ACK BIT(31)
+#define GVE_IRQ_MASK BIT(30)
+#define GVE_IRQ_EVENT BIT(29)
+
+static inline bool gve_needs_rss(__be16 flag)
+{
+ if (flag & GVE_RXF_FRAG)
+ return false;
+ if (flag & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
+ return true;
+ return false;
+}
+
+static inline u8 gve_next_seqno(u8 seq)
+{
+ return (seq + 1) == 8 ? 1 : seq + 1;
+}
+#endif /* _GVE_DESC_H_ */
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
new file mode 100644
index 000000000000..26540b856541
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#include <linux/rtnetlink.h>
+#include "gve.h"
+
+static void gve_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *info)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ strlcpy(info->driver, "gve", sizeof(info->driver));
+ strlcpy(info->version, gve_version_str, sizeof(info->version));
+ strlcpy(info->bus_info, pci_name(priv->pdev), sizeof(info->bus_info));
+}
+
+static void gve_set_msglevel(struct net_device *netdev, u32 value)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ priv->msg_enable = value;
+}
+
+static u32 gve_get_msglevel(struct net_device *netdev)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ return priv->msg_enable;
+}
+
+static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = {
+ "rx_packets", "tx_packets", "rx_bytes", "tx_bytes",
+ "rx_dropped", "tx_dropped", "tx_timeouts",
+};
+
+#define GVE_MAIN_STATS_LEN ARRAY_SIZE(gve_gstrings_main_stats)
+#define NUM_GVE_TX_CNTS 5
+#define NUM_GVE_RX_CNTS 2
+
+static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ char *s = (char *)data;
+ int i;
+
+ if (stringset != ETH_SS_STATS)
+ return;
+
+ memcpy(s, *gve_gstrings_main_stats,
+ sizeof(gve_gstrings_main_stats));
+ s += sizeof(gve_gstrings_main_stats);
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ snprintf(s, ETH_GSTRING_LEN, "rx_desc_cnt[%u]", i);
+ s += ETH_GSTRING_LEN;
+ snprintf(s, ETH_GSTRING_LEN, "rx_desc_fill_cnt[%u]", i);
+ s += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ snprintf(s, ETH_GSTRING_LEN, "tx_req[%u]", i);
+ s += ETH_GSTRING_LEN;
+ snprintf(s, ETH_GSTRING_LEN, "tx_done[%u]", i);
+ s += ETH_GSTRING_LEN;
+ snprintf(s, ETH_GSTRING_LEN, "tx_wake[%u]", i);
+ s += ETH_GSTRING_LEN;
+ snprintf(s, ETH_GSTRING_LEN, "tx_stop[%u]", i);
+ s += ETH_GSTRING_LEN;
+ snprintf(s, ETH_GSTRING_LEN, "tx_event_counter[%u]", i);
+ s += ETH_GSTRING_LEN;
+ }
+}
+
+static int gve_get_sset_count(struct net_device *netdev, int sset)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ switch (sset) {
+ case ETH_SS_STATS:
+ return GVE_MAIN_STATS_LEN +
+ (priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS) +
+ (priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void
+gve_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ u64 rx_pkts, rx_bytes, tx_pkts, tx_bytes;
+ unsigned int start;
+ int ring;
+ int i;
+
+ ASSERT_RTNL();
+
+ for (rx_pkts = 0, rx_bytes = 0, ring = 0;
+ ring < priv->rx_cfg.num_queues; ring++) {
+ if (priv->rx) {
+ do {
+ start =
+ u64_stats_fetch_begin(&priv->rx[ring].statss);
+ rx_pkts += priv->rx[ring].rpackets;
+ rx_bytes += priv->rx[ring].rbytes;
+ } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
+ start));
+ }
+ }
+ for (tx_pkts = 0, tx_bytes = 0, ring = 0;
+ ring < priv->tx_cfg.num_queues; ring++) {
+ if (priv->tx) {
+ do {
+ start =
+ u64_stats_fetch_begin(&priv->tx[ring].statss);
+ tx_pkts += priv->tx[ring].pkt_done;
+ tx_bytes += priv->tx[ring].bytes_done;
+ } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
+ start));
+ }
+ }
+
+ i = 0;
+ data[i++] = rx_pkts;
+ data[i++] = tx_pkts;
+ data[i++] = rx_bytes;
+ data[i++] = tx_bytes;
+ /* Skip rx_dropped and tx_dropped */
+ i += 2;
+ data[i++] = priv->tx_timeo_cnt;
+ i = GVE_MAIN_STATS_LEN;
+
+ /* walk RX rings */
+ if (priv->rx) {
+ for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
+ struct gve_rx_ring *rx = &priv->rx[ring];
+
+ data[i++] = rx->desc.cnt;
+ data[i++] = rx->desc.fill_cnt;
+ }
+ } else {
+ i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS;
+ }
+ /* walk TX rings */
+ if (priv->tx) {
+ for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+ struct gve_tx_ring *tx = &priv->tx[ring];
+
+ data[i++] = tx->req;
+ data[i++] = tx->done;
+ data[i++] = tx->wake_queue;
+ data[i++] = tx->stop_queue;
+ data[i++] = be32_to_cpu(gve_tx_load_event_counter(priv,
+ tx));
+ }
+ } else {
+ i += priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS;
+ }
+}
+
+static void gve_get_channels(struct net_device *netdev,
+ struct ethtool_channels *cmd)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ cmd->max_rx = priv->rx_cfg.max_queues;
+ cmd->max_tx = priv->tx_cfg.max_queues;
+ cmd->max_other = 0;
+ cmd->max_combined = 0;
+ cmd->rx_count = priv->rx_cfg.num_queues;
+ cmd->tx_count = priv->tx_cfg.num_queues;
+ cmd->other_count = 0;
+ cmd->combined_count = 0;
+}
+
+static int gve_set_channels(struct net_device *netdev,
+ struct ethtool_channels *cmd)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ struct gve_queue_config new_tx_cfg = priv->tx_cfg;
+ struct gve_queue_config new_rx_cfg = priv->rx_cfg;
+ struct ethtool_channels old_settings;
+ int new_tx = cmd->tx_count;
+ int new_rx = cmd->rx_count;
+
+ gve_get_channels(netdev, &old_settings);
+
+ /* Changing combined is not allowed allowed */
+ if (cmd->combined_count != old_settings.combined_count)
+ return -EINVAL;
+
+ if (!new_rx || !new_tx)
+ return -EINVAL;
+
+ if (!netif_carrier_ok(netdev)) {
+ priv->tx_cfg.num_queues = new_tx;
+ priv->rx_cfg.num_queues = new_rx;
+ return 0;
+ }
+
+ new_tx_cfg.num_queues = new_tx;
+ new_rx_cfg.num_queues = new_rx;
+
+ return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg);
+}
+
+static void gve_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *cmd)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ cmd->rx_max_pending = priv->rx_desc_cnt;
+ cmd->tx_max_pending = priv->tx_desc_cnt;
+ cmd->rx_pending = priv->rx_desc_cnt;
+ cmd->tx_pending = priv->tx_desc_cnt;
+}
+
+static int gve_user_reset(struct net_device *netdev, u32 *flags)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ if (*flags == ETH_RESET_ALL) {
+ *flags = 0;
+ return gve_reset(priv, true);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+const struct ethtool_ops gve_ethtool_ops = {
+ .get_drvinfo = gve_get_drvinfo,
+ .get_strings = gve_get_strings,
+ .get_sset_count = gve_get_sset_count,
+ .get_ethtool_stats = gve_get_ethtool_stats,
+ .set_msglevel = gve_set_msglevel,
+ .get_msglevel = gve_get_msglevel,
+ .set_channels = gve_set_channels,
+ .get_channels = gve_get_channels,
+ .get_link = ethtool_op_get_link,
+ .get_ringparam = gve_get_ringparam,
+ .reset = gve_user_reset,
+};
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
new file mode 100644
index 000000000000..eef500bd2ff7
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -0,0 +1,1230 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <net/sch_generic.h>
+#include "gve.h"
+#include "gve_adminq.h"
+#include "gve_register.h"
+
+#define GVE_DEFAULT_RX_COPYBREAK (256)
+
+#define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
+#define GVE_VERSION "1.0.0"
+#define GVE_VERSION_PREFIX "GVE-"
+
+const char gve_version_str[] = GVE_VERSION;
+static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
+
+static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ unsigned int start;
+ int ring;
+
+ if (priv->rx) {
+ for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
+ do {
+ start =
+ u64_stats_fetch_begin(&priv->rx[ring].statss);
+ s->rx_packets += priv->rx[ring].rpackets;
+ s->rx_bytes += priv->rx[ring].rbytes;
+ } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
+ start));
+ }
+ }
+ if (priv->tx) {
+ for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+ do {
+ start =
+ u64_stats_fetch_begin(&priv->tx[ring].statss);
+ s->tx_packets += priv->tx[ring].pkt_done;
+ s->tx_bytes += priv->tx[ring].bytes_done;
+ } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
+ start));
+ }
+ }
+}
+
+static int gve_alloc_counter_array(struct gve_priv *priv)
+{
+ priv->counter_array =
+ dma_alloc_coherent(&priv->pdev->dev,
+ priv->num_event_counters *
+ sizeof(*priv->counter_array),
+ &priv->counter_array_bus, GFP_KERNEL);
+ if (!priv->counter_array)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void gve_free_counter_array(struct gve_priv *priv)
+{
+ dma_free_coherent(&priv->pdev->dev,
+ priv->num_event_counters *
+ sizeof(*priv->counter_array),
+ priv->counter_array, priv->counter_array_bus);
+ priv->counter_array = NULL;
+}
+
+static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
+{
+ struct gve_priv *priv = arg;
+
+ queue_work(priv->gve_wq, &priv->service_task);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t gve_intr(int irq, void *arg)
+{
+ struct gve_notify_block *block = arg;
+ struct gve_priv *priv = block->priv;
+
+ iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
+ napi_schedule_irqoff(&block->napi);
+ return IRQ_HANDLED;
+}
+
+static int gve_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct gve_notify_block *block;
+ __be32 __iomem *irq_doorbell;
+ bool reschedule = false;
+ struct gve_priv *priv;
+
+ block = container_of(napi, struct gve_notify_block, napi);
+ priv = block->priv;
+
+ if (block->tx)
+ reschedule |= gve_tx_poll(block, budget);
+ if (block->rx)
+ reschedule |= gve_rx_poll(block, budget);
+
+ if (reschedule)
+ return budget;
+
+ napi_complete(napi);
+ irq_doorbell = gve_irq_doorbell(priv, block);
+ iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
+
+ /* Double check we have no extra work.
+ * Ensure unmask synchronizes with checking for work.
+ */
+ dma_rmb();
+ if (block->tx)
+ reschedule |= gve_tx_poll(block, -1);
+ if (block->rx)
+ reschedule |= gve_rx_poll(block, -1);
+ if (reschedule && napi_reschedule(napi))
+ iowrite32be(GVE_IRQ_MASK, irq_doorbell);
+
+ return 0;
+}
+
+static int gve_alloc_notify_blocks(struct gve_priv *priv)
+{
+ int num_vecs_requested = priv->num_ntfy_blks + 1;
+ char *name = priv->dev->name;
+ unsigned int active_cpus;
+ int vecs_enabled;
+ int i, j;
+ int err;
+
+ priv->msix_vectors = kvzalloc(num_vecs_requested *
+ sizeof(*priv->msix_vectors), GFP_KERNEL);
+ if (!priv->msix_vectors)
+ return -ENOMEM;
+ for (i = 0; i < num_vecs_requested; i++)
+ priv->msix_vectors[i].entry = i;
+ vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
+ GVE_MIN_MSIX, num_vecs_requested);
+ if (vecs_enabled < 0) {
+ dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
+ GVE_MIN_MSIX, vecs_enabled);
+ err = vecs_enabled;
+ goto abort_with_msix_vectors;
+ }
+ if (vecs_enabled != num_vecs_requested) {
+ int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
+ int vecs_per_type = new_num_ntfy_blks / 2;
+ int vecs_left = new_num_ntfy_blks % 2;
+
+ priv->num_ntfy_blks = new_num_ntfy_blks;
+ priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
+ vecs_per_type);
+ priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
+ vecs_per_type + vecs_left);
+ dev_err(&priv->pdev->dev,
+ "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
+ vecs_enabled, priv->tx_cfg.max_queues,
+ priv->rx_cfg.max_queues);
+ if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
+ priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
+ if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
+ priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
+ }
+ /* Half the notification blocks go to TX and half to RX */
+ active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
+
+ /* Setup Management Vector - the last vector */
+ snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
+ name);
+ err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
+ gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
+ if (err) {
+ dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
+ goto abort_with_msix_enabled;
+ }
+ priv->ntfy_blocks =
+ dma_alloc_coherent(&priv->pdev->dev,
+ priv->num_ntfy_blks *
+ sizeof(*priv->ntfy_blocks),
+ &priv->ntfy_block_bus, GFP_KERNEL);
+ if (!priv->ntfy_blocks) {
+ err = -ENOMEM;
+ goto abort_with_mgmt_vector;
+ }
+ /* Setup the other blocks - the first n-1 vectors */
+ for (i = 0; i < priv->num_ntfy_blks; i++) {
+ struct gve_notify_block *block = &priv->ntfy_blocks[i];
+ int msix_idx = i;
+
+ snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
+ name, i);
+ block->priv = priv;
+ err = request_irq(priv->msix_vectors[msix_idx].vector,
+ gve_intr, 0, block->name, block);
+ if (err) {
+ dev_err(&priv->pdev->dev,
+ "Failed to receive msix vector %d\n", i);
+ goto abort_with_some_ntfy_blocks;
+ }
+ irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
+ get_cpu_mask(i % active_cpus));
+ }
+ return 0;
+abort_with_some_ntfy_blocks:
+ for (j = 0; j < i; j++) {
+ struct gve_notify_block *block = &priv->ntfy_blocks[j];
+ int msix_idx = j;
+
+ irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
+ NULL);
+ free_irq(priv->msix_vectors[msix_idx].vector, block);
+ }
+ dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
+ sizeof(*priv->ntfy_blocks),
+ priv->ntfy_blocks, priv->ntfy_block_bus);
+ priv->ntfy_blocks = NULL;
+abort_with_mgmt_vector:
+ free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
+abort_with_msix_enabled:
+ pci_disable_msix(priv->pdev);
+abort_with_msix_vectors:
+ kfree(priv->msix_vectors);
+ priv->msix_vectors = NULL;
+ return err;
+}
+
+static void gve_free_notify_blocks(struct gve_priv *priv)
+{
+ int i;
+
+ /* Free the irqs */
+ for (i = 0; i < priv->num_ntfy_blks; i++) {
+ struct gve_notify_block *block = &priv->ntfy_blocks[i];
+ int msix_idx = i;
+
+ irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
+ NULL);
+ free_irq(priv->msix_vectors[msix_idx].vector, block);
+ }
+ dma_free_coherent(&priv->pdev->dev,
+ priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
+ priv->ntfy_blocks, priv->ntfy_block_bus);
+ priv->ntfy_blocks = NULL;
+ free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
+ pci_disable_msix(priv->pdev);
+ kfree(priv->msix_vectors);
+ priv->msix_vectors = NULL;
+}
+
+static int gve_setup_device_resources(struct gve_priv *priv)
+{
+ int err;
+
+ err = gve_alloc_counter_array(priv);
+ if (err)
+ return err;
+ err = gve_alloc_notify_blocks(priv);
+ if (err)
+ goto abort_with_counter;
+ err = gve_adminq_configure_device_resources(priv,
+ priv->counter_array_bus,
+ priv->num_event_counters,
+ priv->ntfy_block_bus,
+ priv->num_ntfy_blks);
+ if (unlikely(err)) {
+ dev_err(&priv->pdev->dev,
+ "could not setup device_resources: err=%d\n", err);
+ err = -ENXIO;
+ goto abort_with_ntfy_blocks;
+ }
+ gve_set_device_resources_ok(priv);
+ return 0;
+abort_with_ntfy_blocks:
+ gve_free_notify_blocks(priv);
+abort_with_counter:
+ gve_free_counter_array(priv);
+ return err;
+}
+
+static void gve_trigger_reset(struct gve_priv *priv);
+
+static void gve_teardown_device_resources(struct gve_priv *priv)
+{
+ int err;
+
+ /* Tell device its resources are being freed */
+ if (gve_get_device_resources_ok(priv)) {
+ err = gve_adminq_deconfigure_device_resources(priv);
+ if (err) {
+ dev_err(&priv->pdev->dev,
+ "Could not deconfigure device resources: err=%d\n",
+ err);
+ gve_trigger_reset(priv);
+ }
+ }
+ gve_free_counter_array(priv);
+ gve_free_notify_blocks(priv);
+ gve_clear_device_resources_ok(priv);
+}
+
+static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
+{
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+ netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
+ NAPI_POLL_WEIGHT);
+}
+
+static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
+{
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+ netif_napi_del(&block->napi);
+}
+
+static int gve_register_qpls(struct gve_priv *priv)
+{
+ int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int err;
+ int i;
+
+ for (i = 0; i < num_qpls; i++) {
+ err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to register queue page list %d\n",
+ priv->qpls[i].id);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ }
+ return 0;
+}
+
+static int gve_unregister_qpls(struct gve_priv *priv)
+{
+ int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int err;
+ int i;
+
+ for (i = 0; i < num_qpls; i++) {
+ err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
+ /* This failure will trigger a reset - no need to clean up */
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Failed to unregister queue page list %d\n",
+ priv->qpls[i].id);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static int gve_create_rings(struct gve_priv *priv)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ err = gve_adminq_create_tx_queue(priv, i);
+ if (err) {
+ netif_err(priv, drv, priv->dev, "failed to create tx queue %d\n",
+ i);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ netif_dbg(priv, drv, priv->dev, "created tx queue %d\n", i);
+ }
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ err = gve_adminq_create_rx_queue(priv, i);
+ if (err) {
+ netif_err(priv, drv, priv->dev, "failed to create rx queue %d\n",
+ i);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ /* Rx data ring has been prefilled with packet buffers at
+ * queue allocation time.
+ * Write the doorbell to provide descriptor slots and packet
+ * buffers to the NIC.
+ */
+ gve_rx_write_doorbell(priv, &priv->rx[i]);
+ netif_dbg(priv, drv, priv->dev, "created rx queue %d\n", i);
+ }
+
+ return 0;
+}
+
+static int gve_alloc_rings(struct gve_priv *priv)
+{
+ int ntfy_idx;
+ int err;
+ int i;
+
+ /* Setup tx rings */
+ priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
+ GFP_KERNEL);
+ if (!priv->tx)
+ return -ENOMEM;
+ err = gve_tx_alloc_rings(priv);
+ if (err)
+ goto free_tx;
+ /* Setup rx rings */
+ priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
+ GFP_KERNEL);
+ if (!priv->rx) {
+ err = -ENOMEM;
+ goto free_tx_queue;
+ }
+ err = gve_rx_alloc_rings(priv);
+ if (err)
+ goto free_rx;
+ /* Add tx napi & init sync stats*/
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ u64_stats_init(&priv->tx[i].statss);
+ ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+ gve_add_napi(priv, ntfy_idx);
+ }
+ /* Add rx napi & init sync stats*/
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ u64_stats_init(&priv->rx[i].statss);
+ ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
+ gve_add_napi(priv, ntfy_idx);
+ }
+
+ return 0;
+
+free_rx:
+ kfree(priv->rx);
+ priv->rx = NULL;
+free_tx_queue:
+ gve_tx_free_rings(priv);
+free_tx:
+ kfree(priv->tx);
+ priv->tx = NULL;
+ return err;
+}
+
+static int gve_destroy_rings(struct gve_priv *priv)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ err = gve_adminq_destroy_tx_queue(priv, i);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to destroy tx queue %d\n",
+ i);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ netif_dbg(priv, drv, priv->dev, "destroyed tx queue %d\n", i);
+ }
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ err = gve_adminq_destroy_rx_queue(priv, i);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to destroy rx queue %d\n",
+ i);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ netif_dbg(priv, drv, priv->dev, "destroyed rx queue %d\n", i);
+ }
+ return 0;
+}
+
+static void gve_free_rings(struct gve_priv *priv)
+{
+ int ntfy_idx;
+ int i;
+
+ if (priv->tx) {
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+ gve_remove_napi(priv, ntfy_idx);
+ }
+ gve_tx_free_rings(priv);
+ kfree(priv->tx);
+ priv->tx = NULL;
+ }
+ if (priv->rx) {
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
+ gve_remove_napi(priv, ntfy_idx);
+ }
+ gve_rx_free_rings(priv);
+ kfree(priv->rx);
+ priv->rx = NULL;
+ }
+}
+
+int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
+ enum dma_data_direction dir)
+{
+ *page = alloc_page(GFP_KERNEL);
+ if (!*page)
+ return -ENOMEM;
+ *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
+ if (dma_mapping_error(dev, *dma)) {
+ put_page(*page);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
+ int pages)
+{
+ struct gve_queue_page_list *qpl = &priv->qpls[id];
+ int err;
+ int i;
+
+ if (pages + priv->num_registered_pages > priv->max_registered_pages) {
+ netif_err(priv, drv, priv->dev,
+ "Reached max number of registered pages %llu > %llu\n",
+ pages + priv->num_registered_pages,
+ priv->max_registered_pages);
+ return -EINVAL;
+ }
+
+ qpl->id = id;
+ qpl->num_entries = pages;
+ qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
+ /* caller handles clean up */
+ if (!qpl->pages)
+ return -ENOMEM;
+ qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
+ GFP_KERNEL);
+ /* caller handles clean up */
+ if (!qpl->page_buses)
+ return -ENOMEM;
+
+ for (i = 0; i < pages; i++) {
+ err = gve_alloc_page(&priv->pdev->dev, &qpl->pages[i],
+ &qpl->page_buses[i],
+ gve_qpl_dma_dir(priv, id));
+ /* caller handles clean up */
+ if (err)
+ return -ENOMEM;
+ }
+ priv->num_registered_pages += pages;
+
+ return 0;
+}
+
+void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
+ enum dma_data_direction dir)
+{
+ if (!dma_mapping_error(dev, dma))
+ dma_unmap_page(dev, dma, PAGE_SIZE, dir);
+ if (page)
+ put_page(page);
+}
+
+static void gve_free_queue_page_list(struct gve_priv *priv,
+ int id)
+{
+ struct gve_queue_page_list *qpl = &priv->qpls[id];
+ int i;
+
+ if (!qpl->pages)
+ return;
+ if (!qpl->page_buses)
+ goto free_pages;
+
+ for (i = 0; i < qpl->num_entries; i++)
+ gve_free_page(&priv->pdev->dev, qpl->pages[i],
+ qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
+
+ kfree(qpl->page_buses);
+free_pages:
+ kfree(qpl->pages);
+ priv->num_registered_pages -= qpl->num_entries;
+}
+
+static int gve_alloc_qpls(struct gve_priv *priv)
+{
+ int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int i, j;
+ int err;
+
+ priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
+ if (!priv->qpls)
+ return -ENOMEM;
+
+ for (i = 0; i < gve_num_tx_qpls(priv); i++) {
+ err = gve_alloc_queue_page_list(priv, i,
+ priv->tx_pages_per_qpl);
+ if (err)
+ goto free_qpls;
+ }
+ for (; i < num_qpls; i++) {
+ err = gve_alloc_queue_page_list(priv, i,
+ priv->rx_pages_per_qpl);
+ if (err)
+ goto free_qpls;
+ }
+
+ priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
+ sizeof(unsigned long) * BITS_PER_BYTE;
+ priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!priv->qpl_cfg.qpl_id_map)
+ goto free_qpls;
+
+ return 0;
+
+free_qpls:
+ for (j = 0; j <= i; j++)
+ gve_free_queue_page_list(priv, j);
+ kfree(priv->qpls);
+ return err;
+}
+
+static void gve_free_qpls(struct gve_priv *priv)
+{
+ int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int i;
+
+ kfree(priv->qpl_cfg.qpl_id_map);
+
+ for (i = 0; i < num_qpls; i++)
+ gve_free_queue_page_list(priv, i);
+
+ kfree(priv->qpls);
+}
+
+/* Use this to schedule a reset when the device is capable of continuing
+ * to handle other requests in its current state. If it is not, do a reset
+ * in thread instead.
+ */
+void gve_schedule_reset(struct gve_priv *priv)
+{
+ gve_set_do_reset(priv);
+ queue_work(priv->gve_wq, &priv->service_task);
+}
+
+static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
+static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
+static void gve_turndown(struct gve_priv *priv);
+static void gve_turnup(struct gve_priv *priv);
+
+static int gve_open(struct net_device *dev)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ int err;
+
+ err = gve_alloc_qpls(priv);
+ if (err)
+ return err;
+ err = gve_alloc_rings(priv);
+ if (err)
+ goto free_qpls;
+
+ err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
+ if (err)
+ goto free_rings;
+ err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
+ if (err)
+ goto free_rings;
+
+ err = gve_register_qpls(priv);
+ if (err)
+ goto reset;
+ err = gve_create_rings(priv);
+ if (err)
+ goto reset;
+ gve_set_device_rings_ok(priv);
+
+ gve_turnup(priv);
+ netif_carrier_on(dev);
+ return 0;
+
+free_rings:
+ gve_free_rings(priv);
+free_qpls:
+ gve_free_qpls(priv);
+ return err;
+
+reset:
+ /* This must have been called from a reset due to the rtnl lock
+ * so just return at this point.
+ */
+ if (gve_get_reset_in_progress(priv))
+ return err;
+ /* Otherwise reset before returning */
+ gve_reset_and_teardown(priv, true);
+ /* if this fails there is nothing we can do so just ignore the return */
+ gve_reset_recovery(priv, false);
+ /* return the original error */
+ return err;
+}
+
+static int gve_close(struct net_device *dev)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ int err;
+
+ netif_carrier_off(dev);
+ if (gve_get_device_rings_ok(priv)) {
+ gve_turndown(priv);
+ err = gve_destroy_rings(priv);
+ if (err)
+ goto err;
+ err = gve_unregister_qpls(priv);
+ if (err)
+ goto err;
+ gve_clear_device_rings_ok(priv);
+ }
+
+ gve_free_rings(priv);
+ gve_free_qpls(priv);
+ return 0;
+
+err:
+ /* This must have been called from a reset due to the rtnl lock
+ * so just return at this point.
+ */
+ if (gve_get_reset_in_progress(priv))
+ return err;
+ /* Otherwise reset before returning */
+ gve_reset_and_teardown(priv, true);
+ return gve_reset_recovery(priv, false);
+}
+
+int gve_adjust_queues(struct gve_priv *priv,
+ struct gve_queue_config new_rx_config,
+ struct gve_queue_config new_tx_config)
+{
+ int err;
+
+ if (netif_carrier_ok(priv->dev)) {
+ /* To make this process as simple as possible we teardown the
+ * device, set the new configuration, and then bring the device
+ * up again.
+ */
+ err = gve_close(priv->dev);
+ /* we have already tried to reset in close,
+ * just fail at this point
+ */
+ if (err)
+ return err;
+ priv->tx_cfg = new_tx_config;
+ priv->rx_cfg = new_rx_config;
+
+ err = gve_open(priv->dev);
+ if (err)
+ goto err;
+
+ return 0;
+ }
+ /* Set the config for the next up. */
+ priv->tx_cfg = new_tx_config;
+ priv->rx_cfg = new_rx_config;
+
+ return 0;
+err:
+ netif_err(priv, drv, priv->dev,
+ "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
+ gve_turndown(priv);
+ return err;
+}
+
+static void gve_turndown(struct gve_priv *priv)
+{
+ int idx;
+
+ if (netif_carrier_ok(priv->dev))
+ netif_carrier_off(priv->dev);
+
+ if (!gve_get_napi_enabled(priv))
+ return;
+
+ /* Disable napi to prevent more work from coming in */
+ for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+ int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+ napi_disable(&block->napi);
+ }
+ for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
+ int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+ napi_disable(&block->napi);
+ }
+
+ /* Stop tx queues */
+ netif_tx_disable(priv->dev);
+
+ gve_clear_napi_enabled(priv);
+}
+
+static void gve_turnup(struct gve_priv *priv)
+{
+ int idx;
+
+ /* Start the tx queues */
+ netif_tx_start_all_queues(priv->dev);
+
+ /* Enable napi and unmask interrupts for all queues */
+ for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+ int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+ napi_enable(&block->napi);
+ iowrite32be(0, gve_irq_doorbell(priv, block));
+ }
+ for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
+ int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+ napi_enable(&block->napi);
+ iowrite32be(0, gve_irq_doorbell(priv, block));
+ }
+
+ gve_set_napi_enabled(priv);
+}
+
+static void gve_tx_timeout(struct net_device *dev)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+
+ gve_schedule_reset(priv);
+ priv->tx_timeo_cnt++;
+}
+
+static const struct net_device_ops gve_netdev_ops = {
+ .ndo_start_xmit = gve_tx,
+ .ndo_open = gve_open,
+ .ndo_stop = gve_close,
+ .ndo_get_stats64 = gve_get_stats,
+ .ndo_tx_timeout = gve_tx_timeout,
+};
+
+static void gve_handle_status(struct gve_priv *priv, u32 status)
+{
+ if (GVE_DEVICE_STATUS_RESET_MASK & status) {
+ dev_info(&priv->pdev->dev, "Device requested reset.\n");
+ gve_set_do_reset(priv);
+ }
+}
+
+static void gve_handle_reset(struct gve_priv *priv)
+{
+ /* A service task will be scheduled at the end of probe to catch any
+ * resets that need to happen, and we don't want to reset until
+ * probe is done.
+ */
+ if (gve_get_probe_in_progress(priv))
+ return;
+
+ if (gve_get_do_reset(priv)) {
+ rtnl_lock();
+ gve_reset(priv, false);
+ rtnl_unlock();
+ }
+}
+
+/* Handle NIC status register changes and reset requests */
+static void gve_service_task(struct work_struct *work)
+{
+ struct gve_priv *priv = container_of(work, struct gve_priv,
+ service_task);
+
+ gve_handle_status(priv,
+ ioread32be(&priv->reg_bar0->device_status));
+
+ gve_handle_reset(priv);
+}
+
+static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
+{
+ int num_ntfy;
+ int err;
+
+ /* Set up the adminq */
+ err = gve_adminq_alloc(&priv->pdev->dev, priv);
+ if (err) {
+ dev_err(&priv->pdev->dev,
+ "Failed to alloc admin queue: err=%d\n", err);
+ return err;
+ }
+
+ if (skip_describe_device)
+ goto setup_device;
+
+ /* Get the initial information we need from the device */
+ err = gve_adminq_describe_device(priv);
+ if (err) {
+ dev_err(&priv->pdev->dev,
+ "Could not get device information: err=%d\n", err);
+ goto err;
+ }
+ if (priv->dev->max_mtu > PAGE_SIZE) {
+ priv->dev->max_mtu = PAGE_SIZE;
+ err = gve_adminq_set_mtu(priv, priv->dev->mtu);
+ if (err) {
+ netif_err(priv, drv, priv->dev, "Could not set mtu");
+ goto err;
+ }
+ }
+ priv->dev->mtu = priv->dev->max_mtu;
+ num_ntfy = pci_msix_vec_count(priv->pdev);
+ if (num_ntfy <= 0) {
+ dev_err(&priv->pdev->dev,
+ "could not count MSI-x vectors: err=%d\n", num_ntfy);
+ err = num_ntfy;
+ goto err;
+ } else if (num_ntfy < GVE_MIN_MSIX) {
+ dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
+ GVE_MIN_MSIX, num_ntfy);
+ err = -EINVAL;
+ goto err;
+ }
+
+ priv->num_registered_pages = 0;
+ priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
+ /* gvnic has one Notification Block per MSI-x vector, except for the
+ * management vector
+ */
+ priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
+ priv->mgmt_msix_idx = priv->num_ntfy_blks;
+
+ priv->tx_cfg.max_queues =
+ min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
+ priv->rx_cfg.max_queues =
+ min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
+
+ priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
+ priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
+ if (priv->default_num_queues > 0) {
+ priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
+ priv->tx_cfg.num_queues);
+ priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
+ priv->rx_cfg.num_queues);
+ }
+
+ netif_info(priv, drv, priv->dev, "TX queues %d, RX queues %d\n",
+ priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
+ netif_info(priv, drv, priv->dev, "Max TX queues %d, Max RX queues %d\n",
+ priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
+
+setup_device:
+ err = gve_setup_device_resources(priv);
+ if (!err)
+ return 0;
+err:
+ gve_adminq_free(&priv->pdev->dev, priv);
+ return err;
+}
+
+static void gve_teardown_priv_resources(struct gve_priv *priv)
+{
+ gve_teardown_device_resources(priv);
+ gve_adminq_free(&priv->pdev->dev, priv);
+}
+
+static void gve_trigger_reset(struct gve_priv *priv)
+{
+ /* Reset the device by releasing the AQ */
+ gve_adminq_release(priv);
+}
+
+static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
+{
+ gve_trigger_reset(priv);
+ /* With the reset having already happened, close cannot fail */
+ if (was_up)
+ gve_close(priv->dev);
+ gve_teardown_priv_resources(priv);
+}
+
+static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
+{
+ int err;
+
+ err = gve_init_priv(priv, true);
+ if (err)
+ goto err;
+ if (was_up) {
+ err = gve_open(priv->dev);
+ if (err)
+ goto err;
+ }
+ return 0;
+err:
+ dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
+ gve_turndown(priv);
+ return err;
+}
+
+int gve_reset(struct gve_priv *priv, bool attempt_teardown)
+{
+ bool was_up = netif_carrier_ok(priv->dev);
+ int err;
+
+ dev_info(&priv->pdev->dev, "Performing reset\n");
+ gve_clear_do_reset(priv);
+ gve_set_reset_in_progress(priv);
+ /* If we aren't attempting to teardown normally, just go turndown and
+ * reset right away.
+ */
+ if (!attempt_teardown) {
+ gve_turndown(priv);
+ gve_reset_and_teardown(priv, was_up);
+ } else {
+ /* Otherwise attempt to close normally */
+ if (was_up) {
+ err = gve_close(priv->dev);
+ /* If that fails reset as we did above */
+ if (err)
+ gve_reset_and_teardown(priv, was_up);
+ }
+ /* Clean up any remaining resources */
+ gve_teardown_priv_resources(priv);
+ }
+
+ /* Set it all back up */
+ err = gve_reset_recovery(priv, was_up);
+ gve_clear_reset_in_progress(priv);
+ return err;
+}
+
+static void gve_write_version(u8 __iomem *driver_version_register)
+{
+ const char *c = gve_version_prefix;
+
+ while (*c) {
+ writeb(*c, driver_version_register);
+ c++;
+ }
+
+ c = gve_version_str;
+ while (*c) {
+ writeb(*c, driver_version_register);
+ c++;
+ }
+ writeb('\n', driver_version_register);
+}
+
+static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ int max_tx_queues, max_rx_queues;
+ struct net_device *dev;
+ __be32 __iomem *db_bar;
+ struct gve_registers __iomem *reg_bar;
+ struct gve_priv *priv;
+ int err;
+
+ err = pci_enable_device(pdev);
+ if (err)
+ return -ENXIO;
+
+ err = pci_request_regions(pdev, "gvnic-cfg");
+ if (err)
+ goto abort_with_enabled;
+
+ pci_set_master(pdev);
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
+ goto abort_with_pci_region;
+ }
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pdev->dev,
+ "Failed to set consistent dma mask: err=%d\n", err);
+ goto abort_with_pci_region;
+ }
+
+ reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
+ if (!reg_bar) {
+ dev_err(&pdev->dev, "Failed to map pci bar!\n");
+ err = -ENOMEM;
+ goto abort_with_pci_region;
+ }
+
+ db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
+ if (!db_bar) {
+ dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
+ err = -ENOMEM;
+ goto abort_with_reg_bar;
+ }
+
+ gve_write_version(&reg_bar->driver_version);
+ /* Get max queues to alloc etherdev */
+ max_rx_queues = ioread32be(&reg_bar->max_tx_queues);
+ max_tx_queues = ioread32be(&reg_bar->max_rx_queues);
+ /* Alloc and setup the netdev and priv */
+ dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
+ if (!dev) {
+ dev_err(&pdev->dev, "could not allocate netdev\n");
+ goto abort_with_db_bar;
+ }
+ SET_NETDEV_DEV(dev, &pdev->dev);
+ pci_set_drvdata(pdev, dev);
+ dev->ethtool_ops = &gve_ethtool_ops;
+ dev->netdev_ops = &gve_netdev_ops;
+ /* advertise features */
+ dev->hw_features = NETIF_F_HIGHDMA;
+ dev->hw_features |= NETIF_F_SG;
+ dev->hw_features |= NETIF_F_HW_CSUM;
+ dev->hw_features |= NETIF_F_TSO;
+ dev->hw_features |= NETIF_F_TSO6;
+ dev->hw_features |= NETIF_F_TSO_ECN;
+ dev->hw_features |= NETIF_F_RXCSUM;
+ dev->hw_features |= NETIF_F_RXHASH;
+ dev->features = dev->hw_features;
+ dev->watchdog_timeo = 5 * HZ;
+ dev->min_mtu = ETH_MIN_MTU;
+ netif_carrier_off(dev);
+
+ priv = netdev_priv(dev);
+ priv->dev = dev;
+ priv->pdev = pdev;
+ priv->msg_enable = DEFAULT_MSG_LEVEL;
+ priv->reg_bar0 = reg_bar;
+ priv->db_bar2 = db_bar;
+ priv->service_task_flags = 0x0;
+ priv->state_flags = 0x0;
+
+ gve_set_probe_in_progress(priv);
+ priv->gve_wq = alloc_ordered_workqueue("gve", 0);
+ if (!priv->gve_wq) {
+ dev_err(&pdev->dev, "Could not allocate workqueue");
+ err = -ENOMEM;
+ goto abort_with_netdev;
+ }
+ INIT_WORK(&priv->service_task, gve_service_task);
+ priv->tx_cfg.max_queues = max_tx_queues;
+ priv->rx_cfg.max_queues = max_rx_queues;
+
+ err = gve_init_priv(priv, false);
+ if (err)
+ goto abort_with_wq;
+
+ err = register_netdev(dev);
+ if (err)
+ goto abort_with_wq;
+
+ dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
+ gve_clear_probe_in_progress(priv);
+ queue_work(priv->gve_wq, &priv->service_task);
+ return 0;
+
+abort_with_wq:
+ destroy_workqueue(priv->gve_wq);
+
+abort_with_netdev:
+ free_netdev(dev);
+
+abort_with_db_bar:
+ pci_iounmap(pdev, db_bar);
+
+abort_with_reg_bar:
+ pci_iounmap(pdev, reg_bar);
+
+abort_with_pci_region:
+ pci_release_regions(pdev);
+
+abort_with_enabled:
+ pci_disable_device(pdev);
+ return -ENXIO;
+}
+EXPORT_SYMBOL(gve_probe);
+
+static void gve_remove(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct gve_priv *priv = netdev_priv(netdev);
+ __be32 __iomem *db_bar = priv->db_bar2;
+ void __iomem *reg_bar = priv->reg_bar0;
+
+ unregister_netdev(netdev);
+ gve_teardown_priv_resources(priv);
+ destroy_workqueue(priv->gve_wq);
+ free_netdev(netdev);
+ pci_iounmap(pdev, db_bar);
+ pci_iounmap(pdev, reg_bar);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+}
+
+static const struct pci_device_id gve_id_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
+ { }
+};
+
+static struct pci_driver gvnic_driver = {
+ .name = "gvnic",
+ .id_table = gve_id_table,
+ .probe = gve_probe,
+ .remove = gve_remove,
+};
+
+module_pci_driver(gvnic_driver);
+
+MODULE_DEVICE_TABLE(pci, gve_id_table);
+MODULE_AUTHOR("Google, Inc.");
+MODULE_DESCRIPTION("gVNIC Driver");
+MODULE_LICENSE("Dual MIT/GPL");
+MODULE_VERSION(GVE_VERSION);
diff --git a/drivers/net/ethernet/google/gve/gve_register.h b/drivers/net/ethernet/google/gve/gve_register.h
new file mode 100644
index 000000000000..84ab8893aadd
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_register.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#ifndef _GVE_REGISTER_H_
+#define _GVE_REGISTER_H_
+
+/* Fixed Configuration Registers */
+struct gve_registers {
+ __be32 device_status;
+ __be32 driver_status;
+ __be32 max_tx_queues;
+ __be32 max_rx_queues;
+ __be32 adminq_pfn;
+ __be32 adminq_doorbell;
+ __be32 adminq_event_counter;
+ u8 reserved[3];
+ u8 driver_version;
+};
+
+enum gve_device_status_flags {
+ GVE_DEVICE_STATUS_RESET_MASK = BIT(1),
+ GVE_DEVICE_STATUS_LINK_STATUS_MASK = BIT(2),
+};
+#endif /* _GVE_REGISTER_H_ */
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
new file mode 100644
index 000000000000..84e0ecce14c4
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -0,0 +1,446 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#include "gve.h"
+#include "gve_adminq.h"
+#include <linux/etherdevice.h>
+
+static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
+{
+ struct gve_notify_block *block =
+ &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)];
+
+ block->rx = NULL;
+}
+
+static void gve_rx_free_ring(struct gve_priv *priv, int idx)
+{
+ struct gve_rx_ring *rx = &priv->rx[idx];
+ struct device *dev = &priv->pdev->dev;
+ size_t bytes;
+ u32 slots;
+
+ gve_rx_remove_from_block(priv, idx);
+
+ bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
+ dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
+ rx->desc.desc_ring = NULL;
+
+ dma_free_coherent(dev, sizeof(*rx->q_resources),
+ rx->q_resources, rx->q_resources_bus);
+ rx->q_resources = NULL;
+
+ gve_unassign_qpl(priv, rx->data.qpl->id);
+ rx->data.qpl = NULL;
+ kfree(rx->data.page_info);
+
+ slots = rx->data.mask + 1;
+ bytes = sizeof(*rx->data.data_ring) * slots;
+ dma_free_coherent(dev, bytes, rx->data.data_ring,
+ rx->data.data_bus);
+ rx->data.data_ring = NULL;
+ netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
+}
+
+static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
+ struct gve_rx_data_slot *slot,
+ dma_addr_t addr, struct page *page)
+{
+ page_info->page = page;
+ page_info->page_offset = 0;
+ page_info->page_address = page_address(page);
+ slot->qpl_offset = cpu_to_be64(addr);
+}
+
+static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
+{
+ struct gve_priv *priv = rx->gve;
+ u32 slots;
+ int i;
+
+ /* Allocate one page per Rx queue slot. Each page is split into two
+ * packet buffers, when possible we "page flip" between the two.
+ */
+ slots = rx->data.mask + 1;
+
+ rx->data.page_info = kvzalloc(slots *
+ sizeof(*rx->data.page_info), GFP_KERNEL);
+ if (!rx->data.page_info)
+ return -ENOMEM;
+
+ rx->data.qpl = gve_assign_rx_qpl(priv);
+
+ for (i = 0; i < slots; i++) {
+ struct page *page = rx->data.qpl->pages[i];
+ dma_addr_t addr = i * PAGE_SIZE;
+
+ gve_setup_rx_buffer(&rx->data.page_info[i],
+ &rx->data.data_ring[i], addr, page);
+ }
+
+ return slots;
+}
+
+static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
+{
+ u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx);
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+ struct gve_rx_ring *rx = &priv->rx[queue_idx];
+
+ block->rx = rx;
+ rx->ntfy_id = ntfy_idx;
+}
+
+static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
+{
+ struct gve_rx_ring *rx = &priv->rx[idx];
+ struct device *hdev = &priv->pdev->dev;
+ u32 slots, npages;
+ int filled_pages;
+ size_t bytes;
+ int err;
+
+ netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
+ /* Make sure everything is zeroed to start with */
+ memset(rx, 0, sizeof(*rx));
+
+ rx->gve = priv;
+ rx->q_num = idx;
+
+ slots = priv->rx_pages_per_qpl;
+ rx->data.mask = slots - 1;
+
+ /* alloc rx data ring */
+ bytes = sizeof(*rx->data.data_ring) * slots;
+ rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
+ &rx->data.data_bus,
+ GFP_KERNEL);
+ if (!rx->data.data_ring)
+ return -ENOMEM;
+ filled_pages = gve_prefill_rx_pages(rx);
+ if (filled_pages < 0) {
+ err = -ENOMEM;
+ goto abort_with_slots;
+ }
+ rx->desc.fill_cnt = filled_pages;
+ /* Ensure data ring slots (packet buffers) are visible. */
+ dma_wmb();
+
+ /* Alloc gve_queue_resources */
+ rx->q_resources =
+ dma_alloc_coherent(hdev,
+ sizeof(*rx->q_resources),
+ &rx->q_resources_bus,
+ GFP_KERNEL);
+ if (!rx->q_resources) {
+ err = -ENOMEM;
+ goto abort_filled;
+ }
+ netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
+ (unsigned long)rx->data.data_bus);
+
+ /* alloc rx desc ring */
+ bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
+ npages = bytes / PAGE_SIZE;
+ if (npages * PAGE_SIZE != bytes) {
+ err = -EIO;
+ goto abort_with_q_resources;
+ }
+
+ rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
+ GFP_KERNEL);
+ if (!rx->desc.desc_ring) {
+ err = -ENOMEM;
+ goto abort_with_q_resources;
+ }
+ rx->desc.mask = slots - 1;
+ rx->desc.cnt = 0;
+ rx->desc.seqno = 1;
+ gve_rx_add_to_block(priv, idx);
+
+ return 0;
+
+abort_with_q_resources:
+ dma_free_coherent(hdev, sizeof(*rx->q_resources),
+ rx->q_resources, rx->q_resources_bus);
+ rx->q_resources = NULL;
+abort_filled:
+ kfree(rx->data.page_info);
+abort_with_slots:
+ bytes = sizeof(*rx->data.data_ring) * slots;
+ dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
+ rx->data.data_ring = NULL;
+
+ return err;
+}
+
+int gve_rx_alloc_rings(struct gve_priv *priv)
+{
+ int err = 0;
+ int i;
+
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ err = gve_rx_alloc_ring(priv, i);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Failed to alloc rx ring=%d: err=%d\n",
+ i, err);
+ break;
+ }
+ }
+ /* Unallocate if there was an error */
+ if (err) {
+ int j;
+
+ for (j = 0; j < i; j++)
+ gve_rx_free_ring(priv, j);
+ }
+ return err;
+}
+
+void gve_rx_free_rings(struct gve_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < priv->rx_cfg.num_queues; i++)
+ gve_rx_free_ring(priv, i);
+}
+
+void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
+{
+ u32 db_idx = be32_to_cpu(rx->q_resources->db_index);
+
+ iowrite32be(rx->desc.fill_cnt, &priv->db_bar2[db_idx]);
+}
+
+static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
+{
+ if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
+ return PKT_HASH_TYPE_L4;
+ if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
+ return PKT_HASH_TYPE_L3;
+ return PKT_HASH_TYPE_L2;
+}
+
+static struct sk_buff *gve_rx_copy(struct net_device *dev,
+ struct napi_struct *napi,
+ struct gve_rx_slot_page_info *page_info,
+ u16 len)
+{
+ struct sk_buff *skb = napi_alloc_skb(napi, len);
+ void *va = page_info->page_address + GVE_RX_PAD +
+ page_info->page_offset;
+
+ if (unlikely(!skb))
+ return NULL;
+
+ __skb_put(skb, len);
+
+ skb_copy_to_linear_data(skb, va, len);
+
+ skb->protocol = eth_type_trans(skb, dev);
+ return skb;
+}
+
+static struct sk_buff *gve_rx_add_frags(struct net_device *dev,
+ struct napi_struct *napi,
+ struct gve_rx_slot_page_info *page_info,
+ u16 len)
+{
+ struct sk_buff *skb = napi_get_frags(napi);
+
+ if (unlikely(!skb))
+ return NULL;
+
+ skb_add_rx_frag(skb, 0, page_info->page,
+ page_info->page_offset +
+ GVE_RX_PAD, len, PAGE_SIZE / 2);
+
+ return skb;
+}
+
+static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info,
+ struct gve_rx_data_slot *data_ring)
+{
+ u64 addr = be64_to_cpu(data_ring->qpl_offset);
+
+ page_info->page_offset ^= PAGE_SIZE / 2;
+ addr ^= PAGE_SIZE / 2;
+ data_ring->qpl_offset = cpu_to_be64(addr);
+}
+
+static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
+ netdev_features_t feat)
+{
+ struct gve_rx_slot_page_info *page_info;
+ struct gve_priv *priv = rx->gve;
+ struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
+ struct net_device *dev = priv->dev;
+ struct sk_buff *skb;
+ int pagecount;
+ u16 len;
+ u32 idx;
+
+ /* drop this packet */
+ if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR))
+ return true;
+
+ len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
+ idx = rx->data.cnt & rx->data.mask;
+ page_info = &rx->data.page_info[idx];
+
+ /* gvnic can only receive into registered segments. If the buffer
+ * can't be recycled, our only choice is to copy the data out of
+ * it so that we can return it to the device.
+ */
+
+#if PAGE_SIZE == 4096
+ if (len <= priv->rx_copybreak) {
+ /* Just copy small packets */
+ skb = gve_rx_copy(dev, napi, page_info, len);
+ goto have_skb;
+ }
+ if (unlikely(!gve_can_recycle_pages(dev))) {
+ skb = gve_rx_copy(dev, napi, page_info, len);
+ goto have_skb;
+ }
+ pagecount = page_count(page_info->page);
+ if (pagecount == 1) {
+ /* No part of this page is used by any SKBs; we attach
+ * the page fragment to a new SKB and pass it up the
+ * stack.
+ */
+ skb = gve_rx_add_frags(dev, napi, page_info, len);
+ if (!skb)
+ return true;
+ /* Make sure the kernel stack can't release the page */
+ get_page(page_info->page);
+ /* "flip" to other packet buffer on this page */
+ gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]);
+ } else if (pagecount >= 2) {
+ /* We have previously passed the other half of this
+ * page up the stack, but it has not yet been freed.
+ */
+ skb = gve_rx_copy(dev, napi, page_info, len);
+ } else {
+ WARN(pagecount < 1, "Pagecount should never be < 1");
+ return false;
+ }
+#else
+ skb = gve_rx_copy(dev, napi, page_info, len);
+#endif
+
+have_skb:
+ /* We didn't manage to allocate an skb but we haven't had any
+ * reset worthy failures.
+ */
+ if (!skb)
+ return true;
+
+ rx->data.cnt++;
+
+ if (likely(feat & NETIF_F_RXCSUM)) {
+ /* NIC passes up the partial sum */
+ if (rx_desc->csum)
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ else
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->csum = csum_unfold(rx_desc->csum);
+ }
+
+ /* parse flags & pass relevant info up */
+ if (likely(feat & NETIF_F_RXHASH) &&
+ gve_needs_rss(rx_desc->flags_seq))
+ skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
+ gve_rss_type(rx_desc->flags_seq));
+
+ if (skb_is_nonlinear(skb))
+ napi_gro_frags(napi);
+ else
+ napi_gro_receive(napi, skb);
+ return true;
+}
+
+static bool gve_rx_work_pending(struct gve_rx_ring *rx)
+{
+ struct gve_rx_desc *desc;
+ __be16 flags_seq;
+ u32 next_idx;
+
+ next_idx = rx->desc.cnt & rx->desc.mask;
+ desc = rx->desc.desc_ring + next_idx;
+
+ flags_seq = desc->flags_seq;
+ /* Make sure we have synchronized the seq no with the device */
+ smp_rmb();
+
+ return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
+}
+
+bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
+ netdev_features_t feat)
+{
+ struct gve_priv *priv = rx->gve;
+ struct gve_rx_desc *desc;
+ u32 cnt = rx->desc.cnt;
+ u32 idx = cnt & rx->desc.mask;
+ u32 work_done = 0;
+ u64 bytes = 0;
+
+ desc = rx->desc.desc_ring + idx;
+ while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
+ work_done < budget) {
+ netif_info(priv, rx_status, priv->dev,
+ "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
+ rx->q_num, idx, desc, desc->flags_seq);
+ netif_info(priv, rx_status, priv->dev,
+ "[%d] seqno=%d rx->desc.seqno=%d\n",
+ rx->q_num, GVE_SEQNO(desc->flags_seq),
+ rx->desc.seqno);
+ bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
+ if (!gve_rx(rx, desc, feat))
+ gve_schedule_reset(priv);
+ cnt++;
+ idx = cnt & rx->desc.mask;
+ desc = rx->desc.desc_ring + idx;
+ rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
+ work_done++;
+ }
+
+ if (!work_done)
+ return false;
+
+ u64_stats_update_begin(&rx->statss);
+ rx->rpackets += work_done;
+ rx->rbytes += bytes;
+ u64_stats_update_end(&rx->statss);
+ rx->desc.cnt = cnt;
+ rx->desc.fill_cnt += work_done;
+
+ /* restock desc ring slots */
+ dma_wmb(); /* Ensure descs are visible before ringing doorbell */
+ gve_rx_write_doorbell(priv, rx);
+ return gve_rx_work_pending(rx);
+}
+
+bool gve_rx_poll(struct gve_notify_block *block, int budget)
+{
+ struct gve_rx_ring *rx = block->rx;
+ netdev_features_t feat;
+ bool repoll = false;
+
+ feat = block->napi.dev->features;
+
+ /* If budget is 0, do all the work */
+ if (budget == 0)
+ budget = INT_MAX;
+
+ if (budget > 0)
+ repoll |= gve_clean_rx_done(rx, budget, feat);
+ else
+ repoll |= gve_rx_work_pending(rx);
+ return repoll;
+}
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
new file mode 100644
index 000000000000..778b87b5a06c
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -0,0 +1,584 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#include "gve.h"
+#include "gve_adminq.h"
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/vmalloc.h>
+#include <linux/skbuff.h>
+
+static inline void gve_tx_put_doorbell(struct gve_priv *priv,
+ struct gve_queue_resources *q_resources,
+ u32 val)
+{
+ iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]);
+}
+
+/* gvnic can only transmit from a Registered Segment.
+ * We copy skb payloads into the registered segment before writing Tx
+ * descriptors and ringing the Tx doorbell.
+ *
+ * gve_tx_fifo_* manages the Registered Segment as a FIFO - clients must
+ * free allocations in the order they were allocated.
+ */
+
+static int gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_fifo *fifo)
+{
+ fifo->base = vmap(fifo->qpl->pages, fifo->qpl->num_entries, VM_MAP,
+ PAGE_KERNEL);
+ if (unlikely(!fifo->base)) {
+ netif_err(priv, drv, priv->dev, "Failed to vmap fifo, qpl_id = %d\n",
+ fifo->qpl->id);
+ return -ENOMEM;
+ }
+
+ fifo->size = fifo->qpl->num_entries * PAGE_SIZE;
+ atomic_set(&fifo->available, fifo->size);
+ fifo->head = 0;
+ return 0;
+}
+
+static void gve_tx_fifo_release(struct gve_priv *priv, struct gve_tx_fifo *fifo)
+{
+ WARN(atomic_read(&fifo->available) != fifo->size,
+ "Releasing non-empty fifo");
+
+ vunmap(fifo->base);
+}
+
+static int gve_tx_fifo_pad_alloc_one_frag(struct gve_tx_fifo *fifo,
+ size_t bytes)
+{
+ return (fifo->head + bytes < fifo->size) ? 0 : fifo->size - fifo->head;
+}
+
+static bool gve_tx_fifo_can_alloc(struct gve_tx_fifo *fifo, size_t bytes)
+{
+ return (atomic_read(&fifo->available) <= bytes) ? false : true;
+}
+
+/* gve_tx_alloc_fifo - Allocate fragment(s) from Tx FIFO
+ * @fifo: FIFO to allocate from
+ * @bytes: Allocation size
+ * @iov: Scatter-gather elements to fill with allocation fragment base/len
+ *
+ * Returns number of valid elements in iov[] or negative on error.
+ *
+ * Allocations from a given FIFO must be externally synchronized but concurrent
+ * allocation and frees are allowed.
+ */
+static int gve_tx_alloc_fifo(struct gve_tx_fifo *fifo, size_t bytes,
+ struct gve_tx_iovec iov[2])
+{
+ size_t overflow, padding;
+ u32 aligned_head;
+ int nfrags = 0;
+
+ if (!bytes)
+ return 0;
+
+ /* This check happens before we know how much padding is needed to
+ * align to a cacheline boundary for the payload, but that is fine,
+ * because the FIFO head always start aligned, and the FIFO's boundaries
+ * are aligned, so if there is space for the data, there is space for
+ * the padding to the next alignment.
+ */
+ WARN(!gve_tx_fifo_can_alloc(fifo, bytes),
+ "Reached %s when there's not enough space in the fifo", __func__);
+
+ nfrags++;
+
+ iov[0].iov_offset = fifo->head;
+ iov[0].iov_len = bytes;
+ fifo->head += bytes;
+
+ if (fifo->head > fifo->size) {
+ /* If the allocation did not fit in the tail fragment of the
+ * FIFO, also use the head fragment.
+ */
+ nfrags++;
+ overflow = fifo->head - fifo->size;
+ iov[0].iov_len -= overflow;
+ iov[1].iov_offset = 0; /* Start of fifo*/
+ iov[1].iov_len = overflow;
+
+ fifo->head = overflow;
+ }
+
+ /* Re-align to a cacheline boundary */
+ aligned_head = L1_CACHE_ALIGN(fifo->head);
+ padding = aligned_head - fifo->head;
+ iov[nfrags - 1].iov_padding = padding;
+ atomic_sub(bytes + padding, &fifo->available);
+ fifo->head = aligned_head;
+
+ if (fifo->head == fifo->size)
+ fifo->head = 0;
+
+ return nfrags;
+}
+
+/* gve_tx_free_fifo - Return space to Tx FIFO
+ * @fifo: FIFO to return fragments to
+ * @bytes: Bytes to free
+ */
+static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
+{
+ atomic_add(bytes, &fifo->available);
+}
+
+static void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx)
+{
+ struct gve_notify_block *block =
+ &priv->ntfy_blocks[gve_tx_idx_to_ntfy(priv, queue_idx)];
+
+ block->tx = NULL;
+}
+
+static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
+ u32 to_do, bool try_to_wake);
+
+static void gve_tx_free_ring(struct gve_priv *priv, int idx)
+{
+ struct gve_tx_ring *tx = &priv->tx[idx];
+ struct device *hdev = &priv->pdev->dev;
+ size_t bytes;
+ u32 slots;
+
+ gve_tx_remove_from_block(priv, idx);
+ slots = tx->mask + 1;
+ gve_clean_tx_done(priv, tx, tx->req, false);
+ netdev_tx_reset_queue(tx->netdev_txq);
+
+ dma_free_coherent(hdev, sizeof(*tx->q_resources),
+ tx->q_resources, tx->q_resources_bus);
+ tx->q_resources = NULL;
+
+ gve_tx_fifo_release(priv, &tx->tx_fifo);
+ gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
+ tx->tx_fifo.qpl = NULL;
+
+ bytes = sizeof(*tx->desc) * slots;
+ dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
+ tx->desc = NULL;
+
+ vfree(tx->info);
+ tx->info = NULL;
+
+ netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
+}
+
+static void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx)
+{
+ int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx);
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+ struct gve_tx_ring *tx = &priv->tx[queue_idx];
+
+ block->tx = tx;
+ tx->ntfy_id = ntfy_idx;
+}
+
+static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
+{
+ struct gve_tx_ring *tx = &priv->tx[idx];
+ struct device *hdev = &priv->pdev->dev;
+ u32 slots = priv->tx_desc_cnt;
+ size_t bytes;
+
+ /* Make sure everything is zeroed to start */
+ memset(tx, 0, sizeof(*tx));
+ tx->q_num = idx;
+
+ tx->mask = slots - 1;
+
+ /* alloc metadata */
+ tx->info = vzalloc(sizeof(*tx->info) * slots);
+ if (!tx->info)
+ return -ENOMEM;
+
+ /* alloc tx queue */
+ bytes = sizeof(*tx->desc) * slots;
+ tx->desc = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL);
+ if (!tx->desc)
+ goto abort_with_info;
+
+ tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
+
+ /* map Tx FIFO */
+ if (gve_tx_fifo_init(priv, &tx->tx_fifo))
+ goto abort_with_desc;
+
+ tx->q_resources =
+ dma_alloc_coherent(hdev,
+ sizeof(*tx->q_resources),
+ &tx->q_resources_bus,
+ GFP_KERNEL);
+ if (!tx->q_resources)
+ goto abort_with_fifo;
+
+ netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx,
+ (unsigned long)tx->bus);
+ tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
+ gve_tx_add_to_block(priv, idx);
+
+ return 0;
+
+abort_with_fifo:
+ gve_tx_fifo_release(priv, &tx->tx_fifo);
+abort_with_desc:
+ dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
+ tx->desc = NULL;
+abort_with_info:
+ vfree(tx->info);
+ tx->info = NULL;
+ return -ENOMEM;
+}
+
+int gve_tx_alloc_rings(struct gve_priv *priv)
+{
+ int err = 0;
+ int i;
+
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ err = gve_tx_alloc_ring(priv, i);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Failed to alloc tx ring=%d: err=%d\n",
+ i, err);
+ break;
+ }
+ }
+ /* Unallocate if there was an error */
+ if (err) {
+ int j;
+
+ for (j = 0; j < i; j++)
+ gve_tx_free_ring(priv, j);
+ }
+ return err;
+}
+
+void gve_tx_free_rings(struct gve_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < priv->tx_cfg.num_queues; i++)
+ gve_tx_free_ring(priv, i);
+}
+
+/* gve_tx_avail - Calculates the number of slots available in the ring
+ * @tx: tx ring to check
+ *
+ * Returns the number of slots available
+ *
+ * The capacity of the queue is mask + 1. We don't need to reserve an entry.
+ **/
+static inline u32 gve_tx_avail(struct gve_tx_ring *tx)
+{
+ return tx->mask + 1 - (tx->req - tx->done);
+}
+
+static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
+ struct sk_buff *skb)
+{
+ int pad_bytes, align_hdr_pad;
+ int bytes;
+ int hlen;
+
+ hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) +
+ tcp_hdrlen(skb) : skb_headlen(skb);
+
+ pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo,
+ hlen);
+ /* We need to take into account the header alignment padding. */
+ align_hdr_pad = L1_CACHE_ALIGN(hlen) - hlen;
+ bytes = align_hdr_pad + pad_bytes + skb->len;
+
+ return bytes;
+}
+
+/* The most descriptors we could need are 3 - 1 for the headers, 1 for
+ * the beginning of the payload at the end of the FIFO, and 1 if the
+ * payload wraps to the beginning of the FIFO.
+ */
+#define MAX_TX_DESC_NEEDED 3
+
+/* Check if sufficient resources (descriptor ring space, FIFO space) are
+ * available to transmit the given number of bytes.
+ */
+static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
+{
+ return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED &&
+ gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required));
+}
+
+/* Stops the queue if the skb cannot be transmitted. */
+static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb)
+{
+ int bytes_required;
+
+ bytes_required = gve_skb_fifo_bytes_required(tx, skb);
+ if (likely(gve_can_tx(tx, bytes_required)))
+ return 0;
+
+ /* No space, so stop the queue */
+ tx->stop_queue++;
+ netif_tx_stop_queue(tx->netdev_txq);
+ smp_mb(); /* sync with restarting queue in gve_clean_tx_done() */
+
+ /* Now check for resources again, in case gve_clean_tx_done() freed
+ * resources after we checked and we stopped the queue after
+ * gve_clean_tx_done() checked.
+ *
+ * gve_maybe_stop_tx() gve_clean_tx_done()
+ * nsegs/can_alloc test failed
+ * gve_tx_free_fifo()
+ * if (tx queue stopped)
+ * netif_tx_queue_wake()
+ * netif_tx_stop_queue()
+ * Need to check again for space here!
+ */
+ if (likely(!gve_can_tx(tx, bytes_required)))
+ return -EBUSY;
+
+ netif_tx_start_queue(tx->netdev_txq);
+ tx->wake_queue++;
+ return 0;
+}
+
+static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc,
+ struct sk_buff *skb, bool is_gso,
+ int l4_hdr_offset, u32 desc_cnt,
+ u16 hlen, u64 addr)
+{
+ /* l4_hdr_offset and csum_offset are in units of 16-bit words */
+ if (is_gso) {
+ pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
+ pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
+ pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
+ } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
+ pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
+ pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
+ } else {
+ pkt_desc->pkt.type_flags = GVE_TXD_STD;
+ pkt_desc->pkt.l4_csum_offset = 0;
+ pkt_desc->pkt.l4_hdr_offset = 0;
+ }
+ pkt_desc->pkt.desc_cnt = desc_cnt;
+ pkt_desc->pkt.len = cpu_to_be16(skb->len);
+ pkt_desc->pkt.seg_len = cpu_to_be16(hlen);
+ pkt_desc->pkt.seg_addr = cpu_to_be64(addr);
+}
+
+static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc,
+ struct sk_buff *skb, bool is_gso,
+ u16 len, u64 addr)
+{
+ seg_desc->seg.type_flags = GVE_TXD_SEG;
+ if (is_gso) {
+ if (skb_is_gso_v6(skb))
+ seg_desc->seg.type_flags |= GVE_TXSF_IPV6;
+ seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1;
+ seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
+ }
+ seg_desc->seg.seg_len = cpu_to_be16(len);
+ seg_desc->seg.seg_addr = cpu_to_be64(addr);
+}
+
+static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb)
+{
+ int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset;
+ union gve_tx_desc *pkt_desc, *seg_desc;
+ struct gve_tx_buffer_state *info;
+ bool is_gso = skb_is_gso(skb);
+ u32 idx = tx->req & tx->mask;
+ int payload_iov = 2;
+ int copy_offset;
+ u32 next_idx;
+ int i;
+
+ info = &tx->info[idx];
+ pkt_desc = &tx->desc[idx];
+
+ l4_hdr_offset = skb_checksum_start_offset(skb);
+ /* If the skb is gso, then we want the tcp header in the first segment
+ * otherwise we want the linear portion of the skb (which will contain
+ * the checksum because skb->csum_start and skb->csum_offset are given
+ * relative to skb->head) in the first segment.
+ */
+ hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) :
+ skb_headlen(skb);
+
+ info->skb = skb;
+ /* We don't want to split the header, so if necessary, pad to the end
+ * of the fifo and then put the header at the beginning of the fifo.
+ */
+ pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, hlen);
+ hdr_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, hlen + pad_bytes,
+ &info->iov[0]);
+ WARN(!hdr_nfrags, "hdr_nfrags should never be 0!");
+ payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen,
+ &info->iov[payload_iov]);
+
+ gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
+ 1 + payload_nfrags, hlen,
+ info->iov[hdr_nfrags - 1].iov_offset);
+
+ skb_copy_bits(skb, 0,
+ tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset,
+ hlen);
+ copy_offset = hlen;
+
+ for (i = payload_iov; i < payload_nfrags + payload_iov; i++) {
+ next_idx = (tx->req + 1 + i - payload_iov) & tx->mask;
+ seg_desc = &tx->desc[next_idx];
+
+ gve_tx_fill_seg_desc(seg_desc, skb, is_gso,
+ info->iov[i].iov_len,
+ info->iov[i].iov_offset);
+
+ skb_copy_bits(skb, copy_offset,
+ tx->tx_fifo.base + info->iov[i].iov_offset,
+ info->iov[i].iov_len);
+ copy_offset += info->iov[i].iov_len;
+ }
+
+ return 1 + payload_nfrags;
+}
+
+netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_tx_ring *tx;
+ int nsegs;
+
+ WARN(skb_get_queue_mapping(skb) > priv->tx_cfg.num_queues,
+ "skb queue index out of range");
+ tx = &priv->tx[skb_get_queue_mapping(skb)];
+ if (unlikely(gve_maybe_stop_tx(tx, skb))) {
+ /* We need to ring the txq doorbell -- we have stopped the Tx
+ * queue for want of resources, but prior calls to gve_tx()
+ * may have added descriptors without ringing the doorbell.
+ */
+
+ /* Ensure tx descs from a prior gve_tx are visible before
+ * ringing doorbell.
+ */
+ dma_wmb();
+ gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
+ return NETDEV_TX_BUSY;
+ }
+ nsegs = gve_tx_add_skb(tx, skb);
+
+ netdev_tx_sent_queue(tx->netdev_txq, skb->len);
+ skb_tx_timestamp(skb);
+
+ /* give packets to NIC */
+ tx->req += nsegs;
+
+ if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more())
+ return NETDEV_TX_OK;
+
+ /* Ensure tx descs are visible before ringing doorbell */
+ dma_wmb();
+ gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
+ return NETDEV_TX_OK;
+}
+
+#define GVE_TX_START_THRESH PAGE_SIZE
+
+static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
+ u32 to_do, bool try_to_wake)
+{
+ struct gve_tx_buffer_state *info;
+ u64 pkts = 0, bytes = 0;
+ size_t space_freed = 0;
+ struct sk_buff *skb;
+ int i, j;
+ u32 idx;
+
+ for (j = 0; j < to_do; j++) {
+ idx = tx->done & tx->mask;
+ netif_info(priv, tx_done, priv->dev,
+ "[%d] %s: idx=%d (req=%u done=%u)\n",
+ tx->q_num, __func__, idx, tx->req, tx->done);
+ info = &tx->info[idx];
+ skb = info->skb;
+
+ /* Mark as free */
+ if (skb) {
+ info->skb = NULL;
+ bytes += skb->len;
+ pkts++;
+ dev_consume_skb_any(skb);
+ /* FIFO free */
+ for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
+ space_freed += info->iov[i].iov_len +
+ info->iov[i].iov_padding;
+ info->iov[i].iov_len = 0;
+ info->iov[i].iov_padding = 0;
+ }
+ }
+ tx->done++;
+ }
+
+ gve_tx_free_fifo(&tx->tx_fifo, space_freed);
+ u64_stats_update_begin(&tx->statss);
+ tx->bytes_done += bytes;
+ tx->pkt_done += pkts;
+ u64_stats_update_end(&tx->statss);
+ netdev_tx_completed_queue(tx->netdev_txq, pkts, bytes);
+
+ /* start the queue if we've stopped it */
+#ifndef CONFIG_BQL
+ /* Make sure that the doorbells are synced */
+ smp_mb();
+#endif
+ if (try_to_wake && netif_tx_queue_stopped(tx->netdev_txq) &&
+ likely(gve_can_tx(tx, GVE_TX_START_THRESH))) {
+ tx->wake_queue++;
+ netif_tx_wake_queue(tx->netdev_txq);
+ }
+
+ return pkts;
+}
+
+__be32 gve_tx_load_event_counter(struct gve_priv *priv,
+ struct gve_tx_ring *tx)
+{
+ u32 counter_index = be32_to_cpu((tx->q_resources->counter_index));
+
+ return READ_ONCE(priv->counter_array[counter_index]);
+}
+
+bool gve_tx_poll(struct gve_notify_block *block, int budget)
+{
+ struct gve_priv *priv = block->priv;
+ struct gve_tx_ring *tx = block->tx;
+ bool repoll = false;
+ u32 nic_done;
+ u32 to_do;
+
+ /* If budget is 0, do all the work */
+ if (budget == 0)
+ budget = INT_MAX;
+
+ /* Find out how much work there is to be done */
+ tx->last_nic_done = gve_tx_load_event_counter(priv, tx);
+ nic_done = be32_to_cpu(tx->last_nic_done);
+ if (budget > 0) {
+ /* Do as much work as we have that the budget will
+ * allow
+ */
+ to_do = min_t(u32, (nic_done - tx->done), budget);
+ gve_clean_tx_done(priv, tx, to_do, true);
+ }
+ /* If we still have work we want to repoll */
+ repoll |= (nic_done != tx->done);
+ return repoll;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index b23652cd2922..310afa708831 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -28,8 +28,7 @@
#define hns3_set_field(origin, shift, val) ((origin) |= ((val) << (shift)))
#define hns3_tx_bd_count(S) DIV_ROUND_UP(S, HNS3_MAX_BD_SIZE)
-static void hns3_clear_all_ring(struct hnae3_handle *h);
-static void hns3_force_clear_all_ring(struct hnae3_handle *h);
+static void hns3_clear_all_ring(struct hnae3_handle *h, bool force);
static void hns3_remove_hw_addr(struct net_device *netdev);
static const char hns3_driver_name[] = "hns3";
@@ -463,6 +462,20 @@ static int hns3_nic_net_open(struct net_device *netdev)
return 0;
}
+static void hns3_reset_tx_queue(struct hnae3_handle *h)
+{
+ struct net_device *ndev = h->kinfo.netdev;
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct netdev_queue *dev_queue;
+ u32 i;
+
+ for (i = 0; i < h->kinfo.num_tqps; i++) {
+ dev_queue = netdev_get_tx_queue(ndev,
+ priv->ring_data[i].queue_index);
+ netdev_tx_reset_queue(dev_queue);
+ }
+}
+
static void hns3_nic_net_down(struct net_device *netdev)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -493,7 +506,9 @@ static void hns3_nic_net_down(struct net_device *netdev)
* to disable the ring through firmware when downing the netdev.
*/
if (!hns3_nic_resetting(netdev))
- hns3_clear_all_ring(priv->ae_handle);
+ hns3_clear_all_ring(priv->ae_handle, false);
+
+ hns3_reset_tx_queue(priv->ae_handle);
}
static int hns3_nic_net_stop(struct net_device *netdev)
@@ -936,8 +951,9 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto,
static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end)
{
/* Config bd buffer end */
- hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, !!frag_end);
- hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1);
+ if (!!frag_end)
+ hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, 1U);
+ hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1U);
}
static int hns3_fill_desc_vtags(struct sk_buff *skb,
@@ -1475,12 +1491,10 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
start = u64_stats_fetch_begin_irq(&ring->syncp);
rx_bytes += ring->stats.rx_bytes;
rx_pkts += ring->stats.rx_pkts;
- rx_drop += ring->stats.non_vld_descs;
rx_drop += ring->stats.l2_err;
- rx_errors += ring->stats.non_vld_descs;
rx_errors += ring->stats.l2_err;
+ rx_errors += ring->stats.l3l4_csum_err;
rx_crc_errors += ring->stats.l2_err;
- rx_crc_errors += ring->stats.l3l4_csum_err;
rx_multicast += ring->stats.rx_multicast;
rx_length_errors += ring->stats.err_pkt_len;
} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
@@ -2562,7 +2576,7 @@ static bool hns3_parse_vlan_tag(struct hns3_enet_ring *ring,
}
}
-static int hns3_alloc_skb(struct hns3_enet_ring *ring, int length,
+static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
unsigned char *va)
{
#define HNS3_NEED_ADD_FRAG 1
@@ -2754,14 +2768,6 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb)
vlan_tag);
}
- if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B)))) {
- u64_stats_update_begin(&ring->syncp);
- ring->stats.non_vld_descs++;
- u64_stats_update_end(&ring->syncp);
-
- return -EINVAL;
- }
-
if (unlikely(!desc->rx.pkt_len || (l234info & (BIT(HNS3_RXD_TRUNCAT_B) |
BIT(HNS3_RXD_L2E_B))))) {
u64_stats_update_begin(&ring->syncp);
@@ -2813,8 +2819,8 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
struct sk_buff *skb = ring->skb;
struct hns3_desc_cb *desc_cb;
struct hns3_desc *desc;
+ unsigned int length;
u32 bd_base_info;
- int length;
int ret;
desc = &ring->desc[ring->next_to_clean];
@@ -3921,7 +3927,7 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
hns3_del_all_fd_rules(netdev, true);
- hns3_force_clear_all_ring(handle);
+ hns3_clear_all_ring(handle, true);
hns3_nic_uninit_vector_data(priv);
@@ -4090,43 +4096,26 @@ static void hns3_force_clear_rx_ring(struct hns3_enet_ring *ring)
}
}
-static void hns3_force_clear_all_ring(struct hnae3_handle *h)
+static void hns3_clear_all_ring(struct hnae3_handle *h, bool force)
{
struct net_device *ndev = h->kinfo.netdev;
struct hns3_nic_priv *priv = netdev_priv(ndev);
- struct hns3_enet_ring *ring;
u32 i;
for (i = 0; i < h->kinfo.num_tqps; i++) {
- ring = priv->ring_data[i].ring;
- hns3_clear_tx_ring(ring);
-
- ring = priv->ring_data[i + h->kinfo.num_tqps].ring;
- hns3_force_clear_rx_ring(ring);
- }
-}
-
-static void hns3_clear_all_ring(struct hnae3_handle *h)
-{
- struct net_device *ndev = h->kinfo.netdev;
- struct hns3_nic_priv *priv = netdev_priv(ndev);
- u32 i;
-
- for (i = 0; i < h->kinfo.num_tqps; i++) {
- struct netdev_queue *dev_queue;
struct hns3_enet_ring *ring;
ring = priv->ring_data[i].ring;
hns3_clear_tx_ring(ring);
- dev_queue = netdev_get_tx_queue(ndev,
- priv->ring_data[i].queue_index);
- netdev_tx_reset_queue(dev_queue);
ring = priv->ring_data[i + h->kinfo.num_tqps].ring;
/* Continue to clear other rings even if clearing some
* rings failed.
*/
- hns3_clear_rx_ring(ring);
+ if (force)
+ hns3_force_clear_rx_ring(ring);
+ else
+ hns3_clear_rx_ring(ring);
}
}
@@ -4331,8 +4320,8 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
return 0;
}
- hns3_clear_all_ring(handle);
- hns3_force_clear_all_ring(handle);
+ hns3_clear_all_ring(handle, true);
+ hns3_reset_tx_queue(priv->ae_handle);
hns3_nic_uninit_vector_data(priv);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 3ac1411df7a8..848b866761df 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -384,7 +384,6 @@ struct ring_stats {
u64 rx_err_cnt;
u64 reuse_pg_cnt;
u64 err_pkt_len;
- u64 non_vld_descs;
u64 err_bd_num;
u64 l2_err;
u64 l3l4_csum_err;
@@ -446,25 +445,6 @@ enum hns3_flow_level_range {
HNS3_FLOW_ULTRA = 3,
};
-enum hns3_link_mode_bits {
- HNS3_LM_FIBRE_BIT = BIT(0),
- HNS3_LM_AUTONEG_BIT = BIT(1),
- HNS3_LM_TP_BIT = BIT(2),
- HNS3_LM_PAUSE_BIT = BIT(3),
- HNS3_LM_BACKPLANE_BIT = BIT(4),
- HNS3_LM_10BASET_HALF_BIT = BIT(5),
- HNS3_LM_10BASET_FULL_BIT = BIT(6),
- HNS3_LM_100BASET_HALF_BIT = BIT(7),
- HNS3_LM_100BASET_FULL_BIT = BIT(8),
- HNS3_LM_1000BASET_FULL_BIT = BIT(9),
- HNS3_LM_10000BASEKR_FULL_BIT = BIT(10),
- HNS3_LM_25000BASEKR_FULL_BIT = BIT(11),
- HNS3_LM_40000BASELR4_FULL_BIT = BIT(12),
- HNS3_LM_50000BASEKR2_FULL_BIT = BIT(13),
- HNS3_LM_100000BASEKR4_FULL_BIT = BIT(14),
- HNS3_LM_COUNT = 15
-};
-
#define HNS3_INT_GL_MAX 0x1FE0
#define HNS3_INT_GL_50K 0x0014
#define HNS3_INT_GL_20K 0x0032
@@ -630,7 +610,7 @@ static inline bool hns3_nic_resetting(struct net_device *netdev)
#define hnae3_buf_size(_ring) ((_ring)->buf_size)
#define hnae3_page_order(_ring) (get_order(hnae3_buf_size(_ring)))
-#define hnae3_page_size(_ring) (PAGE_SIZE << hnae3_page_order(_ring))
+#define hnae3_page_size(_ring) (PAGE_SIZE << (u32)hnae3_page_order(_ring))
/* iterator for handling rings in ring group */
#define hns3_for_each_ring(pos, head) \
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 16034afc43f9..5bff98a9b0dc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -44,7 +44,6 @@ static const struct hns3_stats hns3_rxq_stats[] = {
HNS3_TQP_STAT("errors", rx_err_cnt),
HNS3_TQP_STAT("reuse_pg_cnt", reuse_pg_cnt),
HNS3_TQP_STAT("err_pkt_len", err_pkt_len),
- HNS3_TQP_STAT("non_vld_descs", non_vld_descs),
HNS3_TQP_STAT("err_bd_num", err_bd_num),
HNS3_TQP_STAT("l2_err", l2_err),
HNS3_TQP_STAT("l3l4_csum_err", l3l4_csum_err),
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 7a3bde724151..22f6acd45d9a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -188,12 +188,43 @@ static bool hclge_is_special_opcode(u16 opcode)
return false;
}
+static int hclge_cmd_convert_err_code(u16 desc_ret)
+{
+ switch (desc_ret) {
+ case HCLGE_CMD_EXEC_SUCCESS:
+ return 0;
+ case HCLGE_CMD_NO_AUTH:
+ return -EPERM;
+ case HCLGE_CMD_NOT_SUPPORTED:
+ return -EOPNOTSUPP;
+ case HCLGE_CMD_QUEUE_FULL:
+ return -EXFULL;
+ case HCLGE_CMD_NEXT_ERR:
+ return -ENOSR;
+ case HCLGE_CMD_UNEXE_ERR:
+ return -ENOTBLK;
+ case HCLGE_CMD_PARA_ERR:
+ return -EINVAL;
+ case HCLGE_CMD_RESULT_ERR:
+ return -ERANGE;
+ case HCLGE_CMD_TIMEOUT:
+ return -ETIME;
+ case HCLGE_CMD_HILINK_ERR:
+ return -ENOLINK;
+ case HCLGE_CMD_QUEUE_ILLEGAL:
+ return -ENXIO;
+ case HCLGE_CMD_INVALID:
+ return -EBADR;
+ default:
+ return -EIO;
+ }
+}
+
static int hclge_cmd_check_retval(struct hclge_hw *hw, struct hclge_desc *desc,
int num, int ntc)
{
u16 opcode, desc_ret;
int handle;
- int retval;
opcode = le16_to_cpu(desc[0].opcode);
for (handle = 0; handle < num; handle++) {
@@ -207,17 +238,9 @@ static int hclge_cmd_check_retval(struct hclge_hw *hw, struct hclge_desc *desc,
else
desc_ret = le16_to_cpu(desc[0].retval);
- if (desc_ret == HCLGE_CMD_EXEC_SUCCESS)
- retval = 0;
- else if (desc_ret == HCLGE_CMD_NO_AUTH)
- retval = -EPERM;
- else if (desc_ret == HCLGE_CMD_NOT_SUPPORTED)
- retval = -EOPNOTSUPP;
- else
- retval = -EIO;
hw->cmq.last_status = desc_ret;
- return retval;
+ return hclge_cmd_convert_err_code(desc_ret);
}
/**
@@ -232,6 +255,7 @@ static int hclge_cmd_check_retval(struct hclge_hw *hw, struct hclge_desc *desc,
int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
{
struct hclge_dev *hdev = container_of(hw, struct hclge_dev, hw);
+ struct hclge_cmq_ring *csq = &hw->cmq.csq;
struct hclge_desc *desc_to_use;
bool complete = false;
u32 timeout = 0;
@@ -241,8 +265,16 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
spin_lock_bh(&hw->cmq.csq.lock);
- if (num > hclge_ring_space(&hw->cmq.csq) ||
- test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) {
+ if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) {
+ spin_unlock_bh(&hw->cmq.csq.lock);
+ return -EBUSY;
+ }
+
+ if (num > hclge_ring_space(&hw->cmq.csq)) {
+ /* If CMDQ ring is full, SW HEAD and HW HEAD may be different,
+ * need update the SW HEAD pointer csq->next_to_clean
+ */
+ csq->next_to_clean = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG);
spin_unlock_bh(&hw->cmq.csq.lock);
return -EBUSY;
}
@@ -280,7 +312,7 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
}
if (!complete) {
- retval = -EAGAIN;
+ retval = -EBADE;
} else {
retval = hclge_cmd_check_retval(hw, desc, num, ntc);
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index cf52cdf13270..96840d8f3e24 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -41,6 +41,14 @@ enum hclge_cmd_return_status {
HCLGE_CMD_NO_AUTH = 1,
HCLGE_CMD_NOT_SUPPORTED = 2,
HCLGE_CMD_QUEUE_FULL = 3,
+ HCLGE_CMD_NEXT_ERR = 4,
+ HCLGE_CMD_UNEXE_ERR = 5,
+ HCLGE_CMD_PARA_ERR = 6,
+ HCLGE_CMD_RESULT_ERR = 7,
+ HCLGE_CMD_TIMEOUT = 8,
+ HCLGE_CMD_HILINK_ERR = 9,
+ HCLGE_CMD_QUEUE_ILLEGAL = 10,
+ HCLGE_CMD_INVALID = 11,
};
enum hclge_cmd_status {
@@ -884,7 +892,7 @@ struct hclge_serdes_lb_cmd {
#define HCLGE_TOTAL_PKT_BUF 0x108000 /* 1.03125M bytes */
#define HCLGE_DEFAULT_DV 0xA000 /* 40k byte */
#define HCLGE_DEFAULT_NON_DCB_DV 0x7800 /* 30K byte */
-#define HCLGE_NON_DCB_ADDITIONAL_BUF 0x200 /* 512 byte */
+#define HCLGE_NON_DCB_ADDITIONAL_BUF 0x1400 /* 5120 byte */
#define HCLGE_TYPE_CRQ 0
#define HCLGE_TYPE_CSQ 1
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
index 1161361a973b..bac4ce13f6ae 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -325,6 +325,8 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
hdev->tm_info.hw_pfc_map = pfc_map;
hdev->tm_info.pfc_en = pfc->pfc_en;
+ hclge_tm_pfc_info_update(hdev);
+
return hclge_pause_setup_hw(hdev, false);
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index b25365c343d1..3fde5471e1c0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -30,6 +30,9 @@
#define HCLGE_BUF_SIZE_UNIT 256U
#define HCLGE_BUF_MUL_BY 2
#define HCLGE_BUF_DIV_BY 2
+#define NEED_RESERVE_TC_NUM 2
+#define BUF_MAX_PERCENT 100
+#define BUF_RESERVE_PERCENT 90
#define HCLGE_RESET_MAX_FAIL_CNT 5
@@ -561,8 +564,7 @@ static u8 *hclge_comm_get_strings(u32 stringset,
return buff;
for (i = 0; i < size; i++) {
- snprintf(buff, ETH_GSTRING_LEN,
- strs[i].desc);
+ snprintf(buff, ETH_GSTRING_LEN, "%s", strs[i].desc);
buff = buff + ETH_GSTRING_LEN;
}
@@ -1059,6 +1061,7 @@ static void hclge_parse_copper_link_mode(struct hclge_dev *hdev,
linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported);
linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported);
linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported);
}
static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability)
@@ -1357,8 +1360,9 @@ static int hclge_map_tqps_to_func(struct hclge_dev *hdev, u16 func_id,
req = (struct hclge_tqp_map_cmd *)desc.data;
req->tqp_id = cpu_to_le16(tqp_pid);
req->tqp_vf = func_id;
- req->tqp_flag = !is_pf << HCLGE_TQP_MAP_TYPE_B |
- 1 << HCLGE_TQP_MAP_EN_B;
+ req->tqp_flag = 1U << HCLGE_TQP_MAP_EN_B;
+ if (!is_pf)
+ req->tqp_flag |= 1U << HCLGE_TQP_MAP_TYPE_B;
req->tqp_vid = cpu_to_le16(tqp_vid);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -1694,10 +1698,14 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev,
}
if (hnae3_dev_dcb_supported(hdev)) {
+ hi_thrd = shared_buf - hdev->dv_buf_size;
+
+ if (tc_num <= NEED_RESERVE_TC_NUM)
+ hi_thrd = hi_thrd * BUF_RESERVE_PERCENT
+ / BUF_MAX_PERCENT;
+
if (tc_num)
- hi_thrd = (shared_buf - hdev->dv_buf_size) / tc_num;
- else
- hi_thrd = shared_buf - hdev->dv_buf_size;
+ hi_thrd = hi_thrd / tc_num;
hi_thrd = max_t(u32, hi_thrd, HCLGE_BUF_MUL_BY * aligned_mps);
hi_thrd = rounddown(hi_thrd, HCLGE_BUF_SIZE_UNIT);
@@ -1837,6 +1845,55 @@ static bool hclge_drop_pfc_buf_till_fit(struct hclge_dev *hdev,
return hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all);
}
+static int hclge_only_alloc_priv_buff(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
+{
+#define COMPENSATE_BUFFER 0x3C00
+#define COMPENSATE_HALF_MPS_NUM 5
+#define PRIV_WL_GAP 0x1800
+
+ u32 rx_priv = hdev->pkt_buf_size - hclge_get_tx_buff_alloced(buf_alloc);
+ u32 tc_num = hclge_get_tc_num(hdev);
+ u32 half_mps = hdev->mps >> 1;
+ u32 min_rx_priv;
+ unsigned int i;
+
+ if (tc_num)
+ rx_priv = rx_priv / tc_num;
+
+ if (tc_num <= NEED_RESERVE_TC_NUM)
+ rx_priv = rx_priv * BUF_RESERVE_PERCENT / BUF_MAX_PERCENT;
+
+ min_rx_priv = hdev->dv_buf_size + COMPENSATE_BUFFER +
+ COMPENSATE_HALF_MPS_NUM * half_mps;
+ min_rx_priv = round_up(min_rx_priv, HCLGE_BUF_SIZE_UNIT);
+ rx_priv = round_down(rx_priv, HCLGE_BUF_SIZE_UNIT);
+
+ if (rx_priv < min_rx_priv)
+ return false;
+
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i];
+
+ priv->enable = 0;
+ priv->wl.low = 0;
+ priv->wl.high = 0;
+ priv->buf_size = 0;
+
+ if (!(hdev->hw_tc_map & BIT(i)))
+ continue;
+
+ priv->enable = 1;
+ priv->buf_size = rx_priv;
+ priv->wl.high = rx_priv - hdev->dv_buf_size;
+ priv->wl.low = priv->wl.high - PRIV_WL_GAP;
+ }
+
+ buf_alloc->s_buf.buf_size = 0;
+
+ return true;
+}
+
/* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs
* @hdev: pointer to struct hclge_dev
* @buf_alloc: pointer to buffer calculation data
@@ -1856,6 +1913,9 @@ static int hclge_rx_buffer_calc(struct hclge_dev *hdev,
return 0;
}
+ if (hclge_only_alloc_priv_buff(hdev, buf_alloc))
+ return 0;
+
if (hclge_rx_buf_calc_all(hdev, true, buf_alloc))
return 0;
@@ -2261,7 +2321,8 @@ static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable)
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_AN_MODE, false);
req = (struct hclge_config_auto_neg_cmd *)desc.data;
- hnae3_set_bit(flag, HCLGE_MAC_CFG_AN_EN_B, !!enable);
+ if (enable)
+ hnae3_set_bit(flag, HCLGE_MAC_CFG_AN_EN_B, 1U);
req->cfg_an_cmd_flag = cpu_to_le32(flag);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -2604,6 +2665,7 @@ static int hclge_get_sfp_info(struct hclge_dev *hdev, struct hclge_mac *mac)
mac->speed_ability = le32_to_cpu(resp->speed_ability);
mac->autoneg = resp->autoneg;
mac->support_autoneg = resp->autoneg_ability;
+ mac->speed_type = QUERY_ACTIVE_SPEED;
if (!resp->active_fec)
mac->fec_mode = 0;
else
@@ -2724,8 +2786,9 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
/* check for vector0 msix event source */
if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK) {
- dev_dbg(&hdev->pdev->dev, "received event 0x%x\n",
- msix_src_reg);
+ dev_info(&hdev->pdev->dev, "received event 0x%x\n",
+ msix_src_reg);
+ *clearval = msix_src_reg;
return HCLGE_VECTOR0_EVENT_ERR;
}
@@ -2737,8 +2800,11 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
}
/* print other vector0 event source */
- dev_dbg(&hdev->pdev->dev, "cmdq_src_reg:0x%x, msix_src_reg:0x%x\n",
- cmdq_src_reg, msix_src_reg);
+ dev_info(&hdev->pdev->dev,
+ "CMDQ INT status:0x%x, other INT status:0x%x\n",
+ cmdq_src_reg, msix_src_reg);
+ *clearval = msix_src_reg;
+
return HCLGE_VECTOR0_EVENT_OTHER;
}
@@ -2817,7 +2883,8 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
}
/* clear the source of interrupt if it is not cause by reset */
- if (event_cause == HCLGE_VECTOR0_EVENT_MBX) {
+ if (!clearval ||
+ event_cause == HCLGE_VECTOR0_EVENT_MBX) {
hclge_clear_event_cause(hdev, event_cause, clearval);
hclge_enable_vector(&hdev->misc_vector, true);
}
@@ -4027,11 +4094,11 @@ int hclge_rss_init_hw(struct hclge_dev *hdev)
struct hclge_vport *vport = hdev->vport;
u8 *rss_indir = vport[0].rss_indirection_tbl;
u16 rss_size = vport[0].alloc_rss_size;
+ u16 tc_offset[HCLGE_MAX_TC_NUM] = {0};
+ u16 tc_size[HCLGE_MAX_TC_NUM] = {0};
u8 *key = vport[0].rss_hash_key;
u8 hfunc = vport[0].rss_algo;
- u16 tc_offset[HCLGE_MAX_TC_NUM];
u16 tc_valid[HCLGE_MAX_TC_NUM];
- u16 tc_size[HCLGE_MAX_TC_NUM];
u16 roundup_size;
unsigned int i;
int ret;
@@ -5870,20 +5937,20 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, false);
- hnae3_set_bit(loop_en, HCLGE_MAC_TX_EN_B, enable);
- hnae3_set_bit(loop_en, HCLGE_MAC_RX_EN_B, enable);
- hnae3_set_bit(loop_en, HCLGE_MAC_PAD_TX_B, enable);
- hnae3_set_bit(loop_en, HCLGE_MAC_PAD_RX_B, enable);
- hnae3_set_bit(loop_en, HCLGE_MAC_1588_TX_B, 0);
- hnae3_set_bit(loop_en, HCLGE_MAC_1588_RX_B, 0);
- hnae3_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0);
- hnae3_set_bit(loop_en, HCLGE_MAC_LINE_LP_B, 0);
- hnae3_set_bit(loop_en, HCLGE_MAC_FCS_TX_B, enable);
- hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_B, enable);
- hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B, enable);
- hnae3_set_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable);
- hnae3_set_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable);
- hnae3_set_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable);
+
+ if (enable) {
+ hnae3_set_bit(loop_en, HCLGE_MAC_TX_EN_B, 1U);
+ hnae3_set_bit(loop_en, HCLGE_MAC_RX_EN_B, 1U);
+ hnae3_set_bit(loop_en, HCLGE_MAC_PAD_TX_B, 1U);
+ hnae3_set_bit(loop_en, HCLGE_MAC_PAD_RX_B, 1U);
+ hnae3_set_bit(loop_en, HCLGE_MAC_FCS_TX_B, 1U);
+ hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_B, 1U);
+ hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B, 1U);
+ hnae3_set_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, 1U);
+ hnae3_set_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, 1U);
+ hnae3_set_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B, 1U);
+ }
+
req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -6245,8 +6312,8 @@ static int hclge_update_desc_vfid(struct hclge_desc *desc, int vfid, bool clr)
{
#define HCLGE_VF_NUM_IN_FIRST_DESC 192
- int word_num;
- int bit_num;
+ unsigned int word_num;
+ unsigned int bit_num;
if (vfid > 255 || vfid < 0)
return -EIO;
@@ -7907,7 +7974,8 @@ static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id,
req = (struct hclge_reset_tqp_queue_cmd *)desc.data;
req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK);
- hnae3_set_bit(req->reset_req, HCLGE_TQP_RESET_B, enable);
+ if (enable)
+ hnae3_set_bit(req->reset_req, HCLGE_TQP_RESET_B, 1U);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
if (ret) {
@@ -8115,8 +8183,9 @@ static void hclge_get_pauseparam(struct hnae3_handle *handle, u32 *auto_neg,
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
+ struct phy_device *phydev = hdev->hw.mac.phydev;
- *auto_neg = hclge_get_autoneg(handle);
+ *auto_neg = phydev ? hclge_get_autoneg(handle) : 0;
if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) {
*rx_en = 0;
@@ -8147,11 +8216,13 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg,
struct phy_device *phydev = hdev->hw.mac.phydev;
u32 fc_autoneg;
- fc_autoneg = hclge_get_autoneg(handle);
- if (auto_neg != fc_autoneg) {
- dev_info(&hdev->pdev->dev,
- "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
- return -EOPNOTSUPP;
+ if (phydev) {
+ fc_autoneg = hclge_get_autoneg(handle);
+ if (auto_neg != fc_autoneg) {
+ dev_info(&hdev->pdev->dev,
+ "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
+ return -EOPNOTSUPP;
+ }
}
if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) {
@@ -8162,16 +8233,13 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg,
hclge_set_flowctrl_adv(hdev, rx_en, tx_en);
- if (!fc_autoneg)
+ if (!auto_neg)
return hclge_cfg_pauseparam(hdev, rx_en, tx_en);
if (phydev)
return phy_start_aneg(phydev);
- if (hdev->pdev->revision == 0x20)
- return -EOPNOTSUPP;
-
- return hclge_restart_autoneg(handle);
+ return -EOPNOTSUPP;
}
static void hclge_get_ksettings_an_result(struct hnae3_handle *handle,
@@ -8971,12 +9039,12 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num,
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+ u16 tc_offset[HCLGE_MAX_TC_NUM] = {0};
struct hclge_dev *hdev = vport->back;
+ u16 tc_size[HCLGE_MAX_TC_NUM] = {0};
int cur_rss_size = kinfo->rss_size;
int cur_tqps = kinfo->num_tqps;
- u16 tc_offset[HCLGE_MAX_TC_NUM];
u16 tc_valid[HCLGE_MAX_TC_NUM];
- u16 tc_size[HCLGE_MAX_TC_NUM];
u16 roundup_size;
u32 *rss_indir;
unsigned int i;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 9adeba931902..a38ac7cfe16b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -29,6 +29,10 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport,
"PF fail to gen resp to VF len %d exceeds max len %d\n",
resp_data_len,
HCLGE_MBX_MAX_RESP_DATA_SIZE);
+ /* If resp_data_len is too long, set the value to max length
+ * and return the msg to VF
+ */
+ resp_data_len = HCLGE_MBX_MAX_RESP_DATA_SIZE;
}
hclge_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_PF_TO_VF, false);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index d906d09bee72..abb1b438564e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -224,6 +224,13 @@ int hclge_mac_connect_phy(struct hnae3_handle *handle)
linkmode_and(phydev->supported, phydev->supported, mask);
linkmode_copy(phydev->advertising, phydev->supported);
+ /* supported flag is Pause and Asym Pause, but default advertising
+ * should be rx on, tx on, so need clear Asym Pause in advertising
+ * flag
+ */
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+ phydev->advertising);
+
return 0;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 9edae5f15ffb..3f41fa2bc414 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -58,7 +58,8 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
u32 tick;
/* Calc tick */
- if (shaper_level >= HCLGE_SHAPER_LVL_CNT)
+ if (shaper_level >= HCLGE_SHAPER_LVL_CNT ||
+ ir > HCLGE_ETHER_MAX_RATE)
return -EINVAL;
tick = tick_array[shaper_level];
@@ -597,8 +598,10 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
hdev->tm_info.prio_tc[i] =
(i >= hdev->tm_info.num_tc) ? 0 : i;
- /* DCB is enabled if we have more than 1 TC */
- if (hdev->tm_info.num_tc > 1)
+ /* DCB is enabled if we have more than 1 TC or pfc_en is
+ * non-zero.
+ */
+ if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en)
hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
else
hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
@@ -1136,6 +1139,9 @@ static int hclge_tm_schd_mode_vnet_base_cfg(struct hclge_vport *vport)
int ret;
u8 i;
+ if (vport->vport_id >= HNAE3_MAX_TC)
+ return -EINVAL;
+
ret = hclge_tm_pri_schd_mode_cfg(hdev, vport->vport_id);
if (ret)
return ret;
@@ -1388,6 +1394,19 @@ void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc)
hclge_tm_schd_info_init(hdev);
}
+void hclge_tm_pfc_info_update(struct hclge_dev *hdev)
+{
+ /* DCB is enabled if we have more than 1 TC or pfc_en is
+ * non-zero.
+ */
+ if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en)
+ hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
+ else
+ hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+
+ hclge_pfc_info_init(hdev);
+}
+
int hclge_tm_init_hw(struct hclge_dev *hdev, bool init)
{
int ret;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index f60e540c7a62..818610988d34 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -12,7 +12,7 @@
#define HCLGE_TM_PORT_BASE_MODE_MSK BIT(0)
-#define HCLGE_DEFAULT_PAUSE_TRANS_GAP 0xFF
+#define HCLGE_DEFAULT_PAUSE_TRANS_GAP 0x7F
#define HCLGE_DEFAULT_PAUSE_TRANS_TIME 0xFFFF
/* SP or DWRR */
@@ -147,6 +147,7 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev, bool init);
int hclge_tm_schd_setup_hw(struct hclge_dev *hdev);
void hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc);
void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc);
+void hclge_tm_pfc_info_update(struct hclge_dev *hdev);
int hclge_tm_dwrr_cfg(struct hclge_dev *hdev);
int hclge_tm_init_hw(struct hclge_dev *hdev, bool init);
int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index e1588c0e8bb9..652b796044e3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -177,6 +177,38 @@ void hclgevf_cmd_setup_basic_desc(struct hclgevf_desc *desc,
desc->flag &= cpu_to_le16(~HCLGEVF_CMD_FLAG_WR);
}
+static int hclgevf_cmd_convert_err_code(u16 desc_ret)
+{
+ switch (desc_ret) {
+ case HCLGEVF_CMD_EXEC_SUCCESS:
+ return 0;
+ case HCLGEVF_CMD_NO_AUTH:
+ return -EPERM;
+ case HCLGEVF_CMD_NOT_SUPPORTED:
+ return -EOPNOTSUPP;
+ case HCLGEVF_CMD_QUEUE_FULL:
+ return -EXFULL;
+ case HCLGEVF_CMD_NEXT_ERR:
+ return -ENOSR;
+ case HCLGEVF_CMD_UNEXE_ERR:
+ return -ENOTBLK;
+ case HCLGEVF_CMD_PARA_ERR:
+ return -EINVAL;
+ case HCLGEVF_CMD_RESULT_ERR:
+ return -ERANGE;
+ case HCLGEVF_CMD_TIMEOUT:
+ return -ETIME;
+ case HCLGEVF_CMD_HILINK_ERR:
+ return -ENOLINK;
+ case HCLGEVF_CMD_QUEUE_ILLEGAL:
+ return -ENXIO;
+ case HCLGEVF_CMD_INVALID:
+ return -EBADR;
+ default:
+ return -EIO;
+ }
+}
+
/* hclgevf_cmd_send - send command to command queue
* @hw: pointer to the hw struct
* @desc: prefilled descriptor for describing the command
@@ -188,6 +220,7 @@ void hclgevf_cmd_setup_basic_desc(struct hclgevf_desc *desc,
int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num)
{
struct hclgevf_dev *hdev = (struct hclgevf_dev *)hw->hdev;
+ struct hclgevf_cmq_ring *csq = &hw->cmq.csq;
struct hclgevf_desc *desc_to_use;
bool complete = false;
u32 timeout = 0;
@@ -199,8 +232,17 @@ int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num)
spin_lock_bh(&hw->cmq.csq.lock);
- if (num > hclgevf_ring_space(&hw->cmq.csq) ||
- test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state)) {
+ if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state)) {
+ spin_unlock_bh(&hw->cmq.csq.lock);
+ return -EBUSY;
+ }
+
+ if (num > hclgevf_ring_space(&hw->cmq.csq)) {
+ /* If CMDQ ring is full, SW HEAD and HW HEAD may be different,
+ * need update the SW HEAD pointer csq->next_to_clean
+ */
+ csq->next_to_clean = hclgevf_read_dev(hw,
+ HCLGEVF_NIC_CSQ_HEAD_REG);
spin_unlock_bh(&hw->cmq.csq.lock);
return -EBUSY;
}
@@ -249,11 +291,7 @@ int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num)
else
retval = le16_to_cpu(desc[0].retval);
- if ((enum hclgevf_cmd_return_status)retval ==
- HCLGEVF_CMD_EXEC_SUCCESS)
- status = 0;
- else
- status = -EIO;
+ status = hclgevf_cmd_convert_err_code(retval);
hw->cmq.last_status = (enum hclgevf_cmd_status)retval;
ntc++;
handle++;
@@ -263,14 +301,13 @@ int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num)
}
if (!complete)
- status = -EAGAIN;
+ status = -EBADE;
/* Clean the command send queue */
handle = hclgevf_cmd_csq_clean(hw);
- if (handle != num) {
+ if (handle != num)
dev_warn(&hdev->pdev->dev,
"cleaned %d, need to clean %d\n", handle, num);
- }
spin_unlock_bh(&hw->cmq.csq.lock);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
index 47030b42341f..127a434a56f3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
@@ -46,9 +46,17 @@ struct hclgevf_cmq_ring {
enum hclgevf_cmd_return_status {
HCLGEVF_CMD_EXEC_SUCCESS = 0,
- HCLGEVF_CMD_NO_AUTH = 1,
- HCLGEVF_CMD_NOT_EXEC = 2,
- HCLGEVF_CMD_QUEUE_FULL = 3,
+ HCLGEVF_CMD_NO_AUTH = 1,
+ HCLGEVF_CMD_NOT_SUPPORTED = 2,
+ HCLGEVF_CMD_QUEUE_FULL = 3,
+ HCLGEVF_CMD_NEXT_ERR = 4,
+ HCLGEVF_CMD_UNEXE_ERR = 5,
+ HCLGEVF_CMD_PARA_ERR = 6,
+ HCLGEVF_CMD_RESULT_ERR = 7,
+ HCLGEVF_CMD_TIMEOUT = 8,
+ HCLGEVF_CMD_HILINK_ERR = 9,
+ HCLGEVF_CMD_QUEUE_ILLEGAL = 10,
+ HCLGEVF_CMD_INVALID = 11,
};
enum hclgevf_cmd_status {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 21736e5024be..a13a0e101c3b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -11,6 +11,8 @@
#define HCLGEVF_NAME "hclgevf"
+#define HCLGEVF_RESET_MAX_FAIL_CNT 5
+
static int hclgevf_reset_hdev(struct hclgevf_dev *hdev);
static struct hnae3_ae_algo ae_algovf;
@@ -992,6 +994,8 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
u8 type;
req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data;
+ type = en ? HCLGE_MBX_MAP_RING_TO_VECTOR :
+ HCLGE_MBX_UNMAP_RING_TO_VECTOR;
for (node = ring_chain; node; node = node->next) {
int idx_offset = HCLGE_MBX_RING_MAP_BASIC_MSG_NUM +
@@ -1001,9 +1005,6 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
hclgevf_cmd_setup_basic_desc(&desc,
HCLGEVF_OPC_MBX_VF_TO_PF,
false);
- type = en ?
- HCLGE_MBX_MAP_RING_TO_VECTOR :
- HCLGE_MBX_UNMAP_RING_TO_VECTOR;
req->msg[0] = type;
req->msg[1] = vector_id;
}
@@ -1481,6 +1482,24 @@ static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev)
return ret;
}
+static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev)
+{
+ hdev->rst_stats.rst_fail_cnt++;
+ dev_err(&hdev->pdev->dev, "failed to reset VF(%d)\n",
+ hdev->rst_stats.rst_fail_cnt);
+
+ if (hdev->rst_stats.rst_fail_cnt < HCLGEVF_RESET_MAX_FAIL_CNT)
+ set_bit(hdev->reset_type, &hdev->reset_pending);
+
+ if (hclgevf_is_reset_pending(hdev)) {
+ set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+ hclgevf_reset_task_schedule(hdev);
+ } else {
+ hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CSQ_DEPTH_REG,
+ HCLGEVF_NIC_CMQ_ENABLE);
+ }
+}
+
static int hclgevf_reset(struct hclgevf_dev *hdev)
{
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
@@ -1537,19 +1556,13 @@ static int hclgevf_reset(struct hclgevf_dev *hdev)
hdev->last_reset_time = jiffies;
ae_dev->reset_type = HNAE3_NONE_RESET;
hdev->rst_stats.rst_done_cnt++;
+ hdev->rst_stats.rst_fail_cnt = 0;
return ret;
err_reset_lock:
rtnl_unlock();
err_reset:
- /* When VF reset failed, only the higher level reset asserted by PF
- * can restore it, so re-initialize the command queue to receive
- * this higher reset event.
- */
- hclgevf_cmd_init(hdev);
- dev_err(&hdev->pdev->dev, "failed to reset VF\n");
- if (hclgevf_is_reset_pending(hdev))
- hclgevf_reset_task_schedule(hdev);
+ hclgevf_reset_err_handle(hdev);
return ret;
}
@@ -2575,6 +2588,12 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev)
return ret;
}
+ if (pdev->revision >= 0x21) {
+ ret = hclgevf_set_promisc_mode(hdev, true);
+ if (ret)
+ return ret;
+ }
+
dev_info(&hdev->pdev->dev, "Reset done\n");
return 0;
@@ -2654,9 +2673,11 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
* firmware makes sure broadcast packets can be accepted.
* For revision 0x21, default to enable broadcast promisc mode.
*/
- ret = hclgevf_set_promisc_mode(hdev, true);
- if (ret)
- goto err_config;
+ if (pdev->revision >= 0x21) {
+ ret = hclgevf_set_promisc_mode(hdev, true);
+ if (ret)
+ goto err_config;
+ }
/* Initialize RSS for this VF */
ret = hclgevf_rss_init_hw(hdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index b0ee986bfcc2..5a9e30998a8f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -226,6 +226,7 @@ struct hclgevf_rst_stats {
u32 vf_rst_cnt; /* the number of VF reset */
u32 rst_done_cnt; /* the number of reset completed */
u32 hw_rst_done_cnt; /* the number of HW reset completed */
+ u32 rst_fail_cnt; /* the number of VF reset fail */
};
struct hclgevf_dev {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index 30f2e9352cf3..f60b80bd605e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -102,7 +102,8 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode,
~HCLGE_MBX_NEED_RESP_BIT;
req->msg[0] = code;
req->msg[1] = subcode;
- memcpy(&req->msg[2], msg_data, msg_len);
+ if (msg_data)
+ memcpy(&req->msg[2], msg_data, msg_len);
/* synchronous send */
if (need_resp) {
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
index 8d98f37c88a8..73a20f01ad4c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
@@ -440,35 +440,6 @@ static u32 hinic_get_rxfh_indir_size(struct net_device *netdev)
#define ARRAY_LEN(arr) ((int)((int)sizeof(arr) / (int)sizeof(arr[0])))
-#define HINIC_NETDEV_STAT(_stat_item) { \
- .name = #_stat_item, \
- .size = FIELD_SIZEOF(struct rtnl_link_stats64, _stat_item), \
- .offset = offsetof(struct rtnl_link_stats64, _stat_item) \
-}
-
-static struct hinic_stats hinic_netdev_stats[] = {
- HINIC_NETDEV_STAT(rx_packets),
- HINIC_NETDEV_STAT(tx_packets),
- HINIC_NETDEV_STAT(rx_bytes),
- HINIC_NETDEV_STAT(tx_bytes),
- HINIC_NETDEV_STAT(rx_errors),
- HINIC_NETDEV_STAT(tx_errors),
- HINIC_NETDEV_STAT(rx_dropped),
- HINIC_NETDEV_STAT(tx_dropped),
- HINIC_NETDEV_STAT(multicast),
- HINIC_NETDEV_STAT(collisions),
- HINIC_NETDEV_STAT(rx_length_errors),
- HINIC_NETDEV_STAT(rx_over_errors),
- HINIC_NETDEV_STAT(rx_crc_errors),
- HINIC_NETDEV_STAT(rx_frame_errors),
- HINIC_NETDEV_STAT(rx_fifo_errors),
- HINIC_NETDEV_STAT(rx_missed_errors),
- HINIC_NETDEV_STAT(tx_aborted_errors),
- HINIC_NETDEV_STAT(tx_carrier_errors),
- HINIC_NETDEV_STAT(tx_fifo_errors),
- HINIC_NETDEV_STAT(tx_heartbeat_errors),
-};
-
#define HINIC_FUNC_STAT(_stat_item) { \
.name = #_stat_item, \
.size = FIELD_SIZEOF(struct hinic_vport_stats, _stat_item), \
@@ -658,20 +629,11 @@ static void hinic_get_ethtool_stats(struct net_device *netdev,
{
struct hinic_dev *nic_dev = netdev_priv(netdev);
struct hinic_vport_stats vport_stats = {0};
- const struct rtnl_link_stats64 *net_stats;
struct hinic_phy_port_stats *port_stats;
- struct rtnl_link_stats64 temp;
u16 i = 0, j = 0;
char *p;
int err;
- net_stats = dev_get_stats(netdev, &temp);
- for (j = 0; j < ARRAY_LEN(hinic_netdev_stats); j++, i++) {
- p = (char *)net_stats + hinic_netdev_stats[j].offset;
- data[i] = (hinic_netdev_stats[j].size ==
- sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
- }
-
err = hinic_get_vport_stats(nic_dev, &vport_stats);
if (err)
netif_err(nic_dev, drv, netdev,
@@ -716,8 +678,7 @@ static int hinic_get_sset_count(struct net_device *netdev, int sset)
switch (sset) {
case ETH_SS_STATS:
q_num = nic_dev->num_qps;
- count = ARRAY_LEN(hinic_netdev_stats) +
- ARRAY_LEN(hinic_function_stats) +
+ count = ARRAY_LEN(hinic_function_stats) +
(ARRAY_LEN(hinic_tx_queue_stats) +
ARRAY_LEN(hinic_rx_queue_stats)) * q_num;
@@ -738,12 +699,6 @@ static void hinic_get_strings(struct net_device *netdev,
switch (stringset) {
case ETH_SS_STATS:
- for (i = 0; i < ARRAY_LEN(hinic_netdev_stats); i++) {
- memcpy(p, hinic_netdev_stats[i].name,
- ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- }
-
for (i = 0; i < ARRAY_LEN(hinic_function_stats); i++) {
memcpy(p, hinic_function_stats[i].name,
ETH_GSTRING_LEN);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index e83e3bf850d5..984c98f33258 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -45,6 +45,8 @@ enum hinic_port_cmd {
HINIC_PORT_CMD_SET_RX_CSUM = 26,
+ HINIC_PORT_CMD_SET_RX_VLAN_OFFLOAD = 27,
+
HINIC_PORT_CMD_GET_PORT_STATISTICS = 28,
HINIC_PORT_CMD_CLEAR_PORT_STATISTICS = 29,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
index c6b809e24983..f4b6d2c1061f 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
@@ -222,6 +222,8 @@
#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_SHIFT 0
#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_MASK 0xFFFU
+#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_SHIFT 21
+#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_MASK 0x1U
#define RQ_CQE_OFFOLAD_TYPE_GET(val, member) (((val) >> \
RQ_CQE_OFFOLAD_TYPE_##member##_SHIFT) & \
@@ -230,6 +232,19 @@
#define HINIC_GET_RX_PKT_TYPE(offload_type) \
RQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_TYPE)
+#define HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type) \
+ RQ_CQE_OFFOLAD_TYPE_GET(offload_type, VLAN_EN)
+
+#define RQ_CQE_SGE_VLAN_MASK 0xFFFFU
+#define RQ_CQE_SGE_VLAN_SHIFT 0
+
+#define RQ_CQE_SGE_GET(val, member) (((val) >> \
+ RQ_CQE_SGE_##member##_SHIFT) & \
+ RQ_CQE_SGE_##member##_MASK)
+
+#define HINIC_GET_RX_VLAN_TAG(vlan_len) \
+ RQ_CQE_SGE_GET(vlan_len, VLAN)
+
#define HINIC_RSS_TYPE_VALID_SHIFT 23
#define HINIC_RSS_TYPE_TCP_IPV6_EXT_SHIFT 24
#define HINIC_RSS_TYPE_IPV6_EXT_SHIFT 25
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 1b917543feac..2411ad270c98 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -256,37 +256,43 @@ static int hinic_configure_max_qnum(struct hinic_dev *nic_dev)
static int hinic_rss_init(struct hinic_dev *nic_dev)
{
- u32 indir_tbl[HINIC_RSS_INDIR_SIZE] = { 0 };
u8 default_rss_key[HINIC_RSS_KEY_SIZE];
u8 tmpl_idx = nic_dev->rss_tmpl_idx;
+ u32 *indir_tbl;
int err, i;
+ indir_tbl = kcalloc(HINIC_RSS_INDIR_SIZE, sizeof(u32), GFP_KERNEL);
+ if (!indir_tbl)
+ return -ENOMEM;
+
netdev_rss_key_fill(default_rss_key, sizeof(default_rss_key));
for (i = 0; i < HINIC_RSS_INDIR_SIZE; i++)
indir_tbl[i] = ethtool_rxfh_indir_default(i, nic_dev->num_rss);
err = hinic_rss_set_template_tbl(nic_dev, tmpl_idx, default_rss_key);
if (err)
- return err;
+ goto out;
err = hinic_rss_set_indir_tbl(nic_dev, tmpl_idx, indir_tbl);
if (err)
- return err;
+ goto out;
err = hinic_set_rss_type(nic_dev, tmpl_idx, nic_dev->rss_type);
if (err)
- return err;
+ goto out;
err = hinic_rss_set_hash_engine(nic_dev, tmpl_idx,
nic_dev->rss_hash_engine);
if (err)
- return err;
+ goto out;
err = hinic_rss_cfg(nic_dev, 1, tmpl_idx);
if (err)
- return err;
+ goto out;
- return 0;
+out:
+ kfree(indir_tbl);
+ return err;
}
static void hinic_rss_deinit(struct hinic_dev *nic_dev)
@@ -830,14 +836,14 @@ static const struct net_device_ops hinic_netdev_ops = {
.ndo_get_stats64 = hinic_get_stats64,
.ndo_fix_features = hinic_fix_features,
.ndo_set_features = hinic_set_features,
-
};
static void netdev_features_init(struct net_device *netdev)
{
netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM |
NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 |
- NETIF_F_RXCSUM | NETIF_F_LRO;
+ NETIF_F_RXCSUM | NETIF_F_LRO |
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
netdev->vlan_features = netdev->hw_features;
@@ -917,6 +923,11 @@ static int set_features(struct hinic_dev *nic_dev,
HINIC_LRO_MAX_WQE_NUM_DEFAULT);
}
+ if (changed & NETIF_F_HW_VLAN_CTAG_RX)
+ err = hinic_set_rx_vlan_offload(nic_dev,
+ !!(features &
+ NETIF_F_HW_VLAN_CTAG_RX));
+
return err;
}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
index c07adf793215..1bbeb91be808 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -431,6 +431,36 @@ int hinic_set_rx_csum_offload(struct hinic_dev *nic_dev, u32 en)
return 0;
}
+int hinic_set_rx_vlan_offload(struct hinic_dev *nic_dev, u8 en)
+{
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_vlan_cfg vlan_cfg;
+ struct hinic_hwif *hwif;
+ struct pci_dev *pdev;
+ u16 out_size;
+ int err;
+
+ if (!hwdev)
+ return -EINVAL;
+
+ hwif = hwdev->hwif;
+ pdev = hwif->pdev;
+ vlan_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+ vlan_cfg.vlan_rx_offload = en;
+
+ err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RX_VLAN_OFFLOAD,
+ &vlan_cfg, sizeof(vlan_cfg),
+ &vlan_cfg, &out_size);
+ if (err || !out_size || vlan_cfg.status) {
+ dev_err(&pdev->dev,
+ "Failed to set rx vlan offload, err: %d, status: 0x%x, out size: 0x%x\n",
+ err, vlan_cfg.status, out_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int hinic_set_max_qnum(struct hinic_dev *nic_dev, u8 num_rqs)
{
struct hinic_hwdev *hwdev = nic_dev->hwdev;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h
index 16140a13000b..1bc47c7a5c00 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h
@@ -223,6 +223,16 @@ struct hinic_lro_timer {
u32 timer;
};
+struct hinic_vlan_cfg {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_id;
+ u8 vlan_rx_offload;
+ u8 rsvd1[5];
+};
+
struct hinic_rss_template_mgmt {
u8 status;
u8 version;
@@ -558,4 +568,7 @@ int hinic_get_phy_port_stats(struct hinic_dev *nic_dev,
int hinic_get_vport_stats(struct hinic_dev *nic_dev,
struct hinic_vport_stats *stats);
+
+int hinic_set_rx_vlan_offload(struct hinic_dev *nic_dev, u8 en);
+
#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index 609ad4333cdd..56ea6d692f1c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -18,6 +18,7 @@
#include <linux/dma-mapping.h>
#include <linux/prefetch.h>
#include <linux/cpumask.h>
+#include <linux/if_vlan.h>
#include <asm/barrier.h>
#include "hinic_common.h"
@@ -325,6 +326,7 @@ static int rx_recv_jumbo_pkt(struct hinic_rxq *rxq, struct sk_buff *head_skb,
static int rxq_recv(struct hinic_rxq *rxq, int budget)
{
struct hinic_qp *qp = container_of(rxq->rq, struct hinic_qp, rq);
+ struct net_device *netdev = rxq->netdev;
u64 pkt_len = 0, rx_bytes = 0;
struct hinic_rq *rq = rxq->rq;
struct hinic_rq_wqe *rq_wqe;
@@ -334,8 +336,11 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget)
struct hinic_sge sge;
unsigned int status;
struct sk_buff *skb;
+ u32 offload_type;
u16 ci, num_lro;
u16 num_wqe = 0;
+ u32 vlan_len;
+ u16 vid;
while (pkts < budget) {
num_wqes = 0;
@@ -368,6 +373,14 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget)
hinic_rq_put_wqe(rq, ci,
(num_wqes + 1) * HINIC_RQ_WQE_SIZE);
+ offload_type = be32_to_cpu(cqe->offload_type);
+ vlan_len = be32_to_cpu(cqe->len);
+ if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+ HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type)) {
+ vid = HINIC_GET_RX_VLAN_TAG(vlan_len);
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+ }
+
skb_record_rx_queue(skb, qp->q_id);
skb->protocol = eth_type_trans(skb, rxq->netdev);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index f4f76370cd65..9c78251f9c39 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -407,10 +407,20 @@ static int offload_csum(struct hinic_sq_task *task, u32 *queue_info,
return 1;
}
+static void offload_vlan(struct hinic_sq_task *task, u32 *queue_info,
+ u16 vlan_tag, u16 vlan_pri)
+{
+ task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) |
+ HINIC_SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);
+
+ *queue_info |= HINIC_SQ_CTRL_SET(vlan_pri, QUEUE_INFO_PRI);
+}
+
static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task,
u32 *queue_info)
{
enum hinic_offload_type offload = 0;
+ u16 vlan_tag;
int enabled;
enabled = offload_tso(task, queue_info, skb);
@@ -424,6 +434,13 @@ static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task,
return -EPROTONOSUPPORT;
}
+ if (unlikely(skb_vlan_tag_present(skb))) {
+ vlan_tag = skb_vlan_tag_get(skb);
+ offload_vlan(task, queue_info, vlan_tag,
+ vlan_tag >> VLAN_PRIO_SHIFT);
+ offload |= TX_OFFLOAD_VLAN;
+ }
+
if (offload)
hinic_task_set_l2hdr(task, skb_network_offset(skb));
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 551de8c2fef2..f703fa58458e 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -3019,7 +3019,7 @@ static void e1000_tx_queue(struct e1000_adapter *adapter,
* applicable for weak-ordered memory model archs,
* such as IA-64).
*/
- wmb();
+ dma_wmb();
tx_ring->next_to_use = i;
}
@@ -4540,7 +4540,7 @@ e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter,
* applicable for weak-ordered memory model archs,
* such as IA-64).
*/
- wmb();
+ dma_wmb();
writel(i, adapter->hw.hw_addr + rx_ring->rdt);
}
}
@@ -4655,7 +4655,7 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
* applicable for weak-ordered memory model archs,
* such as IA-64).
*/
- wmb();
+ dma_wmb();
writel(i, hw->hw_addr + rx_ring->rdt);
}
}
diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
index f86d55657959..4b103cca8a39 100644
--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c
+++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
@@ -680,7 +680,7 @@ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw)
ew32(TCTL, E1000_TCTL_PSP);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
ctrl = er32(CTRL);
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index b9309302c29e..2c1bab377b2a 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -959,7 +959,7 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw)
ew32(TCTL, tctl);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
/* Must acquire the MDIO ownership before MAC reset.
* Ownership defaults to firmware after a reset.
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index fd550dee4982..63c3c79380a1 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -222,6 +222,9 @@
#define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */
#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Master Req status */
+/* PCIm function state */
+#define E1000_STATUS_PCIM_STATE 0x40000000
+
#define HALF_DUPLEX 1
#define FULL_DUPLEX 2
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index be13227f1697..34cd67951aec 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -186,12 +186,13 @@ struct e1000_phy_regs {
/* board specific private data structure */
struct e1000_adapter {
- struct timer_list watchdog_timer;
struct timer_list phy_info_timer;
struct timer_list blink_timer;
struct work_struct reset_task;
- struct work_struct watchdog_task;
+ struct delayed_work watchdog_task;
+
+ struct workqueue_struct *e1000_workqueue;
const struct e1000_info *ei;
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 02ebf208f48b..08342698386d 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -1014,7 +1014,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
/* Disable all the interrupts */
ew32(IMC, 0xFFFFFFFF);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
/* Test each interrupt */
for (i = 0; i < 10; i++) {
@@ -1046,7 +1046,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
ew32(IMC, mask);
ew32(ICS, mask);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
if (adapter->test_icr & mask) {
*data = 3;
@@ -1064,7 +1064,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
ew32(IMS, mask);
ew32(ICS, mask);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
if (!(adapter->test_icr & mask)) {
*data = 4;
@@ -1082,7 +1082,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
ew32(IMC, ~mask & 0x00007FFF);
ew32(ICS, ~mask & 0x00007FFF);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
if (adapter->test_icr) {
*data = 5;
@@ -1094,7 +1094,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
/* Disable all the interrupts */
ew32(IMC, 0xFFFFFFFF);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
/* Unhook test interrupt handler */
free_irq(irq, netdev);
@@ -1470,7 +1470,7 @@ static int e1000_set_82571_fiber_loopback(struct e1000_adapter *adapter)
*/
ew32(SCTL, E1000_SCTL_ENABLE_SERDES_LOOPBACK);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
return 0;
}
@@ -1584,7 +1584,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter)
hw->phy.media_type == e1000_media_type_internal_serdes) {
ew32(SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
break;
}
/* Fall Through */
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index cdae0efde8e6..395b05701480 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -271,7 +271,7 @@ static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw)
u16 count = 20;
do {
- usleep_range(5000, 10000);
+ usleep_range(5000, 6000);
} while (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LPCD) && count--);
msleep(30);
@@ -405,7 +405,7 @@ out:
/* Ungate automatic PHY configuration on non-managed 82579 */
if ((hw->mac.type == e1000_pch2lan) &&
!(fwsm & E1000_ICH_FWSM_FW_VALID)) {
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
e1000_gate_hw_phy_config_ich8lan(hw, false);
}
@@ -531,7 +531,7 @@ static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw)
phy->id = 0;
while ((e1000_phy_unknown == e1000e_get_phy_type_from_id(phy->id)) &&
(i++ < 100)) {
- usleep_range(1000, 2000);
+ usleep_range(1000, 1100);
ret_val = e1000e_get_phy_id(hw);
if (ret_val)
return ret_val;
@@ -1244,7 +1244,7 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
goto out;
}
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
}
e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10);
@@ -1999,7 +1999,7 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw)
while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) &&
(i++ < 30))
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
return blocked ? E1000_BLK_PHY_RESET : 0;
}
@@ -2818,7 +2818,7 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw)
return 0;
/* Allow time for h/w to get to quiescent state after reset */
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
/* Perform any necessary post-reset workarounds */
switch (hw->mac.type) {
@@ -2854,7 +2854,7 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw)
if (hw->mac.type == e1000_pch2lan) {
/* Ungate automatic PHY configuration on non-managed 82579 */
if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
e1000_gate_hw_phy_config_ich8lan(hw, false);
}
@@ -3875,7 +3875,7 @@ release:
*/
if (!ret_val) {
nvm->ops.reload(hw);
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
}
out:
@@ -4026,7 +4026,7 @@ release:
*/
if (!ret_val) {
nvm->ops.reload(hw);
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
}
out:
@@ -4650,7 +4650,7 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw)
ew32(TCTL, E1000_TCTL_PSP);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
/* Workaround for ICH8 bit corruption issue in FIFO memory */
if (hw->mac.type == e1000_ich8lan) {
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index 4abd55d646c5..e531976f8a67 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -797,7 +797,7 @@ static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw)
* milliseconds even if the other end is doing it in SW).
*/
for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) {
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
status = er32(STATUS);
if (status & E1000_STATUS_LU)
break;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index b081a1ef6859..e4baa13b3cda 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1780,7 +1780,8 @@ static irqreturn_t e1000_intr_msi(int __always_unused irq, void *data)
}
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
- mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ queue_delayed_work(adapter->e1000_workqueue,
+ &adapter->watchdog_task, 1);
}
/* Reset on uncorrectable ECC error */
@@ -1860,7 +1861,8 @@ static irqreturn_t e1000_intr(int __always_unused irq, void *data)
}
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
- mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ queue_delayed_work(adapter->e1000_workqueue,
+ &adapter->watchdog_task, 1);
}
/* Reset on uncorrectable ECC error */
@@ -1905,7 +1907,8 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
hw->mac.get_link_status = true;
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
- mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ queue_delayed_work(adapter->e1000_workqueue,
+ &adapter->watchdog_task, 1);
}
if (!test_bit(__E1000_DOWN, &adapter->state))
@@ -3208,7 +3211,7 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX))
ew32(RCTL, rctl & ~E1000_RCTL_EN);
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
if (adapter->flags2 & FLAG2_DMA_BURST) {
/* set the writeback threshold (only takes effect if the RDTR
@@ -4046,12 +4049,12 @@ void e1000e_reset(struct e1000_adapter *adapter)
case e1000_pch_lpt:
case e1000_pch_spt:
case e1000_pch_cnp:
- fc->refresh_time = 0x0400;
+ fc->refresh_time = 0xFFFF;
+ fc->pause_time = 0xFFFF;
if (adapter->netdev->mtu <= ETH_DATA_LEN) {
fc->high_water = 0x05C20;
fc->low_water = 0x05048;
- fc->pause_time = 0x0650;
break;
}
@@ -4272,13 +4275,12 @@ void e1000e_down(struct e1000_adapter *adapter, bool reset)
/* flush both disables and wait for them to finish */
e1e_flush();
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
e1000_irq_disable(adapter);
napi_synchronize(&adapter->napi);
- del_timer_sync(&adapter->watchdog_timer);
del_timer_sync(&adapter->phy_info_timer);
spin_lock(&adapter->stats64_lock);
@@ -4310,7 +4312,7 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter)
{
might_sleep();
while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
- usleep_range(1000, 2000);
+ usleep_range(1000, 1100);
e1000e_down(adapter, true);
e1000e_up(adapter);
clear_bit(__E1000_RESETTING, &adapter->state);
@@ -4707,7 +4709,7 @@ int e1000e_close(struct net_device *netdev)
int count = E1000_CHECK_RESET_COUNT;
while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
@@ -5150,31 +5152,18 @@ static void e1000e_check_82574_phy_workaround(struct e1000_adapter *adapter)
}
}
-/**
- * e1000_watchdog - Timer Call-back
- * @data: pointer to adapter cast into an unsigned long
- **/
-static void e1000_watchdog(struct timer_list *t)
-{
- struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer);
-
- /* Do the rest outside of interrupt context */
- schedule_work(&adapter->watchdog_task);
-
- /* TODO: make this use queue_delayed_work() */
-}
-
static void e1000_watchdog_task(struct work_struct *work)
{
struct e1000_adapter *adapter = container_of(work,
struct e1000_adapter,
- watchdog_task);
+ watchdog_task.work);
struct net_device *netdev = adapter->netdev;
struct e1000_mac_info *mac = &adapter->hw.mac;
struct e1000_phy_info *phy = &adapter->hw.phy;
struct e1000_ring *tx_ring = adapter->tx_ring;
+ u32 dmoff_exit_timeout = 100, tries = 0;
struct e1000_hw *hw = &adapter->hw;
- u32 link, tctl;
+ u32 link, tctl, pcim_state;
if (test_bit(__E1000_DOWN, &adapter->state))
return;
@@ -5199,6 +5188,21 @@ static void e1000_watchdog_task(struct work_struct *work)
/* Cancel scheduled suspend requests. */
pm_runtime_resume(netdev->dev.parent);
+ /* Checking if MAC is in DMoff state*/
+ pcim_state = er32(STATUS);
+ while (pcim_state & E1000_STATUS_PCIM_STATE) {
+ if (tries++ == dmoff_exit_timeout) {
+ e_dbg("Error in exiting dmoff\n");
+ break;
+ }
+ usleep_range(10000, 20000);
+ pcim_state = er32(STATUS);
+
+ /* Checking if MAC exited DMoff state */
+ if (!(pcim_state & E1000_STATUS_PCIM_STATE))
+ e1000_phy_hw_reset(&adapter->hw);
+ }
+
/* update snapshot of PHY registers on LSC */
e1000_phy_read_status(adapter);
mac->ops.get_link_up_info(&adapter->hw,
@@ -5400,8 +5404,9 @@ link_up:
/* Reset the timer */
if (!test_bit(__E1000_DOWN, &adapter->state))
- mod_timer(&adapter->watchdog_timer,
- round_jiffies(jiffies + 2 * HZ));
+ queue_delayed_work(adapter->e1000_workqueue,
+ &adapter->watchdog_task,
+ round_jiffies(2 * HZ));
}
#define E1000_TX_FLAGS_CSUM 0x00000001
@@ -6021,7 +6026,7 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
}
while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
- usleep_range(1000, 2000);
+ usleep_range(1000, 1100);
/* e1000e_down -> e1000e_reset dependent on max_frame_size & mtu */
adapter->max_frame_size = max_frame;
e_info("changing MTU from %d to %d\n", netdev->mtu, new_mtu);
@@ -6301,7 +6306,7 @@ static int e1000e_pm_freeze(struct device *dev)
int count = E1000_CHECK_RESET_COUNT;
while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
@@ -6716,7 +6721,7 @@ static int e1000e_pm_runtime_suspend(struct device *dev)
int count = E1000_CHECK_RESET_COUNT;
while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
@@ -7256,11 +7261,21 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_eeprom;
}
- timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0);
+ adapter->e1000_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
+ e1000e_driver_name);
+
+ if (!adapter->e1000_workqueue) {
+ err = -ENOMEM;
+ goto err_workqueue;
+ }
+
+ INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog_task);
+ queue_delayed_work(adapter->e1000_workqueue, &adapter->watchdog_task,
+ 0);
+
timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, 0);
INIT_WORK(&adapter->reset_task, e1000_reset_task);
- INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task);
INIT_WORK(&adapter->downshift_task, e1000e_downshift_workaround);
INIT_WORK(&adapter->update_phy_task, e1000e_update_phy_task);
INIT_WORK(&adapter->print_hang_task, e1000_print_hw_hang);
@@ -7354,6 +7369,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return 0;
err_register:
+ flush_workqueue(adapter->e1000_workqueue);
+ destroy_workqueue(adapter->e1000_workqueue);
+err_workqueue:
if (!(adapter->flags & FLAG_HAS_AMT))
e1000e_release_hw_control(adapter);
err_eeprom:
@@ -7400,15 +7418,17 @@ static void e1000_remove(struct pci_dev *pdev)
*/
if (!down)
set_bit(__E1000_DOWN, &adapter->state);
- del_timer_sync(&adapter->watchdog_timer);
del_timer_sync(&adapter->phy_info_timer);
cancel_work_sync(&adapter->reset_task);
- cancel_work_sync(&adapter->watchdog_task);
cancel_work_sync(&adapter->downshift_task);
cancel_work_sync(&adapter->update_phy_task);
cancel_work_sync(&adapter->print_hang_task);
+ cancel_delayed_work(&adapter->watchdog_task);
+ flush_workqueue(adapter->e1000_workqueue);
+ destroy_workqueue(adapter->e1000_workqueue);
+
if (adapter->flags & FLAG_HAS_HW_TIMESTAMP) {
cancel_work_sync(&adapter->tx_hwtstamp_work);
if (adapter->tx_hwtstamp_skb) {
diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c
index 937f9af22d26..e609f4df86f4 100644
--- a/drivers/net/ethernet/intel/e1000e/nvm.c
+++ b/drivers/net/ethernet/intel/e1000e/nvm.c
@@ -392,7 +392,7 @@ s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
break;
}
}
- usleep_range(10000, 20000);
+ usleep_range(10000, 11000);
nvm->ops.release(hw);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 8dc98d1d2e86..84bd06901014 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -27,6 +27,7 @@
#include <net/ip6_checksum.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
+#include <linux/if_macvlan.h>
#include <linux/if_bridge.h>
#include <linux/clocksource.h>
#include <linux/net_tstamp.h>
@@ -412,6 +413,11 @@ struct i40e_flex_pit {
u8 pit_index;
};
+struct i40e_fwd_adapter {
+ struct net_device *netdev;
+ int bit_no;
+};
+
struct i40e_channel {
struct list_head list;
bool initialized;
@@ -426,11 +432,25 @@ struct i40e_channel {
struct i40e_aqc_vsi_properties_data info;
u64 max_tx_rate;
+ struct i40e_fwd_adapter *fwd;
/* track this channel belongs to which VSI */
struct i40e_vsi *parent_vsi;
};
+static inline bool i40e_is_channel_macvlan(struct i40e_channel *ch)
+{
+ return !!ch->fwd;
+}
+
+static inline u8 *i40e_channel_mac(struct i40e_channel *ch)
+{
+ if (i40e_is_channel_macvlan(ch))
+ return ch->fwd->netdev->dev_addr;
+ else
+ return NULL;
+}
+
/* struct that defines the Ethernet device */
struct i40e_pf {
struct pci_dev *pdev;
@@ -775,7 +795,8 @@ struct i40e_vsi {
u16 alloc_queue_pairs; /* Allocated Tx/Rx queues */
u16 req_queue_pairs; /* User requested queue pairs */
u16 num_queue_pairs; /* Used tx and rx pairs */
- u16 num_desc;
+ u16 num_tx_desc;
+ u16 num_rx_desc;
enum i40e_vsi_type type; /* VSI type, e.g., LAN, FCoE, etc */
s16 vf_id; /* Virtual function ID for SRIOV VSIs */
@@ -812,6 +833,13 @@ struct i40e_vsi {
struct list_head ch_list;
u16 tc_seid_map[I40E_MAX_TRAFFIC_CLASS];
+ /* macvlan fields */
+#define I40E_MAX_MACVLANS 128 /* Max HW vectors - 1 on FVL */
+#define I40E_MIN_MACVLAN_VECTORS 2 /* Min vectors to enable macvlans */
+ DECLARE_BITMAP(fwd_bitmask, I40E_MAX_MACVLANS);
+ struct list_head macvlan_list;
+ int macvlan_cnt;
+
void *priv; /* client driver data reference. */
/* VSI specific handlers */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 641b500ad919..906cf68d3453 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1861,8 +1861,7 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw,
hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE)
hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU;
- if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
- hw->aq.api_min_ver >= 7) {
+ if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
__le32 tmp;
memcpy(&tmp, resp->link_type, sizeof(tmp));
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index dc5b40013e61..55d20acfcf70 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -333,8 +333,9 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
" seid = %d, id = %d, uplink_seid = %d\n",
vsi->seid, vsi->id, vsi->uplink_seid);
dev_info(&pf->pdev->dev,
- " base_queue = %d, num_queue_pairs = %d, num_desc = %d\n",
- vsi->base_queue, vsi->num_queue_pairs, vsi->num_desc);
+ " base_queue = %d, num_queue_pairs = %d, num_tx_desc = %d, num_rx_desc = %d\n",
+ vsi->base_queue, vsi->num_queue_pairs, vsi->num_tx_desc,
+ vsi->num_rx_desc);
dev_info(&pf->pdev->dev, " type = %i\n", vsi->type);
if (vsi->type == I40E_VSI_SRIOV)
dev_info(&pf->pdev->dev, " VF ID = %i\n", vsi->vf_id);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index a6c5e10421dd..527eb52c5401 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1982,6 +1982,8 @@ static int i40e_set_ringparam(struct net_device *netdev,
if (i40e_enabled_xdp_vsi(vsi))
vsi->xdp_rings[i]->count = new_tx_count;
}
+ vsi->num_tx_desc = new_tx_count;
+ vsi->num_rx_desc = new_rx_count;
goto done;
}
@@ -2118,6 +2120,8 @@ rx_unwind:
rx_rings = NULL;
}
+ vsi->num_tx_desc = new_tx_count;
+ vsi->num_rx_desc = new_rx_count;
i40e_up(vsi);
free_tx:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 7c43ec533385..5361c08328f7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -32,7 +32,7 @@ static const char i40e_driver_string[] =
__stringify(DRV_VERSION_MINOR) "." \
__stringify(DRV_VERSION_BUILD) DRV_KERN
const char i40e_driver_version_str[] = DRV_VERSION;
-static const char i40e_copyright[] = "Copyright (c) 2013 - 2014 Intel Corporation.";
+static const char i40e_copyright[] = "Copyright (c) 2013 - 2019 Intel Corporation.";
/* a bit of forward declarations */
static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi);
@@ -5861,8 +5861,10 @@ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid,
return -ENOENT;
}
- /* Success, update channel */
- ch->enabled_tc = enabled_tc;
+ /* Success, update channel, set enabled_tc only if the channel
+ * is not a macvlan
+ */
+ ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc;
ch->seid = ctxt.seid;
ch->vsi_number = ctxt.vsi_number;
ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx);
@@ -6410,6 +6412,50 @@ static int i40e_resume_port_tx(struct i40e_pf *pf)
}
/**
+ * i40e_update_dcb_config
+ * @hw: pointer to the HW struct
+ * @enable_mib_change: enable MIB change event
+ *
+ * Update DCB configuration from the firmware
+ **/
+static enum i40e_status_code
+i40e_update_dcb_config(struct i40e_hw *hw, bool enable_mib_change)
+{
+ struct i40e_lldp_variables lldp_cfg;
+ i40e_status ret;
+
+ if (!hw->func_caps.dcb)
+ return I40E_NOT_SUPPORTED;
+
+ /* Read LLDP NVM area */
+ ret = i40e_read_lldp_cfg(hw, &lldp_cfg);
+ if (ret)
+ return I40E_ERR_NOT_READY;
+
+ /* Get DCBX status */
+ ret = i40e_get_dcbx_status(hw, &hw->dcbx_status);
+ if (ret)
+ return ret;
+
+ /* Check the DCBX Status */
+ if (hw->dcbx_status == I40E_DCBX_STATUS_DONE ||
+ hw->dcbx_status == I40E_DCBX_STATUS_IN_PROGRESS) {
+ /* Get current DCBX configuration */
+ ret = i40e_get_dcb_config(hw);
+ if (ret)
+ return ret;
+ } else if (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED) {
+ return I40E_ERR_NOT_READY;
+ }
+
+ /* Configure the LLDP MIB change event */
+ if (enable_mib_change)
+ ret = i40e_aq_cfg_lldp_mib_change_event(hw, true, NULL);
+
+ return ret;
+}
+
+/**
* i40e_init_pf_dcb - Initialize DCB configuration
* @pf: PF being configured
*
@@ -6425,11 +6471,13 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf)
* Also do not enable DCBx if FW LLDP agent is disabled
*/
if ((pf->hw_features & I40E_HW_NO_DCB_SUPPORT) ||
- (pf->flags & I40E_FLAG_DISABLE_FW_LLDP))
+ (pf->flags & I40E_FLAG_DISABLE_FW_LLDP)) {
+ dev_info(&pf->pdev->dev, "DCB is not supported or FW LLDP is disabled\n");
+ err = I40E_NOT_SUPPORTED;
goto out;
+ }
- /* Get the initial DCB configuration */
- err = i40e_init_dcb(hw, true);
+ err = i40e_update_dcb_config(hw, true);
if (!err) {
/* Device/Function is not DCBX capable */
if ((!hw->func_caps.dcb) ||
@@ -6866,6 +6914,489 @@ static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi)
}
/**
+ * i40e_del_macvlan_filter
+ * @hw: pointer to the HW structure
+ * @seid: seid of the channel VSI
+ * @macaddr: the mac address to apply as a filter
+ * @aq_err: store the admin Q error
+ *
+ * This function deletes a mac filter on the channel VSI which serves as the
+ * macvlan. Returns 0 on success.
+ **/
+static i40e_status i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid,
+ const u8 *macaddr, int *aq_err)
+{
+ struct i40e_aqc_remove_macvlan_element_data element;
+ i40e_status status;
+
+ memset(&element, 0, sizeof(element));
+ ether_addr_copy(element.mac_addr, macaddr);
+ element.vlan_tag = 0;
+ element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
+ status = i40e_aq_remove_macvlan(hw, seid, &element, 1, NULL);
+ *aq_err = hw->aq.asq_last_status;
+
+ return status;
+}
+
+/**
+ * i40e_add_macvlan_filter
+ * @hw: pointer to the HW structure
+ * @seid: seid of the channel VSI
+ * @macaddr: the mac address to apply as a filter
+ * @aq_err: store the admin Q error
+ *
+ * This function adds a mac filter on the channel VSI which serves as the
+ * macvlan. Returns 0 on success.
+ **/
+static i40e_status i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid,
+ const u8 *macaddr, int *aq_err)
+{
+ struct i40e_aqc_add_macvlan_element_data element;
+ i40e_status status;
+ u16 cmd_flags = 0;
+
+ ether_addr_copy(element.mac_addr, macaddr);
+ element.vlan_tag = 0;
+ element.queue_number = 0;
+ element.match_method = I40E_AQC_MM_ERR_NO_RES;
+ cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH;
+ element.flags = cpu_to_le16(cmd_flags);
+ status = i40e_aq_add_macvlan(hw, seid, &element, 1, NULL);
+ *aq_err = hw->aq.asq_last_status;
+
+ return status;
+}
+
+/**
+ * i40e_reset_ch_rings - Reset the queue contexts in a channel
+ * @vsi: the VSI we want to access
+ * @ch: the channel we want to access
+ */
+static void i40e_reset_ch_rings(struct i40e_vsi *vsi, struct i40e_channel *ch)
+{
+ struct i40e_ring *tx_ring, *rx_ring;
+ u16 pf_q;
+ int i;
+
+ for (i = 0; i < ch->num_queue_pairs; i++) {
+ pf_q = ch->base_queue + i;
+ tx_ring = vsi->tx_rings[pf_q];
+ tx_ring->ch = NULL;
+ rx_ring = vsi->rx_rings[pf_q];
+ rx_ring->ch = NULL;
+ }
+}
+
+/**
+ * i40e_free_macvlan_channels
+ * @vsi: the VSI we want to access
+ *
+ * This function frees the Qs of the channel VSI from
+ * the stack and also deletes the channel VSIs which
+ * serve as macvlans.
+ */
+static void i40e_free_macvlan_channels(struct i40e_vsi *vsi)
+{
+ struct i40e_channel *ch, *ch_tmp;
+ int ret;
+
+ if (list_empty(&vsi->macvlan_list))
+ return;
+
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+ struct i40e_vsi *parent_vsi;
+
+ if (i40e_is_channel_macvlan(ch)) {
+ i40e_reset_ch_rings(vsi, ch);
+ clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
+ netdev_unbind_sb_channel(vsi->netdev, ch->fwd->netdev);
+ netdev_set_sb_channel(ch->fwd->netdev, 0);
+ kfree(ch->fwd);
+ ch->fwd = NULL;
+ }
+
+ list_del(&ch->list);
+ parent_vsi = ch->parent_vsi;
+ if (!parent_vsi || !ch->initialized) {
+ kfree(ch);
+ continue;
+ }
+
+ /* remove the VSI */
+ ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
+ NULL);
+ if (ret)
+ dev_err(&vsi->back->pdev->dev,
+ "unable to remove channel (%d) for parent VSI(%d)\n",
+ ch->seid, parent_vsi->seid);
+ kfree(ch);
+ }
+ vsi->macvlan_cnt = 0;
+}
+
+/**
+ * i40e_fwd_ring_up - bring the macvlan device up
+ * @vsi: the VSI we want to access
+ * @vdev: macvlan netdevice
+ * @fwd: the private fwd structure
+ */
+static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev,
+ struct i40e_fwd_adapter *fwd)
+{
+ int ret = 0, num_tc = 1, i, aq_err;
+ struct i40e_channel *ch, *ch_tmp;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+
+ if (list_empty(&vsi->macvlan_list))
+ return -EINVAL;
+
+ /* Go through the list and find an available channel */
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+ if (!i40e_is_channel_macvlan(ch)) {
+ ch->fwd = fwd;
+ /* record configuration for macvlan interface in vdev */
+ for (i = 0; i < num_tc; i++)
+ netdev_bind_sb_channel_queue(vsi->netdev, vdev,
+ i,
+ ch->num_queue_pairs,
+ ch->base_queue);
+ for (i = 0; i < ch->num_queue_pairs; i++) {
+ struct i40e_ring *tx_ring, *rx_ring;
+ u16 pf_q;
+
+ pf_q = ch->base_queue + i;
+
+ /* Get to TX ring ptr */
+ tx_ring = vsi->tx_rings[pf_q];
+ tx_ring->ch = ch;
+
+ /* Get the RX ring ptr */
+ rx_ring = vsi->rx_rings[pf_q];
+ rx_ring->ch = ch;
+ }
+ break;
+ }
+ }
+
+ /* Guarantee all rings are updated before we update the
+ * MAC address filter.
+ */
+ wmb();
+
+ /* Add a mac filter */
+ ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err);
+ if (ret) {
+ /* if we cannot add the MAC rule then disable the offload */
+ macvlan_release_l2fw_offload(vdev);
+ for (i = 0; i < ch->num_queue_pairs; i++) {
+ struct i40e_ring *rx_ring;
+ u16 pf_q;
+
+ pf_q = ch->base_queue + i;
+ rx_ring = vsi->rx_rings[pf_q];
+ rx_ring->netdev = NULL;
+ }
+ dev_info(&pf->pdev->dev,
+ "Error adding mac filter on macvlan err %s, aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, aq_err));
+ netdev_err(vdev, "L2fwd offload disabled to L2 filter error\n");
+ }
+
+ return ret;
+}
+
+/**
+ * i40e_setup_macvlans - create the channels which will be macvlans
+ * @vsi: the VSI we want to access
+ * @macvlan_cnt: no. of macvlans to be setup
+ * @qcnt: no. of Qs per macvlan
+ * @vdev: macvlan netdevice
+ */
+static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt,
+ struct net_device *vdev)
+{
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ struct i40e_vsi_context ctxt;
+ u16 sections, qmap, num_qps;
+ struct i40e_channel *ch;
+ int i, pow, ret = 0;
+ u8 offset = 0;
+
+ if (vsi->type != I40E_VSI_MAIN || !macvlan_cnt)
+ return -EINVAL;
+
+ num_qps = vsi->num_queue_pairs - (macvlan_cnt * qcnt);
+
+ /* find the next higher power-of-2 of num queue pairs */
+ pow = fls(roundup_pow_of_two(num_qps) - 1);
+
+ qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+ (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+ /* Setup context bits for the main VSI */
+ sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+ sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+ memset(&ctxt, 0, sizeof(ctxt));
+ ctxt.seid = vsi->seid;
+ ctxt.pf_num = vsi->back->hw.pf_id;
+ ctxt.vf_num = 0;
+ ctxt.uplink_seid = vsi->uplink_seid;
+ ctxt.info = vsi->info;
+ ctxt.info.tc_mapping[0] = cpu_to_le16(qmap);
+ ctxt.info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+ ctxt.info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
+ ctxt.info.valid_sections |= cpu_to_le16(sections);
+
+ /* Reconfigure RSS for main VSI with new max queue count */
+ vsi->rss_size = max_t(u16, num_qps, qcnt);
+ ret = i40e_vsi_config_rss(vsi);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Failed to reconfig RSS for num_queues (%u)\n",
+ vsi->rss_size);
+ return ret;
+ }
+ vsi->reconfig_rss = true;
+ dev_dbg(&vsi->back->pdev->dev,
+ "Reconfigured RSS with num_queues (%u)\n", vsi->rss_size);
+ vsi->next_base_queue = num_qps;
+ vsi->cnt_q_avail = vsi->num_queue_pairs - num_qps;
+
+ /* Update the VSI after updating the VSI queue-mapping
+ * information
+ */
+ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Update vsi tc config failed, err %s aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return ret;
+ }
+ /* update the local VSI info with updated queue map */
+ i40e_vsi_update_queue_map(vsi, &ctxt);
+ vsi->info.valid_sections = 0;
+
+ /* Create channels for macvlans */
+ INIT_LIST_HEAD(&vsi->macvlan_list);
+ for (i = 0; i < macvlan_cnt; i++) {
+ ch = kzalloc(sizeof(*ch), GFP_KERNEL);
+ if (!ch) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+ INIT_LIST_HEAD(&ch->list);
+ ch->num_queue_pairs = qcnt;
+ if (!i40e_setup_channel(pf, vsi, ch)) {
+ ret = -EINVAL;
+ goto err_free;
+ }
+ ch->parent_vsi = vsi;
+ vsi->cnt_q_avail -= ch->num_queue_pairs;
+ vsi->macvlan_cnt++;
+ list_add_tail(&ch->list, &vsi->macvlan_list);
+ }
+
+ return ret;
+
+err_free:
+ dev_info(&pf->pdev->dev, "Failed to setup macvlans\n");
+ i40e_free_macvlan_channels(vsi);
+
+ return ret;
+}
+
+/**
+ * i40e_fwd_add - configure macvlans
+ * @netdev: net device to configure
+ * @vdev: macvlan netdevice
+ **/
+static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev)
+{
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ u16 q_per_macvlan = 0, macvlan_cnt = 0, vectors;
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_fwd_adapter *fwd;
+ int avail_macvlan, ret;
+
+ if ((pf->flags & I40E_FLAG_DCB_ENABLED)) {
+ netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n");
+ return ERR_PTR(-EINVAL);
+ }
+ if ((pf->flags & I40E_FLAG_TC_MQPRIO)) {
+ netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n");
+ return ERR_PTR(-EINVAL);
+ }
+ if (pf->num_lan_msix < I40E_MIN_MACVLAN_VECTORS) {
+ netdev_info(netdev, "Not enough vectors available to support macvlans\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* The macvlan device has to be a single Q device so that the
+ * tc_to_txq field can be reused to pick the tx queue.
+ */
+ if (netif_is_multiqueue(vdev))
+ return ERR_PTR(-ERANGE);
+
+ if (!vsi->macvlan_cnt) {
+ /* reserve bit 0 for the pf device */
+ set_bit(0, vsi->fwd_bitmask);
+
+ /* Try to reserve as many queues as possible for macvlans. First
+ * reserve 3/4th of max vectors, then half, then quarter and
+ * calculate Qs per macvlan as you go
+ */
+ vectors = pf->num_lan_msix;
+ if (vectors <= I40E_MAX_MACVLANS && vectors > 64) {
+ /* allocate 4 Qs per macvlan and 32 Qs to the PF*/
+ q_per_macvlan = 4;
+ macvlan_cnt = (vectors - 32) / 4;
+ } else if (vectors <= 64 && vectors > 32) {
+ /* allocate 2 Qs per macvlan and 16 Qs to the PF*/
+ q_per_macvlan = 2;
+ macvlan_cnt = (vectors - 16) / 2;
+ } else if (vectors <= 32 && vectors > 16) {
+ /* allocate 1 Q per macvlan and 16 Qs to the PF*/
+ q_per_macvlan = 1;
+ macvlan_cnt = vectors - 16;
+ } else if (vectors <= 16 && vectors > 8) {
+ /* allocate 1 Q per macvlan and 8 Qs to the PF */
+ q_per_macvlan = 1;
+ macvlan_cnt = vectors - 8;
+ } else {
+ /* allocate 1 Q per macvlan and 1 Q to the PF */
+ q_per_macvlan = 1;
+ macvlan_cnt = vectors - 1;
+ }
+
+ if (macvlan_cnt == 0)
+ return ERR_PTR(-EBUSY);
+
+ /* Quiesce VSI queues */
+ i40e_quiesce_vsi(vsi);
+
+ /* sets up the macvlans but does not "enable" them */
+ ret = i40e_setup_macvlans(vsi, macvlan_cnt, q_per_macvlan,
+ vdev);
+ if (ret)
+ return ERR_PTR(ret);
+
+ /* Unquiesce VSI */
+ i40e_unquiesce_vsi(vsi);
+ }
+ avail_macvlan = find_first_zero_bit(vsi->fwd_bitmask,
+ vsi->macvlan_cnt);
+ if (avail_macvlan >= I40E_MAX_MACVLANS)
+ return ERR_PTR(-EBUSY);
+
+ /* create the fwd struct */
+ fwd = kzalloc(sizeof(*fwd), GFP_KERNEL);
+ if (!fwd)
+ return ERR_PTR(-ENOMEM);
+
+ set_bit(avail_macvlan, vsi->fwd_bitmask);
+ fwd->bit_no = avail_macvlan;
+ netdev_set_sb_channel(vdev, avail_macvlan);
+ fwd->netdev = vdev;
+
+ if (!netif_running(netdev))
+ return fwd;
+
+ /* Set fwd ring up */
+ ret = i40e_fwd_ring_up(vsi, vdev, fwd);
+ if (ret) {
+ /* unbind the queues and drop the subordinate channel config */
+ netdev_unbind_sb_channel(netdev, vdev);
+ netdev_set_sb_channel(vdev, 0);
+
+ kfree(fwd);
+ return ERR_PTR(-EINVAL);
+ }
+
+ return fwd;
+}
+
+/**
+ * i40e_del_all_macvlans - Delete all the mac filters on the channels
+ * @vsi: the VSI we want to access
+ */
+static void i40e_del_all_macvlans(struct i40e_vsi *vsi)
+{
+ struct i40e_channel *ch, *ch_tmp;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ int aq_err, ret = 0;
+
+ if (list_empty(&vsi->macvlan_list))
+ return;
+
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+ if (i40e_is_channel_macvlan(ch)) {
+ ret = i40e_del_macvlan_filter(hw, ch->seid,
+ i40e_channel_mac(ch),
+ &aq_err);
+ if (!ret) {
+ /* Reset queue contexts */
+ i40e_reset_ch_rings(vsi, ch);
+ clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
+ netdev_unbind_sb_channel(vsi->netdev,
+ ch->fwd->netdev);
+ netdev_set_sb_channel(ch->fwd->netdev, 0);
+ kfree(ch->fwd);
+ ch->fwd = NULL;
+ }
+ }
+ }
+}
+
+/**
+ * i40e_fwd_del - delete macvlan interfaces
+ * @netdev: net device to configure
+ * @vdev: macvlan netdevice
+ */
+static void i40e_fwd_del(struct net_device *netdev, void *vdev)
+{
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_fwd_adapter *fwd = vdev;
+ struct i40e_channel *ch, *ch_tmp;
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ int aq_err, ret = 0;
+
+ /* Find the channel associated with the macvlan and del mac filter */
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+ if (i40e_is_channel_macvlan(ch) &&
+ ether_addr_equal(i40e_channel_mac(ch),
+ fwd->netdev->dev_addr)) {
+ ret = i40e_del_macvlan_filter(hw, ch->seid,
+ i40e_channel_mac(ch),
+ &aq_err);
+ if (!ret) {
+ /* Reset queue contexts */
+ i40e_reset_ch_rings(vsi, ch);
+ clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
+ netdev_unbind_sb_channel(netdev, fwd->netdev);
+ netdev_set_sb_channel(fwd->netdev, 0);
+ kfree(ch->fwd);
+ ch->fwd = NULL;
+ } else {
+ dev_info(&pf->pdev->dev,
+ "Error deleting mac filter on macvlan err %s, aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, aq_err));
+ }
+ break;
+ }
+ }
+}
+
+/**
* i40e_setup_tc - configure multiple traffic classes
* @netdev: net device to configure
* @type_data: tc offload data
@@ -6960,6 +7491,10 @@ config_tc:
vsi->seid);
need_reset = true;
goto exit;
+ } else {
+ dev_info(&vsi->back->pdev->dev,
+ "Setup channel (id:%u) utilizing num_queues %d\n",
+ vsi->seid, vsi->tc_config.tc_info[0].qcount);
}
if (pf->flags & I40E_FLAG_TC_MQPRIO) {
@@ -10028,8 +10563,12 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
switch (vsi->type) {
case I40E_VSI_MAIN:
vsi->alloc_queue_pairs = pf->num_lan_qps;
- vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
- I40E_REQ_DESCRIPTOR_MULTIPLE);
+ if (!vsi->num_tx_desc)
+ vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+ I40E_REQ_DESCRIPTOR_MULTIPLE);
+ if (!vsi->num_rx_desc)
+ vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+ I40E_REQ_DESCRIPTOR_MULTIPLE);
if (pf->flags & I40E_FLAG_MSIX_ENABLED)
vsi->num_q_vectors = pf->num_lan_msix;
else
@@ -10039,22 +10578,32 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
case I40E_VSI_FDIR:
vsi->alloc_queue_pairs = 1;
- vsi->num_desc = ALIGN(I40E_FDIR_RING_COUNT,
- I40E_REQ_DESCRIPTOR_MULTIPLE);
+ vsi->num_tx_desc = ALIGN(I40E_FDIR_RING_COUNT,
+ I40E_REQ_DESCRIPTOR_MULTIPLE);
+ vsi->num_rx_desc = ALIGN(I40E_FDIR_RING_COUNT,
+ I40E_REQ_DESCRIPTOR_MULTIPLE);
vsi->num_q_vectors = pf->num_fdsb_msix;
break;
case I40E_VSI_VMDQ2:
vsi->alloc_queue_pairs = pf->num_vmdq_qps;
- vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
- I40E_REQ_DESCRIPTOR_MULTIPLE);
+ if (!vsi->num_tx_desc)
+ vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+ I40E_REQ_DESCRIPTOR_MULTIPLE);
+ if (!vsi->num_rx_desc)
+ vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+ I40E_REQ_DESCRIPTOR_MULTIPLE);
vsi->num_q_vectors = pf->num_vmdq_msix;
break;
case I40E_VSI_SRIOV:
vsi->alloc_queue_pairs = pf->num_vf_qps;
- vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
- I40E_REQ_DESCRIPTOR_MULTIPLE);
+ if (!vsi->num_tx_desc)
+ vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+ I40E_REQ_DESCRIPTOR_MULTIPLE);
+ if (!vsi->num_rx_desc)
+ vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+ I40E_REQ_DESCRIPTOR_MULTIPLE);
break;
default:
@@ -10330,7 +10879,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
ring->vsi = vsi;
ring->netdev = vsi->netdev;
ring->dev = &pf->pdev->dev;
- ring->count = vsi->num_desc;
+ ring->count = vsi->num_tx_desc;
ring->size = 0;
ring->dcb_tc = 0;
if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
@@ -10347,7 +10896,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
ring->vsi = vsi;
ring->netdev = NULL;
ring->dev = &pf->pdev->dev;
- ring->count = vsi->num_desc;
+ ring->count = vsi->num_tx_desc;
ring->size = 0;
ring->dcb_tc = 0;
if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
@@ -10363,7 +10912,7 @@ setup_rx:
ring->vsi = vsi;
ring->netdev = vsi->netdev;
ring->dev = &pf->pdev->dev;
- ring->count = vsi->num_desc;
+ ring->count = vsi->num_rx_desc;
ring->size = 0;
ring->dcb_tc = 0;
ring->itr_setting = pf->rx_itr_default;
@@ -11601,6 +12150,9 @@ static int i40e_set_features(struct net_device *netdev,
return -EINVAL;
}
+ if (!(features & NETIF_F_HW_L2FW_DOFFLOAD) && vsi->macvlan_cnt)
+ i40e_del_all_macvlans(vsi);
+
need_reset = i40e_set_ntuple(pf, features);
if (need_reset)
@@ -12345,6 +12897,8 @@ static const struct net_device_ops i40e_netdev_ops = {
.ndo_bpf = i40e_xdp,
.ndo_xdp_xmit = i40e_xdp_xmit,
.ndo_xsk_async_xmit = i40e_xsk_async_xmit,
+ .ndo_dfwd_add_station = i40e_fwd_add,
+ .ndo_dfwd_del_station = i40e_fwd_del,
};
/**
@@ -12404,6 +12958,9 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
/* record features VLANs can make use of */
netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
+ /* enable macvlan offloads */
+ netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
+
hw_features = hw_enc_features |
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX;
@@ -14397,6 +14954,11 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_drvdata(pdev, pf);
pci_save_state(pdev);
+ dev_info(&pdev->dev,
+ (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) ?
+ "FW LLDP is disabled\n" :
+ "FW LLDP is enabled\n");
+
/* Enable FW to write default DCB config on link-up */
i40e_aq_set_dcb_parameters(hw, true, NULL);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 882627073dce..eac88bcc6c06 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -350,6 +350,10 @@ i40e_virtchnl_link_speed(enum i40e_aq_link_speed link_speed)
return VIRTCHNL_LINK_SPEED_100MB;
case I40E_LINK_SPEED_1GB:
return VIRTCHNL_LINK_SPEED_1GB;
+ case I40E_LINK_SPEED_2_5GB:
+ return VIRTCHNL_LINK_SPEED_2_5GB;
+ case I40E_LINK_SPEED_5GB:
+ return VIRTCHNL_LINK_SPEED_5GB;
case I40E_LINK_SPEED_10GB:
return VIRTCHNL_LINK_SPEED_10GB;
case I40E_LINK_SPEED_40GB:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 439c35f0c581..11394a52e21c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -140,8 +140,7 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
* @ptp: The PTP clock structure
* @delta: Offset in nanoseconds to adjust the PHC time by
*
- * Adjust the frequency of the PHC by the indicated parts per billion from the
- * base frequency.
+ * Adjust the current clock time by a delta specified in nanoseconds.
**/
static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
{
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index ac3a130ee7d4..02b09a8ad54c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -440,7 +440,7 @@ static int i40e_config_iwarp_qvlist(struct i40e_vf *vf,
struct virtchnl_iwarp_qv_info *qv_info;
u32 v_idx, i, reg_idx, reg;
u32 next_q_idx, next_q_type;
- u32 msix_vf, size;
+ u32 msix_vf;
int ret = 0;
msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
@@ -454,11 +454,10 @@ static int i40e_config_iwarp_qvlist(struct i40e_vf *vf,
goto err_out;
}
- size = sizeof(struct virtchnl_iwarp_qvlist_info) +
- (sizeof(struct virtchnl_iwarp_qv_info) *
- (qvlist_info->num_vectors - 1));
kfree(vf->qvlist_info);
- vf->qvlist_info = kzalloc(size, GFP_KERNEL);
+ vf->qvlist_info = kzalloc(struct_size(vf->qvlist_info, qv_info,
+ qvlist_info->num_vectors - 1),
+ GFP_KERNEL);
if (!vf->qvlist_info) {
ret = -ENOMEM;
goto err_out;
@@ -1846,7 +1845,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
i40e_status aq_ret = 0;
struct i40e_vsi *vsi;
int num_vsis = 1;
- int len = 0;
+ size_t len = 0;
int ret;
if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
@@ -1854,9 +1853,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
goto err;
}
- len = (sizeof(struct virtchnl_vf_resource) +
- sizeof(struct virtchnl_vsi_resource) * num_vsis);
-
+ len = struct_size(vfres, vsi_res, num_vsis);
vfres = kzalloc(len, GFP_KERNEL);
if (!vfres) {
aq_ret = I40E_ERR_NO_MEMORY;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 557c565c26fc..32bad014d76c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -641,8 +641,8 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
struct i40e_tx_desc *tx_desc = NULL;
struct i40e_tx_buffer *tx_bi;
bool work_done = true;
+ struct xdp_desc desc;
dma_addr_t dma;
- u32 len;
while (budget-- > 0) {
if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) {
@@ -651,21 +651,23 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
break;
}
- if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &dma, &len))
+ if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
break;
- dma_sync_single_for_device(xdp_ring->dev, dma, len,
+ dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
+
+ dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
DMA_BIDIRECTIONAL);
tx_bi = &xdp_ring->tx_bi[xdp_ring->next_to_use];
- tx_bi->bytecount = len;
+ tx_bi->bytecount = desc.len;
tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use);
tx_desc->buffer_addr = cpu_to_le64(dma);
tx_desc->cmd_type_offset_bsz =
build_ctob(I40E_TX_DESC_CMD_ICRC
| I40E_TX_DESC_CMD_EOP,
- 0, len, 0);
+ 0, desc.len, 0);
xdp_ring->next_to_use++;
if (xdp_ring->next_to_use == xdp_ring->count)
diff --git a/drivers/net/ethernet/intel/iavf/iavf_osdep.h b/drivers/net/ethernet/intel/iavf/iavf_osdep.h
index d39684558597..a452ce90679a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_osdep.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_osdep.h
@@ -44,8 +44,12 @@ struct iavf_virt_mem {
#define iavf_allocate_virt_mem(h, m, s) iavf_allocate_virt_mem_d(h, m, s)
#define iavf_free_virt_mem(h, m) iavf_free_virt_mem_d(h, m)
-#define iavf_debug(h, m, s, ...) iavf_debug_d(h, m, s, ##__VA_ARGS__)
-extern void iavf_debug_d(void *hw, u32 mask, char *fmt_str, ...)
- __printf(3, 4);
+#define iavf_debug(h, m, s, ...) \
+do { \
+ if (((m) & (h)->debug_mask)) \
+ pr_info("iavf %02x:%02x.%x " s, \
+ (h)->bus.bus_id, (h)->bus.device, \
+ (h)->bus.func, ##__VA_ARGS__); \
+} while (0)
#endif /* _IAVF_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index 1cde1601bc32..0cca1b589b56 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -1296,7 +1296,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring,
struct iavf_rx_buffer *rx_buffer,
unsigned int size)
{
- void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+ void *va;
#if (PAGE_SIZE < 8192)
unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2;
#else
@@ -1308,6 +1308,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring,
if (!rx_buffer)
return NULL;
/* prefetch first cache line of first page */
+ va = page_address(rx_buffer->page) + rx_buffer->page_offset;
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
@@ -1362,7 +1363,7 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring,
struct iavf_rx_buffer *rx_buffer,
unsigned int size)
{
- void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+ void *va;
#if (PAGE_SIZE < 8192)
unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2;
#else
@@ -1374,6 +1375,7 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring,
if (!rx_buffer)
return NULL;
/* prefetch first cache line of first page */
+ va = page_address(rx_buffer->page) + rx_buffer->page_offset;
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index cb7c56c5afe6..d49d58a6de80 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -242,7 +242,8 @@ void iavf_configure_queues(struct iavf_adapter *adapter)
struct virtchnl_vsi_queue_config_info *vqci;
struct virtchnl_queue_pair_info *vqpi;
int pairs = adapter->num_active_queues;
- int i, len, max_frame = IAVF_MAX_RXBUFFER;
+ int i, max_frame = IAVF_MAX_RXBUFFER;
+ size_t len;
if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
/* bail because we already have a command pending */
@@ -251,8 +252,7 @@ void iavf_configure_queues(struct iavf_adapter *adapter)
return;
}
adapter->current_op = VIRTCHNL_OP_CONFIG_VSI_QUEUES;
- len = sizeof(struct virtchnl_vsi_queue_config_info) +
- (sizeof(struct virtchnl_queue_pair_info) * pairs);
+ len = struct_size(vqci, qpair, pairs);
vqci = kzalloc(len, GFP_KERNEL);
if (!vqci)
return;
@@ -351,8 +351,9 @@ void iavf_map_queues(struct iavf_adapter *adapter)
{
struct virtchnl_irq_map_info *vimi;
struct virtchnl_vector_map *vecmap;
- int v_idx, q_vectors, len;
struct iavf_q_vector *q_vector;
+ int v_idx, q_vectors;
+ size_t len;
if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
/* bail because we already have a command pending */
@@ -364,9 +365,7 @@ void iavf_map_queues(struct iavf_adapter *adapter)
q_vectors = adapter->num_msix_vectors - NONQ_VECS;
- len = sizeof(struct virtchnl_irq_map_info) +
- (adapter->num_msix_vectors *
- sizeof(struct virtchnl_vector_map));
+ len = struct_size(vimi, vecmap, adapter->num_msix_vectors);
vimi = kzalloc(len, GFP_KERNEL);
if (!vimi)
return;
@@ -433,9 +432,10 @@ int iavf_request_queues(struct iavf_adapter *adapter, int num)
void iavf_add_ether_addrs(struct iavf_adapter *adapter)
{
struct virtchnl_ether_addr_list *veal;
- int len, i = 0, count = 0;
struct iavf_mac_filter *f;
+ int i = 0, count = 0;
bool more = false;
+ size_t len;
if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
/* bail because we already have a command pending */
@@ -457,15 +457,13 @@ void iavf_add_ether_addrs(struct iavf_adapter *adapter)
}
adapter->current_op = VIRTCHNL_OP_ADD_ETH_ADDR;
- len = sizeof(struct virtchnl_ether_addr_list) +
- (count * sizeof(struct virtchnl_ether_addr));
+ len = struct_size(veal, list, count);
if (len > IAVF_MAX_AQ_BUF_SIZE) {
dev_warn(&adapter->pdev->dev, "Too many add MAC changes in one request\n");
count = (IAVF_MAX_AQ_BUF_SIZE -
sizeof(struct virtchnl_ether_addr_list)) /
sizeof(struct virtchnl_ether_addr);
- len = sizeof(struct virtchnl_ether_addr_list) +
- (count * sizeof(struct virtchnl_ether_addr));
+ len = struct_size(veal, list, count);
more = true;
}
@@ -505,8 +503,9 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter)
{
struct virtchnl_ether_addr_list *veal;
struct iavf_mac_filter *f, *ftmp;
- int len, i = 0, count = 0;
+ int i = 0, count = 0;
bool more = false;
+ size_t len;
if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
/* bail because we already have a command pending */
@@ -528,15 +527,13 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter)
}
adapter->current_op = VIRTCHNL_OP_DEL_ETH_ADDR;
- len = sizeof(struct virtchnl_ether_addr_list) +
- (count * sizeof(struct virtchnl_ether_addr));
+ len = struct_size(veal, list, count);
if (len > IAVF_MAX_AQ_BUF_SIZE) {
dev_warn(&adapter->pdev->dev, "Too many delete MAC changes in one request\n");
count = (IAVF_MAX_AQ_BUF_SIZE -
sizeof(struct virtchnl_ether_addr_list)) /
sizeof(struct virtchnl_ether_addr);
- len = sizeof(struct virtchnl_ether_addr_list) +
- (count * sizeof(struct virtchnl_ether_addr));
+ len = struct_size(veal, list, count);
more = true;
}
veal = kzalloc(len, GFP_ATOMIC);
@@ -973,7 +970,7 @@ static void iavf_print_link_message(struct iavf_adapter *adapter)
void iavf_enable_channels(struct iavf_adapter *adapter)
{
struct virtchnl_tc_info *vti = NULL;
- u16 len;
+ size_t len;
int i;
if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
@@ -983,9 +980,7 @@ void iavf_enable_channels(struct iavf_adapter *adapter)
return;
}
- len = ((adapter->num_tc - 1) * sizeof(struct virtchnl_channel_info)) +
- sizeof(struct virtchnl_tc_info);
-
+ len = struct_size(vti, list, adapter->num_tc - 1);
vti = kzalloc(len, GFP_KERNEL);
if (!vti)
return;
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index 8d49f83be7a5..2a232504379d 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -683,10 +683,10 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
u16 i, num_groups_added = 0;
enum ice_status status = 0;
struct ice_hw *hw = pi->hw;
- u16 buf_size;
+ size_t buf_size;
u32 teid;
- buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1);
+ buf_size = struct_size(buf, generic, num_nodes - 1);
buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
if (!buf)
return ICE_ERR_NO_MEMORY;
diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h
index 0ad737d2f289..9cb49980ec2d 100644
--- a/drivers/net/ethernet/intel/igb/e1000_regs.h
+++ b/drivers/net/ethernet/intel/igb/e1000_regs.h
@@ -409,6 +409,8 @@ do { \
#define E1000_I210_TQAVCC(_n) (0x3004 + ((_n) * 0x40))
#define E1000_I210_TQAVHC(_n) (0x300C + ((_n) * 0x40))
+#define E1000_I210_RR2DCDELAY 0x5BF4
+
#define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n))
#define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index c645d9e648e0..3182b059bf55 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -448,7 +448,7 @@ static void igb_set_msglevel(struct net_device *netdev, u32 data)
static int igb_get_regs_len(struct net_device *netdev)
{
-#define IGB_REGS_LEN 739
+#define IGB_REGS_LEN 740
return IGB_REGS_LEN * sizeof(u32);
}
@@ -675,41 +675,44 @@ static void igb_get_regs(struct net_device *netdev,
regs_buff[554] = adapter->stats.b2ogprc;
}
- if (hw->mac.type != e1000_82576)
- return;
- for (i = 0; i < 12; i++)
- regs_buff[555 + i] = rd32(E1000_SRRCTL(i + 4));
- for (i = 0; i < 4; i++)
- regs_buff[567 + i] = rd32(E1000_PSRTYPE(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[571 + i] = rd32(E1000_RDBAL(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[583 + i] = rd32(E1000_RDBAH(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[595 + i] = rd32(E1000_RDLEN(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[607 + i] = rd32(E1000_RDH(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[619 + i] = rd32(E1000_RDT(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[631 + i] = rd32(E1000_RXDCTL(i + 4));
-
- for (i = 0; i < 12; i++)
- regs_buff[643 + i] = rd32(E1000_TDBAL(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[655 + i] = rd32(E1000_TDBAH(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[667 + i] = rd32(E1000_TDLEN(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[679 + i] = rd32(E1000_TDH(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[691 + i] = rd32(E1000_TDT(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[703 + i] = rd32(E1000_TXDCTL(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[715 + i] = rd32(E1000_TDWBAL(i + 4));
- for (i = 0; i < 12; i++)
- regs_buff[727 + i] = rd32(E1000_TDWBAH(i + 4));
+ if (hw->mac.type == e1000_82576) {
+ for (i = 0; i < 12; i++)
+ regs_buff[555 + i] = rd32(E1000_SRRCTL(i + 4));
+ for (i = 0; i < 4; i++)
+ regs_buff[567 + i] = rd32(E1000_PSRTYPE(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[571 + i] = rd32(E1000_RDBAL(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[583 + i] = rd32(E1000_RDBAH(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[595 + i] = rd32(E1000_RDLEN(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[607 + i] = rd32(E1000_RDH(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[619 + i] = rd32(E1000_RDT(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[631 + i] = rd32(E1000_RXDCTL(i + 4));
+
+ for (i = 0; i < 12; i++)
+ regs_buff[643 + i] = rd32(E1000_TDBAL(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[655 + i] = rd32(E1000_TDBAH(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[667 + i] = rd32(E1000_TDLEN(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[679 + i] = rd32(E1000_TDH(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[691 + i] = rd32(E1000_TDT(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[703 + i] = rd32(E1000_TXDCTL(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[715 + i] = rd32(E1000_TDWBAL(i + 4));
+ for (i = 0; i < 12; i++)
+ regs_buff[727 + i] = rd32(E1000_TDWBAH(i + 4));
+ }
+
+ if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211)
+ regs_buff[739] = rd32(E1000_I210_RR2DCDELAY);
}
static int igb_get_eeprom_len(struct net_device *netdev)
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index fc925adbd9fa..f66dae72fe37 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -5688,6 +5688,7 @@ static void igb_tx_ctxtdesc(struct igb_ring *tx_ring,
*/
if (tx_ring->launchtime_enable) {
ts = ns_to_timespec64(first->skb->tstamp);
+ first->skb->tstamp = 0;
context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32);
} else {
context_desc->seqnum_seed = 0;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index acba067cc15a..7c52ae8ac005 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -3226,7 +3226,8 @@ static int ixgbe_get_module_info(struct net_device *dev,
page_swap = true;
}
- if (sff8472_rev == IXGBE_SFF_SFF_8472_UNSUP || page_swap) {
+ if (sff8472_rev == IXGBE_SFF_SFF_8472_UNSUP || page_swap ||
+ !(addr_mode & IXGBE_SFF_DDM_IMPLEMENTED)) {
/* We have a SFP, but it does not support SFF-8472 */
modinfo->type = ETH_MODULE_SFF_8079;
modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index ff85ce5791a3..31629fc7e820 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -842,6 +842,9 @@ void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf)
struct ixgbe_ipsec *ipsec = adapter->ipsec;
int i;
+ if (!ipsec)
+ return;
+
/* search rx sa table */
for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT && ipsec->num_rx_sa; i++) {
if (!ipsec->rx_tbl[i].used)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
index 214b01085718..6544c4539c0d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
@@ -45,6 +45,7 @@
#define IXGBE_SFF_SOFT_RS_SELECT_10G 0x8
#define IXGBE_SFF_SOFT_RS_SELECT_1G 0x0
#define IXGBE_SFF_ADDRESSING_MODE 0x4
+#define IXGBE_SFF_DDM_IMPLEMENTED 0x40
#define IXGBE_SFF_QSFP_DA_ACTIVE_CABLE 0x1
#define IXGBE_SFF_QSFP_DA_PASSIVE_CABLE 0x8
#define IXGBE_SFF_QSFP_CONNECTOR_NOT_SEPARABLE 0x23
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index 2c4d327fcc2e..0be13a90ff79 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -205,11 +205,8 @@ static void ixgbe_ptp_setup_sdp_X540(struct ixgbe_adapter *adapter)
*/
rem = (NS_PER_SEC - rem);
- /* Adjust the clock edge to align with the next full second. This
- * assumes that the cycle counter shift is small enough to avoid
- * overflowing when shifting the remainder.
- */
- clock_edge += div_u64((rem << cc->shift), cc->mult);
+ /* Adjust the clock edge to align with the next full second. */
+ clock_edge += div_u64(((u64)rem << cc->shift), cc->mult);
trgttiml = (u32)clock_edge;
trgttimh = (u32)(clock_edge >> 32);
@@ -291,11 +288,8 @@ static void ixgbe_ptp_setup_sdp_X550(struct ixgbe_adapter *adapter)
*/
rem = (NS_PER_SEC - rem);
- /* Adjust the clock edge to align with the next full second. This
- * assumes that the cycle counter shift is small enough to avoid
- * overflowing when shifting the remainder.
- */
- clock_edge += div_u64((rem << cc->shift), cc->mult);
+ /* Adjust the clock edge to align with the next full second. */
+ clock_edge += div_u64(((u64)rem << cc->shift), cc->mult);
/* X550 hardware stores the time in 32bits of 'billions of cycles' and
* 32bits of 'cycles'. There's no guarantee that cycles represents
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 6af55bb3bef3..6b609553329f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -571,8 +571,9 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
union ixgbe_adv_tx_desc *tx_desc = NULL;
struct ixgbe_tx_buffer *tx_bi;
bool work_done = true;
- u32 len, cmd_type;
+ struct xdp_desc desc;
dma_addr_t dma;
+ u32 cmd_type;
while (budget-- > 0) {
if (unlikely(!ixgbe_desc_unused(xdp_ring)) ||
@@ -581,14 +582,16 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
break;
}
- if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &dma, &len))
+ if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
break;
- dma_sync_single_for_device(xdp_ring->dev, dma, len,
+ dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
+
+ dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
DMA_BIDIRECTIONAL);
tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use];
- tx_bi->bytecount = len;
+ tx_bi->bytecount = desc.len;
tx_bi->xdpf = NULL;
tx_bi->gso_segs = 1;
@@ -599,10 +602,10 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
cmd_type = IXGBE_ADVTXD_DTYP_DATA |
IXGBE_ADVTXD_DCMD_DEXT |
IXGBE_ADVTXD_DCMD_IFCS;
- cmd_type |= len | IXGBE_TXD_CMD;
+ cmd_type |= desc.len | IXGBE_TXD_CMD;
tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
tx_desc->read.olinfo_status =
- cpu_to_le32(len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+ cpu_to_le32(desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT);
xdp_ring->next_to_use++;
if (xdp_ring->next_to_use == xdp_ring->count)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 5399787e07af..54459b69c948 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -85,22 +85,16 @@ static int ixgbevf_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *cmd)
{
struct ixgbevf_adapter *adapter = netdev_priv(netdev);
- struct ixgbe_hw *hw = &adapter->hw;
- u32 link_speed = 0;
- bool link_up;
ethtool_link_ksettings_zero_link_mode(cmd, supported);
ethtool_link_ksettings_add_link_mode(cmd, supported, 10000baseT_Full);
cmd->base.autoneg = AUTONEG_DISABLE;
cmd->base.port = -1;
- hw->mac.get_link_status = 1;
- hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
-
- if (link_up) {
+ if (adapter->link_up) {
__u32 speed = SPEED_10000;
- switch (link_speed) {
+ switch (adapter->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
speed = SPEED_10000;
break;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index d189ed247665..d2b41f9f87f8 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1423,6 +1423,9 @@ static void ixgbevf_update_itr(struct ixgbevf_q_vector *q_vector,
*/
/* what was last interrupt timeslice? */
timepassed_us = q_vector->itr >> 2;
+ if (timepassed_us == 0)
+ return;
+
bytes_perint = bytes / timepassed_us; /* bytes/usec */
switch (itr_setting) {
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_path.c b/drivers/net/ethernet/mediatek/mtk_eth_path.c
index 61f705d945e5..7f05880cf9ef 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_path.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_path.c
@@ -13,19 +13,32 @@
#include "mtk_eth_soc.h"
struct mtk_eth_muxc {
- int (*set_path)(struct mtk_eth *eth, int path);
+ const char *name;
+ int cap_bit;
+ int (*set_path)(struct mtk_eth *eth, int path);
};
-static const char * const mtk_eth_mux_name[] = {
- "mux_gdm1_to_gmac1_esw", "mux_gmac2_gmac0_to_gephy",
- "mux_u3_gmac2_to_qphy", "mux_gmac1_gmac2_to_sgmii_rgmii",
- "mux_gmac12_to_gephy_sgmii",
-};
-
-static const char * const mtk_eth_path_name[] = {
- "gmac1_rgmii", "gmac1_trgmii", "gmac1_sgmii", "gmac2_rgmii",
- "gmac2_sgmii", "gmac2_gephy", "gdm1_esw",
-};
+static const char *mtk_eth_path_name(int path)
+{
+ switch (path) {
+ case MTK_ETH_PATH_GMAC1_RGMII:
+ return "gmac1_rgmii";
+ case MTK_ETH_PATH_GMAC1_TRGMII:
+ return "gmac1_trgmii";
+ case MTK_ETH_PATH_GMAC1_SGMII:
+ return "gmac1_sgmii";
+ case MTK_ETH_PATH_GMAC2_RGMII:
+ return "gmac2_rgmii";
+ case MTK_ETH_PATH_GMAC2_SGMII:
+ return "gmac2_sgmii";
+ case MTK_ETH_PATH_GMAC2_GEPHY:
+ return "gmac2_gephy";
+ case MTK_ETH_PATH_GDM1_ESW:
+ return "gdm1_esw";
+ default:
+ return "unknown path";
+ }
+}
static int set_mux_gdm1_to_gmac1_esw(struct mtk_eth *eth, int path)
{
@@ -53,7 +66,7 @@ static int set_mux_gdm1_to_gmac1_esw(struct mtk_eth *eth, int path)
}
dev_dbg(eth->dev, "path %s in %s updated = %d\n",
- mtk_eth_path_name[path], __func__, updated);
+ mtk_eth_path_name(path), __func__, updated);
return 0;
}
@@ -76,7 +89,7 @@ static int set_mux_gmac2_gmac0_to_gephy(struct mtk_eth *eth, int path)
regmap_update_bits(eth->infra, INFRA_MISC2, GEPHY_MAC_SEL, val);
dev_dbg(eth->dev, "path %s in %s updated = %d\n",
- mtk_eth_path_name[path], __func__, updated);
+ mtk_eth_path_name(path), __func__, updated);
return 0;
}
@@ -99,7 +112,7 @@ static int set_mux_u3_gmac2_to_qphy(struct mtk_eth *eth, int path)
regmap_update_bits(eth->infra, INFRA_MISC2, CO_QPHY_SEL, val);
dev_dbg(eth->dev, "path %s in %s updated = %d\n",
- mtk_eth_path_name[path], __func__, updated);
+ mtk_eth_path_name(path), __func__, updated);
return 0;
}
@@ -137,7 +150,7 @@ static int set_mux_gmac1_gmac2_to_sgmii_rgmii(struct mtk_eth *eth, int path)
SYSCFG0_SGMII_MASK, val);
dev_dbg(eth->dev, "path %s in %s updated = %d\n",
- mtk_eth_path_name[path], __func__, updated);
+ mtk_eth_path_name(path), __func__, updated);
return 0;
}
@@ -168,26 +181,42 @@ static int set_mux_gmac12_to_gephy_sgmii(struct mtk_eth *eth, int path)
SYSCFG0_SGMII_MASK, val);
dev_dbg(eth->dev, "path %s in %s updated = %d\n",
- mtk_eth_path_name[path], __func__, updated);
+ mtk_eth_path_name(path), __func__, updated);
return 0;
}
static const struct mtk_eth_muxc mtk_eth_muxc[] = {
- { .set_path = set_mux_gdm1_to_gmac1_esw, },
- { .set_path = set_mux_gmac2_gmac0_to_gephy, },
- { .set_path = set_mux_u3_gmac2_to_qphy, },
- { .set_path = set_mux_gmac1_gmac2_to_sgmii_rgmii, },
- { .set_path = set_mux_gmac12_to_gephy_sgmii, }
+ {
+ .name = "mux_gdm1_to_gmac1_esw",
+ .cap_bit = MTK_ETH_MUX_GDM1_TO_GMAC1_ESW,
+ .set_path = set_mux_gdm1_to_gmac1_esw,
+ }, {
+ .name = "mux_gmac2_gmac0_to_gephy",
+ .cap_bit = MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY,
+ .set_path = set_mux_gmac2_gmac0_to_gephy,
+ }, {
+ .name = "mux_u3_gmac2_to_qphy",
+ .cap_bit = MTK_ETH_MUX_U3_GMAC2_TO_QPHY,
+ .set_path = set_mux_u3_gmac2_to_qphy,
+ }, {
+ .name = "mux_gmac1_gmac2_to_sgmii_rgmii",
+ .cap_bit = MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII,
+ .set_path = set_mux_gmac1_gmac2_to_sgmii_rgmii,
+ }, {
+ .name = "mux_gmac12_to_gephy_sgmii",
+ .cap_bit = MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII,
+ .set_path = set_mux_gmac12_to_gephy_sgmii,
+ },
};
static int mtk_eth_mux_setup(struct mtk_eth *eth, int path)
{
int i, err = 0;
- if (!MTK_HAS_CAPS(eth->soc->caps, MTK_PATH_BIT(path))) {
+ if (!MTK_HAS_CAPS(eth->soc->caps, path)) {
dev_err(eth->dev, "path %s isn't support on the SoC\n",
- mtk_eth_path_name[path]);
+ mtk_eth_path_name(path));
return -EINVAL;
}
@@ -195,14 +224,14 @@ static int mtk_eth_mux_setup(struct mtk_eth *eth, int path)
return 0;
/* Setup MUX in path fabric */
- for (i = 0; i < MTK_ETH_MUX_MAX; i++) {
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_MUX_BIT(i))) {
+ for (i = 0; i < ARRAY_SIZE(mtk_eth_muxc); i++) {
+ if (MTK_HAS_CAPS(eth->soc->caps, mtk_eth_muxc[i].cap_bit)) {
err = mtk_eth_muxc[i].set_path(eth, path);
if (err)
goto out;
} else {
dev_dbg(eth->dev, "mux %s isn't present on the SoC\n",
- mtk_eth_mux_name[i]);
+ mtk_eth_muxc[i].name);
}
}
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 066712f2e985..b20b3a5a1ebb 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -139,9 +139,12 @@ static int mt7621_gmac0_rgmii_adjust(struct mtk_eth *eth,
{
u32 val;
- /* Check DDR memory type. Currently DDR2 is not supported. */
+ /* Check DDR memory type.
+ * Currently TRGMII mode with DDR2 memory is not supported.
+ */
regmap_read(eth->ethsys, ETHSYS_SYSCFG, &val);
- if (val & SYSCFG_DRAM_TYPE_DDR2) {
+ if (interface == PHY_INTERFACE_MODE_TRGMII &&
+ val & SYSCFG_DRAM_TYPE_DDR2) {
dev_err(eth->dev,
"TRGMII mode with DDR2 memory is not supported!\n");
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 876ce6798709..c6be599ed94d 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -592,86 +592,97 @@ struct mtk_rx_ring {
u32 crx_idx_reg;
};
-enum mtk_eth_mux {
- MTK_ETH_MUX_GDM1_TO_GMAC1_ESW,
- MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY,
- MTK_ETH_MUX_U3_GMAC2_TO_QPHY,
- MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII,
- MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII,
- MTK_ETH_MUX_MAX,
-};
-
-enum mtk_eth_path {
- MTK_ETH_PATH_GMAC1_RGMII,
- MTK_ETH_PATH_GMAC1_TRGMII,
- MTK_ETH_PATH_GMAC1_SGMII,
- MTK_ETH_PATH_GMAC2_RGMII,
- MTK_ETH_PATH_GMAC2_SGMII,
- MTK_ETH_PATH_GMAC2_GEPHY,
- MTK_ETH_PATH_GDM1_ESW,
- MTK_ETH_PATH_MAX,
+enum mkt_eth_capabilities {
+ MTK_RGMII_BIT = 0,
+ MTK_TRGMII_BIT,
+ MTK_SGMII_BIT,
+ MTK_ESW_BIT,
+ MTK_GEPHY_BIT,
+ MTK_MUX_BIT,
+ MTK_INFRA_BIT,
+ MTK_SHARED_SGMII_BIT,
+ MTK_HWLRO_BIT,
+ MTK_SHARED_INT_BIT,
+ MTK_TRGMII_MT7621_CLK_BIT,
+
+ /* MUX BITS*/
+ MTK_ETH_MUX_GDM1_TO_GMAC1_ESW_BIT,
+ MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY_BIT,
+ MTK_ETH_MUX_U3_GMAC2_TO_QPHY_BIT,
+ MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII_BIT,
+ MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII_BIT,
+
+ /* PATH BITS */
+ MTK_ETH_PATH_GMAC1_RGMII_BIT,
+ MTK_ETH_PATH_GMAC1_TRGMII_BIT,
+ MTK_ETH_PATH_GMAC1_SGMII_BIT,
+ MTK_ETH_PATH_GMAC2_RGMII_BIT,
+ MTK_ETH_PATH_GMAC2_SGMII_BIT,
+ MTK_ETH_PATH_GMAC2_GEPHY_BIT,
+ MTK_ETH_PATH_GDM1_ESW_BIT,
};
/* Supported hardware group on SoCs */
-#define MTK_RGMII BIT(0)
-#define MTK_TRGMII BIT(1)
-#define MTK_SGMII BIT(2)
-#define MTK_ESW BIT(3)
-#define MTK_GEPHY BIT(4)
-#define MTK_MUX BIT(5)
-#define MTK_INFRA BIT(6)
-#define MTK_SHARED_SGMII BIT(7)
-#define MTK_HWLRO BIT(8)
-#define MTK_SHARED_INT BIT(9)
-#define MTK_TRGMII_MT7621_CLK BIT(10)
+#define MTK_RGMII BIT(MTK_RGMII_BIT)
+#define MTK_TRGMII BIT(MTK_TRGMII_BIT)
+#define MTK_SGMII BIT(MTK_SGMII_BIT)
+#define MTK_ESW BIT(MTK_ESW_BIT)
+#define MTK_GEPHY BIT(MTK_GEPHY_BIT)
+#define MTK_MUX BIT(MTK_MUX_BIT)
+#define MTK_INFRA BIT(MTK_INFRA_BIT)
+#define MTK_SHARED_SGMII BIT(MTK_SHARED_SGMII_BIT)
+#define MTK_HWLRO BIT(MTK_HWLRO_BIT)
+#define MTK_SHARED_INT BIT(MTK_SHARED_INT_BIT)
+#define MTK_TRGMII_MT7621_CLK BIT(MTK_TRGMII_MT7621_CLK_BIT)
+
+#define MTK_ETH_MUX_GDM1_TO_GMAC1_ESW \
+ BIT(MTK_ETH_MUX_GDM1_TO_GMAC1_ESW_BIT)
+#define MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY \
+ BIT(MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY_BIT)
+#define MTK_ETH_MUX_U3_GMAC2_TO_QPHY \
+ BIT(MTK_ETH_MUX_U3_GMAC2_TO_QPHY_BIT)
+#define MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII \
+ BIT(MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII_BIT)
+#define MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII \
+ BIT(MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII_BIT)
/* Supported path present on SoCs */
-#define MTK_PATH_BIT(x) BIT((x) + 10)
-
-#define MTK_GMAC1_RGMII \
- (MTK_PATH_BIT(MTK_ETH_PATH_GMAC1_RGMII) | MTK_RGMII)
-
-#define MTK_GMAC1_TRGMII \
- (MTK_PATH_BIT(MTK_ETH_PATH_GMAC1_TRGMII) | MTK_TRGMII)
-
-#define MTK_GMAC1_SGMII \
- (MTK_PATH_BIT(MTK_ETH_PATH_GMAC1_SGMII) | MTK_SGMII)
-
-#define MTK_GMAC2_RGMII \
- (MTK_PATH_BIT(MTK_ETH_PATH_GMAC2_RGMII) | MTK_RGMII)
-
-#define MTK_GMAC2_SGMII \
- (MTK_PATH_BIT(MTK_ETH_PATH_GMAC2_SGMII) | MTK_SGMII)
-
-#define MTK_GMAC2_GEPHY \
- (MTK_PATH_BIT(MTK_ETH_PATH_GMAC2_GEPHY) | MTK_GEPHY)
-
-#define MTK_GDM1_ESW \
- (MTK_PATH_BIT(MTK_ETH_PATH_GDM1_ESW) | MTK_ESW)
-
-#define MTK_MUX_BIT(x) BIT((x) + 20)
+#define MTK_ETH_PATH_GMAC1_RGMII BIT(MTK_ETH_PATH_GMAC1_RGMII_BIT)
+#define MTK_ETH_PATH_GMAC1_TRGMII BIT(MTK_ETH_PATH_GMAC1_TRGMII_BIT)
+#define MTK_ETH_PATH_GMAC1_SGMII BIT(MTK_ETH_PATH_GMAC1_SGMII_BIT)
+#define MTK_ETH_PATH_GMAC2_RGMII BIT(MTK_ETH_PATH_GMAC2_RGMII_BIT)
+#define MTK_ETH_PATH_GMAC2_SGMII BIT(MTK_ETH_PATH_GMAC2_SGMII_BIT)
+#define MTK_ETH_PATH_GMAC2_GEPHY BIT(MTK_ETH_PATH_GMAC2_GEPHY_BIT)
+#define MTK_ETH_PATH_GDM1_ESW BIT(MTK_ETH_PATH_GDM1_ESW_BIT)
+
+#define MTK_GMAC1_RGMII (MTK_ETH_PATH_GMAC1_RGMII | MTK_RGMII)
+#define MTK_GMAC1_TRGMII (MTK_ETH_PATH_GMAC1_TRGMII | MTK_TRGMII)
+#define MTK_GMAC1_SGMII (MTK_ETH_PATH_GMAC1_SGMII | MTK_SGMII)
+#define MTK_GMAC2_RGMII (MTK_ETH_PATH_GMAC2_RGMII | MTK_RGMII)
+#define MTK_GMAC2_SGMII (MTK_ETH_PATH_GMAC2_SGMII | MTK_SGMII)
+#define MTK_GMAC2_GEPHY (MTK_ETH_PATH_GMAC2_GEPHY | MTK_GEPHY)
+#define MTK_GDM1_ESW (MTK_ETH_PATH_GDM1_ESW | MTK_ESW)
/* MUXes present on SoCs */
/* 0: GDM1 -> GMAC1, 1: GDM1 -> ESW */
-#define MTK_MUX_GDM1_TO_GMAC1_ESW \
- (MTK_MUX_BIT(MTK_ETH_MUX_GDM1_TO_GMAC1_ESW) | MTK_MUX)
+#define MTK_MUX_GDM1_TO_GMAC1_ESW (MTK_ETH_MUX_GDM1_TO_GMAC1_ESW | MTK_MUX)
/* 0: GMAC2 -> GEPHY, 1: GMAC0 -> GePHY */
#define MTK_MUX_GMAC2_GMAC0_TO_GEPHY \
- (MTK_MUX_BIT(MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY) | MTK_MUX | MTK_INFRA)
+ (MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY | MTK_MUX | MTK_INFRA)
/* 0: U3 -> QPHY, 1: GMAC2 -> QPHY */
#define MTK_MUX_U3_GMAC2_TO_QPHY \
- (MTK_MUX_BIT(MTK_ETH_MUX_U3_GMAC2_TO_QPHY) | MTK_MUX | MTK_INFRA)
+ (MTK_ETH_MUX_U3_GMAC2_TO_QPHY | MTK_MUX | MTK_INFRA)
/* 2: GMAC1 -> SGMII, 3: GMAC2 -> SGMII */
#define MTK_MUX_GMAC1_GMAC2_TO_SGMII_RGMII \
- (MTK_MUX_BIT(MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII) | MTK_MUX | \
+ (MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII | MTK_MUX | \
MTK_SHARED_SGMII)
/* 0: GMACx -> GEPHY, 1: GMACx -> SGMII where x is 1 or 2 */
#define MTK_MUX_GMAC12_TO_GEPHY_SGMII \
- (MTK_MUX_BIT(MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII) | MTK_MUX)
+ (MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII | MTK_MUX)
#define MTK_HAS_CAPS(caps, _x) (((caps) & (_x)) == (_x))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 2391e3cfb56b..37fef8cd25e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -34,6 +34,7 @@ config MLX5_CORE_EN
depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
depends on IPV6=y || IPV6=n || MLX5_CORE=m
select PAGE_POOL
+ select DIMLIB
default n
---help---
Ethernet support in Mellanox Technologies ConnectX-4 NIC.
@@ -96,26 +97,60 @@ config MLX5_CORE_IPOIB
---help---
MLX5 IPoIB offloads & acceleration support.
+config MLX5_FPGA_IPSEC
+ bool "Mellanox Technologies IPsec Innova support"
+ depends on MLX5_CORE
+ depends on MLX5_FPGA
+ default n
+ help
+ Build IPsec support for the Innova family of network cards by Mellanox
+ Technologies. Innova network cards are comprised of a ConnectX chip
+ and an FPGA chip on one board. If you select this option, the
+ mlx5_core driver will include the Innova FPGA core and allow building
+ sandbox-specific client drivers.
+
config MLX5_EN_IPSEC
bool "IPSec XFRM cryptography-offload accelaration"
- depends on MLX5_ACCEL
depends on MLX5_CORE_EN
depends on XFRM_OFFLOAD
depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+ depends on MLX5_FPGA_IPSEC
default n
- ---help---
+ help
Build support for IPsec cryptography-offload accelaration in the NIC.
Note: Support for hardware with this capability needs to be selected
for this option to become available.
-config MLX5_EN_TLS
- bool "TLS cryptography-offload accelaration"
+config MLX5_FPGA_TLS
+ bool "Mellanox Technologies TLS Innova support"
+ depends on TLS_DEVICE
+ depends on TLS=y || MLX5_CORE=m
+ depends on MLX5_FPGA
+ default n
+ help
+ Build TLS support for the Innova family of network cards by Mellanox
+ Technologies. Innova network cards are comprised of a ConnectX chip
+ and an FPGA chip on one board. If you select this option, the
+ mlx5_core driver will include the Innova FPGA core and allow building
+ sandbox-specific client drivers.
+
+config MLX5_TLS
+ bool "Mellanox Technologies TLS Connect-X support"
depends on MLX5_CORE_EN
depends on TLS_DEVICE
depends on TLS=y || MLX5_CORE=m
- depends on MLX5_ACCEL
+ select MLX5_ACCEL
default n
- ---help---
- Build support for TLS cryptography-offload accelaration in the NIC.
- Note: Support for hardware with this capability needs to be selected
- for this option to become available.
+ help
+ Build TLS support for the Connect-X family of network cards by Mellanox
+ Technologies.
+
+config MLX5_EN_TLS
+ bool "TLS cryptography-offload accelaration"
+ depends on MLX5_CORE_EN
+ depends on MLX5_FPGA_TLS || MLX5_TLS
+ default y
+ help
+ Build support for TLS cryptography-offload accelaration in the NIC.
+ Note: Support for hardware with this capability needs to be selected
+ for this option to become available.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 5fe2bf916c06..57d2cc666fe3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
#
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
- transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
+ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o
@@ -24,7 +24,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o \
- en/params.o
+ en/params.o en/xsk/umem.o en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o
#
# Netdev extra
@@ -53,12 +53,14 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib
#
# Accelerations & FPGA
#
-mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o
+mlx5_core-$(CONFIG_MLX5_FPGA_IPSEC) += fpga/ipsec.o
+mlx5_core-$(CONFIG_MLX5_FPGA_TLS) += fpga/tls.o
+mlx5_core-$(CONFIG_MLX5_ACCEL) += lib/crypto.o accel/tls.o accel/ipsec.o
-mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
- fpga/ipsec.o fpga/tls.o
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o
mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
en_accel/ipsec_stats.o
-mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o
+mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \
+ en_accel/ktls.o en_accel/ktls_tx.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
index 9f1b1939716a..eddc34e4a762 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -31,6 +31,8 @@
*
*/
+#ifdef CONFIG_MLX5_FPGA_IPSEC
+
#include <linux/mlx5/device.h>
#include "accel/ipsec.h"
@@ -74,6 +76,11 @@ int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
return mlx5_fpga_ipsec_init(mdev);
}
+void mlx5_accel_ipsec_build_fs_cmds(void)
+{
+ mlx5_fpga_ipsec_build_fs_cmds();
+}
+
void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
{
mlx5_fpga_ipsec_cleanup(mdev);
@@ -107,3 +114,5 @@ int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
return mlx5_fpga_esp_modify_xfrm(xfrm, attrs);
}
EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
index 024dbd22a89b..530e428d46ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -37,7 +37,7 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/accel.h>
-#ifdef CONFIG_MLX5_ACCEL
+#ifdef CONFIG_MLX5_FPGA_IPSEC
#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
MLX5_ACCEL_IPSEC_CAP_DEVICE)
@@ -54,6 +54,7 @@ void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
void mlx5_accel_esp_free_hw_context(void *context);
int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_accel_ipsec_build_fs_cmds(void);
void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
#else
@@ -79,6 +80,10 @@ static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
return 0;
}
+static inline void mlx5_accel_ipsec_build_fs_cmds(void)
+{
+}
+
static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
{
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
index da7bd26368f9..cab708af3422 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
@@ -35,6 +35,9 @@
#include "accel/tls.h"
#include "mlx5_core.h"
+#include "lib/mlx5.h"
+
+#ifdef CONFIG_MLX5_FPGA_TLS
#include "fpga/tls.h"
int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
@@ -61,7 +64,8 @@ int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
{
- return mlx5_fpga_is_tls_device(mdev);
+ return mlx5_fpga_is_tls_device(mdev) ||
+ mlx5_accel_is_ktls_device(mdev);
}
u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev)
@@ -78,3 +82,42 @@ void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev)
{
mlx5_fpga_tls_cleanup(mdev);
}
+#endif
+
+#ifdef CONFIG_MLX5_TLS
+int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info,
+ u32 *p_key_id)
+{
+ u32 sz_bytes;
+ void *key;
+
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info =
+ (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+
+ key = info->key;
+ sz_bytes = sizeof(info->key);
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info =
+ (struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+
+ key = info->key;
+ sz_bytes = sizeof(info->key);
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+
+ return mlx5_create_encryption_key(mdev, key, sz_bytes, p_key_id);
+}
+
+void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id)
+{
+ mlx5_destroy_encryption_key(mdev, key_id);
+}
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
index def4093ebfae..879321b21616 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
@@ -37,8 +37,51 @@
#include <linux/mlx5/driver.h>
#include <linux/tls.h>
-#ifdef CONFIG_MLX5_ACCEL
+#ifdef CONFIG_MLX5_TLS
+int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info,
+ u32 *p_key_id);
+void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id);
+static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev)
+{
+ if (!MLX5_CAP_GEN(mdev, tls))
+ return false;
+
+ if (!MLX5_CAP_GEN(mdev, log_max_dek))
+ return false;
+
+ return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
+}
+
+static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info)
+{
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ if (crypto_info->version == TLS_1_2_VERSION)
+ return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
+ break;
+ }
+
+ return false;
+}
+#else
+static inline int
+mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info,
+ u32 *p_key_id) { return -ENOTSUPP; }
+static inline void
+mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) {}
+
+static inline bool
+mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; }
+static inline bool
+mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info) { return false; }
+#endif
+
+#ifdef CONFIG_MLX5_FPGA_TLS
enum {
MLX5_ACCEL_TLS_TX = BIT(0),
MLX5_ACCEL_TLS_RX = BIT(1),
@@ -84,11 +127,13 @@ static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
bool direction_sx) { }
static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle,
u32 seq, u64 rcd_sn) { return 0; }
-static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; }
+static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
+{
+ return mlx5_accel_is_ktls_device(mdev);
+}
static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; }
static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; }
static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { }
-
#endif
#endif /* __MLX5_ACCEL_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 713a17ee3751..818edc63e428 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -58,7 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data)
list_for_each_entry_safe(mcq, temp, &ctx->process_list,
tasklet_ctx.list) {
list_del_init(&mcq->tasklet_ctx.list);
- mcq->tasklet_ctx.comp(mcq);
+ mcq->tasklet_ctx.comp(mcq, NULL);
mlx5_cq_put(mcq);
if (time_after(jiffies, end))
break;
@@ -68,7 +68,8 @@ void mlx5_cq_tasklet_cb(unsigned long data)
tasklet_schedule(&ctx->task);
}
-static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
+static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
+ struct mlx5_eqe *eqe)
{
unsigned long flags;
struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv;
@@ -87,11 +88,10 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
}
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
- u32 *in, int inlen)
+ u32 *in, int inlen, u32 *out, int outlen)
{
int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
- u32 out[MLX5_ST_SZ_DW(create_cq_out)];
u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
struct mlx5_eq_comp *eq;
int err;
@@ -100,9 +100,9 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
if (IS_ERR(eq))
return PTR_ERR(eq);
- memset(out, 0, sizeof(out));
+ memset(out, 0, outlen);
MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
- err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
if (err)
return err;
@@ -158,13 +158,8 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
int err;
- err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
- if (err)
- return err;
-
- err = mlx5_eq_del_cq(&cq->eq->core, cq);
- if (err)
- return err;
+ mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
+ mlx5_eq_del_cq(&cq->eq->core, cq);
MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index f6b1da99e6c2..5bb6a26ea267 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -311,13 +311,20 @@ static u32 mlx5_gen_pci_id(struct mlx5_core_dev *dev)
/* Must be called with intf_mutex held */
struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
{
- u32 pci_id = mlx5_gen_pci_id(dev);
struct mlx5_core_dev *res = NULL;
struct mlx5_core_dev *tmp_dev;
struct mlx5_priv *priv;
+ u32 pci_id;
+ if (!mlx5_core_is_pf(dev))
+ return NULL;
+
+ pci_id = mlx5_gen_pci_id(dev);
list_for_each_entry(priv, &mlx5_dev_list, dev_list) {
tmp_dev = container_of(priv, struct mlx5_core_dev, priv);
+ if (!mlx5_core_is_pf(tmp_dev))
+ continue;
+
if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) {
res = tmp_dev;
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 1533c657220b..a400f4430c28 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -25,6 +25,65 @@ static int mlx5_devlink_flash_update(struct devlink *devlink,
return mlx5_firmware_flash(dev, fw, extack);
}
+static u8 mlx5_fw_ver_major(u32 version)
+{
+ return (version >> 24) & 0xff;
+}
+
+static u8 mlx5_fw_ver_minor(u32 version)
+{
+ return (version >> 16) & 0xff;
+}
+
+static u16 mlx5_fw_ver_subminor(u32 version)
+{
+ return version & 0xffff;
+}
+
+#define DEVLINK_FW_STRING_LEN 32
+
+static int
+mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ char version_str[DEVLINK_FW_STRING_LEN];
+ u32 running_fw, stored_fw;
+ int err;
+
+ err = devlink_info_driver_name_put(req, DRIVER_NAME);
+ if (err)
+ return err;
+
+ err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id);
+ if (err)
+ return err;
+
+ err = mlx5_fw_version_query(dev, &running_fw, &stored_fw);
+ if (err)
+ return err;
+
+ snprintf(version_str, sizeof(version_str), "%d.%d.%04d",
+ mlx5_fw_ver_major(running_fw), mlx5_fw_ver_minor(running_fw),
+ mlx5_fw_ver_subminor(running_fw));
+ err = devlink_info_version_running_put(req, "fw.version", version_str);
+ if (err)
+ return err;
+
+ /* no pending version, return running (stored) version */
+ if (stored_fw == 0)
+ stored_fw = running_fw;
+
+ snprintf(version_str, sizeof(version_str), "%d.%d.%04d",
+ mlx5_fw_ver_major(stored_fw), mlx5_fw_ver_minor(stored_fw),
+ mlx5_fw_ver_subminor(stored_fw));
+ err = devlink_info_version_stored_put(req, "fw.version", version_str);
+ if (err)
+ return err;
+
+ return 0;
+}
+
static const struct devlink_ops mlx5_devlink_ops = {
#ifdef CONFIG_MLX5_ESWITCH
.eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
@@ -35,6 +94,7 @@ static const struct devlink_ops mlx5_devlink_ops = {
.eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
#endif
.flash_update = mlx5_devlink_flash_update,
+ .info_get = mlx5_devlink_info_get,
};
struct devlink *mlx5_devlink_alloc(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index a4cf123e3f17..ddf1b87f1bc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -187,6 +187,7 @@ TRACE_EVENT(mlx5_fs_set_fte,
__field(u32, index)
__field(u32, action)
__field(u32, flow_tag)
+ __field(u32, flow_source)
__field(u8, mask_enable)
__field(int, new_fte)
__array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
@@ -204,7 +205,8 @@ TRACE_EVENT(mlx5_fs_set_fte,
__entry->index = fte->index;
__entry->action = fte->action.action;
__entry->mask_enable = __entry->fg->mask.match_criteria_enable;
- __entry->flow_tag = fte->action.flow_tag;
+ __entry->flow_tag = fte->flow_context.flow_tag;
+ __entry->flow_source = fte->flow_context.flow_source;
memcpy(__entry->mask_outer,
MLX5_ADDR_OF(fte_match_param,
&__entry->fg->mask.match_criteria,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 992d5cb646b2..263558875f20 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -48,7 +48,7 @@
#include <linux/rhashtable.h>
#include <net/switchdev.h>
#include <net/xdp.h>
-#include <linux/net_dim.h>
+#include <linux/dim.h>
#include <linux/bits.h>
#include "wq.h"
#include "mlx5_core.h"
@@ -137,6 +137,7 @@ struct page_pool;
#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1)
#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
#define MLX5E_TX_CQ_POLL_BUDGET 128
+#define MLX5E_TX_XSK_POLL_BUDGET 64
#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */
#define MLX5E_UMR_WQE_INLINE_SZ \
@@ -155,6 +156,11 @@ do { \
##__VA_ARGS__); \
} while (0)
+enum mlx5e_rq_group {
+ MLX5E_RQ_GROUP_REGULAR,
+ MLX5E_RQ_GROUP_XSK,
+ MLX5E_NUM_RQ_GROUPS /* Keep last. */
+};
static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
{
@@ -179,7 +185,8 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
/* Use this function to get max num channels after netdev was created */
static inline int mlx5e_get_netdev_max_channels(struct net_device *netdev)
{
- return min_t(unsigned int, netdev->num_rx_queues,
+ return min_t(unsigned int,
+ netdev->num_rx_queues / MLX5E_NUM_RQ_GROUPS,
netdev->num_tx_queues);
}
@@ -202,7 +209,10 @@ struct mlx5e_umr_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_umr_ctrl_seg uctrl;
struct mlx5_mkey_seg mkc;
- struct mlx5_mtt inline_mtts[0];
+ union {
+ struct mlx5_mtt inline_mtts[0];
+ u8 tls_static_params_ctx[0];
+ };
};
extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
@@ -238,9 +248,9 @@ struct mlx5e_params {
u16 num_channels;
u8 num_tc;
bool rx_cqe_compress_def;
- struct net_dim_cq_moder rx_cq_moderation;
- struct net_dim_cq_moder tx_cq_moderation;
bool tunneled_offload_en;
+ struct dim_cq_moder rx_cq_moderation;
+ struct dim_cq_moder tx_cq_moderation;
bool lro_en;
u8 tx_min_inline_mode;
bool vlan_strip_disable;
@@ -250,6 +260,7 @@ struct mlx5e_params {
u32 lro_timeout;
u32 pflags;
struct bpf_prog *xdp_prog;
+ struct mlx5e_xsk *xsk;
unsigned int sw_mtu;
int hard_mtu;
};
@@ -325,6 +336,9 @@ struct mlx5e_tx_wqe_info {
u32 num_bytes;
u8 num_wqebbs;
u8 num_dma;
+#ifdef CONFIG_MLX5_EN_TLS
+ skb_frag_t *resync_dump_frag;
+#endif
};
enum mlx5e_dma_map_type {
@@ -348,6 +362,13 @@ enum {
struct mlx5e_sq_wqe_info {
u8 opcode;
+
+ /* Auxiliary data for different opcodes. */
+ union {
+ struct {
+ struct mlx5e_rq *rq;
+ } umr;
+ };
};
struct mlx5e_txqsq {
@@ -356,7 +377,7 @@ struct mlx5e_txqsq {
/* dirtied @completion */
u16 cc;
u32 dma_fifo_cc;
- struct net_dim dim; /* Adaptive Moderation */
+ struct dim dim; /* Adaptive Moderation */
/* dirtied @xmit */
u16 pc ____cacheline_aligned_in_smp;
@@ -375,6 +396,7 @@ struct mlx5e_txqsq {
void __iomem *uar_map;
struct netdev_queue *txq;
u32 sqn;
+ u16 stop_room;
u8 min_inline_mode;
struct device *pdev;
__be32 mkey_be;
@@ -392,14 +414,55 @@ struct mlx5e_txqsq {
} ____cacheline_aligned_in_smp;
struct mlx5e_dma_info {
- struct page *page;
- dma_addr_t addr;
+ dma_addr_t addr;
+ union {
+ struct page *page;
+ struct {
+ u64 handle;
+ void *data;
+ } xsk;
+ };
+};
+
+/* XDP packets can be transmitted in different ways. On completion, we need to
+ * distinguish between them to clean up things in a proper way.
+ */
+enum mlx5e_xdp_xmit_mode {
+ /* An xdp_frame was transmitted due to either XDP_REDIRECT from another
+ * device or XDP_TX from an XSK RQ. The frame has to be unmapped and
+ * returned.
+ */
+ MLX5E_XDP_XMIT_MODE_FRAME,
+
+ /* The xdp_frame was created in place as a result of XDP_TX from a
+ * regular RQ. No DMA remapping happened, and the page belongs to us.
+ */
+ MLX5E_XDP_XMIT_MODE_PAGE,
+
+ /* No xdp_frame was created at all, the transmit happened from a UMEM
+ * page. The UMEM Completion Ring producer pointer has to be increased.
+ */
+ MLX5E_XDP_XMIT_MODE_XSK,
};
struct mlx5e_xdp_info {
- struct xdp_frame *xdpf;
- dma_addr_t dma_addr;
- struct mlx5e_dma_info di;
+ enum mlx5e_xdp_xmit_mode mode;
+ union {
+ struct {
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
+ } frame;
+ struct {
+ struct mlx5e_rq *rq;
+ struct mlx5e_dma_info di;
+ } page;
+ };
+};
+
+struct mlx5e_xdp_xmit_data {
+ dma_addr_t dma_addr;
+ void *data;
+ u32 len;
};
struct mlx5e_xdp_info_fifo {
@@ -425,8 +488,12 @@ struct mlx5e_xdp_mpwqe {
};
struct mlx5e_xdpsq;
-typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*,
- struct mlx5e_xdp_info*);
+typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
+typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
+ struct mlx5e_xdp_xmit_data *,
+ struct mlx5e_xdp_info *,
+ int);
+
struct mlx5e_xdpsq {
/* data path */
@@ -443,8 +510,10 @@ struct mlx5e_xdpsq {
struct mlx5e_cq cq;
/* read only */
+ struct xdp_umem *umem;
struct mlx5_wq_cyc wq;
struct mlx5e_xdpsq_stats *stats;
+ mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check;
mlx5e_fp_xmit_xdp_frame xmit_xdp_frame;
struct {
struct mlx5e_xdp_wqe_info *wqe_info;
@@ -487,12 +556,6 @@ struct mlx5e_icosq {
struct mlx5e_channel *channel;
} ____cacheline_aligned_in_smp;
-static inline bool
-mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
-{
- return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
-}
-
struct mlx5e_wqe_frag_info {
struct mlx5e_dma_info *di;
u32 offset;
@@ -571,9 +634,11 @@ struct mlx5e_rq {
u8 log_stride_sz;
u8 umr_in_progress;
u8 umr_last_bulk;
+ u8 umr_completed;
} mpwqe;
};
struct {
+ u16 umem_headroom;
u16 headroom;
u8 map_dir; /* dma map direction */
} buff;
@@ -596,14 +661,18 @@ struct mlx5e_rq {
int ix;
unsigned int hw_mtu;
- struct net_dim dim; /* Dynamic Interrupt Moderation */
+ struct dim dim; /* Dynamic Interrupt Moderation */
/* XDP */
struct bpf_prog *xdp_prog;
- struct mlx5e_xdpsq xdpsq;
+ struct mlx5e_xdpsq *xdpsq;
DECLARE_BITMAP(flags, 8);
struct page_pool *page_pool;
+ /* AF_XDP zero-copy */
+ struct zero_copy_allocator zca;
+ struct xdp_umem *umem;
+
/* control */
struct mlx5_wq_ctrl wq_ctrl;
__be32 mkey_be;
@@ -616,9 +685,15 @@ struct mlx5e_rq {
struct xdp_rxq_info xdp_rxq;
} ____cacheline_aligned_in_smp;
+enum mlx5e_channel_state {
+ MLX5E_CHANNEL_STATE_XSK,
+ MLX5E_CHANNEL_NUM_STATES
+};
+
struct mlx5e_channel {
/* data path */
struct mlx5e_rq rq;
+ struct mlx5e_xdpsq rq_xdpsq;
struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC];
struct mlx5e_icosq icosq; /* internal control operations */
bool xdp;
@@ -631,6 +706,13 @@ struct mlx5e_channel {
/* XDP_REDIRECT */
struct mlx5e_xdpsq xdpsq;
+ /* AF_XDP zero-copy */
+ struct mlx5e_rq xskrq;
+ struct mlx5e_xdpsq xsksq;
+ struct mlx5e_icosq xskicosq;
+ /* xskicosq can be accessed from any CPU - the spinlock protects it. */
+ spinlock_t xskicosq_lock;
+
/* data path - accessed per napi poll */
struct irq_desc *irq_desc;
struct mlx5e_ch_stats *stats;
@@ -639,6 +721,7 @@ struct mlx5e_channel {
struct mlx5e_priv *priv;
struct mlx5_core_dev *mdev;
struct hwtstamp_config *tstamp;
+ DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
int ix;
int cpu;
cpumask_var_t xps_cpumask;
@@ -654,14 +737,17 @@ struct mlx5e_channel_stats {
struct mlx5e_ch_stats ch;
struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
struct mlx5e_rq_stats rq;
+ struct mlx5e_rq_stats xskrq;
struct mlx5e_xdpsq_stats rq_xdpsq;
struct mlx5e_xdpsq_stats xdpsq;
+ struct mlx5e_xdpsq_stats xsksq;
} ____cacheline_aligned_in_smp;
enum {
MLX5E_STATE_OPENED,
MLX5E_STATE_DESTROYING,
MLX5E_STATE_XDP_TX_ENABLED,
+ MLX5E_STATE_XDP_OPEN,
};
struct mlx5e_rqt {
@@ -694,6 +780,17 @@ struct mlx5e_modify_sq_param {
int rl_index;
};
+struct mlx5e_xsk {
+ /* UMEMs are stored separately from channels, because we don't want to
+ * lose them when channels are recreated. The kernel also stores UMEMs,
+ * but it doesn't distinguish between zero-copy and non-zero-copy UMEMs,
+ * so rely on our mechanism.
+ */
+ struct xdp_umem **umems;
+ u16 refcnt;
+ bool ever_used;
+};
+
struct mlx5e_priv {
/* priv data path fields - start */
struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
@@ -714,6 +811,7 @@ struct mlx5e_priv {
struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS];
+ struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS];
struct mlx5e_rss_params rss_params;
u32 tx_rates[MLX5E_MAX_NUM_SQS];
@@ -750,6 +848,7 @@ struct mlx5e_priv {
struct mlx5e_tls *tls;
#endif
struct devlink_health_reporter *tx_reporter;
+ struct mlx5e_xsk xsk;
};
struct mlx5e_profile {
@@ -763,6 +862,7 @@ struct mlx5e_profile {
void (*cleanup_tx)(struct mlx5e_priv *priv);
void (*enable)(struct mlx5e_priv *priv);
void (*disable)(struct mlx5e_priv *priv);
+ int (*update_rx)(struct mlx5e_priv *priv);
void (*update_stats)(struct mlx5e_priv *priv);
void (*update_carrier)(struct mlx5e_priv *priv);
struct {
@@ -781,7 +881,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more);
void mlx5e_trigger_irq(struct mlx5e_icosq *sq);
-void mlx5e_completion_event(struct mlx5_core_cq *mcq);
+void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe);
void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
int mlx5e_napi_poll(struct napi_struct *napi, int budget);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
@@ -793,11 +893,13 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
-void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
- bool recycle);
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info,
+ bool recycle);
void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
+void mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq);
void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
@@ -853,6 +955,30 @@ void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen);
struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt);
+struct mlx5e_xsk_param;
+
+struct mlx5e_rq_param;
+int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
+ struct xdp_umem *umem, struct mlx5e_rq *rq);
+int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
+void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
+void mlx5e_close_rq(struct mlx5e_rq *rq);
+
+struct mlx5e_sq_param;
+int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct mlx5e_icosq *sq);
+void mlx5e_close_icosq(struct mlx5e_icosq *sq);
+int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct xdp_umem *umem,
+ struct mlx5e_xdpsq *sq, bool is_redirect);
+void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq);
+
+struct mlx5e_cq_param;
+int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder,
+ struct mlx5e_cq_param *param, struct mlx5e_cq *cq);
+void mlx5e_close_cq(struct mlx5e_cq *cq);
+
int mlx5e_open_locked(struct net_device *netdev);
int mlx5e_close_locked(struct net_device *netdev);
@@ -898,102 +1024,6 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
MLX5_CAP_ETH(mdev, swp_csum) && MLX5_CAP_ETH(mdev, swp_lso);
}
-struct mlx5e_swp_spec {
- __be16 l3_proto;
- u8 l4_proto;
- u8 is_tun;
- __be16 tun_l3_proto;
- u8 tun_l4_proto;
-};
-
-static inline void
-mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
- struct mlx5e_swp_spec *swp_spec)
-{
- /* SWP offsets are in 2-bytes words */
- eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
- if (swp_spec->l3_proto == htons(ETH_P_IPV6))
- eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
- if (swp_spec->l4_proto) {
- eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2;
- if (swp_spec->l4_proto == IPPROTO_UDP)
- eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP;
- }
-
- if (swp_spec->is_tun) {
- eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
- if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6))
- eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
- } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */
- eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
- if (swp_spec->l3_proto == htons(ETH_P_IPV6))
- eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
- }
- switch (swp_spec->tun_l4_proto) {
- case IPPROTO_UDP:
- eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
- /* fall through */
- case IPPROTO_TCP:
- eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
- break;
- }
-}
-
-static inline void mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq,
- struct mlx5e_tx_wqe **wqe,
- u16 *pi)
-{
- struct mlx5_wq_cyc *wq = &sq->wq;
-
- *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- *wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
- memset(*wqe, 0, sizeof(**wqe));
-}
-
-static inline
-struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
-{
- u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc);
- struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
- struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
-
- memset(cseg, 0, sizeof(*cseg));
-
- cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
- cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01);
-
- (*pc)++;
-
- return wqe;
-}
-
-static inline
-void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc,
- void __iomem *uar_map,
- struct mlx5_wqe_ctrl_seg *ctrl)
-{
- ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
- /* ensure wqe is visible to device before updating doorbell record */
- dma_wmb();
-
- *wq->db = cpu_to_be32(pc);
-
- /* ensure doorbell record is visible to device before ringing the
- * doorbell
- */
- wmb();
-
- mlx5_write64((__be32 *)ctrl, uar_map);
-}
-
-static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
-{
- struct mlx5_core_cq *mcq;
-
- mcq = &cq->mcq;
- mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc);
-}
-
extern const struct ethtool_ops mlx5e_ethtool_ops;
#ifdef CONFIG_MLX5_CORE_EN_DCB
extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
@@ -1023,17 +1053,17 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv);
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv);
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv);
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
+void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
-int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
- u32 underlay_qpn, u32 *tisn);
+int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
int mlx5e_create_tises(struct mlx5e_priv *priv);
+int mlx5e_update_nic_rx(struct mlx5e_priv *priv);
void mlx5e_update_carrier(struct mlx5e_priv *priv);
int mlx5e_close(struct net_device *netdev);
int mlx5e_open(struct net_device *netdev);
@@ -1095,6 +1125,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_xsk *xsk,
struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
u16 max_channels, u16 mtu);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index d3744bffbae3..79301d116667 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -3,65 +3,102 @@
#include "en/params.h"
-u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params)
+static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- u16 linear_rq_headroom = params->xdp_prog ?
- XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
- u32 frag_sz;
+ return params->xdp_prog || xsk;
+}
+
+u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u16 headroom = NET_IP_ALIGN;
+
+ if (mlx5e_rx_is_xdp(params, xsk)) {
+ headroom += XDP_PACKET_HEADROOM;
+ if (xsk)
+ headroom += xsk->headroom;
+ } else {
+ headroom += MLX5_RX_HEADROOM;
+ }
+
+ return headroom;
+}
+
+u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+ u32 frag_sz = linear_rq_headroom + hw_mtu;
- linear_rq_headroom += NET_IP_ALIGN;
+ /* AF_XDP doesn't build SKBs in place. */
+ if (!xsk)
+ frag_sz = MLX5_SKB_FRAG_SZ(frag_sz);
- frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu);
+ /* XDP in mlx5e doesn't support multiple packets per page. */
+ if (mlx5e_rx_is_xdp(params, xsk))
+ frag_sz = max_t(u32, frag_sz, PAGE_SIZE);
- if (params->xdp_prog && frag_sz < PAGE_SIZE)
- frag_sz = PAGE_SIZE;
+ /* Even if we can go with a smaller fragment size, we must not put
+ * multiple packets into a single frame.
+ */
+ if (xsk)
+ frag_sz = max_t(u32, frag_sz, xsk->chunk_size);
return frag_sz;
}
-u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params)
+u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+ u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
}
-bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params)
+bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+ /* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more
+ * than one page. For this, check both with and without xsk.
+ */
+ u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk),
+ mlx5e_rx_get_linear_frag_sz(params, NULL));
- return !params->lro_en && frag_sz <= PAGE_SIZE;
+ return !params->lro_en && linear_frag_sz <= PAGE_SIZE;
}
#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \
MLX5_MPWQE_LOG_STRIDE_SZ_BASE)
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+ u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
s8 signed_log_num_strides_param;
u8 log_num_strides;
- if (!mlx5e_rx_is_linear_skb(params))
+ if (!mlx5e_rx_is_linear_skb(params, xsk))
return false;
- if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
+ if (order_base_2(linear_frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
return false;
if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
return true;
- log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz);
+ log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
signed_log_num_strides_param =
(s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
return signed_log_num_strides_param >= 0;
}
-u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params);
+ u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk);
/* Numbers are unsigned, don't subtract to avoid underflow. */
if (params->log_rq_mtu_frames <
@@ -72,33 +109,30 @@ u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
}
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
- return order_base_2(mlx5e_rx_get_linear_frag_sz(params));
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk));
return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
}
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
return MLX5_MPWRQ_LOG_WQE_SZ -
- mlx5e_mpwqe_get_log_stride_size(mdev, params);
+ mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
}
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u16 linear_rq_headroom = params->xdp_prog ?
- XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
- bool is_linear_skb;
-
- linear_rq_headroom += NET_IP_ALIGN;
-
- is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
- mlx5e_rx_is_linear_skb(params) :
- mlx5e_rx_mpwqe_is_linear_skb(mdev, params);
+ bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
+ mlx5e_rx_is_linear_skb(params, xsk) :
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
- return is_linear_skb ? linear_rq_headroom : 0;
+ return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index b106a0236f36..bd882b5ee9a7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -6,17 +6,119 @@
#include "en.h"
-u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params);
-u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params);
-bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params);
+struct mlx5e_xsk_param {
+ u16 headroom;
+ u16 chunk_size;
+};
+
+struct mlx5e_rq_param {
+ u32 rqc[MLX5_ST_SZ_DW(rqc)];
+ struct mlx5_wq_param wq;
+ struct mlx5e_rq_frags_info frags_info;
+};
+
+struct mlx5e_sq_param {
+ u32 sqc[MLX5_ST_SZ_DW(sqc)];
+ struct mlx5_wq_param wq;
+ bool is_mpw;
+};
+
+struct mlx5e_cq_param {
+ u32 cqc[MLX5_ST_SZ_DW(cqc)];
+ struct mlx5_wq_param wq;
+ u16 eq_ix;
+ u8 cq_period_mode;
+};
+
+struct mlx5e_channel_param {
+ struct mlx5e_rq_param rq;
+ struct mlx5e_sq_param sq;
+ struct mlx5e_sq_param xdp_sq;
+ struct mlx5e_sq_param icosq;
+ struct mlx5e_cq_param rx_cq;
+ struct mlx5e_cq_param tx_cq;
+ struct mlx5e_cq_param icosq_cq;
+};
+
+static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params,
+ u16 qid,
+ enum mlx5e_rq_group group,
+ u16 *ix)
+{
+ int nch = params->num_channels;
+ int ch = qid - nch * group;
+
+ if (ch < 0 || ch >= nch)
+ return false;
+
+ *ix = ch;
+ return true;
+}
+
+static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params,
+ u16 qid,
+ u16 *ix,
+ enum mlx5e_rq_group *group)
+{
+ u16 nch = params->num_channels;
+
+ *ix = qid % nch;
+ *group = qid / nch;
+}
+
+static inline bool mlx5e_qid_validate(struct mlx5e_params *params, u64 qid)
+{
+ return qid < params->num_channels * MLX5E_NUM_RQ_GROUPS;
+}
+
+/* Parameter calculations */
+
+u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
-u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+
+/* Build queue parameters */
+
+void mlx5e_build_rq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_param *param);
+void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+ struct mlx5e_sq_param *param);
+void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_cq_param *param);
+void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param);
+void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_cq_param *param);
+void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param);
+void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param);
#endif /* __MLX5_EN_PARAMS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index f5ad531e1749..3739646b653f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -5,6 +5,7 @@
#include <net/gre.h>
#include <net/geneve.h>
#include "en/tc_tun.h"
+#include "en_tc.h"
struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
{
@@ -47,7 +48,8 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv,
*route_dev = dev;
if (is_vlan_dev(*route_dev))
*out_dev = uplink_dev;
- else if (mlx5e_eswitch_rep(dev))
+ else if (mlx5e_eswitch_rep(dev) &&
+ mlx5e_is_valid_eswitch_fwd_dev(priv, dev))
*out_dev = *route_dev;
else
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
new file mode 100644
index 000000000000..ddfe19adb3d9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TXRX_H___
+#define __MLX5_EN_TXRX_H___
+
+#include "en.h"
+
+#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS
+#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
+ MLX5E_SQ_NOPS_ROOM)
+
+#ifndef CONFIG_MLX5_EN_TLS
+#define MLX5E_SQ_TLS_ROOM (0)
+#else
+/* TLS offload requires additional stop_room for:
+ * - a resync SKB.
+ * kTLS offload requires additional stop_room for:
+ * - static params WQE,
+ * - progress params WQE, and
+ * - resync DUMP per frag.
+ */
+#define MLX5E_SQ_TLS_ROOM \
+ (MLX5_SEND_WQE_MAX_WQEBBS + \
+ MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + \
+ MAX_SKB_FRAGS * MLX5E_KTLS_MAX_DUMP_WQEBBS)
+#endif
+
+#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+
+static inline bool
+mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
+{
+ return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
+}
+
+static inline void *
+mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, size_t size, u16 *pi)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ void *wqe;
+
+ *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+ memset(wqe, 0, size);
+
+ return wqe;
+}
+
+static inline struct mlx5e_tx_wqe *
+mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc);
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+ memset(cseg, 0, sizeof(*cseg));
+
+ cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+ cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01);
+
+ (*pc)++;
+
+ return wqe;
+}
+
+static inline struct mlx5e_tx_wqe *
+mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc);
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+ memset(cseg, 0, sizeof(*cseg));
+
+ cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+ cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01);
+ cseg->fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL;
+
+ (*pc)++;
+
+ return wqe;
+}
+
+static inline void
+mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
+ u16 pi, u16 nnops)
+{
+ struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
+
+ edge_wi = wi + nnops;
+
+ /* fill sq frag edge with nops to avoid wqe wrapping two pages */
+ for (; wi < edge_wi; wi++) {
+ wi->skb = NULL;
+ wi->num_wqebbs = 1;
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nop += nnops;
+}
+
+static inline void
+mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
+ struct mlx5_wqe_ctrl_seg *ctrl)
+{
+ ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+
+ *wq->db = cpu_to_be32(pc);
+
+ /* ensure doorbell record is visible to device before ringing the
+ * doorbell
+ */
+ wmb();
+
+ mlx5_write64((__be32 *)ctrl, uar_map);
+}
+
+static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5e_tx_wqe *wqe)
+{
+ return !!wqe->ctrl.tisn;
+}
+
+static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
+{
+ struct mlx5_core_cq *mcq;
+
+ mcq = &cq->mcq;
+ mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc);
+}
+
+static inline struct mlx5e_sq_dma *
+mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i)
+{
+ return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
+}
+
+static inline void
+mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
+ enum mlx5e_dma_map_type map_type)
+{
+ struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++);
+
+ dma->addr = addr;
+ dma->size = size;
+ dma->type = map_type;
+}
+
+static inline void
+mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
+{
+ switch (dma->type) {
+ case MLX5E_DMA_MAP_SINGLE:
+ dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ case MLX5E_DMA_MAP_PAGE:
+ dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ default:
+ WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
+ }
+}
+
+/* SW parser related functions */
+
+struct mlx5e_swp_spec {
+ __be16 l3_proto;
+ u8 l4_proto;
+ u8 is_tun;
+ __be16 tun_l3_proto;
+ u8 tun_l4_proto;
+};
+
+static inline void
+mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
+ struct mlx5e_swp_spec *swp_spec)
+{
+ /* SWP offsets are in 2-bytes words */
+ eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+ if (swp_spec->l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+ if (swp_spec->l4_proto) {
+ eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2;
+ if (swp_spec->l4_proto == IPPROTO_UDP)
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP;
+ }
+
+ if (swp_spec->is_tun) {
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+ if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */
+ eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
+ if (swp_spec->l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ }
+ switch (swp_spec->tun_l4_proto) {
+ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+ /* fall through */
+ case IPPROTO_TCP:
+ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ break;
+ }
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index eb8ef78e5626..b0b982cf69bb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -31,11 +31,13 @@
*/
#include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
#include "en/xdp.h"
+#include "en/params.h"
-int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
{
- int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;
+ int hr = mlx5e_get_linear_rq_headroom(params, xsk);
/* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
* The condition checked in mlx5e_rx_is_linear_skb is:
@@ -54,25 +56,70 @@ int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
}
static inline bool
-mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
- struct xdp_buff *xdp)
+mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *di, struct xdp_buff *xdp)
{
+ struct mlx5e_xdp_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
- xdpi.xdpf = convert_to_xdp_frame(xdp);
- if (unlikely(!xdpi.xdpf))
+ xdpf = convert_to_xdp_frame(xdp);
+ if (unlikely(!xdpf))
return false;
- xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf);
- dma_sync_single_for_device(sq->pdev, xdpi.dma_addr,
- xdpi.xdpf->len, PCI_DMA_TODEVICE);
- xdpi.di = *di;
- return sq->xmit_xdp_frame(sq, &xdpi);
+ xdptxd.data = xdpf->data;
+ xdptxd.len = xdpf->len;
+
+ if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) {
+ /* The xdp_buff was in the UMEM and was copied into a newly
+ * allocated page. The UMEM page was returned via the ZCA, and
+ * this new page has to be mapped at this point and has to be
+ * unmapped and returned via xdp_return_frame on completion.
+ */
+
+ /* Prevent double recycling of the UMEM page. Even in case this
+ * function returns false, the xdp_buff shouldn't be recycled,
+ * as it was already done in xdp_convert_zc_to_xdp_frame.
+ */
+ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
+
+ dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(sq->pdev, dma_addr)) {
+ xdp_return_frame(xdpf);
+ return false;
+ }
+
+ xdptxd.dma_addr = dma_addr;
+ xdpi.frame.xdpf = xdpf;
+ xdpi.frame.dma_addr = dma_addr;
+ } else {
+ /* Driver assumes that convert_to_xdp_frame returns an xdp_frame
+ * that points to the same memory region as the original
+ * xdp_buff. It allows to map the memory only once and to use
+ * the DMA_BIDIRECTIONAL mode.
+ */
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
+
+ dma_addr = di->addr + (xdpf->data - (void *)xdpf);
+ dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len,
+ DMA_TO_DEVICE);
+
+ xdptxd.dma_addr = dma_addr;
+ xdpi.page.rq = rq;
+ xdpi.page.di = *di;
+ }
+
+ return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0);
}
/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- void *va, u16 *rx_headroom, u32 *len)
+ void *va, u16 *rx_headroom, u32 *len, bool xsk)
{
struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
struct xdp_buff xdp;
@@ -86,16 +133,20 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + *len;
xdp.data_hard_start = va;
+ if (xsk)
+ xdp.handle = di->xsk.handle;
xdp.rxq = &rq->xdp_rxq;
act = bpf_prog_run_xdp(prog, &xdp);
+ if (xsk)
+ xdp.handle += xdp.data - xdp.data_hard_start;
switch (act) {
case XDP_PASS:
*rx_headroom = xdp.data - xdp.data_hard_start;
*len = xdp.data_end - xdp.data;
return false;
case XDP_TX:
- if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp)))
+ if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp)))
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
return true;
@@ -106,7 +157,8 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
- mlx5e_page_dma_unmap(rq, di);
+ if (!xsk)
+ mlx5e_page_dma_unmap(rq, di);
rq->stats->xdp_redirect++;
return true;
default:
@@ -160,7 +212,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
stats->mpwqe++;
}
-static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
+void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
@@ -183,32 +235,55 @@ static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
session->wqe = NULL; /* Close session */
}
+enum {
+ MLX5E_XDP_CHECK_OK = 1,
+ MLX5E_XDP_CHECK_START_MPWQE = 2,
+};
+
+static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
+{
+ if (unlikely(!sq->mpwqe.wqe)) {
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+ MLX5_SEND_WQE_MAX_WQEBBS))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->stats->full++;
+ return -EBUSY;
+ }
+
+ return MLX5E_XDP_CHECK_START_MPWQE;
+ }
+
+ return MLX5E_XDP_CHECK_OK;
+}
+
static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
- struct mlx5e_xdp_info *xdpi)
+ struct mlx5e_xdp_xmit_data *xdptxd,
+ struct mlx5e_xdp_info *xdpi,
+ int check_result)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
- struct xdp_frame *xdpf = xdpi->xdpf;
-
- if (unlikely(sq->hw_mtu < xdpf->len)) {
+ if (unlikely(xdptxd->len > sq->hw_mtu)) {
stats->err++;
return false;
}
- if (unlikely(!session->wqe)) {
- if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
- MLX5_SEND_WQE_MAX_WQEBBS))) {
- /* SQ is full, ring doorbell */
- mlx5e_xmit_xdp_doorbell(sq);
- stats->full++;
- return false;
- }
+ if (!check_result)
+ check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
+ if (unlikely(check_result < 0))
+ return false;
+ if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
+ /* Start the session when nothing can fail, so it's guaranteed
+ * that if there is an active session, it has at least one dseg,
+ * and it's safe to complete it at any time.
+ */
mlx5e_xdp_mpwqe_session_start(sq);
}
- mlx5e_xdp_mpwqe_add_dseg(sq, xdpi, stats);
+ mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
if (unlikely(session->complete ||
session->ds_count == session->max_ds_count))
@@ -219,7 +294,22 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
return true;
}
-static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
+static int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+{
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->stats->full++;
+ return -EBUSY;
+ }
+
+ return MLX5E_XDP_CHECK_OK;
+}
+
+static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_xmit_data *xdptxd,
+ struct mlx5e_xdp_info *xdpi,
+ int check_result)
{
struct mlx5_wq_cyc *wq = &sq->wq;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
@@ -229,9 +319,8 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *
struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
struct mlx5_wqe_data_seg *dseg = wqe->data;
- struct xdp_frame *xdpf = xdpi->xdpf;
- dma_addr_t dma_addr = xdpi->dma_addr;
- unsigned int dma_len = xdpf->len;
+ dma_addr_t dma_addr = xdptxd->dma_addr;
+ u32 dma_len = xdptxd->len;
struct mlx5e_xdpsq_stats *stats = sq->stats;
@@ -242,18 +331,16 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *
return false;
}
- if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
- /* SQ is full, ring doorbell */
- mlx5e_xmit_xdp_doorbell(sq);
- stats->full++;
+ if (!check_result)
+ check_result = mlx5e_xmit_xdp_frame_check(sq);
+ if (unlikely(check_result < 0))
return false;
- }
cseg->fm_ce_se = 0;
/* copy the inline part if required */
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
- memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE);
+ memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
dma_len -= MLX5E_XDP_MIN_INLINE;
dma_addr += MLX5E_XDP_MIN_INLINE;
@@ -277,7 +364,7 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *
static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_wqe_info *wi,
- struct mlx5e_rq *rq,
+ u32 *xsk_frames,
bool recycle)
{
struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
@@ -286,22 +373,32 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
for (i = 0; i < wi->num_pkts; i++) {
struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
- if (rq) {
- /* XDP_TX */
- mlx5e_page_release(rq, &xdpi.di, recycle);
- } else {
- /* XDP_REDIRECT */
- dma_unmap_single(sq->pdev, xdpi.dma_addr,
- xdpi.xdpf->len, DMA_TO_DEVICE);
- xdp_return_frame(xdpi.xdpf);
+ switch (xdpi.mode) {
+ case MLX5E_XDP_XMIT_MODE_FRAME:
+ /* XDP_TX from the XSK RQ and XDP_REDIRECT */
+ dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
+ xdpi.frame.xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame(xdpi.frame.xdpf);
+ break;
+ case MLX5E_XDP_XMIT_MODE_PAGE:
+ /* XDP_TX from the regular RQ */
+ mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle);
+ break;
+ case MLX5E_XDP_XMIT_MODE_XSK:
+ /* AF_XDP send */
+ (*xsk_frames)++;
+ break;
+ default:
+ WARN_ON_ONCE(true);
}
}
}
-bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
struct mlx5e_xdpsq *sq;
struct mlx5_cqe64 *cqe;
+ u32 xsk_frames = 0;
u16 sqcc;
int i;
@@ -343,10 +440,13 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
sqcc += wi->num_wqebbs;
- mlx5e_free_xdpsq_desc(sq, wi, rq, true);
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true);
} while (!last_wqe);
} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+ if (xsk_frames)
+ xsk_umem_complete_tx(sq->umem, xsk_frames);
+
sq->stats->cqes += i;
mlx5_cqwq_update_db_record(&cq->wq);
@@ -358,8 +458,10 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
-void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
{
+ u32 xsk_frames = 0;
+
while (sq->cc != sq->pc) {
struct mlx5e_xdp_wqe_info *wi;
u16 ci;
@@ -369,8 +471,11 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
sq->cc += wi->num_wqebbs;
- mlx5e_free_xdpsq_desc(sq, wi, rq, false);
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false);
}
+
+ if (xsk_frames)
+ xsk_umem_complete_tx(sq->umem, xsk_frames);
}
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
@@ -398,21 +503,27 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
+ struct mlx5e_xdp_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
- xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len,
- DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) {
+ xdptxd.data = xdpf->data;
+ xdptxd.len = xdpf->len;
+ xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
+ xdptxd.len, DMA_TO_DEVICE);
+
+ if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) {
xdp_return_frame_rx_napi(xdpf);
drops++;
continue;
}
- xdpi.xdpf = xdpf;
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
+ xdpi.frame.xdpf = xdpf;
+ xdpi.frame.dma_addr = xdptxd.dma_addr;
- if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) {
- dma_unmap_single(sq->pdev, xdpi.dma_addr,
- xdpf->len, DMA_TO_DEVICE);
+ if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0))) {
+ dma_unmap_single(sq->pdev, xdptxd.dma_addr,
+ xdptxd.len, DMA_TO_DEVICE);
xdp_return_frame_rx_napi(xdpf);
drops++;
}
@@ -429,7 +540,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
- struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
+ struct mlx5e_xdpsq *xdpsq = rq->xdpsq;
if (xdpsq->mpwqe.wqe)
mlx5e_xdp_mpwqe_complete(xdpsq);
@@ -444,6 +555,8 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
+ sq->xmit_xdp_frame_check = is_mpw ?
+ mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check;
sq->xmit_xdp_frame = is_mpw ?
mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 8b537a4b0840..b90923932668 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -33,17 +33,20 @@
#define __MLX5_EN_XDP_H__
#include "en.h"
+#include "en/txrx.h"
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
-int mlx5e_xdp_max_mtu(struct mlx5e_params *params);
+struct mlx5e_xsk_param;
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- void *va, u16 *rx_headroom, u32 *len);
-bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
-void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq);
+ void *va, u16 *rx_headroom, u32 *len, bool xsk);
+void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
@@ -66,6 +69,21 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
}
+static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv)
+{
+ set_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
+static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv)
+{
+ clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
+static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv)
+{
+ return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
{
if (sq->doorbell_cseg) {
@@ -97,15 +115,14 @@ static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq)
}
static inline void
-mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi,
+mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_xmit_data *xdptxd,
struct mlx5e_xdpsq_stats *stats)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
- dma_addr_t dma_addr = xdpi->dma_addr;
- struct xdp_frame *xdpf = xdpi->xdpf;
struct mlx5_wqe_data_seg *dseg =
(struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
- u16 dma_len = xdpf->len;
+ u32 dma_len = xdptxd->len;
session->pkt_count++;
@@ -124,7 +141,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi,
}
inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG);
- memcpy(inline_dseg->data, xdpf->data, dma_len);
+ memcpy(inline_dseg->data, xdptxd->data, dma_len);
session->ds_count += ds_cnt;
stats->inlnw++;
@@ -132,7 +149,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi,
}
no_inline:
- dseg->addr = cpu_to_be64(dma_addr);
+ dseg->addr = cpu_to_be64(xdptxd->dma_addr);
dseg->byte_count = cpu_to_be32(dma_len);
dseg->lkey = sq->mkey_be;
session->ds_count++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile
new file mode 100644
index 000000000000..5ee42991900a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/../..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
new file mode 100644
index 000000000000..6a55573ec8f2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "rx.h"
+#include "en/xdp.h"
+#include <net/xdp_sock.h>
+
+/* RX data path */
+
+bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count)
+{
+ /* Check in advance that we have enough frames, instead of allocating
+ * one-by-one, failing and moving frames to the Reuse Ring.
+ */
+ return xsk_umem_has_addrs_rq(rq->umem, count);
+}
+
+int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ struct xdp_umem *umem = rq->umem;
+ u64 handle;
+
+ if (!xsk_umem_peek_addr_rq(umem, &handle))
+ return -ENOMEM;
+
+ dma_info->xsk.handle = handle + rq->buff.umem_headroom;
+ dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle);
+
+ /* No need to add headroom to the DMA address. In striding RQ case, we
+ * just provide pages for UMR, and headroom is counted at the setup
+ * stage when creating a WQE. In non-striding RQ case, headroom is
+ * accounted in mlx5e_alloc_rx_wqe.
+ */
+ dma_info->addr = xdp_umem_get_dma(umem, handle);
+
+ xsk_umem_discard_addr_rq(umem);
+
+ dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ return 0;
+}
+
+static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle)
+{
+ xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask);
+}
+
+/* XSKRQ uses pages from UMEM, they must not be released. They are returned to
+ * the userspace if possible, and if not, this function is called to reuse them
+ * in the driver.
+ */
+void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle);
+}
+
+/* Return a frame back to the hardware to fill in again. It is used by XDP when
+ * the XDP program returns XDP_TX or XDP_REDIRECT not to an XSKMAP.
+ */
+void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
+{
+ struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca);
+
+ mlx5e_xsk_recycle_frame(rq, handle);
+}
+
+static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data,
+ u32 cqe_bcnt)
+{
+ struct sk_buff *skb;
+
+ skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt);
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ skb_put_data(skb, data, cqe_bcnt);
+
+ return skb;
+}
+
+struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
+ struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt,
+ u32 head_offset,
+ u32 page_idx)
+{
+ struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
+ u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
+ u32 cqe_bcnt32 = cqe_bcnt;
+ void *va, *data;
+ u32 frag_size;
+ bool consumed;
+
+ /* Check packet size. Note LRO doesn't use linear SKB */
+ if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+ rq->stats->oversize_pkts_sw_drop++;
+ return NULL;
+ }
+
+ /* head_offset is not used in this function, because di->xsk.data and
+ * di->addr point directly to the necessary place. Furthermore, in the
+ * current implementation, one page = one packet = one frame, so
+ * head_offset should always be 0.
+ */
+ WARN_ON_ONCE(head_offset);
+
+ va = di->xsk.data;
+ data = va + rx_headroom;
+ frag_size = rq->buff.headroom + cqe_bcnt32;
+
+ dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
+ prefetch(data);
+
+ rcu_read_lock();
+ consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true);
+ rcu_read_unlock();
+
+ /* Possible flows:
+ * - XDP_REDIRECT to XSKMAP:
+ * The page is owned by the userspace from now.
+ * - XDP_TX and other XDP_REDIRECTs:
+ * The page was returned by ZCA and recycled.
+ * - XDP_DROP:
+ * Recycle the page.
+ * - XDP_PASS:
+ * Allocate an SKB, copy the data and recycle the page.
+ *
+ * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
+ * size is the same as the Driver RX Ring's size, and pages for WQEs are
+ * allocated first from the Reuse Ring, so it has enough space.
+ */
+
+ if (likely(consumed)) {
+ if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
+ __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+ return NULL; /* page/packet was consumed by XDP */
+ }
+
+ /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
+ * frame. On SKB allocation failure, NULL is returned.
+ */
+ return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32);
+}
+
+struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt)
+{
+ struct mlx5e_dma_info *di = wi->di;
+ u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
+ void *va, *data;
+ bool consumed;
+ u32 frag_size;
+
+ /* wi->offset is not used in this function, because di->xsk.data and
+ * di->addr point directly to the necessary place. Furthermore, in the
+ * current implementation, one page = one packet = one frame, so
+ * wi->offset should always be 0.
+ */
+ WARN_ON_ONCE(wi->offset);
+
+ va = di->xsk.data;
+ data = va + rx_headroom;
+ frag_size = rq->buff.headroom + cqe_bcnt;
+
+ dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
+ prefetch(data);
+
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
+ rq->stats->wqe_err++;
+ return NULL;
+ }
+
+ rcu_read_lock();
+ consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true);
+ rcu_read_unlock();
+
+ if (likely(consumed))
+ return NULL; /* page/packet was consumed by XDP */
+
+ /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
+ * will be handled by mlx5e_put_rx_frag.
+ * On SKB allocation failure, NULL is returned.
+ */
+ return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
new file mode 100644
index 000000000000..307b923a1361
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_RX_H__
+#define __MLX5_EN_XSK_RX_H__
+
+#include "en.h"
+
+/* RX data path */
+
+bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count);
+int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info);
+void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info);
+void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
+struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
+ struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt,
+ u32 head_offset,
+ u32 page_idx);
+struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt);
+
+#endif /* __MLX5_EN_XSK_RX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
new file mode 100644
index 000000000000..aaffa6f68dc0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "setup.h"
+#include "en/params.h"
+
+bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5_core_dev *mdev)
+{
+ /* AF_XDP doesn't support frames larger than PAGE_SIZE, and the current
+ * mlx5e XDP implementation doesn't support multiple packets per page.
+ */
+ if (xsk->chunk_size != PAGE_SIZE)
+ return false;
+
+ /* Current MTU and XSK headroom don't allow packets to fit the frames. */
+ if (mlx5e_rx_get_linear_frag_sz(params, xsk) > xsk->chunk_size)
+ return false;
+
+ /* frag_sz is different for regular and XSK RQs, so ensure that linear
+ * SKB mode is possible.
+ */
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ return mlx5e_rx_is_linear_skb(params, xsk);
+ }
+}
+
+static void mlx5e_build_xskicosq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(priv, param);
+
+ MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+}
+
+static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_channel_param *cparam)
+{
+ const u8 xskicosq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+
+ mlx5e_build_rq_param(priv, params, xsk, &cparam->rq);
+ mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
+ mlx5e_build_xskicosq_param(priv, xskicosq_size, &cparam->icosq);
+ mlx5e_build_rx_cq_param(priv, params, xsk, &cparam->rx_cq);
+ mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq);
+ mlx5e_build_ico_cq_param(priv, xskicosq_size, &cparam->icosq_cq);
+}
+
+int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
+ struct mlx5e_channel *c)
+{
+ struct mlx5e_channel_param cparam = {};
+ struct dim_cq_moder icocq_moder = {};
+ int err;
+
+ if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev))
+ return -EINVAL;
+
+ mlx5e_build_xsk_cparam(priv, params, xsk, &cparam);
+
+ err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq);
+ if (unlikely(err))
+ return err;
+
+ err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq);
+ if (unlikely(err))
+ goto err_close_rx_cq;
+
+ err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq);
+ if (unlikely(err))
+ goto err_close_rq;
+
+ /* Create a separate SQ, so that when the UMEM is disabled, we could
+ * close this SQ safely and stop receiving CQEs. In other case, e.g., if
+ * the XDPSQ was used instead, we might run into trouble when the UMEM
+ * is disabled and then reenabled, but the SQ continues receiving CQEs
+ * from the old UMEM.
+ */
+ err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true);
+ if (unlikely(err))
+ goto err_close_tx_cq;
+
+ err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq);
+ if (unlikely(err))
+ goto err_close_sq;
+
+ /* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be
+ * triggered and NAPI to be called on the correct CPU.
+ */
+ err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq);
+ if (unlikely(err))
+ goto err_close_icocq;
+
+ spin_lock_init(&c->xskicosq_lock);
+
+ set_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+
+ return 0;
+
+err_close_icocq:
+ mlx5e_close_cq(&c->xskicosq.cq);
+
+err_close_sq:
+ mlx5e_close_xdpsq(&c->xsksq);
+
+err_close_tx_cq:
+ mlx5e_close_cq(&c->xsksq.cq);
+
+err_close_rq:
+ mlx5e_close_rq(&c->xskrq);
+
+err_close_rx_cq:
+ mlx5e_close_cq(&c->xskrq.cq);
+
+ return err;
+}
+
+void mlx5e_close_xsk(struct mlx5e_channel *c)
+{
+ clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+ napi_synchronize(&c->napi);
+
+ mlx5e_close_rq(&c->xskrq);
+ mlx5e_close_cq(&c->xskrq.cq);
+ mlx5e_close_icosq(&c->xskicosq);
+ mlx5e_close_cq(&c->xskicosq.cq);
+ mlx5e_close_xdpsq(&c->xsksq);
+ mlx5e_close_cq(&c->xsksq.cq);
+}
+
+void mlx5e_activate_xsk(struct mlx5e_channel *c)
+{
+ set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+ /* TX queue is created active. */
+ mlx5e_trigger_irq(&c->xskicosq);
+}
+
+void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
+{
+ mlx5e_deactivate_rq(&c->xskrq);
+ /* TX queue is disabled on close. */
+}
+
+static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn)
+{
+ struct mlx5e_redirect_rqt_param direct_rrp = {
+ .is_rss = false,
+ {
+ .rqn = rqn,
+ },
+ };
+
+ u32 rqtn = priv->xsk_tir[ix].rqt.rqtn;
+
+ return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
+}
+
+int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c)
+{
+ return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn);
+}
+
+int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix)
+{
+ return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn);
+}
+
+int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+ int err, i;
+
+ if (!priv->xsk.refcnt)
+ return 0;
+
+ for (i = 0; i < chs->num; i++) {
+ struct mlx5e_channel *c = chs->c[i];
+
+ if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ continue;
+
+ err = mlx5e_xsk_redirect_rqt_to_channel(priv, c);
+ if (unlikely(err))
+ goto err_stop;
+ }
+
+ return 0;
+
+err_stop:
+ for (i--; i >= 0; i--) {
+ if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
+ continue;
+
+ mlx5e_xsk_redirect_rqt_to_drop(priv, i);
+ }
+
+ return err;
+}
+
+void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+ int i;
+
+ if (!priv->xsk.refcnt)
+ return;
+
+ for (i = 0; i < chs->num; i++) {
+ if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
+ continue;
+
+ mlx5e_xsk_redirect_rqt_to_drop(priv, i);
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
new file mode 100644
index 000000000000..0dd11b81c046
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_SETUP_H__
+#define __MLX5_EN_XSK_SETUP_H__
+
+#include "en.h"
+
+struct mlx5e_xsk_param;
+
+bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5_core_dev *mdev);
+int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
+ struct mlx5e_channel *c);
+void mlx5e_close_xsk(struct mlx5e_channel *c);
+void mlx5e_activate_xsk(struct mlx5e_channel *c);
+void mlx5e_deactivate_xsk(struct mlx5e_channel *c);
+int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c);
+int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix);
+int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+
+#endif /* __MLX5_EN_XSK_SETUP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
new file mode 100644
index 000000000000..35e188cf4ea4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "tx.h"
+#include "umem.h"
+#include "en/xdp.h"
+#include "en/params.h"
+#include <net/xdp_sock.h>
+
+int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_channel *c;
+ u16 ix;
+
+ if (unlikely(!mlx5e_xdp_is_open(priv)))
+ return -ENETDOWN;
+
+ if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
+ return -EINVAL;
+
+ c = priv->channels.c[ix];
+
+ if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)))
+ return -ENXIO;
+
+ if (!napi_if_scheduled_mark_missed(&c->napi)) {
+ spin_lock(&c->xskicosq_lock);
+ mlx5e_trigger_irq(&c->xskicosq);
+ spin_unlock(&c->xskicosq_lock);
+ }
+
+ return 0;
+}
+
+/* When TX fails (because of the size of the packet), we need to get completions
+ * in order, so post a NOP to get a CQE. Since AF_XDP doesn't distinguish
+ * between successful TX and errors, handling in mlx5e_poll_xdpsq_cq is the
+ * same.
+ */
+static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_info *xdpi)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
+ struct mlx5e_tx_wqe *nopwqe;
+
+ wi->num_wqebbs = 1;
+ wi->num_pkts = 1;
+
+ nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
+ sq->doorbell_cseg = &nopwqe->ctrl;
+}
+
+bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
+{
+ struct xdp_umem *umem = sq->umem;
+ struct mlx5e_xdp_info xdpi;
+ struct mlx5e_xdp_xmit_data xdptxd;
+ bool work_done = true;
+ bool flush = false;
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK;
+
+ for (; budget; budget--) {
+ int check_result = sq->xmit_xdp_frame_check(sq);
+ struct xdp_desc desc;
+
+ if (unlikely(check_result < 0)) {
+ work_done = false;
+ break;
+ }
+
+ if (!xsk_umem_consume_tx(umem, &desc)) {
+ /* TX will get stuck until something wakes it up by
+ * triggering NAPI. Currently it's expected that the
+ * application calls sendto() if there are consumed, but
+ * not completed frames.
+ */
+ break;
+ }
+
+ xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr);
+ xdptxd.data = xdp_umem_get_data(umem, desc.addr);
+ xdptxd.len = desc.len;
+
+ dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr,
+ xdptxd.len, DMA_BIDIRECTIONAL);
+
+ if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, check_result))) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
+
+ mlx5e_xsk_tx_post_err(sq, &xdpi);
+ }
+
+ flush = true;
+ }
+
+ if (flush) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
+ mlx5e_xmit_xdp_doorbell(sq);
+
+ xsk_umem_consume_tx_done(umem);
+ }
+
+ return !(budget && work_done);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
new file mode 100644
index 000000000000..7add18bf78d8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_TX_H__
+#define __MLX5_EN_XSK_TX_H__
+
+#include "en.h"
+
+/* TX data path */
+
+int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid);
+
+bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
+
+#endif /* __MLX5_EN_XSK_TX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
new file mode 100644
index 000000000000..4baaa5788320
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <net/xdp_sock.h>
+#include "umem.h"
+#include "setup.h"
+#include "en/params.h"
+
+static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv,
+ struct xdp_umem *umem)
+{
+ struct device *dev = priv->mdev->device;
+ u32 i;
+
+ for (i = 0; i < umem->npgs; i++) {
+ dma_addr_t dma = dma_map_page(dev, umem->pgs[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ if (unlikely(dma_mapping_error(dev, dma)))
+ goto err_unmap;
+ umem->pages[i].dma = dma;
+ }
+
+ return 0;
+
+err_unmap:
+ while (i--) {
+ dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ umem->pages[i].dma = 0;
+ }
+
+ return -ENOMEM;
+}
+
+static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv,
+ struct xdp_umem *umem)
+{
+ struct device *dev = priv->mdev->device;
+ u32 i;
+
+ for (i = 0; i < umem->npgs; i++) {
+ dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ umem->pages[i].dma = 0;
+ }
+}
+
+static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk)
+{
+ if (!xsk->umems) {
+ xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS,
+ sizeof(*xsk->umems), GFP_KERNEL);
+ if (unlikely(!xsk->umems))
+ return -ENOMEM;
+ }
+
+ xsk->refcnt++;
+ xsk->ever_used = true;
+
+ return 0;
+}
+
+static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk)
+{
+ if (!--xsk->refcnt) {
+ kfree(xsk->umems);
+ xsk->umems = NULL;
+ }
+}
+
+static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix)
+{
+ int err;
+
+ err = mlx5e_xsk_get_umems(xsk);
+ if (unlikely(err))
+ return err;
+
+ xsk->umems[ix] = umem;
+ return 0;
+}
+
+static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix)
+{
+ xsk->umems[ix] = NULL;
+
+ mlx5e_xsk_put_umems(xsk);
+}
+
+static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem)
+{
+ return umem->headroom <= 0xffff && umem->chunk_size_nohr <= 0xffff;
+}
+
+void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk)
+{
+ xsk->headroom = umem->headroom;
+ xsk->chunk_size = umem->chunk_size_nohr + umem->headroom;
+}
+
+static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
+ struct xdp_umem *umem, u16 ix)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_xsk_param xsk;
+ struct mlx5e_channel *c;
+ int err;
+
+ if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix)))
+ return -EBUSY;
+
+ if (unlikely(!mlx5e_xsk_is_umem_sane(umem)))
+ return -EINVAL;
+
+ err = mlx5e_xsk_map_umem(priv, umem);
+ if (unlikely(err))
+ return err;
+
+ err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix);
+ if (unlikely(err))
+ goto err_unmap_umem;
+
+ mlx5e_build_xsk_param(umem, &xsk);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ /* XSK objects will be created on open. */
+ goto validate_closed;
+ }
+
+ if (!params->xdp_prog) {
+ /* XSK objects will be created when an XDP program is set,
+ * and the channels are reopened.
+ */
+ goto validate_closed;
+ }
+
+ c = priv->channels.c[ix];
+
+ err = mlx5e_open_xsk(priv, params, &xsk, umem, c);
+ if (unlikely(err))
+ goto err_remove_umem;
+
+ mlx5e_activate_xsk(c);
+
+ /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
+ * any Fill Ring entries at the setup stage.
+ */
+
+ err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]);
+ if (unlikely(err))
+ goto err_deactivate;
+
+ return 0;
+
+err_deactivate:
+ mlx5e_deactivate_xsk(c);
+ mlx5e_close_xsk(c);
+
+err_remove_umem:
+ mlx5e_xsk_remove_umem(&priv->xsk, ix);
+
+err_unmap_umem:
+ mlx5e_xsk_unmap_umem(priv, umem);
+
+ return err;
+
+validate_closed:
+ /* Check the configuration in advance, rather than fail at a later stage
+ * (in mlx5e_xdp_set or on open) and end up with no channels.
+ */
+ if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) {
+ err = -EINVAL;
+ goto err_remove_umem;
+ }
+
+ return 0;
+}
+
+static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
+{
+ struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params,
+ &priv->xsk, ix);
+ struct mlx5e_channel *c;
+
+ if (unlikely(!umem))
+ return -EINVAL;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto remove_umem;
+
+ /* XSK RQ and SQ are only created if XDP program is set. */
+ if (!priv->channels.params.xdp_prog)
+ goto remove_umem;
+
+ c = priv->channels.c[ix];
+ mlx5e_xsk_redirect_rqt_to_drop(priv, ix);
+ mlx5e_deactivate_xsk(c);
+ mlx5e_close_xsk(c);
+
+remove_umem:
+ mlx5e_xsk_remove_umem(&priv->xsk, ix);
+ mlx5e_xsk_unmap_umem(priv, umem);
+
+ return 0;
+}
+
+static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem,
+ u16 ix)
+{
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_xsk_enable_locked(priv, umem, ix);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix)
+{
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_xsk_disable_locked(priv, ix);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_params *params = &priv->channels.params;
+ u16 ix;
+
+ if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
+ return -EINVAL;
+
+ return umem ? mlx5e_xsk_enable_umem(priv, umem, ix) :
+ mlx5e_xsk_disable_umem(priv, ix);
+}
+
+int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries)
+{
+ struct xdp_umem_fq_reuse *reuseq;
+
+ reuseq = xsk_reuseq_prepare(nentries);
+ if (unlikely(!reuseq))
+ return -ENOMEM;
+ xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
+ return 0;
+}
+
+u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk)
+{
+ u16 res = xsk->refcnt ? params->num_channels : 0;
+
+ while (res) {
+ if (mlx5e_xsk_get_umem(params, xsk, res - 1))
+ break;
+ --res;
+ }
+
+ return res;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h
new file mode 100644
index 000000000000..25b4cbe58b54
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_UMEM_H__
+#define __MLX5_EN_XSK_UMEM_H__
+
+#include "en.h"
+
+static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params,
+ struct mlx5e_xsk *xsk, u16 ix)
+{
+ if (!xsk || !xsk->umems)
+ return NULL;
+
+ if (unlikely(ix >= params->num_channels))
+ return NULL;
+
+ return xsk->umems[ix];
+}
+
+struct mlx5e_xsk_param;
+void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk);
+
+/* .ndo_bpf callback. */
+int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid);
+
+int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries);
+
+u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk);
+
+#endif /* __MLX5_EN_XSK_UMEM_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 6da7c88742dc..3022463f2284 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -39,6 +39,7 @@
#include "en_accel/ipsec_rxtx.h"
#include "en_accel/tls_rxtx.h"
#include "en.h"
+#include "en/txrx.h"
#if IS_ENABLED(CONFIG_GENEVE)
static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index ca47c0540904..db84500b024f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -39,6 +39,7 @@
#include <linux/skbuff.h>
#include <net/xfrm.h>
#include "en.h"
+#include "en/txrx.h"
struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
struct sk_buff *skb, u32 *cqe_bcnt);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
new file mode 100644
index 000000000000..d2ff74d52720
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "en.h"
+#include "en_accel/ktls.h"
+
+static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
+ void *tisc;
+
+ tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ MLX5_SET(tisc, tisc, tls_en, 1);
+
+ return mlx5e_create_tis(mdev, in, tisn);
+}
+
+static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk,
+ enum tls_offload_ctx_dir direction,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_ktls_offload_context_tx *tx_priv;
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ if (WARN_ON(direction != TLS_OFFLOAD_CTX_DIR_TX))
+ return -EINVAL;
+
+ if (WARN_ON(!mlx5e_ktls_type_check(mdev, crypto_info)))
+ return -EOPNOTSUPP;
+
+ tx_priv = kvzalloc(sizeof(*tx_priv), GFP_KERNEL);
+ if (!tx_priv)
+ return -ENOMEM;
+
+ tx_priv->expected_seq = start_offload_tcp_sn;
+ tx_priv->crypto_info = crypto_info;
+ mlx5e_set_ktls_tx_priv_ctx(tls_ctx, tx_priv);
+
+ /* tc and underlay_qpn values are not in use for tls tis */
+ err = mlx5e_ktls_create_tis(mdev, &tx_priv->tisn);
+ if (err)
+ goto create_tis_fail;
+
+ err = mlx5_ktls_create_key(mdev, crypto_info, &tx_priv->key_id);
+ if (err)
+ goto encryption_key_create_fail;
+
+ mlx5e_ktls_tx_offload_set_pending(tx_priv);
+
+ return 0;
+
+encryption_key_create_fail:
+ mlx5e_destroy_tis(priv->mdev, tx_priv->tisn);
+create_tis_fail:
+ kvfree(tx_priv);
+ return err;
+}
+
+static void mlx5e_ktls_del(struct net_device *netdev,
+ struct tls_context *tls_ctx,
+ enum tls_offload_ctx_dir direction)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_ktls_offload_context_tx *tx_priv =
+ mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
+
+ mlx5_ktls_destroy_key(priv->mdev, tx_priv->key_id);
+ mlx5e_destroy_tis(priv->mdev, tx_priv->tisn);
+ kvfree(tx_priv);
+}
+
+static const struct tlsdev_ops mlx5e_ktls_ops = {
+ .tls_dev_add = mlx5e_ktls_add,
+ .tls_dev_del = mlx5e_ktls_del,
+};
+
+void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+
+ if (!mlx5_accel_is_ktls_device(priv->mdev))
+ return;
+
+ netdev->hw_features |= NETIF_F_HW_TLS_TX;
+ netdev->features |= NETIF_F_HW_TLS_TX;
+
+ netdev->tlsdev_ops = &mlx5e_ktls_ops;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
new file mode 100644
index 000000000000..407da83474ef
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5E_KTLS_H__
+#define __MLX5E_KTLS_H__
+
+#include "en.h"
+
+#ifdef CONFIG_MLX5_EN_TLS
+#include <net/tls.h>
+#include "accel/tls.h"
+
+#define MLX5E_KTLS_STATIC_UMR_WQE_SZ \
+ (sizeof(struct mlx5e_umr_wqe) + MLX5_ST_SZ_BYTES(tls_static_params))
+#define MLX5E_KTLS_STATIC_WQEBBS \
+ (DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_BB))
+
+#define MLX5E_KTLS_PROGRESS_WQE_SZ \
+ (sizeof(struct mlx5e_tx_wqe) + MLX5_ST_SZ_BYTES(tls_progress_params))
+#define MLX5E_KTLS_PROGRESS_WQEBBS \
+ (DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB))
+#define MLX5E_KTLS_MAX_DUMP_WQEBBS 2
+
+enum {
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD = 1,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION = 2,
+};
+
+enum {
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 1,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 2,
+};
+
+struct mlx5e_ktls_offload_context_tx {
+ struct tls_offload_context_tx *tx_ctx;
+ struct tls_crypto_info *crypto_info;
+ u32 expected_seq;
+ u32 tisn;
+ u32 key_id;
+ bool ctx_post_pending;
+};
+
+struct mlx5e_ktls_offload_context_tx_shadow {
+ struct tls_offload_context_tx tx_ctx;
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+};
+
+static inline void
+mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx,
+ struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+ struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx);
+ struct mlx5e_ktls_offload_context_tx_shadow *shadow;
+
+ BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX);
+
+ shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx;
+
+ shadow->priv_tx = priv_tx;
+ priv_tx->tx_ctx = tx_ctx;
+}
+
+static inline struct mlx5e_ktls_offload_context_tx *
+mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx)
+{
+ struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx);
+ struct mlx5e_ktls_offload_context_tx_shadow *shadow;
+
+ BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX);
+
+ shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx;
+
+ return shadow->priv_tx;
+}
+
+void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
+void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx);
+
+struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe, u16 *pi);
+void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ struct mlx5e_sq_dma *dma);
+
+#else
+
+static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
+{
+}
+
+#endif
+
+#endif /* __MLX5E_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
new file mode 100644
index 000000000000..3f5f4317a22b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include <linux/tls.h>
+#include "en.h"
+#include "en/txrx.h"
+#include "en_accel/ktls.h"
+
+enum {
+ MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2 = 0x2,
+};
+
+enum {
+ MLX5E_ENCRYPTION_STANDARD_TLS = 0x1,
+};
+
+#define EXTRACT_INFO_FIELDS do { \
+ salt = info->salt; \
+ rec_seq = info->rec_seq; \
+ salt_sz = sizeof(info->salt); \
+ rec_seq_sz = sizeof(info->rec_seq); \
+} while (0)
+
+static void
+fill_static_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+ struct tls_crypto_info *crypto_info = priv_tx->crypto_info;
+ char *initial_rn, *gcm_iv;
+ u16 salt_sz, rec_seq_sz;
+ char *salt, *rec_seq;
+ u8 tls_version;
+
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info =
+ (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+
+ EXTRACT_INFO_FIELDS;
+ break;
+ }
+ default:
+ WARN_ON(1);
+ return;
+ }
+
+ gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv);
+ initial_rn = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number);
+
+ memcpy(gcm_iv, salt, salt_sz);
+ memcpy(initial_rn, rec_seq, rec_seq_sz);
+
+ tls_version = MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2;
+
+ MLX5_SET(tls_static_params, ctx, tls_version, tls_version);
+ MLX5_SET(tls_static_params, ctx, const_1, 1);
+ MLX5_SET(tls_static_params, ctx, const_2, 2);
+ MLX5_SET(tls_static_params, ctx, encryption_standard,
+ MLX5E_ENCRYPTION_STANDARD_TLS);
+ MLX5_SET(tls_static_params, ctx, dek_index, priv_tx->key_id);
+}
+
+static void
+build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ bool fence)
+{
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
+
+#define STATIC_PARAMS_DS_CNT \
+ DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_DS)
+
+ cseg->opmod_idx_opcode = cpu_to_be32((pc << 8) | MLX5_OPCODE_UMR |
+ (MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS << 24));
+ cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+ STATIC_PARAMS_DS_CNT);
+ cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+ cseg->imm = cpu_to_be32(priv_tx->tisn);
+
+ ucseg->flags = MLX5_UMR_INLINE;
+ ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16);
+
+ fill_static_params_ctx(wqe->tls_static_params_ctx, priv_tx);
+}
+
+static void
+fill_progress_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+ MLX5_SET(tls_progress_params, ctx, pd, priv_tx->tisn);
+ MLX5_SET(tls_progress_params, ctx, record_tracker_state,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START);
+ MLX5_SET(tls_progress_params, ctx, auth_state,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD);
+}
+
+static void
+build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ bool fence)
+{
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+#define PROGRESS_PARAMS_DS_CNT \
+ DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_DS)
+
+ cseg->opmod_idx_opcode =
+ cpu_to_be32((pc << 8) | MLX5_OPCODE_SET_PSV |
+ (MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS << 24));
+ cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+ PROGRESS_PARAMS_DS_CNT);
+ cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+
+ fill_progress_params_ctx(wqe->data, priv_tx);
+}
+
+static void tx_fill_wi(struct mlx5e_txqsq *sq,
+ u16 pi, u8 num_wqebbs,
+ skb_frag_t *resync_dump_frag)
+{
+ struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi];
+
+ wi->skb = NULL;
+ wi->num_wqebbs = num_wqebbs;
+ wi->resync_dump_frag = resync_dump_frag;
+}
+
+void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+ priv_tx->ctx_post_pending = true;
+}
+
+static bool
+mlx5e_ktls_tx_offload_test_and_clear_pending(struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+ bool ret = priv_tx->ctx_post_pending;
+
+ priv_tx->ctx_post_pending = false;
+
+ return ret;
+}
+
+static void
+post_static_params(struct mlx5e_txqsq *sq,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ bool fence)
+{
+ struct mlx5e_umr_wqe *umr_wqe;
+ u16 pi;
+
+ umr_wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_STATIC_UMR_WQE_SZ, &pi);
+ build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence);
+ tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, NULL);
+ sq->pc += MLX5E_KTLS_STATIC_WQEBBS;
+}
+
+static void
+post_progress_params(struct mlx5e_txqsq *sq,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ bool fence)
+{
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_PROGRESS_WQE_SZ, &pi);
+ build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence);
+ tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, NULL);
+ sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS;
+}
+
+static void
+mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ bool skip_static_post, bool fence_first_post)
+{
+ bool progress_fence = skip_static_post || !fence_first_post;
+
+ if (!skip_static_post)
+ post_static_params(sq, priv_tx, fence_first_post);
+
+ post_progress_params(sq, priv_tx, progress_fence);
+}
+
+struct tx_sync_info {
+ u64 rcd_sn;
+ s32 sync_len;
+ int nr_frags;
+ skb_frag_t *frags[MAX_SKB_FRAGS];
+};
+
+static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
+ u32 tcp_seq, struct tx_sync_info *info)
+{
+ struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx;
+ struct tls_record_info *record;
+ int remaining, i = 0;
+ unsigned long flags;
+ bool ret = true;
+
+ spin_lock_irqsave(&tx_ctx->lock, flags);
+ record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn);
+
+ if (unlikely(!record)) {
+ ret = false;
+ goto out;
+ }
+
+ if (unlikely(tcp_seq < tls_record_start_seq(record))) {
+ if (!tls_record_is_start_marker(record))
+ ret = false;
+ goto out;
+ }
+
+ info->sync_len = tcp_seq - tls_record_start_seq(record);
+ remaining = info->sync_len;
+ while (remaining > 0) {
+ skb_frag_t *frag = &record->frags[i];
+
+ __skb_frag_ref(frag);
+ remaining -= skb_frag_size(frag);
+ info->frags[i++] = frag;
+ }
+ /* reduce the part which will be sent with the original SKB */
+ if (remaining < 0)
+ skb_frag_size_add(info->frags[i - 1], remaining);
+ info->nr_frags = i;
+out:
+ spin_unlock_irqrestore(&tx_ctx->lock, flags);
+ return ret;
+}
+
+static void
+tx_post_resync_params(struct mlx5e_txqsq *sq,
+ struct mlx5e_ktls_offload_context_tx *priv_tx,
+ u64 rcd_sn)
+{
+ struct tls_crypto_info *crypto_info = priv_tx->crypto_info;
+ __be64 rn_be = cpu_to_be64(rcd_sn);
+ bool skip_static_post;
+ u16 rec_seq_sz;
+ char *rec_seq;
+
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info =
+ (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+
+ rec_seq = info->rec_seq;
+ rec_seq_sz = sizeof(info->rec_seq);
+ break;
+ }
+ default:
+ WARN_ON(1);
+ }
+
+ skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz);
+ if (!skip_static_post)
+ memcpy(rec_seq, &rn_be, rec_seq_sz);
+
+ mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, skip_static_post, true);
+}
+
+static int
+tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ skb_frag_t *frag, u32 tisn, bool first)
+{
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_wqe_eth_seg *eseg;
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5e_tx_wqe *wqe;
+ dma_addr_t dma_addr = 0;
+ u16 ds_cnt, ds_cnt_inl;
+ u8 num_wqebbs;
+ u16 pi, ihs;
+ int fsz;
+
+ ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
+ ihs = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb));
+ ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS);
+ ds_cnt += ds_cnt_inl;
+ ds_cnt += 1; /* one frag */
+
+ wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
+
+ num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+
+ cseg = &wqe->ctrl;
+ eseg = &wqe->eth;
+ dseg = wqe->data;
+
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ cseg->imm = cpu_to_be32(tisn);
+ cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
+
+ eseg->inline_hdr.sz = cpu_to_be16(ihs);
+ memcpy(eseg->inline_hdr.start, skb->data, ihs);
+ dseg += ds_cnt_inl;
+
+ fsz = skb_frag_size(frag);
+ dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
+ return -ENOMEM;
+
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->lkey = sq->mkey_be;
+ dseg->byte_count = cpu_to_be32(fsz);
+ mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
+
+ tx_fill_wi(sq, pi, num_wqebbs, frag);
+ sq->pc += num_wqebbs;
+
+ WARN(num_wqebbs > MLX5E_KTLS_MAX_DUMP_WQEBBS,
+ "unexpected DUMP num_wqebbs, %d > %d",
+ num_wqebbs, MLX5E_KTLS_MAX_DUMP_WQEBBS);
+
+ return 0;
+}
+
+void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ struct mlx5e_sq_dma *dma)
+{
+ struct mlx5e_sq_stats *stats = sq->stats;
+
+ mlx5e_tx_dma_unmap(sq->pdev, dma);
+ __skb_frag_unref(wi->resync_dump_frag);
+ stats->tls_dump_packets++;
+ stats->tls_dump_bytes += wi->num_bytes;
+}
+
+static void tx_post_fence_nop(struct mlx5e_txqsq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+ tx_fill_wi(sq, pi, 1, NULL);
+
+ mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc);
+}
+
+static struct sk_buff *
+mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ u32 seq)
+{
+ struct mlx5e_sq_stats *stats = sq->stats;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct tx_sync_info info = {};
+ u16 contig_wqebbs_room, pi;
+ u8 num_wqebbs;
+ int i;
+
+ if (!tx_sync_info_get(priv_tx, seq, &info)) {
+ /* We might get here if a retransmission reaches the driver
+ * after the relevant record is acked.
+ * It should be safe to drop the packet in this case
+ */
+ stats->tls_drop_no_sync_data++;
+ goto err_out;
+ }
+
+ if (unlikely(info.sync_len < 0)) {
+ u32 payload;
+ int headln;
+
+ headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ payload = skb->len - headln;
+ if (likely(payload <= -info.sync_len))
+ return skb;
+
+ stats->tls_drop_bypass_req++;
+ goto err_out;
+ }
+
+ stats->tls_ooo++;
+
+ num_wqebbs = MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS +
+ (info.nr_frags ? info.nr_frags * MLX5E_KTLS_MAX_DUMP_WQEBBS : 1);
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs_room < num_wqebbs))
+ mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+
+ tx_post_resync_params(sq, priv_tx, info.rcd_sn);
+
+ for (i = 0; i < info.nr_frags; i++)
+ if (tx_post_resync_dump(sq, skb, info.frags[i],
+ priv_tx->tisn, !i))
+ goto err_out;
+
+ /* If no dump WQE was sent, we need to have a fence NOP WQE before the
+ * actual data xmit.
+ */
+ if (!info.nr_frags)
+ tx_post_fence_nop(sq);
+
+ return skb;
+
+err_out:
+ dev_kfree_skb_any(skb);
+ return NULL;
+}
+
+struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe, u16 *pi)
+{
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+ struct mlx5e_sq_stats *stats = sq->stats;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct tls_context *tls_ctx;
+ int datalen;
+ u32 seq;
+
+ if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
+ goto out;
+
+ datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
+ if (!datalen)
+ goto out;
+
+ tls_ctx = tls_get_ctx(skb->sk);
+ if (unlikely(tls_ctx->netdev != netdev))
+ goto err_out;
+
+ priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
+
+ if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) {
+ mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false);
+ *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
+ stats->tls_ctx++;
+ }
+
+ seq = ntohl(tcp_hdr(skb)->seq);
+ if (unlikely(priv_tx->expected_seq != seq)) {
+ skb = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, skb, seq);
+ if (unlikely(!skb))
+ goto out;
+ *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
+ }
+
+ priv_tx->expected_seq = seq + datalen;
+
+ cseg = &(*wqe)->ctrl;
+ cseg->imm = cpu_to_be32(priv_tx->tisn);
+
+ stats->tls_encrypted_packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
+ stats->tls_encrypted_bytes += datalen;
+
+out:
+ return skb;
+
+err_out:
+ dev_kfree_skb_any(skb);
+ return NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
index dc15c5c9e557..f8b93b62a7d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
@@ -190,6 +190,11 @@ void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
struct net_device *netdev = priv->netdev;
u32 caps;
+ if (mlx5_accel_is_ktls_device(priv->mdev)) {
+ mlx5e_ktls_build_netdev(priv);
+ return;
+ }
+
if (!mlx5_accel_is_tls_device(priv->mdev))
return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
index 3f5d72163b56..9015f3f7792d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
@@ -33,8 +33,10 @@
#ifndef __MLX5E_TLS_H__
#define __MLX5E_TLS_H__
-#ifdef CONFIG_MLX5_EN_TLS
+#include "accel/tls.h"
+#include "en_accel/ktls.h"
+#ifdef CONFIG_MLX5_EN_TLS
#include <net/tls.h>
#include "en.h"
@@ -94,7 +96,12 @@ int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data);
#else
-static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { }
+static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
+{
+ if (mlx5_accel_is_ktls_device(priv->mdev))
+ mlx5e_ktls_build_netdev(priv);
+}
+
static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { }
static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index 439bf5953885..71384ad1a443 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -248,7 +248,7 @@ mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
cpu_to_be64(info.rcd_sn));
mlx5e_sq_xmit(sq, nskb, *wqe, *pi, true);
- mlx5e_sq_fetch_wqe(sq, wqe, pi);
+ *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
return skb;
err_out:
@@ -269,6 +269,11 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
int datalen;
u32 skb_seq;
+ if (MLX5_CAP_GEN(sq->channel->mdev, tls)) {
+ skb = mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi);
+ goto out;
+ }
+
if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
index 311667ec71b8..90bc1f2384c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
@@ -38,6 +38,7 @@
#include <linux/skbuff.h>
#include "en.h"
+#include "en/txrx.h"
struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
struct mlx5e_txqsq *sq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 554672edf8c3..8dd31b5c740c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -680,7 +680,7 @@ static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev,
memset(perm_addr, 0xff, MAX_ADDR_LEN);
- mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr);
+ mlx5_query_mac_address(priv->mdev, perm_addr);
}
static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
index d67adf70a97b..ca9cfbf57d8f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
@@ -30,22 +30,22 @@
* SOFTWARE.
*/
-#include <linux/net_dim.h>
+#include <linux/dim.h>
#include "en.h"
static void
-mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder,
+mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder,
struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq)
{
mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts);
- dim->state = NET_DIM_START_MEASURE;
+ dim->state = DIM_START_MEASURE;
}
void mlx5e_rx_dim_work(struct work_struct *work)
{
- struct net_dim *dim = container_of(work, struct net_dim, work);
+ struct dim *dim = container_of(work, struct dim, work);
struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim);
- struct net_dim_cq_moder cur_moder =
+ struct dim_cq_moder cur_moder =
net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq);
@@ -53,9 +53,9 @@ void mlx5e_rx_dim_work(struct work_struct *work)
void mlx5e_tx_dim_work(struct work_struct *work)
{
- struct net_dim *dim = container_of(work, struct net_dim, work);
+ struct dim *dim = container_of(work, struct dim, work);
struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim);
- struct net_dim_cq_moder cur_moder =
+ struct dim_cq_moder cur_moder =
net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index ea59097dd4f8..126ec4181286 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -32,6 +32,7 @@
#include "en.h"
#include "en/port.h"
+#include "en/xsk/umem.h"
#include "lib/clock.h"
void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
@@ -46,7 +47,7 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
"%d.%d.%04d (%.16s)",
fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev),
mdev->board_id);
- strlcpy(drvinfo->bus_info, pci_name(mdev->pdev),
+ strlcpy(drvinfo->bus_info, dev_name(mdev->device),
sizeof(drvinfo->bus_info));
}
@@ -388,8 +389,17 @@ static int mlx5e_set_ringparam(struct net_device *dev,
void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
struct ethtool_channels *ch)
{
+ mutex_lock(&priv->state_lock);
+
ch->max_combined = mlx5e_get_netdev_max_channels(priv->netdev);
ch->combined_count = priv->channels.params.num_channels;
+ if (priv->xsk.refcnt) {
+ /* The upper half are XSK queues. */
+ ch->max_combined *= 2;
+ ch->combined_count *= 2;
+ }
+
+ mutex_unlock(&priv->state_lock);
}
static void mlx5e_get_channels(struct net_device *dev,
@@ -403,6 +413,7 @@ static void mlx5e_get_channels(struct net_device *dev,
int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
struct ethtool_channels *ch)
{
+ struct mlx5e_params *cur_params = &priv->channels.params;
unsigned int count = ch->combined_count;
struct mlx5e_channels new_channels = {};
bool arfs_enabled;
@@ -414,16 +425,26 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
return -EINVAL;
}
- if (priv->channels.params.num_channels == count)
+ if (cur_params->num_channels == count)
return 0;
mutex_lock(&priv->state_lock);
+ /* Don't allow changing the number of channels if there is an active
+ * XSK, because the numeration of the XSK and regular RQs will change.
+ */
+ if (priv->xsk.refcnt) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n",
+ __func__);
+ goto out;
+ }
+
new_channels.params = priv->channels.params;
new_channels.params.num_channels = count;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- priv->channels.params = new_channels.params;
+ *cur_params = new_channels.params;
if (!netif_is_rxfh_configured(priv->netdev))
mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
MLX5E_INDIR_RQT_SIZE, count);
@@ -466,7 +487,7 @@ static int mlx5e_set_channels(struct net_device *dev,
int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal)
{
- struct net_dim_cq_moder *rx_moder, *tx_moder;
+ struct dim_cq_moder *rx_moder, *tx_moder;
if (!MLX5_CAP_GEN(priv->mdev, cq_moderation))
return -EOPNOTSUPP;
@@ -521,7 +542,7 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc
int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal)
{
- struct net_dim_cq_moder *rx_moder, *tx_moder;
+ struct dim_cq_moder *rx_moder, *tx_moder;
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_channels new_channels = {};
int err = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 4421c10f58ae..ea3a490b569a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -32,6 +32,8 @@
#include <linux/mlx5/fs.h>
#include "en.h"
+#include "en/params.h"
+#include "en/xsk/umem.h"
struct mlx5e_ethtool_rule {
struct list_head list;
@@ -414,6 +416,14 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
} else {
+ struct mlx5e_params *params = &priv->channels.params;
+ enum mlx5e_rq_group group;
+ struct mlx5e_tir *tir;
+ u16 ix;
+
+ mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group);
+ tir = group == MLX5E_RQ_GROUP_XSK ? priv->xsk_tir : priv->direct_tir;
+
dst = kzalloc(sizeof(*dst), GFP_KERNEL);
if (!dst) {
err = -ENOMEM;
@@ -421,12 +431,12 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
}
dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn;
+ dst->tir_num = tir[ix].tirn;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
}
spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
- flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -600,9 +610,9 @@ static int validate_flow(struct mlx5e_priv *priv,
if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES)
return -ENOSPC;
- if (fs->ring_cookie >= priv->channels.params.num_channels &&
- fs->ring_cookie != RX_CLS_FLOW_DISC)
- return -EINVAL;
+ if (fs->ring_cookie != RX_CLS_FLOW_DISC)
+ if (!mlx5e_qid_validate(&priv->channels.params, fs->ring_cookie))
+ return -EINVAL;
switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
case ETHER_FLOW:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5e40db8f92e6..83194d56434d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -38,8 +38,10 @@
#include <linux/bpf.h>
#include <linux/if_bridge.h>
#include <net/page_pool.h>
+#include <net/xdp_sock.h>
#include "eswitch.h"
#include "en.h"
+#include "en/txrx.h"
#include "en_tc.h"
#include "en_rep.h"
#include "en_accel/ipsec.h"
@@ -56,35 +58,11 @@
#include "en/monitor_stats.h"
#include "en/reporter.h"
#include "en/params.h"
+#include "en/xsk/umem.h"
+#include "en/xsk/setup.h"
+#include "en/xsk/rx.h"
+#include "en/xsk/tx.h"
-struct mlx5e_rq_param {
- u32 rqc[MLX5_ST_SZ_DW(rqc)];
- struct mlx5_wq_param wq;
- struct mlx5e_rq_frags_info frags_info;
-};
-
-struct mlx5e_sq_param {
- u32 sqc[MLX5_ST_SZ_DW(sqc)];
- struct mlx5_wq_param wq;
- bool is_mpw;
-};
-
-struct mlx5e_cq_param {
- u32 cqc[MLX5_ST_SZ_DW(cqc)];
- struct mlx5_wq_param wq;
- u16 eq_ix;
- u8 cq_period_mode;
-};
-
-struct mlx5e_channel_param {
- struct mlx5e_rq_param rq;
- struct mlx5e_sq_param sq;
- struct mlx5e_sq_param xdp_sq;
- struct mlx5e_sq_param icosq;
- struct mlx5e_cq_param rx_cq;
- struct mlx5e_cq_param tx_cq;
- struct mlx5e_cq_param icosq_cq;
-};
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
@@ -114,18 +92,31 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
- BIT(mlx5e_mpwqe_get_log_rq_size(params)) :
+ BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) :
BIT(params->log_rq_mtu_frames),
- BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)),
+ BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
}
bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
- return mlx5e_check_fragmented_striding_rq_cap(mdev) &&
- !MLX5_IPSEC_DEV(mdev) &&
- !(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params));
+ if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
+ return false;
+
+ if (MLX5_IPSEC_DEV(mdev))
+ return false;
+
+ if (params->xdp_prog) {
+ /* XSK params are not considered here. If striding RQ is in use,
+ * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will
+ * be called with the known XSK params.
+ */
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
+ return false;
+ }
+
+ return true;
}
void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
@@ -394,6 +385,8 @@ static void mlx5e_free_di_list(struct mlx5e_rq *rq)
static int mlx5e_alloc_rq(struct mlx5e_channel *c,
struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct xdp_umem *umem,
struct mlx5e_rq_param *rqp,
struct mlx5e_rq *rq)
{
@@ -401,6 +394,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
struct mlx5_core_dev *mdev = c->mdev;
void *rqc = rqp->rqc;
void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ u32 num_xsk_frames = 0;
+ u32 rq_xdp_ix;
u32 pool_size;
int wq_sz;
int err;
@@ -417,7 +412,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq->ix = c->ix;
rq->mdev = mdev;
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- rq->stats = &c->priv->channel_stats[c->ix].rq;
+ rq->xdpsq = &c->rq_xdpsq;
+ rq->umem = umem;
+
+ if (rq->umem)
+ rq->stats = &c->priv->channel_stats[c->ix].xskrq;
+ else
+ rq->stats = &c->priv->channel_stats[c->ix].rq;
rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL;
if (IS_ERR(rq->xdp_prog)) {
@@ -426,12 +427,16 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_rq_wq_destroy;
}
- err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix);
+ rq_xdp_ix = rq->ix;
+ if (xsk)
+ rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK;
+ err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix);
if (err < 0)
goto err_rq_wq_destroy;
rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
- rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params);
+ rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
+ rq->buff.umem_headroom = xsk ? xsk->headroom : 0;
pool_size = 1 << params->log_rq_mtu_frames;
switch (rq->wq_type) {
@@ -445,7 +450,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
- pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params);
+ if (xsk)
+ num_xsk_frames = wq_sz <<
+ mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+
+ pool_size = MLX5_MPWRQ_PAGES_PER_WQE <<
+ mlx5e_mpwqe_get_log_rq_size(params, xsk);
rq->post_wqes = mlx5e_post_rx_mpwqes;
rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
@@ -464,12 +474,15 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_rq_wq_destroy;
}
- rq->mpwqe.skb_from_cqe_mpwrq =
- mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ?
- mlx5e_skb_from_cqe_mpwrq_linear :
- mlx5e_skb_from_cqe_mpwrq_nonlinear;
- rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params);
- rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params));
+ rq->mpwqe.skb_from_cqe_mpwrq = xsk ?
+ mlx5e_xsk_skb_from_cqe_mpwrq_linear :
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ?
+ mlx5e_skb_from_cqe_mpwrq_linear :
+ mlx5e_skb_from_cqe_mpwrq_nonlinear;
+
+ rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ rq->mpwqe.num_strides =
+ BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
err = mlx5e_create_rq_umr_mkey(mdev, rq);
if (err)
@@ -490,6 +503,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
+ if (xsk)
+ num_xsk_frames = wq_sz << rq->wqe.info.log_num_frags;
+
rq->wqe.info = rqp->frags_info;
rq->wqe.frags =
kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
@@ -503,6 +519,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
err = mlx5e_init_di_list(rq, wq_sz, c->cpu);
if (err)
goto err_free;
+
rq->post_wqes = mlx5e_post_rx_wqes;
rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
@@ -518,37 +535,53 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_free;
}
- rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(params) ?
- mlx5e_skb_from_cqe_linear :
- mlx5e_skb_from_cqe_nonlinear;
+ rq->wqe.skb_from_cqe = xsk ?
+ mlx5e_xsk_skb_from_cqe_linear :
+ mlx5e_rx_is_linear_skb(params, NULL) ?
+ mlx5e_skb_from_cqe_linear :
+ mlx5e_skb_from_cqe_nonlinear;
rq->mkey_be = c->mkey_be;
}
- /* Create a page_pool and register it with rxq */
- pp_params.order = 0;
- pp_params.flags = 0; /* No-internal DMA mapping in page_pool */
- pp_params.pool_size = pool_size;
- pp_params.nid = cpu_to_node(c->cpu);
- pp_params.dev = c->pdev;
- pp_params.dma_dir = rq->buff.map_dir;
-
- /* page_pool can be used even when there is no rq->xdp_prog,
- * given page_pool does not handle DMA mapping there is no
- * required state to clear. And page_pool gracefully handle
- * elevated refcnt.
- */
- rq->page_pool = page_pool_create(&pp_params);
- if (IS_ERR(rq->page_pool)) {
- err = PTR_ERR(rq->page_pool);
- rq->page_pool = NULL;
- goto err_free;
+ if (xsk) {
+ err = mlx5e_xsk_resize_reuseq(umem, num_xsk_frames);
+ if (unlikely(err)) {
+ mlx5_core_err(mdev, "Unable to allocate the Reuse Ring for %u frames\n",
+ num_xsk_frames);
+ goto err_free;
+ }
+
+ rq->zca.free = mlx5e_xsk_zca_free;
+ err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+ MEM_TYPE_ZERO_COPY,
+ &rq->zca);
+ } else {
+ /* Create a page_pool and register it with rxq */
+ pp_params.order = 0;
+ pp_params.flags = 0; /* No-internal DMA mapping in page_pool */
+ pp_params.pool_size = pool_size;
+ pp_params.nid = cpu_to_node(c->cpu);
+ pp_params.dev = c->pdev;
+ pp_params.dma_dir = rq->buff.map_dir;
+
+ /* page_pool can be used even when there is no rq->xdp_prog,
+ * given page_pool does not handle DMA mapping there is no
+ * required state to clear. And page_pool gracefully handle
+ * elevated refcnt.
+ */
+ rq->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(rq->page_pool)) {
+ err = PTR_ERR(rq->page_pool);
+ rq->page_pool = NULL;
+ goto err_free;
+ }
+ err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+ MEM_TYPE_PAGE_POOL, rq->page_pool);
+ if (err)
+ page_pool_free(rq->page_pool);
}
- err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
- MEM_TYPE_PAGE_POOL, rq->page_pool);
- if (err) {
- page_pool_free(rq->page_pool);
+ if (err)
goto err_free;
- }
for (i = 0; i < wq_sz; i++) {
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
@@ -586,11 +619,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
switch (params->rx_cq_moderation.cq_period_mode) {
case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
- rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
+ rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
break;
case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
default:
- rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
rq->page_cache.head = 0;
@@ -639,7 +672,11 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i];
- mlx5e_page_release(rq, dma_info, false);
+ /* With AF_XDP, page_cache is not used, so this loop is not
+ * entered, and it's safe to call mlx5e_page_release_dynamic
+ * directly.
+ */
+ mlx5e_page_release_dynamic(rq, dma_info, false);
}
xdp_rxq_info_unreg(&rq->xdp_rxq);
@@ -776,7 +813,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
mlx5_core_destroy_rq(rq->mdev, rq->rqn);
}
-static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
+int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
{
unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time);
struct mlx5e_channel *c = rq->channel;
@@ -834,14 +871,13 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
}
-static int mlx5e_open_rq(struct mlx5e_channel *c,
- struct mlx5e_params *params,
- struct mlx5e_rq_param *param,
- struct mlx5e_rq *rq)
+int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
+ struct xdp_umem *umem, struct mlx5e_rq *rq)
{
int err;
- err = mlx5e_alloc_rq(c, params, param, rq);
+ err = mlx5e_alloc_rq(c, params, xsk, umem, param, rq);
if (err)
return err;
@@ -879,13 +915,13 @@ static void mlx5e_activate_rq(struct mlx5e_rq *rq)
mlx5e_trigger_irq(&rq->channel->icosq);
}
-static void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
+void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
{
clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
}
-static void mlx5e_close_rq(struct mlx5e_rq *rq)
+void mlx5e_close_rq(struct mlx5e_rq *rq)
{
cancel_work_sync(&rq->dim.work);
mlx5e_destroy_rq(rq);
@@ -938,6 +974,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
struct mlx5e_params *params,
+ struct xdp_umem *umem,
struct mlx5e_sq_param *param,
struct mlx5e_xdpsq *sq,
bool is_redirect)
@@ -953,9 +990,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- sq->stats = is_redirect ?
- &c->priv->channel_stats[c->ix].xdpsq :
- &c->priv->channel_stats[c->ix].rq_xdpsq;
+ sq->umem = umem;
+
+ sq->stats = sq->umem ?
+ &c->priv->channel_stats[c->ix].xsksq :
+ is_redirect ?
+ &c->priv->channel_stats[c->ix].xdpsq :
+ &c->priv->channel_stats[c->ix].rq_xdpsq;
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1085,11 +1126,14 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
+ sq->stop_room = MLX5E_SQ_STOP_ROOM;
INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
- if (mlx5_accel_is_tls_device(c->priv->mdev))
+ if (mlx5_accel_is_tls_device(c->priv->mdev)) {
set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
+ sq->stop_room += MLX5E_SQ_TLS_ROOM;
+ }
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1335,10 +1379,8 @@ static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
mlx5e_tx_reporter_err_cqe(sq);
}
-static int mlx5e_open_icosq(struct mlx5e_channel *c,
- struct mlx5e_params *params,
- struct mlx5e_sq_param *param,
- struct mlx5e_icosq *sq)
+int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct mlx5e_icosq *sq)
{
struct mlx5e_create_sq_param csp = {};
int err;
@@ -1364,7 +1406,7 @@ err_free_icosq:
return err;
}
-static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+void mlx5e_close_icosq(struct mlx5e_icosq *sq)
{
struct mlx5e_channel *c = sq->channel;
@@ -1375,16 +1417,14 @@ static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
mlx5e_free_icosq(sq);
}
-static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
- struct mlx5e_params *params,
- struct mlx5e_sq_param *param,
- struct mlx5e_xdpsq *sq,
- bool is_redirect)
+int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct xdp_umem *umem,
+ struct mlx5e_xdpsq *sq, bool is_redirect)
{
struct mlx5e_create_sq_param csp = {};
int err;
- err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect);
+ err = mlx5e_alloc_xdpsq(c, params, umem, param, sq, is_redirect);
if (err)
return err;
@@ -1438,7 +1478,7 @@ err_free_xdpsq:
return err;
}
-static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
+void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
{
struct mlx5e_channel *c = sq->channel;
@@ -1446,7 +1486,7 @@ static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
napi_synchronize(&c->napi);
mlx5e_destroy_sq(c->mdev, sq->sqn);
- mlx5e_free_xdpsq_descs(sq, rq);
+ mlx5e_free_xdpsq_descs(sq);
mlx5e_free_xdpsq(sq);
}
@@ -1516,6 +1556,7 @@ static void mlx5e_free_cq(struct mlx5e_cq *cq)
static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
{
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
struct mlx5_core_dev *mdev = cq->mdev;
struct mlx5_core_cq *mcq = &cq->mcq;
@@ -1550,7 +1591,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
- err = mlx5_core_create_cq(mdev, mcq, in, inlen);
+ err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
kvfree(in);
@@ -1567,10 +1608,8 @@ static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
}
-static int mlx5e_open_cq(struct mlx5e_channel *c,
- struct net_dim_cq_moder moder,
- struct mlx5e_cq_param *param,
- struct mlx5e_cq *cq)
+int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder,
+ struct mlx5e_cq_param *param, struct mlx5e_cq *cq)
{
struct mlx5_core_dev *mdev = c->mdev;
int err;
@@ -1593,7 +1632,7 @@ err_free_cq:
return err;
}
-static void mlx5e_close_cq(struct mlx5e_cq *cq)
+void mlx5e_close_cq(struct mlx5e_cq *cq)
{
mlx5e_destroy_cq(cq);
mlx5e_free_cq(cq);
@@ -1767,49 +1806,16 @@ static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c)
free_cpumask_var(c->xps_cpumask);
}
-static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
- struct mlx5e_params *params,
- struct mlx5e_channel_param *cparam,
- struct mlx5e_channel **cp)
+static int mlx5e_open_queues(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam)
{
- int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
- struct net_dim_cq_moder icocq_moder = {0, 0};
- struct net_device *netdev = priv->netdev;
- struct mlx5e_channel *c;
- unsigned int irq;
+ struct dim_cq_moder icocq_moder = {0, 0};
int err;
- int eqn;
-
- err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
- if (err)
- return err;
-
- c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
- if (!c)
- return -ENOMEM;
-
- c->priv = priv;
- c->mdev = priv->mdev;
- c->tstamp = &priv->tstamp;
- c->ix = ix;
- c->cpu = cpu;
- c->pdev = priv->mdev->device;
- c->netdev = priv->netdev;
- c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
- c->num_tc = params->num_tc;
- c->xdp = !!params->xdp_prog;
- c->stats = &priv->channel_stats[ix].ch;
- c->irq_desc = irq_to_desc(irq);
-
- err = mlx5e_alloc_xps_cpumask(c, params);
- if (err)
- goto err_free_channel;
-
- netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq);
if (err)
- goto err_napi_del;
+ return err;
err = mlx5e_open_tx_cqs(c, params, cparam);
if (err)
@@ -1825,7 +1831,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
/* XDP SQ CQ params are same as normal TXQ sq CQ params */
err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation,
- &cparam->tx_cq, &c->rq.xdpsq.cq) : 0;
+ &cparam->tx_cq, &c->rq_xdpsq.cq) : 0;
if (err)
goto err_close_rx_cq;
@@ -1839,20 +1845,21 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
if (err)
goto err_close_icosq;
- err = c->xdp ? mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq, false) : 0;
- if (err)
- goto err_close_sqs;
+ if (c->xdp) {
+ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL,
+ &c->rq_xdpsq, false);
+ if (err)
+ goto err_close_sqs;
+ }
- err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq);
+ err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq);
if (err)
goto err_close_xdp_sq;
- err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->xdpsq, true);
+ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true);
if (err)
goto err_close_rq;
- *cp = c;
-
return 0;
err_close_rq:
@@ -1860,7 +1867,7 @@ err_close_rq:
err_close_xdp_sq:
if (c->xdp)
- mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq);
+ mlx5e_close_xdpsq(&c->rq_xdpsq);
err_close_sqs:
mlx5e_close_sqs(c);
@@ -1870,8 +1877,9 @@ err_close_icosq:
err_disable_napi:
napi_disable(&c->napi);
+
if (c->xdp)
- mlx5e_close_cq(&c->rq.xdpsq.cq);
+ mlx5e_close_cq(&c->rq_xdpsq.cq);
err_close_rx_cq:
mlx5e_close_cq(&c->rq.cq);
@@ -1885,6 +1893,85 @@ err_close_tx_cqs:
err_close_icosq_cq:
mlx5e_close_cq(&c->icosq.cq);
+ return err;
+}
+
+static void mlx5e_close_queues(struct mlx5e_channel *c)
+{
+ mlx5e_close_xdpsq(&c->xdpsq);
+ mlx5e_close_rq(&c->rq);
+ if (c->xdp)
+ mlx5e_close_xdpsq(&c->rq_xdpsq);
+ mlx5e_close_sqs(c);
+ mlx5e_close_icosq(&c->icosq);
+ napi_disable(&c->napi);
+ if (c->xdp)
+ mlx5e_close_cq(&c->rq_xdpsq.cq);
+ mlx5e_close_cq(&c->rq.cq);
+ mlx5e_close_cq(&c->xdpsq.cq);
+ mlx5e_close_tx_cqs(c);
+ mlx5e_close_cq(&c->icosq.cq);
+}
+
+static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam,
+ struct xdp_umem *umem,
+ struct mlx5e_channel **cp)
+{
+ int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
+ struct net_device *netdev = priv->netdev;
+ struct mlx5e_xsk_param xsk;
+ struct mlx5e_channel *c;
+ unsigned int irq;
+ int err;
+ int eqn;
+
+ err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
+ if (err)
+ return err;
+
+ c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
+ if (!c)
+ return -ENOMEM;
+
+ c->priv = priv;
+ c->mdev = priv->mdev;
+ c->tstamp = &priv->tstamp;
+ c->ix = ix;
+ c->cpu = cpu;
+ c->pdev = priv->mdev->device;
+ c->netdev = priv->netdev;
+ c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
+ c->num_tc = params->num_tc;
+ c->xdp = !!params->xdp_prog;
+ c->stats = &priv->channel_stats[ix].ch;
+ c->irq_desc = irq_to_desc(irq);
+
+ err = mlx5e_alloc_xps_cpumask(c, params);
+ if (err)
+ goto err_free_channel;
+
+ netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
+
+ err = mlx5e_open_queues(c, params, cparam);
+ if (unlikely(err))
+ goto err_napi_del;
+
+ if (umem) {
+ mlx5e_build_xsk_param(umem, &xsk);
+ err = mlx5e_open_xsk(priv, params, &xsk, umem, c);
+ if (unlikely(err))
+ goto err_close_queues;
+ }
+
+ *cp = c;
+
+ return 0;
+
+err_close_queues:
+ mlx5e_close_queues(c);
+
err_napi_del:
netif_napi_del(&c->napi);
mlx5e_free_xps_cpumask(c);
@@ -1903,12 +1990,18 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
mlx5e_activate_txqsq(&c->sq[tc]);
mlx5e_activate_rq(&c->rq);
netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix);
+
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ mlx5e_activate_xsk(c);
}
static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
{
int tc;
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ mlx5e_deactivate_xsk(c);
+
mlx5e_deactivate_rq(&c->rq);
for (tc = 0; tc < c->num_tc; tc++)
mlx5e_deactivate_txqsq(&c->sq[tc]);
@@ -1916,19 +2009,9 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
static void mlx5e_close_channel(struct mlx5e_channel *c)
{
- mlx5e_close_xdpsq(&c->xdpsq, NULL);
- mlx5e_close_rq(&c->rq);
- if (c->xdp)
- mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq);
- mlx5e_close_sqs(c);
- mlx5e_close_icosq(&c->icosq);
- napi_disable(&c->napi);
- if (c->xdp)
- mlx5e_close_cq(&c->rq.xdpsq.cq);
- mlx5e_close_cq(&c->rq.cq);
- mlx5e_close_cq(&c->xdpsq.cq);
- mlx5e_close_tx_cqs(c);
- mlx5e_close_cq(&c->icosq.cq);
+ if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ mlx5e_close_xsk(c);
+ mlx5e_close_queues(c);
netif_napi_del(&c->napi);
mlx5e_free_xps_cpumask(c);
@@ -1939,6 +2022,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
struct mlx5e_rq_frags_info *info)
{
u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
@@ -1951,10 +2035,10 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
byte_count += MLX5E_METADATA_ETHER_LEN;
#endif
- if (mlx5e_rx_is_linear_skb(params)) {
+ if (mlx5e_rx_is_linear_skb(params, xsk)) {
int frag_stride;
- frag_stride = mlx5e_rx_get_linear_frag_sz(params);
+ frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk);
frag_stride = roundup_pow_of_two(frag_stride);
info->arr[0].frag_size = byte_count;
@@ -2012,9 +2096,10 @@ static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
return MLX5_GET(wq, wq, log_wq_sz);
}
-static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_rq_param *param)
+void mlx5e_build_rq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_param *param)
{
struct mlx5_core_dev *mdev = priv->mdev;
void *rqc = param->rqc;
@@ -2024,16 +2109,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
MLX5_SET(wq, wq, log_wqe_num_of_strides,
- mlx5e_mpwqe_get_log_num_strides(mdev, params) -
+ mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) -
MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
MLX5_SET(wq, wq, log_wqe_stride_size,
- mlx5e_mpwqe_get_log_stride_size(mdev, params) -
+ mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) -
MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
- MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params));
+ MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk));
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
- mlx5e_build_rq_frags_info(mdev, params, &param->frags_info);
+ mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
ndsegs = param->frags_info.num_frags;
}
@@ -2064,8 +2149,8 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
param->wq.buf_numa_node = dev_to_node(mdev->device);
}
-static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
- struct mlx5e_sq_param *param)
+void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+ struct mlx5e_sq_param *param)
{
void *sqc = param->sqc;
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
@@ -2101,9 +2186,10 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
}
-static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_cq_param *param)
+void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_cq_param *param)
{
struct mlx5_core_dev *mdev = priv->mdev;
void *cqc = param->cqc;
@@ -2111,8 +2197,8 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) +
- mlx5e_mpwqe_get_log_num_strides(mdev, params);
+ log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
+ mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
log_cq_size = params->log_rq_mtu_frames;
@@ -2128,9 +2214,9 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
}
-static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_cq_param *param)
+void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param)
{
void *cqc = param->cqc;
@@ -2140,9 +2226,9 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
}
-static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
- u8 log_wq_size,
- struct mlx5e_cq_param *param)
+void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_cq_param *param)
{
void *cqc = param->cqc;
@@ -2150,12 +2236,12 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
mlx5e_build_common_cq_param(priv, param);
- param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
-static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
- u8 log_wq_size,
- struct mlx5e_sq_param *param)
+void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param)
{
void *sqc = param->sqc;
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
@@ -2166,9 +2252,9 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
}
-static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
- struct mlx5e_params *params,
- struct mlx5e_sq_param *param)
+void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param)
{
void *sqc = param->sqc;
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
@@ -2196,14 +2282,14 @@ static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
{
u8 icosq_log_wq_sz;
- mlx5e_build_rq_param(priv, params, &cparam->rq);
+ mlx5e_build_rq_param(priv, params, NULL, &cparam->rq);
icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq);
mlx5e_build_sq_param(priv, params, &cparam->sq);
mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq);
- mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq);
+ mlx5e_build_rx_cq_param(priv, params, NULL, &cparam->rx_cq);
mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq);
mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq);
}
@@ -2224,7 +2310,12 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
mlx5e_build_channel_param(priv, &chs->params, cparam);
for (i = 0; i < chs->num; i++) {
- err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]);
+ struct xdp_umem *umem = NULL;
+
+ if (chs->params.xdp_prog)
+ umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, i);
+
+ err = mlx5e_open_channel(priv, i, &chs->params, cparam, umem, &chs->c[i]);
if (err)
goto err_close_channels;
}
@@ -2266,6 +2357,10 @@ static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT;
err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout);
+
+ /* Don't wait on the XSK RQ, because the newer xdpsock sample
+ * doesn't provide any Fill Ring entries at the setup stage.
+ */
}
return err ? -ETIMEDOUT : 0;
@@ -2338,35 +2433,35 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv)
return err;
}
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv)
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
{
- struct mlx5e_rqt *rqt;
+ const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
int err;
int ix;
- for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) {
- rqt = &priv->direct_tir[ix].rqt;
- err = mlx5e_create_rqt(priv, 1 /*size */, rqt);
- if (err)
+ for (ix = 0; ix < max_nch; ix++) {
+ err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt);
+ if (unlikely(err))
goto err_destroy_rqts;
}
return 0;
err_destroy_rqts:
- mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err);
+ mlx5_core_warn(priv->mdev, "create rqts failed, %d\n", err);
for (ix--; ix >= 0; ix--)
- mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt);
+ mlx5e_destroy_rqt(priv, &tirs[ix].rqt);
return err;
}
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv)
+void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
{
+ const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
int i;
- for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++)
- mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
+ for (i = 0; i < max_nch; i++)
+ mlx5e_destroy_rqt(priv, &tirs[i].rqt);
}
static int mlx5e_rx_hash_fn(int hfunc)
@@ -2786,11 +2881,12 @@ static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv)
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
{
int num_txqs = priv->channels.num * priv->channels.params.num_tc;
+ int num_rxqs = priv->channels.num * MLX5E_NUM_RQ_GROUPS;
struct net_device *netdev = priv->netdev;
mlx5e_netdev_set_tcs(netdev);
netif_set_real_num_tx_queues(netdev, num_txqs);
- netif_set_real_num_rx_queues(netdev, priv->channels.num);
+ netif_set_real_num_rx_queues(netdev, num_rxqs);
mlx5e_build_tx2sq_maps(priv);
mlx5e_activate_channels(&priv->channels);
@@ -2802,10 +2898,14 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
mlx5e_wait_channels_min_rx_wqes(&priv->channels);
mlx5e_redirect_rqts_to_channels(priv, &priv->channels);
+
+ mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels);
}
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
{
+ mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels);
+
mlx5e_redirect_rqts_to_drop(priv);
if (mlx5e_is_vport_rep(priv))
@@ -2845,7 +2945,7 @@ static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
if (hw_modify)
hw_modify(priv);
- mlx5e_refresh_tirs(priv, false);
+ priv->profile->update_rx(priv);
mlx5e_activate_priv_channels(priv);
/* return carrier back if needed */
@@ -2884,15 +2984,18 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
int mlx5e_open_locked(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ bool is_xdp = priv->channels.params.xdp_prog;
int err;
set_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (is_xdp)
+ mlx5e_xdp_set_open(priv);
err = mlx5e_open_channels(priv, &priv->channels);
if (err)
goto err_clear_state_opened_flag;
- mlx5e_refresh_tirs(priv, false);
+ priv->profile->update_rx(priv);
mlx5e_activate_priv_channels(priv);
if (priv->profile->update_carrier)
priv->profile->update_carrier(priv);
@@ -2901,6 +3004,8 @@ int mlx5e_open_locked(struct net_device *netdev)
return 0;
err_clear_state_opened_flag:
+ if (is_xdp)
+ mlx5e_xdp_set_closed(priv);
clear_bit(MLX5E_STATE_OPENED, &priv->state);
return err;
}
@@ -2932,6 +3037,8 @@ int mlx5e_close_locked(struct net_device *netdev)
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
return 0;
+ if (priv->channels.params.xdp_prog)
+ mlx5e_xdp_set_closed(priv);
clear_bit(MLX5E_STATE_OPENED, &priv->state);
netif_carrier_off(priv->netdev);
@@ -3043,20 +3150,19 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
mlx5e_free_cq(&drop_rq->cq);
}
-int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
- u32 underlay_qpn, u32 *tisn)
+int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn)
{
- u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
- MLX5_SET(tisc, tisc, prio, tc << 1);
- MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn);
MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn);
+ if (MLX5_GET(tisc, tisc, tls_en))
+ MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn);
+
if (mlx5_lag_is_lacp_owner(mdev))
MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
- return mlx5_core_create_tis(mdev, in, sizeof(in), tisn);
+ return mlx5_core_create_tis(mdev, in, MLX5_ST_SZ_BYTES(create_tis_in), tisn);
}
void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn)
@@ -3070,7 +3176,14 @@ int mlx5e_create_tises(struct mlx5e_priv *priv)
int tc;
for (tc = 0; tc < priv->profile->max_tc; tc++) {
- err = mlx5e_create_tis(priv->mdev, tc, 0, &priv->tisn[tc]);
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
+ void *tisc;
+
+ tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ MLX5_SET(tisc, tisc, prio, tc << 1);
+
+ err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[tc]);
if (err)
goto err_close_tises;
}
@@ -3188,13 +3301,13 @@ err_destroy_inner_tirs:
return err;
}
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv)
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
{
- int nch = mlx5e_get_netdev_max_channels(priv->netdev);
+ const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
struct mlx5e_tir *tir;
void *tirc;
int inlen;
- int err;
+ int err = 0;
u32 *in;
int ix;
@@ -3203,25 +3316,24 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv)
if (!in)
return -ENOMEM;
- for (ix = 0; ix < nch; ix++) {
+ for (ix = 0; ix < max_nch; ix++) {
memset(in, 0, inlen);
- tir = &priv->direct_tir[ix];
+ tir = &tirs[ix];
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
- mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc);
+ mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc);
err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
- if (err)
+ if (unlikely(err))
goto err_destroy_ch_tirs;
}
- kvfree(in);
-
- return 0;
+ goto out;
err_destroy_ch_tirs:
- mlx5_core_warn(priv->mdev, "create direct tirs failed, %d\n", err);
+ mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err);
for (ix--; ix >= 0; ix--)
- mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]);
+ mlx5e_destroy_tir(priv->mdev, &tirs[ix]);
+out:
kvfree(in);
return err;
@@ -3241,13 +3353,13 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
}
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv)
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
{
- int nch = mlx5e_get_netdev_max_channels(priv->netdev);
+ const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
int i;
- for (i = 0; i < nch; i++)
- mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]);
+ for (i = 0; i < max_nch; i++)
+ mlx5e_destroy_tir(priv->mdev, &tirs[i]);
}
static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
@@ -3389,11 +3501,12 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) {
struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i];
+ struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
int j;
- s->rx_packets += rq_stats->packets;
- s->rx_bytes += rq_stats->bytes;
+ s->rx_packets += rq_stats->packets + xskrq_stats->packets;
+ s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes;
for (j = 0; j < priv->max_opened_tc; j++) {
struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
@@ -3492,6 +3605,13 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
mutex_lock(&priv->state_lock);
+ if (enable && priv->xsk.refcnt) {
+ netdev_warn(netdev, "LRO is incompatible with AF_XDP (%hu XSKs are active)\n",
+ priv->xsk.refcnt);
+ err = -EINVAL;
+ goto out;
+ }
+
old_params = &priv->channels.params;
if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
netdev_warn(netdev, "can't set LRO with legacy RQ\n");
@@ -3505,8 +3625,8 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
new_channels.params.lro_en = enable;
if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
- if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) ==
- mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params))
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params, NULL) ==
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL))
reset = false;
}
@@ -3696,6 +3816,43 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
return features;
}
+static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
+ struct mlx5e_channels *chs,
+ struct mlx5e_params *new_params,
+ struct mlx5_core_dev *mdev)
+{
+ u16 ix;
+
+ for (ix = 0; ix < chs->params.num_channels; ix++) {
+ struct xdp_umem *umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, ix);
+ struct mlx5e_xsk_param xsk;
+
+ if (!umem)
+ continue;
+
+ mlx5e_build_xsk_param(umem, &xsk);
+
+ if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) {
+ u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk);
+ int max_mtu_frame, max_mtu_page, max_mtu;
+
+ /* Two criteria must be met:
+ * 1. HW MTU + all headrooms <= XSK frame size.
+ * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE.
+ */
+ max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr);
+ max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk);
+ max_mtu = min(max_mtu_frame, max_mtu_page);
+
+ netdev_err(netdev, "MTU %d is too big for an XSK running on channel %hu. Try MTU <= %d\n",
+ new_params->sw_mtu, ix, max_mtu);
+ return false;
+ }
+ }
+
+ return true;
+}
+
int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
change_hw_mtu_cb set_mtu_cb)
{
@@ -3716,18 +3873,31 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
new_channels.params.sw_mtu = new_mtu;
if (params->xdp_prog &&
- !mlx5e_rx_is_linear_skb(&new_channels.params)) {
+ !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) {
netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
- new_mtu, mlx5e_xdp_max_mtu(params));
+ new_mtu, mlx5e_xdp_max_mtu(params, NULL));
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (priv->xsk.refcnt &&
+ !mlx5e_xsk_validate_mtu(netdev, &priv->channels,
+ &new_channels.params, priv->mdev)) {
err = -EINVAL;
goto out;
}
if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
- bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_channels.params);
- u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params);
- u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params);
+ bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev,
+ &new_channels.params,
+ NULL);
+ u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL);
+ u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params, NULL);
+ /* If XSK is active, XSK RQs are linear. */
+ is_linear |= priv->xsk.refcnt;
+
+ /* Always reset in linear mode - hw_mtu is used in data path. */
reset = reset && (is_linear || (ppw_old != ppw_new));
}
@@ -4160,16 +4330,29 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
new_channels.params = priv->channels.params;
new_channels.params.xdp_prog = prog;
- if (!mlx5e_rx_is_linear_skb(&new_channels.params)) {
+ /* No XSK params: AF_XDP can't be enabled yet at the point of setting
+ * the XDP program.
+ */
+ if (!mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) {
netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
new_channels.params.sw_mtu,
- mlx5e_xdp_max_mtu(&new_channels.params));
+ mlx5e_xdp_max_mtu(&new_channels.params, NULL));
return -EINVAL;
}
return 0;
}
+static int mlx5e_xdp_update_state(struct mlx5e_priv *priv)
+{
+ if (priv->channels.params.xdp_prog)
+ mlx5e_xdp_set_open(priv);
+ else
+ mlx5e_xdp_set_closed(priv);
+
+ return 0;
+}
+
static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4190,8 +4373,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
/* no need for full reset when exchanging programs */
reset = (!priv->channels.params.xdp_prog || !prog);
- if (was_opened && reset)
- mlx5e_close_locked(netdev);
if (was_opened && !reset) {
/* num_channels is invariant here, so we can take the
* batched reference right upfront.
@@ -4203,20 +4384,31 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
}
}
- /* exchange programs, extra prog reference we got from caller
- * as long as we don't fail from this point onwards.
- */
- old_prog = xchg(&priv->channels.params.xdp_prog, prog);
+ if (was_opened && reset) {
+ struct mlx5e_channels new_channels = {};
+
+ new_channels.params = priv->channels.params;
+ new_channels.params.xdp_prog = prog;
+ mlx5e_set_rq_type(priv->mdev, &new_channels.params);
+ old_prog = priv->channels.params.xdp_prog;
+
+ err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_xdp_update_state);
+ if (err)
+ goto unlock;
+ } else {
+ /* exchange programs, extra prog reference we got from caller
+ * as long as we don't fail from this point onwards.
+ */
+ old_prog = xchg(&priv->channels.params.xdp_prog, prog);
+ }
+
if (old_prog)
bpf_prog_put(old_prog);
- if (reset) /* change RQ type according to priv->xdp_prog */
+ if (!was_opened && reset) /* change RQ type according to priv->xdp_prog */
mlx5e_set_rq_type(priv->mdev, &priv->channels.params);
- if (was_opened && reset)
- err = mlx5e_open_locked(netdev);
-
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
+ if (!was_opened || reset)
goto unlock;
/* exchanging programs w/o reset, we update ref counts on behalf
@@ -4224,19 +4416,29 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
*/
for (i = 0; i < priv->channels.num; i++) {
struct mlx5e_channel *c = priv->channels.c[i];
+ bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
+ if (xsk_open)
+ clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
napi_synchronize(&c->napi);
/* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */
old_prog = xchg(&c->rq.xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ if (xsk_open) {
+ old_prog = xchg(&c->xskrq.xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ }
set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
+ if (xsk_open)
+ set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
/* napi_schedule in case we have missed anything */
napi_schedule(&c->napi);
-
- if (old_prog)
- bpf_prog_put(old_prog);
}
unlock:
@@ -4267,6 +4469,9 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
case XDP_QUERY_PROG:
xdp->prog_id = mlx5e_xdp_query(dev);
return 0;
+ case XDP_SETUP_XSK_UMEM:
+ return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem,
+ xdp->xsk.queue_id);
default:
return -EINVAL;
}
@@ -4349,6 +4554,7 @@ const struct net_device_ops mlx5e_netdev_ops = {
.ndo_tx_timeout = mlx5e_tx_timeout,
.ndo_bpf = mlx5e_xdp,
.ndo_xdp_xmit = mlx5e_xdp_xmit,
+ .ndo_xsk_async_xmit = mlx5e_xsk_async_xmit,
#ifdef CONFIG_MLX5_EN_ARFS
.ndo_rx_flow_steer = mlx5e_rx_flow_steer,
#endif
@@ -4418,9 +4624,9 @@ static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
}
-static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
+static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
{
- struct net_dim_cq_moder moder;
+ struct dim_cq_moder moder;
moder.cq_period_mode = cq_period_mode;
moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
@@ -4431,9 +4637,9 @@ static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
return moder;
}
-static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
+static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
{
- struct net_dim_cq_moder moder;
+ struct dim_cq_moder moder;
moder.cq_period_mode = cq_period_mode;
moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
@@ -4447,8 +4653,8 @@ static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
{
return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
- NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE :
- NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+ DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
@@ -4500,11 +4706,13 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
* - Striding RQ configuration is not possible/supported.
* - Slow PCI heuristic.
* - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
+ *
+ * No XSK params: checking the availability of striding RQ in general.
*/
if (!slow_pci_heuristic(mdev) &&
mlx5e_striding_rq_possible(mdev, params) &&
- (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ||
- !mlx5e_rx_is_linear_skb(params)))
+ (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
+ !mlx5e_rx_is_linear_skb(params, NULL)))
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
mlx5e_set_rq_type(mdev, params);
mlx5e_init_rq_type_params(mdev, params);
@@ -4526,6 +4734,7 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
}
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_xsk *xsk,
struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
u16 max_channels, u16 mtu)
@@ -4561,9 +4770,11 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
/* HW LRO */
/* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */
- if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
- if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
+ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ /* No XSK params: checking the availability of striding RQ in general. */
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
params->lro_en = !slow_pci_heuristic(mdev);
+ }
params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
/* CQ moderation params */
@@ -4582,13 +4793,16 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
mlx5e_build_rss_params(rss_params, params->num_channels);
params->tunneled_offload_en =
mlx5e_tunnel_inner_ft_supported(mdev);
+
+ /* AF_XDP */
+ params->xsk = xsk;
}
static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr);
+ mlx5_query_mac_address(priv->mdev, netdev->dev_addr);
if (is_zero_ether_addr(netdev->dev_addr) &&
!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) {
eth_hw_addr_random(netdev);
@@ -4617,14 +4831,18 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->ethtool_ops = &mlx5e_ethtool_ops;
netdev->vlan_features |= NETIF_F_SG;
- netdev->vlan_features |= NETIF_F_IP_CSUM;
- netdev->vlan_features |= NETIF_F_IPV6_CSUM;
+ netdev->vlan_features |= NETIF_F_HW_CSUM;
netdev->vlan_features |= NETIF_F_GRO;
netdev->vlan_features |= NETIF_F_TSO;
netdev->vlan_features |= NETIF_F_TSO6;
netdev->vlan_features |= NETIF_F_RXCSUM;
netdev->vlan_features |= NETIF_F_RXHASH;
+ netdev->mpls_features |= NETIF_F_SG;
+ netdev->mpls_features |= NETIF_F_HW_CSUM;
+ netdev->mpls_features |= NETIF_F_TSO;
+ netdev->mpls_features |= NETIF_F_TSO6;
+
netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX;
netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX;
@@ -4640,8 +4858,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) ||
MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
- netdev->hw_enc_features |= NETIF_F_IP_CSUM;
- netdev->hw_enc_features |= NETIF_F_IPV6_CSUM;
+ netdev->hw_enc_features |= NETIF_F_HW_CSUM;
netdev->hw_enc_features |= NETIF_F_TSO;
netdev->hw_enc_features |= NETIF_F_TSO6;
netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
@@ -4754,7 +4971,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
if (err)
return err;
- mlx5e_build_nic_params(mdev, rss, &priv->channels.params,
+ mlx5e_build_nic_params(mdev, &priv->xsk, rss, &priv->channels.params,
mlx5e_get_netdev_max_channels(netdev),
netdev->mtu);
@@ -4796,7 +5013,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv);
+ err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
if (err)
goto err_destroy_indirect_rqts;
@@ -4804,14 +5021,22 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
if (err)
goto err_destroy_direct_rqts;
- err = mlx5e_create_direct_tirs(priv);
+ err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
if (err)
goto err_destroy_indirect_tirs;
+ err = mlx5e_create_direct_rqts(priv, priv->xsk_tir);
+ if (unlikely(err))
+ goto err_destroy_direct_tirs;
+
+ err = mlx5e_create_direct_tirs(priv, priv->xsk_tir);
+ if (unlikely(err))
+ goto err_destroy_xsk_rqts;
+
err = mlx5e_create_flow_steering(priv);
if (err) {
mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
- goto err_destroy_direct_tirs;
+ goto err_destroy_xsk_tirs;
}
err = mlx5e_tc_nic_init(priv);
@@ -4822,12 +5047,16 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
err_destroy_flow_steering:
mlx5e_destroy_flow_steering(priv);
+err_destroy_xsk_tirs:
+ mlx5e_destroy_direct_tirs(priv, priv->xsk_tir);
+err_destroy_xsk_rqts:
+ mlx5e_destroy_direct_rqts(priv, priv->xsk_tir);
err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
err_destroy_indirect_tirs:
mlx5e_destroy_indirect_tirs(priv, true);
err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
err_destroy_indirect_rqts:
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
err_close_drop_rq:
@@ -4841,9 +5070,11 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
{
mlx5e_tc_nic_cleanup(priv);
mlx5e_destroy_flow_steering(priv);
- mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_direct_tirs(priv, priv->xsk_tir);
+ mlx5e_destroy_direct_rqts(priv, priv->xsk_tir);
+ mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
mlx5e_destroy_indirect_tirs(priv, true);
- mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_destroy_q_counters(priv);
@@ -4925,6 +5156,11 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
mlx5_lag_remove(mdev);
}
+int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
+{
+ return mlx5e_refresh_tirs(priv, false);
+}
+
static const struct mlx5e_profile mlx5e_nic_profile = {
.init = mlx5e_nic_init,
.cleanup = mlx5e_nic_cleanup,
@@ -4934,6 +5170,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
.cleanup_tx = mlx5e_cleanup_nic_tx,
.enable = mlx5e_nic_enable,
.disable = mlx5e_nic_disable,
+ .update_rx = mlx5e_update_nic_rx,
.update_stats = mlx5e_update_ndo_stats,
.update_carrier = mlx5e_update_carrier,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe,
@@ -4993,7 +5230,7 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
nch * profile->max_tc,
- nch);
+ nch * MLX5E_NUM_RQ_GROUPS);
if (!netdev) {
mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
return NULL;
@@ -5131,7 +5368,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
#ifdef CONFIG_MLX5_ESWITCH
if (MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ mlx5_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) {
mlx5e_rep_register_vport_reps(mdev);
return mdev;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 3999da3e6314..529f8e4b32c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -391,30 +391,19 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = {
static int mlx5e_rep_get_port_parent_id(struct net_device *dev,
struct netdev_phys_item_id *ppid)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct net_device *uplink_upper = NULL;
- struct mlx5e_priv *uplink_priv = NULL;
- struct net_device *uplink_dev;
-
- if (esw->mode == SRIOV_NONE)
- return -EOPNOTSUPP;
+ struct mlx5_eswitch *esw;
+ struct mlx5e_priv *priv;
+ u64 parent_id;
- uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
- if (uplink_dev) {
- uplink_upper = netdev_master_upper_dev_get(uplink_dev);
- uplink_priv = netdev_priv(uplink_dev);
- }
+ priv = netdev_priv(dev);
+ esw = priv->mdev->priv.eswitch;
- ppid->id_len = ETH_ALEN;
- if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) {
- ether_addr_copy(ppid->id, uplink_upper->dev_addr);
- } else {
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
- struct mlx5_eswitch_rep *rep = rpriv->rep;
+ if (esw->mode == MLX5_ESWITCH_NONE)
+ return -EOPNOTSUPP;
- ether_addr_copy(ppid->id, rep->hw_id);
- }
+ parent_id = mlx5_query_nic_system_image_guid(priv->mdev);
+ ppid->id_len = sizeof(parent_id);
+ memcpy(ppid->id, &parent_id, sizeof(parent_id));
return 0;
}
@@ -425,7 +414,7 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
struct mlx5e_rep_sq *rep_sq, *tmp;
struct mlx5e_rep_priv *rpriv;
- if (esw->mode != SRIOV_OFFLOADS)
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
return;
rpriv = mlx5e_rep_to_rep_priv(rep);
@@ -446,7 +435,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
int err;
int i;
- if (esw->mode != SRIOV_OFFLOADS)
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
return 0;
rpriv = mlx5e_rep_to_rep_priv(rep);
@@ -1145,6 +1134,8 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
if (rep->vport == MLX5_VPORT_UPLINK)
ret = snprintf(buf, len, "p%d", fn);
+ else if (rep->vport == MLX5_VPORT_PF)
+ ret = snprintf(buf, len, "pf%d", fn);
else
ret = snprintf(buf, len, "pf%dvf%d", fn, rep->vport - 1);
@@ -1401,7 +1392,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
SET_NETDEV_DEV(netdev, mdev->device);
netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep;
/* we want a persistent mac for the uplink rep */
- mlx5_query_nic_vport_mac_address(mdev, 0, netdev->dev_addr);
+ mlx5_query_mac_address(mdev, netdev->dev_addr);
netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops;
#ifdef CONFIG_MLX5_CORE_EN_DCB
if (MLX5_CAP_GEN(mdev, qos))
@@ -1519,7 +1510,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv);
+ err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
if (err)
goto err_destroy_indirect_rqts;
@@ -1527,7 +1518,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
if (err)
goto err_destroy_direct_rqts;
- err = mlx5e_create_direct_tirs(priv);
+ err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
if (err)
goto err_destroy_indirect_tirs;
@@ -1544,11 +1535,11 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
err_destroy_ttc_table:
mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
err_destroy_indirect_tirs:
mlx5e_destroy_indirect_tirs(priv, false);
err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
err_destroy_indirect_rqts:
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
err_close_drop_rq:
@@ -1562,9 +1553,9 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
mlx5_del_flow_rules(rpriv->vport_rx_rule);
mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
- mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
mlx5e_destroy_indirect_tirs(priv, false);
- mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
mlx5e_close_drop_rq(&priv->drop_rq);
}
@@ -1636,6 +1627,11 @@ static void mlx5e_rep_enable(struct mlx5e_priv *priv)
mlx5e_set_netdev_mtu_boundaries(priv);
}
+static int mlx5e_update_rep_rx(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data)
{
struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
@@ -1711,6 +1707,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = {
.init_tx = mlx5e_init_rep_tx,
.cleanup_tx = mlx5e_cleanup_rep_tx,
.enable = mlx5e_rep_enable,
+ .update_rx = mlx5e_update_rep_rx,
.update_stats = mlx5e_rep_update_hw_counters,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
@@ -1726,6 +1723,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
.cleanup_tx = mlx5e_cleanup_rep_tx,
.enable = mlx5e_uplink_rep_enable,
.disable = mlx5e_uplink_rep_disable,
+ .update_rx = mlx5e_update_rep_rx,
.update_stats = mlx5e_uplink_rep_update_hw_counters,
.update_carrier = mlx5e_update_carrier,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 234a3fd39901..56a2f4666c47 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -47,6 +47,7 @@
#include "en_accel/tls_rxtx.h"
#include "lib/clock.h"
#include "en/xdp.h"
+#include "en/xsk/rx.h"
static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
{
@@ -235,8 +236,8 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
return true;
}
-static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
+static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
{
if (mlx5e_rx_cache_get(rq, dma_info))
return 0;
@@ -256,13 +257,23 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
return 0;
}
+static inline int mlx5e_page_alloc(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ if (rq->umem)
+ return mlx5e_xsk_page_alloc_umem(rq, dma_info);
+ else
+ return mlx5e_page_alloc_pool(rq, dma_info);
+}
+
void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info)
{
dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir);
}
-void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
- bool recycle)
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info,
+ bool recycle)
{
if (likely(recycle)) {
if (mlx5e_rx_cache_put(rq, dma_info))
@@ -277,6 +288,20 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
}
}
+static inline void mlx5e_page_release(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info,
+ bool recycle)
+{
+ if (rq->umem)
+ /* The `recycle` parameter is ignored, and the page is always
+ * put into the Reuse Ring, because there is no way to return
+ * the page to the userspace when the interface goes down.
+ */
+ mlx5e_xsk_page_release(rq, dma_info);
+ else
+ mlx5e_page_release_dynamic(rq, dma_info, recycle);
+}
+
static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
struct mlx5e_wqe_frag_info *frag)
{
@@ -288,7 +313,7 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
* offset) should just use the new one without replenishing again
* by themselves.
*/
- err = mlx5e_page_alloc_mapped(rq, frag->di);
+ err = mlx5e_page_alloc(rq, frag->di);
return err;
}
@@ -354,6 +379,13 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
int err;
int i;
+ if (rq->umem) {
+ int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags;
+
+ if (unlikely(!mlx5e_xsk_pages_enough_umem(rq, pages_desired)))
+ return -ENOMEM;
+ }
+
for (i = 0; i < wqe_bulk; i++) {
struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i);
@@ -401,11 +433,17 @@ mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
static void
mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
{
- const bool no_xdp_xmit =
- bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
+ bool no_xdp_xmit;
struct mlx5e_dma_info *dma_info = wi->umr.dma_info;
int i;
+ /* A common case for AF_XDP. */
+ if (bitmap_full(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE))
+ return;
+
+ no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap,
+ MLX5_MPWRQ_PAGES_PER_WQE);
+
for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++)
if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
mlx5e_page_release(rq, &dma_info[i], recycle);
@@ -427,11 +465,6 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
mlx5_wq_ll_update_db_record(wq);
}
-static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
-{
- return mlx5_wq_cyc_get_ctr_wrap_cnt(&sq->wq, sq->pc);
-}
-
static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
struct mlx5_wq_cyc *wq,
u16 pi, u16 nnops)
@@ -459,6 +492,12 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
int err;
int i;
+ if (rq->umem &&
+ unlikely(!mlx5e_xsk_pages_enough_umem(rq, MLX5_MPWRQ_PAGES_PER_WQE))) {
+ err = -ENOMEM;
+ goto err;
+ }
+
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
@@ -467,12 +506,10 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
}
umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
- if (unlikely(mlx5e_icosq_wrap_cnt(sq) < 2))
- memcpy(umr_wqe, &rq->mpwqe.umr_wqe,
- offsetof(struct mlx5e_umr_wqe, inline_mtts));
+ memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts));
for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
- err = mlx5e_page_alloc_mapped(rq, dma_info);
+ err = mlx5e_page_alloc(rq, dma_info);
if (unlikely(err))
goto err_unmap;
umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
@@ -487,6 +524,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset);
sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
+ sq->db.ico_wqe[pi].umr.rq = rq;
sq->pc += MLX5E_UMR_WQEBBS;
sq->doorbell_cseg = &umr_wqe->ctrl;
@@ -498,6 +536,8 @@ err_unmap:
dma_info--;
mlx5e_page_release(rq, dma_info, true);
}
+
+err:
rq->stats->buff_alloc_err++;
return err;
@@ -544,11 +584,10 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
return !!err;
}
-static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
+void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
{
struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
struct mlx5_cqe64 *cqe;
- u8 completed_umr = 0;
u16 sqcc;
int i;
@@ -589,7 +628,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
if (likely(wi->opcode == MLX5_OPCODE_UMR)) {
sqcc += MLX5E_UMR_WQEBBS;
- completed_umr++;
+ wi->umr.rq->mpwqe.umr_completed++;
} else if (likely(wi->opcode == MLX5_OPCODE_NOP)) {
sqcc++;
} else {
@@ -605,24 +644,25 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
sq->cc = sqcc;
mlx5_cqwq_update_db_record(&cq->wq);
-
- if (likely(completed_umr)) {
- mlx5e_post_rx_mpwqe(rq, completed_umr);
- rq->mpwqe.umr_in_progress -= completed_umr;
- }
}
bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
{
struct mlx5e_icosq *sq = &rq->channel->icosq;
struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+ u8 umr_completed = rq->mpwqe.umr_completed;
+ int alloc_err = 0;
u8 missing, i;
u16 head;
if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
return false;
- mlx5e_poll_ico_cq(&sq->cq, rq);
+ if (umr_completed) {
+ mlx5e_post_rx_mpwqe(rq, umr_completed);
+ rq->mpwqe.umr_in_progress -= umr_completed;
+ rq->mpwqe.umr_completed = 0;
+ }
missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress;
@@ -636,7 +676,9 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
head = rq->mpwqe.actual_wq_head;
i = missing;
do {
- if (unlikely(mlx5e_alloc_rx_mpwqe(rq, head)))
+ alloc_err = mlx5e_alloc_rx_mpwqe(rq, head);
+
+ if (unlikely(alloc_err))
break;
head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
} while (--i);
@@ -650,6 +692,12 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk;
rq->mpwqe.actual_wq_head = head;
+ /* If XSK Fill Ring doesn't have enough frames, busy poll by
+ * rescheduling the NAPI poll.
+ */
+ if (unlikely(alloc_err == -ENOMEM && rq->umem))
+ return true;
+
return false;
}
@@ -1018,7 +1066,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
}
rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt);
+ consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, false);
rcu_read_unlock();
if (consumed)
return NULL; /* page/packet was consumed by XDP */
@@ -1235,7 +1283,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
prefetch(data);
rcu_read_lock();
- consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32);
+ consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, false);
rcu_read_unlock();
if (consumed) {
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 483d321d2151..539b4d3656da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -48,8 +48,15 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) },
#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_no_sync_data) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_bypass_req) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) },
#endif
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
@@ -104,7 +111,33 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_force_irq) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_complete) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary_inner) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_none) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_ecn_mark) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_removed_vlan_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_redirect) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_wqe_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_cqes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_strides) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_oversize_pkts_sw_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_buff_alloc_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_blks) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_pkts) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_congst_umr) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_arfs_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_xmit) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_mpwqe) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_inlnw) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_cqes) },
};
#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc)
@@ -144,6 +177,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
&priv->channel_stats[i];
struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq;
struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq;
+ struct mlx5e_xdpsq_stats *xsksq_stats = &channel_stats->xsksq;
+ struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
struct mlx5e_ch_stats *ch_stats = &channel_stats->ch;
int j;
@@ -186,6 +221,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->ch_poll += ch_stats->poll;
s->ch_arm += ch_stats->arm;
s->ch_aff_change += ch_stats->aff_change;
+ s->ch_force_irq += ch_stats->force_irq;
s->ch_eq_rearm += ch_stats->eq_rearm;
/* xdp redirect */
s->tx_xdp_xmit += xdpsq_red_stats->xmit;
@@ -194,6 +230,32 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->tx_xdp_full += xdpsq_red_stats->full;
s->tx_xdp_err += xdpsq_red_stats->err;
s->tx_xdp_cqes += xdpsq_red_stats->cqes;
+ /* AF_XDP zero-copy */
+ s->rx_xsk_packets += xskrq_stats->packets;
+ s->rx_xsk_bytes += xskrq_stats->bytes;
+ s->rx_xsk_csum_complete += xskrq_stats->csum_complete;
+ s->rx_xsk_csum_unnecessary += xskrq_stats->csum_unnecessary;
+ s->rx_xsk_csum_unnecessary_inner += xskrq_stats->csum_unnecessary_inner;
+ s->rx_xsk_csum_none += xskrq_stats->csum_none;
+ s->rx_xsk_ecn_mark += xskrq_stats->ecn_mark;
+ s->rx_xsk_removed_vlan_packets += xskrq_stats->removed_vlan_packets;
+ s->rx_xsk_xdp_drop += xskrq_stats->xdp_drop;
+ s->rx_xsk_xdp_redirect += xskrq_stats->xdp_redirect;
+ s->rx_xsk_wqe_err += xskrq_stats->wqe_err;
+ s->rx_xsk_mpwqe_filler_cqes += xskrq_stats->mpwqe_filler_cqes;
+ s->rx_xsk_mpwqe_filler_strides += xskrq_stats->mpwqe_filler_strides;
+ s->rx_xsk_oversize_pkts_sw_drop += xskrq_stats->oversize_pkts_sw_drop;
+ s->rx_xsk_buff_alloc_err += xskrq_stats->buff_alloc_err;
+ s->rx_xsk_cqe_compress_blks += xskrq_stats->cqe_compress_blks;
+ s->rx_xsk_cqe_compress_pkts += xskrq_stats->cqe_compress_pkts;
+ s->rx_xsk_congst_umr += xskrq_stats->congst_umr;
+ s->rx_xsk_arfs_err += xskrq_stats->arfs_err;
+ s->tx_xsk_xmit += xsksq_stats->xmit;
+ s->tx_xsk_mpwqe += xsksq_stats->mpwqe;
+ s->tx_xsk_inlnw += xsksq_stats->inlnw;
+ s->tx_xsk_full += xsksq_stats->full;
+ s->tx_xsk_err += xsksq_stats->err;
+ s->tx_xsk_cqes += xsksq_stats->cqes;
for (j = 0; j < priv->max_opened_tc; j++) {
struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
@@ -216,8 +278,15 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->tx_csum_none += sq_stats->csum_none;
s->tx_csum_partial += sq_stats->csum_partial;
#ifdef CONFIG_MLX5_EN_TLS
- s->tx_tls_ooo += sq_stats->tls_ooo;
- s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes;
+ s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets;
+ s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes;
+ s->tx_tls_ctx += sq_stats->tls_ctx;
+ s->tx_tls_ooo += sq_stats->tls_ooo;
+ s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes;
+ s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data;
+ s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req;
+ s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes;
+ s->tx_tls_dump_packets += sq_stats->tls_dump_packets;
#endif
s->tx_cqes += sq_stats->cqes;
}
@@ -1238,6 +1307,16 @@ static const struct counter_desc sq_stats_desc[] = {
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) },
+#endif
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
@@ -1266,11 +1345,43 @@ static const struct counter_desc xdpsq_stats_desc[] = {
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
};
+static const struct counter_desc xskrq_stats_desc[] = {
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, packets) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, bytes) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_complete) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_none) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, ecn_mark) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_drop) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_redirect) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, wqe_err) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, congst_umr) },
+ { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, arfs_err) },
+};
+
+static const struct counter_desc xsksq_stats_desc[] = {
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, full) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, err) },
+ { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
+};
+
static const struct counter_desc ch_stats_desc[] = {
{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) },
{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, poll) },
{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) },
{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, force_irq) },
{ MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) },
};
@@ -1278,6 +1389,8 @@ static const struct counter_desc ch_stats_desc[] = {
#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc)
#define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc)
#define NUM_RQ_XDPSQ_STATS ARRAY_SIZE(rq_xdpsq_stats_desc)
+#define NUM_XSKRQ_STATS ARRAY_SIZE(xskrq_stats_desc)
+#define NUM_XSKSQ_STATS ARRAY_SIZE(xsksq_stats_desc)
#define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc)
static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
@@ -1288,13 +1401,16 @@ static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
(NUM_CH_STATS * max_nch) +
(NUM_SQ_STATS * max_nch * priv->max_opened_tc) +
(NUM_RQ_XDPSQ_STATS * max_nch) +
- (NUM_XDPSQ_STATS * max_nch);
+ (NUM_XDPSQ_STATS * max_nch) +
+ (NUM_XSKRQ_STATS * max_nch * priv->xsk.ever_used) +
+ (NUM_XSKSQ_STATS * max_nch * priv->xsk.ever_used);
}
static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
int idx)
{
int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
+ bool is_xsk = priv->xsk.ever_used;
int i, j, tc;
for (i = 0; i < max_nch; i++)
@@ -1306,6 +1422,9 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
for (j = 0; j < NUM_RQ_STATS; j++)
sprintf(data + (idx++) * ETH_GSTRING_LEN,
rq_stats_desc[j].format, i);
+ for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ xskrq_stats_desc[j].format, i);
for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
sprintf(data + (idx++) * ETH_GSTRING_LEN,
rq_xdpsq_stats_desc[j].format, i);
@@ -1318,10 +1437,14 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
sq_stats_desc[j].format,
priv->channel_tc2txq[i][tc]);
- for (i = 0; i < max_nch; i++)
+ for (i = 0; i < max_nch; i++) {
+ for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ xsksq_stats_desc[j].format, i);
for (j = 0; j < NUM_XDPSQ_STATS; j++)
sprintf(data + (idx++) * ETH_GSTRING_LEN,
xdpsq_stats_desc[j].format, i);
+ }
return idx;
}
@@ -1330,6 +1453,7 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
int idx)
{
int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
+ bool is_xsk = priv->xsk.ever_used;
int i, j, tc;
for (i = 0; i < max_nch; i++)
@@ -1343,6 +1467,10 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
data[idx++] =
MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq,
rq_stats_desc, j);
+ for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xskrq,
+ xskrq_stats_desc, j);
for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
data[idx++] =
MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq,
@@ -1356,11 +1484,16 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc],
sq_stats_desc, j);
- for (i = 0; i < max_nch; i++)
+ for (i = 0; i < max_nch; i++) {
+ for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xsksq,
+ xsksq_stats_desc, j);
for (j = 0; j < NUM_XDPSQ_STATS; j++)
data[idx++] =
MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq,
xdpsq_stats_desc, j);
+ }
return idx;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index cdddcc46971b..76ac111e14d0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -46,6 +46,8 @@
#define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld)
#define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld)
#define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_XSKRQ_STAT(type, fld) "rx%d_xsk_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_XSKSQ_STAT(type, fld) "tx%d_xsk_"#fld, offsetof(type, fld)
#define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld)
struct counter_desc {
@@ -116,12 +118,46 @@ struct mlx5e_sw_stats {
u64 ch_poll;
u64 ch_arm;
u64 ch_aff_change;
+ u64 ch_force_irq;
u64 ch_eq_rearm;
#ifdef CONFIG_MLX5_EN_TLS
+ u64 tx_tls_encrypted_packets;
+ u64 tx_tls_encrypted_bytes;
+ u64 tx_tls_ctx;
u64 tx_tls_ooo;
u64 tx_tls_resync_bytes;
+ u64 tx_tls_drop_no_sync_data;
+ u64 tx_tls_drop_bypass_req;
+ u64 tx_tls_dump_packets;
+ u64 tx_tls_dump_bytes;
#endif
+
+ u64 rx_xsk_packets;
+ u64 rx_xsk_bytes;
+ u64 rx_xsk_csum_complete;
+ u64 rx_xsk_csum_unnecessary;
+ u64 rx_xsk_csum_unnecessary_inner;
+ u64 rx_xsk_csum_none;
+ u64 rx_xsk_ecn_mark;
+ u64 rx_xsk_removed_vlan_packets;
+ u64 rx_xsk_xdp_drop;
+ u64 rx_xsk_xdp_redirect;
+ u64 rx_xsk_wqe_err;
+ u64 rx_xsk_mpwqe_filler_cqes;
+ u64 rx_xsk_mpwqe_filler_strides;
+ u64 rx_xsk_oversize_pkts_sw_drop;
+ u64 rx_xsk_buff_alloc_err;
+ u64 rx_xsk_cqe_compress_blks;
+ u64 rx_xsk_cqe_compress_pkts;
+ u64 rx_xsk_congst_umr;
+ u64 rx_xsk_arfs_err;
+ u64 tx_xsk_xmit;
+ u64 tx_xsk_mpwqe;
+ u64 tx_xsk_inlnw;
+ u64 tx_xsk_full;
+ u64 tx_xsk_err;
+ u64 tx_xsk_cqes;
};
struct mlx5e_qcounter_stats {
@@ -227,8 +263,15 @@ struct mlx5e_sq_stats {
u64 added_vlan_packets;
u64 nop;
#ifdef CONFIG_MLX5_EN_TLS
+ u64 tls_encrypted_packets;
+ u64 tls_encrypted_bytes;
+ u64 tls_ctx;
u64 tls_ooo;
u64 tls_resync_bytes;
+ u64 tls_drop_no_sync_data;
+ u64 tls_drop_bypass_req;
+ u64 tls_dump_packets;
+ u64 tls_dump_bytes;
#endif
/* less likely accessed in data path */
u64 csum_none;
@@ -256,6 +299,7 @@ struct mlx5e_ch_stats {
u64 poll;
u64 arm;
u64 aff_change;
+ u64 force_irq;
u64 eq_rearm;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index eb8433cc49a7..3ac9b1e423ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -717,19 +717,22 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
+ struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
struct mlx5_nic_flow_attr *attr = flow->nic_attr;
struct mlx5_core_dev *dev = priv->mdev;
struct mlx5_flow_destination dest[2] = {};
struct mlx5_flow_act flow_act = {
.action = attr->action,
- .flow_tag = attr->flow_tag,
.reformat_id = 0,
- .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND,
+ .flags = FLOW_ACT_NO_APPEND,
};
struct mlx5_fc *counter = NULL;
bool table_created = false;
int err, dest_ix = 0;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ flow_context->flow_tag = attr->flow_tag;
+
if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
if (err) {
@@ -2799,6 +2802,16 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv,
return err;
}
+bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
+ struct net_device *out_dev)
+{
+ if (is_merged_eswitch_dev(priv, out_dev))
+ return true;
+
+ return mlx5e_eswitch_rep(out_dev) &&
+ same_hw_devs(priv, netdev_priv(out_dev));
+}
+
static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
struct mlx5e_tc_flow *flow,
@@ -2864,9 +2877,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
- if (netdev_port_same_parent_id(priv->netdev,
- out_dev) ||
- is_merged_eswitch_dev(priv, out_dev)) {
+ if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev);
@@ -2883,6 +2894,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
if (err)
return err;
}
+
if (is_vlan_dev(parse_attr->filter_dev)) {
err = add_vlan_pop_action(priv, attr,
&action);
@@ -2890,8 +2902,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
return err;
}
- if (!mlx5e_eswitch_rep(out_dev))
+ if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not on same switch HW, can't offload forwarding");
+ pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
+ priv->netdev->name, out_dev->name);
return -EOPNOTSUPP;
+ }
out_priv = netdev_priv(out_dev);
rpriv = out_priv->ppriv;
@@ -3355,7 +3372,7 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv,
if (!tc_can_offload_extack(priv->netdev, f->common.extack))
return -EOPNOTSUPP;
- if (esw && esw->mode == SRIOV_OFFLOADS)
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
err = mlx5e_add_fdb_flow(priv, f, flow_flags,
filter_dev, flow);
else
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index f62e81902d27..8f288cc53cee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -74,6 +74,9 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags);
void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
+bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
+ struct net_device *out_dev);
+
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 9048faa4bfcf..600e92cb629a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -35,55 +35,12 @@
#include <net/geneve.h>
#include <net/dsfield.h>
#include "en.h"
+#include "en/txrx.h"
#include "ipoib/ipoib.h"
#include "en_accel/en_accel.h"
+#include "en_accel/ktls.h"
#include "lib/clock.h"
-#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS
-
-#ifndef CONFIG_MLX5_EN_TLS
-#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
- MLX5E_SQ_NOPS_ROOM)
-#else
-/* TLS offload requires MLX5E_SQ_STOP_ROOM to have
- * enough room for a resync SKB, a normal SKB and a NOP
- */
-#define MLX5E_SQ_STOP_ROOM (2 * MLX5_SEND_WQE_MAX_WQEBBS +\
- MLX5E_SQ_NOPS_ROOM)
-#endif
-
-static inline void mlx5e_tx_dma_unmap(struct device *pdev,
- struct mlx5e_sq_dma *dma)
-{
- switch (dma->type) {
- case MLX5E_DMA_MAP_SINGLE:
- dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
- break;
- case MLX5E_DMA_MAP_PAGE:
- dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
- break;
- default:
- WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
- }
-}
-
-static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i)
-{
- return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
-}
-
-static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq,
- dma_addr_t addr,
- u32 size,
- enum mlx5e_dma_map_type map_type)
-{
- struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++);
-
- dma->addr = addr;
- dma->size = size;
- dma->type = map_type;
-}
-
static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
{
int i;
@@ -277,23 +234,6 @@ dma_unmap_wqe_err:
return -ENOMEM;
}
-static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
- struct mlx5_wq_cyc *wq,
- u16 pi, u16 nnops)
-{
- struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-
- edge_wi = wi + nnops;
-
- /* fill sq frag edge with nops to avoid wqe wrapping two pages */
- for (; wi < edge_wi; wi++) {
- wi->skb = NULL;
- wi->num_wqebbs = 1;
- mlx5e_post_nop(wq, sq->sqn, &sq->pc);
- }
- sq->stats->nop += nnops;
-}
-
static inline void
mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma,
@@ -315,7 +255,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
sq->pc += wi->num_wqebbs;
- if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) {
+ if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room))) {
netif_tx_stop_queue(sq->txq);
sq->stats->stopped++;
}
@@ -326,8 +266,6 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
}
-#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
-
netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
{
@@ -354,9 +292,12 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
stats->packets += skb_shinfo(skb)->gso_segs;
} else {
+ u8 mode = mlx5e_transport_inline_tx_wqe(wqe) ?
+ MLX5_INLINE_MODE_TCP_UDP : sq->min_inline_mode;
+
opcode = MLX5_OPCODE_SEND;
mss = 0;
- ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb);
+ ihs = mlx5e_calc_min_inline(mode, skb);
num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
stats->packets++;
}
@@ -381,11 +322,17 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
#ifdef CONFIG_MLX5_EN_IPSEC
struct mlx5_wqe_eth_seg cur_eth = wqe->eth;
#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ struct mlx5_wqe_ctrl_seg cur_ctrl = wqe->ctrl;
+#endif
mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
- mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
+ wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
#ifdef CONFIG_MLX5_EN_IPSEC
wqe->eth = cur_eth;
#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ wqe->ctrl = cur_ctrl;
+#endif
}
/* fill wqe */
@@ -444,7 +391,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
u16 pi;
sq = priv->txq2sq[skb_get_queue_mapping(skb)];
- mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
+ wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
/* might send skbs and update wqe and pi */
skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi);
@@ -532,8 +479,16 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
wi = &sq->db.wqe_info[ci];
skb = wi->skb;
- if (unlikely(!skb)) { /* nop */
- sqcc++;
+ if (unlikely(!skb)) {
+#ifdef CONFIG_MLX5_EN_TLS
+ if (wi->resync_dump_frag) {
+ struct mlx5e_sq_dma *dma =
+ mlx5e_dma_get(sq, dma_fifo_cc++);
+
+ mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma);
+ }
+#endif
+ sqcc += wi->num_wqebbs;
continue;
}
@@ -575,8 +530,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
netdev_tx_completed_queue(sq->txq, npkts, nbytes);
if (netif_tx_queue_stopped(sq->txq) &&
- mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
- MLX5E_SQ_STOP_ROOM) &&
+ mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
netif_tx_wake_queue(sq->txq);
stats->wake++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index f9862bf75491..c50b6f0769c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -33,6 +33,7 @@
#include <linux/irq.h>
#include "en.h"
#include "en/xdp.h"
+#include "en/xsk/tx.h"
static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
{
@@ -48,26 +49,24 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
{
struct mlx5e_sq_stats *stats = sq->stats;
- struct net_dim_sample dim_sample;
+ struct dim_sample dim_sample;
if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state)))
return;
- net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes,
- &dim_sample);
+ dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
net_dim(&sq->dim, dim_sample);
}
static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
{
struct mlx5e_rq_stats *stats = rq->stats;
- struct net_dim_sample dim_sample;
+ struct dim_sample dim_sample;
if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state)))
return;
- net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes,
- &dim_sample);
+ dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
net_dim(&rq->dim, dim_sample);
}
@@ -87,7 +86,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
napi);
struct mlx5e_ch_stats *ch_stats = c->stats;
+ struct mlx5e_xdpsq *xsksq = &c->xsksq;
+ struct mlx5e_rq *xskrq = &c->xskrq;
struct mlx5e_rq *rq = &c->rq;
+ bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+ bool aff_change = false;
+ bool busy_xsk = false;
bool busy = false;
int work_done = 0;
int i;
@@ -97,22 +101,38 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
for (i = 0; i < c->num_tc; i++)
busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
- busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq, NULL);
+ busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
if (c->xdp)
- busy |= mlx5e_poll_xdpsq_cq(&rq->xdpsq.cq, rq);
+ busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq);
if (likely(budget)) { /* budget=0 means: don't poll rx rings */
- work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
+ if (xsk_open)
+ work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
+
+ if (likely(budget - work_done))
+ work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
+
busy |= work_done == budget;
}
- busy |= c->rq.post_wqes(rq);
+ mlx5e_poll_ico_cq(&c->icosq.cq);
+
+ busy |= rq->post_wqes(rq);
+ if (xsk_open) {
+ mlx5e_poll_ico_cq(&c->xskicosq.cq);
+ busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq);
+ busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET);
+ busy_xsk |= xskrq->post_wqes(xskrq);
+ }
+
+ busy |= busy_xsk;
if (busy) {
if (likely(mlx5e_channel_no_affinity_change(c)))
return budget;
ch_stats->aff_change++;
+ aff_change = true;
if (budget && work_done == budget)
work_done--;
}
@@ -133,10 +153,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
mlx5e_cq_arm(&c->icosq.cq);
mlx5e_cq_arm(&c->xdpsq.cq);
+ if (xsk_open) {
+ mlx5e_handle_rx_dim(xskrq);
+ mlx5e_cq_arm(&c->xskicosq.cq);
+ mlx5e_cq_arm(&xsksq->cq);
+ mlx5e_cq_arm(&xskrq->cq);
+ }
+
+ if (unlikely(aff_change && busy_xsk)) {
+ mlx5e_trigger_irq(&c->icosq);
+ ch_stats->force_irq++;
+ }
+
return work_done;
}
-void mlx5e_completion_event(struct mlx5_core_cq *mcq)
+void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 5e9319d3d90c..41f25ea2e8d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -61,17 +61,21 @@ enum {
MLX5_EQ_DOORBEL_OFFSET = 0x40,
};
-struct mlx5_irq_info {
- cpumask_var_t mask;
- char name[MLX5_MAX_IRQ_NAME];
- void *context; /* dev_id provided to request_irq */
+/* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update
+ * the ci before we polled all the entries in the EQ. MLX5_NUM_SPARE_EQE is
+ * used to set the EQ size, budget must be smaller than the EQ size.
+ */
+enum {
+ MLX5_EQ_POLLING_BUDGET = 128,
};
+static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE);
+
struct mlx5_eq_table {
struct list_head comp_eqs_list;
- struct mlx5_eq pages_eq;
- struct mlx5_eq cmd_eq;
- struct mlx5_eq async_eq;
+ struct mlx5_eq_async pages_eq;
+ struct mlx5_eq_async cmd_eq;
+ struct mlx5_eq_async async_eq;
struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
@@ -79,11 +83,8 @@ struct mlx5_eq_table {
struct mlx5_nb cq_err_nb;
struct mutex lock; /* sync async eqs creations */
- int num_comp_vectors;
- struct mlx5_irq_info *irq_info;
-#ifdef CONFIG_RFS_ACCEL
- struct cpu_rmap *rmap;
-#endif
+ int num_comp_eqs;
+ struct mlx5_irq_table *irq_table;
};
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
@@ -124,16 +125,24 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
return cq;
}
-static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
+static int mlx5_eq_comp_int(struct notifier_block *nb,
+ __always_unused unsigned long action,
+ __always_unused void *data)
{
- struct mlx5_eq_comp *eq_comp = eq_ptr;
- struct mlx5_eq *eq = eq_ptr;
+ struct mlx5_eq_comp *eq_comp =
+ container_of(nb, struct mlx5_eq_comp, irq_nb);
+ struct mlx5_eq *eq = &eq_comp->core;
struct mlx5_eqe *eqe;
- int set_ci = 0;
+ int num_eqes = 0;
u32 cqn = -1;
- while ((eqe = next_eqe_sw(eq))) {
+ eqe = next_eqe_sw(eq);
+ if (!eqe)
+ goto out;
+
+ do {
struct mlx5_core_cq *cq;
+
/* Make sure we read EQ entry contents after we've
* checked the ownership bit.
*/
@@ -144,33 +153,23 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
cq = mlx5_eq_cq_get(eq, cqn);
if (likely(cq)) {
++cq->arm_sn;
- cq->comp(cq);
+ cq->comp(cq, eqe);
mlx5_cq_put(cq);
} else {
mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
}
++eq->cons_index;
- ++set_ci;
- /* The HCA will think the queue has overflowed if we
- * don't tell it we've been processing events. We
- * create our EQs with MLX5_NUM_SPARE_EQE extra
- * entries, so we must update our consumer index at
- * least that often.
- */
- if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
- eq_update_ci(eq, 0);
- set_ci = 0;
- }
- }
+ } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+out:
eq_update_ci(eq, 1);
if (cqn != -1)
tasklet_schedule(&eq_comp->tasklet_ctx.task);
- return IRQ_HANDLED;
+ return 0;
}
/* Some architectures don't latch interrupts when they are disabled, so using
@@ -184,25 +183,32 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
disable_irq(eq->core.irqn);
count_eqe = eq->core.cons_index;
- mlx5_eq_comp_int(eq->core.irqn, eq);
+ mlx5_eq_comp_int(&eq->irq_nb, 0, NULL);
count_eqe = eq->core.cons_index - count_eqe;
enable_irq(eq->core.irqn);
return count_eqe;
}
-static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
+static int mlx5_eq_async_int(struct notifier_block *nb,
+ unsigned long action, void *data)
{
- struct mlx5_eq *eq = eq_ptr;
+ struct mlx5_eq_async *eq_async =
+ container_of(nb, struct mlx5_eq_async, irq_nb);
+ struct mlx5_eq *eq = &eq_async->core;
struct mlx5_eq_table *eqt;
struct mlx5_core_dev *dev;
struct mlx5_eqe *eqe;
- int set_ci = 0;
+ int num_eqes = 0;
dev = eq->dev;
eqt = dev->priv.eq_table;
- while ((eqe = next_eqe_sw(eq))) {
+ eqe = next_eqe_sw(eq);
+ if (!eqe)
+ goto out;
+
+ do {
/*
* Make sure we read EQ entry contents after we've
* checked the ownership bit.
@@ -217,23 +223,13 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
++eq->cons_index;
- ++set_ci;
- /* The HCA will think the queue has overflowed if we
- * don't tell it we've been processing events. We
- * create our EQs with MLX5_NUM_SPARE_EQE extra
- * entries, so we must update our consumer index at
- * least that often.
- */
- if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
- eq_update_ci(eq, 0);
- set_ci = 0;
- }
- }
+ } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+out:
eq_update_ci(eq, 1);
- return IRQ_HANDLED;
+ return 0;
}
static void init_eq_buf(struct mlx5_eq *eq)
@@ -248,22 +244,19 @@ static void init_eq_buf(struct mlx5_eq *eq)
}
static int
-create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
+create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
struct mlx5_eq_param *param)
{
- struct mlx5_eq_table *eq_table = dev->priv.eq_table;
struct mlx5_cq_table *cq_table = &eq->cq_table;
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
struct mlx5_priv *priv = &dev->priv;
- u8 vecidx = param->index;
+ u8 vecidx = param->irq_index;
__be64 *pas;
void *eqc;
int inlen;
u32 *in;
int err;
-
- if (eq_table->irq_info[vecidx].context)
- return -EEXIST;
+ int i;
/* Init CQ table */
memset(cq_table, 0, sizeof(*cq_table));
@@ -291,10 +284,12 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
mlx5_fill_page_array(&eq->buf, pas);
MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
- if (!param->mask && MLX5_CAP_GEN(dev, log_max_uctx))
+ if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx))
MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID);
- MLX5_SET64(create_eq_in, in, event_bitmask, param->mask);
+ for (i = 0; i < 4; i++)
+ MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i,
+ param->mask[i]);
eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
@@ -307,34 +302,19 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
if (err)
goto err_in;
- snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
- name, pci_name(dev->pdev));
- eq_table->irq_info[vecidx].context = param->context;
-
eq->vecidx = vecidx;
eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
eq->irqn = pci_irq_vector(dev->pdev, vecidx);
eq->dev = dev;
eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
- err = request_irq(eq->irqn, param->handler, 0,
- eq_table->irq_info[vecidx].name, param->context);
- if (err)
- goto err_eq;
err = mlx5_debug_eq_add(dev, eq);
if (err)
- goto err_irq;
-
- /* EQs are created in ARMED state
- */
- eq_update_ci(eq, 1);
+ goto err_eq;
kvfree(in);
return 0;
-err_irq:
- free_irq(eq->irqn, eq);
-
err_eq:
mlx5_cmd_destroy_eq(dev, eq->eqn);
@@ -346,18 +326,48 @@ err_buf:
return err;
}
-static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+/**
+ * mlx5_eq_enable - Enable EQ for receiving EQEs
+ * @dev - Device which owns the eq
+ * @eq - EQ to enable
+ * @nb - notifier call block
+ * mlx5_eq_enable - must be called after EQ is created in device.
+ */
+int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ struct notifier_block *nb)
{
struct mlx5_eq_table *eq_table = dev->priv.eq_table;
- struct mlx5_irq_info *irq_info;
int err;
- irq_info = &eq_table->irq_info[eq->vecidx];
+ err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb);
+ if (!err)
+ eq_update_ci(eq, 1);
- mlx5_debug_eq_remove(dev, eq);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_eq_enable);
+
+/**
+ * mlx5_eq_disable - Enable EQ for receiving EQEs
+ * @dev - Device which owns the eq
+ * @eq - EQ to disable
+ * @nb - notifier call block
+ * mlx5_eq_disable - must be called before EQ is destroyed.
+ */
+void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ struct notifier_block *nb)
+{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+
+ mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb);
+}
+EXPORT_SYMBOL(mlx5_eq_disable);
+
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ int err;
- free_irq(eq->irqn, irq_info->context);
- irq_info->context = NULL;
+ mlx5_debug_eq_remove(dev, eq);
err = mlx5_cmd_destroy_eq(dev, eq->eqn);
if (err)
@@ -382,7 +392,7 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
return err;
}
-int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
{
struct mlx5_cq_table *table = &eq->cq_table;
struct mlx5_core_cq *tmp;
@@ -392,16 +402,14 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
spin_unlock(&table->lock);
if (!tmp) {
- mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn);
- return -ENOENT;
- }
-
- if (tmp != cq) {
- mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn);
- return -EINVAL;
+ mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n",
+ eq->eqn, cq->cqn);
+ return;
}
- return 0;
+ if (tmp != cq)
+ mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n",
+ eq->eqn, cq->cqn);
}
int mlx5_eq_table_init(struct mlx5_core_dev *dev)
@@ -423,6 +431,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev)
for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
+ eq_table->irq_table = dev->priv.irq_table;
return 0;
kvfree_eq_table:
@@ -439,19 +448,20 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
/* Async EQs */
-static int create_async_eq(struct mlx5_core_dev *dev, const char *name,
+static int create_async_eq(struct mlx5_core_dev *dev,
struct mlx5_eq *eq, struct mlx5_eq_param *param)
{
struct mlx5_eq_table *eq_table = dev->priv.eq_table;
int err;
mutex_lock(&eq_table->lock);
- if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) {
- err = -ENOSPC;
+ /* Async EQs must share irq index 0 */
+ if (param->irq_index != 0) {
+ err = -EINVAL;
goto unlock;
}
- err = create_map_eq(dev, eq, name, param);
+ err = create_map_eq(dev, eq, param);
unlock:
mutex_unlock(&eq_table->lock);
return err;
@@ -480,7 +490,7 @@ static int cq_err_event_notifier(struct notifier_block *nb,
/* type == MLX5_EVENT_TYPE_CQ_ERROR */
eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
- eq = &eqt->async_eq;
+ eq = &eqt->async_eq.core;
eqe = data;
cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
@@ -493,14 +503,31 @@ static int cq_err_event_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
- cq->event(cq, type);
+ if (cq->event)
+ cq->event(cq, type);
mlx5_cq_put(cq);
return NOTIFY_OK;
}
-static u64 gather_async_events_mask(struct mlx5_core_dev *dev)
+static void gather_user_async_events(struct mlx5_core_dev *dev, u64 mask[4])
+{
+ __be64 *user_unaffiliated_events;
+ __be64 *user_affiliated_events;
+ int i;
+
+ user_affiliated_events =
+ MLX5_CAP_DEV_EVENT(dev, user_affiliated_events);
+ user_unaffiliated_events =
+ MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events);
+
+ for (i = 0; i < 4; i++)
+ mask[i] |= be64_to_cpu(user_affiliated_events[i] |
+ user_unaffiliated_events[i]);
+}
+
+static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4])
{
u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
@@ -537,7 +564,10 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev)
async_event_mask |=
(1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED);
- return async_event_mask;
+ mask[0] = async_event_mask;
+
+ if (MLX5_CAP_GEN(dev, event_cap))
+ gather_user_async_events(dev, mask);
}
static int create_async_eqs(struct mlx5_core_dev *dev)
@@ -549,55 +579,76 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
mlx5_eq_notifier_register(dev, &table->cq_err_nb);
+ table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int;
param = (struct mlx5_eq_param) {
- .index = MLX5_EQ_CMD_IDX,
- .mask = 1ull << MLX5_EVENT_TYPE_CMD,
+ .irq_index = 0,
.nent = MLX5_NUM_CMD_EQE,
- .context = &table->cmd_eq,
- .handler = mlx5_eq_async_int,
};
- err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param);
+
+ param.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD;
+ err = create_async_eq(dev, &table->cmd_eq.core, &param);
if (err) {
mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
goto err0;
}
-
+ err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err);
+ goto err1;
+ }
mlx5_cmd_use_events(dev);
+ table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int;
param = (struct mlx5_eq_param) {
- .index = MLX5_EQ_ASYNC_IDX,
- .mask = gather_async_events_mask(dev),
+ .irq_index = 0,
.nent = MLX5_NUM_ASYNC_EQE,
- .context = &table->async_eq,
- .handler = mlx5_eq_async_int,
};
- err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param);
+
+ gather_async_events_mask(dev, param.mask);
+ err = create_async_eq(dev, &table->async_eq.core, &param);
if (err) {
mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
- goto err1;
+ goto err2;
+ }
+ err = mlx5_eq_enable(dev, &table->async_eq.core,
+ &table->async_eq.irq_nb);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable async EQ %d\n", err);
+ goto err3;
}
+ table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int;
param = (struct mlx5_eq_param) {
- .index = MLX5_EQ_PAGEREQ_IDX,
- .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST,
+ .irq_index = 0,
.nent = /* TODO: sriov max_vf + */ 1,
- .context = &table->pages_eq,
- .handler = mlx5_eq_async_int,
};
- err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param);
+
+ param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST;
+ err = create_async_eq(dev, &table->pages_eq.core, &param);
if (err) {
mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
- goto err2;
+ goto err4;
+ }
+ err = mlx5_eq_enable(dev, &table->pages_eq.core,
+ &table->pages_eq.irq_nb);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err);
+ goto err5;
}
return err;
+err5:
+ destroy_async_eq(dev, &table->pages_eq.core);
+err4:
+ mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
+err3:
+ destroy_async_eq(dev, &table->async_eq.core);
err2:
- destroy_async_eq(dev, &table->async_eq);
-
-err1:
mlx5_cmd_use_polling(dev);
- destroy_async_eq(dev, &table->cmd_eq);
+ mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+err1:
+ destroy_async_eq(dev, &table->cmd_eq.core);
err0:
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
return err;
@@ -608,19 +659,22 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
struct mlx5_eq_table *table = dev->priv.eq_table;
int err;
- err = destroy_async_eq(dev, &table->pages_eq);
+ mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb);
+ err = destroy_async_eq(dev, &table->pages_eq.core);
if (err)
mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
err);
- err = destroy_async_eq(dev, &table->async_eq);
+ mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
+ err = destroy_async_eq(dev, &table->async_eq.core);
if (err)
mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
err);
mlx5_cmd_use_polling(dev);
- err = destroy_async_eq(dev, &table->cmd_eq);
+ mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+ err = destroy_async_eq(dev, &table->cmd_eq.core);
if (err)
mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
err);
@@ -630,24 +684,24 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
{
- return &dev->priv.eq_table->async_eq;
+ return &dev->priv.eq_table->async_eq.core;
}
void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
{
- synchronize_irq(dev->priv.eq_table->async_eq.irqn);
+ synchronize_irq(dev->priv.eq_table->async_eq.core.irqn);
}
void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
{
- synchronize_irq(dev->priv.eq_table->cmd_eq.irqn);
+ synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn);
}
/* Generic EQ API for mlx5_core consumers
* Needed For RDMA ODP EQ for now
*/
struct mlx5_eq *
-mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
+mlx5_eq_create_generic(struct mlx5_core_dev *dev,
struct mlx5_eq_param *param)
{
struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
@@ -656,7 +710,7 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
if (!eq)
return ERR_PTR(-ENOMEM);
- err = create_async_eq(dev, name, eq, param);
+ err = create_async_eq(dev, eq, param);
if (err) {
kvfree(eq);
eq = ERR_PTR(err);
@@ -714,84 +768,14 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
}
EXPORT_SYMBOL(mlx5_eq_update_ci);
-/* Completion EQs */
-
-static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
- struct mlx5_priv *priv = &mdev->priv;
- int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
- int irq = pci_irq_vector(mdev->pdev, vecidx);
- struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
-
- if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) {
- mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
- return -ENOMEM;
- }
-
- cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
- irq_info->mask);
-
- if (IS_ENABLED(CONFIG_SMP) &&
- irq_set_affinity_hint(irq, irq_info->mask))
- mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
-
- return 0;
-}
-
-static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
- int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
- struct mlx5_priv *priv = &mdev->priv;
- int irq = pci_irq_vector(mdev->pdev, vecidx);
- struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
-
- irq_set_affinity_hint(irq, NULL);
- free_cpumask_var(irq_info->mask);
-}
-
-static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
-{
- int err;
- int i;
-
- for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) {
- err = set_comp_irq_affinity_hint(mdev, i);
- if (err)
- goto err_out;
- }
-
- return 0;
-
-err_out:
- for (i--; i >= 0; i--)
- clear_comp_irq_affinity_hint(mdev, i);
-
- return err;
-}
-
-static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
-{
- int i;
-
- for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++)
- clear_comp_irq_affinity_hint(mdev, i);
-}
-
static void destroy_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
struct mlx5_eq_comp *eq, *n;
- clear_comp_irqs_affinity_hints(dev);
-
-#ifdef CONFIG_RFS_ACCEL
- if (table->rmap) {
- free_irq_cpu_rmap(table->rmap);
- table->rmap = NULL;
- }
-#endif
list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
list_del(&eq->list);
+ mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
if (destroy_unmap_eq(dev, &eq->core))
mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
eq->core.eqn);
@@ -803,23 +787,17 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
static int create_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
- char name[MLX5_MAX_IRQ_NAME];
struct mlx5_eq_comp *eq;
- int ncomp_vec;
+ int ncomp_eqs;
int nent;
int err;
int i;
INIT_LIST_HEAD(&table->comp_eqs_list);
- ncomp_vec = table->num_comp_vectors;
+ ncomp_eqs = table->num_comp_eqs;
nent = MLX5_COMP_EQ_SIZE;
-#ifdef CONFIG_RFS_ACCEL
- table->rmap = alloc_irq_cpu_rmap(ncomp_vec);
- if (!table->rmap)
- return -ENOMEM;
-#endif
- for (i = 0; i < ncomp_vec; i++) {
- int vecidx = i + MLX5_EQ_VEC_COMP_BASE;
+ for (i = 0; i < ncomp_eqs; i++) {
+ int vecidx = i + MLX5_IRQ_VEC_COMP_BASE;
struct mlx5_eq_param param = {};
eq = kzalloc(sizeof(*eq), GFP_KERNEL);
@@ -834,33 +812,28 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
(unsigned long)&eq->tasklet_ctx);
-#ifdef CONFIG_RFS_ACCEL
- irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx));
-#endif
- snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
+ eq->irq_nb.notifier_call = mlx5_eq_comp_int;
param = (struct mlx5_eq_param) {
- .index = vecidx,
- .mask = 0,
+ .irq_index = vecidx,
.nent = nent,
- .context = &eq->core,
- .handler = mlx5_eq_comp_int
};
- err = create_map_eq(dev, &eq->core, name, &param);
+ err = create_map_eq(dev, &eq->core, &param);
+ if (err) {
+ kfree(eq);
+ goto clean;
+ }
+ err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
if (err) {
+ destroy_unmap_eq(dev, &eq->core);
kfree(eq);
goto clean;
}
+
mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
/* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
list_add_tail(&eq->list, &table->comp_eqs_list);
}
- err = set_comp_irq_affinity_hints(dev);
- if (err) {
- mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
- goto clean;
- }
-
return 0;
clean:
@@ -891,22 +864,24 @@ EXPORT_SYMBOL(mlx5_vector2eqn);
unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
{
- return dev->priv.eq_table->num_comp_vectors;
+ return dev->priv.eq_table->num_comp_eqs;
}
EXPORT_SYMBOL(mlx5_comp_vectors_count);
struct cpumask *
mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
{
- /* TODO: consider irq_get_affinity_mask(irq) */
- return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask;
+ int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE;
+
+ return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table,
+ vecidx);
}
EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
{
- return dev->priv.eq_table->rmap;
+ return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table);
}
#endif
@@ -927,82 +902,19 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
- int i, max_eqs;
-
- clear_comp_irqs_affinity_hints(dev);
-
-#ifdef CONFIG_RFS_ACCEL
- if (table->rmap) {
- free_irq_cpu_rmap(table->rmap);
- table->rmap = NULL;
- }
-#endif
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
- max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
- for (i = max_eqs - 1; i >= 0; i--) {
- if (!table->irq_info[i].context)
- continue;
- free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context);
- table->irq_info[i].context = NULL;
- }
+ mlx5_irq_table_destroy(dev);
mutex_unlock(&table->lock);
- pci_free_irq_vectors(dev->pdev);
-}
-
-static int alloc_irq_vectors(struct mlx5_core_dev *dev)
-{
- struct mlx5_priv *priv = &dev->priv;
- struct mlx5_eq_table *table = priv->eq_table;
- int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
- MLX5_CAP_GEN(dev, max_num_eqs) :
- 1 << MLX5_CAP_GEN(dev, log_max_eq);
- int nvec;
- int err;
-
- nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
- MLX5_EQ_VEC_COMP_BASE;
- nvec = min_t(int, nvec, num_eqs);
- if (nvec <= MLX5_EQ_VEC_COMP_BASE)
- return -ENOMEM;
-
- table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL);
- if (!table->irq_info)
- return -ENOMEM;
-
- nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1,
- nvec, PCI_IRQ_MSIX);
- if (nvec < 0) {
- err = nvec;
- goto err_free_irq_info;
- }
-
- table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
-
- return 0;
-
-err_free_irq_info:
- kfree(table->irq_info);
- return err;
-}
-
-static void free_irq_vectors(struct mlx5_core_dev *dev)
-{
- struct mlx5_priv *priv = &dev->priv;
-
- pci_free_irq_vectors(dev->pdev);
- kfree(priv->eq_table->irq_info);
}
int mlx5_eq_table_create(struct mlx5_core_dev *dev)
{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
int err;
- err = alloc_irq_vectors(dev);
- if (err) {
- mlx5_core_err(dev, "alloc irq vectors failed\n");
- return err;
- }
+ eq_table->num_comp_eqs =
+ mlx5_irq_get_num_comp(eq_table->irq_table);
err = create_async_eqs(dev);
if (err) {
@@ -1020,7 +932,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
err_comp_eqs:
destroy_async_eqs(dev);
err_async_eqs:
- free_irq_vectors(dev);
return err;
}
@@ -1028,7 +939,6 @@ void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
{
destroy_comp_eqs(dev);
destroy_async_eqs(dev);
- free_irq_vectors(dev);
}
int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
@@ -1040,6 +950,7 @@ int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
}
+EXPORT_SYMBOL(mlx5_eq_notifier_register);
int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
{
@@ -1050,3 +961,4 @@ int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
}
+EXPORT_SYMBOL(mlx5_eq_notifier_unregister);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 5414e8f82d5f..7281f8d6cba6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -134,6 +134,30 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
}
+int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *in, int inlen)
+{
+ return modify_esw_vport_context_cmd(esw->dev, vport, in, inlen);
+}
+
+static int query_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
+ void *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
+
+ MLX5_SET(query_esw_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+ MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *out, int outlen)
+{
+ return query_esw_vport_context_cmd(esw->dev, vport, out, outlen);
+}
+
static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
u16 vlan, u8 qos, u8 set_flags)
{
@@ -473,7 +497,7 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
fdb_add:
/* SRIOV is enabled: Forward UC MAC to vport */
- if (esw->fdb_table.legacy.fdb && esw->mode == SRIOV_LEGACY)
+ if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY)
vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n",
@@ -873,7 +897,7 @@ static void esw_vport_change_handle_locked(struct mlx5_vport *vport)
struct mlx5_eswitch *esw = dev->priv.eswitch;
u8 mac[ETH_ALEN];
- mlx5_query_nic_vport_mac_address(dev, vport->vport, mac);
+ mlx5_query_nic_vport_mac_address(dev, vport->vport, true, mac);
esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n",
vport->vport, mac);
@@ -939,7 +963,7 @@ int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS,
- vport->vport);
+ mlx5_eswitch_vport_num_to_index(esw, vport->vport));
if (!root_ns) {
esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport);
return -EOPNOTSUPP;
@@ -1057,7 +1081,7 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
- vport->vport);
+ mlx5_eswitch_vport_num_to_index(esw, vport->vport));
if (!root_ns) {
esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport);
return -EOPNOTSUPP;
@@ -1168,6 +1192,8 @@ void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
vport->ingress.drop_rule = NULL;
vport->ingress.allow_rule = NULL;
+
+ esw_vport_del_ingress_acl_modify_metadata(esw, vport);
}
void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
@@ -1527,6 +1553,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
u16 vport_num = vport->vport;
+ int flags;
if (esw->manager_vport == vport_num)
return;
@@ -1544,11 +1571,13 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
vport->info.node_guid);
}
+ flags = (vport->info.vlan || vport->info.qos) ?
+ SET_VLAN_STRIP | SET_VLAN_INSERT : 0;
modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
- (vport->info.vlan || vport->info.qos));
+ flags);
/* Only legacy mode needs ACLs */
- if (esw->mode == SRIOV_LEGACY) {
+ if (esw->mode == MLX5_ESWITCH_LEGACY) {
esw_vport_ingress_config(esw, vport);
esw_vport_egress_config(esw, vport);
}
@@ -1600,7 +1629,7 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
/* Create steering drop counters for ingress and egress ACLs */
- if (vport_num && esw->mode == SRIOV_LEGACY)
+ if (vport_num && esw->mode == MLX5_ESWITCH_LEGACY)
esw_vport_create_drop_counters(vport);
/* Restore old vport configuration */
@@ -1654,7 +1683,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw,
vport->enabled_events = 0;
esw_vport_disable_qos(esw, vport);
if (esw->manager_vport != vport_num &&
- esw->mode == SRIOV_LEGACY) {
+ esw->mode == MLX5_ESWITCH_LEGACY) {
mlx5_modify_vport_admin_state(esw->dev,
MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
vport_num, 1,
@@ -1686,82 +1715,91 @@ static int eswitch_vport_event(struct notifier_block *nb,
return NOTIFY_OK;
}
-static int query_esw_functions(struct mlx5_core_dev *dev,
- u32 *out, int outlen)
+/**
+ * mlx5_esw_query_functions - Returns raw output about functions state
+ * @dev: Pointer to device to query
+ *
+ * mlx5_esw_query_functions() allocates and returns functions changed
+ * raw output memory pointer from device on success. Otherwise returns ERR_PTR.
+ * Caller must free the memory using kvfree() when valid pointer is returned.
+ */
+const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
{
- u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {0};
+ int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out);
+ u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {};
+ u32 *out;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return ERR_PTR(-ENOMEM);
MLX5_SET(query_esw_functions_in, in, opcode,
MLX5_CMD_OP_QUERY_ESW_FUNCTIONS);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+ if (!err)
+ return out;
+
+ kvfree(out);
+ return ERR_PTR(err);
}
-int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs)
+static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw)
{
- u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {0};
- int err;
+ MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
+ mlx5_eq_notifier_register(esw->dev, &esw->nb);
- err = query_esw_functions(dev, out, sizeof(out));
- if (err)
- return err;
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) {
+ MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler,
+ ESW_FUNCTIONS_CHANGED);
+ mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb);
+ }
+}
+
+static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw)
+{
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev))
+ mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb);
- *num_vfs = MLX5_GET(query_esw_functions_out, out,
- host_params_context.host_num_of_vfs);
- esw_debug(dev, "host_num_of_vfs=%d\n", *num_vfs);
+ mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
- return 0;
+ flush_workqueue(esw->work_queue);
}
/* Public E-Switch API */
#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
-int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
+int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode)
{
struct mlx5_vport *vport;
- int total_nvports = 0;
- u16 vf_nvports = 0;
int err;
int i, enabled_events;
if (!ESW_ALLOWED(esw) ||
!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
- esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
+ esw_warn(esw->dev, "FDB is not supported, aborting ...\n");
return -EOPNOTSUPP;
}
if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support))
- esw_warn(esw->dev, "E-Switch ingress ACL is not supported by FW\n");
+ esw_warn(esw->dev, "ingress ACL is not supported by FW\n");
if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support))
- esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n");
-
- esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
-
- if (mode == SRIOV_OFFLOADS) {
- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- err = mlx5_esw_query_functions(esw->dev, &vf_nvports);
- if (err)
- return err;
- total_nvports = esw->total_vports;
- } else {
- vf_nvports = nvfs;
- total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev);
- }
- }
+ esw_warn(esw->dev, "engress ACL is not supported by FW\n");
esw->mode = mode;
mlx5_lag_update(esw->dev);
- if (mode == SRIOV_LEGACY) {
+ if (mode == MLX5_ESWITCH_LEGACY) {
err = esw_create_legacy_table(esw);
if (err)
goto abort;
} else {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- err = esw_offloads_init(esw, vf_nvports, total_nvports);
+ err = esw_offloads_init(esw);
}
if (err)
@@ -1771,11 +1809,8 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
if (err)
esw_warn(esw->dev, "Failed to create eswitch TSAR");
- /* Don't enable vport events when in SRIOV_OFFLOADS mode, since:
- * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode
- * 2. FDB/Eswitch is programmed by user space tools
- */
- enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0;
+ enabled_events = (mode == MLX5_ESWITCH_LEGACY) ? SRIOV_VPORT_EVENTS :
+ UC_ADDR_CHANGE;
/* Enable PF vport */
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
@@ -1788,22 +1823,21 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
}
/* Enable VF vports */
- mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs)
+ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
esw_enable_vport(esw, vport, enabled_events);
- if (mode == SRIOV_LEGACY) {
- MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
- mlx5_eq_notifier_register(esw->dev, &esw->nb);
- }
+ mlx5_eswitch_event_handlers_register(esw);
+
+ esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n",
+ mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+ esw->esw_funcs.num_vfs, esw->enabled_vports);
- esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
- esw->enabled_vports);
return 0;
abort:
- esw->mode = SRIOV_NONE;
+ esw->mode = MLX5_ESWITCH_NONE;
- if (mode == SRIOV_OFFLOADS) {
+ if (mode == MLX5_ESWITCH_OFFLOADS) {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
}
@@ -1811,23 +1845,22 @@ abort:
return err;
}
-void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
{
struct esw_mc_addr *mc_promisc;
struct mlx5_vport *vport;
int old_mode;
int i;
- if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE)
+ if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE)
return;
- esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n",
- esw->enabled_vports, esw->mode);
+ esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n",
+ esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+ esw->esw_funcs.num_vfs, esw->enabled_vports);
mc_promisc = &esw->mc_promisc;
-
- if (esw->mode == SRIOV_LEGACY)
- mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
+ mlx5_eswitch_event_handlers_unregister(esw);
mlx5_esw_for_all_vports(esw, i, vport)
esw_disable_vport(esw, vport);
@@ -1837,17 +1870,17 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
esw_destroy_tsar(esw);
- if (esw->mode == SRIOV_LEGACY)
+ if (esw->mode == MLX5_ESWITCH_LEGACY)
esw_destroy_legacy_table(esw);
- else if (esw->mode == SRIOV_OFFLOADS)
+ else if (esw->mode == MLX5_ESWITCH_OFFLOADS)
esw_offloads_cleanup(esw);
old_mode = esw->mode;
- esw->mode = SRIOV_NONE;
+ esw->mode = MLX5_ESWITCH_NONE;
mlx5_lag_update(esw->dev);
- if (old_mode == SRIOV_OFFLOADS) {
+ if (old_mode == MLX5_ESWITCH_OFFLOADS) {
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
}
@@ -1855,14 +1888,16 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
{
- int total_vports = MLX5_TOTAL_VPORTS(dev);
struct mlx5_eswitch *esw;
struct mlx5_vport *vport;
+ int total_vports;
int err, i;
if (!MLX5_VPORT_MANAGER(dev))
return 0;
+ total_vports = mlx5_eswitch_get_total_vports(dev);
+
esw_info(dev,
"Total vports %d, per vport: max uc(%d) max mc(%d)\n",
total_vports,
@@ -1875,6 +1910,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
esw->dev = dev;
esw->manager_vport = mlx5_eswitch_manager_vport(dev);
+ esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev);
esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
if (!esw->work_queue) {
@@ -1908,7 +1944,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
}
esw->enabled_vports = 0;
- esw->mode = SRIOV_NONE;
+ esw->mode = MLX5_ESWITCH_NONE;
esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
@@ -1978,7 +2014,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
ether_addr_copy(evport->info.mac, mac);
evport->info.node_guid = node_guid;
- if (evport->enabled && esw->mode == SRIOV_LEGACY)
+ if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
err = esw_vport_ingress_config(esw, evport);
unlock:
@@ -2062,7 +2098,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
evport->info.vlan = vlan;
evport->info.qos = qos;
- if (evport->enabled && esw->mode == SRIOV_LEGACY) {
+ if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) {
err = esw_vport_ingress_config(esw, evport);
if (err)
goto unlock;
@@ -2104,7 +2140,7 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
mlx5_core_warn(esw->dev,
"Spoofchk in set while MAC is invalid, vport(%d)\n",
evport->vport);
- if (evport->enabled && esw->mode == SRIOV_LEGACY)
+ if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
err = esw_vport_ingress_config(esw, evport);
if (err)
evport->info.spoofchk = pschk;
@@ -2200,7 +2236,7 @@ int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
return -EPERM;
mutex_lock(&esw->state_lock);
- if (esw->mode != SRIOV_LEGACY) {
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
err = -EOPNOTSUPP;
goto out;
}
@@ -2223,7 +2259,7 @@ int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
return -EPERM;
mutex_lock(&esw->state_lock);
- if (esw->mode != SRIOV_LEGACY) {
+ if (esw->mode != MLX5_ESWITCH_LEGACY) {
err = -EOPNOTSUPP;
goto out;
}
@@ -2366,7 +2402,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
u64 bytes = 0;
int err = 0;
- if (!vport->enabled || esw->mode != SRIOV_LEGACY)
+ if (!vport->enabled || esw->mode != MLX5_ESWITCH_LEGACY)
return 0;
if (vport->egress.drop_counter)
@@ -2476,16 +2512,27 @@ free_out:
u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
{
- return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE;
+ return ESW_ALLOWED(esw) ? esw->mode : MLX5_ESWITCH_NONE;
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
+enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
+{
+ struct mlx5_eswitch *esw;
+
+ esw = dev->priv.eswitch;
+ return ESW_ALLOWED(esw) ? esw->offloads.encap :
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
+
bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
{
- if ((dev0->priv.eswitch->mode == SRIOV_NONE &&
- dev1->priv.eswitch->mode == SRIOV_NONE) ||
- (dev0->priv.eswitch->mode == SRIOV_OFFLOADS &&
- dev1->priv.eswitch->mode == SRIOV_OFFLOADS))
+ if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
+ dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) ||
+ (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
+ dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS))
return true;
return false;
@@ -2494,6 +2541,26 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
struct mlx5_core_dev *dev1)
{
- return (dev0->priv.eswitch->mode == SRIOV_OFFLOADS &&
- dev1->priv.eswitch->mode == SRIOV_OFFLOADS);
+ return (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
+ dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS);
+}
+
+void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs)
+{
+ const u32 *out;
+
+ WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE);
+
+ if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ esw->esw_funcs.num_vfs = num_vfs;
+ return;
+ }
+
+ out = mlx5_esw_query_functions(esw->dev);
+ if (IS_ERR(out))
+ return;
+
+ esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_num_of_vfs);
+ kvfree(out);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 849a628f6d17..a38e8a3c7c9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -68,6 +68,8 @@ struct vport_ingress {
struct mlx5_flow_group *allow_spoofchk_only_grp;
struct mlx5_flow_group *allow_untagged_only_grp;
struct mlx5_flow_group *drop_grp;
+ int modify_metadata_id;
+ struct mlx5_flow_handle *modify_metadata_rule;
struct mlx5_flow_handle *allow_rule;
struct mlx5_flow_handle *drop_rule;
struct mlx5_fc *drop_counter;
@@ -178,7 +180,7 @@ struct mlx5_esw_offload {
const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
u8 inline_mode;
u64 num_flows;
- u8 encap;
+ enum devlink_eswitch_encap_mode encap;
};
/* E-Switch MC FDB table hash node */
@@ -198,6 +200,10 @@ struct mlx5_esw_functions {
u16 num_vfs;
};
+enum {
+ MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0),
+};
+
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
struct mlx5_nb nb;
@@ -205,6 +211,7 @@ struct mlx5_eswitch {
struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
struct workqueue_struct *work_queue;
struct mlx5_vport *vports;
+ u32 flags;
int total_vports;
int enabled_vports;
/* Synchronize between vport change events
@@ -222,12 +229,12 @@ struct mlx5_eswitch {
int mode;
int nvports;
u16 manager_vport;
+ u16 first_host_vport;
struct mlx5_esw_functions esw_funcs;
};
void esw_offloads_cleanup(struct mlx5_eswitch *esw);
-int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
- int total_nvports);
+int esw_offloads_init(struct mlx5_eswitch *esw);
void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
int esw_offloads_init_reps(struct mlx5_eswitch *esw);
void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
@@ -242,12 +249,14 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
+void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
-int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
-void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
+int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode);
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
u16 vport, u8 mac[ETH_ALEN]);
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
@@ -269,6 +278,11 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
struct ifla_vf_stats *vf_stats);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
+int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *in, int inlen);
+int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+ void *out, int outlen);
+
struct mlx5_flow_spec;
struct mlx5_esw_flow_attr;
struct mlx5_termtbl_handle;
@@ -378,10 +392,12 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
struct netlink_ext_ack *extack);
int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
-int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
-int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode);
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode encap,
struct netlink_ext_ack *extack);
-int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode *encap);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
@@ -409,7 +425,7 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
struct mlx5_core_dev *dev1);
-int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs);
+const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev);
#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
@@ -429,6 +445,12 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
MLX5_VPORT_ECPF : MLX5_VPORT_PF;
}
+static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev)
+{
+ return mlx5_core_is_ecpf_esw_manager(dev) ?
+ MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF;
+}
+
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev)
{
/* Ideally device should have the functions changed supported
@@ -525,17 +547,47 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
#define mlx5_esw_for_each_vf_vport_num_reverse(esw, vport, nvfs) \
for ((vport) = (nvfs); (vport) >= MLX5_VPORT_FIRST_VF; (vport)--)
+/* Includes host PF (vport 0) if it's not esw manager. */
+#define mlx5_esw_for_each_host_func_rep(esw, i, rep, nvfs) \
+ for ((i) = (esw)->first_host_vport; \
+ (rep) = &(esw)->offloads.vport_reps[i], \
+ (i) <= (nvfs); (i)++)
+
+#define mlx5_esw_for_each_host_func_rep_reverse(esw, i, rep, nvfs) \
+ for ((i) = (nvfs); \
+ (rep) = &(esw)->offloads.vport_reps[i], \
+ (i) >= (esw)->first_host_vport; (i)--)
+
+#define mlx5_esw_for_each_host_func_vport(esw, vport, nvfs) \
+ for ((vport) = (esw)->first_host_vport; \
+ (vport) <= (nvfs); (vport)++)
+
+#define mlx5_esw_for_each_host_func_vport_reverse(esw, vport, nvfs) \
+ for ((vport) = (nvfs); \
+ (vport) >= (esw)->first_host_vport; (vport)--)
+
struct mlx5_vport *__must_check
mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num);
+bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num);
+
+void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs);
+int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data);
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
-static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
-static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
+static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; }
+static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
+static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {}
#define FDB_MAX_CHAIN 1
#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 060de01f09b6..8ed4497929b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -88,6 +88,53 @@ u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw)
return 1;
}
+static void
+mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr)
+{
+ void *misc2;
+ void *misc;
+
+ /* Use metadata matching because vport is not represented by single
+ * VHCA in dual-port RoCE mode, and matching on source vport may fail.
+ */
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch,
+ attr->in_rep->vport));
+
+ misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc)))
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ }
+
+ if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) &&
+ attr->in_rep->vport == MLX5_VPORT_UPLINK)
+ spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+}
+
struct mlx5_flow_handle *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
@@ -99,9 +146,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule;
struct mlx5_flow_table *fdb;
int j, i = 0;
- void *misc;
- if (esw->mode != SRIOV_OFFLOADS)
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS)
return ERR_PTR(-EOPNOTSUPP);
flow_act.action = attr->action;
@@ -159,21 +205,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
i++;
}
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(attr->in_mdev, vhca_id));
-
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET_TO_ONES(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id);
-
- spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
if (attr->tunnel_match_level != MLX5_MATCH_NONE)
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
@@ -223,7 +256,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_table *fast_fdb;
struct mlx5_flow_table *fwd_fdb;
struct mlx5_flow_handle *rule;
- void *misc;
int i;
fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0);
@@ -255,25 +287,11 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
dest[i].ft = fwd_fdb,
i++;
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
-
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+ mlx5_eswitch_set_rule_source_port(esw, spec, attr);
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
- MLX5_SET_TO_ONES(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id);
-
- if (attr->match_level == MLX5_MATCH_NONE)
- spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
- else
- spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS |
- MLX5_MATCH_MISC_PARAMETERS;
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ if (attr->match_level != MLX5_MATCH_NONE)
+ spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
@@ -339,11 +357,10 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
{
struct mlx5_eswitch_rep *rep;
- int vf_vport, err = 0;
+ int i, err = 0;
esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
- for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) {
- rep = &esw->offloads.vport_reps[vf_vport];
+ mlx5_esw_for_each_host_func_rep(esw, i, rep, esw->esw_funcs.num_vfs) {
if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
continue;
@@ -570,23 +587,87 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
mlx5_del_flow_rules(rule);
}
-static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
+static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw)
+{
+ u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+ u8 fdb_to_vport_reg_c_id;
+ int err;
+
+ err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
+ out, sizeof(out));
+ if (err)
+ return err;
+
+ fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
+ esw_vport_context.fdb_to_vport_reg_c_id);
+
+ fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ field_select.fdb_to_vport_reg_c_id, 1);
+
+ return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
+ in, sizeof(in));
+}
+
+static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw)
+{
+ u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+ u8 fdb_to_vport_reg_c_id;
+ int err;
+
+ err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
+ out, sizeof(out));
+ if (err)
+ return err;
+
+ fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
+ esw_vport_context.fdb_to_vport_reg_c_id);
+
+ fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ field_select.fdb_to_vport_reg_c_id, 1);
+
+ return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
+ in, sizeof(in));
+}
+
+static void peer_miss_rules_setup(struct mlx5_eswitch *esw,
+ struct mlx5_core_dev *peer_dev,
struct mlx5_flow_spec *spec,
struct mlx5_flow_destination *dest)
{
- void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
+ void *misc;
- MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(peer_dev, vhca_id));
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc,
- source_eswitch_owner_vhca_id);
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(peer_dev, vhca_id));
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+ }
dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest->vport.num = peer_dev->priv.eswitch->manager_vport;
@@ -594,6 +675,26 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
}
+static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch *peer_esw,
+ struct mlx5_flow_spec *spec,
+ u16 vport)
+{
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(peer_esw,
+ vport));
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+ }
+}
+
static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
struct mlx5_core_dev *peer_dev)
{
@@ -611,7 +712,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
if (!spec)
return -ENOMEM;
- peer_miss_rules_setup(peer_dev, spec, &dest);
+ peer_miss_rules_setup(esw, peer_dev, spec, &dest);
flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL);
if (!flows) {
@@ -624,7 +725,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
misc_parameters);
if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
- MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF);
+ esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
+ spec, MLX5_VPORT_PF);
+
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow)) {
@@ -646,7 +749,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
}
mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) {
- MLX5_SET(fte_match_set_misc, misc, source_port, i);
+ esw_set_peer_miss_rule_source_port(esw,
+ peer_dev->priv.eswitch,
+ spec, i);
+
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow)) {
@@ -930,6 +1036,30 @@ static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw)
#define MAX_PF_SQ 256
#define MAX_SQ_NVPORTS 32
+static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
+ u32 *flow_group_in)
+{
+ void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in,
+ match_criteria);
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters_2.metadata_reg_c_0);
+ } else {
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_port);
+ }
+}
+
static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
@@ -1027,19 +1157,21 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
/* create peer esw miss group */
memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_MISC_PARAMETERS);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
- match_criteria);
+ esw_set_flow_group_source_port(esw, flow_group_in);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria,
- misc_parameters.source_port);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria,
- misc_parameters.source_eswitch_owner_vhca_id);
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in,
+ match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ }
- MLX5_SET(create_flow_group_in, flow_group_in,
- source_eswitch_owner_vhca_id_valid, 1);
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
ix + esw->total_vports - 1);
@@ -1153,7 +1285,6 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_group *g;
u32 *flow_group_in;
- void *match_criteria, *misc;
int err = 0;
nvports = nvports + MLX5_ESW_MISS_FLOWS;
@@ -1163,12 +1294,8 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
/* create vport rx group */
memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- MLX5_MATCH_MISC_PARAMETERS);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
- misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ esw_set_flow_group_source_port(esw, flow_group_in);
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
@@ -1207,13 +1334,24 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
goto out;
}
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport));
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
- spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ }
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
@@ -1231,21 +1369,22 @@ out:
static int esw_offloads_start(struct mlx5_eswitch *esw,
struct netlink_ext_ack *extack)
{
- int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
+ int err, err1;
- if (esw->mode != SRIOV_LEGACY &&
+ if (esw->mode != MLX5_ESWITCH_LEGACY &&
!mlx5_core_is_ecpf_esw_manager(esw->dev)) {
NL_SET_ERR_MSG_MOD(extack,
"Can't set offloads mode, SRIOV legacy not enabled");
return -EINVAL;
}
- mlx5_eswitch_disable_sriov(esw);
- err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+ mlx5_eswitch_disable(esw);
+ mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs);
+ err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"Failed setting eswitch to offloads");
- err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
+ err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
if (err1) {
NL_SET_ERR_MSG_MOD(extack,
"Failed setting eswitch back to legacy");
@@ -1253,7 +1392,6 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
}
if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
if (mlx5_eswitch_inline_mode_get(esw,
- num_vfs,
&esw->offloads.inline_mode)) {
esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
NL_SET_ERR_MSG_MOD(extack,
@@ -1270,11 +1408,11 @@ void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
int esw_offloads_init_reps(struct mlx5_eswitch *esw)
{
- int total_vports = MLX5_TOTAL_VPORTS(esw->dev);
+ int total_vports = esw->total_vports;
struct mlx5_core_dev *dev = esw->dev;
struct mlx5_eswitch_rep *rep;
u8 hw_id[ETH_ALEN], rep_type;
- int vport;
+ int vport_index;
esw->offloads.vport_reps = kcalloc(total_vports,
sizeof(struct mlx5_eswitch_rep),
@@ -1282,10 +1420,11 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
if (!esw->offloads.vport_reps)
return -ENOMEM;
- mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
+ mlx5_query_mac_address(dev, hw_id);
- mlx5_esw_for_all_reps(esw, vport, rep) {
- rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport);
+ mlx5_esw_for_all_reps(esw, vport_index, rep) {
+ rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index);
+ rep->vport_index = vport_index;
ether_addr_copy(rep->hw_id, hw_id);
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
@@ -1340,21 +1479,20 @@ static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports)
__unload_reps_vf_vport(esw, nvports, rep_type);
}
-static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports,
- u8 rep_type)
+static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
{
- __unload_reps_vf_vport(esw, nvports, rep_type);
+ __unload_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type);
/* Special vports must be the last to unload. */
__unload_reps_special_vport(esw, rep_type);
}
-static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports)
+static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw)
{
u8 rep_type = NUM_REP_TYPES;
while (rep_type-- > 0)
- __unload_reps_all_vport(esw, nvports, rep_type);
+ __unload_reps_all_vport(esw, rep_type);
}
static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
@@ -1430,6 +1568,26 @@ err_vf:
return err;
}
+static int __load_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ int err;
+
+ /* Special vports must be loaded first, uplink rep creates mdev resource. */
+ err = __load_reps_special_vport(esw, rep_type);
+ if (err)
+ return err;
+
+ err = __load_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type);
+ if (err)
+ goto err_vfs;
+
+ return 0;
+
+err_vfs:
+ __unload_reps_special_vport(esw, rep_type);
+ return err;
+}
+
static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports)
{
u8 rep_type = 0;
@@ -1449,34 +1607,13 @@ err_reps:
return err;
}
-static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports,
- u8 rep_type)
-{
- int err;
-
- /* Special vports must be loaded first. */
- err = __load_reps_special_vport(esw, rep_type);
- if (err)
- return err;
-
- err = __load_reps_vf_vport(esw, nvports, rep_type);
- if (err)
- goto err_vfs;
-
- return 0;
-
-err_vfs:
- __unload_reps_special_vport(esw, rep_type);
- return err;
-}
-
-static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports)
+static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw)
{
u8 rep_type = 0;
int err;
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
- err = __load_reps_all_vport(esw, nvports, rep_type);
+ err = __load_reps_all_vport(esw, rep_type);
if (err)
goto err_reps;
}
@@ -1485,7 +1622,7 @@ static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports)
err_reps:
while (rep_type-- > 0)
- __unload_reps_all_vport(esw, nvports, rep_type);
+ __unload_reps_all_vport(esw, rep_type);
return err;
}
@@ -1521,6 +1658,10 @@ static int mlx5_esw_offloads_devcom_event(int event,
switch (event) {
case ESW_OFFLOADS_DEVCOM_PAIR:
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
+ mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
+ break;
+
err = mlx5_esw_offloads_pair(esw, peer_esw);
if (err)
goto err_out;
@@ -1589,32 +1730,16 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
- struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_act flow_act = {0};
struct mlx5_flow_spec *spec;
int err = 0;
/* For prio tag mode, there is only 1 FTEs:
- * 1) Untagged packets - push prio tag VLAN, allow
+ * 1) Untagged packets - push prio tag VLAN and modify metadata if
+ * required, allow
* Unmatched traffic is allowed by default
*/
- if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
- return -EOPNOTSUPP;
-
- esw_vport_cleanup_ingress_rules(esw, vport);
-
- err = esw_vport_enable_ingress_acl(esw, vport);
- if (err) {
- mlx5_core_warn(esw->dev,
- "failed to enable prio tag ingress acl (%d) on vport[%d]\n",
- err, vport->vport);
- return err;
- }
-
- esw_debug(esw->dev,
- "vport[%d] configure ingress rules\n", vport->vport);
-
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec) {
err = -ENOMEM;
@@ -1630,6 +1755,12 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
flow_act.vlan[0].ethtype = ETH_P_8021Q;
flow_act.vlan[0].vid = 0;
flow_act.vlan[0].prio = 0;
+
+ if (vport->ingress.modify_metadata_rule) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.modify_id = vport->ingress.modify_metadata_id;
+ }
+
vport->ingress.allow_rule =
mlx5_add_flow_rules(vport->ingress.acl, spec,
&flow_act, NULL, 0);
@@ -1650,6 +1781,58 @@ out_no_mem:
return err;
}
+static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec spec = {};
+ int err = 0;
+
+ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
+ MLX5_SET(set_action_in, action, data,
+ mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport));
+
+ err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ 1, action, &vport->ingress.modify_metadata_id);
+ if (err) {
+ esw_warn(esw->dev,
+ "failed to alloc modify header for vport %d ingress acl (%d)\n",
+ vport->vport, err);
+ return err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ flow_act.modify_id = vport->ingress.modify_metadata_id;
+ vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl,
+ &spec, &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.modify_metadata_rule)) {
+ err = PTR_ERR(vport->ingress.modify_metadata_rule);
+ esw_warn(esw->dev,
+ "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.modify_metadata_rule = NULL;
+ goto out;
+ }
+
+out:
+ if (err)
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+ return err;
+}
+
+void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (vport->ingress.modify_metadata_rule) {
+ mlx5_del_flow_rules(vport->ingress.modify_metadata_rule);
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+
+ vport->ingress.modify_metadata_rule = NULL;
+ }
+}
+
static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
@@ -1657,6 +1840,9 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec;
int err = 0;
+ if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
+ return 0;
+
/* For prio tag mode, there is only 1 FTEs:
* 1) prio tag packets - pop the prio tag VLAN, allow
* Unmatched traffic is allowed by default
@@ -1710,27 +1896,98 @@ out_no_mem:
return err;
}
-static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports)
+static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
{
- struct mlx5_vport *vport = NULL;
- int i, j;
int err;
- mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) {
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ !MLX5_CAP_GEN(esw->dev, prio_tag_required))
+ return 0;
+
+ esw_vport_cleanup_ingress_rules(esw, vport);
+
+ err = esw_vport_enable_ingress_acl(esw, vport);
+ if (err) {
+ esw_warn(esw->dev,
+ "failed to enable ingress acl (%d) on vport[%d]\n",
+ err, vport->vport);
+ return err;
+ }
+
+ esw_debug(esw->dev,
+ "vport[%d] configure ingress rules\n", vport->vport);
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ err = esw_vport_add_ingress_acl_modify_metadata(esw, vport);
+ if (err)
+ goto out;
+ }
+
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+ mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
err = esw_vport_ingress_prio_tag_config(esw, vport);
if (err)
- goto err_ingress;
- err = esw_vport_egress_prio_tag_config(esw, vport);
+ goto out;
+ }
+
+out:
+ if (err)
+ esw_vport_disable_ingress_acl(esw, vport);
+ return err;
+}
+
+static bool
+esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
+{
+ if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl))
+ return false;
+
+ if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
+ MLX5_FDB_TO_VPORT_REG_C_0))
+ return false;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
+ return false;
+
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+ mlx5_ecpf_vport_exists(esw->dev))
+ return false;
+
+ return true;
+}
+
+static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ int i, j;
+ int err;
+
+ if (esw_check_vport_match_metadata_supported(esw))
+ esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+
+ mlx5_esw_for_all_vports(esw, i, vport) {
+ err = esw_vport_ingress_common_config(esw, vport);
if (err)
- goto err_egress;
+ goto err_ingress;
+
+ if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
+ err = esw_vport_egress_prio_tag_config(esw, vport);
+ if (err)
+ goto err_egress;
+ }
}
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ esw_info(esw->dev, "Use metadata reg_c as source vport to match\n");
+
return 0;
err_egress:
esw_vport_disable_ingress_acl(esw, vport);
err_ingress:
- mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) {
+ for (j = MLX5_VPORT_PF; j < i; j++) {
+ vport = &esw->vports[j];
esw_vport_disable_egress_acl(esw, vport);
esw_vport_disable_ingress_acl(esw, vport);
}
@@ -1738,40 +1995,46 @@ err_ingress:
return err;
}
-static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw)
+static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
int i;
- mlx5_esw_for_each_vf_vport(esw, i, vport, esw->dev->priv.sriov.num_vfs) {
+ mlx5_esw_for_all_vports(esw, i, vport) {
esw_vport_disable_egress_acl(esw, vport);
esw_vport_disable_ingress_acl(esw, vport);
}
+
+ esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
}
-static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int vf_nvports,
- int nvports)
+static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
{
+ int num_vfs = esw->esw_funcs.num_vfs;
+ int total_vports;
int err;
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev))
+ total_vports = esw->total_vports;
+ else
+ total_vports = num_vfs + MLX5_SPECIAL_VPORTS(esw->dev);
+
memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
- if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
- err = esw_prio_tag_acls_config(esw, vf_nvports);
- if (err)
- return err;
- }
-
- err = esw_create_offloads_fdb_tables(esw, nvports);
+ err = esw_create_offloads_acl_tables(esw);
if (err)
return err;
- err = esw_create_offloads_table(esw, nvports);
+ err = esw_create_offloads_fdb_tables(esw, total_vports);
+ if (err)
+ goto create_fdb_err;
+
+ err = esw_create_offloads_table(esw, total_vports);
if (err)
goto create_ft_err;
- err = esw_create_vport_rx_group(esw, nvports);
+ err = esw_create_vport_rx_group(esw, total_vports);
if (err)
goto create_fg_err;
@@ -1783,6 +2046,9 @@ create_fg_err:
create_ft_err:
esw_destroy_offloads_fdb_tables(esw);
+create_fdb_err:
+ esw_destroy_offloads_acl_tables(esw);
+
return err;
}
@@ -1791,42 +2057,56 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
esw_destroy_vport_rx_group(esw);
esw_destroy_offloads_table(esw);
esw_destroy_offloads_fdb_tables(esw);
- if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
- esw_prio_tag_acls_cleanup(esw);
+ esw_destroy_offloads_acl_tables(esw);
}
-static void esw_functions_changed_event_handler(struct work_struct *work)
+static void
+esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out)
{
- struct mlx5_host_work *host_work;
- struct mlx5_eswitch *esw;
- u16 num_vfs = 0;
- int err;
+ bool host_pf_disabled;
+ u16 new_num_vfs;
- host_work = container_of(work, struct mlx5_host_work, work);
- esw = host_work->esw;
+ new_num_vfs = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_num_of_vfs);
+ host_pf_disabled = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_pf_disabled);
- err = mlx5_esw_query_functions(esw->dev, &num_vfs);
- if (err || num_vfs == esw->esw_funcs.num_vfs)
- goto out;
+ if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled)
+ return;
/* Number of VFs can only change from "0 to x" or "x to 0". */
if (esw->esw_funcs.num_vfs > 0) {
esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs);
} else {
- err = esw_offloads_load_vf_reps(esw, num_vfs);
+ int err;
+ err = esw_offloads_load_vf_reps(esw, new_num_vfs);
if (err)
- goto out;
+ return;
}
+ esw->esw_funcs.num_vfs = new_num_vfs;
+}
- esw->esw_funcs.num_vfs = num_vfs;
+static void esw_functions_changed_event_handler(struct work_struct *work)
+{
+ struct mlx5_host_work *host_work;
+ struct mlx5_eswitch *esw;
+ const u32 *out;
+
+ host_work = container_of(work, struct mlx5_host_work, work);
+ esw = host_work->esw;
+
+ out = mlx5_esw_query_functions(esw->dev);
+ if (IS_ERR(out))
+ goto out;
+ esw_vfs_changed_event_handler(esw, out);
+ kvfree(out);
out:
kfree(host_work);
}
-static int esw_functions_changed_event(struct notifier_block *nb,
- unsigned long type, void *data)
+int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data)
{
struct mlx5_esw_functions *esw_funcs;
struct mlx5_host_work *host_work;
@@ -1847,50 +2127,35 @@ static int esw_functions_changed_event(struct notifier_block *nb,
return NOTIFY_OK;
}
-static void esw_functions_changed_event_init(struct mlx5_eswitch *esw,
- u16 vf_nvports)
-{
- if (!mlx5_eswitch_is_funcs_handler(esw->dev))
- return;
-
- MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event,
- ESW_FUNCTIONS_CHANGED);
- mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb);
- esw->esw_funcs.num_vfs = vf_nvports;
-}
-
-static void esw_functions_changed_event_cleanup(struct mlx5_eswitch *esw)
-{
- if (!mlx5_eswitch_is_funcs_handler(esw->dev))
- return;
-
- mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb);
- flush_workqueue(esw->work_queue);
-}
-
-int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
- int total_nvports)
+int esw_offloads_init(struct mlx5_eswitch *esw)
{
int err;
- err = esw_offloads_steering_init(esw, vf_nvports, total_nvports);
+ err = esw_offloads_steering_init(esw);
if (err)
return err;
- err = esw_offloads_load_all_reps(esw, vf_nvports);
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ err = mlx5_eswitch_enable_passing_vport_metadata(esw);
+ if (err)
+ goto err_vport_metadata;
+ }
+
+ err = esw_offloads_load_all_reps(esw);
if (err)
goto err_reps;
esw_offloads_devcom_init(esw);
mutex_init(&esw->offloads.termtbl_mutex);
- esw_functions_changed_event_init(esw, vf_nvports);
-
mlx5_rdma_enable_roce(esw->dev);
return 0;
err_reps:
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ mlx5_eswitch_disable_passing_vport_metadata(esw);
+err_vport_metadata:
esw_offloads_steering_cleanup(esw);
return err;
}
@@ -1898,13 +2163,13 @@ err_reps:
static int esw_offloads_stop(struct mlx5_eswitch *esw,
struct netlink_ext_ack *extack)
{
- int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
+ int err, err1;
- mlx5_eswitch_disable_sriov(esw);
- err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
+ mlx5_eswitch_disable(esw);
+ err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
- err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+ err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS);
if (err1) {
NL_SET_ERR_MSG_MOD(extack,
"Failed setting eswitch back to offloads");
@@ -1916,18 +2181,11 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
void esw_offloads_cleanup(struct mlx5_eswitch *esw)
{
- u16 num_vfs;
-
- esw_functions_changed_event_cleanup(esw);
-
- if (mlx5_eswitch_is_funcs_handler(esw->dev))
- num_vfs = esw->esw_funcs.num_vfs;
- else
- num_vfs = esw->dev->priv.sriov.num_vfs;
-
mlx5_rdma_disable_roce(esw->dev);
esw_offloads_devcom_cleanup(esw);
- esw_offloads_unload_all_reps(esw, num_vfs);
+ esw_offloads_unload_all_reps(esw);
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ mlx5_eswitch_disable_passing_vport_metadata(esw);
esw_offloads_steering_cleanup(esw);
}
@@ -1935,10 +2193,10 @@ static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
{
switch (mode) {
case DEVLINK_ESWITCH_MODE_LEGACY:
- *mlx5_mode = SRIOV_LEGACY;
+ *mlx5_mode = MLX5_ESWITCH_LEGACY;
break;
case DEVLINK_ESWITCH_MODE_SWITCHDEV:
- *mlx5_mode = SRIOV_OFFLOADS;
+ *mlx5_mode = MLX5_ESWITCH_OFFLOADS;
break;
default:
return -EINVAL;
@@ -1950,10 +2208,10 @@ static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
{
switch (mlx5_mode) {
- case SRIOV_LEGACY:
+ case MLX5_ESWITCH_LEGACY:
*mode = DEVLINK_ESWITCH_MODE_LEGACY;
break;
- case SRIOV_OFFLOADS:
+ case MLX5_ESWITCH_OFFLOADS:
*mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
break;
default:
@@ -2017,7 +2275,7 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink)
if(!MLX5_ESWITCH_MANAGER(dev))
return -EPERM;
- if (dev->priv.eswitch->mode == SRIOV_NONE &&
+ if (dev->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
!mlx5_core_is_ecpf_esw_manager(dev))
return -EOPNOTSUPP;
@@ -2068,7 +2326,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct mlx5_eswitch *esw = dev->priv.eswitch;
- int err, vport;
+ int err, vport, num_vport;
u8 mlx5_mode;
err = mlx5_devlink_eswitch_check(devlink);
@@ -2097,7 +2355,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
if (err)
goto out;
- for (vport = 1; vport < esw->enabled_vports; vport++) {
+ mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
@@ -2110,7 +2368,8 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
return 0;
revert_inline_mode:
- while (--vport > 0)
+ num_vport = --vport;
+ mlx5_esw_for_each_host_func_vport_reverse(esw, vport, num_vport)
mlx5_modify_nic_vport_min_inline(dev,
vport,
esw->offloads.inline_mode);
@@ -2131,7 +2390,7 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
}
-int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
{
u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
struct mlx5_core_dev *dev = esw->dev;
@@ -2140,7 +2399,7 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
if (!MLX5_CAP_GEN(dev, vport_group_manager))
return -EOPNOTSUPP;
- if (esw->mode == SRIOV_NONE)
+ if (esw->mode == MLX5_ESWITCH_NONE)
return -EOPNOTSUPP;
switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
@@ -2155,9 +2414,10 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
}
query_vports:
- for (vport = 1; vport <= nvfs; vport++) {
+ mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode);
+ mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
- if (vport > 1 && prev_mlx5_mode != mlx5_mode)
+ if (prev_mlx5_mode != mlx5_mode)
return -EINVAL;
prev_mlx5_mode = mlx5_mode;
}
@@ -2167,7 +2427,8 @@ out:
return 0;
}
-int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode encap,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
@@ -2186,7 +2447,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC)
return -EOPNOTSUPP;
- if (esw->mode == SRIOV_LEGACY) {
+ if (esw->mode == MLX5_ESWITCH_LEGACY) {
esw->offloads.encap = encap;
return 0;
}
@@ -2216,7 +2477,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
return err;
}
-int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode *encap)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct mlx5_eswitch *esw = dev->priv.eswitch;
@@ -2248,12 +2510,11 @@ EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
{
- u16 max_vf = mlx5_core_max_vfs(esw->dev);
struct mlx5_eswitch_rep *rep;
int i;
- if (esw->mode == SRIOV_OFFLOADS)
- __unload_reps_all_vport(esw, max_vf, rep_type);
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS)
+ __unload_reps_all_vport(esw, rep_type);
mlx5_esw_for_all_reps(esw, i, rep)
atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
@@ -2295,3 +2556,22 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
return mlx5_eswitch_get_rep(esw, vport);
}
EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
+
+bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return vport_num >= MLX5_VPORT_FIRST_VF &&
+ vport_num <= esw->dev->priv.sriov.max_vfs;
+}
+
+bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
+{
+ return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA);
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
+
+u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index cb7d8ebe2c95..1d55a324a17e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -49,8 +49,8 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
struct mlx5_termtbl_handle *tt,
struct mlx5_flow_act *flow_act)
{
+ static const struct mlx5_flow_spec spec = {};
struct mlx5_flow_namespace *root_ns;
- struct mlx5_flow_spec spec = {};
int prio, flags;
int err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index ca2296a2f9ee..4c50efe4e7f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -414,7 +414,8 @@ static void mlx5_fpga_conn_cq_tasklet(unsigned long data)
mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
}
-static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq)
+static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq,
+ struct mlx5_eqe *eqe)
{
struct mlx5_fpga_conn *conn;
@@ -429,6 +430,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
struct mlx5_fpga_device *fdev = conn->fdev;
struct mlx5_core_dev *mdev = fdev->mdev;
u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
struct mlx5_wq_param wqp;
struct mlx5_cqe64 *cqe;
int inlen, err, eqn;
@@ -476,7 +478,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas);
- err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen);
+ err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen, out, sizeof(out));
kvfree(in);
if (err)
@@ -867,7 +869,7 @@ struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
conn->cb_arg = attr->cb_arg;
remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32);
- err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac);
+ err = mlx5_query_mac_address(fdev->mdev, remote_mac);
if (err) {
mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err);
ret = ERR_PTR(err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 52c47d3dd5a5..c76da309506b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -636,7 +636,8 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
u8 match_criteria_enable,
const u32 *match_c,
const u32 *match_v,
- struct mlx5_flow_act *flow_act)
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_context *flow_context)
{
const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c,
outer_headers);
@@ -655,7 +656,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
(match_criteria_enable &
~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
(flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
- (flow_act->flags & FLOW_ACT_HAS_TAG))
+ (flow_context->flags & FLOW_CONTEXT_HAS_TAG))
return false;
return true;
@@ -767,7 +768,8 @@ mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev,
fg->mask.match_criteria_enable,
fg->mask.match_criteria,
fte->val,
- &fte->action))
+ &fte->action,
+ &fte->flow_context))
return ERR_PTR(-EINVAL);
else if (!mlx5_is_fpga_ipsec_rule(mdev,
fg->mask.match_criteria_enable,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
index 2b5e63b0d4d6..382985e65b48 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -37,8 +37,6 @@
#include "accel/ipsec.h"
#include "fs_cmd.h"
-#ifdef CONFIG_MLX5_FPGA
-
u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
@@ -66,77 +64,4 @@ int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
const struct mlx5_flow_cmds *
mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
-#else
-
-static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
-{
- return 0;
-}
-
-static inline unsigned int
-mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
-{
- return 0;
-}
-
-static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev,
- u64 *counters)
-{
- return 0;
-}
-
-static inline void *
-mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *accel_xfrm,
- const __be32 saddr[4],
- const __be32 daddr[4],
- const __be32 spi, bool is_ipv6)
-{
- return NULL;
-}
-
-static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
-{
-}
-
-static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
-{
- return 0;
-}
-
-static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
-{
-}
-
-static inline void mlx5_fpga_ipsec_build_fs_cmds(void)
-{
-}
-
-static inline struct mlx5_accel_esp_xfrm *
-mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags)
-{
- return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
-{
-}
-
-static inline int
-mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
-{
- return -EOPNOTSUPP;
-}
-
-static inline const struct mlx5_flow_cmds *
-mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
-{
- return mlx5_fs_cmd_get_default(type);
-}
-
-#endif /* CONFIG_MLX5_FPGA */
-
#endif /* __MLX5_FPGA_SADB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index bb24c3797218..7ac1249eadc3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -396,7 +396,11 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
MLX5_SET(flow_context, in_flow_context, group_id, group_id);
- MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
+ MLX5_SET(flow_context, in_flow_context, flow_tag,
+ fte->flow_context.flow_tag);
+ MLX5_SET(flow_context, in_flow_context, flow_source,
+ fte->flow_context.flow_source);
+
MLX5_SET(flow_context, in_flow_context, extended_destination,
extended_dest);
if (extended_dest) {
@@ -771,6 +775,10 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions);
table_type = FS_FT_NIC_TX;
break;
+ case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
+ max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions);
+ table_type = FS_FT_ESW_INGRESS_ACL;
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index fe76c6fd6d80..3e99799bdb40 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -584,7 +584,7 @@ err_ida_remove:
}
static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
- u32 *match_value,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act)
{
struct mlx5_flow_steering *steering = get_steering(&ft->node);
@@ -594,9 +594,10 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
if (!fte)
return ERR_PTR(-ENOMEM);
- memcpy(fte->val, match_value, sizeof(fte->val));
+ memcpy(fte->val, &spec->match_value, sizeof(fte->val));
fte->node.type = FS_TYPE_FLOW_ENTRY;
fte->action = *flow_act;
+ fte->flow_context = spec->flow_context;
tree_init_node(&fte->node, NULL, del_sw_fte);
@@ -612,7 +613,7 @@ static void dealloc_flow_group(struct mlx5_flow_steering *steering,
static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering,
u8 match_criteria_enable,
- void *match_criteria,
+ const void *match_criteria,
int start_index,
int end_index)
{
@@ -642,7 +643,7 @@ static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steer
static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft,
u8 match_criteria_enable,
- void *match_criteria,
+ const void *match_criteria,
int start_index,
int end_index,
struct list_head *prev)
@@ -1285,7 +1286,7 @@ free_handle:
}
static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec)
+ const struct mlx5_flow_spec *spec)
{
struct list_head *prev = &ft->node.children;
struct mlx5_flow_group *fg;
@@ -1430,7 +1431,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
return false;
}
-static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act)
+static int check_conflicting_ftes(struct fs_fte *fte,
+ const struct mlx5_flow_context *flow_context,
+ const struct mlx5_flow_act *flow_act)
{
if (check_conflicting_actions(flow_act->action, fte->action.action)) {
mlx5_core_warn(get_dev(&fte->node),
@@ -1438,12 +1441,12 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act
return -EEXIST;
}
- if ((flow_act->flags & FLOW_ACT_HAS_TAG) &&
- fte->action.flow_tag != flow_act->flow_tag) {
+ if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) &&
+ fte->flow_context.flow_tag != flow_context->flow_tag) {
mlx5_core_warn(get_dev(&fte->node),
"FTE flow tag %u already exists with different flow tag %u\n",
- fte->action.flow_tag,
- flow_act->flow_tag);
+ fte->flow_context.flow_tag,
+ flow_context->flow_tag);
return -EEXIST;
}
@@ -1451,7 +1454,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act
}
static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
- u32 *match_value,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int dest_num,
@@ -1462,7 +1465,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
int i;
int ret;
- ret = check_conflicting_ftes(fte, flow_act);
+ ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act);
if (ret)
return ERR_PTR(ret);
@@ -1536,7 +1539,7 @@ static void free_match_list(struct match_list_head *head)
static int build_match_list(struct match_list_head *match_head,
struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec)
+ const struct mlx5_flow_spec *spec)
{
struct rhlist_head *tmp, *list;
struct mlx5_flow_group *g;
@@ -1589,7 +1592,7 @@ static u64 matched_fgs_get_version(struct list_head *match_head)
static struct fs_fte *
lookup_fte_locked(struct mlx5_flow_group *g,
- u32 *match_value,
+ const u32 *match_value,
bool take_write)
{
struct fs_fte *fte_tmp;
@@ -1622,7 +1625,7 @@ out:
static struct mlx5_flow_handle *
try_add_to_existing_fg(struct mlx5_flow_table *ft,
struct list_head *match_head,
- struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int dest_num,
@@ -1637,7 +1640,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
u64 version;
int err;
- fte = alloc_fte(ft, spec->match_value, flow_act);
+ fte = alloc_fte(ft, spec, flow_act);
if (IS_ERR(fte))
return ERR_PTR(-ENOMEM);
@@ -1653,8 +1656,7 @@ search_again_locked:
fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
if (!fte_tmp)
continue;
- rule = add_rule_fg(g, spec->match_value,
- flow_act, dest, dest_num, fte_tmp);
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
up_write_ref_node(&fte_tmp->node, false);
tree_put_node(&fte_tmp->node, false);
kmem_cache_free(steering->ftes_cache, fte);
@@ -1701,8 +1703,7 @@ skip_search:
nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
up_write_ref_node(&g->node, false);
- rule = add_rule_fg(g, spec->match_value,
- flow_act, dest, dest_num, fte);
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
up_write_ref_node(&fte->node, false);
tree_put_node(&fte->node, false);
return rule;
@@ -1715,7 +1716,7 @@ out:
static struct mlx5_flow_handle *
_mlx5_add_flow_rules(struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int dest_num)
@@ -1788,7 +1789,7 @@ search_again_locked:
if (err)
goto err_release_fg;
- fte = alloc_fte(ft, spec->match_value, flow_act);
+ fte = alloc_fte(ft, spec, flow_act);
if (IS_ERR(fte)) {
err = PTR_ERR(fte);
goto err_release_fg;
@@ -1802,8 +1803,7 @@ search_again_locked:
nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
up_write_ref_node(&g->node, false);
- rule = add_rule_fg(g, spec->match_value, flow_act, dest,
- dest_num, fte);
+ rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
up_write_ref_node(&fte->node, false);
tree_put_node(&fte->node, false);
tree_put_node(&g->node, false);
@@ -1823,7 +1823,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
struct mlx5_flow_handle *
mlx5_add_flow_rules(struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int num_dest)
@@ -2092,7 +2092,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d
{
struct mlx5_flow_steering *steering = dev->priv.steering;
- if (!steering || vport >= MLX5_TOTAL_VPORTS(dev))
+ if (!steering || vport >= mlx5_eswitch_get_total_vports(dev))
return NULL;
switch (type) {
@@ -2423,7 +2423,7 @@ static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev)
if (!steering->esw_egress_root_ns)
return;
- for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++)
+ for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++)
cleanup_root_ns(steering->esw_egress_root_ns[i]);
kfree(steering->esw_egress_root_ns);
@@ -2438,7 +2438,7 @@ static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev)
if (!steering->esw_ingress_root_ns)
return;
- for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++)
+ for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++)
cleanup_root_ns(steering->esw_ingress_root_ns[i]);
kfree(steering->esw_ingress_root_ns);
@@ -2606,16 +2606,18 @@ static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vpo
static int init_egress_acls_root_ns(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
+ int total_vports = mlx5_eswitch_get_total_vports(dev);
int err;
int i;
- steering->esw_egress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev),
- sizeof(*steering->esw_egress_root_ns),
- GFP_KERNEL);
+ steering->esw_egress_root_ns =
+ kcalloc(total_vports,
+ sizeof(*steering->esw_egress_root_ns),
+ GFP_KERNEL);
if (!steering->esw_egress_root_ns)
return -ENOMEM;
- for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) {
+ for (i = 0; i < total_vports; i++) {
err = init_egress_acl_root_ns(steering, i);
if (err)
goto cleanup_root_ns;
@@ -2634,16 +2636,18 @@ cleanup_root_ns:
static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
+ int total_vports = mlx5_eswitch_get_total_vports(dev);
int err;
int i;
- steering->esw_ingress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev),
- sizeof(*steering->esw_ingress_root_ns),
- GFP_KERNEL);
+ steering->esw_ingress_root_ns =
+ kcalloc(total_vports,
+ sizeof(*steering->esw_ingress_root_ns),
+ GFP_KERNEL);
if (!steering->esw_ingress_root_ns)
return -ENOMEM;
- for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) {
+ for (i = 0; i < total_vports; i++) {
err = init_ingress_acl_root_ns(steering, i);
if (err)
goto cleanup_root_ns;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index a08c3d09a50f..c48c382f926f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -170,6 +170,7 @@ struct fs_fte {
u32 val[MLX5_ST_SZ_DW_MATCH_PARAM];
u32 dests_size;
u32 index;
+ struct mlx5_flow_context flow_context;
struct mlx5_flow_act action;
enum fs_fte_status status;
struct mlx5_fc *counter;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index e8fedb307b2c..a19790dee7b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -37,6 +37,37 @@
#include "mlx5_core.h"
#include "../../mlxfw/mlxfw.h"
+enum {
+ MCQS_IDENTIFIER_BOOT_IMG = 0x1,
+ MCQS_IDENTIFIER_OEM_NVCONFIG = 0x4,
+ MCQS_IDENTIFIER_MLNX_NVCONFIG = 0x5,
+ MCQS_IDENTIFIER_CS_TOKEN = 0x6,
+ MCQS_IDENTIFIER_DBG_TOKEN = 0x7,
+ MCQS_IDENTIFIER_GEARBOX = 0xA,
+};
+
+enum {
+ MCQS_UPDATE_STATE_IDLE,
+ MCQS_UPDATE_STATE_IN_PROGRESS,
+ MCQS_UPDATE_STATE_APPLIED,
+ MCQS_UPDATE_STATE_ACTIVE,
+ MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET,
+ MCQS_UPDATE_STATE_FAILED,
+ MCQS_UPDATE_STATE_CANCELED,
+ MCQS_UPDATE_STATE_BUSY,
+};
+
+enum {
+ MCQI_INFO_TYPE_CAPABILITIES = 0x0,
+ MCQI_INFO_TYPE_VERSION = 0x1,
+ MCQI_INFO_TYPE_ACTIVATION_METHOD = 0x5,
+};
+
+enum {
+ MCQI_FW_RUNNING_VERSION = 0,
+ MCQI_FW_STORED_VERSION = 1,
+};
+
static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out,
int outlen)
{
@@ -202,6 +233,18 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN(dev, event_cap)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_EVENT);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, tls)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_TLS);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -392,33 +435,49 @@ static int mlx5_reg_mcda_set(struct mlx5_core_dev *dev,
}
static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev,
- u16 component_index,
- u32 *max_component_size,
- u8 *log_mcda_word_size,
- u16 *mcda_max_write_size)
+ u16 component_index, bool read_pending,
+ u8 info_type, u16 data_size, void *mcqi_data)
{
- u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_ST_SZ_DW(mcqi_cap)];
- int offset = MLX5_ST_SZ_DW(mcqi_reg);
- u32 in[MLX5_ST_SZ_DW(mcqi_reg)];
+ u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_UN_SZ_DW(mcqi_reg_data)] = {};
+ u32 in[MLX5_ST_SZ_DW(mcqi_reg)] = {};
+ void *data;
int err;
- memset(in, 0, sizeof(in));
- memset(out, 0, sizeof(out));
-
MLX5_SET(mcqi_reg, in, component_index, component_index);
- MLX5_SET(mcqi_reg, in, data_size, MLX5_ST_SZ_BYTES(mcqi_cap));
+ MLX5_SET(mcqi_reg, in, read_pending_component, read_pending);
+ MLX5_SET(mcqi_reg, in, info_type, info_type);
+ MLX5_SET(mcqi_reg, in, data_size, data_size);
err = mlx5_core_access_reg(dev, in, sizeof(in), out,
- sizeof(out), MLX5_REG_MCQI, 0, 0);
+ MLX5_ST_SZ_BYTES(mcqi_reg) + data_size,
+ MLX5_REG_MCQI, 0, 0);
if (err)
- goto out;
+ return err;
- *max_component_size = MLX5_GET(mcqi_cap, out + offset, max_component_size);
- *log_mcda_word_size = MLX5_GET(mcqi_cap, out + offset, log_mcda_word_size);
- *mcda_max_write_size = MLX5_GET(mcqi_cap, out + offset, mcda_max_write_size);
+ data = MLX5_ADDR_OF(mcqi_reg, out, data);
+ memcpy(mcqi_data, data, data_size);
-out:
- return err;
+ return 0;
+}
+
+static int mlx5_reg_mcqi_caps_query(struct mlx5_core_dev *dev, u16 component_index,
+ u32 *max_component_size, u8 *log_mcda_word_size,
+ u16 *mcda_max_write_size)
+{
+ u32 mcqi_reg[MLX5_ST_SZ_DW(mcqi_cap)] = {};
+ int err;
+
+ err = mlx5_reg_mcqi_query(dev, component_index, 0,
+ MCQI_INFO_TYPE_CAPABILITIES,
+ MLX5_ST_SZ_BYTES(mcqi_cap), mcqi_reg);
+ if (err)
+ return err;
+
+ *max_component_size = MLX5_GET(mcqi_cap, mcqi_reg, max_component_size);
+ *log_mcda_word_size = MLX5_GET(mcqi_cap, mcqi_reg, log_mcda_word_size);
+ *mcda_max_write_size = MLX5_GET(mcqi_cap, mcqi_reg, mcda_max_write_size);
+
+ return 0;
}
struct mlx5_mlxfw_dev {
@@ -434,8 +493,13 @@ static int mlx5_component_query(struct mlxfw_dev *mlxfw_dev,
container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
- return mlx5_reg_mcqi_query(dev, component_index, p_max_size,
- p_align_bits, p_max_write_size);
+ if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi)) {
+ mlx5_core_warn(dev, "caps query isn't supported by running FW\n");
+ return -EOPNOTSUPP;
+ }
+
+ return mlx5_reg_mcqi_caps_query(dev, component_index, p_max_size,
+ p_align_bits, p_max_write_size);
}
static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle)
@@ -575,3 +639,130 @@ int mlx5_firmware_flash(struct mlx5_core_dev *dev,
return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev,
firmware, extack);
}
+
+static int mlx5_reg_mcqi_version_query(struct mlx5_core_dev *dev,
+ u16 component_index, bool read_pending,
+ u32 *mcqi_version_out)
+{
+ return mlx5_reg_mcqi_query(dev, component_index, read_pending,
+ MCQI_INFO_TYPE_VERSION,
+ MLX5_ST_SZ_BYTES(mcqi_version),
+ mcqi_version_out);
+}
+
+static int mlx5_reg_mcqs_query(struct mlx5_core_dev *dev, u32 *out,
+ u16 component_index)
+{
+ u8 out_sz = MLX5_ST_SZ_BYTES(mcqs_reg);
+ u32 in[MLX5_ST_SZ_DW(mcqs_reg)] = {};
+ int err;
+
+ memset(out, 0, out_sz);
+
+ MLX5_SET(mcqs_reg, in, component_index, component_index);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ out_sz, MLX5_REG_MCQS, 0, 0);
+ return err;
+}
+
+/* scans component index sequentially, to find the boot img index */
+static int mlx5_get_boot_img_component_index(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(mcqs_reg)] = {};
+ u16 identifier, component_idx = 0;
+ bool quit;
+ int err;
+
+ do {
+ err = mlx5_reg_mcqs_query(dev, out, component_idx);
+ if (err)
+ return err;
+
+ identifier = MLX5_GET(mcqs_reg, out, identifier);
+ quit = !!MLX5_GET(mcqs_reg, out, last_index_flag);
+ quit |= identifier == MCQS_IDENTIFIER_BOOT_IMG;
+ } while (!quit && ++component_idx);
+
+ if (identifier != MCQS_IDENTIFIER_BOOT_IMG) {
+ mlx5_core_warn(dev, "mcqs: can't find boot_img component ix, last scanned idx %d\n",
+ component_idx);
+ return -EOPNOTSUPP;
+ }
+
+ return component_idx;
+}
+
+static int
+mlx5_fw_image_pending(struct mlx5_core_dev *dev,
+ int component_index,
+ bool *pending_version_exists)
+{
+ u32 out[MLX5_ST_SZ_DW(mcqs_reg)];
+ u8 component_update_state;
+ int err;
+
+ err = mlx5_reg_mcqs_query(dev, out, component_index);
+ if (err)
+ return err;
+
+ component_update_state = MLX5_GET(mcqs_reg, out, component_update_state);
+
+ if (component_update_state == MCQS_UPDATE_STATE_IDLE) {
+ *pending_version_exists = false;
+ } else if (component_update_state == MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET) {
+ *pending_version_exists = true;
+ } else {
+ mlx5_core_warn(dev,
+ "mcqs: can't read pending fw version while fw state is %d\n",
+ component_update_state);
+ return -ENODATA;
+ }
+ return 0;
+}
+
+int mlx5_fw_version_query(struct mlx5_core_dev *dev,
+ u32 *running_ver, u32 *pending_ver)
+{
+ u32 reg_mcqi_version[MLX5_ST_SZ_DW(mcqi_version)] = {};
+ bool pending_version_exists;
+ int component_index;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi) ||
+ !MLX5_CAP_MCAM_REG(dev, mcqs)) {
+ mlx5_core_warn(dev, "fw query isn't supported by the FW\n");
+ return -EOPNOTSUPP;
+ }
+
+ component_index = mlx5_get_boot_img_component_index(dev);
+ if (component_index < 0)
+ return component_index;
+
+ err = mlx5_reg_mcqi_version_query(dev, component_index,
+ MCQI_FW_RUNNING_VERSION,
+ reg_mcqi_version);
+ if (err)
+ return err;
+
+ *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version);
+
+ err = mlx5_fw_image_pending(dev, component_index, &pending_version_exists);
+ if (err)
+ return err;
+
+ if (!pending_version_exists) {
+ *pending_ver = 0;
+ return 0;
+ }
+
+ err = mlx5_reg_mcqi_version_query(dev, component_index,
+ MCQI_FW_STORED_VERSION,
+ reg_mcqi_version);
+ if (err)
+ return err;
+
+ *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 9ca492b430d8..faf197d53743 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -87,7 +87,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev,
mlx5e_set_netdev_mtu_boundaries(priv);
netdev->mtu = netdev->max_mtu;
- mlx5e_build_nic_params(mdev, &priv->rss_params, &priv->channels.params,
+ mlx5e_build_nic_params(mdev, NULL, &priv->rss_params, &priv->channels.params,
mlx5e_get_netdev_max_channels(netdev),
netdev->mtu);
mlx5i_build_nic_params(mdev, &priv->channels.params);
@@ -258,6 +258,18 @@ void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *
mlx5_core_destroy_qp(mdev, qp);
}
+int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
+ void *tisc;
+
+ tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn);
+
+ return mlx5e_create_tis(mdev, in, tisn);
+}
+
static int mlx5i_init_tx(struct mlx5e_priv *priv)
{
struct mlx5i_priv *ipriv = priv->ppriv;
@@ -269,7 +281,7 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv)
return err;
}
- err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]);
+ err = mlx5i_create_tis(priv->mdev, ipriv->qp.qpn, &priv->tisn[0]);
if (err) {
mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
goto err_destroy_underlay_qp;
@@ -365,7 +377,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv);
+ err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
if (err)
goto err_destroy_indirect_rqts;
@@ -373,7 +385,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
if (err)
goto err_destroy_direct_rqts;
- err = mlx5e_create_direct_tirs(priv);
+ err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
if (err)
goto err_destroy_indirect_tirs;
@@ -384,11 +396,11 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
return 0;
err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
err_destroy_indirect_tirs:
mlx5e_destroy_indirect_tirs(priv, true);
err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
err_destroy_indirect_rqts:
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
err_close_drop_rq:
@@ -401,9 +413,9 @@ err_destroy_q_counters:
static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
{
mlx5i_destroy_flow_steering(priv);
- mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
mlx5e_destroy_indirect_tirs(priv, true);
- mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_destroy_q_counters(priv);
@@ -418,6 +430,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = {
.cleanup_rx = mlx5i_cleanup_rx,
.enable = NULL, /* mlx5i_enable */
.disable = NULL, /* mlx5i_disable */
+ .update_rx = mlx5e_update_nic_rx,
.update_stats = NULL, /* mlx5i_update_stats */
.update_carrier = NULL, /* no HW update in IB link */
.rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
@@ -526,7 +539,7 @@ static int mlx5i_open(struct net_device *netdev)
if (err)
goto err_remove_fs_underlay_qp;
- mlx5e_refresh_tirs(epriv, false);
+ epriv->profile->update_rx(epriv);
mlx5e_activate_priv_channels(epriv);
mutex_unlock(&epriv->state_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index e19ba3fcd1b7..c87962cab921 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -59,6 +59,8 @@ struct mlx5i_priv {
char *mlx5e_priv[0];
};
+int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn);
+
/* Underlay QP create/destroy functions */
int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
index b491b8f5fd6b..6e56fa769d2e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -210,7 +210,7 @@ static int mlx5i_pkey_open(struct net_device *netdev)
goto err_unint_underlay_qp;
}
- err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]);
+ err = mlx5i_create_tis(mdev, ipriv->qp.qpn, &epriv->tisn[0]);
if (err) {
mlx5_core_warn(mdev, "create child tis failed, %d\n", err);
goto err_remove_rx_uderlay_qp;
@@ -221,7 +221,7 @@ static int mlx5i_pkey_open(struct net_device *netdev)
mlx5_core_warn(mdev, "opening child channels failed, %d\n", err);
goto err_clear_state_opened_flag;
}
- mlx5e_refresh_tirs(epriv, false);
+ epriv->profile->update_rx(epriv);
mlx5e_activate_priv_channels(epriv);
mutex_unlock(&epriv->state_lock);
@@ -350,6 +350,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
.cleanup_rx = mlx5i_pkey_cleanup_rx,
.enable = NULL,
.disable = NULL,
+ .update_rx = mlx5e_update_nic_rx,
.update_stats = NULL,
.rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
.rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 959605559858..c5ef2ff26465 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -305,8 +305,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
!mlx5_sriov_is_enabled(dev1);
#ifdef CONFIG_MLX5_ESWITCH
- roce_lag &= dev0->priv.eswitch->mode == SRIOV_NONE &&
- dev1->priv.eswitch->mode == SRIOV_NONE;
+ roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
+ dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif
if (roce_lag)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
new file mode 100644
index 000000000000..ea9ee88491e5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "mlx5_core.h"
+
+int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
+ void *key, u32 sz_bytes,
+ u32 *p_key_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ u32 sz_bits = sz_bytes * BITS_PER_BYTE;
+ u8 general_obj_key_size;
+ u64 general_obj_types;
+ void *obj, *key_p;
+ int err;
+
+ obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object);
+ key_p = MLX5_ADDR_OF(encryption_key_obj, obj, key);
+
+ general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+ if (!(general_obj_types &
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY))
+ return -EINVAL;
+
+ switch (sz_bits) {
+ case 128:
+ general_obj_key_size =
+ MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128;
+ break;
+ case 256:
+ general_obj_key_size =
+ MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ memcpy(key_p, key, sz_bytes);
+
+ MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size);
+ MLX5_SET(encryption_key_obj, obj, key_type,
+ MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK);
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
+ MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.pdn);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *p_key_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ /* avoid leaking key on the stack */
+ memzero_explicit(in, sizeof(in));
+
+ return err;
+}
+
+void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, key_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index c0fb6d72b695..3dfab91ae5f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -7,7 +7,6 @@
#include <linux/mlx5/eq.h>
#include <linux/mlx5/cq.h>
-#define MLX5_MAX_IRQ_NAME (32)
#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe))
struct mlx5_eq_tasklet {
@@ -36,8 +35,14 @@ struct mlx5_eq {
struct mlx5_rsc_debug *dbg;
};
+struct mlx5_eq_async {
+ struct mlx5_eq core;
+ struct notifier_block irq_nb;
+};
+
struct mlx5_eq_comp {
- struct mlx5_eq core; /* Must be first */
+ struct mlx5_eq core;
+ struct notifier_block irq_nb;
struct mlx5_eq_tasklet tasklet_ctx;
struct list_head list;
};
@@ -70,7 +75,7 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev);
void mlx5_eq_table_destroy(struct mlx5_core_dev *dev);
int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
-int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn);
struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev);
void mlx5_cq_tasklet_cb(unsigned long data);
@@ -92,7 +97,4 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev);
#endif
-int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
-int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
-
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index d918e44491f4..b99d469e4e64 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -79,4 +79,9 @@ struct mlx5_pme_stats {
void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats);
int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data);
+/* Crypto */
+int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
+ void *key, u32 sz_bytes, u32 *p_key_id);
+void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id);
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
index a71d5b9c7ab2..3118e8d66407 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -67,6 +67,7 @@ static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
struct l2table_node {
struct l2addr_node node;
u32 index; /* index in HW l2 table */
+ int ref_count;
};
struct mlx5_mpfs {
@@ -134,8 +135,8 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
{
struct mlx5_mpfs *mpfs = dev->priv.mpfs;
struct l2table_node *l2addr;
+ int err = 0;
u32 index;
- int err;
if (!MLX5_ESWITCH_MANAGER(dev))
return 0;
@@ -144,30 +145,35 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node);
if (l2addr) {
- err = -EEXIST;
- goto abort;
+ l2addr->ref_count++;
+ goto out;
}
err = alloc_l2table_index(mpfs, &index);
if (err)
- goto abort;
+ goto out;
l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL);
if (!l2addr) {
- free_l2table_index(mpfs, index);
err = -ENOMEM;
- goto abort;
+ goto hash_add_err;
}
- l2addr->index = index;
err = set_l2table_entry_cmd(dev, index, mac);
- if (err) {
- l2addr_hash_del(l2addr);
- free_l2table_index(mpfs, index);
- }
+ if (err)
+ goto set_table_entry_err;
+
+ l2addr->index = index;
+ l2addr->ref_count = 1;
mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index);
-abort:
+ goto out;
+
+set_table_entry_err:
+ l2addr_hash_del(l2addr);
+hash_add_err:
+ free_l2table_index(mpfs, index);
+out:
mutex_unlock(&mpfs->lock);
return err;
}
@@ -190,6 +196,9 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
goto unlock;
}
+ if (--l2addr->ref_count > 0)
+ goto unlock;
+
index = l2addr->index;
del_l2table_entry_cmd(dev, index);
l2addr_hash_del(l2addr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 998eec938d3c..b15b27a497fc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -172,18 +172,28 @@ static struct mlx5_profile profile[] = {
#define FW_INIT_TIMEOUT_MILI 2000
#define FW_INIT_WAIT_MS 2
-#define FW_PRE_INIT_TIMEOUT_MILI 10000
+#define FW_PRE_INIT_TIMEOUT_MILI 120000
+#define FW_INIT_WARN_MESSAGE_INTERVAL 20000
-static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
+static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
+ u32 warn_time_mili)
{
+ unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili);
unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
int err = 0;
+ BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL);
+
while (fw_initializing(dev)) {
if (time_after(jiffies, end)) {
err = -EBUSY;
break;
}
+ if (warn_time_mili && time_after(jiffies, warn)) {
+ mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n",
+ jiffies_to_msecs(end - warn) / 1000);
+ warn = jiffies + msecs_to_jiffies(warn_time_mili);
+ }
msleep(FW_INIT_WAIT_MS);
}
@@ -724,8 +734,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
struct mlx5_priv *priv = &dev->priv;
int err = 0;
- priv->pci_dev_data = id->driver_data;
-
+ mutex_init(&dev->pci_status_mutex);
pci_set_drvdata(dev->pdev, dev);
dev->bar_addr = pci_resource_start(pdev, 0);
@@ -799,10 +808,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
goto err_devcom;
}
+ err = mlx5_irq_table_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "failed to initialize irq table\n");
+ goto err_devcom;
+ }
+
err = mlx5_eq_table_init(dev);
if (err) {
mlx5_core_err(dev, "failed to initialize eq\n");
- goto err_devcom;
+ goto err_irq_cleanup;
}
err = mlx5_events_init(dev);
@@ -840,32 +855,32 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
goto err_rl_cleanup;
}
- err = mlx5_eswitch_init(dev);
+ err = mlx5_sriov_init(dev);
if (err) {
- mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
+ mlx5_core_err(dev, "Failed to init sriov %d\n", err);
goto err_mpfs_cleanup;
}
- err = mlx5_sriov_init(dev);
+ err = mlx5_eswitch_init(dev);
if (err) {
- mlx5_core_err(dev, "Failed to init sriov %d\n", err);
- goto err_eswitch_cleanup;
+ mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
+ goto err_sriov_cleanup;
}
err = mlx5_fpga_init(dev);
if (err) {
mlx5_core_err(dev, "Failed to init fpga device %d\n", err);
- goto err_sriov_cleanup;
+ goto err_eswitch_cleanup;
}
dev->tracer = mlx5_fw_tracer_create(dev);
return 0;
-err_sriov_cleanup:
- mlx5_sriov_cleanup(dev);
err_eswitch_cleanup:
mlx5_eswitch_cleanup(dev->priv.eswitch);
+err_sriov_cleanup:
+ mlx5_sriov_cleanup(dev);
err_mpfs_cleanup:
mlx5_mpfs_cleanup(dev);
err_rl_cleanup:
@@ -880,6 +895,8 @@ err_events_cleanup:
mlx5_events_cleanup(dev);
err_eq_cleanup:
mlx5_eq_table_cleanup(dev);
+err_irq_cleanup:
+ mlx5_irq_table_cleanup(dev);
err_devcom:
mlx5_devcom_unregister_device(dev->priv.devcom);
@@ -890,8 +907,8 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
mlx5_fw_tracer_destroy(dev->tracer);
mlx5_fpga_cleanup(dev);
- mlx5_sriov_cleanup(dev);
mlx5_eswitch_cleanup(dev->priv.eswitch);
+ mlx5_sriov_cleanup(dev);
mlx5_mpfs_cleanup(dev);
mlx5_cleanup_rl_table(dev);
mlx5_geneve_destroy(dev->geneve);
@@ -903,6 +920,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_cq_debugfs_cleanup(dev);
mlx5_events_cleanup(dev);
mlx5_eq_table_cleanup(dev);
+ mlx5_irq_table_cleanup(dev);
mlx5_devcom_unregister_device(dev->priv.devcom);
}
@@ -919,7 +937,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
/* wait for firmware to accept initialization segments configurations
*/
- err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
+ err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL);
if (err) {
mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n",
FW_PRE_INIT_TIMEOUT_MILI);
@@ -932,7 +950,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
return err;
}
- err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
+ err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0);
if (err) {
mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n",
FW_INIT_TIMEOUT_MILI);
@@ -1036,6 +1054,12 @@ static int mlx5_load(struct mlx5_core_dev *dev)
mlx5_events_start(dev);
mlx5_pagealloc_start(dev);
+ err = mlx5_irq_table_create(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc IRQs\n");
+ goto err_irq_table;
+ }
+
err = mlx5_eq_table_create(dev);
if (err) {
mlx5_core_err(dev, "Failed to create EQs\n");
@@ -1107,6 +1131,8 @@ err_fpga_start:
err_fw_tracer:
mlx5_eq_table_destroy(dev);
err_eq_table:
+ mlx5_irq_table_destroy(dev);
+err_irq_table:
mlx5_pagealloc_stop(dev);
mlx5_events_stop(dev);
mlx5_put_uars_page(dev, dev->priv.uar);
@@ -1123,6 +1149,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_fpga_device_stop(dev);
mlx5_fw_tracer_cleanup(dev->tracer);
mlx5_eq_table_destroy(dev);
+ mlx5_irq_table_destroy(dev);
mlx5_pagealloc_stop(dev);
mlx5_events_stop(dev);
mlx5_put_uars_page(dev, dev->priv.uar);
@@ -1227,7 +1254,6 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
- mutex_init(&dev->pci_status_mutex);
mutex_init(&dev->intf_state_mutex);
mutex_init(&priv->bfregs.reg_head.lock);
@@ -1289,6 +1315,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
dev->device = &pdev->dev;
dev->pdev = pdev;
+ dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ?
+ MLX5_COREDEV_VF : MLX5_COREDEV_PF;
+
err = mlx5_mdev_init(dev, prof_sel);
if (err)
goto mdev_init_err;
@@ -1571,7 +1600,7 @@ static int __init init(void)
get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
mlx5_core_verify_params();
- mlx5_fpga_ipsec_build_fs_cmds();
+ mlx5_accel_ipsec_build_fs_cmds();
mlx5_register_debugfs();
err = pci_register_driver(&mlx5_core_driver);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 29bb61a10289..471bbc48bc1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -159,6 +159,19 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_remove(struct mlx5_core_dev *dev);
+int mlx5_irq_table_init(struct mlx5_core_dev *dev);
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_irq_table_create(struct mlx5_core_dev *dev);
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+ struct notifier_block *nb);
+int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+ struct notifier_block *nb);
+struct cpumask *
+mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx);
+struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table);
+int mlx5_irq_get_num_comp(struct mlx5_irq_table *table);
+
int mlx5_events_init(struct mlx5_core_dev *dev);
void mlx5_events_cleanup(struct mlx5_core_dev *dev);
void mlx5_events_start(struct mlx5_core_dev *dev);
@@ -192,6 +205,8 @@ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode);
int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw,
struct netlink_ext_ack *extack);
+int mlx5_fw_version_query(struct mlx5_core_dev *dev,
+ u32 *running_ver, u32 *stored_ver);
void mlx5e_init(void);
void mlx5e_cleanup(void);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index ea744d8466ea..9231b39d18b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -38,15 +38,12 @@
void mlx5_init_mkey_table(struct mlx5_core_dev *dev)
{
- struct mlx5_mkey_table *table = &dev->priv.mkey_table;
-
- memset(table, 0, sizeof(*table));
- rwlock_init(&table->lock);
- INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+ xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ);
}
void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev)
{
+ WARN_ON(!xa_empty(&dev->priv.mkey_table));
}
int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
@@ -56,8 +53,8 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
mlx5_async_cbk_t callback,
struct mlx5_async_work *context)
{
- struct mlx5_mkey_table *table = &dev->priv.mkey_table;
u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
+ struct xarray *mkeys = &dev->priv.mkey_table;
u32 mkey_index;
void *mkc;
int err;
@@ -88,12 +85,10 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
mkey_index, key, mkey->key);
- /* connect to mkey tree */
- write_lock_irq(&table->lock);
- err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey);
- write_unlock_irq(&table->lock);
+ err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey,
+ GFP_KERNEL));
if (err) {
- mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n",
+ mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n",
mlx5_base_mkey(mkey->key), err);
mlx5_core_destroy_mkey(dev, mkey);
}
@@ -114,17 +109,17 @@ EXPORT_SYMBOL(mlx5_core_create_mkey);
int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
struct mlx5_core_mkey *mkey)
{
- struct mlx5_mkey_table *table = &dev->priv.mkey_table;
u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0};
+ struct xarray *mkeys = &dev->priv.mkey_table;
struct mlx5_core_mkey *deleted_mkey;
unsigned long flags;
- write_lock_irqsave(&table->lock, flags);
- deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key));
- write_unlock_irqrestore(&table->lock, flags);
+ xa_lock_irqsave(mkeys, flags);
+ deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key));
+ xa_unlock_irqrestore(mkeys, flags);
if (!deleted_mkey) {
- mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n",
+ mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n",
mlx5_base_mkey(mkey->key));
return -ENOENT;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
new file mode 100644
index 000000000000..373981a659c7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
+
+#define MLX5_MAX_IRQ_NAME (32)
+
+struct mlx5_irq {
+ struct atomic_notifier_head nh;
+ cpumask_var_t mask;
+ char name[MLX5_MAX_IRQ_NAME];
+};
+
+struct mlx5_irq_table {
+ struct mlx5_irq *irq;
+ int nvec;
+#ifdef CONFIG_RFS_ACCEL
+ struct cpu_rmap *rmap;
+#endif
+};
+
+int mlx5_irq_table_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *irq_table;
+
+ irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL);
+ if (!irq_table)
+ return -ENOMEM;
+
+ dev->priv.irq_table = irq_table;
+ return 0;
+}
+
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
+{
+ kvfree(dev->priv.irq_table);
+}
+
+int mlx5_irq_get_num_comp(struct mlx5_irq_table *table)
+{
+ return table->nvec - MLX5_IRQ_VEC_COMP_BASE;
+}
+
+static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx)
+{
+ struct mlx5_irq_table *irq_table = dev->priv.irq_table;
+
+ return &irq_table->irq[vecidx];
+}
+
+int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+ struct notifier_block *nb)
+{
+ struct mlx5_irq *irq;
+
+ irq = &irq_table->irq[vecidx];
+ return atomic_notifier_chain_register(&irq->nh, nb);
+}
+
+int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+ struct notifier_block *nb)
+{
+ struct mlx5_irq *irq;
+
+ irq = &irq_table->irq[vecidx];
+ return atomic_notifier_chain_unregister(&irq->nh, nb);
+}
+
+static irqreturn_t mlx5_irq_int_handler(int irq, void *nh)
+{
+ atomic_notifier_call_chain(nh, 0, NULL);
+ return IRQ_HANDLED;
+}
+
+static void irq_set_name(char *name, int vecidx)
+{
+ if (vecidx == 0) {
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async");
+ return;
+ }
+
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
+ vecidx - MLX5_IRQ_VEC_COMP_BASE);
+ return;
+}
+
+static int request_irqs(struct mlx5_core_dev *dev, int nvec)
+{
+ char name[MLX5_MAX_IRQ_NAME];
+ int err;
+ int i;
+
+ for (i = 0; i < nvec; i++) {
+ struct mlx5_irq *irq = mlx5_irq_get(dev, i);
+ int irqn = pci_irq_vector(dev->pdev, i);
+
+ irq_set_name(name, i);
+ ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
+ snprintf(irq->name, MLX5_MAX_IRQ_NAME,
+ "%s@pci:%s", name, pci_name(dev->pdev));
+ err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name,
+ &irq->nh);
+ if (err) {
+ mlx5_core_err(dev, "Failed to request irq\n");
+ goto err_request_irq;
+ }
+ }
+ return 0;
+
+err_request_irq:
+ for (; i >= 0; i--) {
+ struct mlx5_irq *irq = mlx5_irq_get(dev, i);
+ int irqn = pci_irq_vector(dev->pdev, i);
+
+ free_irq(irqn, &irq->nh);
+ }
+ return err;
+}
+
+static void irq_clear_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ struct mlx5_irq_table *irq_table = dev->priv.irq_table;
+
+ free_irq_cpu_rmap(irq_table->rmap);
+#endif
+}
+
+static int irq_set_rmap(struct mlx5_core_dev *mdev)
+{
+ int err = 0;
+#ifdef CONFIG_RFS_ACCEL
+ struct mlx5_irq_table *irq_table = mdev->priv.irq_table;
+ int num_affinity_vec;
+ int vecidx;
+
+ num_affinity_vec = mlx5_irq_get_num_comp(irq_table);
+ irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec);
+ if (!irq_table->rmap) {
+ err = -ENOMEM;
+ mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
+ goto err_out;
+ }
+
+ vecidx = MLX5_IRQ_VEC_COMP_BASE;
+ for (; vecidx < irq_table->nvec; vecidx++) {
+ err = irq_cpu_rmap_add(irq_table->rmap,
+ pci_irq_vector(mdev->pdev, vecidx));
+ if (err) {
+ mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
+ err);
+ goto err_irq_cpu_rmap_add;
+ }
+ }
+ return 0;
+
+err_irq_cpu_rmap_add:
+ irq_clear_rmap(mdev);
+err_out:
+#endif
+ return err;
+}
+
+/* Completion IRQ vectors */
+
+static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
+ struct mlx5_irq *irq;
+ int irqn;
+
+ irq = mlx5_irq_get(mdev, vecidx);
+ irqn = pci_irq_vector(mdev->pdev, vecidx);
+ if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
+ mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+ return -ENOMEM;
+ }
+
+ cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node),
+ irq->mask);
+ if (IS_ENABLED(CONFIG_SMP) &&
+ irq_set_affinity_hint(irqn, irq->mask))
+ mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x",
+ irqn);
+
+ return 0;
+}
+
+static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
+ struct mlx5_irq *irq;
+ int irqn;
+
+ irq = mlx5_irq_get(mdev, vecidx);
+ irqn = pci_irq_vector(mdev->pdev, vecidx);
+ irq_set_affinity_hint(irqn, NULL);
+ free_cpumask_var(irq->mask);
+}
+
+static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
+ int err;
+ int i;
+
+ for (i = 0; i < nvec; i++) {
+ err = set_comp_irq_affinity_hint(mdev, i);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ for (i--; i >= 0; i--)
+ clear_comp_irq_affinity_hint(mdev, i);
+
+ return err;
+}
+
+static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
+ int i;
+
+ for (i = 0; i < nvec; i++)
+ clear_comp_irq_affinity_hint(mdev, i);
+}
+
+struct cpumask *
+mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx)
+{
+ return irq_table->irq[vecidx].mask;
+}
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table)
+{
+ return irq_table->rmap;
+}
+#endif
+
+static void unrequest_irqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *table = dev->priv.irq_table;
+ int i;
+
+ for (i = 0; i < table->nvec; i++)
+ free_irq(pci_irq_vector(dev->pdev, i),
+ &mlx5_irq_get(dev, i)->nh);
+}
+
+int mlx5_irq_table_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_irq_table *table = priv->irq_table;
+ int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ MLX5_CAP_GEN(dev, max_num_eqs) :
+ 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int nvec;
+ int err;
+
+ nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+ MLX5_IRQ_VEC_COMP_BASE;
+ nvec = min_t(int, nvec, num_eqs);
+ if (nvec <= MLX5_IRQ_VEC_COMP_BASE)
+ return -ENOMEM;
+
+ table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL);
+ if (!table->irq)
+ return -ENOMEM;
+
+ nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1,
+ nvec, PCI_IRQ_MSIX);
+ if (nvec < 0) {
+ err = nvec;
+ goto err_free_irq;
+ }
+
+ table->nvec = nvec;
+
+ err = irq_set_rmap(dev);
+ if (err)
+ goto err_set_rmap;
+
+ err = request_irqs(dev, nvec);
+ if (err)
+ goto err_request_irqs;
+
+ err = set_comp_irq_affinity_hints(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
+ goto err_set_affinity;
+ }
+
+ return 0;
+
+err_set_affinity:
+ unrequest_irqs(dev);
+err_request_irqs:
+ irq_clear_rmap(dev);
+err_set_rmap:
+ pci_free_irq_vectors(dev->pdev);
+err_free_irq:
+ kfree(table->irq);
+ return err;
+}
+
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *table = dev->priv.irq_table;
+ int i;
+
+ /* free_irq requires that affinity and rmap will be cleared
+ * before calling it. This is why there is asymmetry with set_rmap
+ * which should be called after alloc_irq but before request_irq.
+ */
+ irq_clear_rmap(dev);
+ clear_comp_irqs_affinity_hints(dev);
+ for (i = 0; i < table->nvec; i++)
+ free_irq(pci_irq_vector(dev->pdev, i),
+ &mlx5_irq_get(dev, i)->nh);
+ pci_free_irq_vectors(dev->pdev);
+ kfree(table->irq);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
index 401441aefbcb..17ce9dd56b13 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
@@ -126,7 +126,7 @@ static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *
{
u8 hw_id[ETH_ALEN];
- mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
+ mlx5_query_mac_address(dev, hw_id);
gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
addrconf_addr_eui48(&gid->raw[8], hw_id);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index a249b3c3843d..61fcfd8b39b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -74,17 +74,11 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
int err;
int vf;
- if (sriov->enabled_vfs) {
- mlx5_core_warn(dev,
- "failed to enable SRIOV on device, already enabled with %d vfs\n",
- sriov->enabled_vfs);
- return -EBUSY;
- }
-
if (!MLX5_ESWITCH_MANAGER(dev))
goto enable_vfs_hca;
- err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
+ mlx5_eswitch_update_num_of_vfs(dev->priv.eswitch, num_vfs);
+ err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY);
if (err) {
mlx5_core_warn(dev,
"failed to enable eswitch SRIOV (%d)\n", err);
@@ -99,7 +93,6 @@ enable_vfs_hca:
continue;
}
sriov->vfs_ctx[vf].enabled = 1;
- sriov->enabled_vfs++;
if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) {
err = sriov_restore_guids(dev, vf);
if (err) {
@@ -118,13 +111,11 @@ enable_vfs_hca:
static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int num_vfs = pci_num_vf(dev->pdev);
int err;
int vf;
- if (!sriov->enabled_vfs)
- goto out;
-
- for (vf = 0; vf < sriov->num_vfs; vf++) {
+ for (vf = num_vfs - 1; vf >= 0; vf--) {
if (!sriov->vfs_ctx[vf].enabled)
continue;
err = mlx5_core_disable_hca(dev, vf + 1);
@@ -133,12 +124,10 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
continue;
}
sriov->vfs_ctx[vf].enabled = 0;
- sriov->enabled_vfs--;
}
-out:
if (MLX5_ESWITCH_MANAGER(dev))
- mlx5_eswitch_disable_sriov(dev->priv.eswitch);
+ mlx5_eswitch_disable(dev->priv.eswitch);
if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages))
mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
@@ -191,13 +180,11 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
int mlx5_sriov_attach(struct mlx5_core_dev *dev)
{
- struct mlx5_core_sriov *sriov = &dev->priv.sriov;
-
- if (!mlx5_core_is_pf(dev) || !sriov->num_vfs)
+ if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev))
return 0;
/* If sriov VFs exist in PCI level, enable them in device level */
- return mlx5_device_enable_sriov(dev, sriov->num_vfs);
+ return mlx5_device_enable_sriov(dev, pci_num_vf(dev->pdev));
}
void mlx5_sriov_detach(struct mlx5_core_dev *dev)
@@ -208,6 +195,30 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev)
mlx5_device_disable_sriov(dev);
}
+static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
+{
+ u16 host_total_vfs;
+ const u32 *out;
+
+ if (mlx5_core_is_ecpf_esw_manager(dev)) {
+ out = mlx5_esw_query_functions(dev);
+
+ /* Old FW doesn't support getting total_vfs from esw func
+ * but supports getting it from pci_sriov.
+ */
+ if (IS_ERR(out))
+ goto done;
+ host_total_vfs = MLX5_GET(query_esw_functions_out, out,
+ host_params_context.host_total_vfs);
+ kvfree(out);
+ if (host_total_vfs)
+ return host_total_vfs;
+ }
+
+done:
+ return pci_sriov_get_totalvfs(dev->pdev);
+}
+
int mlx5_sriov_init(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
@@ -218,6 +229,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
return 0;
total_vfs = pci_sriov_get_totalvfs(pdev);
+ sriov->max_vfs = mlx5_get_max_vfs(dev);
sriov->num_vfs = pci_num_vf(pdev);
sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
if (!sriov->vfs_ctx)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 95cdc8cbcba4..c912d82ca64b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -34,6 +34,7 @@
#include <linux/etherdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
+#include <linux/mlx5/eswitch.h>
#include "mlx5_core.h"
/* Mutex to hold while enabling or disabling RoCE */
@@ -155,11 +156,12 @@ int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
}
int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
- u16 vport, u8 *addr)
+ u16 vport, bool other, u8 *addr)
{
- u32 *out;
int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {};
u8 *out_addr;
+ u32 *out;
int err;
out = kvzalloc(outlen, GFP_KERNEL);
@@ -169,7 +171,12 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out,
nic_vport_context.permanent_address);
- err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, other);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
if (!err)
ether_addr_copy(addr, &out_addr[2]);
@@ -178,6 +185,12 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address);
+int mlx5_query_mac_address(struct mlx5_core_dev *mdev, u8 *addr)
+{
+ return mlx5_query_nic_vport_mac_address(mdev, 0, false, addr);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_mac_address);
+
int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
u16 vport, u8 *addr)
{
@@ -194,9 +207,7 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
MLX5_SET(modify_nic_vport_context_in, in,
field_select.permanent_address, 1);
MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
-
- if (vport)
- MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
in, nic_vport_context);
@@ -291,9 +302,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type);
MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
-
- if (vport)
- MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
if (err)
@@ -483,7 +492,7 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
MLX5_SET(modify_nic_vport_context_in, in,
field_select.node_guid, 1);
MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
- MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in,
in, nic_vport_context);
@@ -1157,3 +1166,17 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
return tmp;
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
+
+/**
+ * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
+ *
+ * @dev: Pointer to core device
+ *
+ * mlx5_eswitch_get_total_vports returns total number of vports for
+ * the eswitch.
+ */
+u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
+{
+ return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev);
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_total_vports);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 1f87cce421e0..f1ec58c9e9e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -134,11 +134,6 @@ static inline void mlx5_wq_cyc_update_db_record(struct mlx5_wq_cyc *wq)
*wq->db = cpu_to_be32(wq->wqe_ctr);
}
-static inline u16 mlx5_wq_cyc_get_ctr_wrap_cnt(struct mlx5_wq_cyc *wq, u16 ctr)
-{
- return ctr >> wq->fbc.log_sz;
-}
-
static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
{
return ctr & wq->fbc.sz_m1;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index b5d64aed259e..06c80343d9ed 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -84,6 +84,7 @@ config MLXSW_SPECTRUM
select OBJAGG
select MLXFW
imply PTP_1588_CLOCK
+ select NET_PTP_CLASSIFY if PTP_1588_CLOCK
default m
---help---
This driver supports Mellanox Technologies Spectrum Ethernet
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 30e0526a9cf6..17ceac7505e5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1245,6 +1245,15 @@ int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
}
EXPORT_SYMBOL(mlxsw_core_skb_transmit);
+void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core,
+ struct sk_buff *skb, u8 local_port)
+{
+ if (mlxsw_core->driver->ptp_transmitted)
+ mlxsw_core->driver->ptp_transmitted(mlxsw_core, skb,
+ local_port);
+}
+EXPORT_SYMBOL(mlxsw_core_ptp_transmitted);
+
static bool __is_rx_listener_equal(const struct mlxsw_rx_listener *rxl_a,
const struct mlxsw_rx_listener *rxl_b)
{
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 6dbb0ede502e..8efcff4b59cb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -48,6 +48,8 @@ bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core,
const struct mlxsw_tx_info *tx_info);
int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
const struct mlxsw_tx_info *tx_info);
+void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core,
+ struct sk_buff *skb, u8 local_port);
struct mlxsw_rx_listener {
void (*func)(struct sk_buff *skb, u8 local_port, void *priv);
@@ -296,6 +298,13 @@ struct mlxsw_driver {
u64 *p_linear_size);
int (*params_register)(struct mlxsw_core *mlxsw_core);
void (*params_unregister)(struct mlxsw_core *mlxsw_core);
+
+ /* Notify a driver that a timestamped packet was transmitted. Driver
+ * is responsible for freeing the passed-in SKB.
+ */
+ void (*ptp_transmitted)(struct mlxsw_core *mlxsw_core,
+ struct sk_buff *skb, u8 local_port);
+
u8 txhdr_len;
const struct mlxsw_config_profile *profile;
bool res_query_enabled;
@@ -418,4 +427,14 @@ enum mlxsw_devlink_param_id {
MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL,
};
+struct mlxsw_skb_cb {
+ struct mlxsw_tx_info tx_info;
+};
+
+static inline struct mlxsw_skb_cb *mlxsw_skb_cb(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(mlxsw_skb_cb) > sizeof(skb->cb));
+ return (struct mlxsw_skb_cb *) skb->cb;
+}
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 6acb9bbfdf89..051b19388a81 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -508,17 +508,28 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
{
struct pci_dev *pdev = mlxsw_pci->pdev;
struct mlxsw_pci_queue_elem_info *elem_info;
+ struct mlxsw_tx_info tx_info;
char *wqe;
struct sk_buff *skb;
int i;
spin_lock(&q->lock);
elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+ tx_info = mlxsw_skb_cb(elem_info->u.sdq.skb)->tx_info;
skb = elem_info->u.sdq.skb;
wqe = elem_info->elem;
for (i = 0; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE);
- dev_kfree_skb_any(skb);
+
+ if (unlikely(!tx_info.is_emad &&
+ skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+ mlxsw_core_ptp_transmitted(mlxsw_pci->core, skb,
+ tx_info.local_port);
+ skb = NULL;
+ }
+
+ if (skb)
+ dev_kfree_skb_any(skb);
elem_info->u.sdq.skb = NULL;
if (q->consumer_counter++ != consumer_counter_limit)
@@ -1548,6 +1559,7 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb,
err = -EAGAIN;
goto unlock;
}
+ mlxsw_skb_cb(skb)->tx_info = *tx_info;
elem_info->u.sdq.skb = skb;
wqe = elem_info->elem;
@@ -1571,6 +1583,9 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb,
goto unmap_frags;
}
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
/* Set unused sq entries byte count to zero. */
for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
mlxsw_pci_wqe_byte_count_set(wqe, i, 0);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index e5f6bfd8a35a..ead36702549a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3515,6 +3515,18 @@ MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8);
*/
MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1);
+/* reg_qeec_ptps
+ * PTP shaper
+ * 0: regular shaper mode
+ * 1: PTP oriented shaper
+ * Allowed only for hierarchy 0
+ * Not supported for CPU port
+ * Note that ptps mode may affect the shaper rates of all hierarchies
+ * Supported only on Spectrum-1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, ptps, 0x0C, 29, 1);
+
enum {
MLXSW_REG_QEEC_BYTES_MODE,
MLXSW_REG_QEEC_PACKETS_MODE,
@@ -3601,6 +3613,16 @@ static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port,
mlxsw_reg_qeec_next_element_index_set(payload, next_index);
}
+static inline void mlxsw_reg_qeec_ptps_pack(char *payload, u8 local_port,
+ bool ptps)
+{
+ MLXSW_REG_ZERO(qeec, payload);
+ mlxsw_reg_qeec_local_port_set(payload, local_port);
+ mlxsw_reg_qeec_element_hierarchy_set(payload,
+ MLXSW_REG_QEEC_HIERARCY_PORT);
+ mlxsw_reg_qeec_ptps_set(payload, ptps);
+}
+
/* QRWE - QoS ReWrite Enable
* -------------------------
* This register configures the rewrite enable per receive port.
@@ -3814,6 +3836,112 @@ mlxsw_reg_qtctm_pack(char *payload, u8 local_port, bool mc)
mlxsw_reg_qtctm_mc_set(payload, mc);
}
+/* QPSC - QoS PTP Shaper Configuration Register
+ * --------------------------------------------
+ * The QPSC allows advanced configuration of the shapers when QEEC.ptps=1.
+ * Supported only on Spectrum-1.
+ */
+#define MLXSW_REG_QPSC_ID 0x401B
+#define MLXSW_REG_QPSC_LEN 0x28
+
+MLXSW_REG_DEFINE(qpsc, MLXSW_REG_QPSC_ID, MLXSW_REG_QPSC_LEN);
+
+enum mlxsw_reg_qpsc_port_speed {
+ MLXSW_REG_QPSC_PORT_SPEED_100M,
+ MLXSW_REG_QPSC_PORT_SPEED_1G,
+ MLXSW_REG_QPSC_PORT_SPEED_10G,
+ MLXSW_REG_QPSC_PORT_SPEED_25G,
+};
+
+/* reg_qpsc_port_speed
+ * Port speed.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpsc, port_speed, 0x00, 0, 4);
+
+/* reg_qpsc_shaper_time_exp
+ * The base-time-interval for updating the shapers tokens (for all hierarchies).
+ * shaper_update_rate = 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32nSec
+ * shaper_rate = 64bit * shaper_inc / shaper_update_rate
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, shaper_time_exp, 0x04, 16, 4);
+
+/* reg_qpsc_shaper_time_mantissa
+ * The base-time-interval for updating the shapers tokens (for all hierarchies).
+ * shaper_update_rate = 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32nSec
+ * shaper_rate = 64bit * shaper_inc / shaper_update_rate
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, shaper_time_mantissa, 0x04, 0, 5);
+
+/* reg_qpsc_shaper_inc
+ * Number of tokens added to shaper on each update.
+ * Units of 8B.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, shaper_inc, 0x08, 0, 5);
+
+/* reg_qpsc_shaper_bs
+ * Max shaper Burst size.
+ * Burst size is 2 ^ max_shaper_bs * 512 [bits]
+ * Range is: 5..25 (from 2KB..2GB)
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, shaper_bs, 0x0C, 0, 6);
+
+/* reg_qpsc_ptsc_we
+ * Write enable to port_to_shaper_credits.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, qpsc, ptsc_we, 0x10, 31, 1);
+
+/* reg_qpsc_port_to_shaper_credits
+ * For split ports: range 1..57
+ * For non-split ports: range 1..112
+ * Written only when ptsc_we is set.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, port_to_shaper_credits, 0x10, 0, 8);
+
+/* reg_qpsc_ing_timestamp_inc
+ * Ingress timestamp increment.
+ * 2's complement.
+ * The timestamp of MTPPTR at ingress will be incremented by this value. Global
+ * value for all ports.
+ * Same units as used by MTPPTR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, ing_timestamp_inc, 0x20, 0, 32);
+
+/* reg_qpsc_egr_timestamp_inc
+ * Egress timestamp increment.
+ * 2's complement.
+ * The timestamp of MTPPTR at egress will be incremented by this value. Global
+ * value for all ports.
+ * Same units as used by MTPPTR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpsc, egr_timestamp_inc, 0x24, 0, 32);
+
+static inline void
+mlxsw_reg_qpsc_pack(char *payload, enum mlxsw_reg_qpsc_port_speed port_speed,
+ u8 shaper_time_exp, u8 shaper_time_mantissa, u8 shaper_inc,
+ u8 shaper_bs, u8 port_to_shaper_credits,
+ int ing_timestamp_inc, int egr_timestamp_inc)
+{
+ MLXSW_REG_ZERO(qpsc, payload);
+ mlxsw_reg_qpsc_port_speed_set(payload, port_speed);
+ mlxsw_reg_qpsc_shaper_time_exp_set(payload, shaper_time_exp);
+ mlxsw_reg_qpsc_shaper_time_mantissa_set(payload, shaper_time_mantissa);
+ mlxsw_reg_qpsc_shaper_inc_set(payload, shaper_inc);
+ mlxsw_reg_qpsc_shaper_bs_set(payload, shaper_bs);
+ mlxsw_reg_qpsc_ptsc_we_set(payload, true);
+ mlxsw_reg_qpsc_port_to_shaper_credits_set(payload, port_to_shaper_credits);
+ mlxsw_reg_qpsc_ing_timestamp_inc_set(payload, ing_timestamp_inc);
+ mlxsw_reg_qpsc_egr_timestamp_inc_set(payload, egr_timestamp_inc);
+}
+
/* PMLP - Ports Module to Local Port Register
* ------------------------------------------
* Configures the assignment of modules to local ports.
@@ -5292,6 +5420,8 @@ enum mlxsw_reg_htgt_trap_group {
MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD,
MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND,
MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1,
};
/* reg_htgt_trap_group
@@ -9148,6 +9278,216 @@ static inline void mlxsw_reg_mprs_pack(char *payload, u16 parsing_depth,
mlxsw_reg_mprs_vxlan_udp_dport_set(payload, vxlan_udp_dport);
}
+/* MOGCR - Monitoring Global Configuration Register
+ * ------------------------------------------------
+ */
+#define MLXSW_REG_MOGCR_ID 0x9086
+#define MLXSW_REG_MOGCR_LEN 0x20
+
+MLXSW_REG_DEFINE(mogcr, MLXSW_REG_MOGCR_ID, MLXSW_REG_MOGCR_LEN);
+
+/* reg_mogcr_ptp_iftc
+ * PTP Ingress FIFO Trap Clear
+ * The PTP_ING_FIFO trap provides MTPPTR with clr according
+ * to this value. Default 0.
+ * Reserved when IB switches and when SwitchX/-2, Spectrum-2
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mogcr, ptp_iftc, 0x00, 1, 1);
+
+/* reg_mogcr_ptp_eftc
+ * PTP Egress FIFO Trap Clear
+ * The PTP_EGR_FIFO trap provides MTPPTR with clr according
+ * to this value. Default 0.
+ * Reserved when IB switches and when SwitchX/-2, Spectrum-2
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mogcr, ptp_eftc, 0x00, 0, 1);
+
+/* MTPPPC - Time Precision Packet Port Configuration
+ * -------------------------------------------------
+ * This register serves for configuration of which PTP messages should be
+ * timestamped. This is a global configuration, despite the register name.
+ *
+ * Reserved when Spectrum-2.
+ */
+#define MLXSW_REG_MTPPPC_ID 0x9090
+#define MLXSW_REG_MTPPPC_LEN 0x28
+
+MLXSW_REG_DEFINE(mtpppc, MLXSW_REG_MTPPPC_ID, MLXSW_REG_MTPPPC_LEN);
+
+/* reg_mtpppc_ing_timestamp_message_type
+ * Bitwise vector of PTP message types to timestamp at ingress.
+ * MessageType field as defined by IEEE 1588
+ * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Default all 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpppc, ing_timestamp_message_type, 0x08, 0, 16);
+
+/* reg_mtpppc_egr_timestamp_message_type
+ * Bitwise vector of PTP message types to timestamp at egress.
+ * MessageType field as defined by IEEE 1588
+ * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Default all 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpppc, egr_timestamp_message_type, 0x0C, 0, 16);
+
+static inline void mlxsw_reg_mtpppc_pack(char *payload, u16 ing, u16 egr)
+{
+ MLXSW_REG_ZERO(mtpppc, payload);
+ mlxsw_reg_mtpppc_ing_timestamp_message_type_set(payload, ing);
+ mlxsw_reg_mtpppc_egr_timestamp_message_type_set(payload, egr);
+}
+
+/* MTPPTR - Time Precision Packet Timestamping Reading
+ * ---------------------------------------------------
+ * The MTPPTR is used for reading the per port PTP timestamp FIFO.
+ * There is a trap for packets which are latched to the timestamp FIFO, thus the
+ * SW knows which FIFO to read. Note that packets enter the FIFO before been
+ * trapped. The sequence number is used to synchronize the timestamp FIFO
+ * entries and the trapped packets.
+ * Reserved when Spectrum-2.
+ */
+
+#define MLXSW_REG_MTPPTR_ID 0x9091
+#define MLXSW_REG_MTPPTR_BASE_LEN 0x10 /* base length, without records */
+#define MLXSW_REG_MTPPTR_REC_LEN 0x10 /* record length */
+#define MLXSW_REG_MTPPTR_REC_MAX_COUNT 4
+#define MLXSW_REG_MTPPTR_LEN (MLXSW_REG_MTPPTR_BASE_LEN + \
+ MLXSW_REG_MTPPTR_REC_LEN * MLXSW_REG_MTPPTR_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(mtpptr, MLXSW_REG_MTPPTR_ID, MLXSW_REG_MTPPTR_LEN);
+
+/* reg_mtpptr_local_port
+ * Not supported for CPU port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtpptr, local_port, 0x00, 16, 8);
+
+enum mlxsw_reg_mtpptr_dir {
+ MLXSW_REG_MTPPTR_DIR_INGRESS,
+ MLXSW_REG_MTPPTR_DIR_EGRESS,
+};
+
+/* reg_mtpptr_dir
+ * Direction.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtpptr, dir, 0x00, 0, 1);
+
+/* reg_mtpptr_clr
+ * Clear the records.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, mtpptr, clr, 0x04, 31, 1);
+
+/* reg_mtpptr_num_rec
+ * Number of valid records in the response
+ * Range 0.. cap_ptp_timestamp_fifo
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mtpptr, num_rec, 0x08, 0, 4);
+
+/* reg_mtpptr_rec_message_type
+ * MessageType field as defined by IEEE 1588 Each bit corresponds to a value
+ * (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_message_type,
+ MLXSW_REG_MTPPTR_BASE_LEN, 8, 4,
+ MLXSW_REG_MTPPTR_REC_LEN, 0, false);
+
+/* reg_mtpptr_rec_domain_number
+ * DomainNumber field as defined by IEEE 1588
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_domain_number,
+ MLXSW_REG_MTPPTR_BASE_LEN, 0, 8,
+ MLXSW_REG_MTPPTR_REC_LEN, 0, false);
+
+/* reg_mtpptr_rec_sequence_id
+ * SequenceId field as defined by IEEE 1588
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_sequence_id,
+ MLXSW_REG_MTPPTR_BASE_LEN, 0, 16,
+ MLXSW_REG_MTPPTR_REC_LEN, 0x4, false);
+
+/* reg_mtpptr_rec_timestamp_high
+ * Timestamp of when the PTP packet has passed through the port Units of PLL
+ * clock time.
+ * For Spectrum-1 the PLL clock is 156.25Mhz and PLL clock time is 6.4nSec.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_high,
+ MLXSW_REG_MTPPTR_BASE_LEN, 0, 32,
+ MLXSW_REG_MTPPTR_REC_LEN, 0x8, false);
+
+/* reg_mtpptr_rec_timestamp_low
+ * See rec_timestamp_high.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_low,
+ MLXSW_REG_MTPPTR_BASE_LEN, 0, 32,
+ MLXSW_REG_MTPPTR_REC_LEN, 0xC, false);
+
+static inline void mlxsw_reg_mtpptr_unpack(const char *payload,
+ unsigned int rec,
+ u8 *p_message_type,
+ u8 *p_domain_number,
+ u16 *p_sequence_id,
+ u64 *p_timestamp)
+{
+ u32 timestamp_high, timestamp_low;
+
+ *p_message_type = mlxsw_reg_mtpptr_rec_message_type_get(payload, rec);
+ *p_domain_number = mlxsw_reg_mtpptr_rec_domain_number_get(payload, rec);
+ *p_sequence_id = mlxsw_reg_mtpptr_rec_sequence_id_get(payload, rec);
+ timestamp_high = mlxsw_reg_mtpptr_rec_timestamp_high_get(payload, rec);
+ timestamp_low = mlxsw_reg_mtpptr_rec_timestamp_low_get(payload, rec);
+ *p_timestamp = (u64)timestamp_high << 32 | timestamp_low;
+}
+
+/* MTPTPT - Monitoring Precision Time Protocol Trap Register
+ * ---------------------------------------------------------
+ * This register is used for configuring under which trap to deliver PTP
+ * packets depending on type of the packet.
+ */
+#define MLXSW_REG_MTPTPT_ID 0x9092
+#define MLXSW_REG_MTPTPT_LEN 0x08
+
+MLXSW_REG_DEFINE(mtptpt, MLXSW_REG_MTPTPT_ID, MLXSW_REG_MTPTPT_LEN);
+
+enum mlxsw_reg_mtptpt_trap_id {
+ MLXSW_REG_MTPTPT_TRAP_ID_PTP0,
+ MLXSW_REG_MTPTPT_TRAP_ID_PTP1,
+};
+
+/* reg_mtptpt_trap_id
+ * Trap id.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtptpt, trap_id, 0x00, 0, 4);
+
+/* reg_mtptpt_message_type
+ * Bitwise vector of PTP message types to trap. This is a necessary but
+ * non-sufficient condition since need to enable also per port. See MTPPPC.
+ * Message types are defined by IEEE 1588 Each bit corresponds to a value (e.g.
+ * Bit0: Sync, Bit1: Delay_Req)
+ */
+MLXSW_ITEM32(reg, mtptpt, message_type, 0x04, 0, 16);
+
+static inline void mlxsw_reg_mtptptp_pack(char *payload,
+ enum mlxsw_reg_mtptpt_trap_id trap_id,
+ u16 message_type)
+{
+ MLXSW_REG_ZERO(mtptpt, payload);
+ mlxsw_reg_mtptpt_trap_id_set(payload, trap_id);
+ mlxsw_reg_mtptpt_message_type_set(payload, message_type);
+}
+
/* MGPIR - Management General Peripheral Information Register
* ----------------------------------------------------------
* MGPIR register allows software to query the hardware and
@@ -10162,6 +10502,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(qpdsm),
MLXSW_REG(qpdpm),
MLXSW_REG(qtctm),
+ MLXSW_REG(qpsc),
MLXSW_REG(pmlp),
MLXSW_REG(pmtu),
MLXSW_REG(ptys),
@@ -10216,6 +10557,10 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(mcda),
MLXSW_REG(mgpc),
MLXSW_REG(mprs),
+ MLXSW_REG(mogcr),
+ MLXSW_REG(mtpppc),
+ MLXSW_REG(mtpptr),
+ MLXSW_REG(mtptpt),
MLXSW_REG(mgpir),
MLXSW_REG(tngcr),
MLXSW_REG(tnumt),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 3e8593824b33..ce285fbeebd3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -147,6 +147,35 @@ struct mlxsw_sp_mlxfw_dev {
struct mlxsw_sp *mlxsw_sp;
};
+struct mlxsw_sp_ptp_ops {
+ struct mlxsw_sp_ptp_clock *
+ (*clock_init)(struct mlxsw_sp *mlxsw_sp, struct device *dev);
+ void (*clock_fini)(struct mlxsw_sp_ptp_clock *clock);
+
+ struct mlxsw_sp_ptp_state *(*init)(struct mlxsw_sp *mlxsw_sp);
+ void (*fini)(struct mlxsw_sp_ptp_state *ptp_state);
+
+ /* Notify a driver that a packet that might be PTP was received. Driver
+ * is responsible for freeing the passed-in SKB.
+ */
+ void (*receive)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+ u8 local_port);
+
+ /* Notify a driver that a timestamped packet was transmitted. Driver
+ * is responsible for freeing the passed-in SKB.
+ */
+ void (*transmitted)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+ u8 local_port);
+
+ int (*hwtstamp_get)(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config);
+ int (*hwtstamp_set)(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config);
+ void (*shaper_work)(struct work_struct *work);
+ int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp,
+ struct ethtool_ts_info *info);
+};
+
static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev,
u16 component_index, u32 *p_max_size,
u8 *p_align_bits, u16 *p_max_write_size)
@@ -778,6 +807,8 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb,
u64 len;
int err;
+ memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb));
+
if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info))
return NETDEV_TX_BUSY;
@@ -1785,6 +1816,65 @@ mlxsw_sp_port_get_devlink_port(struct net_device *dev)
mlxsw_sp_port->local_port);
}
+static int mlxsw_sp_port_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ifreq *ifr)
+{
+ struct hwtstamp_config config;
+ int err;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port,
+ &config);
+ if (err)
+ return err;
+
+ if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int mlxsw_sp_port_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ifreq *ifr)
+{
+ struct hwtstamp_config config;
+ int err;
+
+ err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port,
+ &config);
+ if (err)
+ return err;
+
+ if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static inline void mlxsw_sp_port_ptp_clear(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct hwtstamp_config config = {0};
+
+ mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config);
+}
+
+static int
+mlxsw_sp_port_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ switch (cmd) {
+ case SIOCSHWTSTAMP:
+ return mlxsw_sp_port_hwtstamp_set(mlxsw_sp_port, ifr);
+ case SIOCGHWTSTAMP:
+ return mlxsw_sp_port_hwtstamp_get(mlxsw_sp_port, ifr);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
.ndo_open = mlxsw_sp_port_open,
.ndo_stop = mlxsw_sp_port_stop,
@@ -1800,6 +1890,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
.ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid,
.ndo_set_features = mlxsw_sp_set_features,
.ndo_get_devlink_port = mlxsw_sp_port_get_devlink_port,
+ .ndo_do_ioctl = mlxsw_sp_port_ioctl,
};
static void mlxsw_sp_port_get_drvinfo(struct net_device *dev,
@@ -2565,28 +2656,33 @@ mlxsw_sp1_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto,
}
}
+static u32
+mlxsw_sp1_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) {
+ if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask)
+ return mlxsw_sp1_port_link_mode[i].speed;
+ }
+
+ return SPEED_UNKNOWN;
+}
+
static void
mlxsw_sp1_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok,
u32 ptys_eth_proto,
struct ethtool_link_ksettings *cmd)
{
- u32 speed = SPEED_UNKNOWN;
- u8 duplex = DUPLEX_UNKNOWN;
- int i;
+ cmd->base.speed = SPEED_UNKNOWN;
+ cmd->base.duplex = DUPLEX_UNKNOWN;
if (!carrier_ok)
- goto out;
+ return;
- for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) {
- if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) {
- speed = mlxsw_sp1_port_link_mode[i].speed;
- duplex = DUPLEX_FULL;
- break;
- }
- }
-out:
- cmd->base.speed = speed;
- cmd->base.duplex = duplex;
+ cmd->base.speed = mlxsw_sp1_from_ptys_speed(mlxsw_sp, ptys_eth_proto);
+ if (cmd->base.speed != SPEED_UNKNOWN)
+ cmd->base.duplex = DUPLEX_FULL;
}
static u32
@@ -2657,6 +2753,7 @@ static const struct mlxsw_sp_port_type_speed_ops
mlxsw_sp1_port_type_speed_ops = {
.from_ptys_supported_port = mlxsw_sp1_from_ptys_supported_port,
.from_ptys_link = mlxsw_sp1_from_ptys_link,
+ .from_ptys_speed = mlxsw_sp1_from_ptys_speed,
.from_ptys_speed_duplex = mlxsw_sp1_from_ptys_speed_duplex,
.to_ptys_advert_link = mlxsw_sp1_to_ptys_advert_link,
.to_ptys_speed = mlxsw_sp1_to_ptys_speed,
@@ -2907,28 +3004,33 @@ mlxsw_sp2_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto,
}
}
+static u32
+mlxsw_sp2_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) {
+ if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask)
+ return mlxsw_sp2_port_link_mode[i].speed;
+ }
+
+ return SPEED_UNKNOWN;
+}
+
static void
mlxsw_sp2_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok,
u32 ptys_eth_proto,
struct ethtool_link_ksettings *cmd)
{
- u32 speed = SPEED_UNKNOWN;
- u8 duplex = DUPLEX_UNKNOWN;
- int i;
+ cmd->base.speed = SPEED_UNKNOWN;
+ cmd->base.duplex = DUPLEX_UNKNOWN;
if (!carrier_ok)
- goto out;
+ return;
- for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) {
- if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) {
- speed = mlxsw_sp2_port_link_mode[i].speed;
- duplex = DUPLEX_FULL;
- break;
- }
- }
-out:
- cmd->base.speed = speed;
- cmd->base.duplex = duplex;
+ cmd->base.speed = mlxsw_sp2_from_ptys_speed(mlxsw_sp, ptys_eth_proto);
+ if (cmd->base.speed != SPEED_UNKNOWN)
+ cmd->base.duplex = DUPLEX_FULL;
}
static bool
@@ -3039,6 +3141,7 @@ static const struct mlxsw_sp_port_type_speed_ops
mlxsw_sp2_port_type_speed_ops = {
.from_ptys_supported_port = mlxsw_sp2_from_ptys_supported_port,
.from_ptys_link = mlxsw_sp2_from_ptys_link,
+ .from_ptys_speed = mlxsw_sp2_from_ptys_speed,
.from_ptys_speed_duplex = mlxsw_sp2_from_ptys_speed_duplex,
.to_ptys_advert_link = mlxsw_sp2_to_ptys_advert_link,
.to_ptys_speed = mlxsw_sp2_to_ptys_speed,
@@ -3228,6 +3331,15 @@ static int mlxsw_sp_get_module_eeprom(struct net_device *netdev,
return err;
}
+static int
+mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+
+ return mlxsw_sp->ptp_ops->get_ts_info(mlxsw_sp, info);
+}
+
static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
.get_drvinfo = mlxsw_sp_port_get_drvinfo,
.get_link = ethtool_op_get_link,
@@ -3241,6 +3353,7 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
.set_link_ksettings = mlxsw_sp_port_set_link_ksettings,
.get_module_info = mlxsw_sp_get_module_info,
.get_module_eeprom = mlxsw_sp_get_module_eeprom,
+ .get_ts_info = mlxsw_sp_get_ts_info,
};
static int
@@ -3357,8 +3470,9 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
return err;
}
- /* Make sure the max shaper is disabled in all hierarchies that
- * support it.
+ /* Make sure the max shaper is disabled in all hierarchies that support
+ * it. Note that this disables ptps (PTP shaper), but that is intended
+ * for the initial configuration.
*/
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0,
@@ -3603,6 +3717,9 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
}
mlxsw_sp_port->default_vlan = mlxsw_sp_port_vlan;
+ INIT_DELAYED_WORK(&mlxsw_sp_port->ptp.shaper_dw,
+ mlxsw_sp->ptp_ops->shaper_work);
+
mlxsw_sp->ports[local_port] = mlxsw_sp_port;
err = register_netdev(dev);
if (err) {
@@ -3657,6 +3774,8 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw);
+ cancel_delayed_work_sync(&mlxsw_sp_port->ptp.shaper_dw);
+ mlxsw_sp_port_ptp_clear(mlxsw_sp_port);
mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp);
unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
mlxsw_sp->ports[local_port] = NULL;
@@ -3941,14 +4060,55 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg,
if (status == MLXSW_PORT_OPER_STATUS_UP) {
netdev_info(mlxsw_sp_port->dev, "link up\n");
netif_carrier_on(mlxsw_sp_port->dev);
+ mlxsw_core_schedule_dw(&mlxsw_sp_port->ptp.shaper_dw, 0);
} else {
netdev_info(mlxsw_sp_port->dev, "link down\n");
netif_carrier_off(mlxsw_sp_port->dev);
}
}
-static void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
- u8 local_port, void *priv)
+static void mlxsw_sp1_ptp_fifo_event_func(struct mlxsw_sp *mlxsw_sp,
+ char *mtpptr_pl, bool ingress)
+{
+ u8 local_port;
+ u8 num_rec;
+ int i;
+
+ local_port = mlxsw_reg_mtpptr_local_port_get(mtpptr_pl);
+ num_rec = mlxsw_reg_mtpptr_num_rec_get(mtpptr_pl);
+ for (i = 0; i < num_rec; i++) {
+ u8 domain_number;
+ u8 message_type;
+ u16 sequence_id;
+ u64 timestamp;
+
+ mlxsw_reg_mtpptr_unpack(mtpptr_pl, i, &message_type,
+ &domain_number, &sequence_id,
+ &timestamp);
+ mlxsw_sp1_ptp_got_timestamp(mlxsw_sp, ingress, local_port,
+ message_type, domain_number,
+ sequence_id, timestamp);
+ }
+}
+
+static void mlxsw_sp1_ptp_ing_fifo_event_func(const struct mlxsw_reg_info *reg,
+ char *mtpptr_pl, void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, true);
+}
+
+static void mlxsw_sp1_ptp_egr_fifo_event_func(const struct mlxsw_reg_info *reg,
+ char *mtpptr_pl, void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, false);
+}
+
+void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
+ u8 local_port, void *priv)
{
struct mlxsw_sp *mlxsw_sp = priv;
struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
@@ -4022,6 +4182,14 @@ out:
consume_skb(skb);
}
+static void mlxsw_sp_rx_listener_ptp(struct sk_buff *skb, u8 local_port,
+ void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port);
+}
+
#define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl) \
MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action, \
_is_ctrl, SP_##_trap_group, DISCARD)
@@ -4043,7 +4211,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
/* L2 traps */
MLXSW_SP_RXL_NO_MARK(STP, TRAP_TO_CPU, STP, true),
MLXSW_SP_RXL_NO_MARK(LACP, TRAP_TO_CPU, LACP, true),
- MLXSW_SP_RXL_NO_MARK(LLDP, TRAP_TO_CPU, LLDP, true),
+ MLXSW_RXL(mlxsw_sp_rx_listener_ptp, LLDP, TRAP_TO_CPU,
+ false, SP_LLDP, DISCARD),
MLXSW_SP_RXL_MARK(DHCP, MIRROR_TO_CPU, DHCP, false),
MLXSW_SP_RXL_MARK(IGMP_QUERY, MIRROR_TO_CPU, IGMP, false),
MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, TRAP_TO_CPU, IGMP, false),
@@ -4112,6 +4281,16 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
/* NVE traps */
MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false),
MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, TRAP_TO_CPU, ARP, false),
+ /* PTP traps */
+ MLXSW_RXL(mlxsw_sp_rx_listener_ptp, PTP0, TRAP_TO_CPU,
+ false, SP_PTP0, DISCARD),
+ MLXSW_SP_RXL_NO_MARK(PTP1, TRAP_TO_CPU, PTP1, false),
+};
+
+static const struct mlxsw_listener mlxsw_sp1_listener[] = {
+ /* Events */
+ MLXSW_EVENTL(mlxsw_sp1_ptp_egr_fifo_event_func, PTP_EGR_FIFO, SP_PTP0),
+ MLXSW_EVENTL(mlxsw_sp1_ptp_ing_fifo_event_func, PTP_ING_FIFO, SP_PTP0),
};
static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
@@ -4163,6 +4342,14 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
rate = 1024;
burst_size = 7;
break;
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0:
+ rate = 24 * 1024;
+ burst_size = 12;
+ break;
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1:
+ rate = 19 * 1024;
+ burst_size = 12;
+ break;
default:
continue;
}
@@ -4201,6 +4388,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0:
priority = 5;
tc = 5;
break;
@@ -4218,6 +4406,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1:
priority = 2;
tc = 2;
break;
@@ -4251,22 +4440,16 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
return 0;
}
-static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
+static int mlxsw_sp_traps_register(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_listener listeners[],
+ size_t listeners_count)
{
int i;
int err;
- err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core);
- if (err)
- return err;
-
- err = mlxsw_sp_trap_groups_set(mlxsw_sp->core);
- if (err)
- return err;
-
- for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) {
+ for (i = 0; i < listeners_count; i++) {
err = mlxsw_core_trap_register(mlxsw_sp->core,
- &mlxsw_sp_listener[i],
+ &listeners[i],
mlxsw_sp);
if (err)
goto err_listener_register;
@@ -4277,23 +4460,63 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
err_listener_register:
for (i--; i >= 0; i--) {
mlxsw_core_trap_unregister(mlxsw_sp->core,
- &mlxsw_sp_listener[i],
+ &listeners[i],
mlxsw_sp);
}
return err;
}
-static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
+static void mlxsw_sp_traps_unregister(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_listener listeners[],
+ size_t listeners_count)
{
int i;
- for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) {
+ for (i = 0; i < listeners_count; i++) {
mlxsw_core_trap_unregister(mlxsw_sp->core,
- &mlxsw_sp_listener[i],
+ &listeners[i],
mlxsw_sp);
}
}
+static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int err;
+
+ err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_trap_groups_set(mlxsw_sp->core);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener,
+ ARRAY_SIZE(mlxsw_sp_listener));
+ if (err)
+ return err;
+
+ err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp->listeners,
+ mlxsw_sp->listeners_count);
+ if (err)
+ goto err_extra_traps_init;
+
+ return 0;
+
+err_extra_traps_init:
+ mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener,
+ ARRAY_SIZE(mlxsw_sp_listener));
+ return err;
+}
+
+static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp->listeners,
+ mlxsw_sp->listeners_count);
+ mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener,
+ ARRAY_SIZE(mlxsw_sp_listener));
+}
+
#define MLXSW_SP_LAG_SEED_INIT 0xcafecafe
static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
@@ -4346,20 +4569,30 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
}
-struct mlxsw_sp_ptp_ops {
- struct mlxsw_sp_ptp_clock *
- (*clock_init)(struct mlxsw_sp *mlxsw_sp, struct device *dev);
- void (*clock_fini)(struct mlxsw_sp_ptp_clock *clock);
-};
-
static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = {
.clock_init = mlxsw_sp1_ptp_clock_init,
.clock_fini = mlxsw_sp1_ptp_clock_fini,
+ .init = mlxsw_sp1_ptp_init,
+ .fini = mlxsw_sp1_ptp_fini,
+ .receive = mlxsw_sp1_ptp_receive,
+ .transmitted = mlxsw_sp1_ptp_transmitted,
+ .hwtstamp_get = mlxsw_sp1_ptp_hwtstamp_get,
+ .hwtstamp_set = mlxsw_sp1_ptp_hwtstamp_set,
+ .shaper_work = mlxsw_sp1_ptp_shaper_work,
+ .get_ts_info = mlxsw_sp1_ptp_get_ts_info,
};
static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = {
.clock_init = mlxsw_sp2_ptp_clock_init,
.clock_fini = mlxsw_sp2_ptp_clock_fini,
+ .init = mlxsw_sp2_ptp_init,
+ .fini = mlxsw_sp2_ptp_fini,
+ .receive = mlxsw_sp2_ptp_receive,
+ .transmitted = mlxsw_sp2_ptp_transmitted,
+ .hwtstamp_get = mlxsw_sp2_ptp_hwtstamp_get,
+ .hwtstamp_set = mlxsw_sp2_ptp_hwtstamp_set,
+ .shaper_work = mlxsw_sp2_ptp_shaper_work,
+ .get_ts_info = mlxsw_sp2_ptp_get_ts_info,
};
static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
@@ -4471,6 +4704,16 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
}
}
+ if (mlxsw_sp->clock) {
+ /* NULL is a valid return value from ptp_ops->init */
+ mlxsw_sp->ptp_state = mlxsw_sp->ptp_ops->init(mlxsw_sp);
+ if (IS_ERR(mlxsw_sp->ptp_state)) {
+ err = PTR_ERR(mlxsw_sp->ptp_state);
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize PTP\n");
+ goto err_ptp_init;
+ }
+ }
+
/* Initialize netdevice notifier after router and SPAN is initialized,
* so that the event handler can use router structures and call SPAN
* respin.
@@ -4502,6 +4745,9 @@ err_dpipe_init:
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
err_netdev_notifier:
if (mlxsw_sp->clock)
+ mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state);
+err_ptp_init:
+ if (mlxsw_sp->clock)
mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock);
err_ptp_clock_init:
mlxsw_sp_router_fini(mlxsw_sp);
@@ -4548,6 +4794,8 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals;
mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops;
mlxsw_sp->ptp_ops = &mlxsw_sp1_ptp_ops;
+ mlxsw_sp->listeners = mlxsw_sp1_listener;
+ mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener);
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
}
@@ -4579,8 +4827,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_ports_remove(mlxsw_sp);
mlxsw_sp_dpipe_fini(mlxsw_sp);
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
- if (mlxsw_sp->clock)
+ if (mlxsw_sp->clock) {
+ mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state);
mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock);
+ }
mlxsw_sp_router_fini(mlxsw_sp);
mlxsw_sp_acl_fini(mlxsw_sp);
mlxsw_sp_nve_fini(mlxsw_sp);
@@ -4923,6 +5173,15 @@ static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core)
mlxsw_sp_params_unregister(mlxsw_core);
}
+static void mlxsw_sp_ptp_transmitted(struct mlxsw_core *mlxsw_core,
+ struct sk_buff *skb, u8 local_port)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+
+ skb_pull(skb, MLXSW_TXHDR_LEN);
+ mlxsw_sp->ptp_ops->transmitted(mlxsw_sp, skb, local_port);
+}
+
static struct mlxsw_driver mlxsw_sp1_driver = {
.kind = mlxsw_sp1_driver_name,
.priv_size = sizeof(struct mlxsw_sp),
@@ -4947,6 +5206,7 @@ static struct mlxsw_driver mlxsw_sp1_driver = {
.kvd_sizes_get = mlxsw_sp_kvd_sizes_get,
.params_register = mlxsw_sp_params_register,
.params_unregister = mlxsw_sp_params_unregister,
+ .ptp_transmitted = mlxsw_sp_ptp_transmitted,
.txhdr_len = MLXSW_TXHDR_LEN,
.profile = &mlxsw_sp1_config_profile,
.res_query_enabled = true,
@@ -4975,6 +5235,7 @@ static struct mlxsw_driver mlxsw_sp2_driver = {
.resources_register = mlxsw_sp2_resources_register,
.params_register = mlxsw_sp2_params_register,
.params_unregister = mlxsw_sp2_params_unregister,
+ .ptp_transmitted = mlxsw_sp_ptp_transmitted,
.txhdr_len = MLXSW_TXHDR_LEN,
.profile = &mlxsw_sp2_config_profile,
.res_query_enabled = true,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 84f4276193b3..abbb563db440 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -136,6 +136,7 @@ struct mlxsw_sp_acl_tcam_ops;
struct mlxsw_sp_nve_ops;
struct mlxsw_sp_sb_vals;
struct mlxsw_sp_port_type_speed_ops;
+struct mlxsw_sp_ptp_state;
struct mlxsw_sp_ptp_ops;
struct mlxsw_sp {
@@ -157,6 +158,7 @@ struct mlxsw_sp {
struct mlxsw_sp_nve *nve;
struct notifier_block netdevice_nb;
struct mlxsw_sp_ptp_clock *clock;
+ struct mlxsw_sp_ptp_state *ptp_state;
struct mlxsw_sp_counter_pool *counter_pool;
struct {
@@ -175,6 +177,8 @@ struct mlxsw_sp {
const struct mlxsw_sp_sb_vals *sb_vals;
const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops;
const struct mlxsw_sp_ptp_ops *ptp_ops;
+ const struct mlxsw_listener *listeners;
+ size_t listeners_count;
};
static inline struct mlxsw_sp_upper *
@@ -262,6 +266,12 @@ struct mlxsw_sp_port {
unsigned acl_rule_count;
struct mlxsw_sp_acl_block *ing_acl_block;
struct mlxsw_sp_acl_block *eg_acl_block;
+ struct {
+ struct delayed_work shaper_dw;
+ struct hwtstamp_config hwtstamp_config;
+ u16 ing_types;
+ u16 egr_types;
+ } ptp;
};
struct mlxsw_sp_port_type_speed_ops {
@@ -270,6 +280,7 @@ struct mlxsw_sp_port_type_speed_ops {
struct ethtool_link_ksettings *cmd);
void (*from_ptys_link)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto,
unsigned long *mode);
+ u32 (*from_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto);
void (*from_ptys_speed_duplex)(struct mlxsw_sp *mlxsw_sp,
bool carrier_ok, u32 ptys_eth_proto,
struct ethtool_link_ksettings *cmd);
@@ -438,6 +449,8 @@ struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
extern struct notifier_block mlxsw_sp_switchdev_notifier;
/* spectrum.c */
+void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
+ u8 local_port, void *priv);
int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
bool dwrr, u8 dwrr_weight);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
index bb6c0cb25771..bd9c2bc2d5d6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
@@ -6,7 +6,13 @@
#include <linux/timecounter.h>
#include <linux/spinlock.h>
#include <linux/device.h>
+#include <linux/rhashtable.h>
+#include <linux/ptp_classify.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/net_tstamp.h>
+#include "spectrum.h"
#include "spectrum_ptp.h"
#include "core.h"
@@ -14,6 +20,44 @@
#define MLXSW_SP1_PTP_CLOCK_FREQ_KHZ 156257 /* 6.4nSec */
#define MLXSW_SP1_PTP_CLOCK_MASK 64
+#define MLXSW_SP1_PTP_HT_GC_INTERVAL 500 /* ms */
+
+/* How long, approximately, should the unmatched entries stay in the hash table
+ * before they are collected. Should be evenly divisible by the GC interval.
+ */
+#define MLXSW_SP1_PTP_HT_GC_TIMEOUT 1000 /* ms */
+
+struct mlxsw_sp_ptp_state {
+ struct mlxsw_sp *mlxsw_sp;
+ struct rhashtable unmatched_ht;
+ spinlock_t unmatched_lock; /* protects the HT */
+ struct delayed_work ht_gc_dw;
+ u32 gc_cycle;
+};
+
+struct mlxsw_sp1_ptp_key {
+ u8 local_port;
+ u8 message_type;
+ u16 sequence_id;
+ u8 domain_number;
+ bool ingress;
+};
+
+struct mlxsw_sp1_ptp_unmatched {
+ struct mlxsw_sp1_ptp_key key;
+ struct rhash_head ht_node;
+ struct rcu_head rcu;
+ struct sk_buff *skb;
+ u64 timestamp;
+ u32 gc_cycle;
+};
+
+static const struct rhashtable_params mlxsw_sp1_ptp_unmatched_ht_params = {
+ .key_len = sizeof_field(struct mlxsw_sp1_ptp_unmatched, key),
+ .key_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, key),
+ .head_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, ht_node),
+};
+
struct mlxsw_sp_ptp_clock {
struct mlxsw_core *core;
spinlock_t lock; /* protect this structure */
@@ -89,9 +133,9 @@ mlxsw_sp1_ptp_phc_settime(struct mlxsw_sp_ptp_clock *clock, u64 nsec)
next_sec = div_u64(nsec, NSEC_PER_SEC) + 1;
next_sec_in_nsec = next_sec * NSEC_PER_SEC;
- spin_lock(&clock->lock);
+ spin_lock_bh(&clock->lock);
cycles = mlxsw_sp1_ptp_ns2cycles(&clock->tc, next_sec_in_nsec);
- spin_unlock(&clock->lock);
+ spin_unlock_bh(&clock->lock);
mlxsw_reg_mtpps_vpin_pack(mtpps_pl, cycles);
err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtpps), mtpps_pl);
@@ -124,11 +168,11 @@ static int mlxsw_sp1_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
adj *= ppb;
diff = div_u64(adj, NSEC_PER_SEC);
- spin_lock(&clock->lock);
+ spin_lock_bh(&clock->lock);
timecounter_read(&clock->tc);
clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
clock->nominal_c_mult + diff;
- spin_unlock(&clock->lock);
+ spin_unlock_bh(&clock->lock);
return mlxsw_sp1_ptp_phc_adjfreq(clock, neg_adj ? -ppb : ppb);
}
@@ -139,10 +183,10 @@ static int mlxsw_sp1_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info);
u64 nsec;
- spin_lock(&clock->lock);
+ spin_lock_bh(&clock->lock);
timecounter_adjtime(&clock->tc, delta);
nsec = timecounter_read(&clock->tc);
- spin_unlock(&clock->lock);
+ spin_unlock_bh(&clock->lock);
return mlxsw_sp1_ptp_phc_settime(clock, nsec);
}
@@ -155,10 +199,10 @@ static int mlxsw_sp1_ptp_gettimex(struct ptp_clock_info *ptp,
container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info);
u64 cycles, nsec;
- spin_lock(&clock->lock);
+ spin_lock_bh(&clock->lock);
cycles = __mlxsw_sp1_ptp_read_frc(clock, sts);
nsec = timecounter_cyc2time(&clock->tc, cycles);
- spin_unlock(&clock->lock);
+ spin_unlock_bh(&clock->lock);
*ts = ns_to_timespec64(nsec);
@@ -172,10 +216,10 @@ static int mlxsw_sp1_ptp_settime(struct ptp_clock_info *ptp,
container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info);
u64 nsec = timespec64_to_ns(ts);
- spin_lock(&clock->lock);
+ spin_lock_bh(&clock->lock);
timecounter_init(&clock->tc, &clock->cycles, nsec);
nsec = timecounter_read(&clock->tc);
- spin_unlock(&clock->lock);
+ spin_unlock_bh(&clock->lock);
return mlxsw_sp1_ptp_phc_settime(clock, nsec);
}
@@ -197,9 +241,9 @@ static void mlxsw_sp1_ptp_clock_overflow(struct work_struct *work)
clock = container_of(dwork, struct mlxsw_sp_ptp_clock, overflow_work);
- spin_lock(&clock->lock);
+ spin_lock_bh(&clock->lock);
timecounter_read(&clock->tc);
- spin_unlock(&clock->lock);
+ spin_unlock_bh(&clock->lock);
mlxsw_core_schedule_dw(&clock->overflow_work, clock->overflow_period);
}
@@ -264,3 +308,804 @@ void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock)
cancel_delayed_work_sync(&clock->overflow_work);
kfree(clock);
}
+
+static int mlxsw_sp_ptp_parse(struct sk_buff *skb,
+ u8 *p_domain_number,
+ u8 *p_message_type,
+ u16 *p_sequence_id)
+{
+ unsigned int offset = 0;
+ unsigned int ptp_class;
+ u8 *data;
+
+ data = skb_mac_header(skb);
+ ptp_class = ptp_classify_raw(skb);
+
+ switch (ptp_class & PTP_CLASS_VMASK) {
+ case PTP_CLASS_V1:
+ case PTP_CLASS_V2:
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ if (ptp_class & PTP_CLASS_VLAN)
+ offset += VLAN_HLEN;
+
+ switch (ptp_class & PTP_CLASS_PMASK) {
+ case PTP_CLASS_IPV4:
+ offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
+ break;
+ case PTP_CLASS_IPV6:
+ offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
+ break;
+ case PTP_CLASS_L2:
+ offset += ETH_HLEN;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ /* PTP header is 34 bytes. */
+ if (skb->len < offset + 34)
+ return -EINVAL;
+
+ *p_message_type = data[offset] & 0x0f;
+ *p_domain_number = data[offset + 4];
+ *p_sequence_id = (u16)(data[offset + 30]) << 8 | data[offset + 31];
+ return 0;
+}
+
+/* Returns NULL on successful insertion, a pointer on conflict, or an ERR_PTR on
+ * error.
+ */
+static struct mlxsw_sp1_ptp_unmatched *
+mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_ptp_key key,
+ struct sk_buff *skb,
+ u64 timestamp)
+{
+ int cycles = MLXSW_SP1_PTP_HT_GC_TIMEOUT / MLXSW_SP1_PTP_HT_GC_INTERVAL;
+ struct mlxsw_sp_ptp_state *ptp_state = mlxsw_sp->ptp_state;
+ struct mlxsw_sp1_ptp_unmatched *unmatched;
+ struct mlxsw_sp1_ptp_unmatched *conflict;
+
+ unmatched = kzalloc(sizeof(*unmatched), GFP_ATOMIC);
+ if (!unmatched)
+ return ERR_PTR(-ENOMEM);
+
+ unmatched->key = key;
+ unmatched->skb = skb;
+ unmatched->timestamp = timestamp;
+ unmatched->gc_cycle = mlxsw_sp->ptp_state->gc_cycle + cycles;
+
+ conflict = rhashtable_lookup_get_insert_fast(&ptp_state->unmatched_ht,
+ &unmatched->ht_node,
+ mlxsw_sp1_ptp_unmatched_ht_params);
+ if (conflict)
+ kfree(unmatched);
+
+ return conflict;
+}
+
+static struct mlxsw_sp1_ptp_unmatched *
+mlxsw_sp1_ptp_unmatched_lookup(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_ptp_key key)
+{
+ return rhashtable_lookup(&mlxsw_sp->ptp_state->unmatched_ht, &key,
+ mlxsw_sp1_ptp_unmatched_ht_params);
+}
+
+static int
+mlxsw_sp1_ptp_unmatched_remove(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_ptp_unmatched *unmatched)
+{
+ return rhashtable_remove_fast(&mlxsw_sp->ptp_state->unmatched_ht,
+ &unmatched->ht_node,
+ mlxsw_sp1_ptp_unmatched_ht_params);
+}
+
+/* This function is called in the following scenarios:
+ *
+ * 1) When a packet is matched with its timestamp.
+ * 2) In several situation when it is necessary to immediately pass on
+ * an SKB without a timestamp.
+ * 3) From GC indirectly through mlxsw_sp1_ptp_unmatched_finish().
+ * This case is similar to 2) above.
+ */
+static void mlxsw_sp1_ptp_packet_finish(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port,
+ bool ingress,
+ struct skb_shared_hwtstamps *hwtstamps)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+
+ /* Between capturing the packet and finishing it, there is a window of
+ * opportunity for the originating port to go away (e.g. due to a
+ * split). Also make sure the SKB device reference is still valid.
+ */
+ mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ if (!(mlxsw_sp_port && (!skb->dev || skb->dev == mlxsw_sp_port->dev))) {
+ dev_kfree_skb_any(skb);
+ return;
+ }
+
+ if (ingress) {
+ if (hwtstamps)
+ *skb_hwtstamps(skb) = *hwtstamps;
+ mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp);
+ } else {
+ /* skb_tstamp_tx() allows hwtstamps to be NULL. */
+ skb_tstamp_tx(skb, hwtstamps);
+ dev_kfree_skb_any(skb);
+ }
+}
+
+static void mlxsw_sp1_packet_timestamp(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_ptp_key key,
+ struct sk_buff *skb,
+ u64 timestamp)
+{
+ struct skb_shared_hwtstamps hwtstamps;
+ u64 nsec;
+
+ spin_lock_bh(&mlxsw_sp->clock->lock);
+ nsec = timecounter_cyc2time(&mlxsw_sp->clock->tc, timestamp);
+ spin_unlock_bh(&mlxsw_sp->clock->lock);
+
+ hwtstamps.hwtstamp = ns_to_ktime(nsec);
+ mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb,
+ key.local_port, key.ingress, &hwtstamps);
+}
+
+static void
+mlxsw_sp1_ptp_unmatched_finish(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_ptp_unmatched *unmatched)
+{
+ if (unmatched->skb && unmatched->timestamp)
+ mlxsw_sp1_packet_timestamp(mlxsw_sp, unmatched->key,
+ unmatched->skb,
+ unmatched->timestamp);
+ else if (unmatched->skb)
+ mlxsw_sp1_ptp_packet_finish(mlxsw_sp, unmatched->skb,
+ unmatched->key.local_port,
+ unmatched->key.ingress, NULL);
+ kfree_rcu(unmatched, rcu);
+}
+
+static void mlxsw_sp1_ptp_unmatched_free_fn(void *ptr, void *arg)
+{
+ struct mlxsw_sp1_ptp_unmatched *unmatched = ptr;
+
+ /* This is invoked at a point where the ports are gone already. Nothing
+ * to do with whatever is left in the HT but to free it.
+ */
+ if (unmatched->skb)
+ dev_kfree_skb_any(unmatched->skb);
+ kfree_rcu(unmatched, rcu);
+}
+
+static void mlxsw_sp1_ptp_got_piece(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_ptp_key key,
+ struct sk_buff *skb, u64 timestamp)
+{
+ struct mlxsw_sp1_ptp_unmatched *unmatched, *conflict;
+ int err;
+
+ rcu_read_lock();
+
+ unmatched = mlxsw_sp1_ptp_unmatched_lookup(mlxsw_sp, key);
+
+ spin_lock(&mlxsw_sp->ptp_state->unmatched_lock);
+
+ if (unmatched) {
+ /* There was an unmatched entry when we looked, but it may have
+ * been removed before we took the lock.
+ */
+ err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, unmatched);
+ if (err)
+ unmatched = NULL;
+ }
+
+ if (!unmatched) {
+ /* We have no unmatched entry, but one may have been added after
+ * we looked, but before we took the lock.
+ */
+ unmatched = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key,
+ skb, timestamp);
+ if (IS_ERR(unmatched)) {
+ if (skb)
+ mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb,
+ key.local_port,
+ key.ingress, NULL);
+ unmatched = NULL;
+ } else if (unmatched) {
+ /* Save just told us, under lock, that the entry is
+ * there, so this has to work.
+ */
+ err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp,
+ unmatched);
+ WARN_ON_ONCE(err);
+ }
+ }
+
+ /* If unmatched is non-NULL here, it comes either from the lookup, or
+ * from the save attempt above. In either case the entry was removed
+ * from the hash table. If unmatched is NULL, a new unmatched entry was
+ * added to the hash table, and there was no conflict.
+ */
+
+ if (skb && unmatched && unmatched->timestamp) {
+ unmatched->skb = skb;
+ } else if (timestamp && unmatched && unmatched->skb) {
+ unmatched->timestamp = timestamp;
+ } else if (unmatched) {
+ /* unmatched holds an older entry of the same type: either an
+ * skb if we are handling skb, or a timestamp if we are handling
+ * timestamp. We can't match that up, so save what we have.
+ */
+ conflict = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key,
+ skb, timestamp);
+ if (IS_ERR(conflict)) {
+ if (skb)
+ mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb,
+ key.local_port,
+ key.ingress, NULL);
+ } else {
+ /* Above, we removed an object with this key from the
+ * hash table, under lock, so conflict can not be a
+ * valid pointer.
+ */
+ WARN_ON_ONCE(conflict);
+ }
+ }
+
+ spin_unlock(&mlxsw_sp->ptp_state->unmatched_lock);
+
+ if (unmatched)
+ mlxsw_sp1_ptp_unmatched_finish(mlxsw_sp, unmatched);
+
+ rcu_read_unlock();
+}
+
+static void mlxsw_sp1_ptp_got_packet(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port,
+ bool ingress)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ struct mlxsw_sp1_ptp_key key;
+ u8 types;
+ int err;
+
+ mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ if (!mlxsw_sp_port)
+ goto immediate;
+
+ types = ingress ? mlxsw_sp_port->ptp.ing_types :
+ mlxsw_sp_port->ptp.egr_types;
+ if (!types)
+ goto immediate;
+
+ memset(&key, 0, sizeof(key));
+ key.local_port = local_port;
+ key.ingress = ingress;
+
+ err = mlxsw_sp_ptp_parse(skb, &key.domain_number, &key.message_type,
+ &key.sequence_id);
+ if (err)
+ goto immediate;
+
+ /* For packets whose timestamping was not enabled on this port, don't
+ * bother trying to match the timestamp.
+ */
+ if (!((1 << key.message_type) & types))
+ goto immediate;
+
+ mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, skb, 0);
+ return;
+
+immediate:
+ mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, local_port, ingress, NULL);
+}
+
+void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress,
+ u8 local_port, u8 message_type,
+ u8 domain_number, u16 sequence_id,
+ u64 timestamp)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ struct mlxsw_sp1_ptp_key key;
+ u8 types;
+
+ mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ if (!mlxsw_sp_port)
+ return;
+
+ types = ingress ? mlxsw_sp_port->ptp.ing_types :
+ mlxsw_sp_port->ptp.egr_types;
+
+ /* For message types whose timestamping was not enabled on this port,
+ * don't bother with the timestamp.
+ */
+ if (!((1 << message_type) & types))
+ return;
+
+ memset(&key, 0, sizeof(key));
+ key.local_port = local_port;
+ key.domain_number = domain_number;
+ key.message_type = message_type;
+ key.sequence_id = sequence_id;
+ key.ingress = ingress;
+
+ mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, NULL, timestamp);
+}
+
+void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+ u8 local_port)
+{
+ skb_reset_mac_header(skb);
+ mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, true);
+}
+
+void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port)
+{
+ mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, false);
+}
+
+static void
+mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state,
+ struct mlxsw_sp1_ptp_unmatched *unmatched)
+{
+ int err;
+
+ /* If an unmatched entry has an SKB, it has to be handed over to the
+ * networking stack. This is usually done from a trap handler, which is
+ * invoked in a softirq context. Here we are going to do it in process
+ * context. If that were to be interrupted by a softirq, it could cause
+ * a deadlock when an attempt is made to take an already-taken lock
+ * somewhere along the sending path. Disable softirqs to prevent this.
+ */
+ local_bh_disable();
+
+ spin_lock(&ptp_state->unmatched_lock);
+ err = rhashtable_remove_fast(&ptp_state->unmatched_ht,
+ &unmatched->ht_node,
+ mlxsw_sp1_ptp_unmatched_ht_params);
+ spin_unlock(&ptp_state->unmatched_lock);
+
+ if (err)
+ /* The packet was matched with timestamp during the walk. */
+ goto out;
+
+ /* mlxsw_sp1_ptp_unmatched_finish() invokes netif_receive_skb(). While
+ * the comment at that function states that it can only be called in
+ * soft IRQ context, this pattern of local_bh_disable() +
+ * netif_receive_skb(), in process context, is seen elsewhere in the
+ * kernel, notably in pktgen.
+ */
+ mlxsw_sp1_ptp_unmatched_finish(ptp_state->mlxsw_sp, unmatched);
+
+out:
+ local_bh_enable();
+}
+
+static void mlxsw_sp1_ptp_ht_gc(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct mlxsw_sp1_ptp_unmatched *unmatched;
+ struct mlxsw_sp_ptp_state *ptp_state;
+ struct rhashtable_iter iter;
+ u32 gc_cycle;
+ void *obj;
+
+ ptp_state = container_of(dwork, struct mlxsw_sp_ptp_state, ht_gc_dw);
+ gc_cycle = ptp_state->gc_cycle++;
+
+ rhashtable_walk_enter(&ptp_state->unmatched_ht, &iter);
+ rhashtable_walk_start(&iter);
+ while ((obj = rhashtable_walk_next(&iter))) {
+ if (IS_ERR(obj))
+ continue;
+
+ unmatched = obj;
+ if (unmatched->gc_cycle <= gc_cycle)
+ mlxsw_sp1_ptp_ht_gc_collect(ptp_state, unmatched);
+ }
+ rhashtable_walk_stop(&iter);
+ rhashtable_walk_exit(&iter);
+
+ mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw,
+ MLXSW_SP1_PTP_HT_GC_INTERVAL);
+}
+
+static int mlxsw_sp_ptp_mtptpt_set(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_reg_mtptpt_trap_id trap_id,
+ u16 message_type)
+{
+ char mtptpt_pl[MLXSW_REG_MTPTPT_LEN];
+
+ mlxsw_reg_mtptptp_pack(mtptpt_pl, trap_id, message_type);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtptpt), mtptpt_pl);
+}
+
+static int mlxsw_sp1_ptp_set_fifo_clr_on_trap(struct mlxsw_sp *mlxsw_sp,
+ bool clr)
+{
+ char mogcr_pl[MLXSW_REG_MOGCR_LEN] = {0};
+ int err;
+
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mogcr_ptp_iftc_set(mogcr_pl, clr);
+ mlxsw_reg_mogcr_ptp_eftc_set(mogcr_pl, clr);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl);
+}
+
+static int mlxsw_sp1_ptp_mtpppc_set(struct mlxsw_sp *mlxsw_sp,
+ u16 ing_types, u16 egr_types)
+{
+ char mtpppc_pl[MLXSW_REG_MTPPPC_LEN];
+
+ mlxsw_reg_mtpppc_pack(mtpppc_pl, ing_types, egr_types);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtpppc), mtpppc_pl);
+}
+
+struct mlxsw_sp1_ptp_shaper_params {
+ u32 ethtool_speed;
+ enum mlxsw_reg_qpsc_port_speed port_speed;
+ u8 shaper_time_exp;
+ u8 shaper_time_mantissa;
+ u8 shaper_inc;
+ u8 shaper_bs;
+ u8 port_to_shaper_credits;
+ int ing_timestamp_inc;
+ int egr_timestamp_inc;
+};
+
+static const struct mlxsw_sp1_ptp_shaper_params
+mlxsw_sp1_ptp_shaper_params[] = {
+ {
+ .ethtool_speed = SPEED_100,
+ .port_speed = MLXSW_REG_QPSC_PORT_SPEED_100M,
+ .shaper_time_exp = 4,
+ .shaper_time_mantissa = 12,
+ .shaper_inc = 9,
+ .shaper_bs = 1,
+ .port_to_shaper_credits = 1,
+ .ing_timestamp_inc = -313,
+ .egr_timestamp_inc = 313,
+ },
+ {
+ .ethtool_speed = SPEED_1000,
+ .port_speed = MLXSW_REG_QPSC_PORT_SPEED_1G,
+ .shaper_time_exp = 0,
+ .shaper_time_mantissa = 12,
+ .shaper_inc = 6,
+ .shaper_bs = 0,
+ .port_to_shaper_credits = 1,
+ .ing_timestamp_inc = -35,
+ .egr_timestamp_inc = 35,
+ },
+ {
+ .ethtool_speed = SPEED_10000,
+ .port_speed = MLXSW_REG_QPSC_PORT_SPEED_10G,
+ .shaper_time_exp = 0,
+ .shaper_time_mantissa = 2,
+ .shaper_inc = 14,
+ .shaper_bs = 1,
+ .port_to_shaper_credits = 1,
+ .ing_timestamp_inc = -11,
+ .egr_timestamp_inc = 11,
+ },
+ {
+ .ethtool_speed = SPEED_25000,
+ .port_speed = MLXSW_REG_QPSC_PORT_SPEED_25G,
+ .shaper_time_exp = 0,
+ .shaper_time_mantissa = 0,
+ .shaper_inc = 11,
+ .shaper_bs = 1,
+ .port_to_shaper_credits = 1,
+ .ing_timestamp_inc = -14,
+ .egr_timestamp_inc = 14,
+ },
+};
+
+#define MLXSW_SP1_PTP_SHAPER_PARAMS_LEN ARRAY_SIZE(mlxsw_sp1_ptp_shaper_params)
+
+static int mlxsw_sp1_ptp_shaper_params_set(struct mlxsw_sp *mlxsw_sp)
+{
+ const struct mlxsw_sp1_ptp_shaper_params *params;
+ char qpsc_pl[MLXSW_REG_QPSC_LEN];
+ int i, err;
+
+ for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) {
+ params = &mlxsw_sp1_ptp_shaper_params[i];
+ mlxsw_reg_qpsc_pack(qpsc_pl, params->port_speed,
+ params->shaper_time_exp,
+ params->shaper_time_mantissa,
+ params->shaper_inc, params->shaper_bs,
+ params->port_to_shaper_credits,
+ params->ing_timestamp_inc,
+ params->egr_timestamp_inc);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpsc), qpsc_pl);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_ptp_state *ptp_state;
+ u16 message_type;
+ int err;
+
+ err = mlxsw_sp1_ptp_shaper_params_set(mlxsw_sp);
+ if (err)
+ return ERR_PTR(err);
+
+ ptp_state = kzalloc(sizeof(*ptp_state), GFP_KERNEL);
+ if (!ptp_state)
+ return ERR_PTR(-ENOMEM);
+ ptp_state->mlxsw_sp = mlxsw_sp;
+
+ spin_lock_init(&ptp_state->unmatched_lock);
+
+ err = rhashtable_init(&ptp_state->unmatched_ht,
+ &mlxsw_sp1_ptp_unmatched_ht_params);
+ if (err)
+ goto err_hashtable_init;
+
+ /* Delive these message types as PTP0. */
+ message_type = BIT(MLXSW_SP_PTP_MESSAGE_TYPE_SYNC) |
+ BIT(MLXSW_SP_PTP_MESSAGE_TYPE_DELAY_REQ) |
+ BIT(MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_REQ) |
+ BIT(MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_RESP);
+ err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0,
+ message_type);
+ if (err)
+ goto err_mtptpt_set;
+
+ /* Everything else is PTP1. */
+ message_type = ~message_type;
+ err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1,
+ message_type);
+ if (err)
+ goto err_mtptpt1_set;
+
+ err = mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, true);
+ if (err)
+ goto err_fifo_clr;
+
+ INIT_DELAYED_WORK(&ptp_state->ht_gc_dw, mlxsw_sp1_ptp_ht_gc);
+ mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw,
+ MLXSW_SP1_PTP_HT_GC_INTERVAL);
+ return ptp_state;
+
+err_fifo_clr:
+ mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0);
+err_mtptpt1_set:
+ mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0);
+err_mtptpt_set:
+ rhashtable_destroy(&ptp_state->unmatched_ht);
+err_hashtable_init:
+ kfree(ptp_state);
+ return ERR_PTR(err);
+}
+
+void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state)
+{
+ struct mlxsw_sp *mlxsw_sp = ptp_state->mlxsw_sp;
+
+ cancel_delayed_work_sync(&ptp_state->ht_gc_dw);
+ mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp, 0, 0);
+ mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, false);
+ mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0);
+ mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0);
+ rhashtable_free_and_destroy(&ptp_state->unmatched_ht,
+ &mlxsw_sp1_ptp_unmatched_free_fn, NULL);
+ kfree(ptp_state);
+}
+
+int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config)
+{
+ *config = mlxsw_sp_port->ptp.hwtstamp_config;
+ return 0;
+}
+
+static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config,
+ u16 *p_ing_types, u16 *p_egr_types,
+ enum hwtstamp_rx_filters *p_rx_filter)
+{
+ enum hwtstamp_rx_filters rx_filter = config->rx_filter;
+ enum hwtstamp_tx_types tx_type = config->tx_type;
+ u16 ing_types = 0x00;
+ u16 egr_types = 0x00;
+
+ switch (tx_type) {
+ case HWTSTAMP_TX_OFF:
+ egr_types = 0x00;
+ break;
+ case HWTSTAMP_TX_ON:
+ egr_types = 0xff;
+ break;
+ case HWTSTAMP_TX_ONESTEP_SYNC:
+ return -ERANGE;
+ }
+
+ switch (rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ ing_types = 0x00;
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ ing_types = 0x01;
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ ing_types = 0x02;
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ ing_types = 0x0f;
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ ing_types = 0xff;
+ break;
+ case HWTSTAMP_FILTER_SOME:
+ case HWTSTAMP_FILTER_NTP_ALL:
+ return -ERANGE;
+ }
+
+ *p_ing_types = ing_types;
+ *p_egr_types = egr_types;
+ *p_rx_filter = rx_filter;
+ return 0;
+}
+
+static int mlxsw_sp1_ptp_mtpppc_update(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 ing_types, u16 egr_types)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_port *tmp;
+ int i;
+
+ /* MTPPPC configures timestamping globally, not per port. Find the
+ * configuration that contains all configured timestamping requests.
+ */
+ for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) {
+ tmp = mlxsw_sp->ports[i];
+ if (tmp && tmp != mlxsw_sp_port) {
+ ing_types |= tmp->ptp.ing_types;
+ egr_types |= tmp->ptp.egr_types;
+ }
+ }
+
+ return mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp_port->mlxsw_sp,
+ ing_types, egr_types);
+}
+
+static bool mlxsw_sp1_ptp_hwtstamp_enabled(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ return mlxsw_sp_port->ptp.ing_types || mlxsw_sp_port->ptp.egr_types;
+}
+
+static int
+mlxsw_sp1_ptp_port_shaper_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qeec_pl[MLXSW_REG_QEEC_LEN];
+
+ mlxsw_reg_qeec_ptps_pack(qeec_pl, mlxsw_sp_port->local_port, enable);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
+}
+
+static int mlxsw_sp1_ptp_port_shaper_check(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops;
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char ptys_pl[MLXSW_REG_PTYS_LEN];
+ u32 eth_proto_oper, speed;
+ bool ptps = false;
+ int err, i;
+
+ if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port))
+ return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, false);
+
+ port_type_speed_ops = mlxsw_sp->port_type_speed_ops;
+ port_type_speed_ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl,
+ mlxsw_sp_port->local_port, 0,
+ false);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
+ if (err)
+ return err;
+ port_type_speed_ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, NULL, NULL,
+ &eth_proto_oper);
+
+ speed = port_type_speed_ops->from_ptys_speed(mlxsw_sp, eth_proto_oper);
+ for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) {
+ if (mlxsw_sp1_ptp_shaper_params[i].ethtool_speed == speed) {
+ ptps = true;
+ break;
+ }
+ }
+
+ return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, ptps);
+}
+
+void mlxsw_sp1_ptp_shaper_work(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ int err;
+
+ mlxsw_sp_port = container_of(dwork, struct mlxsw_sp_port,
+ ptp.shaper_dw);
+
+ if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port))
+ return;
+
+ err = mlxsw_sp1_ptp_port_shaper_check(mlxsw_sp_port);
+ if (err)
+ netdev_err(mlxsw_sp_port->dev, "Failed to set up PTP shaper\n");
+}
+
+int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config)
+{
+ enum hwtstamp_rx_filters rx_filter;
+ u16 ing_types;
+ u16 egr_types;
+ int err;
+
+ err = mlxsw_sp_ptp_get_message_types(config, &ing_types, &egr_types,
+ &rx_filter);
+ if (err)
+ return err;
+
+ err = mlxsw_sp1_ptp_mtpppc_update(mlxsw_sp_port, ing_types, egr_types);
+ if (err)
+ return err;
+
+ mlxsw_sp_port->ptp.hwtstamp_config = *config;
+ mlxsw_sp_port->ptp.ing_types = ing_types;
+ mlxsw_sp_port->ptp.egr_types = egr_types;
+
+ err = mlxsw_sp1_ptp_port_shaper_check(mlxsw_sp_port);
+ if (err)
+ return err;
+
+ /* Notify the ioctl caller what we are actually timestamping. */
+ config->rx_filter = rx_filter;
+
+ return 0;
+}
+
+int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp,
+ struct ethtool_ts_info *info)
+{
+ info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp);
+
+ info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ info->tx_types = BIT(HWTSTAMP_TX_OFF) |
+ BIT(HWTSTAMP_TX_ON);
+
+ info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+ BIT(HWTSTAMP_FILTER_ALL);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h
index 76fa00a4be75..72e55f6926b9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h
@@ -5,11 +5,27 @@
#define _MLXSW_SPECTRUM_PTP_H
#include <linux/device.h>
+#include <linux/rhashtable.h>
-#include "spectrum.h"
-
+struct mlxsw_sp;
+struct mlxsw_sp_port;
struct mlxsw_sp_ptp_clock;
+enum {
+ MLXSW_SP_PTP_MESSAGE_TYPE_SYNC,
+ MLXSW_SP_PTP_MESSAGE_TYPE_DELAY_REQ,
+ MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_REQ,
+ MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_RESP,
+};
+
+static inline int mlxsw_sp_ptp_get_ts_info_noptp(struct ethtool_ts_info *info)
+{
+ info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE;
+ info->phc_index = -1;
+ return 0;
+}
+
#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
struct mlxsw_sp_ptp_clock *
@@ -17,6 +33,32 @@ mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev);
void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock);
+struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp);
+
+void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state);
+
+void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+ u8 local_port);
+
+void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port);
+
+void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress,
+ u8 local_port, u8 message_type,
+ u8 domain_number, u16 sequence_id,
+ u64 timestamp);
+
+int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config);
+
+int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config);
+
+void mlxsw_sp1_ptp_shaper_work(struct work_struct *work);
+
+int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp,
+ struct ethtool_ts_info *info);
+
#else
static inline struct mlxsw_sp_ptp_clock *
@@ -29,6 +71,60 @@ static inline void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock)
{
}
+static inline struct mlxsw_sp_ptp_state *
+mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp)
+{
+ return NULL;
+}
+
+static inline void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state)
+{
+}
+
+static inline void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port)
+{
+ mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp);
+}
+
+static inline void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port)
+{
+ dev_kfree_skb_any(skb);
+}
+
+static inline void
+mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress,
+ u8 local_port, u8 message_type,
+ u8 domain_number,
+ u16 sequence_id, u64 timestamp)
+{
+}
+
+static inline int
+mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void mlxsw_sp1_ptp_shaper_work(struct work_struct *work)
+{
+}
+
+static inline int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp,
+ struct ethtool_ts_info *info)
+{
+ return mlxsw_sp_ptp_get_ts_info_noptp(info);
+}
+
#endif
static inline struct mlxsw_sp_ptp_clock *
@@ -41,4 +137,50 @@ static inline void mlxsw_sp2_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock)
{
}
+static inline struct mlxsw_sp_ptp_state *
+mlxsw_sp2_ptp_init(struct mlxsw_sp *mlxsw_sp)
+{
+ return NULL;
+}
+
+static inline void mlxsw_sp2_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state)
+{
+}
+
+static inline void mlxsw_sp2_ptp_receive(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port)
+{
+ mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp);
+}
+
+static inline void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp,
+ struct sk_buff *skb, u8 local_port)
+{
+ dev_kfree_skb_any(skb);
+}
+
+static inline int
+mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
+mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct hwtstamp_config *config)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void mlxsw_sp2_ptp_shaper_work(struct work_struct *work)
+{
+}
+
+static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp,
+ struct ethtool_ts_info *info)
+{
+ return mlxsw_sp_ptp_get_ts_info_noptp(info);
+}
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index fc4f19167262..bdab96f5bc70 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -299,6 +299,8 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb,
u64 len;
int err;
+ memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb));
+
if (mlxsw_core_skb_transmit_busy(mlxsw_sx->core, &tx_info))
return NETDEV_TX_BUSY;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 451216dd7f6b..19202bdb5105 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -17,6 +17,8 @@ enum {
MLXSW_TRAP_ID_MVRP = 0x15,
MLXSW_TRAP_ID_RPVST = 0x16,
MLXSW_TRAP_ID_DHCP = 0x19,
+ MLXSW_TRAP_ID_PTP0 = 0x28,
+ MLXSW_TRAP_ID_PTP1 = 0x29,
MLXSW_TRAP_ID_IGMP_QUERY = 0x30,
MLXSW_TRAP_ID_IGMP_V1_REPORT = 0x31,
MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32,
@@ -76,6 +78,10 @@ enum {
enum mlxsw_event_trap_id {
/* Port Up/Down event generated by hardware */
MLXSW_TRAP_ID_PUDE = 0x8,
+ /* PTP Ingress FIFO has a new entry */
+ MLXSW_TRAP_ID_PTP_ING_FIFO = 0x2D,
+ /* PTP Egress FIFO has a new entry */
+ MLXSW_TRAP_ID_PTP_EGR_FIFO = 0x2E,
};
#endif /* _MLXSW_TRAP_H */
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 8bea3004d66c..b6bd31fe44b2 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -170,13 +170,36 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output,
return 0;
}
+static bool
+nfp_flower_tun_is_gre(struct tc_cls_flower_offload *flow, int start_idx)
+{
+ struct flow_action_entry *act = flow->rule->action.entries;
+ int num_act = flow->rule->action.num_entries;
+ int act_idx;
+
+ /* Preparse action list for next mirred or redirect action */
+ for (act_idx = start_idx + 1; act_idx < num_act; act_idx++)
+ if (act[act_idx].id == FLOW_ACTION_REDIRECT ||
+ act[act_idx].id == FLOW_ACTION_MIRRED)
+ return netif_is_gretap(act[act_idx].dev);
+
+ return false;
+}
+
static enum nfp_flower_tun_type
-nfp_fl_get_tun_from_act_l4_port(struct nfp_app *app,
- const struct flow_action_entry *act)
+nfp_fl_get_tun_from_act(struct nfp_app *app,
+ struct tc_cls_flower_offload *flow,
+ const struct flow_action_entry *act, int act_idx)
{
const struct ip_tunnel_info *tun = act->tunnel;
struct nfp_flower_priv *priv = app->priv;
+ /* Determine the tunnel type based on the egress netdev
+ * in the mirred action for tunnels without l4.
+ */
+ if (nfp_flower_tun_is_gre(flow, act_idx))
+ return NFP_FL_TUNNEL_GRE;
+
switch (tun->key.tp_dst) {
case htons(IANA_VXLAN_UDP_PORT):
return NFP_FL_TUNNEL_VXLAN;
@@ -281,15 +304,13 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len,
}
static int
-nfp_fl_set_ipv4_udp_tun(struct nfp_app *app,
- struct nfp_fl_set_ipv4_udp_tun *set_tun,
- const struct flow_action_entry *act,
- struct nfp_fl_pre_tunnel *pre_tun,
- enum nfp_flower_tun_type tun_type,
- struct net_device *netdev,
- struct netlink_ext_ack *extack)
+nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun,
+ const struct flow_action_entry *act,
+ struct nfp_fl_pre_tunnel *pre_tun,
+ enum nfp_flower_tun_type tun_type,
+ struct net_device *netdev, struct netlink_ext_ack *extack)
{
- size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun);
+ size_t act_size = sizeof(struct nfp_fl_set_ipv4_tun);
const struct ip_tunnel_info *ip_tun = act->tunnel;
struct nfp_flower_priv *priv = app->priv;
u32 tmp_set_ip_tun_type_index = 0;
@@ -843,9 +864,9 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
enum nfp_flower_tun_type *tun_type, int *tun_out_cnt,
int *out_cnt, u32 *csum_updated,
struct nfp_flower_pedit_acts *set_act,
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack, int act_idx)
{
- struct nfp_fl_set_ipv4_udp_tun *set_tun;
+ struct nfp_fl_set_ipv4_tun *set_tun;
struct nfp_fl_pre_tunnel *pre_tun;
struct nfp_fl_push_vlan *psh_v;
struct nfp_fl_pop_vlan *pop_v;
@@ -898,7 +919,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
case FLOW_ACTION_TUNNEL_ENCAP: {
const struct ip_tunnel_info *ip_tun = act->tunnel;
- *tun_type = nfp_fl_get_tun_from_act_l4_port(app, act);
+ *tun_type = nfp_fl_get_tun_from_act(app, flow, act, act_idx);
if (*tun_type == NFP_FL_TUNNEL_NONE) {
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel type in action list");
return -EOPNOTSUPP;
@@ -914,7 +935,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
* If none, the packet falls back before applying other actions.
*/
if (*a_len + sizeof(struct nfp_fl_pre_tunnel) +
- sizeof(struct nfp_fl_set_ipv4_udp_tun) > NFP_FL_MAX_A_SIZ) {
+ sizeof(struct nfp_fl_set_ipv4_tun) > NFP_FL_MAX_A_SIZ) {
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at tunnel encap");
return -EOPNOTSUPP;
}
@@ -928,11 +949,11 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
return err;
set_tun = (void *)&nfp_fl->action_data[*a_len];
- err = nfp_fl_set_ipv4_udp_tun(app, set_tun, act, pre_tun,
- *tun_type, netdev, extack);
+ err = nfp_fl_set_ipv4_tun(app, set_tun, act, pre_tun,
+ *tun_type, netdev, extack);
if (err)
return err;
- *a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun);
+ *a_len += sizeof(struct nfp_fl_set_ipv4_tun);
}
break;
case FLOW_ACTION_TUNNEL_DECAP:
@@ -1024,8 +1045,8 @@ int nfp_flower_compile_action(struct nfp_app *app,
memset(&set_act, 0, sizeof(set_act));
err = nfp_flower_loop_action(app, act, flow, nfp_flow, &act_len,
netdev, &tun_type, &tun_out_cnt,
- &out_cnt, &csum_updated, &set_act,
- extack);
+ &out_cnt, &csum_updated,
+ &set_act, extack, i);
if (err)
return err;
act_cnt++;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 537f7fc19584..0f1706ae5bfc 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -8,6 +8,7 @@
#include <linux/skbuff.h>
#include <linux/types.h>
#include <net/geneve.h>
+#include <net/gre.h>
#include <net/vxlan.h>
#include "../nfp_app.h"
@@ -22,6 +23,7 @@
#define NFP_FLOWER_LAYER_CT BIT(6)
#define NFP_FLOWER_LAYER_VXLAN BIT(7)
+#define NFP_FLOWER_LAYER2_GRE BIT(0)
#define NFP_FLOWER_LAYER2_GENEVE BIT(5)
#define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6)
@@ -37,6 +39,9 @@
#define NFP_FL_IP_FRAG_FIRST BIT(7)
#define NFP_FL_IP_FRAGMENTED BIT(6)
+/* GRE Tunnel flags */
+#define NFP_FL_GRE_FLAG_KEY BIT(2)
+
/* Compressed HW representation of TCP Flags */
#define NFP_FL_TCP_FLAG_URG BIT(4)
#define NFP_FL_TCP_FLAG_PSH BIT(3)
@@ -107,6 +112,7 @@
enum nfp_flower_tun_type {
NFP_FL_TUNNEL_NONE = 0,
+ NFP_FL_TUNNEL_GRE = 1,
NFP_FL_TUNNEL_VXLAN = 2,
NFP_FL_TUNNEL_GENEVE = 4,
};
@@ -203,7 +209,7 @@ struct nfp_fl_pre_tunnel {
__be32 extra[3];
};
-struct nfp_fl_set_ipv4_udp_tun {
+struct nfp_fl_set_ipv4_tun {
struct nfp_fl_act_head head;
__be16 reserved;
__be64 tun_id __packed;
@@ -354,6 +360,16 @@ struct nfp_flower_ipv6 {
struct in6_addr ipv6_dst;
};
+struct nfp_flower_tun_ipv4 {
+ __be32 src;
+ __be32 dst;
+};
+
+struct nfp_flower_tun_ip_ext {
+ u8 tos;
+ u8 ttl;
+};
+
/* Flow Frame IPv4 UDP TUNNEL --> Tunnel details (4W/16B)
* -----------------------------------------------------------------
* 3 2 1
@@ -371,15 +387,42 @@ struct nfp_flower_ipv6 {
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
struct nfp_flower_ipv4_udp_tun {
- __be32 ip_src;
- __be32 ip_dst;
+ struct nfp_flower_tun_ipv4 ipv4;
__be16 reserved1;
- u8 tos;
- u8 ttl;
+ struct nfp_flower_tun_ip_ext ip_ext;
__be32 reserved2;
__be32 tun_id;
};
+/* Flow Frame GRE TUNNEL --> Tunnel details (6W/24B)
+ * -----------------------------------------------------------------
+ * 3 2 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv4_addr_src |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv4_addr_dst |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | tun_flags | tos | ttl |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Reserved | Ethertype |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Key |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+struct nfp_flower_ipv4_gre_tun {
+ struct nfp_flower_tun_ipv4 ipv4;
+ __be16 tun_flags;
+ struct nfp_flower_tun_ip_ext ip_ext;
+ __be16 reserved1;
+ __be16 ethertype;
+ __be32 tun_key;
+ __be32 reserved2;
+};
+
struct nfp_flower_geneve_options {
u8 data[NFP_FL_MAX_GENEVE_OPT_KEY];
};
@@ -530,6 +573,8 @@ nfp_fl_netdev_is_tunnel_type(struct net_device *netdev,
{
if (netif_is_vxlan(netdev))
return tun_type == NFP_FL_TUNNEL_VXLAN;
+ if (netif_is_gretap(netdev))
+ return tun_type == NFP_FL_TUNNEL_GRE;
if (netif_is_geneve(netdev))
return tun_type == NFP_FL_TUNNEL_GENEVE;
@@ -546,6 +591,8 @@ static inline bool nfp_fl_is_netdev_to_offload(struct net_device *netdev)
return true;
if (netif_is_geneve(netdev))
return true;
+ if (netif_is_gretap(netdev))
+ return true;
return false;
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 371b5be33dc7..c1690de19172 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -281,6 +281,71 @@ nfp_flower_compile_geneve_opt(void *ext, void *msk,
}
static void
+nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext,
+ struct nfp_flower_tun_ipv4 *msk,
+ struct tc_cls_flower_offload *flow)
+{
+ struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow);
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_enc_ipv4_addrs(rule, &match);
+ ext->src = match.key->src;
+ ext->dst = match.key->dst;
+ msk->src = match.mask->src;
+ msk->dst = match.mask->dst;
+ }
+}
+
+static void
+nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext,
+ struct nfp_flower_tun_ip_ext *msk,
+ struct tc_cls_flower_offload *flow)
+{
+ struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow);
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+ struct flow_match_ip match;
+
+ flow_rule_match_enc_ip(rule, &match);
+ ext->tos = match.key->tos;
+ ext->ttl = match.key->ttl;
+ msk->tos = match.mask->tos;
+ msk->ttl = match.mask->ttl;
+ }
+}
+
+static void
+nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext,
+ struct nfp_flower_ipv4_gre_tun *msk,
+ struct tc_cls_flower_offload *flow)
+{
+ struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow);
+
+ memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun));
+ memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun));
+
+ /* NVGRE is the only supported GRE tunnel type */
+ ext->ethertype = cpu_to_be16(ETH_P_TEB);
+ msk->ethertype = cpu_to_be16(~0);
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_match_enc_keyid match;
+
+ flow_rule_match_enc_keyid(rule, &match);
+ ext->tun_key = match.key->keyid;
+ msk->tun_key = match.mask->keyid;
+
+ ext->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY);
+ msk->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY);
+ }
+
+ nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow);
+ nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow);
+}
+
+static void
nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext,
struct nfp_flower_ipv4_udp_tun *msk,
struct tc_cls_flower_offload *flow)
@@ -301,25 +366,8 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext,
msk->tun_id = cpu_to_be32(temp_vni);
}
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
- struct flow_match_ipv4_addrs match;
-
- flow_rule_match_enc_ipv4_addrs(rule, &match);
- ext->ip_src = match.key->src;
- ext->ip_dst = match.key->dst;
- msk->ip_src = match.mask->src;
- msk->ip_dst = match.mask->dst;
- }
-
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
- struct flow_match_ip match;
-
- flow_rule_match_enc_ip(rule, &match);
- ext->tos = match.key->tos;
- ext->ttl = match.key->ttl;
- msk->tos = match.mask->tos;
- msk->ttl = match.mask->ttl;
- }
+ nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow);
+ nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow);
}
int nfp_flower_compile_flow_match(struct nfp_app *app,
@@ -406,12 +454,27 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
msk += sizeof(struct nfp_flower_ipv6);
}
+ if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GRE) {
+ __be32 tun_dst;
+
+ nfp_flower_compile_ipv4_gre_tun((void *)ext, (void *)msk, flow);
+ tun_dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst;
+ ext += sizeof(struct nfp_flower_ipv4_gre_tun);
+ msk += sizeof(struct nfp_flower_ipv4_gre_tun);
+
+ /* Store the tunnel destination in the rule data.
+ * This must be present and be an exact match.
+ */
+ nfp_flow->nfp_tun_ipv4_addr = tun_dst;
+ nfp_tunnel_add_ipv4_off(app, tun_dst);
+ }
+
if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN ||
key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) {
__be32 tun_dst;
nfp_flower_compile_ipv4_udp_tun((void *)ext, (void *)msk, flow);
- tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ip_dst;
+ tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst;
ext += sizeof(struct nfp_flower_ipv4_udp_tun);
msk += sizeof(struct nfp_flower_ipv4_udp_tun);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 39e6599f2bd7..6dbe947269c3 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -52,8 +52,7 @@
#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \
(BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
- BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
- BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS))
#define NFP_FLOWER_MERGE_FIELDS \
(NFP_FLOWER_LAYER_PORT | \
@@ -141,16 +140,16 @@ static bool nfp_flower_check_higher_than_l3(struct tc_cls_flower_offload *f)
}
static int
-nfp_flower_calc_opt_layer(struct flow_match_enc_opts *enc_opts,
+nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts,
u32 *key_layer_two, int *key_size,
struct netlink_ext_ack *extack)
{
- if (enc_opts->key->len > NFP_FL_MAX_GENEVE_OPT_KEY) {
+ if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) {
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: geneve options exceed maximum length");
return -EOPNOTSUPP;
}
- if (enc_opts->key->len > 0) {
+ if (enc_opts->len > 0) {
*key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP;
*key_size += sizeof(struct nfp_flower_geneve_options);
}
@@ -159,6 +158,57 @@ nfp_flower_calc_opt_layer(struct flow_match_enc_opts *enc_opts,
}
static int
+nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports,
+ struct flow_dissector_key_enc_opts *enc_op,
+ u32 *key_layer_two, u8 *key_layer, int *key_size,
+ struct nfp_flower_priv *priv,
+ enum nfp_flower_tun_type *tun_type,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ switch (enc_ports->dst) {
+ case htons(IANA_VXLAN_UDP_PORT):
+ *tun_type = NFP_FL_TUNNEL_VXLAN;
+ *key_layer |= NFP_FLOWER_LAYER_VXLAN;
+ *key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+
+ if (enc_op) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on vxlan tunnels");
+ return -EOPNOTSUPP;
+ }
+ break;
+ case htons(GENEVE_UDP_PORT):
+ if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve offload");
+ return -EOPNOTSUPP;
+ }
+ *tun_type = NFP_FL_TUNNEL_GENEVE;
+ *key_layer |= NFP_FLOWER_LAYER_EXT_META;
+ *key_size += sizeof(struct nfp_flower_ext_meta);
+ *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE;
+ *key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+
+ if (!enc_op)
+ break;
+ if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve option offload");
+ return -EOPNOTSUPP;
+ }
+ err = nfp_flower_calc_opt_layer(enc_op, key_layer_two,
+ key_size, extack);
+ if (err)
+ return err;
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel type unknown");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
nfp_flower_calculate_key_layers(struct nfp_app *app,
struct net_device *netdev,
struct nfp_fl_key_ls *ret_key_ls,
@@ -234,58 +284,51 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
return -EOPNOTSUPP;
}
- flow_rule_match_enc_ports(rule, &enc_ports);
- if (enc_ports.mask->dst != cpu_to_be16(~0)) {
- NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match L4 destination port is supported");
- return -EOPNOTSUPP;
- }
-
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS))
flow_rule_match_enc_opts(rule, &enc_op);
- switch (enc_ports.key->dst) {
- case htons(IANA_VXLAN_UDP_PORT):
- *tun_type = NFP_FL_TUNNEL_VXLAN;
- key_layer |= NFP_FLOWER_LAYER_VXLAN;
- key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
- if (enc_op.key) {
- NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on vxlan tunnels");
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+ /* check if GRE, which has no enc_ports */
+ if (netif_is_gretap(netdev)) {
+ *tun_type = NFP_FL_TUNNEL_GRE;
+ key_layer |= NFP_FLOWER_LAYER_EXT_META;
+ key_size += sizeof(struct nfp_flower_ext_meta);
+ key_layer_two |= NFP_FLOWER_LAYER2_GRE;
+ key_size +=
+ sizeof(struct nfp_flower_ipv4_gre_tun);
+
+ if (enc_op.key) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels");
+ return -EOPNOTSUPP;
+ }
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels");
return -EOPNOTSUPP;
}
- break;
- case htons(GENEVE_UDP_PORT):
- if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) {
- NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve offload");
+ } else {
+ flow_rule_match_enc_ports(rule, &enc_ports);
+ if (enc_ports.mask->dst != cpu_to_be16(~0)) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match L4 destination port is supported");
return -EOPNOTSUPP;
}
- *tun_type = NFP_FL_TUNNEL_GENEVE;
- key_layer |= NFP_FLOWER_LAYER_EXT_META;
- key_size += sizeof(struct nfp_flower_ext_meta);
- key_layer_two |= NFP_FLOWER_LAYER2_GENEVE;
- key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
- if (!enc_op.key)
- break;
- if (!(priv->flower_ext_feats &
- NFP_FL_FEATS_GENEVE_OPT)) {
- NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve option offload");
- return -EOPNOTSUPP;
- }
- err = nfp_flower_calc_opt_layer(&enc_op, &key_layer_two,
- &key_size, extack);
+ err = nfp_flower_calc_udp_tun_layer(enc_ports.key,
+ enc_op.key,
+ &key_layer_two,
+ &key_layer,
+ &key_size, priv,
+ tun_type, extack);
if (err)
return err;
- break;
- default:
- NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel type unknown");
- return -EOPNOTSUPP;
- }
- /* Ensure the ingress netdev matches the expected tun type. */
- if (!nfp_fl_netdev_is_tunnel_type(netdev, *tun_type)) {
- NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ingress netdev does not match the expected tunnel type");
- return -EOPNOTSUPP;
+ /* Ensure the ingress netdev matches the expected
+ * tun type.
+ */
+ if (!nfp_fl_netdev_is_tunnel_type(netdev, *tun_type)) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ingress netdev does not match the expected tunnel type");
+ return -EOPNOTSUPP;
+ }
}
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 6bbd77ba56f2..0659756bf2bb 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -401,7 +401,7 @@ struct nfp_net_r_vector {
struct {
struct tasklet_struct tasklet;
struct sk_buff_head queue;
- struct spinlock lock;
+ spinlock_t lock;
};
};
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
index f3ebdc5e8f85..0dacf2c18c09 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ptp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
@@ -44,6 +44,8 @@
/* Add/subtract the Adjustment_Value when making a Drift adjustment */
#define QED_DRIFT_CNTR_DIRECTION_SHIFT 31
#define QED_TIMESTAMP_MASK BIT(16)
+/* Param mask for Hardware to detect/timestamp the unicast PTP packets */
+#define QED_PTP_UCAST_PARAM_MASK 0xF
static enum qed_resc_lock qed_ptcdev_to_resc(struct qed_hwfn *p_hwfn)
{
@@ -243,7 +245,8 @@ static int qed_ptp_hw_cfg_filters(struct qed_dev *cdev,
return -EINVAL;
}
- qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_PARAM_MASK, 0);
+ qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_PARAM_MASK,
+ QED_PTP_UCAST_PARAM_MASK);
qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_RULE_MASK, rule_mask);
qed_wr(p_hwfn, p_ptt, NIG_REG_RX_PTP_EN, enable_cfg);
@@ -253,7 +256,8 @@ static int qed_ptp_hw_cfg_filters(struct qed_dev *cdev,
qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_RULE_MASK, 0x3FFF);
} else {
qed_wr(p_hwfn, p_ptt, NIG_REG_TX_PTP_EN, enable_cfg);
- qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_PARAM_MASK, 0);
+ qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_PARAM_MASK,
+ QED_PTP_UCAST_PARAM_MASK);
qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_RULE_MASK, rule_mask);
}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
index af3b037fa442..5632da05145a 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
@@ -1066,7 +1066,7 @@ static int qlcnic_sriov_pf_cfg_ip_cmd(struct qlcnic_bc_trans *trans,
{
struct qlcnic_vf_info *vf = trans->vf;
struct qlcnic_adapter *adapter = vf->adapter;
- int err = -EIO;
+ int err;
cmd->req.arg[1] |= vf->vp->handle << 16;
cmd->req.arg[1] |= BIT_31;
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 48b8a90f7057..495d40c8f20e 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -5784,7 +5784,6 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data,
skb = napi_alloc_skb(&tp->napi, pkt_size);
if (skb)
skb_copy_to_linear_data(skb, data, pkt_size);
- dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
return skb;
}
@@ -6651,13 +6650,36 @@ static int rtl_get_ether_clk(struct rtl8169_private *tp)
return rc;
}
+static void rtl_init_mac_address(struct rtl8169_private *tp)
+{
+ struct net_device *dev = tp->dev;
+ u8 *mac_addr = dev->dev_addr;
+ int rc, i;
+
+ rc = eth_platform_get_mac_address(tp_to_dev(tp), mac_addr);
+ if (!rc)
+ goto done;
+
+ rtl_read_mac_address(tp, mac_addr);
+ if (is_valid_ether_addr(mac_addr))
+ goto done;
+
+ for (i = 0; i < ETH_ALEN; i++)
+ mac_addr[i] = RTL_R8(tp, MAC0 + i);
+ if (is_valid_ether_addr(mac_addr))
+ goto done;
+
+ eth_hw_addr_random(dev);
+ dev_warn(tp_to_dev(tp), "can't read MAC address, setting random one\n");
+done:
+ rtl_rar_set(tp, mac_addr);
+}
+
static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
- /* align to u16 for is_valid_ether_addr() */
- u8 mac_addr[ETH_ALEN] __aligned(2) = {};
struct rtl8169_private *tp;
struct net_device *dev;
- int chipset, region, i;
+ int chipset, region;
int jumbo_max, rc;
dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp));
@@ -6723,15 +6745,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
tp->cp_cmd = RTL_R16(tp, CPlusCmd);
if (sizeof(dma_addr_t) > 4 && tp->mac_version >= RTL_GIGA_MAC_VER_18 &&
- !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
+ !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)))
dev->features |= NETIF_F_HIGHDMA;
- } else {
- rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
- if (rc < 0) {
- dev_err(&pdev->dev, "DMA configuration failed\n");
- return rc;
- }
- }
rtl_init_rxcfg(tp);
@@ -6756,16 +6771,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
u64_stats_init(&tp->rx_stats.syncp);
u64_stats_init(&tp->tx_stats.syncp);
- /* get MAC address */
- rc = eth_platform_get_mac_address(&pdev->dev, mac_addr);
- if (rc)
- rtl_read_mac_address(tp, mac_addr);
-
- if (is_valid_ether_addr(mac_addr))
- rtl_rar_set(tp, mac_addr);
-
- for (i = 0; i < ETH_ALEN; i++)
- dev->dev_addr[i] = RTL_R8(tp, MAC0 + i);
+ rtl_init_mac_address(tp);
dev->ethtool_ops = &rtl8169_ethtool_ops;
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index 67f9bb6e941b..aba6eea72f15 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -360,7 +360,7 @@ static int sis635_get_mac_addr(struct pci_dev *pci_dev,
* SiS962 or SiS963 model, use EEPROM to store MAC address. And EEPROM
* is shared by
* LAN and 1394. When access EEPROM, send EEREQ signal to hardware first
- * and wait for EEGNT. If EEGNT is ON, EEPROM is permitted to be access
+ * and wait for EEGNT. If EEGNT is ON, EEPROM is permitted to be accessed
* by LAN, otherwise is not. After MAC address is read from EEPROM, send
* EEDONE signal to refuse EEPROM access by LAN.
* The EEPROM map of SiS962 or SiS963 is different to SiS900.
@@ -882,7 +882,7 @@ static void mdio_reset(struct sis900_private *sp)
* mdio_read - read MII PHY register
* @net_dev: the net device to read
* @phy_id: the phy address to read
- * @location: the phy regiester id to read
+ * @location: the phy register id to read
*
* Read MII registers through MDIO and MDC
* using MDIO management frame structure and protocol(defined by ISO/IEC).
@@ -926,7 +926,7 @@ static int mdio_read(struct net_device *net_dev, int phy_id, int location)
* mdio_write - write MII PHY register
* @net_dev: the net device to write
* @phy_id: the phy address to write
- * @location: the phy regiester id to write
+ * @location: the phy register id to write
* @value: the register value to write with
*
* Write MII registers with @value through MDIO and MDC
@@ -1057,7 +1057,7 @@ sis900_open(struct net_device *net_dev)
sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED);
/* Enable all known interrupts by setting the interrupt mask. */
- sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE);
+ sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxDESC);
sw32(cr, RxENA | sr32(cr));
sw32(ier, IE);
@@ -1101,7 +1101,7 @@ sis900_init_rxfilter (struct net_device * net_dev)
sw32(rfdr, w);
if (netif_msg_hw(sis_priv)) {
- printk(KERN_DEBUG "%s: Receive Filter Addrss[%d]=%x\n",
+ printk(KERN_DEBUG "%s: Receive Filter Address[%d]=%x\n",
net_dev->name, i, sr32(rfdr));
}
}
@@ -1148,7 +1148,7 @@ sis900_init_tx_ring(struct net_device *net_dev)
* @net_dev: the net device to initialize for
*
* Initialize the Rx descriptor ring,
- * and pre-allocate recevie buffers (socket buffer)
+ * and pre-allocate receive buffers (socket buffer)
*/
static void
@@ -1578,7 +1578,7 @@ static void sis900_tx_timeout(struct net_device *net_dev)
sw32(txdp, sis_priv->tx_ring_dma);
/* Enable all known interrupts by setting the interrupt mask. */
- sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE);
+ sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxDESC);
}
/**
@@ -1618,7 +1618,7 @@ sis900_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
spin_unlock_irqrestore(&sis_priv->lock, flags);
return NETDEV_TX_OK;
}
- sis_priv->tx_ring[entry].cmdsts = (OWN | skb->len);
+ sis_priv->tx_ring[entry].cmdsts = (OWN | INTR | skb->len);
sw32(cr, TxENA | sr32(cr));
sis_priv->cur_tx ++;
@@ -1674,8 +1674,8 @@ static irqreturn_t sis900_interrupt(int irq, void *dev_instance)
do {
status = sr32(isr);
- if ((status & (HIBERR|TxURN|TxERR|TxIDLE|RxORN|RxERR|RxOK)) == 0)
- /* nothing intresting happened */
+ if ((status & (HIBERR|TxURN|TxERR|TxDESC|RxORN|RxERR|RxOK)) == 0)
+ /* nothing interesting happened */
break;
handled = 1;
@@ -1684,7 +1684,7 @@ static irqreturn_t sis900_interrupt(int irq, void *dev_instance)
/* Rx interrupt */
sis900_rx(net_dev);
- if (status & (TxURN | TxERR | TxIDLE))
+ if (status & (TxURN | TxERR | TxDESC))
/* Tx interrupt */
sis900_finish_xmit(net_dev);
@@ -1896,8 +1896,8 @@ static void sis900_finish_xmit (struct net_device *net_dev)
if (tx_status & OWN) {
/* The packet is not transmitted yet (owned by hardware) !
- * Note: the interrupt is generated only when Tx Machine
- * is idle, so this is an almost impossible case */
+ * Note: this is an almost impossible condition
+ * on TxDESC interrupt ('descriptor interrupt') */
break;
}
@@ -2473,7 +2473,7 @@ static int sis900_resume(struct pci_dev *pci_dev)
sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED);
/* Enable all known interrupts by setting the interrupt mask. */
- sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE);
+ sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxDESC);
sw32(cr, RxENA | sr32(cr));
sw32(ier, IE);
diff --git a/drivers/net/ethernet/socionext/Kconfig b/drivers/net/ethernet/socionext/Kconfig
index 25f18be27423..95e99baf3f45 100644
--- a/drivers/net/ethernet/socionext/Kconfig
+++ b/drivers/net/ethernet/socionext/Kconfig
@@ -26,6 +26,7 @@ config SNI_NETSEC
tristate "Socionext NETSEC ethernet support"
depends on (ARCH_SYNQUACER || COMPILE_TEST) && OF
select PHYLIB
+ select PAGE_POOL
select MII
---help---
Enable to add support for the SocioNext NetSec Gigabit Ethernet
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index 48fd7448b513..f6e261c6a059 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -9,8 +9,12 @@
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/netlink.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#include <net/tcp.h>
+#include <net/page_pool.h>
#include <net/ip6_checksum.h>
#define NETSEC_REG_SOFT_RST 0x104
@@ -235,22 +239,41 @@
#define DESC_NUM 256
#define NETSEC_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
-#define NETSEC_RX_BUF_SZ 1536
+#define NETSEC_RXBUF_HEADROOM (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \
+ NET_IP_ALIGN)
+#define NETSEC_RX_BUF_NON_DATA (NETSEC_RXBUF_HEADROOM + \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
#define DESC_SZ sizeof(struct netsec_de)
#define NETSEC_F_NETSEC_VER_MAJOR_NUM(x) ((x) & 0xffff0000)
+#define NETSEC_XDP_PASS 0
+#define NETSEC_XDP_CONSUMED BIT(0)
+#define NETSEC_XDP_TX BIT(1)
+#define NETSEC_XDP_REDIR BIT(2)
+#define NETSEC_XDP_RX_OK (NETSEC_XDP_PASS | NETSEC_XDP_TX | NETSEC_XDP_REDIR)
+
enum ring_id {
NETSEC_RING_TX = 0,
NETSEC_RING_RX
};
+enum buf_type {
+ TYPE_NETSEC_SKB = 0,
+ TYPE_NETSEC_XDP_TX,
+ TYPE_NETSEC_XDP_NDO,
+};
+
struct netsec_desc {
- struct sk_buff *skb;
+ union {
+ struct sk_buff *skb;
+ struct xdp_frame *xdpf;
+ };
dma_addr_t dma_addr;
void *addr;
u16 len;
+ u8 buf_type;
};
struct netsec_desc_ring {
@@ -258,11 +281,17 @@ struct netsec_desc_ring {
struct netsec_desc *desc;
void *vaddr;
u16 head, tail;
+ u16 xdp_xmit; /* netsec_xdp_xmit packets */
+ bool is_xdp;
+ struct page_pool *page_pool;
+ struct xdp_rxq_info xdp_rxq;
+ spinlock_t lock; /* XDP tx queue locking */
};
struct netsec_priv {
struct netsec_desc_ring desc_ring[NETSEC_RING_MAX];
struct ethtool_coalesce et_coalesce;
+ struct bpf_prog *xdp_prog;
spinlock_t reglock; /* protect reg access */
struct napi_struct napi;
phy_interface_t phy_interface;
@@ -299,6 +328,11 @@ struct netsec_rx_pkt_info {
bool err_flag;
};
+static void netsec_set_tx_de(struct netsec_priv *priv,
+ struct netsec_desc_ring *dring,
+ const struct netsec_tx_pkt_ctrl *tx_ctrl,
+ const struct netsec_desc *desc, void *buf);
+
static void netsec_write(struct netsec_priv *priv, u32 reg_addr, u32 val)
{
writel(val, priv->ioaddr + reg_addr);
@@ -600,12 +634,14 @@ static void netsec_set_rx_de(struct netsec_priv *priv,
static bool netsec_clean_tx_dring(struct netsec_priv *priv)
{
struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX];
- unsigned int pkts, bytes;
struct netsec_de *entry;
int tail = dring->tail;
+ unsigned int bytes;
int cnt = 0;
- pkts = 0;
+ if (dring->is_xdp)
+ spin_lock(&dring->lock);
+
bytes = 0;
entry = dring->vaddr + DESC_SZ * tail;
@@ -618,13 +654,23 @@ static bool netsec_clean_tx_dring(struct netsec_priv *priv)
eop = (entry->attr >> NETSEC_TX_LAST) & 1;
dma_rmb();
- dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
- DMA_TO_DEVICE);
- if (eop) {
- pkts++;
+ if (desc->buf_type == TYPE_NETSEC_SKB)
+ dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
+ DMA_TO_DEVICE);
+ else if (desc->buf_type == TYPE_NETSEC_XDP_NDO)
+ dma_unmap_single(priv->dev, desc->dma_addr,
+ desc->len, DMA_TO_DEVICE);
+
+ if (!eop)
+ goto next;
+
+ if (desc->buf_type == TYPE_NETSEC_SKB) {
bytes += desc->skb->len;
dev_kfree_skb(desc->skb);
+ } else {
+ xdp_return_frame(desc->xdpf);
}
+next:
/* clean up so netsec_uninit_pkt_dring() won't free the skb
* again
*/
@@ -641,6 +687,8 @@ static bool netsec_clean_tx_dring(struct netsec_priv *priv)
entry = dring->vaddr + DESC_SZ * tail;
cnt++;
}
+ if (dring->is_xdp)
+ spin_unlock(&dring->lock);
if (!cnt)
return false;
@@ -673,33 +721,33 @@ static void netsec_process_tx(struct netsec_priv *priv)
}
static void *netsec_alloc_rx_data(struct netsec_priv *priv,
- dma_addr_t *dma_handle, u16 *desc_len,
- bool napi)
+ dma_addr_t *dma_handle, u16 *desc_len)
+
{
- size_t total_len = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- size_t payload_len = NETSEC_RX_BUF_SZ;
- dma_addr_t mapping;
- void *buf;
- total_len += SKB_DATA_ALIGN(payload_len + NETSEC_SKB_PAD);
+ struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
+ enum dma_data_direction dma_dir;
+ struct page *page;
- buf = napi ? napi_alloc_frag(total_len) : netdev_alloc_frag(total_len);
- if (!buf)
+ page = page_pool_dev_alloc_pages(dring->page_pool);
+ if (!page)
return NULL;
- mapping = dma_map_single(priv->dev, buf + NETSEC_SKB_PAD, payload_len,
- DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(priv->dev, mapping)))
- goto err_out;
-
- *dma_handle = mapping;
- *desc_len = payload_len;
-
- return buf;
+ /* We allocate the same buffer length for XDP and non-XDP cases.
+ * page_pool API will map the whole page, skip what's needed for
+ * network payloads and/or XDP
+ */
+ *dma_handle = page_pool_get_dma_addr(page) + NETSEC_RXBUF_HEADROOM;
+ /* Make sure the incoming payload fits in the page for XDP and non-XDP
+ * cases and reserve enough space for headroom + skb_shared_info
+ */
+ *desc_len = PAGE_SIZE - NETSEC_RX_BUF_NON_DATA;
+ dma_dir = page_pool_get_dma_dir(dring->page_pool);
+ dma_sync_single_for_device(priv->dev,
+ *dma_handle - NETSEC_RXBUF_HEADROOM,
+ PAGE_SIZE, dma_dir);
-err_out:
- skb_free_frag(buf);
- return NULL;
+ return page_address(page);
}
static void netsec_rx_fill(struct netsec_priv *priv, u16 from, u16 num)
@@ -716,22 +764,160 @@ static void netsec_rx_fill(struct netsec_priv *priv, u16 from, u16 num)
}
}
+static void netsec_xdp_ring_tx_db(struct netsec_priv *priv, u16 pkts)
+{
+ if (likely(pkts))
+ netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, pkts);
+}
+
+static void netsec_finalize_xdp_rx(struct netsec_priv *priv, u32 xdp_res,
+ u16 pkts)
+{
+ if (xdp_res & NETSEC_XDP_REDIR)
+ xdp_do_flush_map();
+
+ if (xdp_res & NETSEC_XDP_TX)
+ netsec_xdp_ring_tx_db(priv, pkts);
+}
+
+/* The current driver only supports 1 Txq, this should run under spin_lock() */
+static u32 netsec_xdp_queue_one(struct netsec_priv *priv,
+ struct xdp_frame *xdpf, bool is_ndo)
+
+{
+ struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
+ struct page *page = virt_to_page(xdpf->data);
+ struct netsec_tx_pkt_ctrl tx_ctrl = {};
+ struct netsec_desc tx_desc;
+ dma_addr_t dma_handle;
+ u16 filled;
+
+ if (tx_ring->head >= tx_ring->tail)
+ filled = tx_ring->head - tx_ring->tail;
+ else
+ filled = tx_ring->head + DESC_NUM - tx_ring->tail;
+
+ if (DESC_NUM - filled <= 1)
+ return NETSEC_XDP_CONSUMED;
+
+ if (is_ndo) {
+ /* this is for ndo_xdp_xmit, the buffer needs mapping before
+ * sending
+ */
+ dma_handle = dma_map_single(priv->dev, xdpf->data, xdpf->len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(priv->dev, dma_handle))
+ return NETSEC_XDP_CONSUMED;
+ tx_desc.buf_type = TYPE_NETSEC_XDP_NDO;
+ } else {
+ /* This is the device Rx buffer from page_pool. No need to remap
+ * just sync and send it
+ */
+ struct netsec_desc_ring *rx_ring =
+ &priv->desc_ring[NETSEC_RING_RX];
+ enum dma_data_direction dma_dir =
+ page_pool_get_dma_dir(rx_ring->page_pool);
+
+ dma_handle = page_pool_get_dma_addr(page) +
+ NETSEC_RXBUF_HEADROOM;
+ dma_sync_single_for_device(priv->dev, dma_handle, xdpf->len,
+ dma_dir);
+ tx_desc.buf_type = TYPE_NETSEC_XDP_TX;
+ }
+
+ tx_desc.dma_addr = dma_handle;
+ tx_desc.addr = xdpf->data;
+ tx_desc.len = xdpf->len;
+
+ netsec_set_tx_de(priv, tx_ring, &tx_ctrl, &tx_desc, xdpf);
+
+ return NETSEC_XDP_TX;
+}
+
+static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
+{
+ struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
+ struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+ u32 ret;
+
+ if (unlikely(!xdpf))
+ return NETSEC_XDP_CONSUMED;
+
+ spin_lock(&tx_ring->lock);
+ ret = netsec_xdp_queue_one(priv, xdpf, false);
+ spin_unlock(&tx_ring->lock);
+
+ return ret;
+}
+
+static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
+ struct xdp_buff *xdp)
+{
+ u32 ret = NETSEC_XDP_PASS;
+ int err;
+ u32 act;
+
+ act = bpf_prog_run_xdp(prog, xdp);
+
+ switch (act) {
+ case XDP_PASS:
+ ret = NETSEC_XDP_PASS;
+ break;
+ case XDP_TX:
+ ret = netsec_xdp_xmit_back(priv, xdp);
+ if (ret != NETSEC_XDP_TX)
+ xdp_return_buff(xdp);
+ break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(priv->ndev, xdp, prog);
+ if (!err) {
+ ret = NETSEC_XDP_REDIR;
+ } else {
+ ret = NETSEC_XDP_CONSUMED;
+ xdp_return_buff(xdp);
+ }
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fall through */
+ case XDP_ABORTED:
+ trace_xdp_exception(priv->ndev, prog, act);
+ /* fall through -- handle aborts by dropping packet */
+ case XDP_DROP:
+ ret = NETSEC_XDP_CONSUMED;
+ xdp_return_buff(xdp);
+ break;
+ }
+
+ return ret;
+}
+
static int netsec_process_rx(struct netsec_priv *priv, int budget)
{
struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
struct net_device *ndev = priv->ndev;
struct netsec_rx_pkt_info rx_info;
- struct sk_buff *skb;
+ enum dma_data_direction dma_dir;
+ struct bpf_prog *xdp_prog;
+ struct sk_buff *skb = NULL;
+ u16 xdp_xmit = 0;
+ u32 xdp_act = 0;
int done = 0;
+ rcu_read_lock();
+ xdp_prog = READ_ONCE(priv->xdp_prog);
+ dma_dir = page_pool_get_dma_dir(dring->page_pool);
+
while (done < budget) {
u16 idx = dring->tail;
struct netsec_de *de = dring->vaddr + (DESC_SZ * idx);
struct netsec_desc *desc = &dring->desc[idx];
+ struct page *page = virt_to_page(desc->addr);
+ u32 xdp_result = XDP_PASS;
u16 pkt_len, desc_len;
dma_addr_t dma_handle;
+ struct xdp_buff xdp;
void *buf_addr;
- u32 truesize;
if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) {
/* reading the register clears the irq */
@@ -766,53 +952,71 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
/* allocate a fresh buffer and map it to the hardware.
* This will eventually replace the old buffer in the hardware
*/
- buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len,
- true);
+ buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len);
+
if (unlikely(!buf_addr))
break;
dma_sync_single_for_cpu(priv->dev, desc->dma_addr, pkt_len,
- DMA_FROM_DEVICE);
+ dma_dir);
prefetch(desc->addr);
- truesize = SKB_DATA_ALIGN(desc->len + NETSEC_SKB_PAD) +
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- skb = build_skb(desc->addr, truesize);
+ xdp.data_hard_start = desc->addr;
+ xdp.data = desc->addr + NETSEC_RXBUF_HEADROOM;
+ xdp_set_data_meta_invalid(&xdp);
+ xdp.data_end = xdp.data + pkt_len;
+ xdp.rxq = &dring->xdp_rxq;
+
+ if (xdp_prog) {
+ xdp_result = netsec_run_xdp(priv, xdp_prog, &xdp);
+ if (xdp_result != NETSEC_XDP_PASS) {
+ xdp_act |= xdp_result;
+ if (xdp_result == NETSEC_XDP_TX)
+ xdp_xmit++;
+ goto next;
+ }
+ }
+ skb = build_skb(desc->addr, desc->len + NETSEC_RX_BUF_NON_DATA);
+
if (unlikely(!skb)) {
- /* free the newly allocated buffer, we are not going to
- * use it
+ /* If skb fails recycle_direct will either unmap and
+ * free the page or refill the cache depending on the
+ * cache state. Since we paid the allocation cost if
+ * building an skb fails try to put the page into cache
*/
- dma_unmap_single(priv->dev, dma_handle, desc_len,
- DMA_FROM_DEVICE);
- skb_free_frag(buf_addr);
+ page_pool_recycle_direct(dring->page_pool, page);
netif_err(priv, drv, priv->ndev,
"rx failed to build skb\n");
break;
}
- dma_unmap_single_attrs(priv->dev, desc->dma_addr, desc->len,
- DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ page_pool_release_page(dring->page_pool, page);
- /* Update the descriptor with the new buffer we allocated */
- desc->len = desc_len;
- desc->dma_addr = dma_handle;
- desc->addr = buf_addr;
-
- skb_reserve(skb, NETSEC_SKB_PAD);
- skb_put(skb, pkt_len);
+ skb_reserve(skb, xdp.data - xdp.data_hard_start);
+ skb_put(skb, xdp.data_end - xdp.data);
skb->protocol = eth_type_trans(skb, priv->ndev);
if (priv->rx_cksum_offload_flag &&
rx_info.rx_cksum_result == NETSEC_RX_CKSUM_OK)
skb->ip_summed = CHECKSUM_UNNECESSARY;
- if (napi_gro_receive(&priv->napi, skb) != GRO_DROP) {
+next:
+ if ((skb && napi_gro_receive(&priv->napi, skb) != GRO_DROP) ||
+ xdp_result & NETSEC_XDP_RX_OK) {
ndev->stats.rx_packets++;
- ndev->stats.rx_bytes += pkt_len;
+ ndev->stats.rx_bytes += xdp.data_end - xdp.data;
}
+ /* Update the descriptor with fresh buffers */
+ desc->len = desc_len;
+ desc->dma_addr = dma_handle;
+ desc->addr = buf_addr;
+
netsec_rx_fill(priv, idx, 1);
dring->tail = (dring->tail + 1) % DESC_NUM;
}
+ netsec_finalize_xdp_rx(priv, xdp_act, xdp_xmit);
+
+ rcu_read_unlock();
return done;
}
@@ -842,8 +1046,7 @@ static int netsec_napi_poll(struct napi_struct *napi, int budget)
static void netsec_set_tx_de(struct netsec_priv *priv,
struct netsec_desc_ring *dring,
const struct netsec_tx_pkt_ctrl *tx_ctrl,
- const struct netsec_desc *desc,
- struct sk_buff *skb)
+ const struct netsec_desc *desc, void *buf)
{
int idx = dring->head;
struct netsec_de *de;
@@ -866,10 +1069,16 @@ static void netsec_set_tx_de(struct netsec_priv *priv,
de->data_buf_addr_lw = lower_32_bits(desc->dma_addr);
de->buf_len_info = (tx_ctrl->tcp_seg_len << 16) | desc->len;
de->attr = attr;
- dma_wmb();
+ /* under spin_lock if using XDP */
+ if (!dring->is_xdp)
+ dma_wmb();
dring->desc[idx] = *desc;
- dring->desc[idx].skb = skb;
+ if (desc->buf_type == TYPE_NETSEC_SKB)
+ dring->desc[idx].skb = buf;
+ else if (desc->buf_type == TYPE_NETSEC_XDP_TX ||
+ desc->buf_type == TYPE_NETSEC_XDP_NDO)
+ dring->desc[idx].xdpf = buf;
/* move head ahead */
dring->head = (dring->head + 1) % DESC_NUM;
@@ -920,8 +1129,12 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
u16 tso_seg_len = 0;
int filled;
+ if (dring->is_xdp)
+ spin_lock_bh(&dring->lock);
filled = netsec_desc_used(dring);
if (netsec_check_stop_tx(priv, filled)) {
+ if (dring->is_xdp)
+ spin_unlock_bh(&dring->lock);
net_warn_ratelimited("%s %s Tx queue full\n",
dev_name(priv->dev), ndev->name);
return NETDEV_TX_BUSY;
@@ -954,6 +1167,8 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
tx_desc.dma_addr = dma_map_single(priv->dev, skb->data,
skb_headlen(skb), DMA_TO_DEVICE);
if (dma_mapping_error(priv->dev, tx_desc.dma_addr)) {
+ if (dring->is_xdp)
+ spin_unlock_bh(&dring->lock);
netif_err(priv, drv, priv->ndev,
"%s: DMA mapping failed\n", __func__);
ndev->stats.tx_dropped++;
@@ -962,11 +1177,14 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
}
tx_desc.addr = skb->data;
tx_desc.len = skb_headlen(skb);
+ tx_desc.buf_type = TYPE_NETSEC_SKB;
skb_tx_timestamp(skb);
netdev_sent_queue(priv->ndev, skb->len);
netsec_set_tx_de(priv, dring, &tx_ctrl, &tx_desc, skb);
+ if (dring->is_xdp)
+ spin_unlock_bh(&dring->lock);
netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, 1); /* submit another tx */
return NETDEV_TX_OK;
@@ -980,19 +1198,31 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id)
if (!dring->vaddr || !dring->desc)
return;
-
for (idx = 0; idx < DESC_NUM; idx++) {
desc = &dring->desc[idx];
if (!desc->addr)
continue;
- dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
- id == NETSEC_RING_RX ? DMA_FROM_DEVICE :
- DMA_TO_DEVICE);
- if (id == NETSEC_RING_RX)
- skb_free_frag(desc->addr);
- else if (id == NETSEC_RING_TX)
+ if (id == NETSEC_RING_RX) {
+ struct page *page = virt_to_page(desc->addr);
+
+ page_pool_put_page(dring->page_pool, page, false);
+ } else if (id == NETSEC_RING_TX) {
+ dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
+ DMA_TO_DEVICE);
dev_kfree_skb(desc->skb);
+ }
+ }
+
+ /* Rx is currently using page_pool
+ * since the pool is created during netsec_setup_rx_dring(), we need to
+ * free the pool manually if the registration failed
+ */
+ if (id == NETSEC_RING_RX) {
+ if (xdp_rxq_info_is_reg(&dring->xdp_rxq))
+ xdp_rxq_info_unreg(&dring->xdp_rxq);
+ else
+ page_pool_free(dring->page_pool);
}
memset(dring->desc, 0, sizeof(struct netsec_desc) * DESC_NUM);
@@ -1042,6 +1272,7 @@ err:
static void netsec_setup_tx_dring(struct netsec_priv *priv)
{
struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX];
+ struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
int i;
for (i = 0; i < DESC_NUM; i++) {
@@ -1054,12 +1285,35 @@ static void netsec_setup_tx_dring(struct netsec_priv *priv)
*/
de->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD;
}
+
+ if (xdp_prog)
+ dring->is_xdp = true;
+ else
+ dring->is_xdp = false;
+
}
static int netsec_setup_rx_dring(struct netsec_priv *priv)
{
struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
- int i;
+ struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
+ struct page_pool_params pp_params = { 0 };
+ int i, err;
+
+ pp_params.order = 0;
+ /* internal DMA mapping in page_pool */
+ pp_params.flags = PP_FLAG_DMA_MAP;
+ pp_params.pool_size = DESC_NUM;
+ pp_params.nid = cpu_to_node(0);
+ pp_params.dev = priv->dev;
+ pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+
+ dring->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(dring->page_pool)) {
+ err = PTR_ERR(dring->page_pool);
+ dring->page_pool = NULL;
+ goto err_out;
+ }
for (i = 0; i < DESC_NUM; i++) {
struct netsec_desc *desc = &dring->desc[i];
@@ -1067,10 +1321,10 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv)
void *buf;
u16 len;
- buf = netsec_alloc_rx_data(priv, &dma_handle, &len,
- false);
+ buf = netsec_alloc_rx_data(priv, &dma_handle, &len);
+
if (!buf) {
- netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
+ err = -ENOMEM;
goto err_out;
}
desc->dma_addr = dma_handle;
@@ -1079,11 +1333,20 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv)
}
netsec_rx_fill(priv, 0, DESC_NUM);
+ err = xdp_rxq_info_reg(&dring->xdp_rxq, priv->ndev, 0);
+ if (err)
+ goto err_out;
+
+ err = xdp_rxq_info_reg_mem_model(&dring->xdp_rxq, MEM_TYPE_PAGE_POOL,
+ dring->page_pool);
+ if (err)
+ goto err_out;
return 0;
err_out:
- return -ENOMEM;
+ netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
+ return err;
}
static int netsec_netdev_load_ucode_region(struct netsec_priv *priv, u32 reg,
@@ -1463,6 +1726,9 @@ static int netsec_netdev_init(struct net_device *ndev)
if (ret)
goto err2;
+ spin_lock_init(&priv->desc_ring[NETSEC_RING_TX].lock);
+ spin_lock_init(&priv->desc_ring[NETSEC_RING_RX].lock);
+
return 0;
err2:
netsec_free_dring(priv, NETSEC_RING_RX);
@@ -1495,6 +1761,81 @@ static int netsec_netdev_ioctl(struct net_device *ndev, struct ifreq *ifr,
return phy_mii_ioctl(ndev->phydev, ifr, cmd);
}
+static int netsec_xdp_xmit(struct net_device *ndev, int n,
+ struct xdp_frame **frames, u32 flags)
+{
+ struct netsec_priv *priv = netdev_priv(ndev);
+ struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
+ int drops = 0;
+ int i;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ spin_lock(&tx_ring->lock);
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ int err;
+
+ err = netsec_xdp_queue_one(priv, xdpf, true);
+ if (err != NETSEC_XDP_TX) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ } else {
+ tx_ring->xdp_xmit++;
+ }
+ }
+ spin_unlock(&tx_ring->lock);
+
+ if (unlikely(flags & XDP_XMIT_FLUSH)) {
+ netsec_xdp_ring_tx_db(priv, tx_ring->xdp_xmit);
+ tx_ring->xdp_xmit = 0;
+ }
+
+ return n - drops;
+}
+
+static int netsec_xdp_setup(struct netsec_priv *priv, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = priv->ndev;
+ struct bpf_prog *old_prog;
+
+ /* For now just support only the usual MTU sized frames */
+ if (prog && dev->mtu > 1500) {
+ NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported on XDP");
+ return -EOPNOTSUPP;
+ }
+
+ if (netif_running(dev))
+ netsec_netdev_stop(dev);
+
+ /* Detach old prog, if any */
+ old_prog = xchg(&priv->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ if (netif_running(dev))
+ netsec_netdev_open(dev);
+
+ return 0;
+}
+
+static int netsec_xdp(struct net_device *ndev, struct netdev_bpf *xdp)
+{
+ struct netsec_priv *priv = netdev_priv(ndev);
+
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return netsec_xdp_setup(priv, xdp->prog, xdp->extack);
+ case XDP_QUERY_PROG:
+ xdp->prog_id = priv->xdp_prog ? priv->xdp_prog->aux->id : 0;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static const struct net_device_ops netsec_netdev_ops = {
.ndo_init = netsec_netdev_init,
.ndo_uninit = netsec_netdev_uninit,
@@ -1505,6 +1846,8 @@ static const struct net_device_ops netsec_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_do_ioctl = netsec_netdev_ioctl,
+ .ndo_xdp_xmit = netsec_xdp_xmit,
+ .ndo_bpf = netsec_xdp,
};
static int netsec_of_probe(struct platform_device *pdev,
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index c43e2da4e7e3..943189dcccb1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config STMMAC_ETH
- tristate "STMicroelectronics 10/100/1000/EQOS Ethernet driver"
+ tristate "STMicroelectronics Multi-Gigabit Ethernet driver"
depends on HAS_IOMEM && HAS_DMA
select MII
select PHYLINK
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index ad9e9368535d..2403a65167b2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -246,7 +246,7 @@ struct stmmac_safety_stats {
/* Max/Min RI Watchdog Timer count value */
#define MAX_DMA_RIWT 0xff
-#define MIN_DMA_RIWT 0x20
+#define MIN_DMA_RIWT 0x10
/* Tx coalesce parameters */
#define STMMAC_COAL_TX_TIMER 1000
#define STMMAC_MAX_COAL_TX_TICK 100000
@@ -351,6 +351,7 @@ struct dma_features {
unsigned int frpsel;
unsigned int frpbs;
unsigned int frpes;
+ unsigned int addr64;
};
/* GMAC TX FIFO is 8K, Rx FIFO is 16K */
@@ -392,8 +393,12 @@ struct mac_link {
u32 speed100;
u32 speed1000;
u32 speed2500;
- u32 speed10000;
u32 duplex;
+ struct {
+ u32 speed2500;
+ u32 speed5000;
+ u32 speed10000;
+ } xgmii;
};
struct mii_regs {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index cf6436d3d6c7..dbde23e7e169 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -443,6 +443,15 @@ static void dwmac4_clear(struct dma_desc *p)
p->des3 = 0;
}
+static int set_16kib_bfsize(int mtu)
+{
+ int ret = 0;
+
+ if (unlikely(mtu >= BUF_SIZE_8KiB))
+ ret = BUF_SIZE_16KiB;
+ return ret;
+}
+
const struct stmmac_desc_ops dwmac4_desc_ops = {
.tx_status = dwmac4_wrback_get_tx_status,
.rx_status = dwmac4_wrback_get_rx_status,
@@ -469,4 +478,6 @@ const struct stmmac_desc_ops dwmac4_desc_ops = {
.clear = dwmac4_clear,
};
-const struct stmmac_mode_ops dwmac4_ring_mode_ops = { };
+const struct stmmac_mode_ops dwmac4_ring_mode_ops = {
+ .set_16kib_bfsize = set_16kib_bfsize,
+};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index b8296eb41011..9a9792527530 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -15,10 +15,14 @@
/* MAC Registers */
#define XGMAC_TX_CONFIG 0x00000000
#define XGMAC_CONFIG_SS_OFF 29
-#define XGMAC_CONFIG_SS_MASK GENMASK(30, 29)
+#define XGMAC_CONFIG_SS_MASK GENMASK(31, 29)
#define XGMAC_CONFIG_SS_10000 (0x0 << XGMAC_CONFIG_SS_OFF)
-#define XGMAC_CONFIG_SS_2500 (0x2 << XGMAC_CONFIG_SS_OFF)
-#define XGMAC_CONFIG_SS_1000 (0x3 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_2500_GMII (0x2 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_1000_GMII (0x3 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_100_MII (0x4 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_5000 (0x5 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_2500 (0x6 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_10_MII (0x7 << XGMAC_CONFIG_SS_OFF)
#define XGMAC_CONFIG_SARC GENMASK(22, 20)
#define XGMAC_CONFIG_SARC_SHIFT 20
#define XGMAC_CONFIG_JD BIT(16)
@@ -83,6 +87,7 @@
#define XGMAC_HWFEAT_GMIISEL BIT(1)
#define XGMAC_HW_FEATURE1 0x00000120
#define XGMAC_HWFEAT_TSOEN BIT(18)
+#define XGMAC_HWFEAT_ADDR64 GENMASK(15, 14)
#define XGMAC_HWFEAT_TXFIFOSIZE GENMASK(10, 6)
#define XGMAC_HWFEAT_RXFIFOSIZE GENMASK(4, 0)
#define XGMAC_HW_FEATURE2 0x00000124
@@ -168,6 +173,7 @@
#define XGMAC_EN_LPI BIT(15)
#define XGMAC_LPI_XIT_PKT BIT(14)
#define XGMAC_AAL BIT(12)
+#define XGMAC_EAME BIT(11)
#define XGMAC_BLEN GENMASK(7, 1)
#define XGMAC_BLEN256 BIT(7)
#define XGMAC_BLEN128 BIT(6)
@@ -177,6 +183,10 @@
#define XGMAC_BLEN8 BIT(2)
#define XGMAC_BLEN4 BIT(1)
#define XGMAC_UNDEF BIT(0)
+#define XGMAC_TX_EDMA_CTRL 0x00003040
+#define XGMAC_TDPS GENMASK(29, 0)
+#define XGMAC_RX_EDMA_CTRL 0x00003044
+#define XGMAC_RDPS GENMASK(29, 0)
#define XGMAC_DMA_CH_CONTROL(x) (0x00003100 + (0x80 * (x)))
#define XGMAC_PBLx8 BIT(16)
#define XGMAC_DMA_CH_TX_CONTROL(x) (0x00003104 + (0x80 * (x)))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index bfa7d6913fd4..0a32c96a7854 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -36,7 +36,7 @@ static void dwxgmac2_core_init(struct mac_device_info *hw,
switch (hw->ps) {
case SPEED_10000:
- tx |= hw->link.speed10000;
+ tx |= hw->link.xgmii.speed10000;
break;
case SPEED_2500:
tx |= hw->link.speed2500;
@@ -381,11 +381,13 @@ int dwxgmac2_setup(struct stmmac_priv *priv)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
mac->link.duplex = 0;
- mac->link.speed10 = 0;
- mac->link.speed100 = 0;
- mac->link.speed1000 = XGMAC_CONFIG_SS_1000;
- mac->link.speed2500 = XGMAC_CONFIG_SS_2500;
- mac->link.speed10000 = XGMAC_CONFIG_SS_10000;
+ mac->link.speed10 = XGMAC_CONFIG_SS_10_MII;
+ mac->link.speed100 = XGMAC_CONFIG_SS_100_MII;
+ mac->link.speed1000 = XGMAC_CONFIG_SS_1000_GMII;
+ mac->link.speed2500 = XGMAC_CONFIG_SS_2500_GMII;
+ mac->link.xgmii.speed2500 = XGMAC_CONFIG_SS_2500;
+ mac->link.xgmii.speed5000 = XGMAC_CONFIG_SS_5000;
+ mac->link.xgmii.speed10000 = XGMAC_CONFIG_SS_10000;
mac->link.speed_mask = XGMAC_CONFIG_SS_MASK;
mac->mii.addr = XGMAC_MDIO_ADDR;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
index 98fa471da7c0..c4c45402b8f8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
@@ -242,8 +242,8 @@ static void dwxgmac2_get_addr(struct dma_desc *p, unsigned int *addr)
static void dwxgmac2_set_addr(struct dma_desc *p, dma_addr_t addr)
{
- p->des0 = cpu_to_le32(addr);
- p->des1 = 0;
+ p->des0 = cpu_to_le32(lower_32_bits(addr));
+ p->des1 = cpu_to_le32(upper_32_bits(addr));
}
static void dwxgmac2_clear(struct dma_desc *p)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 7861a938420a..229c58758cbd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -27,7 +27,7 @@ static void dwxgmac2_dma_init(void __iomem *ioaddr,
if (dma_cfg->aal)
value |= XGMAC_AAL;
- writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE);
+ writel(value | XGMAC_EAME, ioaddr + XGMAC_DMA_SYSBUS_MODE);
}
static void dwxgmac2_dma_init_chan(void __iomem *ioaddr,
@@ -91,11 +91,11 @@ static void dwxgmac2_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
value |= (axi->axi_rd_osr_lmt << XGMAC_RD_OSR_LMT_SHIFT) &
XGMAC_RD_OSR_LMT;
+ if (!axi->axi_fb)
+ value |= XGMAC_UNDEF;
+
value &= ~XGMAC_BLEN;
for (i = 0; i < AXI_BLEN; i++) {
- if (axi->axi_blen[i])
- value &= ~XGMAC_UNDEF;
-
switch (axi->axi_blen[i]) {
case 256:
value |= XGMAC_BLEN256;
@@ -122,6 +122,8 @@ static void dwxgmac2_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
}
writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE);
+ writel(XGMAC_TDPS, ioaddr + XGMAC_TX_EDMA_CTRL);
+ writel(XGMAC_RDPS, ioaddr + XGMAC_RX_EDMA_CTRL);
}
static void dwxgmac2_dma_rx_mode(void __iomem *ioaddr, int mode,
@@ -359,6 +361,23 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
/* MAC HW feature 1 */
hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1);
dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18;
+
+ dma_cap->addr64 = (hw_cap & XGMAC_HWFEAT_ADDR64) >> 14;
+ switch (dma_cap->addr64) {
+ case 0:
+ dma_cap->addr64 = 32;
+ break;
+ case 1:
+ dma_cap->addr64 = 40;
+ break;
+ case 2:
+ dma_cap->addr64 = 48;
+ break;
+ default:
+ dma_cap->addr64 = 32;
+ break;
+ }
+
dma_cap->tx_fifo_size =
128 << ((hw_cap & XGMAC_HWFEAT_TXFIFOSIZE) >> 6);
dma_cap->rx_fifo_size =
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 2dcdf761d525..020159622559 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -112,7 +112,7 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec,
* programmed with (2^32 – <new_sec_value>)
*/
if (gmac4)
- sec = (100000000ULL - sec);
+ sec = -sec;
value = readl(ioaddr + PTP_TCR);
if (value & PTP_TCR_TSCTRLSSR)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index ee4f1e265993..3425d4dda03d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -805,14 +805,43 @@ static void stmmac_validate(struct phylink_config *config,
struct phylink_link_state *state)
{
struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(mac_supported) = { 0, };
__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
int tx_cnt = priv->plat->tx_queues_to_use;
int max_speed = priv->plat->max_speed;
+ phylink_set(mac_supported, 10baseT_Half);
+ phylink_set(mac_supported, 10baseT_Full);
+ phylink_set(mac_supported, 100baseT_Half);
+ phylink_set(mac_supported, 100baseT_Full);
+
+ phylink_set(mac_supported, Autoneg);
+ phylink_set(mac_supported, Pause);
+ phylink_set(mac_supported, Asym_Pause);
+ phylink_set_port_modes(mac_supported);
+
+ if (priv->plat->has_gmac ||
+ priv->plat->has_gmac4 ||
+ priv->plat->has_xgmac) {
+ phylink_set(mac_supported, 1000baseT_Half);
+ phylink_set(mac_supported, 1000baseT_Full);
+ phylink_set(mac_supported, 1000baseKX_Full);
+ }
+
/* Cut down 1G if asked to */
if ((max_speed > 0) && (max_speed < 1000)) {
phylink_set(mask, 1000baseT_Full);
phylink_set(mask, 1000baseX_Full);
+ } else if (priv->plat->has_xgmac) {
+ phylink_set(mac_supported, 2500baseT_Full);
+ phylink_set(mac_supported, 5000baseT_Full);
+ phylink_set(mac_supported, 10000baseSR_Full);
+ phylink_set(mac_supported, 10000baseLR_Full);
+ phylink_set(mac_supported, 10000baseER_Full);
+ phylink_set(mac_supported, 10000baseLRM_Full);
+ phylink_set(mac_supported, 10000baseT_Full);
+ phylink_set(mac_supported, 10000baseKX4_Full);
+ phylink_set(mac_supported, 10000baseKR_Full);
}
/* Half-Duplex can only work with single queue */
@@ -822,7 +851,12 @@ static void stmmac_validate(struct phylink_config *config,
phylink_set(mask, 1000baseT_Half);
}
- bitmap_andnot(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ bitmap_and(supported, supported, mac_supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+ bitmap_andnot(supported, supported, mask,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+ bitmap_and(state->advertising, state->advertising, mac_supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
bitmap_andnot(state->advertising, state->advertising, mask,
__ETHTOOL_LINK_MODE_MASK_NBITS);
}
@@ -842,18 +876,37 @@ static void stmmac_mac_config(struct phylink_config *config, unsigned int mode,
ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
ctrl &= ~priv->hw->link.speed_mask;
- switch (state->speed) {
- case SPEED_1000:
- ctrl |= priv->hw->link.speed1000;
- break;
- case SPEED_100:
- ctrl |= priv->hw->link.speed100;
- break;
- case SPEED_10:
- ctrl |= priv->hw->link.speed10;
- break;
- default:
- return;
+ if (state->interface == PHY_INTERFACE_MODE_USXGMII) {
+ switch (state->speed) {
+ case SPEED_10000:
+ ctrl |= priv->hw->link.xgmii.speed10000;
+ break;
+ case SPEED_5000:
+ ctrl |= priv->hw->link.xgmii.speed5000;
+ break;
+ case SPEED_2500:
+ ctrl |= priv->hw->link.xgmii.speed2500;
+ break;
+ default:
+ return;
+ }
+ } else {
+ switch (state->speed) {
+ case SPEED_2500:
+ ctrl |= priv->hw->link.speed2500;
+ break;
+ case SPEED_1000:
+ ctrl |= priv->hw->link.speed1000;
+ break;
+ case SPEED_100:
+ ctrl |= priv->hw->link.speed100;
+ break;
+ case SPEED_10:
+ ctrl |= priv->hw->link.speed10;
+ break;
+ default:
+ return;
+ }
}
priv->speed = state->speed;
@@ -896,7 +949,7 @@ static void stmmac_mac_link_up(struct phylink_config *config,
struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
stmmac_mac_set(priv, priv->ioaddr, true);
- if (phy) {
+ if (phy && priv->dma_cap.eee) {
priv->eee_active = phy_init_eee(phy, 1) >= 0;
priv->eee_enabled = stmmac_eee_init(priv);
stmmac_set_eee_pls(priv, priv->hw, true);
@@ -953,9 +1006,13 @@ static int stmmac_init_phy(struct net_device *dev)
node = priv->plat->phylink_node;
- if (node) {
+ if (node)
ret = phylink_of_phy_connect(priv->phylink, node, 0);
- } else {
+
+ /* Some DT bindings do not set-up the PHY handle. Let's try to
+ * manually parse it
+ */
+ if (!node || ret) {
int addr = priv->plat->phy_addr;
struct phy_device *phydev;
@@ -1996,18 +2053,16 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
&priv->xstats, chan);
struct stmmac_channel *ch = &priv->channel[chan];
- if (status)
- status |= handle_rx | handle_tx;
-
if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
- stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
- napi_schedule_irqoff(&ch->rx_napi);
+ if (napi_schedule_prep(&ch->rx_napi)) {
+ stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
+ __napi_schedule_irqoff(&ch->rx_napi);
+ status |= handle_tx;
+ }
}
- if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) {
- stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
+ if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use))
napi_schedule_irqoff(&ch->tx_napi);
- }
return status;
}
@@ -2512,9 +2567,9 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS;
if (priv->use_riwt) {
- ret = stmmac_rx_watchdog(priv, priv->ioaddr, MAX_DMA_RIWT, rx_cnt);
+ ret = stmmac_rx_watchdog(priv, priv->ioaddr, MIN_DMA_RIWT, rx_cnt);
if (!ret)
- priv->rx_riwt = MAX_DMA_RIWT;
+ priv->rx_riwt = MIN_DMA_RIWT;
}
if (priv->hw->pcs)
@@ -2717,7 +2772,7 @@ static int stmmac_release(struct net_device *dev)
* This function fills descriptor and request new descriptors according to
* buffer length to fill
*/
-static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
+static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des,
int total_len, bool last_segment, u32 queue)
{
struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
@@ -2728,11 +2783,18 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
tmp_len = total_len;
while (tmp_len > 0) {
+ dma_addr_t curr_addr;
+
tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
desc = tx_q->dma_tx + tx_q->cur_tx;
- desc->des0 = cpu_to_le32(des + (total_len - tmp_len));
+ curr_addr = des + (total_len - tmp_len);
+ if (priv->dma_cap.addr64 <= 32)
+ desc->des0 = cpu_to_le32(curr_addr);
+ else
+ stmmac_set_desc_addr(priv, desc, curr_addr);
+
buff_size = tmp_len >= TSO_MAX_BUFF_SIZE ?
TSO_MAX_BUFF_SIZE : tmp_len;
@@ -2778,11 +2840,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
struct stmmac_priv *priv = netdev_priv(dev);
int nfrags = skb_shinfo(skb)->nr_frags;
u32 queue = skb_get_queue_mapping(skb);
- unsigned int first_entry, des;
+ unsigned int first_entry;
struct stmmac_tx_queue *tx_q;
int tmp_pay_len = 0;
u32 pay_len, mss;
u8 proto_hdr_len;
+ dma_addr_t des;
int i;
tx_q = &priv->tx_queue[queue];
@@ -2839,14 +2902,19 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
tx_q->tx_skbuff_dma[first_entry].buf = des;
tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
- first->des0 = cpu_to_le32(des);
+ if (priv->dma_cap.addr64 <= 32) {
+ first->des0 = cpu_to_le32(des);
- /* Fill start of payload in buff2 of first descriptor */
- if (pay_len)
- first->des1 = cpu_to_le32(des + proto_hdr_len);
+ /* Fill start of payload in buff2 of first descriptor */
+ if (pay_len)
+ first->des1 = cpu_to_le32(des + proto_hdr_len);
- /* If needed take extra descriptors to fill the remaining payload */
- tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE;
+ /* If needed take extra descriptors to fill the remaining payload */
+ tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE;
+ } else {
+ stmmac_set_desc_addr(priv, first, des);
+ tmp_pay_len = pay_len;
+ }
stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue);
@@ -2892,12 +2960,15 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
/* Manage tx mitigation */
tx_q->tx_count_frames += nfrags + 1;
- if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
+ if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
+ !(priv->synopsys_id >= DWMAC_CORE_4_00 &&
+ (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+ priv->hwts_tx_en)) {
+ stmmac_tx_timer_arm(priv, queue);
+ } else {
+ tx_q->tx_count_frames = 0;
stmmac_set_tx_ic(priv, desc);
priv->xstats.tx_set_ic_bit++;
- tx_q->tx_count_frames = 0;
- } else {
- stmmac_tx_timer_arm(priv, queue);
}
skb_tx_timestamp(skb);
@@ -2973,12 +3044,12 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
int i, csum_insertion = 0, is_jumbo = 0;
u32 queue = skb_get_queue_mapping(skb);
int nfrags = skb_shinfo(skb)->nr_frags;
- int entry;
- unsigned int first_entry;
struct dma_desc *desc, *first;
struct stmmac_tx_queue *tx_q;
+ unsigned int first_entry;
unsigned int enh_desc;
- unsigned int des;
+ dma_addr_t des;
+ int entry;
tx_q = &priv->tx_queue[queue];
@@ -3111,12 +3182,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
* element in case of no SG.
*/
tx_q->tx_count_frames += nfrags + 1;
- if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
+ if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
+ !(priv->synopsys_id >= DWMAC_CORE_4_00 &&
+ (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+ priv->hwts_tx_en)) {
+ stmmac_tx_timer_arm(priv, queue);
+ } else {
+ tx_q->tx_count_frames = 0;
stmmac_set_tx_ic(priv, desc);
priv->xstats.tx_set_ic_bit++;
- tx_q->tx_count_frames = 0;
- } else {
- stmmac_tx_timer_arm(priv, queue);
}
skb_tx_timestamp(skb);
@@ -3273,6 +3347,8 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
entry = STMMAC_GET_ENTRY(entry, DMA_RX_SIZE);
}
rx_q->dirty_rx = entry;
+ rx_q->rx_tail_addr = rx_q->dma_rx_phy +
+ (rx_q->dirty_rx * sizeof(struct dma_desc));
stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, queue);
}
@@ -3507,8 +3583,8 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan);
work_done = min(work_done, budget);
- if (work_done < budget && napi_complete_done(napi, work_done))
- stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
+ if (work_done < budget)
+ napi_complete_done(napi, work_done);
/* Force transmission restart */
tx_q = &priv->tx_queue[chan];
@@ -4255,6 +4331,24 @@ int stmmac_dvr_probe(struct device *device,
priv->tso = true;
dev_info(priv->device, "TSO feature enabled\n");
}
+
+ if (priv->dma_cap.addr64) {
+ ret = dma_set_mask_and_coherent(device,
+ DMA_BIT_MASK(priv->dma_cap.addr64));
+ if (!ret) {
+ dev_info(priv->device, "Using %d bits DMA width\n",
+ priv->dma_cap.addr64);
+ } else {
+ ret = dma_set_mask_and_coherent(device, DMA_BIT_MASK(32));
+ if (ret) {
+ dev_err(priv->device, "Failed to set DMA Mask\n");
+ goto error_hw_init;
+ }
+
+ priv->dma_cap.addr64 = 32;
+ }
+ }
+
ndev->features |= ndev->hw_features | NETIF_F_HIGHDMA;
ndev->watchdog_timeo = msecs_to_jiffies(watchdog);
#ifdef STMMAC_VLAN_TAG_USED
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index f8061e34122f..18cadf0b0d66 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -242,7 +242,6 @@ int stmmac_mdio_reset(struct mii_bus *bus)
if (priv->device->of_node) {
struct gpio_desc *reset_gpio;
u32 delays[3] = { 0, 0, 0 };
- int ret;
reset_gpio = devm_gpiod_get_optional(priv->device,
"snps,reset",
@@ -250,15 +249,9 @@ int stmmac_mdio_reset(struct mii_bus *bus)
if (IS_ERR(reset_gpio))
return PTR_ERR(reset_gpio);
- ret = device_property_read_u32_array(priv->device,
- "snps,reset-delays-us",
- delays,
- ARRAY_SIZE(delays));
- if (ret) {
- dev_err(ndev->dev.parent,
- "invalid property snps,reset-delays-us\n");
- return -EINVAL;
- }
+ device_property_read_u32_array(priv->device,
+ "snps,reset-delays-us",
+ delays, ARRAY_SIZE(delays));
if (delays[0])
msleep(DIV_ROUND_UP(delays[0], 1000));
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 6f99437a6962..0bc5863bffeb 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -1217,8 +1217,6 @@ static int link_status_1g_rgmii(struct niu *np, int *link_up_p)
spin_lock_irqsave(&np->lock, flags);
- err = -EINVAL;
-
err = mii_read(np, np->phy_addr, MII_BMSR);
if (err < 0)
goto out;
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 87d361666cdd..14545a8797a8 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -55,6 +55,13 @@
#include <net/net_namespace.h>
#include <linux/u64_stats_sync.h>
+/* blackhole_netdev - a device used for dsts that are marked expired!
+ * This is global device (instead of per-net-ns) since it's not needed
+ * to be per-ns and gets initialized at boot time.
+ */
+struct net_device *blackhole_netdev;
+EXPORT_SYMBOL(blackhole_netdev);
+
/* The higher levels take care of making this non-reentrant (it's
* called with bh's disabled).
*/
@@ -150,12 +157,14 @@ static const struct net_device_ops loopback_ops = {
.ndo_set_mac_address = eth_mac_addr,
};
-/* The loopback device is special. There is only one instance
- * per network namespace.
- */
-static void loopback_setup(struct net_device *dev)
+static void gen_lo_setup(struct net_device *dev,
+ unsigned int mtu,
+ const struct ethtool_ops *eth_ops,
+ const struct header_ops *hdr_ops,
+ const struct net_device_ops *dev_ops,
+ void (*dev_destructor)(struct net_device *dev))
{
- dev->mtu = 64 * 1024;
+ dev->mtu = mtu;
dev->hard_header_len = ETH_HLEN; /* 14 */
dev->min_header_len = ETH_HLEN; /* 14 */
dev->addr_len = ETH_ALEN; /* 6 */
@@ -174,11 +183,20 @@ static void loopback_setup(struct net_device *dev)
| NETIF_F_NETNS_LOCAL
| NETIF_F_VLAN_CHALLENGED
| NETIF_F_LOOPBACK;
- dev->ethtool_ops = &loopback_ethtool_ops;
- dev->header_ops = &eth_header_ops;
- dev->netdev_ops = &loopback_ops;
+ dev->ethtool_ops = eth_ops;
+ dev->header_ops = hdr_ops;
+ dev->netdev_ops = dev_ops;
dev->needs_free_netdev = true;
- dev->priv_destructor = loopback_dev_free;
+ dev->priv_destructor = dev_destructor;
+}
+
+/* The loopback device is special. There is only one instance
+ * per network namespace.
+ */
+static void loopback_setup(struct net_device *dev)
+{
+ gen_lo_setup(dev, (64 * 1024), &loopback_ethtool_ops, &eth_header_ops,
+ &loopback_ops, loopback_dev_free);
}
/* Setup and register the loopback device. */
@@ -213,3 +231,45 @@ out:
struct pernet_operations __net_initdata loopback_net_ops = {
.init = loopback_net_init,
};
+
+/* blackhole netdevice */
+static netdev_tx_t blackhole_netdev_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ kfree_skb(skb);
+ net_warn_ratelimited("%s(): Dropping skb.\n", __func__);
+ return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops blackhole_netdev_ops = {
+ .ndo_start_xmit = blackhole_netdev_xmit,
+};
+
+/* This is a dst-dummy device used specifically for invalidated
+ * DSTs and unlike loopback, this is not per-ns.
+ */
+static void blackhole_netdev_setup(struct net_device *dev)
+{
+ gen_lo_setup(dev, ETH_MIN_MTU, NULL, NULL, &blackhole_netdev_ops, NULL);
+}
+
+/* Setup and register the blackhole_netdev. */
+static int __init blackhole_netdev_init(void)
+{
+ blackhole_netdev = alloc_netdev(0, "blackhole_dev", NET_NAME_UNKNOWN,
+ blackhole_netdev_setup);
+ if (!blackhole_netdev)
+ return -ENOMEM;
+
+ rtnl_lock();
+ dev_init_scheduler(blackhole_netdev);
+ dev_activate(blackhole_netdev);
+ rtnl_unlock();
+
+ blackhole_netdev->flags |= IFF_UP | IFF_RUNNING;
+ dev_net_set(blackhole_netdev, &init_net);
+
+ return 0;
+}
+
+device_initcall(blackhole_netdev_init);
diff --git a/drivers/net/ppp/ppp_mppe.c b/drivers/net/ppp/ppp_mppe.c
index ff61dd8748de..66c8e65f6872 100644
--- a/drivers/net/ppp/ppp_mppe.c
+++ b/drivers/net/ppp/ppp_mppe.c
@@ -63,6 +63,7 @@ MODULE_AUTHOR("Frank Cusack <[email protected]>");
MODULE_DESCRIPTION("Point-to-Point Protocol Microsoft Point-to-Point Encryption support");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("ppp-compress-" __stringify(CI_MPPE));
+MODULE_SOFTDEP("pre: arc4");
MODULE_VERSION("1.0.2");
static unsigned int
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index f3422f85f604..abfa0da9bbd2 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2153,12 +2153,12 @@ static void team_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
dev->hw_features = TEAM_VLAN_FEATURES |
- NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER;
dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
dev->features |= dev->hw_features;
+ dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
}
static int team_newlink(struct net *src_net, struct net_device *dev,
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index d080f8048e52..8b4ad10cf940 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1482,7 +1482,7 @@ static int qmi_wwan_probe(struct usb_interface *intf,
* different. Ignore the current interface if the number of endpoints
* equals the number for the diag interface (two).
*/
- info = (void *)&id->driver_info;
+ info = (void *)id->driver_info;
if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) {
if (desc->bNumEndpoints == 2)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index c6916bf1017b..9f3c839f9e5f 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -38,6 +38,8 @@
#define VETH_XDP_TX BIT(0)
#define VETH_XDP_REDIR BIT(1)
+#define VETH_XDP_TX_BULK_SIZE 16
+
struct veth_rq_stats {
u64 xdp_packets;
u64 xdp_bytes;
@@ -64,6 +66,11 @@ struct veth_priv {
unsigned int requested_headroom;
};
+struct veth_xdp_tx_bq {
+ struct xdp_frame *q[VETH_XDP_TX_BULK_SIZE];
+ unsigned int count;
+};
+
/*
* ethtool interface
*/
@@ -442,13 +449,30 @@ drop:
return ret;
}
-static void veth_xdp_flush(struct net_device *dev)
+static void veth_xdp_flush_bq(struct net_device *dev, struct veth_xdp_tx_bq *bq)
+{
+ int sent, i, err = 0;
+
+ sent = veth_xdp_xmit(dev, bq->count, bq->q, 0);
+ if (sent < 0) {
+ err = sent;
+ sent = 0;
+ for (i = 0; i < bq->count; i++)
+ xdp_return_frame(bq->q[i]);
+ }
+ trace_xdp_bulk_tx(dev, sent, bq->count - sent, err);
+
+ bq->count = 0;
+}
+
+static void veth_xdp_flush(struct net_device *dev, struct veth_xdp_tx_bq *bq)
{
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
struct net_device *rcv;
struct veth_rq *rq;
rcu_read_lock();
+ veth_xdp_flush_bq(dev, bq);
rcv = rcu_dereference(priv->peer);
if (unlikely(!rcv))
goto out;
@@ -464,19 +488,26 @@ out:
rcu_read_unlock();
}
-static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
+static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp,
+ struct veth_xdp_tx_bq *bq)
{
struct xdp_frame *frame = convert_to_xdp_frame(xdp);
if (unlikely(!frame))
return -EOVERFLOW;
- return veth_xdp_xmit(dev, 1, &frame, 0);
+ if (unlikely(bq->count == VETH_XDP_TX_BULK_SIZE))
+ veth_xdp_flush_bq(dev, bq);
+
+ bq->q[bq->count++] = frame;
+
+ return 0;
}
static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
struct xdp_frame *frame,
- unsigned int *xdp_xmit)
+ unsigned int *xdp_xmit,
+ struct veth_xdp_tx_bq *bq)
{
void *hard_start = frame->data - frame->headroom;
void *head = hard_start - sizeof(struct xdp_frame);
@@ -509,7 +540,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
orig_frame = *frame;
xdp.data_hard_start = head;
xdp.rxq->mem = frame->mem;
- if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) {
+ if (unlikely(veth_xdp_tx(rq->dev, &xdp, bq) < 0)) {
trace_xdp_exception(rq->dev, xdp_prog, act);
frame = &orig_frame;
goto err_xdp;
@@ -560,7 +591,8 @@ xdp_xmit:
}
static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
- unsigned int *xdp_xmit)
+ unsigned int *xdp_xmit,
+ struct veth_xdp_tx_bq *bq)
{
u32 pktlen, headroom, act, metalen;
void *orig_data, *orig_data_end;
@@ -636,7 +668,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
get_page(virt_to_page(xdp.data));
consume_skb(skb);
xdp.rxq->mem = rq->xdp_mem;
- if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) {
+ if (unlikely(veth_xdp_tx(rq->dev, &xdp, bq) < 0)) {
trace_xdp_exception(rq->dev, xdp_prog, act);
goto err_xdp;
}
@@ -691,7 +723,8 @@ xdp_xmit:
return NULL;
}
-static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit)
+static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit,
+ struct veth_xdp_tx_bq *bq)
{
int i, done = 0, drops = 0, bytes = 0;
@@ -707,11 +740,11 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit)
struct xdp_frame *frame = veth_ptr_to_xdp(ptr);
bytes += frame->len;
- skb = veth_xdp_rcv_one(rq, frame, &xdp_xmit_one);
+ skb = veth_xdp_rcv_one(rq, frame, &xdp_xmit_one, bq);
} else {
skb = ptr;
bytes += skb->len;
- skb = veth_xdp_rcv_skb(rq, skb, &xdp_xmit_one);
+ skb = veth_xdp_rcv_skb(rq, skb, &xdp_xmit_one, bq);
}
*xdp_xmit |= xdp_xmit_one;
@@ -737,10 +770,13 @@ static int veth_poll(struct napi_struct *napi, int budget)
struct veth_rq *rq =
container_of(napi, struct veth_rq, xdp_napi);
unsigned int xdp_xmit = 0;
+ struct veth_xdp_tx_bq bq;
int done;
+ bq.count = 0;
+
xdp_set_return_frame_no_direct();
- done = veth_xdp_rcv(rq, budget, &xdp_xmit);
+ done = veth_xdp_rcv(rq, budget, &xdp_xmit, &bq);
if (done < budget && napi_complete_done(napi, done)) {
/* Write rx_notify_masked before reading ptr_ring */
@@ -752,7 +788,7 @@ static int veth_poll(struct napi_struct *napi, int budget)
}
if (xdp_xmit & VETH_XDP_TX)
- veth_xdp_flush(rq->dev);
+ veth_xdp_flush(rq->dev, &bq);
if (xdp_xmit & VETH_XDP_REDIR)
xdp_do_flush_map();
xdp_clear_return_frame_no_direct();
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 69ef9cce5858..54edf8956a25 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -350,8 +350,8 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
+ const struct in6_addr *nexthop;
struct neighbour *neigh;
- struct in6_addr *nexthop;
int ret;
nf_reset(skb);
diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c
index 67a685adfd44..55d600cd3861 100644
--- a/drivers/nfc/st-nci/i2c.c
+++ b/drivers/nfc/st-nci/i2c.c
@@ -72,7 +72,7 @@ static void st_nci_i2c_disable(void *phy_id)
*/
static int st_nci_i2c_write(void *phy_id, struct sk_buff *skb)
{
- int r = -1;
+ int r;
struct st_nci_i2c_phy *phy = phy_id;
struct i2c_client *client = phy->i2c_dev;
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index a98126ad9c3a..a4994aa3acc0 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -18,6 +18,7 @@
#include <linux/percpu-refcount.h>
#include <linux/random.h>
#include <linux/seq_buf.h>
+#include <linux/iommu.h>
struct pci_p2pdma {
struct gen_pool *pool;
@@ -299,6 +300,9 @@ static bool root_complex_whitelist(struct pci_dev *dev)
struct pci_dev *root = pci_get_slot(host->bus, PCI_DEVFN(0, 0));
unsigned short vendor, device;
+ if (iommu_present(dev->dev.bus))
+ return false;
+
if (!root)
return false;
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 5bcdede5e955..c7ee07ce3615 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -25,6 +25,8 @@
#include <linux/wait.h>
#include <linux/workqueue.h>
+#include <net/dst.h>
+#include <net/ip6_fib.h>
#include <net/ipv6.h>
#include <net/if_inet6.h>
#include <net/addrconf.h>
@@ -60,7 +62,7 @@ struct qeth_dbf_info {
debug_info_t *id;
};
-#define QETH_DBF_CTRL_LEN 256
+#define QETH_DBF_CTRL_LEN 256U
#define QETH_DBF_TEXT(name, level, text) \
debug_text_event(qeth_dbf[QETH_DBF_##name].id, level, text)
@@ -525,11 +527,6 @@ struct qeth_qdio_info {
};
/**
- * buffer stuff for read channel
- */
-#define QETH_CMD_BUFFER_NO 8
-
-/**
* channel state machine
*/
enum qeth_channel_states {
@@ -551,16 +548,11 @@ enum qeth_card_states {
* Protocol versions
*/
enum qeth_prot_versions {
+ QETH_PROT_NONE = 0x0000,
QETH_PROT_IPV4 = 0x0004,
QETH_PROT_IPV6 = 0x0006,
};
-enum qeth_cmd_buffer_state {
- BUF_STATE_FREE,
- BUF_STATE_LOCKED,
- BUF_STATE_MALLOC,
-};
-
enum qeth_cq {
QETH_CQ_DISABLED = 0,
QETH_CQ_ENABLED = 1,
@@ -574,18 +566,20 @@ struct qeth_ipato {
struct list_head entries;
};
-struct qeth_channel;
+struct qeth_channel {
+ struct ccw_device *ccwdev;
+ enum qeth_channel_states state;
+ atomic_t irq_pending;
+};
struct qeth_cmd_buffer {
- enum qeth_cmd_buffer_state state;
unsigned int length;
refcount_t ref_count;
struct qeth_channel *channel;
struct qeth_reply *reply;
long timeout;
unsigned char *data;
- void (*finalize)(struct qeth_card *card, struct qeth_cmd_buffer *iob,
- unsigned int length);
+ void (*finalize)(struct qeth_card *card, struct qeth_cmd_buffer *iob);
void (*callback)(struct qeth_card *card, struct qeth_cmd_buffer *iob);
};
@@ -599,25 +593,8 @@ static inline struct qeth_ipa_cmd *__ipa_cmd(struct qeth_cmd_buffer *iob)
return (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
}
-/**
- * definition of a qeth channel, used for read and write
- */
-struct qeth_channel {
- enum qeth_channel_states state;
- struct ccw1 *ccw;
- spinlock_t iob_lock;
- wait_queue_head_t wait_q;
- struct ccw_device *ccwdev;
-/*command buffer for control data*/
- struct qeth_cmd_buffer iob[QETH_CMD_BUFFER_NO];
- atomic_t irq_pending;
- int io_buf_no;
-};
-
static inline struct ccw1 *__ccw_from_cmd(struct qeth_cmd_buffer *iob)
{
- if (iob->state != BUF_STATE_MALLOC)
- return iob->channel->ccw;
return (struct ccw1 *)(iob->data + ALIGN(iob->length, 8));
}
@@ -738,9 +715,6 @@ struct qeth_discipline {
void (*remove) (struct ccwgroup_device *);
int (*set_online) (struct ccwgroup_device *);
int (*set_offline) (struct ccwgroup_device *);
- int (*freeze)(struct ccwgroup_device *);
- int (*thaw) (struct ccwgroup_device *);
- int (*restore)(struct ccwgroup_device *);
int (*do_ioctl)(struct net_device *dev, struct ifreq *rq, int cmd);
int (*control_event_handler)(struct qeth_card *card,
struct qeth_ipa_cmd *cmd);
@@ -905,6 +879,17 @@ static inline int qeth_get_ether_cast_type(struct sk_buff *skb)
return RTN_UNICAST;
}
+static inline struct dst_entry *qeth_dst_check_rcu(struct sk_buff *skb, int ipv)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct rt6_info *rt;
+
+ rt = (struct rt6_info *) dst;
+ if (dst)
+ dst = dst_check(dst, (ipv == 6) ? rt6_get_cookie(rt) : 0);
+ return dst;
+}
+
static inline void qeth_rx_csum(struct qeth_card *card, struct sk_buff *skb,
u8 flags)
{
@@ -939,12 +924,12 @@ static inline int qeth_is_diagass_supported(struct qeth_card *card,
int qeth_send_simple_setassparms_prot(struct qeth_card *card,
enum qeth_ipa_funcs ipa_func,
- u16 cmd_code, long data,
+ u16 cmd_code, u32 *data,
enum qeth_prot_versions prot);
/* IPv4 variant */
static inline int qeth_send_simple_setassparms(struct qeth_card *card,
enum qeth_ipa_funcs ipa_func,
- u16 cmd_code, long data)
+ u16 cmd_code, u32 *data)
{
return qeth_send_simple_setassparms_prot(card, ipa_func, cmd_code,
data, QETH_PROT_IPV4);
@@ -952,7 +937,7 @@ static inline int qeth_send_simple_setassparms(struct qeth_card *card,
static inline int qeth_send_simple_setassparms_v6(struct qeth_card *card,
enum qeth_ipa_funcs ipa_func,
- u16 cmd_code, long data)
+ u16 cmd_code, u32 *data)
{
return qeth_send_simple_setassparms_prot(card, ipa_func, cmd_code,
data, QETH_PROT_IPV6);
@@ -993,8 +978,23 @@ int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *,
int (*reply_cb)
(struct qeth_card *, struct qeth_reply *, unsigned long),
void *);
-struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *,
- enum qeth_ipa_cmds, enum qeth_prot_versions);
+struct qeth_cmd_buffer *qeth_ipa_alloc_cmd(struct qeth_card *card,
+ enum qeth_ipa_cmds cmd_code,
+ enum qeth_prot_versions prot,
+ unsigned int data_length);
+struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
+ unsigned int length, unsigned int ccws,
+ long timeout);
+struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card,
+ enum qeth_ipa_funcs ipa_func,
+ u16 cmd_code,
+ unsigned int data_length,
+ enum qeth_prot_versions prot);
+struct qeth_cmd_buffer *qeth_get_diag_cmd(struct qeth_card *card,
+ enum qeth_diags_cmds sub_cmd,
+ unsigned int data_length);
+void qeth_put_cmd(struct qeth_cmd_buffer *iob);
+
struct sk_buff *qeth_core_get_next_skb(struct qeth_card *,
struct qeth_qdio_buffer *, struct qdio_buffer_element **, int *,
struct qeth_hdr **);
@@ -1003,16 +1003,13 @@ int qeth_poll(struct napi_struct *napi, int budget);
void qeth_clear_ipacmd_list(struct qeth_card *);
int qeth_qdio_clear_card(struct qeth_card *, int);
void qeth_clear_working_pool_list(struct qeth_card *);
-void qeth_clear_cmd_buffers(struct qeth_channel *);
void qeth_drain_output_queues(struct qeth_card *card);
void qeth_setadp_promisc_mode(struct qeth_card *);
int qeth_setadpparms_change_macaddr(struct qeth_card *);
void qeth_tx_timeout(struct net_device *);
-void qeth_release_buffer(struct qeth_cmd_buffer *iob);
void qeth_notify_reply(struct qeth_reply *reply, int reason);
void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
u16 cmd_length);
-struct qeth_cmd_buffer *qeth_get_buffer(struct qeth_channel *channel);
int qeth_query_switch_attributes(struct qeth_card *card,
struct qeth_switch_info *sw_info);
int qeth_query_card_info(struct qeth_card *card,
@@ -1029,10 +1026,6 @@ int qeth_configure_cq(struct qeth_card *, enum qeth_cq);
int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
void qeth_trace_features(struct qeth_card *);
int qeth_setassparms_cb(struct qeth_card *, struct qeth_reply *, unsigned long);
-struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *,
- enum qeth_ipa_funcs,
- __u16, __u16,
- enum qeth_prot_versions);
int qeth_set_features(struct net_device *, netdev_features_t);
void qeth_enable_hw_features(struct net_device *dev);
netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
@@ -1047,11 +1040,10 @@ int qeth_stop(struct net_device *dev);
int qeth_vm_request_mac(struct qeth_card *card);
int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
- struct qeth_qdio_out_q *queue, int ipv, int cast_type,
+ struct qeth_qdio_out_q *queue, int ipv,
void (*fill_header)(struct qeth_qdio_out_q *queue,
struct qeth_hdr *hdr, struct sk_buff *skb,
- int ipv, int cast_type,
- unsigned int data_len));
+ int ipv, unsigned int data_len));
/* exports for OSN */
int qeth_osn_assist(struct net_device *, void *, int);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index fe3dfeaf5ceb..4d0caeebc802 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -517,7 +517,7 @@ static int __qeth_issue_next_read(struct qeth_card *card)
QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n",
rc, CARD_DEVID(card));
atomic_set(&channel->irq_pending, 0);
- qeth_release_buffer(iob);
+ qeth_put_cmd(iob);
card->read_or_write_problem = 1;
qeth_schedule_recovery(card);
wake_up(&card->wait_q);
@@ -689,7 +689,7 @@ static int qeth_check_idx_response(struct qeth_card *card,
return 0;
}
-static void qeth_put_cmd(struct qeth_cmd_buffer *iob)
+void qeth_put_cmd(struct qeth_cmd_buffer *iob)
{
if (refcount_dec_and_test(&iob->ref_count)) {
if (iob->reply)
@@ -698,53 +698,12 @@ static void qeth_put_cmd(struct qeth_cmd_buffer *iob)
kfree(iob);
}
}
-
-static struct qeth_cmd_buffer *__qeth_get_buffer(struct qeth_channel *channel)
-{
- __u8 index;
-
- index = channel->io_buf_no;
- do {
- if (channel->iob[index].state == BUF_STATE_FREE) {
- channel->iob[index].state = BUF_STATE_LOCKED;
- channel->iob[index].timeout = QETH_TIMEOUT;
- channel->io_buf_no = (channel->io_buf_no + 1) %
- QETH_CMD_BUFFER_NO;
- memset(channel->iob[index].data, 0, QETH_BUFSIZE);
- return channel->iob + index;
- }
- index = (index + 1) % QETH_CMD_BUFFER_NO;
- } while (index != channel->io_buf_no);
-
- return NULL;
-}
-
-void qeth_release_buffer(struct qeth_cmd_buffer *iob)
-{
- struct qeth_channel *channel = iob->channel;
- unsigned long flags;
-
- if (iob->state == BUF_STATE_MALLOC) {
- qeth_put_cmd(iob);
- return;
- }
-
- spin_lock_irqsave(&channel->iob_lock, flags);
- iob->state = BUF_STATE_FREE;
- iob->callback = NULL;
- if (iob->reply) {
- qeth_put_reply(iob->reply);
- iob->reply = NULL;
- }
- spin_unlock_irqrestore(&channel->iob_lock, flags);
- wake_up(&channel->wait_q);
-}
-EXPORT_SYMBOL_GPL(qeth_release_buffer);
+EXPORT_SYMBOL_GPL(qeth_put_cmd);
static void qeth_release_buffer_cb(struct qeth_card *card,
struct qeth_cmd_buffer *iob)
{
- qeth_release_buffer(iob);
+ qeth_put_cmd(iob);
}
static void qeth_cancel_cmd(struct qeth_cmd_buffer *iob, int rc)
@@ -753,24 +712,12 @@ static void qeth_cancel_cmd(struct qeth_cmd_buffer *iob, int rc)
if (reply)
qeth_notify_reply(reply, rc);
- qeth_release_buffer(iob);
-}
-
-struct qeth_cmd_buffer *qeth_get_buffer(struct qeth_channel *channel)
-{
- struct qeth_cmd_buffer *buffer = NULL;
- unsigned long flags;
-
- spin_lock_irqsave(&channel->iob_lock, flags);
- buffer = __qeth_get_buffer(channel);
- spin_unlock_irqrestore(&channel->iob_lock, flags);
- return buffer;
+ qeth_put_cmd(iob);
}
-EXPORT_SYMBOL_GPL(qeth_get_buffer);
-static struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
- unsigned int length,
- unsigned int ccws, long timeout)
+struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
+ unsigned int length, unsigned int ccws,
+ long timeout)
{
struct qeth_cmd_buffer *iob;
@@ -788,23 +735,13 @@ static struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
return NULL;
}
- iob->state = BUF_STATE_MALLOC;
refcount_set(&iob->ref_count, 1);
iob->channel = channel;
iob->timeout = timeout;
iob->length = length;
return iob;
}
-
-void qeth_clear_cmd_buffers(struct qeth_channel *channel)
-{
- int cnt;
-
- for (cnt = 0; cnt < QETH_CMD_BUFFER_NO; cnt++)
- qeth_release_buffer(&channel->iob[cnt]);
- channel->io_buf_no = 0;
-}
-EXPORT_SYMBOL_GPL(qeth_clear_cmd_buffers);
+EXPORT_SYMBOL_GPL(qeth_alloc_cmd);
static void qeth_issue_next_read_cb(struct qeth_card *card,
struct qeth_cmd_buffer *iob)
@@ -879,7 +816,7 @@ out:
memcpy(&card->seqno.pdu_hdr_ack,
QETH_PDU_HEADER_SEQ_NO(iob->data),
QETH_SEQ_NO_LENGTH);
- qeth_release_buffer(iob);
+ qeth_put_cmd(iob);
__qeth_issue_next_read(card);
}
@@ -1229,56 +1166,26 @@ static void qeth_free_buffer_pool(struct qeth_card *card)
static void qeth_clean_channel(struct qeth_channel *channel)
{
struct ccw_device *cdev = channel->ccwdev;
- int cnt;
QETH_DBF_TEXT(SETUP, 2, "freech");
spin_lock_irq(get_ccwdev_lock(cdev));
cdev->handler = NULL;
spin_unlock_irq(get_ccwdev_lock(cdev));
-
- for (cnt = 0; cnt < QETH_CMD_BUFFER_NO; cnt++)
- kfree(channel->iob[cnt].data);
- kfree(channel->ccw);
}
-static int qeth_setup_channel(struct qeth_channel *channel, bool alloc_buffers)
+static void qeth_setup_channel(struct qeth_channel *channel)
{
struct ccw_device *cdev = channel->ccwdev;
- int cnt;
QETH_DBF_TEXT(SETUP, 2, "setupch");
- channel->ccw = kmalloc(sizeof(struct ccw1), GFP_KERNEL | GFP_DMA);
- if (!channel->ccw)
- return -ENOMEM;
channel->state = CH_STATE_DOWN;
atomic_set(&channel->irq_pending, 0);
- init_waitqueue_head(&channel->wait_q);
spin_lock_irq(get_ccwdev_lock(cdev));
cdev->handler = qeth_irq;
spin_unlock_irq(get_ccwdev_lock(cdev));
-
- if (!alloc_buffers)
- return 0;
-
- for (cnt = 0; cnt < QETH_CMD_BUFFER_NO; cnt++) {
- channel->iob[cnt].data = kmalloc(QETH_BUFSIZE,
- GFP_KERNEL | GFP_DMA);
- if (channel->iob[cnt].data == NULL)
- break;
- channel->iob[cnt].state = BUF_STATE_FREE;
- channel->iob[cnt].channel = channel;
- }
- if (cnt < QETH_CMD_BUFFER_NO) {
- qeth_clean_channel(channel);
- return -ENOMEM;
- }
- channel->io_buf_no = 0;
- spin_lock_init(&channel->iob_lock);
-
- return 0;
}
static int qeth_osa_set_output_queues(struct qeth_card *card, bool single)
@@ -1452,22 +1359,14 @@ static struct qeth_card *qeth_alloc_card(struct ccwgroup_device *gdev)
card->read_cmd = qeth_alloc_cmd(&card->read, QETH_BUFSIZE, 1, 0);
if (!card->read_cmd)
goto out_read_cmd;
- if (qeth_setup_channel(&card->read, false))
- goto out_read;
- if (qeth_setup_channel(&card->write, true))
- goto out_write;
- if (qeth_setup_channel(&card->data, false))
- goto out_data;
+
+ qeth_setup_channel(&card->read);
+ qeth_setup_channel(&card->write);
+ qeth_setup_channel(&card->data);
card->qeth_service_level.seq_print = qeth_core_sl_print;
register_service_level(&card->qeth_service_level);
return card;
-out_data:
- qeth_clean_channel(&card->write);
-out_write:
- qeth_clean_channel(&card->read);
-out_read:
- qeth_release_buffer(card->read_cmd);
out_read_cmd:
destroy_workqueue(card->event_wq);
out_wq:
@@ -1715,8 +1614,7 @@ static void qeth_init_func_level(struct qeth_card *card)
}
static void qeth_idx_finalize_cmd(struct qeth_card *card,
- struct qeth_cmd_buffer *iob,
- unsigned int length)
+ struct qeth_cmd_buffer *iob)
{
memcpy(QETH_TRANSPORT_HEADER_SEQ_NO(iob->data), &card->seqno.trans_hdr,
QETH_SEQ_NO_LENGTH);
@@ -1734,12 +1632,9 @@ static int qeth_peer_func_level(int level)
}
static void qeth_mpc_finalize_cmd(struct qeth_card *card,
- struct qeth_cmd_buffer *iob,
- unsigned int length)
+ struct qeth_cmd_buffer *iob)
{
- qeth_setup_ccw(__ccw_from_cmd(iob), CCW_CMD_WRITE, 0, length,
- iob->data);
- qeth_idx_finalize_cmd(card, iob, length);
+ qeth_idx_finalize_cmd(card, iob);
memcpy(QETH_PDU_HEADER_SEQ_NO(iob->data),
&card->seqno.pdu_hdr, QETH_SEQ_NO_LENGTH);
@@ -1751,20 +1646,26 @@ static void qeth_mpc_finalize_cmd(struct qeth_card *card,
iob->callback = qeth_release_buffer_cb;
}
-static struct qeth_cmd_buffer *qeth_mpc_get_cmd_buffer(struct qeth_card *card)
+static struct qeth_cmd_buffer *qeth_mpc_alloc_cmd(struct qeth_card *card,
+ void *data,
+ unsigned int data_length)
{
struct qeth_cmd_buffer *iob;
- iob = qeth_get_buffer(&card->write);
- if (iob)
- iob->finalize = qeth_mpc_finalize_cmd;
+ iob = qeth_alloc_cmd(&card->write, data_length, 1, QETH_TIMEOUT);
+ if (!iob)
+ return NULL;
+
+ memcpy(iob->data, data, data_length);
+ qeth_setup_ccw(__ccw_from_cmd(iob), CCW_CMD_WRITE, 0, data_length,
+ iob->data);
+ iob->finalize = qeth_mpc_finalize_cmd;
return iob;
}
/**
* qeth_send_control_data() - send control command to the card
* @card: qeth_card structure pointer
- * @len: size of the command buffer
* @iob: qeth_cmd_buffer pointer
* @reply_cb: callback function pointer
* @cb_card: pointer to the qeth_card structure
@@ -1784,7 +1685,7 @@ static struct qeth_cmd_buffer *qeth_mpc_get_cmd_buffer(struct qeth_card *card)
* field 'param' of the structure qeth_reply.
*/
-static int qeth_send_control_data(struct qeth_card *card, int len,
+static int qeth_send_control_data(struct qeth_card *card,
struct qeth_cmd_buffer *iob,
int (*reply_cb)(struct qeth_card *cb_card,
struct qeth_reply *cb_reply,
@@ -1800,13 +1701,13 @@ static int qeth_send_control_data(struct qeth_card *card, int len,
reply = qeth_alloc_reply(card);
if (!reply) {
- qeth_release_buffer(iob);
+ qeth_put_cmd(iob);
return -ENOMEM;
}
reply->callback = reply_cb;
reply->param = reply_param;
- /* pairs with qeth_release_buffer(): */
+ /* pairs with qeth_put_cmd(): */
qeth_get_reply(reply);
iob->reply = reply;
@@ -1815,13 +1716,13 @@ static int qeth_send_control_data(struct qeth_card *card, int len,
timeout);
if (timeout <= 0) {
qeth_put_reply(reply);
- qeth_release_buffer(iob);
+ qeth_put_cmd(iob);
return (timeout == -ERESTARTSYS) ? -EINTR : -ETIME;
}
if (iob->finalize)
- iob->finalize(card, iob, len);
- QETH_DBF_HEX(CTRL, 2, iob->data, min(len, QETH_DBF_CTRL_LEN));
+ iob->finalize(card, iob);
+ QETH_DBF_HEX(CTRL, 2, iob->data, min(iob->length, QETH_DBF_CTRL_LEN));
qeth_enqueue_reply(card, reply);
@@ -1836,7 +1737,7 @@ static int qeth_send_control_data(struct qeth_card *card, int len,
QETH_CARD_TEXT_(card, 2, " err%d", rc);
qeth_dequeue_reply(card, reply);
qeth_put_reply(reply);
- qeth_release_buffer(iob);
+ qeth_put_cmd(iob);
atomic_set(&channel->irq_pending, 0);
wake_up(&card->wait_q);
return rc;
@@ -1869,7 +1770,7 @@ static void qeth_read_conf_data_cb(struct qeth_card *card,
prcd[76] >= 0xF1 && prcd[76] <= 0xF4;
qeth_notify_reply(iob->reply, 0);
- qeth_release_buffer(iob);
+ qeth_put_cmd(iob);
}
static int qeth_read_conf_data(struct qeth_card *card)
@@ -1891,7 +1792,7 @@ static int qeth_read_conf_data(struct qeth_card *card)
qeth_setup_ccw(__ccw_from_cmd(iob), ciw->cmd, 0, iob->length,
iob->data);
- return qeth_send_control_data(card, iob->length, iob, NULL, NULL);
+ return qeth_send_control_data(card, iob, NULL, NULL);
}
static int qeth_idx_check_activate_response(struct qeth_card *card,
@@ -1958,7 +1859,7 @@ static void qeth_idx_activate_read_channel_cb(struct qeth_card *card,
out:
qeth_notify_reply(iob->reply, rc);
- qeth_release_buffer(iob);
+ qeth_put_cmd(iob);
}
static void qeth_idx_activate_write_channel_cb(struct qeth_card *card,
@@ -1985,7 +1886,7 @@ static void qeth_idx_activate_write_channel_cb(struct qeth_card *card,
out:
qeth_notify_reply(iob->reply, rc);
- qeth_release_buffer(iob);
+ qeth_put_cmd(iob);
}
static void qeth_idx_setup_activate_cmd(struct qeth_card *card,
@@ -2027,7 +1928,7 @@ static int qeth_idx_activate_read_channel(struct qeth_card *card)
qeth_idx_setup_activate_cmd(card, iob);
iob->callback = qeth_idx_activate_read_channel_cb;
- rc = qeth_send_control_data(card, IDX_ACTIVATE_SIZE, iob, NULL, NULL);
+ rc = qeth_send_control_data(card, iob, NULL, NULL);
if (rc)
return rc;
@@ -2051,7 +1952,7 @@ static int qeth_idx_activate_write_channel(struct qeth_card *card)
qeth_idx_setup_activate_cmd(card, iob);
iob->callback = qeth_idx_activate_write_channel_cb;
- rc = qeth_send_control_data(card, IDX_ACTIVATE_SIZE, iob, NULL, NULL);
+ rc = qeth_send_control_data(card, iob, NULL, NULL);
if (rc)
return rc;
@@ -2075,24 +1976,20 @@ static int qeth_cm_enable_cb(struct qeth_card *card, struct qeth_reply *reply,
static int qeth_cm_enable(struct qeth_card *card)
{
- int rc;
struct qeth_cmd_buffer *iob;
QETH_CARD_TEXT(card, 2, "cmenable");
- iob = qeth_mpc_get_cmd_buffer(card);
+ iob = qeth_mpc_alloc_cmd(card, CM_ENABLE, CM_ENABLE_SIZE);
if (!iob)
return -ENOMEM;
- memcpy(iob->data, CM_ENABLE, CM_ENABLE_SIZE);
memcpy(QETH_CM_ENABLE_ISSUER_RM_TOKEN(iob->data),
&card->token.issuer_rm_r, QETH_MPC_TOKEN_LENGTH);
memcpy(QETH_CM_ENABLE_FILTER_TOKEN(iob->data),
&card->token.cm_filter_w, QETH_MPC_TOKEN_LENGTH);
- rc = qeth_send_control_data(card, CM_ENABLE_SIZE, iob,
- qeth_cm_enable_cb, NULL);
- return rc;
+ return qeth_send_control_data(card, iob, qeth_cm_enable_cb, NULL);
}
static int qeth_cm_setup_cb(struct qeth_card *card, struct qeth_reply *reply,
@@ -2111,25 +2008,21 @@ static int qeth_cm_setup_cb(struct qeth_card *card, struct qeth_reply *reply,
static int qeth_cm_setup(struct qeth_card *card)
{
- int rc;
struct qeth_cmd_buffer *iob;
QETH_CARD_TEXT(card, 2, "cmsetup");
- iob = qeth_mpc_get_cmd_buffer(card);
+ iob = qeth_mpc_alloc_cmd(card, CM_SETUP, CM_SETUP_SIZE);
if (!iob)
return -ENOMEM;
- memcpy(iob->data, CM_SETUP, CM_SETUP_SIZE);
memcpy(QETH_CM_SETUP_DEST_ADDR(iob->data),
&card->token.issuer_rm_r, QETH_MPC_TOKEN_LENGTH);
memcpy(QETH_CM_SETUP_CONNECTION_TOKEN(iob->data),
&card->token.cm_connection_w, QETH_MPC_TOKEN_LENGTH);
memcpy(QETH_CM_SETUP_FILTER_TOKEN(iob->data),
&card->token.cm_filter_r, QETH_MPC_TOKEN_LENGTH);
- rc = qeth_send_control_data(card, CM_SETUP_SIZE, iob,
- qeth_cm_setup_cb, NULL);
- return rc;
+ return qeth_send_control_data(card, iob, qeth_cm_setup_cb, NULL);
}
static int qeth_update_max_mtu(struct qeth_card *card, unsigned int max_mtu)
@@ -2235,19 +2128,17 @@ static int qeth_ulp_enable(struct qeth_card *card)
QETH_CARD_TEXT(card, 2, "ulpenabl");
- iob = qeth_mpc_get_cmd_buffer(card);
+ iob = qeth_mpc_alloc_cmd(card, ULP_ENABLE, ULP_ENABLE_SIZE);
if (!iob)
return -ENOMEM;
- memcpy(iob->data, ULP_ENABLE, ULP_ENABLE_SIZE);
*(QETH_ULP_ENABLE_LINKNUM(iob->data)) = (u8) card->dev->dev_port;
memcpy(QETH_ULP_ENABLE_PROT_TYPE(iob->data), &prot_type, 1);
memcpy(QETH_ULP_ENABLE_DEST_ADDR(iob->data),
&card->token.cm_connection_r, QETH_MPC_TOKEN_LENGTH);
memcpy(QETH_ULP_ENABLE_FILTER_TOKEN(iob->data),
&card->token.ulp_filter_w, QETH_MPC_TOKEN_LENGTH);
- rc = qeth_send_control_data(card, ULP_ENABLE_SIZE, iob,
- qeth_ulp_enable_cb, &max_mtu);
+ rc = qeth_send_control_data(card, iob, qeth_ulp_enable_cb, &max_mtu);
if (rc)
return rc;
return qeth_update_max_mtu(card, max_mtu);
@@ -2276,18 +2167,16 @@ static int qeth_ulp_setup_cb(struct qeth_card *card, struct qeth_reply *reply,
static int qeth_ulp_setup(struct qeth_card *card)
{
- int rc;
__u16 temp;
struct qeth_cmd_buffer *iob;
struct ccw_dev_id dev_id;
QETH_CARD_TEXT(card, 2, "ulpsetup");
- iob = qeth_mpc_get_cmd_buffer(card);
+ iob = qeth_mpc_alloc_cmd(card, ULP_SETUP, ULP_SETUP_SIZE);
if (!iob)
return -ENOMEM;
- memcpy(iob->data, ULP_SETUP, ULP_SETUP_SIZE);
memcpy(QETH_ULP_SETUP_DEST_ADDR(iob->data),
&card->token.cm_connection_r, QETH_MPC_TOKEN_LENGTH);
memcpy(QETH_ULP_SETUP_CONNECTION_TOKEN(iob->data),
@@ -2299,9 +2188,7 @@ static int qeth_ulp_setup(struct qeth_card *card)
memcpy(QETH_ULP_SETUP_CUA(iob->data), &dev_id.devno, 2);
temp = (card->info.cula << 8) + card->info.unit_addr2;
memcpy(QETH_ULP_SETUP_REAL_DEVADDR(iob->data), &temp, 2);
- rc = qeth_send_control_data(card, ULP_SETUP_SIZE, iob,
- qeth_ulp_setup_cb, NULL);
- return rc;
+ return qeth_send_control_data(card, iob, qeth_ulp_setup_cb, NULL);
}
static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx)
@@ -2468,22 +2355,19 @@ static int qeth_qdio_activate(struct qeth_card *card)
static int qeth_dm_act(struct qeth_card *card)
{
- int rc;
struct qeth_cmd_buffer *iob;
QETH_CARD_TEXT(card, 2, "dmact");
- iob = qeth_mpc_get_cmd_buffer(card);
+ iob = qeth_mpc_alloc_cmd(card, DM_ACT, DM_ACT_SIZE);
if (!iob)
return -ENOMEM;
- memcpy(iob->data, DM_ACT, DM_ACT_SIZE);
memcpy(QETH_DM_ACT_DEST_ADDR(iob->data),
&card->token.cm_connection_r, QETH_MPC_TOKEN_LENGTH);
memcpy(QETH_DM_ACT_CONNECTION_TOKEN(iob->data),
&card->token.ulp_connection_r, QETH_MPC_TOKEN_LENGTH);
- rc = qeth_send_control_data(card, DM_ACT_SIZE, iob, NULL, NULL);
- return rc;
+ return qeth_send_control_data(card, iob, NULL, NULL);
}
static int qeth_mpc_initialize(struct qeth_card *card)
@@ -2728,36 +2612,10 @@ int qeth_init_qdio_queues(struct qeth_card *card)
}
EXPORT_SYMBOL_GPL(qeth_init_qdio_queues);
-static __u8 qeth_get_ipa_adp_type(enum qeth_link_types link_type)
-{
- switch (link_type) {
- case QETH_LINK_TYPE_HSTR:
- return 2;
- default:
- return 1;
- }
-}
-
-static void qeth_fill_ipacmd_header(struct qeth_card *card,
- struct qeth_ipa_cmd *cmd,
- enum qeth_ipa_cmds command,
- enum qeth_prot_versions prot)
-{
- cmd->hdr.command = command;
- cmd->hdr.initiator = IPA_CMD_INITIATOR_HOST;
- /* cmd->hdr.seqno is set by qeth_send_control_data() */
- cmd->hdr.adapter_type = qeth_get_ipa_adp_type(card->info.link_type);
- cmd->hdr.rel_adapter_no = (u8) card->dev->dev_port;
- cmd->hdr.prim_version_no = IS_LAYER2(card) ? 2 : 1;
- cmd->hdr.param_count = 1;
- cmd->hdr.prot_version = prot;
-}
-
static void qeth_ipa_finalize_cmd(struct qeth_card *card,
- struct qeth_cmd_buffer *iob,
- unsigned int length)
+ struct qeth_cmd_buffer *iob)
{
- qeth_mpc_finalize_cmd(card, iob, length);
+ qeth_mpc_finalize_cmd(card, iob);
/* override with IPA-specific values: */
__ipa_cmd(iob)->hdr.seqno = card->seqno.ipa;
@@ -2767,11 +2625,12 @@ static void qeth_ipa_finalize_cmd(struct qeth_card *card,
void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
u16 cmd_length)
{
- u16 total_length = IPA_PDU_HEADER_SIZE + cmd_length;
u8 prot_type = qeth_mpc_select_prot_type(card);
+ u16 total_length = iob->length;
+ qeth_setup_ccw(__ccw_from_cmd(iob), CCW_CMD_WRITE, 0, total_length,
+ iob->data);
iob->finalize = qeth_ipa_finalize_cmd;
- iob->timeout = QETH_IPA_TIMEOUT;
memcpy(iob->data, IPA_PDU_HEADER, IPA_PDU_HEADER_SIZE);
memcpy(QETH_IPA_PDU_LEN_TOTAL(iob->data), &total_length, 2);
@@ -2784,25 +2643,35 @@ void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
}
EXPORT_SYMBOL_GPL(qeth_prepare_ipa_cmd);
-struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card,
- enum qeth_ipa_cmds ipacmd, enum qeth_prot_versions prot)
+struct qeth_cmd_buffer *qeth_ipa_alloc_cmd(struct qeth_card *card,
+ enum qeth_ipa_cmds cmd_code,
+ enum qeth_prot_versions prot,
+ unsigned int data_length)
{
+ enum qeth_link_types link_type = card->info.link_type;
struct qeth_cmd_buffer *iob;
+ struct qeth_ipacmd_hdr *hdr;
- iob = qeth_get_buffer(&card->write);
- if (iob) {
- qeth_prepare_ipa_cmd(card, iob, sizeof(struct qeth_ipa_cmd));
- qeth_fill_ipacmd_header(card, __ipa_cmd(iob), ipacmd, prot);
- } else {
- dev_warn(&card->gdev->dev,
- "The qeth driver ran out of channel command buffers\n");
- QETH_DBF_MESSAGE(1, "device %x ran out of channel command buffers",
- CARD_DEVID(card));
- }
+ data_length += offsetof(struct qeth_ipa_cmd, data);
+ iob = qeth_alloc_cmd(&card->write, IPA_PDU_HEADER_SIZE + data_length, 1,
+ QETH_IPA_TIMEOUT);
+ if (!iob)
+ return NULL;
+ qeth_prepare_ipa_cmd(card, iob, data_length);
+
+ hdr = &__ipa_cmd(iob)->hdr;
+ hdr->command = cmd_code;
+ hdr->initiator = IPA_CMD_INITIATOR_HOST;
+ /* hdr->seqno is set by qeth_send_control_data() */
+ hdr->adapter_type = (link_type == QETH_LINK_TYPE_HSTR) ? 2 : 1;
+ hdr->rel_adapter_no = (u8) card->dev->dev_port;
+ hdr->prim_version_no = IS_LAYER2(card) ? 2 : 1;
+ hdr->param_count = 1;
+ hdr->prot_version = prot;
return iob;
}
-EXPORT_SYMBOL_GPL(qeth_get_ipacmd_buffer);
+EXPORT_SYMBOL_GPL(qeth_ipa_alloc_cmd);
static int qeth_send_ipa_cmd_cb(struct qeth_card *card,
struct qeth_reply *reply, unsigned long data)
@@ -2823,20 +2692,18 @@ int qeth_send_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
unsigned long),
void *reply_param)
{
- u16 length;
int rc;
QETH_CARD_TEXT(card, 4, "sendipa");
if (card->read_or_write_problem) {
- qeth_release_buffer(iob);
+ qeth_put_cmd(iob);
return -EIO;
}
if (reply_cb == NULL)
reply_cb = qeth_send_ipa_cmd_cb;
- memcpy(&length, QETH_IPA_PDU_LEN_TOTAL(iob->data), 2);
- rc = qeth_send_control_data(card, length, iob, reply_cb, reply_param);
+ rc = qeth_send_control_data(card, iob, reply_cb, reply_param);
if (rc == -ETIME) {
qeth_clear_ipacmd_list(card);
qeth_schedule_recovery(card);
@@ -2862,7 +2729,7 @@ static int qeth_send_startlan(struct qeth_card *card)
QETH_CARD_TEXT(card, 2, "strtlan");
- iob = qeth_get_ipacmd_buffer(card, IPA_CMD_STARTLAN, 0);
+ iob = qeth_ipa_alloc_cmd(card, IPA_CMD_STARTLAN, QETH_PROT_NONE, 0);
if (!iob)
return -ENOMEM;
return qeth_send_ipa_cmd(card, iob, qeth_send_startlan_cb, NULL);
@@ -2896,21 +2763,24 @@ static int qeth_query_setadapterparms_cb(struct qeth_card *card,
}
static struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *card,
- __u32 command, __u32 cmdlen)
+ enum qeth_ipa_setadp_cmd adp_cmd,
+ unsigned int data_length)
{
+ struct qeth_ipacmd_setadpparms_hdr *hdr;
struct qeth_cmd_buffer *iob;
- struct qeth_ipa_cmd *cmd;
- iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETADAPTERPARMS,
- QETH_PROT_IPV4);
- if (iob) {
- cmd = __ipa_cmd(iob);
- cmd->data.setadapterparms.hdr.cmdlength = cmdlen;
- cmd->data.setadapterparms.hdr.command_code = command;
- cmd->data.setadapterparms.hdr.used_total = 1;
- cmd->data.setadapterparms.hdr.seq_no = 1;
- }
+ iob = qeth_ipa_alloc_cmd(card, IPA_CMD_SETADAPTERPARMS, QETH_PROT_IPV4,
+ data_length +
+ offsetof(struct qeth_ipacmd_setadpparms,
+ data));
+ if (!iob)
+ return NULL;
+ hdr = &__ipa_cmd(iob)->data.setadapterparms.hdr;
+ hdr->cmdlength = sizeof(*hdr) + data_length;
+ hdr->command_code = adp_cmd;
+ hdr->used_total = 1;
+ hdr->seq_no = 1;
return iob;
}
@@ -2921,7 +2791,7 @@ static int qeth_query_setadapterparms(struct qeth_card *card)
QETH_CARD_TEXT(card, 3, "queryadp");
iob = qeth_get_adapter_cmd(card, IPA_SETADP_QUERY_COMMANDS_SUPPORTED,
- sizeof(struct qeth_ipacmd_setadpparms));
+ SETADP_DATA_SIZEOF(query_cmds_supp));
if (!iob)
return -ENOMEM;
rc = qeth_send_ipa_cmd(card, iob, qeth_query_setadapterparms_cb, NULL);
@@ -2971,7 +2841,7 @@ static int qeth_query_ipassists(struct qeth_card *card,
struct qeth_cmd_buffer *iob;
QETH_CARD_TEXT_(card, 2, "qipassi%i", prot);
- iob = qeth_get_ipacmd_buffer(card, IPA_CMD_QIPASSIST, prot);
+ iob = qeth_ipa_alloc_cmd(card, IPA_CMD_QIPASSIST, prot, 0);
if (!iob)
return -ENOMEM;
rc = qeth_send_ipa_cmd(card, iob, qeth_query_ipassists_cb, NULL);
@@ -3008,14 +2878,32 @@ int qeth_query_switch_attributes(struct qeth_card *card,
return -EOPNOTSUPP;
if (!netif_carrier_ok(card->dev))
return -ENOMEDIUM;
- iob = qeth_get_adapter_cmd(card, IPA_SETADP_QUERY_SWITCH_ATTRIBUTES,
- sizeof(struct qeth_ipacmd_setadpparms_hdr));
+ iob = qeth_get_adapter_cmd(card, IPA_SETADP_QUERY_SWITCH_ATTRIBUTES, 0);
if (!iob)
return -ENOMEM;
return qeth_send_ipa_cmd(card, iob,
qeth_query_switch_attributes_cb, sw_info);
}
+struct qeth_cmd_buffer *qeth_get_diag_cmd(struct qeth_card *card,
+ enum qeth_diags_cmds sub_cmd,
+ unsigned int data_length)
+{
+ struct qeth_ipacmd_diagass *cmd;
+ struct qeth_cmd_buffer *iob;
+
+ iob = qeth_ipa_alloc_cmd(card, IPA_CMD_SET_DIAG_ASS, QETH_PROT_NONE,
+ DIAG_HDR_LEN + data_length);
+ if (!iob)
+ return NULL;
+
+ cmd = &__ipa_cmd(iob)->data.diagass;
+ cmd->subcmd_len = DIAG_SUB_HDR_LEN + data_length;
+ cmd->subcmd = sub_cmd;
+ return iob;
+}
+EXPORT_SYMBOL_GPL(qeth_get_diag_cmd);
+
static int qeth_query_setdiagass_cb(struct qeth_card *card,
struct qeth_reply *reply, unsigned long data)
{
@@ -3034,15 +2922,11 @@ static int qeth_query_setdiagass_cb(struct qeth_card *card,
static int qeth_query_setdiagass(struct qeth_card *card)
{
struct qeth_cmd_buffer *iob;
- struct qeth_ipa_cmd *cmd;
QETH_CARD_TEXT(card, 2, "qdiagass");
- iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
+ iob = qeth_get_diag_cmd(card, QETH_DIAGS_CMD_QUERY, 0);
if (!iob)
return -ENOMEM;
- cmd = __ipa_cmd(iob);
- cmd->data.diagass.subcmd_len = 16;
- cmd->data.diagass.subcmd = QETH_DIAGS_CMD_QUERY;
return qeth_send_ipa_cmd(card, iob, qeth_query_setdiagass_cb, NULL);
}
@@ -3090,12 +2974,10 @@ int qeth_hw_trap(struct qeth_card *card, enum qeth_diags_trap_action action)
struct qeth_ipa_cmd *cmd;
QETH_CARD_TEXT(card, 2, "diagtrap");
- iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
+ iob = qeth_get_diag_cmd(card, QETH_DIAGS_CMD_TRAP, 64);
if (!iob)
return -ENOMEM;
cmd = __ipa_cmd(iob);
- cmd->data.diagass.subcmd_len = 80;
- cmd->data.diagass.subcmd = QETH_DIAGS_CMD_TRAP;
cmd->data.diagass.type = 1;
cmd->data.diagass.action = action;
switch (action) {
@@ -4026,11 +3908,10 @@ static void qeth_fill_tso_ext(struct qeth_hdr_tso *hdr,
}
int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
- struct qeth_qdio_out_q *queue, int ipv, int cast_type,
+ struct qeth_qdio_out_q *queue, int ipv,
void (*fill_header)(struct qeth_qdio_out_q *queue,
struct qeth_hdr *hdr, struct sk_buff *skb,
- int ipv, int cast_type,
- unsigned int data_len))
+ int ipv, unsigned int data_len))
{
unsigned int proto_len, hw_hdr_len;
unsigned int frame_len = skb->len;
@@ -4064,7 +3945,7 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
data_offset = push_len + proto_len;
}
memset(hdr, 0, hw_hdr_len);
- fill_header(queue, hdr, skb, ipv, cast_type, frame_len);
+ fill_header(queue, hdr, skb, ipv, frame_len);
if (is_tso)
qeth_fill_tso_ext((struct qeth_hdr_tso *) hdr,
frame_len - proto_len, skb, proto_len);
@@ -4133,7 +4014,7 @@ void qeth_setadp_promisc_mode(struct qeth_card *card)
QETH_CARD_TEXT_(card, 4, "mode:%x", mode);
iob = qeth_get_adapter_cmd(card, IPA_SETADP_SET_PROMISC_MODE,
- sizeof(struct qeth_ipacmd_setadpparms_hdr) + 8);
+ SETADP_DATA_SIZEOF(mode));
if (!iob)
return;
cmd = __ipa_cmd(iob);
@@ -4173,8 +4054,7 @@ int qeth_setadpparms_change_macaddr(struct qeth_card *card)
QETH_CARD_TEXT(card, 4, "chgmac");
iob = qeth_get_adapter_cmd(card, IPA_SETADP_ALTER_MAC_ADDRESS,
- sizeof(struct qeth_ipacmd_setadpparms_hdr) +
- sizeof(struct qeth_change_addr));
+ SETADP_DATA_SIZEOF(change_addr));
if (!iob)
return -ENOMEM;
cmd = __ipa_cmd(iob);
@@ -4283,8 +4163,7 @@ static int qeth_setadpparms_set_access_ctrl(struct qeth_card *card,
QETH_CARD_TEXT(card, 4, "setacctl");
iob = qeth_get_adapter_cmd(card, IPA_SETADP_SET_ACCESS_CONTROL,
- sizeof(struct qeth_ipacmd_setadpparms_hdr) +
- sizeof(struct qeth_set_access_ctrl));
+ SETADP_DATA_SIZEOF(set_access_ctrl));
if (!iob)
return -ENOMEM;
cmd = __ipa_cmd(iob);
@@ -4440,18 +4319,13 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
return -ENOSPC;
}
QETH_CARD_TEXT_(card, 4, "snore%i",
- cmd->data.setadapterparms.hdr.used_total);
+ cmd->data.setadapterparms.hdr.used_total);
QETH_CARD_TEXT_(card, 4, "sseqn%i",
- cmd->data.setadapterparms.hdr.seq_no);
+ cmd->data.setadapterparms.hdr.seq_no);
/*copy entries to user buffer*/
memcpy(qinfo->udata + qinfo->udata_offset, snmp_data, data_len);
qinfo->udata_offset += data_len;
- /* check if all replies received ... */
- QETH_CARD_TEXT_(card, 4, "srtot%i",
- cmd->data.setadapterparms.hdr.used_total);
- QETH_CARD_TEXT_(card, 4, "srseq%i",
- cmd->data.setadapterparms.hdr.seq_no);
if (cmd->data.setadapterparms.hdr.seq_no <
cmd->data.setadapterparms.hdr.used_total)
return 1;
@@ -4460,9 +4334,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
static int qeth_snmp_command(struct qeth_card *card, char __user *udata)
{
+ struct qeth_snmp_ureq __user *ureq;
struct qeth_cmd_buffer *iob;
- struct qeth_ipa_cmd *cmd;
- struct qeth_snmp_ureq *ureq;
unsigned int req_len;
struct qeth_arp_query_info qinfo = {0, };
int rc = 0;
@@ -4476,38 +4349,28 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata)
IS_LAYER3(card))
return -EOPNOTSUPP;
- /* skip 4 bytes (data_len struct member) to get req_len */
- if (copy_from_user(&req_len, udata + sizeof(int), sizeof(int)))
+ ureq = (struct qeth_snmp_ureq __user *) udata;
+ if (get_user(qinfo.udata_len, &ureq->hdr.data_len) ||
+ get_user(req_len, &ureq->hdr.req_len))
+ return -EFAULT;
+
+ iob = qeth_get_adapter_cmd(card, IPA_SETADP_SET_SNMP_CONTROL, req_len);
+ if (!iob)
+ return -ENOMEM;
+
+ if (copy_from_user(&__ipa_cmd(iob)->data.setadapterparms.data.snmp,
+ &ureq->cmd, req_len)) {
+ qeth_put_cmd(iob);
return -EFAULT;
- if (req_len > (QETH_BUFSIZE - IPA_PDU_HEADER_SIZE -
- sizeof(struct qeth_ipacmd_hdr) -
- sizeof(struct qeth_ipacmd_setadpparms_hdr)))
- return -EINVAL;
- ureq = memdup_user(udata, req_len + sizeof(struct qeth_snmp_ureq_hdr));
- if (IS_ERR(ureq)) {
- QETH_CARD_TEXT(card, 2, "snmpnome");
- return PTR_ERR(ureq);
}
- qinfo.udata_len = ureq->hdr.data_len;
+
qinfo.udata = kzalloc(qinfo.udata_len, GFP_KERNEL);
if (!qinfo.udata) {
- kfree(ureq);
+ qeth_put_cmd(iob);
return -ENOMEM;
}
qinfo.udata_offset = sizeof(struct qeth_snmp_ureq_hdr);
- iob = qeth_get_adapter_cmd(card, IPA_SETADP_SET_SNMP_CONTROL,
- QETH_SNMP_SETADP_CMDLENGTH + req_len);
- if (!iob) {
- rc = -ENOMEM;
- goto out;
- }
-
- /* for large requests, fix-up the length fields: */
- qeth_prepare_ipa_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len);
-
- cmd = __ipa_cmd(iob);
- memcpy(&cmd->data.setadapterparms.data.snmp, &ureq->cmd, req_len);
rc = qeth_send_ipa_cmd(card, iob, qeth_snmp_command_cb, &qinfo);
if (rc)
QETH_DBF_MESSAGE(2, "SNMP command failed on device %x: (%#x)\n",
@@ -4516,8 +4379,7 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata)
if (copy_to_user(udata, qinfo.udata, qinfo.udata_len))
rc = -EFAULT;
}
-out:
- kfree(ureq);
+
kfree(qinfo.udata);
return rc;
}
@@ -4583,8 +4445,7 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata)
}
iob = qeth_get_adapter_cmd(card, IPA_SETADP_QUERY_OAT,
- sizeof(struct qeth_ipacmd_setadpparms_hdr) +
- sizeof(struct qeth_query_oat));
+ SETADP_DATA_SIZEOF(query_oat));
if (!iob) {
rc = -ENOMEM;
goto out_free;
@@ -4646,8 +4507,7 @@ int qeth_query_card_info(struct qeth_card *card,
QETH_CARD_TEXT(card, 2, "qcrdinfo");
if (!qeth_adp_supported(card, IPA_SETADP_QUERY_CARD_INFO))
return -EOPNOTSUPP;
- iob = qeth_get_adapter_cmd(card, IPA_SETADP_QUERY_CARD_INFO,
- sizeof(struct qeth_ipacmd_setadpparms_hdr));
+ iob = qeth_get_adapter_cmd(card, IPA_SETADP_QUERY_CARD_INFO, 0);
if (!iob)
return -ENOMEM;
return qeth_send_ipa_cmd(card, iob, qeth_query_card_info_cb,
@@ -4901,7 +4761,7 @@ static void qeth_core_free_card(struct qeth_card *card)
qeth_clean_channel(&card->read);
qeth_clean_channel(&card->write);
qeth_clean_channel(&card->data);
- qeth_release_buffer(card->read_cmd);
+ qeth_put_cmd(card->read_cmd);
destroy_workqueue(card->event_wq);
qeth_free_qdio_queues(card);
unregister_service_level(&card->qeth_service_level);
@@ -5314,42 +5174,47 @@ EXPORT_SYMBOL_GPL(qeth_setassparms_cb);
struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card,
enum qeth_ipa_funcs ipa_func,
- __u16 cmd_code, __u16 len,
+ u16 cmd_code,
+ unsigned int data_length,
enum qeth_prot_versions prot)
{
+ struct qeth_ipacmd_setassparms *setassparms;
+ struct qeth_ipacmd_setassparms_hdr *hdr;
struct qeth_cmd_buffer *iob;
- struct qeth_ipa_cmd *cmd;
QETH_CARD_TEXT(card, 4, "getasscm");
- iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETASSPARMS, prot);
+ iob = qeth_ipa_alloc_cmd(card, IPA_CMD_SETASSPARMS, prot,
+ data_length +
+ offsetof(struct qeth_ipacmd_setassparms,
+ data));
+ if (!iob)
+ return NULL;
- if (iob) {
- cmd = __ipa_cmd(iob);
- cmd->data.setassparms.hdr.assist_no = ipa_func;
- cmd->data.setassparms.hdr.length = 8 + len;
- cmd->data.setassparms.hdr.command_code = cmd_code;
- }
+ setassparms = &__ipa_cmd(iob)->data.setassparms;
+ setassparms->assist_no = ipa_func;
+ hdr = &setassparms->hdr;
+ hdr->length = sizeof(*hdr) + data_length;
+ hdr->command_code = cmd_code;
return iob;
}
EXPORT_SYMBOL_GPL(qeth_get_setassparms_cmd);
int qeth_send_simple_setassparms_prot(struct qeth_card *card,
enum qeth_ipa_funcs ipa_func,
- u16 cmd_code, long data,
+ u16 cmd_code, u32 *data,
enum qeth_prot_versions prot)
{
- int length = 0;
+ unsigned int length = data ? SETASS_DATA_SIZEOF(flags_32bit) : 0;
struct qeth_cmd_buffer *iob;
QETH_CARD_TEXT_(card, 4, "simassp%i", prot);
- if (data)
- length = sizeof(__u32);
iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code, length, prot);
if (!iob)
return -ENOMEM;
- __ipa_cmd(iob)->data.setassparms.data.flags_32bit = (__u32) data;
+ if (data)
+ __ipa_cmd(iob)->data.setassparms.data.flags_32bit = *data;
return qeth_send_ipa_cmd(card, iob, qeth_setassparms_cb, NULL);
}
EXPORT_SYMBOL_GPL(qeth_send_simple_setassparms_prot);
@@ -5723,28 +5588,30 @@ static void qeth_core_shutdown(struct ccwgroup_device *gdev)
qdio_free(CARD_DDEV(card));
}
-static int qeth_core_freeze(struct ccwgroup_device *gdev)
+static int qeth_suspend(struct ccwgroup_device *gdev)
{
struct qeth_card *card = dev_get_drvdata(&gdev->dev);
- if (card->discipline && card->discipline->freeze)
- return card->discipline->freeze(gdev);
- return 0;
-}
-static int qeth_core_thaw(struct ccwgroup_device *gdev)
-{
- struct qeth_card *card = dev_get_drvdata(&gdev->dev);
- if (card->discipline && card->discipline->thaw)
- return card->discipline->thaw(gdev);
+ qeth_set_allowed_threads(card, 0, 1);
+ wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
+ if (gdev->state == CCWGROUP_OFFLINE)
+ return 0;
+
+ card->discipline->set_offline(gdev);
return 0;
}
-static int qeth_core_restore(struct ccwgroup_device *gdev)
+static int qeth_resume(struct ccwgroup_device *gdev)
{
struct qeth_card *card = dev_get_drvdata(&gdev->dev);
- if (card->discipline && card->discipline->restore)
- return card->discipline->restore(gdev);
- return 0;
+ int rc;
+
+ rc = card->discipline->set_online(gdev);
+
+ qeth_set_allowed_threads(card, 0xffffffff, 0);
+ if (rc)
+ dev_warn(&card->gdev->dev, "The qeth device driver failed to recover an error on the device\n");
+ return rc;
}
static ssize_t group_store(struct device_driver *ddrv, const char *buf,
@@ -5785,9 +5652,9 @@ static struct ccwgroup_driver qeth_core_ccwgroup_driver = {
.shutdown = qeth_core_shutdown,
.prepare = NULL,
.complete = NULL,
- .freeze = qeth_core_freeze,
- .thaw = qeth_core_thaw,
- .restore = qeth_core_restore,
+ .freeze = qeth_suspend,
+ .thaw = qeth_resume,
+ .restore = qeth_resume,
};
struct qeth_card *qeth_get_card_by_busid(char *bus_id)
@@ -5866,8 +5733,8 @@ static int qeth_start_csum_cb(struct qeth_card *card, struct qeth_reply *reply,
static int qeth_set_csum_off(struct qeth_card *card, enum qeth_ipa_funcs cstype,
enum qeth_prot_versions prot)
{
- return qeth_send_simple_setassparms_prot(card, cstype,
- IPA_CMD_ASS_STOP, 0, prot);
+ return qeth_send_simple_setassparms_prot(card, cstype, IPA_CMD_ASS_STOP,
+ NULL, prot);
}
static int qeth_set_csum_on(struct qeth_card *card, enum qeth_ipa_funcs cstype,
@@ -5898,7 +5765,8 @@ static int qeth_set_csum_on(struct qeth_card *card, enum qeth_ipa_funcs cstype,
return -EOPNOTSUPP;
}
- iob = qeth_get_setassparms_cmd(card, cstype, IPA_CMD_ASS_ENABLE, 4,
+ iob = qeth_get_setassparms_cmd(card, cstype, IPA_CMD_ASS_ENABLE,
+ SETASS_DATA_SIZEOF(flags_32bit),
prot);
if (!iob) {
qeth_set_csum_off(card, cstype, prot);
@@ -5955,7 +5823,7 @@ static int qeth_set_tso_off(struct qeth_card *card,
enum qeth_prot_versions prot)
{
return qeth_send_simple_setassparms_prot(card, IPA_OUTBOUND_TSO,
- IPA_CMD_ASS_STOP, 0, prot);
+ IPA_CMD_ASS_STOP, NULL, prot);
}
static int qeth_set_tso_on(struct qeth_card *card,
@@ -5981,7 +5849,8 @@ static int qeth_set_tso_on(struct qeth_card *card,
}
iob = qeth_get_setassparms_cmd(card, IPA_OUTBOUND_TSO,
- IPA_CMD_ASS_ENABLE, sizeof(caps), prot);
+ IPA_CMD_ASS_ENABLE,
+ SETASS_DATA_SIZEOF(caps), prot);
if (!iob) {
qeth_set_tso_off(card, prot);
return -ENOMEM;
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index fadafdc0e8e4..75b5834ed28d 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -379,9 +379,7 @@ struct qeth_ipacmd_layer2setdelvlan {
__u16 vlan_id;
} __attribute__ ((packed));
-
struct qeth_ipacmd_setassparms_hdr {
- __u32 assist_no;
__u16 length;
__u16 command_code;
__u16 return_code;
@@ -426,6 +424,7 @@ struct qeth_tso_start_data {
/* SETASSPARMS IPA Command: */
struct qeth_ipacmd_setassparms {
+ u32 assist_no;
struct qeth_ipacmd_setassparms_hdr hdr;
union {
__u32 flags_32bit;
@@ -437,6 +436,8 @@ struct qeth_ipacmd_setassparms {
} data;
} __attribute__ ((packed));
+#define SETASS_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_setassparms,\
+ data.field)
/* SETRTG IPA Command: ****************************************************/
struct qeth_set_routing {
@@ -524,8 +525,6 @@ struct qeth_query_switch_attributes {
#define QETH_SETADP_FLAGS_VIRTUAL_MAC 0x80 /* for CHANGE_ADDR_READ_MAC */
struct qeth_ipacmd_setadpparms_hdr {
- u32 supp_hw_cmds;
- u32 reserved1;
u16 cmdlength;
u16 reserved2;
u32 command_code;
@@ -537,6 +536,7 @@ struct qeth_ipacmd_setadpparms_hdr {
};
struct qeth_ipacmd_setadpparms {
+ struct qeth_ipa_caps hw_cmds;
struct qeth_ipacmd_setadpparms_hdr hdr;
union {
struct qeth_query_cmds_supp query_cmds_supp;
@@ -550,6 +550,9 @@ struct qeth_ipacmd_setadpparms {
} data;
} __attribute__ ((packed));
+#define SETADP_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_setadpparms,\
+ data.field)
+
/* CREATE_ADDR IPA Command: ***********************************************/
struct qeth_create_destroy_address {
__u8 unique_id[8];
@@ -596,6 +599,11 @@ struct qeth_ipacmd_diagass {
__u8 cdata[64];
} __attribute__ ((packed));
+#define DIAG_HDR_LEN offsetofend(struct qeth_ipacmd_diagass, ext)
+#define DIAG_SUB_HDR_LEN (offsetofend(struct qeth_ipacmd_diagass, ext) -\
+ offsetof(struct qeth_ipacmd_diagass, \
+ subcmd_len))
+
/* VNIC Characteristics IPA Command: *****************************************/
/* IPA commands/sub commands for VNICC */
#define IPA_VNICC_QUERY_CHARS 0x00000000L
@@ -622,12 +630,6 @@ struct qeth_ipacmd_diagass {
/* VNICC header */
struct qeth_ipacmd_vnicc_hdr {
- u32 sup;
- u32 cur;
-};
-
-/* VNICC sub command header */
-struct qeth_vnicc_sub_hdr {
u16 data_length;
u16 reserved;
u32 sub_command;
@@ -652,15 +654,18 @@ struct qeth_vnicc_getset_timeout {
/* complete VNICC IPA command message */
struct qeth_ipacmd_vnicc {
+ struct qeth_ipa_caps vnicc_cmds;
struct qeth_ipacmd_vnicc_hdr hdr;
- struct qeth_vnicc_sub_hdr sub_hdr;
union {
struct qeth_vnicc_query_cmds query_cmds;
struct qeth_vnicc_set_char set_char;
struct qeth_vnicc_getset_timeout getset_timeout;
- };
+ } data;
};
+#define VNICC_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_vnicc,\
+ data.field)
+
/* SETBRIDGEPORT IPA Command: *********************************************/
enum qeth_ipa_sbp_cmd {
IPA_SBP_QUERY_COMMANDS_SUPPORTED = 0x00000000L,
@@ -686,8 +691,6 @@ struct mac_addr_lnid {
} __packed;
struct qeth_ipacmd_sbp_hdr {
- __u32 supported_sbp_cmds;
- __u32 enabled_sbp_cmds;
__u16 cmdlength;
__u16 reserved1;
__u32 command_code;
@@ -702,16 +705,10 @@ struct qeth_sbp_query_cmds_supp {
__u32 reserved;
} __packed;
-struct qeth_sbp_reset_role {
-} __packed;
-
struct qeth_sbp_set_primary {
struct net_if_token token;
} __packed;
-struct qeth_sbp_set_secondary {
-} __packed;
-
struct qeth_sbp_port_entry {
__u8 role;
__u8 state;
@@ -737,17 +734,19 @@ struct qeth_sbp_state_change {
} __packed;
struct qeth_ipacmd_setbridgeport {
+ struct qeth_ipa_caps sbp_cmds;
struct qeth_ipacmd_sbp_hdr hdr;
union {
struct qeth_sbp_query_cmds_supp query_cmds_supp;
- struct qeth_sbp_reset_role reset_role;
struct qeth_sbp_set_primary set_primary;
- struct qeth_sbp_set_secondary set_secondary;
struct qeth_sbp_query_ports query_ports;
struct qeth_sbp_state_change state_change;
} data;
} __packed;
+#define SBP_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_setbridgeport,\
+ data.field)
+
/* ADDRESS_CHANGE_NOTIFICATION adapter-initiated "command" *******************/
/* Bitmask for entry->change_code. Both bits may be raised. */
enum qeth_ipa_addr_change_code {
@@ -806,6 +805,8 @@ struct qeth_ipa_cmd {
} data;
} __attribute__ ((packed));
+#define IPA_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipa_cmd, data.field)
+
/*
* special command for ARP processing.
* this is not included in setassparms command before, because we get
@@ -823,10 +824,6 @@ enum qeth_ipa_arp_return_codes {
extern const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc);
extern const char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd);
-#define QETH_SETADP_BASE_LEN (sizeof(struct qeth_ipacmd_hdr) + \
- sizeof(struct qeth_ipacmd_setadpparms_hdr))
-#define QETH_SNMP_SETADP_CMDLENGTH 16
-
/* Helper functions */
#define IS_IPA_REPLY(cmd) ((cmd->hdr.initiator == IPA_CMD_INITIATOR_HOST) || \
(cmd->hdr.initiator == IPA_CMD_INITIATOR_OSA_REPLY))
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 9565ef9747c1..fd64bc3f4062 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -85,7 +85,8 @@ static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac,
struct qeth_cmd_buffer *iob;
QETH_CARD_TEXT(card, 2, "L2sdmac");
- iob = qeth_get_ipacmd_buffer(card, ipacmd, QETH_PROT_IPV4);
+ iob = qeth_ipa_alloc_cmd(card, ipacmd, QETH_PROT_IPV4,
+ IPA_DATA_SIZEOF(setdelmac));
if (!iob)
return -ENOMEM;
cmd = __ipa_cmd(iob);
@@ -163,8 +164,9 @@ static void qeth_l2_drain_rx_mode_cache(struct qeth_card *card)
static void qeth_l2_fill_header(struct qeth_qdio_out_q *queue,
struct qeth_hdr *hdr, struct sk_buff *skb,
- int ipv, int cast_type, unsigned int data_len)
+ int ipv, unsigned int data_len)
{
+ int cast_type = qeth_get_ether_cast_type(skb);
struct vlan_ethhdr *veth = vlan_eth_hdr(skb);
hdr->hdr.l2.pkt_length = data_len;
@@ -240,7 +242,8 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i,
struct qeth_cmd_buffer *iob;
QETH_CARD_TEXT_(card, 4, "L2sdv%x", ipacmd);
- iob = qeth_get_ipacmd_buffer(card, ipacmd, QETH_PROT_IPV4);
+ iob = qeth_ipa_alloc_cmd(card, ipacmd, QETH_PROT_IPV4,
+ IPA_DATA_SIZEOF(setdelvlan));
if (!iob)
return -ENOMEM;
cmd = __ipa_cmd(iob);
@@ -292,7 +295,6 @@ static void qeth_l2_stop_card(struct qeth_card *card)
card->state = CARD_STATE_DOWN;
}
- qeth_clear_cmd_buffers(&card->write);
flush_workqueue(card->event_wq);
card->info.mac_bits &= ~QETH_LAYER2_MAC_REGISTERED;
}
@@ -597,7 +599,6 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
rc = qeth_l2_xmit_osn(card, skb, queue);
else
rc = qeth_xmit(card, skb, queue, qeth_get_ip_version(skb),
- qeth_get_ether_cast_type(skb),
qeth_l2_fill_header);
if (!rc) {
@@ -964,33 +965,6 @@ static void __exit qeth_l2_exit(void)
pr_info("unregister layer 2 discipline\n");
}
-static int qeth_l2_pm_suspend(struct ccwgroup_device *gdev)
-{
- struct qeth_card *card = dev_get_drvdata(&gdev->dev);
-
- qeth_set_allowed_threads(card, 0, 1);
- wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
- if (gdev->state == CCWGROUP_OFFLINE)
- return 0;
-
- qeth_l2_set_offline(gdev);
- return 0;
-}
-
-static int qeth_l2_pm_resume(struct ccwgroup_device *gdev)
-{
- struct qeth_card *card = dev_get_drvdata(&gdev->dev);
- int rc;
-
- rc = qeth_l2_set_online(gdev);
-
- qeth_set_allowed_threads(card, 0xffffffff, 0);
- if (rc)
- dev_warn(&card->gdev->dev, "The qeth device driver "
- "failed to recover an error on the device\n");
- return rc;
-}
-
/* Returns zero if the command is successfully "consumed" */
static int qeth_l2_control_event(struct qeth_card *card,
struct qeth_ipa_cmd *cmd)
@@ -1020,9 +994,6 @@ struct qeth_discipline qeth_l2_discipline = {
.remove = qeth_l2_remove_device,
.set_online = qeth_l2_set_online,
.set_offline = qeth_l2_set_offline,
- .freeze = qeth_l2_pm_suspend,
- .thaw = qeth_l2_pm_resume,
- .restore = qeth_l2_pm_resume,
.do_ioctl = NULL,
.control_event_handler = qeth_l2_control_event,
};
@@ -1032,7 +1003,7 @@ static void qeth_osn_assist_cb(struct qeth_card *card,
struct qeth_cmd_buffer *iob)
{
qeth_notify_reply(iob->reply, 0);
- qeth_release_buffer(iob);
+ qeth_put_cmd(iob);
}
int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
@@ -1040,6 +1011,8 @@ int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
struct qeth_cmd_buffer *iob;
struct qeth_card *card;
+ if (data_len < 0)
+ return -EINVAL;
if (!dev)
return -ENODEV;
card = dev->ml_priv;
@@ -1048,7 +1021,9 @@ int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
QETH_CARD_TEXT(card, 2, "osnsdmc");
if (!qeth_card_hw_is_reachable(card))
return -ENODEV;
- iob = qeth_get_buffer(&card->write);
+
+ iob = qeth_alloc_cmd(&card->write, IPA_PDU_HEADER_SIZE + data_len, 1,
+ QETH_IPA_TIMEOUT);
if (!iob)
return -ENOMEM;
@@ -1421,22 +1396,25 @@ static int qeth_bridgeport_makerc(struct qeth_card *card,
static struct qeth_cmd_buffer *qeth_sbp_build_cmd(struct qeth_card *card,
enum qeth_ipa_sbp_cmd sbp_cmd,
- unsigned int cmd_length)
+ unsigned int data_length)
{
enum qeth_ipa_cmds ipa_cmd = IS_IQD(card) ? IPA_CMD_SETBRIDGEPORT_IQD :
IPA_CMD_SETBRIDGEPORT_OSA;
+ struct qeth_ipacmd_sbp_hdr *hdr;
struct qeth_cmd_buffer *iob;
- struct qeth_ipa_cmd *cmd;
- iob = qeth_get_ipacmd_buffer(card, ipa_cmd, 0);
+ iob = qeth_ipa_alloc_cmd(card, ipa_cmd, QETH_PROT_NONE,
+ data_length +
+ offsetof(struct qeth_ipacmd_setbridgeport,
+ data));
if (!iob)
return iob;
- cmd = __ipa_cmd(iob);
- cmd->data.sbp.hdr.cmdlength = sizeof(struct qeth_ipacmd_sbp_hdr) +
- cmd_length;
- cmd->data.sbp.hdr.command_code = sbp_cmd;
- cmd->data.sbp.hdr.used_total = 1;
- cmd->data.sbp.hdr.seq_no = 1;
+
+ hdr = &__ipa_cmd(iob)->data.sbp.hdr;
+ hdr->cmdlength = sizeof(*hdr) + data_length;
+ hdr->command_code = sbp_cmd;
+ hdr->used_total = 1;
+ hdr->seq_no = 1;
return iob;
}
@@ -1471,7 +1449,7 @@ static void qeth_bridgeport_query_support(struct qeth_card *card)
QETH_CARD_TEXT(card, 2, "brqsuppo");
iob = qeth_sbp_build_cmd(card, IPA_SBP_QUERY_COMMANDS_SUPPORTED,
- sizeof(struct qeth_sbp_query_cmds_supp));
+ SBP_DATA_SIZEOF(query_cmds_supp));
if (!iob)
return;
@@ -1563,23 +1541,21 @@ static int qeth_bridgeport_set_cb(struct qeth_card *card,
*/
int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role)
{
- int cmdlength;
struct qeth_cmd_buffer *iob;
enum qeth_ipa_sbp_cmd setcmd;
+ unsigned int cmdlength = 0;
QETH_CARD_TEXT(card, 2, "brsetrol");
switch (role) {
case QETH_SBP_ROLE_NONE:
setcmd = IPA_SBP_RESET_BRIDGE_PORT_ROLE;
- cmdlength = sizeof(struct qeth_sbp_reset_role);
break;
case QETH_SBP_ROLE_PRIMARY:
setcmd = IPA_SBP_SET_PRIMARY_BRIDGE_PORT;
- cmdlength = sizeof(struct qeth_sbp_set_primary);
+ cmdlength = SBP_DATA_SIZEOF(set_primary);
break;
case QETH_SBP_ROLE_SECONDARY:
setcmd = IPA_SBP_SET_SECONDARY_BRIDGE_PORT;
- cmdlength = sizeof(struct qeth_sbp_set_secondary);
break;
default:
return -EINVAL;
@@ -1729,10 +1705,6 @@ static int qeth_l2_vnicc_makerc(struct qeth_card *card, u16 ipa_rc)
struct _qeth_l2_vnicc_request_cbctl {
u32 sub_cmd;
struct {
- u32 vnic_char;
- u32 timeout;
- } param;
- struct {
union{
u32 *sup_cmds;
u32 *timeout;
@@ -1754,80 +1726,52 @@ static int qeth_l2_vnicc_request_cb(struct qeth_card *card,
if (cmd->hdr.return_code)
return qeth_l2_vnicc_makerc(card, cmd->hdr.return_code);
/* return results to caller */
- card->options.vnicc.sup_chars = rep->hdr.sup;
- card->options.vnicc.cur_chars = rep->hdr.cur;
+ card->options.vnicc.sup_chars = rep->vnicc_cmds.supported;
+ card->options.vnicc.cur_chars = rep->vnicc_cmds.enabled;
if (cbctl->sub_cmd == IPA_VNICC_QUERY_CMDS)
- *cbctl->result.sup_cmds = rep->query_cmds.sup_cmds;
+ *cbctl->result.sup_cmds = rep->data.query_cmds.sup_cmds;
if (cbctl->sub_cmd == IPA_VNICC_GET_TIMEOUT)
- *cbctl->result.timeout = rep->getset_timeout.timeout;
+ *cbctl->result.timeout = rep->data.getset_timeout.timeout;
return 0;
}
-/* generic VNICC request */
-static int qeth_l2_vnicc_request(struct qeth_card *card,
- struct _qeth_l2_vnicc_request_cbctl *cbctl)
+static struct qeth_cmd_buffer *qeth_l2_vnicc_build_cmd(struct qeth_card *card,
+ u32 vnicc_cmd,
+ unsigned int data_length)
{
- struct qeth_ipacmd_vnicc *req;
+ struct qeth_ipacmd_vnicc_hdr *hdr;
struct qeth_cmd_buffer *iob;
- struct qeth_ipa_cmd *cmd;
-
- QETH_CARD_TEXT(card, 2, "vniccreq");
- /* get new buffer for request */
- iob = qeth_get_ipacmd_buffer(card, IPA_CMD_VNICC, 0);
+ iob = qeth_ipa_alloc_cmd(card, IPA_CMD_VNICC, QETH_PROT_NONE,
+ data_length +
+ offsetof(struct qeth_ipacmd_vnicc, data));
if (!iob)
- return -ENOMEM;
-
- /* create header for request */
- cmd = __ipa_cmd(iob);
- req = &cmd->data.vnicc;
-
- /* create sub command header for request */
- req->sub_hdr.data_length = sizeof(req->sub_hdr);
- req->sub_hdr.sub_command = cbctl->sub_cmd;
-
- /* create sub command specific request fields */
- switch (cbctl->sub_cmd) {
- case IPA_VNICC_QUERY_CHARS:
- break;
- case IPA_VNICC_QUERY_CMDS:
- req->sub_hdr.data_length += sizeof(req->query_cmds);
- req->query_cmds.vnic_char = cbctl->param.vnic_char;
- break;
- case IPA_VNICC_ENABLE:
- case IPA_VNICC_DISABLE:
- req->sub_hdr.data_length += sizeof(req->set_char);
- req->set_char.vnic_char = cbctl->param.vnic_char;
- break;
- case IPA_VNICC_SET_TIMEOUT:
- req->getset_timeout.timeout = cbctl->param.timeout;
- /* fallthrough */
- case IPA_VNICC_GET_TIMEOUT:
- req->sub_hdr.data_length += sizeof(req->getset_timeout);
- req->getset_timeout.vnic_char = cbctl->param.vnic_char;
- break;
- default:
- qeth_release_buffer(iob);
- return -EOPNOTSUPP;
- }
+ return NULL;
- /* send request */
- return qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb, cbctl);
+ hdr = &__ipa_cmd(iob)->data.vnicc.hdr;
+ hdr->data_length = sizeof(*hdr) + data_length;
+ hdr->sub_command = vnicc_cmd;
+ return iob;
}
/* VNICC query VNIC characteristics request */
static int qeth_l2_vnicc_query_chars(struct qeth_card *card)
{
struct _qeth_l2_vnicc_request_cbctl cbctl;
+ struct qeth_cmd_buffer *iob;
+
+ QETH_CARD_TEXT(card, 2, "vniccqch");
+ iob = qeth_l2_vnicc_build_cmd(card, IPA_VNICC_QUERY_CHARS, 0);
+ if (!iob)
+ return -ENOMEM;
/* prepare callback control */
cbctl.sub_cmd = IPA_VNICC_QUERY_CHARS;
- QETH_CARD_TEXT(card, 2, "vniccqch");
- return qeth_l2_vnicc_request(card, &cbctl);
+ return qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb, &cbctl);
}
/* VNICC query sub commands request */
@@ -1835,14 +1779,21 @@ static int qeth_l2_vnicc_query_cmds(struct qeth_card *card, u32 vnic_char,
u32 *sup_cmds)
{
struct _qeth_l2_vnicc_request_cbctl cbctl;
+ struct qeth_cmd_buffer *iob;
+
+ QETH_CARD_TEXT(card, 2, "vniccqcm");
+ iob = qeth_l2_vnicc_build_cmd(card, IPA_VNICC_QUERY_CMDS,
+ VNICC_DATA_SIZEOF(query_cmds));
+ if (!iob)
+ return -ENOMEM;
+
+ __ipa_cmd(iob)->data.vnicc.data.query_cmds.vnic_char = vnic_char;
/* prepare callback control */
cbctl.sub_cmd = IPA_VNICC_QUERY_CMDS;
- cbctl.param.vnic_char = vnic_char;
cbctl.result.sup_cmds = sup_cmds;
- QETH_CARD_TEXT(card, 2, "vniccqcm");
- return qeth_l2_vnicc_request(card, &cbctl);
+ return qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb, &cbctl);
}
/* VNICC enable/disable characteristic request */
@@ -1850,31 +1801,47 @@ static int qeth_l2_vnicc_set_char(struct qeth_card *card, u32 vnic_char,
u32 cmd)
{
struct _qeth_l2_vnicc_request_cbctl cbctl;
+ struct qeth_cmd_buffer *iob;
+
+ QETH_CARD_TEXT(card, 2, "vniccedc");
+ iob = qeth_l2_vnicc_build_cmd(card, cmd, VNICC_DATA_SIZEOF(set_char));
+ if (!iob)
+ return -ENOMEM;
+
+ __ipa_cmd(iob)->data.vnicc.data.set_char.vnic_char = vnic_char;
/* prepare callback control */
cbctl.sub_cmd = cmd;
- cbctl.param.vnic_char = vnic_char;
- QETH_CARD_TEXT(card, 2, "vniccedc");
- return qeth_l2_vnicc_request(card, &cbctl);
+ return qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb, &cbctl);
}
/* VNICC get/set timeout for characteristic request */
static int qeth_l2_vnicc_getset_timeout(struct qeth_card *card, u32 vnicc,
u32 cmd, u32 *timeout)
{
+ struct qeth_vnicc_getset_timeout *getset_timeout;
struct _qeth_l2_vnicc_request_cbctl cbctl;
+ struct qeth_cmd_buffer *iob;
+
+ QETH_CARD_TEXT(card, 2, "vniccgst");
+ iob = qeth_l2_vnicc_build_cmd(card, cmd,
+ VNICC_DATA_SIZEOF(getset_timeout));
+ if (!iob)
+ return -ENOMEM;
+
+ getset_timeout = &__ipa_cmd(iob)->data.vnicc.data.getset_timeout;
+ getset_timeout->vnic_char = vnicc;
+
+ if (cmd == IPA_VNICC_SET_TIMEOUT)
+ getset_timeout->timeout = *timeout;
/* prepare callback control */
cbctl.sub_cmd = cmd;
- cbctl.param.vnic_char = vnicc;
- if (cmd == IPA_VNICC_SET_TIMEOUT)
- cbctl.param.timeout = *timeout;
if (cmd == IPA_VNICC_GET_TIMEOUT)
cbctl.result.timeout = timeout;
- QETH_CARD_TEXT(card, 2, "vniccgst");
- return qeth_l2_vnicc_request(card, &cbctl);
+ return qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb, &cbctl);
}
/* set current VNICC flag state; called from sysfs store function */
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 4d66f9556451..2dd99f103671 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -32,7 +32,6 @@
#include <net/route.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
-#include <net/ip6_fib.h>
#include <net/iucv/af_iucv.h>
#include <linux/hashtable.h>
@@ -377,7 +376,8 @@ static int qeth_l3_send_setdelmc(struct qeth_card *card,
QETH_CARD_TEXT(card, 4, "setdelmc");
- iob = qeth_get_ipacmd_buffer(card, ipacmd, addr->proto);
+ iob = qeth_ipa_alloc_cmd(card, ipacmd, addr->proto,
+ IPA_DATA_SIZEOF(setdelipm));
if (!iob)
return -ENOMEM;
cmd = __ipa_cmd(iob);
@@ -429,7 +429,8 @@ static int qeth_l3_send_setdelip(struct qeth_card *card,
QETH_CARD_TEXT(card, 4, "setdelip");
- iob = qeth_get_ipacmd_buffer(card, ipacmd, addr->proto);
+ iob = qeth_ipa_alloc_cmd(card, ipacmd, addr->proto,
+ IPA_DATA_SIZEOF(setdelip6));
if (!iob)
return -ENOMEM;
cmd = __ipa_cmd(iob);
@@ -461,7 +462,8 @@ static int qeth_l3_send_setrouting(struct qeth_card *card,
struct qeth_cmd_buffer *iob;
QETH_CARD_TEXT(card, 4, "setroutg");
- iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETRTG, prot);
+ iob = qeth_ipa_alloc_cmd(card, IPA_CMD_SETRTG, prot,
+ IPA_DATA_SIZEOF(setrtg));
if (!iob)
return -ENOMEM;
cmd = __ipa_cmd(iob);
@@ -767,7 +769,7 @@ static int qeth_l3_start_ipa_arp_processing(struct qeth_card *card)
return 0;
}
rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING,
- IPA_CMD_ASS_START, 0);
+ IPA_CMD_ASS_START, NULL);
if (rc) {
dev_warn(&card->gdev->dev,
"Starting ARP processing support for %s failed\n",
@@ -790,7 +792,7 @@ static int qeth_l3_start_ipa_source_mac(struct qeth_card *card)
}
rc = qeth_send_simple_setassparms(card, IPA_SOURCE_MAC,
- IPA_CMD_ASS_START, 0);
+ IPA_CMD_ASS_START, NULL);
if (rc)
dev_warn(&card->gdev->dev,
"Starting source MAC-address support for %s failed\n",
@@ -811,7 +813,7 @@ static int qeth_l3_start_ipa_vlan(struct qeth_card *card)
}
rc = qeth_send_simple_setassparms(card, IPA_VLAN_PRIO,
- IPA_CMD_ASS_START, 0);
+ IPA_CMD_ASS_START, NULL);
if (rc) {
dev_warn(&card->gdev->dev,
"Starting VLAN support for %s failed\n",
@@ -836,7 +838,7 @@ static int qeth_l3_start_ipa_multicast(struct qeth_card *card)
}
rc = qeth_send_simple_setassparms(card, IPA_MULTICASTING,
- IPA_CMD_ASS_START, 0);
+ IPA_CMD_ASS_START, NULL);
if (rc) {
dev_warn(&card->gdev->dev,
"Starting multicast support for %s failed\n",
@@ -850,6 +852,7 @@ static int qeth_l3_start_ipa_multicast(struct qeth_card *card)
static int qeth_l3_softsetup_ipv6(struct qeth_card *card)
{
+ u32 ipv6_data = 3;
int rc;
QETH_CARD_TEXT(card, 3, "softipv6");
@@ -857,16 +860,16 @@ static int qeth_l3_softsetup_ipv6(struct qeth_card *card)
if (IS_IQD(card))
goto out;
- rc = qeth_send_simple_setassparms(card, IPA_IPV6,
- IPA_CMD_ASS_START, 3);
+ rc = qeth_send_simple_setassparms(card, IPA_IPV6, IPA_CMD_ASS_START,
+ &ipv6_data);
if (rc) {
dev_err(&card->gdev->dev,
"Activating IPv6 support for %s failed\n",
QETH_CARD_IFNAME(card));
return rc;
}
- rc = qeth_send_simple_setassparms_v6(card, IPA_IPV6,
- IPA_CMD_ASS_START, 0);
+ rc = qeth_send_simple_setassparms_v6(card, IPA_IPV6, IPA_CMD_ASS_START,
+ NULL);
if (rc) {
dev_err(&card->gdev->dev,
"Activating IPv6 support for %s failed\n",
@@ -874,7 +877,7 @@ static int qeth_l3_softsetup_ipv6(struct qeth_card *card)
return rc;
}
rc = qeth_send_simple_setassparms_v6(card, IPA_PASSTHRU,
- IPA_CMD_ASS_START, 0);
+ IPA_CMD_ASS_START, NULL);
if (rc) {
dev_warn(&card->gdev->dev,
"Enabling the passthrough mode for %s failed\n",
@@ -900,6 +903,7 @@ static int qeth_l3_start_ipa_ipv6(struct qeth_card *card)
static int qeth_l3_start_ipa_broadcast(struct qeth_card *card)
{
+ u32 filter_data = 1;
int rc;
QETH_CARD_TEXT(card, 3, "stbrdcst");
@@ -912,7 +916,7 @@ static int qeth_l3_start_ipa_broadcast(struct qeth_card *card)
goto out;
}
rc = qeth_send_simple_setassparms(card, IPA_FILTERING,
- IPA_CMD_ASS_START, 0);
+ IPA_CMD_ASS_START, NULL);
if (rc) {
dev_warn(&card->gdev->dev, "Enabling broadcast filtering for "
"%s failed\n", QETH_CARD_IFNAME(card));
@@ -920,7 +924,7 @@ static int qeth_l3_start_ipa_broadcast(struct qeth_card *card)
}
rc = qeth_send_simple_setassparms(card, IPA_FILTERING,
- IPA_CMD_ASS_CONFIGURE, 1);
+ IPA_CMD_ASS_CONFIGURE, &filter_data);
if (rc) {
dev_warn(&card->gdev->dev,
"Setting up broadcast filtering for %s failed\n",
@@ -930,7 +934,7 @@ static int qeth_l3_start_ipa_broadcast(struct qeth_card *card)
card->info.broadcast_capable = QETH_BROADCAST_WITH_ECHO;
dev_info(&card->gdev->dev, "Broadcast enabled\n");
rc = qeth_send_simple_setassparms(card, IPA_FILTERING,
- IPA_CMD_ASS_ENABLE, 1);
+ IPA_CMD_ASS_ENABLE, &filter_data);
if (rc) {
dev_warn(&card->gdev->dev, "Setting up broadcast echo "
"filtering for %s failed\n", QETH_CARD_IFNAME(card));
@@ -981,8 +985,8 @@ static int qeth_l3_iqd_read_initial_mac(struct qeth_card *card)
QETH_CARD_TEXT(card, 2, "hsrmac");
- iob = qeth_get_ipacmd_buffer(card, IPA_CMD_CREATE_ADDR,
- QETH_PROT_IPV6);
+ iob = qeth_ipa_alloc_cmd(card, IPA_CMD_CREATE_ADDR, QETH_PROT_IPV6,
+ IPA_DATA_SIZEOF(create_destroy_addr));
if (!iob)
return -ENOMEM;
cmd = __ipa_cmd(iob);
@@ -1025,8 +1029,8 @@ static int qeth_l3_get_unique_id(struct qeth_card *card)
return 0;
}
- iob = qeth_get_ipacmd_buffer(card, IPA_CMD_CREATE_ADDR,
- QETH_PROT_IPV6);
+ iob = qeth_ipa_alloc_cmd(card, IPA_CMD_CREATE_ADDR, QETH_PROT_IPV6,
+ IPA_DATA_SIZEOF(create_destroy_addr));
if (!iob)
return -ENOMEM;
cmd = __ipa_cmd(iob);
@@ -1102,12 +1106,10 @@ qeth_diags_trace(struct qeth_card *card, enum qeth_diags_trace_cmds diags_cmd)
QETH_CARD_TEXT(card, 2, "diagtrac");
- iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
+ iob = qeth_get_diag_cmd(card, QETH_DIAGS_CMD_TRACE, 0);
if (!iob)
return -ENOMEM;
cmd = __ipa_cmd(iob);
- cmd->data.diagass.subcmd_len = 16;
- cmd->data.diagass.subcmd = QETH_DIAGS_CMD_TRACE;
cmd->data.diagass.type = QETH_DIAGS_TYPE_HIPERSOCKET;
cmd->data.diagass.action = diags_cmd;
return qeth_send_ipa_cmd(card, iob, qeth_diags_trace_cb, NULL);
@@ -1309,6 +1311,15 @@ static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev,
static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
struct qeth_hdr *hdr)
{
+ struct af_iucv_trans_hdr *iucv = (struct af_iucv_trans_hdr *) skb->data;
+ struct net_device *dev = skb->dev;
+
+ if (IS_IQD(card) && iucv->magic == ETH_P_AF_IUCV) {
+ dev_hard_header(skb, dev, ETH_P_AF_IUCV, dev->dev_addr,
+ "FAKELL", skb->len);
+ return;
+ }
+
if (!(hdr->hdr.l3.flags & QETH_HDR_PASSTHRU)) {
u16 prot = (hdr->hdr.l3.flags & QETH_HDR_IPV6) ? ETH_P_IPV6 :
ETH_P_IP;
@@ -1342,8 +1353,6 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
tg_addr, "FAKELL", skb->len);
}
- skb->protocol = eth_type_trans(skb, card->dev);
-
/* copy VLAN tag from hdr into skb */
if (!card->options.sniffer &&
(hdr->hdr.l3.ext_flags & (QETH_HDR_EXT_VLAN_FRAME |
@@ -1360,12 +1369,10 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
int budget, int *done)
{
- struct net_device *dev = card->dev;
int work_done = 0;
struct sk_buff *skb;
struct qeth_hdr *hdr;
unsigned int len;
- __u16 magic;
*done = 0;
WARN_ON_ONCE(!budget);
@@ -1379,23 +1386,12 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
}
switch (hdr->hdr.l3.id) {
case QETH_HEADER_TYPE_LAYER3:
- magic = *(__u16 *)skb->data;
- if (IS_IQD(card) && magic == ETH_P_AF_IUCV) {
- len = skb->len;
- dev_hard_header(skb, dev, ETH_P_AF_IUCV,
- dev->dev_addr, "FAKELL", len);
- skb->protocol = eth_type_trans(skb, dev);
- netif_receive_skb(skb);
- } else {
- qeth_l3_rebuild_skb(card, skb, hdr);
- len = skb->len;
- napi_gro_receive(&card->napi, skb);
- }
- break;
+ qeth_l3_rebuild_skb(card, skb, hdr);
+ /* fall through */
case QETH_HEADER_TYPE_LAYER2: /* for HiperSockets sniffer */
skb->protocol = eth_type_trans(skb, skb->dev);
len = skb->len;
- netif_receive_skb(skb);
+ napi_gro_receive(&card->napi, skb);
break;
default:
dev_kfree_skb_any(skb);
@@ -1436,7 +1432,6 @@ static void qeth_l3_stop_card(struct qeth_card *card)
card->state = CARD_STATE_DOWN;
}
- qeth_clear_cmd_buffers(&card->write);
flush_workqueue(card->event_wq);
}
@@ -1559,7 +1554,8 @@ static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries)
}
iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING,
- IPA_CMD_ASS_ARP_SET_NO_ENTRIES, 4,
+ IPA_CMD_ASS_ARP_SET_NO_ENTRIES,
+ SETASS_DATA_SIZEOF(flags_32bit),
QETH_PROT_IPV4);
if (!iob)
return -ENOMEM;
@@ -1705,9 +1701,7 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card,
iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING,
IPA_CMD_ASS_ARP_QUERY_INFO,
- sizeof(struct qeth_arp_query_data)
- - sizeof(char),
- prot);
+ SETASS_DATA_SIZEOF(query_arp), prot);
if (!iob)
return -ENOMEM;
cmd = __ipa_cmd(iob);
@@ -1791,7 +1785,8 @@ static int qeth_l3_arp_modify_entry(struct qeth_card *card,
}
iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING, arp_cmd,
- sizeof(*cmd_entry), QETH_PROT_IPV4);
+ SETASS_DATA_SIZEOF(arp_entry),
+ QETH_PROT_IPV4);
if (!iob)
return -ENOMEM;
@@ -1882,26 +1877,17 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
return rc;
}
-static int qeth_l3_get_cast_type(struct sk_buff *skb)
+static int qeth_l3_get_cast_type_rcu(struct sk_buff *skb, struct dst_entry *dst,
+ int ipv)
{
- int ipv = qeth_get_ip_version(skb);
struct neighbour *n = NULL;
- struct dst_entry *dst;
- rcu_read_lock();
- dst = skb_dst(skb);
- if (dst) {
- struct rt6_info *rt = (struct rt6_info *) dst;
-
- dst = dst_check(dst, (ipv == 6) ? rt6_get_cookie(rt) : 0);
- if (dst)
- n = dst_neigh_lookup_skb(dst, skb);
- }
+ if (dst)
+ n = dst_neigh_lookup_skb(dst, skb);
if (n) {
int cast_type = n->type;
- rcu_read_unlock();
neigh_release(n);
if ((cast_type == RTN_BROADCAST) ||
(cast_type == RTN_MULTICAST) ||
@@ -1909,7 +1895,6 @@ static int qeth_l3_get_cast_type(struct sk_buff *skb)
return cast_type;
return RTN_UNICAST;
}
- rcu_read_unlock();
/* no neighbour (eg AF_PACKET), fall back to target's IP address ... */
switch (ipv) {
@@ -1927,6 +1912,20 @@ static int qeth_l3_get_cast_type(struct sk_buff *skb)
}
}
+static int qeth_l3_get_cast_type(struct sk_buff *skb)
+{
+ int ipv = qeth_get_ip_version(skb);
+ struct dst_entry *dst;
+ int cast_type;
+
+ rcu_read_lock();
+ dst = qeth_dst_check_rcu(skb, ipv);
+ cast_type = qeth_l3_get_cast_type_rcu(skb, dst, ipv);
+ rcu_read_unlock();
+
+ return cast_type;
+}
+
static u8 qeth_l3_cast_type_to_flag(int cast_type)
{
if (cast_type == RTN_MULTICAST)
@@ -1940,12 +1939,13 @@ static u8 qeth_l3_cast_type_to_flag(int cast_type)
static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue,
struct qeth_hdr *hdr, struct sk_buff *skb,
- int ipv, int cast_type, unsigned int data_len)
+ int ipv, unsigned int data_len)
{
struct qeth_hdr_layer3 *l3_hdr = &hdr->hdr.l3;
struct vlan_ethhdr *veth = vlan_eth_hdr(skb);
struct qeth_card *card = queue->card;
struct dst_entry *dst;
+ int cast_type;
hdr->hdr.l3.length = data_len;
@@ -1982,36 +1982,23 @@ static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue,
hdr->hdr.l3.vlan_id = ntohs(veth->h_vlan_TCI);
}
- l3_hdr->flags = qeth_l3_cast_type_to_flag(cast_type);
-
- /* OSA only: */
- if (!ipv) {
- l3_hdr->flags |= QETH_HDR_PASSTHRU;
- return;
- }
-
rcu_read_lock();
- dst = skb_dst(skb);
+ dst = qeth_dst_check_rcu(skb, ipv);
- if (ipv == 4) {
- struct rtable *rt;
+ if (IS_IQD(card) && skb_get_queue_mapping(skb) != QETH_IQD_MCAST_TXQ)
+ cast_type = RTN_UNICAST;
+ else
+ cast_type = qeth_l3_get_cast_type_rcu(skb, dst, ipv);
+ l3_hdr->flags |= qeth_l3_cast_type_to_flag(cast_type);
- if (dst)
- dst = dst_check(dst, 0);
- rt = (struct rtable *) dst;
+ if (ipv == 4) {
+ struct rtable *rt = (struct rtable *) dst;
*((__be32 *) &hdr->hdr.l3.next_hop.ipv4.addr) = (rt) ?
rt_nexthop(rt, ip_hdr(skb)->daddr) :
ip_hdr(skb)->daddr;
- } else {
- /* IPv6 */
- struct rt6_info *rt;
-
- if (dst) {
- rt = (struct rt6_info *) dst;
- dst = dst_check(dst, rt6_get_cookie(rt));
- }
- rt = (struct rt6_info *) dst;
+ } else if (ipv == 6) {
+ struct rt6_info *rt = (struct rt6_info *) dst;
if (rt && !ipv6_addr_any(&rt->rt6i_gateway))
l3_hdr->next_hop.ipv6_addr = rt->rt6i_gateway;
@@ -2021,6 +2008,9 @@ static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue,
hdr->hdr.l3.flags |= QETH_HDR_IPV6;
if (!IS_IQD(card))
hdr->hdr.l3.flags |= QETH_HDR_PASSTHRU;
+ } else {
+ /* OSA only: */
+ l3_hdr->flags |= QETH_HDR_PASSTHRU;
}
rcu_read_unlock();
}
@@ -2040,7 +2030,7 @@ static void qeth_l3_fixup_headers(struct sk_buff *skb)
}
static int qeth_l3_xmit(struct qeth_card *card, struct sk_buff *skb,
- struct qeth_qdio_out_q *queue, int ipv, int cast_type)
+ struct qeth_qdio_out_q *queue, int ipv)
{
unsigned int hw_hdr_len;
int rc;
@@ -2054,7 +2044,7 @@ static int qeth_l3_xmit(struct qeth_card *card, struct sk_buff *skb,
skb_pull(skb, ETH_HLEN);
qeth_l3_fixup_headers(skb);
- return qeth_xmit(card, skb, queue, ipv, cast_type, qeth_l3_fill_header);
+ return qeth_xmit(card, skb, queue, ipv, qeth_l3_fill_header);
}
static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
@@ -2065,7 +2055,7 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
int ipv = qeth_get_ip_version(skb);
struct qeth_qdio_out_q *queue;
int tx_bytes = skb->len;
- int cast_type, rc;
+ int rc;
if (IS_IQD(card)) {
queue = card->qdio.out_qs[qeth_iqd_translate_txq(dev, txq)];
@@ -2076,24 +2066,18 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
(card->options.cq == QETH_CQ_ENABLED &&
skb->protocol != htons(ETH_P_AF_IUCV)))
goto tx_drop;
-
- if (txq == QETH_IQD_MCAST_TXQ)
- cast_type = qeth_l3_get_cast_type(skb);
- else
- cast_type = RTN_UNICAST;
} else {
queue = card->qdio.out_qs[txq];
- cast_type = qeth_l3_get_cast_type(skb);
}
- if (cast_type == RTN_BROADCAST && !card->info.broadcast_capable)
+ if (!(dev->flags & IFF_BROADCAST) &&
+ qeth_l3_get_cast_type(skb) == RTN_BROADCAST)
goto tx_drop;
if (ipv == 4 || IS_IQD(card))
- rc = qeth_l3_xmit(card, skb, queue, ipv, cast_type);
+ rc = qeth_l3_xmit(card, skb, queue, ipv);
else
- rc = qeth_xmit(card, skb, queue, ipv, cast_type,
- qeth_l3_fill_header);
+ rc = qeth_xmit(card, skb, queue, ipv, qeth_l3_fill_header);
if (!rc) {
QETH_TXQ_STAT_INC(queue, tx_packets);
@@ -2498,33 +2482,6 @@ static int qeth_l3_recover(void *ptr)
return 0;
}
-static int qeth_l3_pm_suspend(struct ccwgroup_device *gdev)
-{
- struct qeth_card *card = dev_get_drvdata(&gdev->dev);
-
- qeth_set_allowed_threads(card, 0, 1);
- wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
- if (gdev->state == CCWGROUP_OFFLINE)
- return 0;
-
- qeth_l3_set_offline(gdev);
- return 0;
-}
-
-static int qeth_l3_pm_resume(struct ccwgroup_device *gdev)
-{
- struct qeth_card *card = dev_get_drvdata(&gdev->dev);
- int rc;
-
- rc = qeth_l3_set_online(gdev);
-
- qeth_set_allowed_threads(card, 0xffffffff, 0);
- if (rc)
- dev_warn(&card->gdev->dev, "The qeth device driver "
- "failed to recover an error on the device\n");
- return rc;
-}
-
/* Returns zero if the command is successfully "consumed" */
static int qeth_l3_control_event(struct qeth_card *card,
struct qeth_ipa_cmd *cmd)
@@ -2540,9 +2497,6 @@ struct qeth_discipline qeth_l3_discipline = {
.remove = qeth_l3_remove_device,
.set_online = qeth_l3_set_online,
.set_offline = qeth_l3_set_offline,
- .freeze = qeth_l3_pm_suspend,
- .thaw = qeth_l3_pm_resume,
- .restore = qeth_l3_pm_resume,
.do_ioctl = qeth_l3_do_ioctl,
.control_event_handler = qeth_l3_control_event,
};
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 1b58e63b4e51..acb930b8c6a6 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -987,6 +987,9 @@ static int qedi_find_boot_info(struct qedi_ctx *qedi,
if (!iscsi_is_session_online(cls_sess))
continue;
+ if (!sess->targetname)
+ continue;
+
if (pri_ctrl_flags) {
if (!strcmp(pri_tgt->iscsi_name, sess->targetname) &&
!strcmp(pri_tgt->ip_addr, ep_ip_addr)) {
diff --git a/drivers/scsi/qedi/qedi_version.h b/drivers/scsi/qedi/qedi_version.h
index f56f0ba0c4a8..0ac1055bd420 100644
--- a/drivers/scsi/qedi/qedi_version.h
+++ b/drivers/scsi/qedi/qedi_version.h
@@ -4,8 +4,8 @@
* Copyright (c) 2016 Cavium Inc.
*/
-#define QEDI_MODULE_VERSION "8.33.0.21"
+#define QEDI_MODULE_VERSION "8.37.0.20"
#define QEDI_DRIVER_MAJOR_VER 8
-#define QEDI_DRIVER_MINOR_VER 33
+#define QEDI_DRIVER_MINOR_VER 37
#define QEDI_DRIVER_REV_VER 0
-#define QEDI_DRIVER_ENG_VER 21
+#define QEDI_DRIVER_ENG_VER 20
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 172ef21827dd..d056f5e7cf93 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1731,8 +1731,8 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
!qla2x00_isp_reg_stat(ha))) {
sp->comp = &comp;
- rval = ha->isp_ops->abort_command(sp);
spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
+ rval = ha->isp_ops->abort_command(sp);
switch (rval) {
case QLA_SUCCESS:
diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
index 8a74ec30c3d2..d7d521b394c3 100644
--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
+++ b/drivers/scsi/ufs/ufshcd-pltfrm.c
@@ -430,24 +430,21 @@ int ufshcd_pltfrm_init(struct platform_device *pdev,
goto dealloc_host;
}
- pm_runtime_set_active(&pdev->dev);
- pm_runtime_enable(&pdev->dev);
-
ufshcd_init_lanes_per_dir(hba);
err = ufshcd_init(hba, mmio_base, irq);
if (err) {
dev_err(dev, "Initialization failed\n");
- goto out_disable_rpm;
+ goto dealloc_host;
}
platform_set_drvdata(pdev, hba);
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+
return 0;
-out_disable_rpm:
- pm_runtime_disable(&pdev->dev);
- pm_runtime_set_suspended(&pdev->dev);
dealloc_host:
ufshcd_dealloc_host(hba);
out:
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index d441bef72163..915010464572 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -275,9 +275,9 @@ static void afs_break_one_callback(struct afs_server *server,
struct afs_super_info *as = AFS_FS_S(cbi->sb);
struct afs_volume *volume = as->volume;
- write_lock(&volume->cb_break_lock);
+ write_lock(&volume->cb_v_break_lock);
volume->cb_v_break++;
- write_unlock(&volume->cb_break_lock);
+ write_unlock(&volume->cb_v_break_lock);
} else {
data.volume = NULL;
data.fid = *fid;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index b42d9d09669c..18a50d4febcf 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -56,6 +56,16 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren
}
/*
+ * Set the file size and block count. Estimate the number of 512 bytes blocks
+ * used, rounded up to nearest 1K for consistency with other AFS clients.
+ */
+static void afs_set_i_size(struct afs_vnode *vnode, u64 size)
+{
+ i_size_write(&vnode->vfs_inode, size);
+ vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1;
+}
+
+/*
* Initialise an inode from the vnode status.
*/
static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
@@ -124,12 +134,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type);
}
- /*
- * Estimate 512 bytes blocks used, rounded up to nearest 1K
- * for consistency with other AFS clients.
- */
- inode->i_blocks = ((i_size_read(inode) + 1023) >> 10) << 1;
- i_size_write(&vnode->vfs_inode, status->size);
+ afs_set_i_size(vnode, status->size);
vnode->invalid_before = status->data_version;
inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
@@ -207,11 +212,13 @@ static void afs_apply_status(struct afs_fs_cursor *fc,
if (expected_version &&
*expected_version != status->data_version) {
- kdebug("vnode modified %llx on {%llx:%llu} [exp %llx] %s",
- (unsigned long long) status->data_version,
- vnode->fid.vid, vnode->fid.vnode,
- (unsigned long long) *expected_version,
- fc->type ? fc->type->name : "???");
+ if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
+ pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s\n",
+ vnode->fid.vid, vnode->fid.vnode,
+ (unsigned long long)*expected_version,
+ (unsigned long long)status->data_version,
+ fc->type ? fc->type->name : "???");
+
vnode->invalid_before = status->data_version;
if (vnode->status.type == AFS_FTYPE_DIR) {
if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
@@ -230,7 +237,7 @@ static void afs_apply_status(struct afs_fs_cursor *fc,
if (data_changed) {
inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
- i_size_write(&vnode->vfs_inode, status->size);
+ afs_set_i_size(vnode, status->size);
}
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index ce9559e98f17..0f84d0da5417 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -109,10 +109,8 @@ struct afs_call {
struct rxrpc_call *rxcall; /* RxRPC call handle */
struct key *key; /* security for this call */
struct afs_net *net; /* The network namespace */
- union {
- struct afs_server *server;
- struct afs_vlserver *vlserver;
- };
+ struct afs_server *server; /* The fileserver record if fs op (pins ref) */
+ struct afs_vlserver *vlserver; /* The vlserver record if vl op */
struct afs_cb_interest *cbi; /* Callback interest for server used */
struct afs_vnode *lvnode; /* vnode being locked */
void *request; /* request data (first part) */
@@ -616,7 +614,7 @@ struct afs_volume {
unsigned int servers_seq; /* Incremented each time ->servers changes */
unsigned cb_v_break; /* Break-everything counter. */
- rwlock_t cb_break_lock;
+ rwlock_t cb_v_break_lock;
afs_voltype_t type; /* type of volume */
short error;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 08fdb3951c49..1a414300b654 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -43,6 +43,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
atomic_set(&volume->usage, 1);
INIT_LIST_HEAD(&volume->proc_link);
rwlock_init(&volume->servers_lock);
+ rwlock_init(&volume->cb_v_break_lock);
memcpy(volume->name, vldb->name, vldb->name_len + 1);
slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9c8ca6cd3ce4..255f6754c70d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3077,8 +3077,7 @@ static const struct file_operations proc_tgid_base_operations = {
struct pid *tgid_pidfd_to_pid(const struct file *file)
{
- if (!d_is_dir(file->f_path.dentry) ||
- (file->f_op != &proc_tgid_base_operations))
+ if (file->f_op != &proc_tgid_base_operations)
return ERR_PTR(-EBADF);
return proc_pid(file_inode(file));
diff --git a/include/dt-bindings/clock/g12a-clkc.h b/include/dt-bindings/clock/g12a-clkc.h
index 82c9e0c020b2..e10470ed7c4f 100644
--- a/include/dt-bindings/clock/g12a-clkc.h
+++ b/include/dt-bindings/clock/g12a-clkc.h
@@ -130,7 +130,7 @@
#define CLKID_MALI_1_SEL 172
#define CLKID_MALI_1 174
#define CLKID_MALI 175
-#define CLKID_MPLL_5OM 177
+#define CLKID_MPLL_50M 177
#define CLKID_CPU_CLK 187
#define CLKID_PCIE_PLL 201
#define CLKID_VDEC_1 204
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 191621ff7594..ca956b672ac0 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -61,12 +61,14 @@ enum virtchnl_status_code {
#define VIRTCHNL_ERR_PARAM VIRTCHNL_STATUS_ERR_PARAM
#define VIRTCHNL_STATUS_NOT_SUPPORTED VIRTCHNL_STATUS_ERR_NOT_SUPPORTED
+#define VIRTCHNL_LINK_SPEED_2_5GB_SHIFT 0x0
#define VIRTCHNL_LINK_SPEED_100MB_SHIFT 0x1
#define VIRTCHNL_LINK_SPEED_1000MB_SHIFT 0x2
#define VIRTCHNL_LINK_SPEED_10GB_SHIFT 0x3
#define VIRTCHNL_LINK_SPEED_40GB_SHIFT 0x4
#define VIRTCHNL_LINK_SPEED_20GB_SHIFT 0x5
#define VIRTCHNL_LINK_SPEED_25GB_SHIFT 0x6
+#define VIRTCHNL_LINK_SPEED_5GB_SHIFT 0x7
enum virtchnl_link_speed {
VIRTCHNL_LINK_SPEED_UNKNOWN = 0,
@@ -76,6 +78,8 @@ enum virtchnl_link_speed {
VIRTCHNL_LINK_SPEED_40GB = BIT(VIRTCHNL_LINK_SPEED_40GB_SHIFT),
VIRTCHNL_LINK_SPEED_20GB = BIT(VIRTCHNL_LINK_SPEED_20GB_SHIFT),
VIRTCHNL_LINK_SPEED_25GB = BIT(VIRTCHNL_LINK_SPEED_25GB_SHIFT),
+ VIRTCHNL_LINK_SPEED_2_5GB = BIT(VIRTCHNL_LINK_SPEED_2_5GB_SHIFT),
+ VIRTCHNL_LINK_SPEED_5GB = BIT(VIRTCHNL_LINK_SPEED_5GB_SHIFT),
};
/* for hsplit_0 field of Rx HMC context */
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index bd79ae32909a..169fd25f6bc2 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -124,6 +124,14 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
loff_t *ppos, void **new_buf,
enum bpf_attach_type type);
+int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
+ int *optname, char __user *optval,
+ int *optlen, char **kernel_optval);
+int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
+ int optname, char __user *optval,
+ int __user *optlen, int max_optlen,
+ int retval);
+
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
struct bpf_map *map)
{
@@ -286,6 +294,38 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
__ret; \
})
+#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
+ kernel_optval) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled) \
+ __ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \
+ optname, optval, \
+ optlen, \
+ kernel_optval); \
+ __ret; \
+})
+
+#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled) \
+ get_user(__ret, optlen); \
+ __ret; \
+})
+
+#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \
+ max_optlen, retval) \
+({ \
+ int __ret = retval; \
+ if (cgroup_bpf_enabled) \
+ __ret = __cgroup_bpf_run_filter_getsockopt(sock, level, \
+ optname, optval, \
+ optlen, max_optlen, \
+ retval); \
+ __ret; \
+})
+
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype, struct bpf_prog *prog);
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
@@ -357,6 +397,11 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; })
+#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
+ optlen, max_optlen, retval) ({ retval; })
+#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
+ kernel_optval) ({ 0; })
#define for_each_cgroup_storage_type(stype) for (; false; )
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a62e7889b0b6..18f4cc2c6acd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -518,6 +518,7 @@ struct bpf_prog_array {
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
void bpf_prog_array_free(struct bpf_prog_array *progs);
int bpf_prog_array_length(struct bpf_prog_array *progs);
+bool bpf_prog_array_is_empty(struct bpf_prog_array *array);
int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
__u32 __user *prog_ids, u32 cnt);
@@ -1051,6 +1052,7 @@ extern const struct bpf_func_proto bpf_spin_unlock_proto;
extern const struct bpf_func_proto bpf_get_local_storage_proto;
extern const struct bpf_func_proto bpf_strtol_proto;
extern const struct bpf_func_proto bpf_strtoul_proto;
+extern const struct bpf_func_proto bpf_tcp_sock_proto;
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5a9975678d6f..eec5aeeeaf92 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -30,6 +30,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt)
#endif
#ifdef CONFIG_BPF_LIRC_MODE2
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
diff --git a/include/linux/dim.h b/include/linux/dim.h
new file mode 100644
index 000000000000..aa9bdd47a648
--- /dev/null
+++ b/include/linux/dim.h
@@ -0,0 +1,366 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef DIM_H
+#define DIM_H
+
+#include <linux/module.h>
+
+/**
+ * Number of events between DIM iterations.
+ * Causes a moderation of the algorithm run.
+ */
+#define DIM_NEVENTS 64
+
+/**
+ * Is a difference between values justifies taking an action.
+ * We consider 10% difference as significant.
+ */
+#define IS_SIGNIFICANT_DIFF(val, ref) \
+ (((100UL * abs((val) - (ref))) / (ref)) > 10)
+
+/**
+ * Calculate the gap between two values.
+ * Take wrap-around and variable size into consideration.
+ */
+#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \
+ & (BIT_ULL(bits) - 1))
+
+/**
+ * Structure for CQ moderation values.
+ * Used for communications between DIM and its consumer.
+ *
+ * @usec: CQ timer suggestion (by DIM)
+ * @pkts: CQ packet counter suggestion (by DIM)
+ * @cq_period_mode: CQ priod count mode (from CQE/EQE)
+ */
+struct dim_cq_moder {
+ u16 usec;
+ u16 pkts;
+ u16 comps;
+ u8 cq_period_mode;
+};
+
+/**
+ * Structure for DIM sample data.
+ * Used for communications between DIM and its consumer.
+ *
+ * @time: Sample timestamp
+ * @pkt_ctr: Number of packets
+ * @byte_ctr: Number of bytes
+ * @event_ctr: Number of events
+ */
+struct dim_sample {
+ ktime_t time;
+ u32 pkt_ctr;
+ u32 byte_ctr;
+ u16 event_ctr;
+ u32 comp_ctr;
+};
+
+/**
+ * Structure for DIM stats.
+ * Used for holding current measured rates.
+ *
+ * @ppms: Packets per msec
+ * @bpms: Bytes per msec
+ * @epms: Events per msec
+ */
+struct dim_stats {
+ int ppms; /* packets per msec */
+ int bpms; /* bytes per msec */
+ int epms; /* events per msec */
+ int cpms; /* completions per msec */
+ int cpe_ratio; /* ratio of completions to events */
+};
+
+/**
+ * Main structure for dynamic interrupt moderation (DIM).
+ * Used for holding all information about a specific DIM instance.
+ *
+ * @state: Algorithm state (see below)
+ * @prev_stats: Measured rates from previous iteration (for comparison)
+ * @start_sample: Sampled data at start of current iteration
+ * @work: Work to perform on action required
+ * @profile_ix: Current moderation profile
+ * @mode: CQ period count mode
+ * @tune_state: Algorithm tuning state (see below)
+ * @steps_right: Number of steps taken towards higher moderation
+ * @steps_left: Number of steps taken towards lower moderation
+ * @tired: Parking depth counter
+ */
+struct dim {
+ u8 state;
+ struct dim_stats prev_stats;
+ struct dim_sample start_sample;
+ struct dim_sample measuring_sample;
+ struct work_struct work;
+ u8 profile_ix;
+ u8 mode;
+ u8 tune_state;
+ u8 steps_right;
+ u8 steps_left;
+ u8 tired;
+};
+
+/**
+ * enum dim_cq_period_mode
+ *
+ * These are the modes for CQ period count.
+ *
+ * @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE
+ * @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset)
+ * @DIM_CQ_PERIOD_NUM_MODES: Number of modes
+ */
+enum {
+ DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
+ DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
+ DIM_CQ_PERIOD_NUM_MODES
+};
+
+/**
+ * enum dim_state
+ *
+ * These are the DIM algorithm states.
+ * These will determine if the algorithm is in a valid state to start an iteration.
+ *
+ * @DIM_START_MEASURE: This is the first iteration (also after applying a new profile)
+ * @DIM_MEASURE_IN_PROGRESS: Algorithm is already in progress - check if
+ * need to perform an action
+ * @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure
+ */
+enum {
+ DIM_START_MEASURE,
+ DIM_MEASURE_IN_PROGRESS,
+ DIM_APPLY_NEW_PROFILE,
+};
+
+/**
+ * enum dim_tune_state
+ *
+ * These are the DIM algorithm tune states.
+ * These will determine which action the algorithm should perform.
+ *
+ * @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference
+ * @DIM_PARKING_TIRED: Algorithm found a deep top point - don't exit if tired > 0
+ * @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels
+ * @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels
+ */
+enum {
+ DIM_PARKING_ON_TOP,
+ DIM_PARKING_TIRED,
+ DIM_GOING_RIGHT,
+ DIM_GOING_LEFT,
+};
+
+/**
+ * enum dim_stats_state
+ *
+ * These are the DIM algorithm statistics states.
+ * These will determine the verdict of current iteration.
+ *
+ * @DIM_STATS_WORSE: Current iteration shows worse performance than before
+ * @DIM_STATS_WORSE: Current iteration shows same performance than before
+ * @DIM_STATS_WORSE: Current iteration shows better performance than before
+ */
+enum {
+ DIM_STATS_WORSE,
+ DIM_STATS_SAME,
+ DIM_STATS_BETTER,
+};
+
+/**
+ * enum dim_step_result
+ *
+ * These are the DIM algorithm step results.
+ * These describe the result of a step.
+ *
+ * @DIM_STEPPED: Performed a regular step
+ * @DIM_TOO_TIRED: Same kind of step was done multiple times - should go to
+ * tired parking
+ * @DIM_ON_EDGE: Stepped to the most left/right profile
+ */
+enum {
+ DIM_STEPPED,
+ DIM_TOO_TIRED,
+ DIM_ON_EDGE,
+};
+
+/**
+ * dim_on_top - check if current state is a good place to stop (top location)
+ * @dim: DIM context
+ *
+ * Check if current profile is a good place to park at.
+ * This will result in reducing the DIM checks frequency as we assume we
+ * shouldn't probably change profiles, unless traffic pattern wasn't changed.
+ */
+bool dim_on_top(struct dim *dim);
+
+/**
+ * dim_turn - change profile alterning direction
+ * @dim: DIM context
+ *
+ * Go left if we were going right and vice-versa.
+ * Do nothing if currently parking.
+ */
+void dim_turn(struct dim *dim);
+
+/**
+ * dim_park_on_top - enter a parking state on a top location
+ * @dim: DIM context
+ *
+ * Enter parking state.
+ * Clear all movement history.
+ */
+void dim_park_on_top(struct dim *dim);
+
+/**
+ * dim_park_tired - enter a tired parking state
+ * @dim: DIM context
+ *
+ * Enter parking state.
+ * Clear all movement history and cause DIM checks frequency to reduce.
+ */
+void dim_park_tired(struct dim *dim);
+
+/**
+ * dim_calc_stats - calculate the difference between two samples
+ * @start: start sample
+ * @end: end sample
+ * @curr_stats: delta between samples
+ *
+ * Calculate the delta between two samples (in data rates).
+ * Takes into consideration counter wrap-around.
+ */
+void dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
+ struct dim_stats *curr_stats);
+
+/**
+ * dim_update_sample - set a sample's fields with give values
+ * @event_ctr: number of events to set
+ * @packets: number of packets to set
+ * @bytes: number of bytes to set
+ * @s: DIM sample
+ */
+static inline void
+dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s)
+{
+ s->time = ktime_get();
+ s->pkt_ctr = packets;
+ s->byte_ctr = bytes;
+ s->event_ctr = event_ctr;
+}
+
+/**
+ * dim_update_sample_with_comps - set a sample's fields with given
+ * values including the completion parameter
+ * @event_ctr: number of events to set
+ * @packets: number of packets to set
+ * @bytes: number of bytes to set
+ * @comps: number of completions to set
+ * @s: DIM sample
+ */
+static inline void
+dim_update_sample_with_comps(u16 event_ctr, u64 packets, u64 bytes, u64 comps,
+ struct dim_sample *s)
+{
+ dim_update_sample(event_ctr, packets, bytes, s);
+ s->comp_ctr = comps;
+}
+
+/* Net DIM */
+
+/*
+ * Net DIM profiles:
+ * There are different set of profiles for each CQ period mode.
+ * There are different set of profiles for RX/TX CQs.
+ * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES
+ */
+#define NET_DIM_PARAMS_NUM_PROFILES 5
+#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
+#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
+#define NET_DIM_DEF_PROFILE_CQE 1
+#define NET_DIM_DEF_PROFILE_EQE 1
+
+#define NET_DIM_RX_EQE_PROFILES { \
+ {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+}
+
+#define NET_DIM_RX_CQE_PROFILES { \
+ {2, 256}, \
+ {8, 128}, \
+ {16, 64}, \
+ {32, 64}, \
+ {64, 64} \
+}
+
+#define NET_DIM_TX_EQE_PROFILES { \
+ {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \
+}
+
+#define NET_DIM_TX_CQE_PROFILES { \
+ {5, 128}, \
+ {8, 64}, \
+ {16, 32}, \
+ {32, 32}, \
+ {64, 32} \
+}
+
+static const struct dim_cq_moder
+rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
+ NET_DIM_RX_EQE_PROFILES,
+ NET_DIM_RX_CQE_PROFILES,
+};
+
+static const struct dim_cq_moder
+tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
+ NET_DIM_TX_EQE_PROFILES,
+ NET_DIM_TX_CQE_PROFILES,
+};
+
+/**
+ * net_dim_get_rx_moderation - provide a CQ moderation object for the given RX profile
+ * @cq_period_mode: CQ period mode
+ * @ix: Profile index
+ */
+struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix);
+
+/**
+ * net_dim_get_def_rx_moderation - provide the default RX moderation
+ * @cq_period_mode: CQ period mode
+ */
+struct dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode);
+
+/**
+ * net_dim_get_tx_moderation - provide a CQ moderation object for the given TX profile
+ * @cq_period_mode: CQ period mode
+ * @ix: Profile index
+ */
+struct dim_cq_moder net_dim_get_tx_moderation(u8 cq_period_mode, int ix);
+
+/**
+ * net_dim_get_def_tx_moderation - provide the default TX moderation
+ * @cq_period_mode: CQ period mode
+ */
+struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode);
+
+/**
+ * net_dim - main DIM algorithm entry point
+ * @dim: DIM instance information
+ * @end_sample: Current data measurement
+ *
+ * Called by the consumer.
+ * This is the main logic of the algorithm, where data is processed in order to decide on next
+ * required action.
+ */
+void net_dim(struct dim *dim, struct dim_sample end_sample);
+
+#endif /* DIM_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 43b45d6db36d..1fe53e78c7e3 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -578,8 +578,9 @@ struct bpf_skb_data_end {
};
struct bpf_redirect_info {
- u32 ifindex;
u32 flags;
+ u32 tgt_index;
+ void *tgt_value;
struct bpf_map *map;
struct bpf_map *map_to_flush;
u32 kern_flags;
@@ -1199,4 +1200,14 @@ struct bpf_sysctl_kern {
u64 tmp_reg;
};
+struct bpf_sockopt_kern {
+ struct sock *sk;
+ u8 *optval;
+ u8 *optval_end;
+ s32 level;
+ s32 optname;
+ s32 optlen;
+ s32 retval;
+};
+
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h
index 16255c2ca2f4..0d6b4bc191c5 100644
--- a/include/linux/intel-ish-client-if.h
+++ b/include/linux/intel-ish-client-if.h
@@ -103,6 +103,7 @@ void ishtp_put_device(struct ishtp_cl_device *cl_dev);
void ishtp_get_device(struct ishtp_cl_device *cl_dev);
void ishtp_set_drvdata(struct ishtp_cl_device *cl_device, void *data);
void *ishtp_get_drvdata(struct ishtp_cl_device *cl_device);
+struct ishtp_cl_device *ishtp_dev_to_cl_device(struct device *dev);
int ishtp_register_event_cb(struct ishtp_cl_device *device,
void (*read_cb)(struct ishtp_cl_device *));
struct ishtp_fw_client *ishtp_fw_cl_get_client(struct ishtp_device *dev,
diff --git a/include/linux/list.h b/include/linux/list.h
index e951228db4b2..85c92555e31f 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -106,6 +106,20 @@ static inline void __list_del(struct list_head * prev, struct list_head * next)
WRITE_ONCE(prev->next, next);
}
+/*
+ * Delete a list entry and clear the 'prev' pointer.
+ *
+ * This is a special-purpose list clearing method used in the networking code
+ * for lists allocated as per-cpu, where we don't want to incur the extra
+ * WRITE_ONCE() overhead of a regular list_del_init(). The code that uses this
+ * needs to check the node 'prev' pointer instead of calling list_empty().
+ */
+static inline void __list_del_clearprev(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ entry->prev = NULL;
+}
+
/**
* list_del - deletes entry from list.
* @entry: the element to delete from the list.
diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h
index 70e7e5673ce9..5613e677a5f9 100644
--- a/include/linux/mlx5/accel.h
+++ b/include/linux/mlx5/accel.h
@@ -114,7 +114,7 @@ enum mlx5_accel_ipsec_cap {
MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN = 1 << 7,
};
-#ifdef CONFIG_MLX5_ACCEL
+#ifdef CONFIG_MLX5_FPGA_IPSEC
u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 769326ea1d9b..40748fc1b11b 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -47,7 +47,7 @@ struct mlx5_core_cq {
struct completion free;
unsigned vector;
unsigned int irqn;
- void (*comp) (struct mlx5_core_cq *);
+ void (*comp)(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe);
void (*event) (struct mlx5_core_cq *, enum mlx5_event);
u32 cons_index;
unsigned arm_sn;
@@ -55,7 +55,7 @@ struct mlx5_core_cq {
int pid;
struct {
struct list_head list;
- void (*comp)(struct mlx5_core_cq *);
+ void (*comp)(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe);
void *priv;
} tasklet_ctx;
int reset_notify_added;
@@ -185,7 +185,7 @@ static inline void mlx5_cq_put(struct mlx5_core_cq *cq)
}
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
- u32 *in, int inlen);
+ u32 *in, int inlen, u32 *out, int outlen);
int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 *out, int outlen);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 35ed38c2ae6c..ce9839c8bc1a 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -351,7 +351,7 @@ enum mlx5_event {
MLX5_EVENT_TYPE_DEVICE_TRACER = 0x26,
- MLX5_EVENT_TYPE_MAX = MLX5_EVENT_TYPE_DEVICE_TRACER + 1,
+ MLX5_EVENT_TYPE_MAX = 0x100,
};
enum {
@@ -437,6 +437,7 @@ enum {
MLX5_OPCODE_SET_PSV = 0x20,
MLX5_OPCODE_GET_PSV = 0x21,
MLX5_OPCODE_CHECK_PSV = 0x22,
+ MLX5_OPCODE_DUMP = 0x23,
MLX5_OPCODE_RGET_PSV = 0x26,
MLX5_OPCODE_RCHECK_PSV = 0x27,
@@ -445,6 +446,14 @@ enum {
};
enum {
+ MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS = 0x20,
+};
+
+enum {
+ MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS = 0x20,
+};
+
+enum {
MLX5_SET_PORT_RESET_QKEY = 0,
MLX5_SET_PORT_GUID0 = 16,
MLX5_SET_PORT_NODE_GUID = 17,
@@ -1085,6 +1094,9 @@ enum mlx5_cap_type {
MLX5_CAP_DEBUG,
MLX5_CAP_RESERVED_14,
MLX5_CAP_DEV_MEM,
+ MLX5_CAP_RESERVED_16,
+ MLX5_CAP_TLS,
+ MLX5_CAP_DEV_EVENT = 0x14,
/* NUM OF CAP Types */
MLX5_CAP_NUM
};
@@ -1263,6 +1275,12 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP64_DEV_MEM(mdev, cap)\
MLX5_GET64(device_mem_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap)
+#define MLX5_CAP_TLS(mdev, cap) \
+ MLX5_GET(tls_cap, (mdev)->caps.hca_cur[MLX5_CAP_TLS], cap)
+
+#define MLX5_CAP_DEV_EVENT(mdev, cap)\
+ MLX5_ADDR_OF(device_event_cap, (mdev)->caps.hca_cur[MLX5_CAP_DEV_EVENT], cap)
+
enum {
MLX5_CMD_STAT_OK = 0x0,
MLX5_CMD_STAT_INT_ERR = 0x1,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 25847beabd3f..0e6da1840c7d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -41,7 +41,7 @@
#include <linux/semaphore.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
-#include <linux/radix-tree.h>
+#include <linux/xarray.h>
#include <linux/workqueue.h>
#include <linux/mempool.h>
#include <linux/interrupt.h>
@@ -139,6 +139,7 @@ enum {
MLX5_REG_MTPPS = 0x9053,
MLX5_REG_MTPPSE = 0x9054,
MLX5_REG_MPEGC = 0x9056,
+ MLX5_REG_MCQS = 0x9060,
MLX5_REG_MCQI = 0x9061,
MLX5_REG_MCC = 0x9062,
MLX5_REG_MCDA = 0x9063,
@@ -182,6 +183,11 @@ enum port_state_policy {
MLX5_POLICY_INVALID = 0xffffffff
};
+enum mlx5_coredev_type {
+ MLX5_COREDEV_PF,
+ MLX5_COREDEV_VF
+};
+
struct mlx5_field_desc {
struct dentry *dent;
int i;
@@ -458,13 +464,6 @@ struct mlx5_qp_table {
struct radix_tree_root tree;
};
-struct mlx5_mkey_table {
- /* protect radix tree
- */
- rwlock_t lock;
- struct radix_tree_root tree;
-};
-
struct mlx5_vf_context {
int enabled;
u64 port_guid;
@@ -475,7 +474,7 @@ struct mlx5_vf_context {
struct mlx5_core_sriov {
struct mlx5_vf_context *vfs_ctx;
int num_vfs;
- int enabled_vfs;
+ u16 max_vfs;
};
struct mlx5_fc_stats {
@@ -497,6 +496,7 @@ struct mlx5_eswitch;
struct mlx5_lag;
struct mlx5_devcom;
struct mlx5_eq_table;
+struct mlx5_irq_table;
struct mlx5_rate_limit {
u32 rate;
@@ -526,6 +526,8 @@ struct mlx5_core_roce {
};
struct mlx5_priv {
+ /* IRQ table valid only for real pci devices PF or VF */
+ struct mlx5_irq_table *irq_table;
struct mlx5_eq_table *eq_table;
/* pages stuff */
@@ -548,9 +550,7 @@ struct mlx5_priv {
struct dentry *cmdif_debugfs;
/* end: qp staff */
- /* start: mkey staff */
- struct mlx5_mkey_table mkey_table;
- /* end: mkey staff */
+ struct xarray mkey_table;
/* start: alloc staff */
/* protect buffer alocation according to numa node */
@@ -577,7 +577,6 @@ struct mlx5_priv {
struct mlx5_core_sriov sriov;
struct mlx5_lag *lag;
struct mlx5_devcom *devcom;
- unsigned long pci_dev_data;
struct mlx5_core_roce roce;
struct mlx5_fc_stats fc_stats;
struct mlx5_rl_table rl_table;
@@ -658,6 +657,7 @@ struct mlx5_geneve;
struct mlx5_core_dev {
struct device *device;
+ enum mlx5_coredev_type coredev_type;
struct pci_dev *pdev;
/* sync pci state */
struct mutex pci_status_mutex;
@@ -1052,6 +1052,8 @@ int mlx5_register_interface(struct mlx5_interface *intf);
void mlx5_unregister_interface(struct mlx5_interface *intf);
int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb);
int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb);
+int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
+int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb);
int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id);
@@ -1092,9 +1094,9 @@ enum {
MLX5_PCI_DEV_IS_VF = 1 << 0,
};
-static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
+static inline bool mlx5_core_is_pf(const struct mlx5_core_dev *dev)
{
- return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
+ return dev->coredev_type == MLX5_COREDEV_PF;
}
static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev)
@@ -1102,23 +1104,20 @@ static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev)
return dev->caps.embedded_cpu;
}
-static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev)
+static inline bool
+mlx5_core_is_ecpf_esw_manager(const struct mlx5_core_dev *dev)
{
return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager);
}
-static inline bool mlx5_ecpf_vport_exists(struct mlx5_core_dev *dev)
+static inline bool mlx5_ecpf_vport_exists(const struct mlx5_core_dev *dev)
{
return mlx5_core_is_pf(dev) && MLX5_CAP_ESW(dev, ecpf_vport_exists);
}
-#define MLX5_HOST_PF_MAX_VFS (127u)
-static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev)
+static inline u16 mlx5_core_max_vfs(const struct mlx5_core_dev *dev)
{
- if (mlx5_core_is_ecpf_esw_manager(dev))
- return MLX5_HOST_PF_MAX_VFS;
- else
- return pci_sriov_get_totalvfs(dev->pdev);
+ return dev->priv.sriov.max_vfs;
}
static inline int mlx5_get_gid_table_len(u16 param)
diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h
index 00045cc4ea11..e49d8c0d4f26 100644
--- a/include/linux/mlx5/eq.h
+++ b/include/linux/mlx5/eq.h
@@ -4,17 +4,7 @@
#ifndef MLX5_CORE_EQ_H
#define MLX5_CORE_EQ_H
-enum {
- MLX5_EQ_PAGEREQ_IDX = 0,
- MLX5_EQ_CMD_IDX = 1,
- MLX5_EQ_ASYNC_IDX = 2,
- /* reserved to be used by mlx5_core ulps (mlx5e/mlx5_ib) */
- MLX5_EQ_PFAULT_IDX = 3,
- MLX5_EQ_MAX_ASYNC_EQS,
- /* completion eqs vector indices start here */
- MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS,
-};
-
+#define MLX5_IRQ_VEC_COMP_BASE 1
#define MLX5_NUM_CMD_EQE (32)
#define MLX5_NUM_ASYNC_EQE (0x1000)
#define MLX5_NUM_SPARE_EQE (0x80)
@@ -23,18 +13,19 @@ struct mlx5_eq;
struct mlx5_core_dev;
struct mlx5_eq_param {
- u8 index;
+ u8 irq_index;
int nent;
- u64 mask;
- void *context;
- irq_handler_t handler;
+ u64 mask[4];
};
struct mlx5_eq *
-mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
- struct mlx5_eq_param *param);
+mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param);
int
mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ struct notifier_block *nb);
+void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ struct notifier_block *nb);
struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc);
void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm);
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index e9a55c0d50fd..46b5ba029802 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -7,13 +7,14 @@
#define _MLX5_ESWITCH_
#include <linux/mlx5/driver.h>
+#include <net/devlink.h>
#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager)
enum {
- SRIOV_NONE,
- SRIOV_LEGACY,
- SRIOV_OFFLOADS
+ MLX5_ESWITCH_NONE,
+ MLX5_ESWITCH_LEGACY,
+ MLX5_ESWITCH_OFFLOADS
};
enum {
@@ -45,6 +46,8 @@ struct mlx5_eswitch_rep {
u16 vport;
u8 hw_id[ETH_ALEN];
u16 vlan;
+ /* Only IB rep is using vport_index */
+ u16 vport_index;
u32 vlan_refcount;
};
@@ -62,4 +65,35 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
u16 vport_num, u32 sqn);
+
+u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
+
+#ifdef CONFIG_MLX5_ESWITCH
+enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
+
+bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw);
+u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+ u16 vport_num);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
+{
+ return DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+}
+
+static inline bool
+mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
+{
+ return false;
+};
+
+static inline u32
+mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+ int vport_num)
+{
+ return 0;
+};
+#endif /* CONFIG_MLX5_ESWITCH */
+
#endif
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 2ddaa97f2179..04a569568eac 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -88,10 +88,21 @@ struct mlx5_flow_group;
struct mlx5_flow_namespace;
struct mlx5_flow_handle;
+enum {
+ FLOW_CONTEXT_HAS_TAG = BIT(0),
+};
+
+struct mlx5_flow_context {
+ u32 flags;
+ u32 flow_tag;
+ u32 flow_source;
+};
+
struct mlx5_flow_spec {
u8 match_criteria_enable;
u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)];
u32 match_value[MLX5_ST_SZ_DW(fte_match_param)];
+ struct mlx5_flow_context flow_context;
};
enum {
@@ -173,13 +184,11 @@ struct mlx5_fs_vlan {
#define MLX5_FS_VLAN_DEPTH 2
enum {
- FLOW_ACT_HAS_TAG = BIT(0),
- FLOW_ACT_NO_APPEND = BIT(1),
+ FLOW_ACT_NO_APPEND = BIT(0),
};
struct mlx5_flow_act {
u32 action;
- u32 flow_tag;
u32 reformat_id;
u32 modify_id;
uintptr_t esp_id;
@@ -190,7 +199,6 @@ struct mlx5_flow_act {
#define MLX5_DECLARE_FLOW_ACT(name) \
struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\
- .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, \
.reformat_id = 0, \
.modify_id = 0, \
.flags = 0, }
@@ -200,7 +208,7 @@ struct mlx5_flow_act {
*/
struct mlx5_flow_handle *
mlx5_add_flow_rules(struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
int num_dest);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6513b985c5e9..06881b79167e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -91,6 +91,20 @@ enum {
enum {
MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b,
+ MLX5_OBJ_TYPE_MKEY = 0xff01,
+ MLX5_OBJ_TYPE_QP = 0xff02,
+ MLX5_OBJ_TYPE_PSV = 0xff03,
+ MLX5_OBJ_TYPE_RMP = 0xff04,
+ MLX5_OBJ_TYPE_XRC_SRQ = 0xff05,
+ MLX5_OBJ_TYPE_RQ = 0xff06,
+ MLX5_OBJ_TYPE_SQ = 0xff07,
+ MLX5_OBJ_TYPE_TIR = 0xff08,
+ MLX5_OBJ_TYPE_TIS = 0xff09,
+ MLX5_OBJ_TYPE_DCT = 0xff0a,
+ MLX5_OBJ_TYPE_XRQ = 0xff0b,
+ MLX5_OBJ_TYPE_RQT = 0xff0e,
+ MLX5_OBJ_TYPE_FLOW_COUNTER = 0xff0f,
+ MLX5_OBJ_TYPE_CQ = 0xff10,
};
enum {
@@ -106,6 +120,9 @@ enum {
MLX5_CMD_OP_QUERY_ISSI = 0x10a,
MLX5_CMD_OP_SET_ISSI = 0x10b,
MLX5_CMD_OP_SET_DRIVER_VERSION = 0x10d,
+ MLX5_CMD_OP_QUERY_SF_PARTITION = 0x111,
+ MLX5_CMD_OP_ALLOC_SF = 0x113,
+ MLX5_CMD_OP_DEALLOC_SF = 0x114,
MLX5_CMD_OP_CREATE_MKEY = 0x200,
MLX5_CMD_OP_QUERY_MKEY = 0x201,
MLX5_CMD_OP_DESTROY_MKEY = 0x202,
@@ -528,7 +545,21 @@ struct mlx5_ifc_fte_match_set_misc2_bits {
struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls_over_udp;
- u8 reserved_at_80[0x100];
+ u8 metadata_reg_c_7[0x20];
+
+ u8 metadata_reg_c_6[0x20];
+
+ u8 metadata_reg_c_5[0x20];
+
+ u8 metadata_reg_c_4[0x20];
+
+ u8 metadata_reg_c_3[0x20];
+
+ u8 metadata_reg_c_2[0x20];
+
+ u8 metadata_reg_c_1[0x20];
+
+ u8 metadata_reg_c_0[0x20];
u8 metadata_reg_a[0x20];
@@ -636,8 +667,22 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
u8 reserved_at_e00[0x7200];
};
+enum {
+ MLX5_FDB_TO_VPORT_REG_C_0 = 0x01,
+ MLX5_FDB_TO_VPORT_REG_C_1 = 0x02,
+ MLX5_FDB_TO_VPORT_REG_C_2 = 0x04,
+ MLX5_FDB_TO_VPORT_REG_C_3 = 0x08,
+ MLX5_FDB_TO_VPORT_REG_C_4 = 0x10,
+ MLX5_FDB_TO_VPORT_REG_C_5 = 0x20,
+ MLX5_FDB_TO_VPORT_REG_C_6 = 0x40,
+ MLX5_FDB_TO_VPORT_REG_C_7 = 0x80,
+};
+
struct mlx5_ifc_flow_table_eswitch_cap_bits {
- u8 reserved_at_0[0x1a];
+ u8 fdb_to_vport_reg_c_id[0x8];
+ u8 reserved_at_8[0xf];
+ u8 flow_source[0x1];
+ u8 reserved_at_18[0x2];
u8 multi_fdb_encap[0x1];
u8 reserved_at_1b[0x1];
u8 fdb_multi_path_to_table[0x1];
@@ -665,7 +710,9 @@ struct mlx5_ifc_e_switch_cap_bits {
u8 vport_svlan_insert[0x1];
u8 vport_cvlan_insert_if_not_exist[0x1];
u8 vport_cvlan_insert_overwrite[0x1];
- u8 reserved_at_5[0x14];
+ u8 reserved_at_5[0x3];
+ u8 esw_uplink_ingress_acl[0x1];
+ u8 reserved_at_9[0x10];
u8 esw_functions_changed[0x1];
u8 reserved_at_1a[0x1];
u8 ecpf_vport_exists[0x1];
@@ -683,7 +730,11 @@ struct mlx5_ifc_e_switch_cap_bits {
u8 reserved_2b[0x6];
u8 max_encap_header_size[0xa];
- u8 reserved_40[0x7c0];
+ u8 reserved_at_40[0xb];
+ u8 log_max_esw_sf[0x5];
+ u8 esw_sf_base_id[0x10];
+
+ u8 reserved_at_60[0x7a0];
};
@@ -823,6 +874,12 @@ struct mlx5_ifc_device_mem_cap_bits {
u8 reserved_at_180[0x680];
};
+struct mlx5_ifc_device_event_cap_bits {
+ u8 user_affiliated_events[4][0x40];
+
+ u8 user_unaffiliated_events[4][0x40];
+};
+
enum {
MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x0,
MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2,
@@ -916,6 +973,16 @@ struct mlx5_ifc_vector_calc_cap_bits {
u8 reserved_at_c0[0x720];
};
+struct mlx5_ifc_tls_cap_bits {
+ u8 tls_1_2_aes_gcm_128[0x1];
+ u8 tls_1_3_aes_gcm_128[0x1];
+ u8 tls_1_2_aes_gcm_256[0x1];
+ u8 tls_1_3_aes_gcm_256[0x1];
+ u8 reserved_at_4[0x1c];
+
+ u8 reserved_at_20[0x7e0];
+};
+
enum {
MLX5_WQ_TYPE_LINKED_LIST = 0x0,
MLX5_WQ_TYPE_CYCLIC = 0x1,
@@ -980,7 +1047,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 log_max_srq_sz[0x8];
u8 log_max_qp_sz[0x8];
- u8 reserved_at_90[0x8];
+ u8 event_cap[0x1];
+ u8 reserved_at_91[0x7];
u8 prio_tag_required[0x1];
u8 reserved_at_99[0x2];
u8 log_max_qp[0x5];
@@ -1028,7 +1096,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 cc_modify_allowed[0x1];
u8 start_pad[0x1];
u8 cache_line_128byte[0x1];
- u8 reserved_at_165[0xa];
+ u8 reserved_at_165[0x4];
+ u8 rts2rts_qp_counters_set_id[0x1];
+ u8 reserved_at_16a[0x5];
u8 qcam_reg[0x1];
u8 gid_table_size[0x10];
@@ -1245,7 +1315,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_440[0x20];
- u8 reserved_at_460[0x3];
+ u8 tls[0x1];
+ u8 reserved_at_461[0x2];
u8 log_max_uctx[0x5];
u8 reserved_at_468[0x3];
u8 log_max_umem[0x5];
@@ -1270,7 +1341,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 max_geneve_tlv_option_data_len[0x5];
u8 reserved_at_570[0x10];
- u8 reserved_at_580[0x3c];
+ u8 reserved_at_580[0x33];
+ u8 log_max_dek[0x5];
+ u8 reserved_at_5b8[0x4];
u8 mini_cqe_resp_stride_index[0x1];
u8 cqe_128_always[0x1];
u8 cqe_compression_128[0x1];
@@ -1300,13 +1373,24 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_640[0x10];
u8 num_q_monitor_counters[0x10];
- u8 reserved_at_660[0x40];
+ u8 reserved_at_660[0x20];
+
+ u8 sf[0x1];
+ u8 sf_set_partition[0x1];
+ u8 reserved_at_682[0x1];
+ u8 log_max_sf[0x5];
+ u8 reserved_at_688[0x8];
+ u8 log_min_sf_size[0x8];
+ u8 max_num_sf_partitions[0x8];
u8 uctx_cap[0x20];
u8 reserved_at_6c0[0x4];
u8 flex_parser_id_geneve_tlv_option_0[0x4];
- u8 reserved_at_6c8[0x138];
+ u8 reserved_at_6c8[0x28];
+ u8 sf_base_id[0x10];
+
+ u8 reserved_at_700[0x100];
};
enum mlx5_flow_destination_type {
@@ -2538,6 +2622,7 @@ union mlx5_ifc_hca_cap_union_bits {
struct mlx5_ifc_qos_cap_bits qos_cap;
struct mlx5_ifc_debug_cap_bits debug_cap;
struct mlx5_ifc_fpga_cap_bits fpga_cap;
+ struct mlx5_ifc_tls_cap_bits tls_cap;
u8 reserved_at_0[0x8000];
};
@@ -2555,6 +2640,12 @@ enum {
MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800,
};
+enum {
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT = 0x0,
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK = 0x1,
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT = 0x2,
+};
+
struct mlx5_ifc_vlan_bits {
u8 ethtype[0x10];
u8 prio[0x3];
@@ -2574,7 +2665,9 @@ struct mlx5_ifc_flow_context_bits {
u8 action[0x10];
u8 extended_destination[0x1];
- u8 reserved_at_80[0x7];
+ u8 reserved_at_81[0x1];
+ u8 flow_source[0x2];
+ u8 reserved_at_84[0x4];
u8 destination_list_size[0x18];
u8 reserved_at_a0[0x8];
@@ -2669,7 +2762,8 @@ struct mlx5_ifc_traffic_counter_bits {
struct mlx5_ifc_tisc_bits {
u8 strict_lag_tx_port_affinity[0x1];
- u8 reserved_at_1[0x3];
+ u8 tls_en[0x1];
+ u8 reserved_at_1[0x2];
u8 lag_tx_port_affinity[0x04];
u8 reserved_at_8[0x4];
@@ -2683,7 +2777,11 @@ struct mlx5_ifc_tisc_bits {
u8 reserved_at_140[0x8];
u8 underlay_qpn[0x18];
- u8 reserved_at_160[0x3a0];
+
+ u8 reserved_at_160[0x8];
+ u8 pd[0x18];
+
+ u8 reserved_at_180[0x380];
};
enum {
@@ -3099,12 +3197,14 @@ struct mlx5_ifc_hca_vport_context_bits {
};
struct mlx5_ifc_esw_vport_context_bits {
- u8 reserved_at_0[0x3];
+ u8 fdb_to_vport_reg_c[0x1];
+ u8 reserved_at_1[0x2];
u8 vport_svlan_strip[0x1];
u8 vport_cvlan_strip[0x1];
u8 vport_svlan_insert[0x1];
u8 vport_cvlan_insert[0x2];
- u8 reserved_at_8[0x18];
+ u8 fdb_to_vport_reg_c_id[0x8];
+ u8 reserved_at_10[0x10];
u8 reserved_at_20[0x20];
@@ -4985,7 +5085,8 @@ struct mlx5_ifc_modify_esw_vport_context_out_bits {
};
struct mlx5_ifc_esw_vport_context_fields_select_bits {
- u8 reserved_at_0[0x1c];
+ u8 reserved_at_0[0x1b];
+ u8 fdb_to_vport_reg_c_id[0x1];
u8 vport_cvlan_insert[0x1];
u8 vport_svlan_insert[0x1];
u8 vport_cvlan_strip[0x1];
@@ -5182,6 +5283,7 @@ enum {
MLX5_ACTION_IN_FIELD_OUT_DIPV4 = 0x16,
MLX5_ACTION_IN_FIELD_OUT_FIRST_VID = 0x17,
MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT = 0x47,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_C_0 = 0x51,
};
struct mlx5_ifc_alloc_modify_header_context_out_bits {
@@ -7362,9 +7464,9 @@ struct mlx5_ifc_create_eq_in_bits {
u8 reserved_at_280[0x40];
- u8 event_bitmask[0x40];
+ u8 event_bitmask[4][0x40];
- u8 reserved_at_300[0x580];
+ u8 reserved_at_3c0[0x4c0];
u8 pas[0][0x40];
};
@@ -8482,7 +8584,7 @@ struct mlx5_ifc_mcam_access_reg_bits {
u8 mcda[0x1];
u8 mcc[0x1];
u8 mcqi[0x1];
- u8 reserved_at_1f[0x1];
+ u8 mcqs[0x1];
u8 regs_95_to_87[0x9];
u8 mpegc[0x1];
@@ -8974,6 +9076,24 @@ struct mlx5_ifc_mtppse_reg_bits {
u8 reserved_at_40[0x40];
};
+struct mlx5_ifc_mcqs_reg_bits {
+ u8 last_index_flag[0x1];
+ u8 reserved_at_1[0x7];
+ u8 fw_device[0x8];
+ u8 component_index[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 identifier[0x10];
+
+ u8 reserved_at_40[0x17];
+ u8 component_status[0x5];
+ u8 component_update_state[0x4];
+
+ u8 last_update_state_changer_type[0x4];
+ u8 last_update_state_changer_host_id[0x4];
+ u8 reserved_at_68[0x18];
+};
+
struct mlx5_ifc_mcqi_cap_bits {
u8 supported_info_bitmask[0x20];
@@ -8994,6 +9114,43 @@ struct mlx5_ifc_mcqi_cap_bits {
u8 reserved_at_86[0x1a];
};
+struct mlx5_ifc_mcqi_version_bits {
+ u8 reserved_at_0[0x2];
+ u8 build_time_valid[0x1];
+ u8 user_defined_time_valid[0x1];
+ u8 reserved_at_4[0x14];
+ u8 version_string_length[0x8];
+
+ u8 version[0x20];
+
+ u8 build_time[0x40];
+
+ u8 user_defined_time[0x40];
+
+ u8 build_tool_version[0x20];
+
+ u8 reserved_at_e0[0x20];
+
+ u8 version_string[92][0x8];
+};
+
+struct mlx5_ifc_mcqi_activation_method_bits {
+ u8 pending_server_ac_power_cycle[0x1];
+ u8 pending_server_dc_power_cycle[0x1];
+ u8 pending_server_reboot[0x1];
+ u8 pending_fw_reset[0x1];
+ u8 auto_activate[0x1];
+ u8 all_hosts_sync[0x1];
+ u8 device_hw_reset[0x1];
+ u8 reserved_at_7[0x19];
+};
+
+union mlx5_ifc_mcqi_reg_data_bits {
+ struct mlx5_ifc_mcqi_cap_bits mcqi_caps;
+ struct mlx5_ifc_mcqi_version_bits mcqi_version;
+ struct mlx5_ifc_mcqi_activation_method_bits mcqi_activation_mathod;
+};
+
struct mlx5_ifc_mcqi_reg_bits {
u8 read_pending_component[0x1];
u8 reserved_at_1[0xf];
@@ -9011,7 +9168,7 @@ struct mlx5_ifc_mcqi_reg_bits {
u8 reserved_at_a0[0x10];
u8 data_size[0x10];
- u8 data[0][0x20];
+ union mlx5_ifc_mcqi_reg_data_bits data[0];
};
struct mlx5_ifc_mcc_reg_bits {
@@ -9708,10 +9865,11 @@ struct mlx5_ifc_mtrc_ctrl_bits {
struct mlx5_ifc_host_params_context_bits {
u8 host_number[0x8];
- u8 reserved_at_8[0x8];
+ u8 reserved_at_8[0x7];
+ u8 host_pf_disabled[0x1];
u8 host_num_of_vfs[0x10];
- u8 reserved_at_20[0x10];
+ u8 host_total_vfs[0x10];
u8 host_pci_bus[0x10];
u8 reserved_at_40[0x10];
@@ -9744,6 +9902,165 @@ struct mlx5_ifc_query_esw_functions_out_bits {
struct mlx5_ifc_host_params_context_bits host_params_context;
u8 reserved_at_280[0x180];
+ u8 host_sf_enable[0][0x40];
+};
+
+struct mlx5_ifc_sf_partition_bits {
+ u8 reserved_at_0[0x10];
+ u8 log_num_sf[0x8];
+ u8 log_sf_bar_size[0x8];
+};
+
+struct mlx5_ifc_query_sf_partitions_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x18];
+ u8 num_sf_partitions[0x8];
+
+ u8 reserved_at_60[0x20];
+
+ struct mlx5_ifc_sf_partition_bits sf_partition[0];
+};
+
+struct mlx5_ifc_query_sf_partitions_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_dealloc_sf_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_dealloc_sf_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x10];
+ u8 function_id[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_alloc_sf_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_alloc_sf_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x10];
+ u8 function_id[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_affiliated_event_header_bits {
+ u8 reserved_at_0[0x10];
+ u8 obj_type[0x10];
+
+ u8 obj_id[0x20];
+};
+
+enum {
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT(0xc),
+};
+
+enum {
+ MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc,
+};
+
+struct mlx5_ifc_encryption_key_obj_bits {
+ u8 modify_field_select[0x40];
+
+ u8 reserved_at_40[0x14];
+ u8 key_size[0x4];
+ u8 reserved_at_58[0x4];
+ u8 key_type[0x4];
+
+ u8 reserved_at_60[0x8];
+ u8 pd[0x18];
+
+ u8 reserved_at_80[0x180];
+ u8 key[8][0x20];
+
+ u8 reserved_at_300[0x500];
+};
+
+struct mlx5_ifc_create_encryption_key_in_bits {
+ struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr;
+ struct mlx5_ifc_encryption_key_obj_bits encryption_key_object;
+};
+
+enum {
+ MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128 = 0x0,
+ MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256 = 0x1,
+};
+
+enum {
+ MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK = 0x1,
+};
+
+struct mlx5_ifc_tls_static_params_bits {
+ u8 const_2[0x2];
+ u8 tls_version[0x4];
+ u8 const_1[0x2];
+ u8 reserved_at_8[0x14];
+ u8 encryption_standard[0x4];
+
+ u8 reserved_at_20[0x20];
+
+ u8 initial_record_number[0x40];
+
+ u8 resync_tcp_sn[0x20];
+
+ u8 gcm_iv[0x20];
+
+ u8 implicit_iv[0x40];
+
+ u8 reserved_at_100[0x8];
+ u8 dek_index[0x18];
+
+ u8 reserved_at_120[0xe0];
+};
+
+struct mlx5_ifc_tls_progress_params_bits {
+ u8 valid[0x1];
+ u8 reserved_at_1[0x7];
+ u8 pd[0x18];
+
+ u8 next_record_tcp_sn[0x20];
+
+ u8 hw_resync_tcp_sn[0x20];
+
+ u8 record_tracker_state[0x2];
+ u8 auth_state[0x2];
+ u8 reserved_at_64[0x4];
+ u8 hw_offset_record_number[0x18];
};
#endif /* MLX5_IFC_H */
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 3ba4edbd17a6..127d224443e3 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -202,7 +202,12 @@ struct mlx5_wqe_ctrl_seg {
u8 signature;
u8 rsvd[2];
u8 fm_ce_se;
- __be32 imm;
+ union {
+ __be32 general_id;
+ __be32 imm;
+ __be32 umr_mkey;
+ __be32 tisn;
+ };
};
#define MLX5_WQE_CTRL_DS_MASK 0x3f
@@ -551,11 +556,6 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u
return radix_tree_lookup(&dev->priv.qp_table.tree, qpn);
}
-static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key)
-{
- return radix_tree_lookup(&dev->priv.mkey_table.tree, key);
-}
-
int mlx5_core_create_dct(struct mlx5_core_dev *dev,
struct mlx5_core_dct *qp,
u32 *in, int inlen,
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 3d1c6cdbbba7..16060fb9b5e5 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -44,9 +44,6 @@
MLX5_VPORT_UPLINK_PLACEHOLDER + \
MLX5_VPORT_ECPF_PLACEHOLDER(mdev))
-#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS(mdev) + \
- mlx5_core_max_vfs(mdev))
-
#define MLX5_VPORT_MANAGER(mdev) \
(MLX5_CAP_GEN(mdev, vport_group_manager) && \
(MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
@@ -58,6 +55,7 @@ enum {
MLX5_CAP_INLINE_MODE_NOT_REQUIRED,
};
+/* Vport number for each function must keep unchanged */
enum {
MLX5_VPORT_PF = 0x0,
MLX5_VPORT_FIRST_VF = 0x1,
@@ -69,7 +67,8 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport);
int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
u16 vport, u8 other_vport, u8 state);
int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
- u16 vport, u8 *addr);
+ u16 vport, bool other, u8 *addr);
+int mlx5_query_mac_address(struct mlx5_core_dev *mdev, u8 *addr);
int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
u16 vport, u8 *min_inline);
void mlx5_query_min_inline(struct mlx5_core_dev *mdev, u8 *min_inline);
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index b3d360b0ee3d..9f57cdfcc93d 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -373,6 +373,8 @@ struct flash_info;
* @flash_unlock: [FLASH-SPECIFIC] unlock a region of the SPI NOR
* @flash_is_locked: [FLASH-SPECIFIC] check if a region of the SPI NOR is
* @quad_enable: [FLASH-SPECIFIC] enables SPI NOR quad mode
+ * @clear_sr_bp: [FLASH-SPECIFIC] clears the Block Protection Bits from
+ * the SPI NOR Status Register.
* completely locked
* @priv: the private data
*/
@@ -410,6 +412,7 @@ struct spi_nor {
int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
int (*flash_is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len);
int (*quad_enable)(struct spi_nor *nor);
+ int (*clear_sr_bp)(struct spi_nor *nor);
void *priv;
};
diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h
deleted file mode 100644
index fd458389f7d1..000000000000
--- a/include/linux/net_dim.h
+++ /dev/null
@@ -1,418 +0,0 @@
-/*
- * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017-2018, Broadcom Limited. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef NET_DIM_H
-#define NET_DIM_H
-
-#include <linux/module.h>
-
-struct net_dim_cq_moder {
- u16 usec;
- u16 pkts;
- u8 cq_period_mode;
-};
-
-struct net_dim_sample {
- ktime_t time;
- u32 pkt_ctr;
- u32 byte_ctr;
- u16 event_ctr;
-};
-
-struct net_dim_stats {
- int ppms; /* packets per msec */
- int bpms; /* bytes per msec */
- int epms; /* events per msec */
-};
-
-struct net_dim { /* Adaptive Moderation */
- u8 state;
- struct net_dim_stats prev_stats;
- struct net_dim_sample start_sample;
- struct work_struct work;
- u8 profile_ix;
- u8 mode;
- u8 tune_state;
- u8 steps_right;
- u8 steps_left;
- u8 tired;
-};
-
-enum {
- NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
- NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
- NET_DIM_CQ_PERIOD_NUM_MODES
-};
-
-/* Adaptive moderation logic */
-enum {
- NET_DIM_START_MEASURE,
- NET_DIM_MEASURE_IN_PROGRESS,
- NET_DIM_APPLY_NEW_PROFILE,
-};
-
-enum {
- NET_DIM_PARKING_ON_TOP,
- NET_DIM_PARKING_TIRED,
- NET_DIM_GOING_RIGHT,
- NET_DIM_GOING_LEFT,
-};
-
-enum {
- NET_DIM_STATS_WORSE,
- NET_DIM_STATS_SAME,
- NET_DIM_STATS_BETTER,
-};
-
-enum {
- NET_DIM_STEPPED,
- NET_DIM_TOO_TIRED,
- NET_DIM_ON_EDGE,
-};
-
-#define NET_DIM_PARAMS_NUM_PROFILES 5
-/* Adaptive moderation profiles */
-#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
-#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
-#define NET_DIM_DEF_PROFILE_CQE 1
-#define NET_DIM_DEF_PROFILE_EQE 1
-
-/* All profiles sizes must be NET_PARAMS_DIM_NUM_PROFILES */
-#define NET_DIM_RX_EQE_PROFILES { \
- {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-}
-
-#define NET_DIM_RX_CQE_PROFILES { \
- {2, 256}, \
- {8, 128}, \
- {16, 64}, \
- {32, 64}, \
- {64, 64} \
-}
-
-#define NET_DIM_TX_EQE_PROFILES { \
- {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \
-}
-
-#define NET_DIM_TX_CQE_PROFILES { \
- {5, 128}, \
- {8, 64}, \
- {16, 32}, \
- {32, 32}, \
- {64, 32} \
-}
-
-static const struct net_dim_cq_moder
-rx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
- NET_DIM_RX_EQE_PROFILES,
- NET_DIM_RX_CQE_PROFILES,
-};
-
-static const struct net_dim_cq_moder
-tx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
- NET_DIM_TX_EQE_PROFILES,
- NET_DIM_TX_CQE_PROFILES,
-};
-
-static inline struct net_dim_cq_moder
-net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
-{
- struct net_dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix];
-
- cq_moder.cq_period_mode = cq_period_mode;
- return cq_moder;
-}
-
-static inline struct net_dim_cq_moder
-net_dim_get_def_rx_moderation(u8 cq_period_mode)
-{
- u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
- NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
-
- return net_dim_get_rx_moderation(cq_period_mode, profile_ix);
-}
-
-static inline struct net_dim_cq_moder
-net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
-{
- struct net_dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix];
-
- cq_moder.cq_period_mode = cq_period_mode;
- return cq_moder;
-}
-
-static inline struct net_dim_cq_moder
-net_dim_get_def_tx_moderation(u8 cq_period_mode)
-{
- u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
- NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
-
- return net_dim_get_tx_moderation(cq_period_mode, profile_ix);
-}
-
-static inline bool net_dim_on_top(struct net_dim *dim)
-{
- switch (dim->tune_state) {
- case NET_DIM_PARKING_ON_TOP:
- case NET_DIM_PARKING_TIRED:
- return true;
- case NET_DIM_GOING_RIGHT:
- return (dim->steps_left > 1) && (dim->steps_right == 1);
- default: /* NET_DIM_GOING_LEFT */
- return (dim->steps_right > 1) && (dim->steps_left == 1);
- }
-}
-
-static inline void net_dim_turn(struct net_dim *dim)
-{
- switch (dim->tune_state) {
- case NET_DIM_PARKING_ON_TOP:
- case NET_DIM_PARKING_TIRED:
- break;
- case NET_DIM_GOING_RIGHT:
- dim->tune_state = NET_DIM_GOING_LEFT;
- dim->steps_left = 0;
- break;
- case NET_DIM_GOING_LEFT:
- dim->tune_state = NET_DIM_GOING_RIGHT;
- dim->steps_right = 0;
- break;
- }
-}
-
-static inline int net_dim_step(struct net_dim *dim)
-{
- if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))
- return NET_DIM_TOO_TIRED;
-
- switch (dim->tune_state) {
- case NET_DIM_PARKING_ON_TOP:
- case NET_DIM_PARKING_TIRED:
- break;
- case NET_DIM_GOING_RIGHT:
- if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1))
- return NET_DIM_ON_EDGE;
- dim->profile_ix++;
- dim->steps_right++;
- break;
- case NET_DIM_GOING_LEFT:
- if (dim->profile_ix == 0)
- return NET_DIM_ON_EDGE;
- dim->profile_ix--;
- dim->steps_left++;
- break;
- }
-
- dim->tired++;
- return NET_DIM_STEPPED;
-}
-
-static inline void net_dim_park_on_top(struct net_dim *dim)
-{
- dim->steps_right = 0;
- dim->steps_left = 0;
- dim->tired = 0;
- dim->tune_state = NET_DIM_PARKING_ON_TOP;
-}
-
-static inline void net_dim_park_tired(struct net_dim *dim)
-{
- dim->steps_right = 0;
- dim->steps_left = 0;
- dim->tune_state = NET_DIM_PARKING_TIRED;
-}
-
-static inline void net_dim_exit_parking(struct net_dim *dim)
-{
- dim->tune_state = dim->profile_ix ? NET_DIM_GOING_LEFT :
- NET_DIM_GOING_RIGHT;
- net_dim_step(dim);
-}
-
-#define IS_SIGNIFICANT_DIFF(val, ref) \
- (((100UL * abs((val) - (ref))) / (ref)) > 10) /* more than 10% difference */
-
-static inline int net_dim_stats_compare(struct net_dim_stats *curr,
- struct net_dim_stats *prev)
-{
- if (!prev->bpms)
- return curr->bpms ? NET_DIM_STATS_BETTER :
- NET_DIM_STATS_SAME;
-
- if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
- return (curr->bpms > prev->bpms) ? NET_DIM_STATS_BETTER :
- NET_DIM_STATS_WORSE;
-
- if (!prev->ppms)
- return curr->ppms ? NET_DIM_STATS_BETTER :
- NET_DIM_STATS_SAME;
-
- if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
- return (curr->ppms > prev->ppms) ? NET_DIM_STATS_BETTER :
- NET_DIM_STATS_WORSE;
-
- if (!prev->epms)
- return NET_DIM_STATS_SAME;
-
- if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
- return (curr->epms < prev->epms) ? NET_DIM_STATS_BETTER :
- NET_DIM_STATS_WORSE;
-
- return NET_DIM_STATS_SAME;
-}
-
-static inline bool net_dim_decision(struct net_dim_stats *curr_stats,
- struct net_dim *dim)
-{
- int prev_state = dim->tune_state;
- int prev_ix = dim->profile_ix;
- int stats_res;
- int step_res;
-
- switch (dim->tune_state) {
- case NET_DIM_PARKING_ON_TOP:
- stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats);
- if (stats_res != NET_DIM_STATS_SAME)
- net_dim_exit_parking(dim);
- break;
-
- case NET_DIM_PARKING_TIRED:
- dim->tired--;
- if (!dim->tired)
- net_dim_exit_parking(dim);
- break;
-
- case NET_DIM_GOING_RIGHT:
- case NET_DIM_GOING_LEFT:
- stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats);
- if (stats_res != NET_DIM_STATS_BETTER)
- net_dim_turn(dim);
-
- if (net_dim_on_top(dim)) {
- net_dim_park_on_top(dim);
- break;
- }
-
- step_res = net_dim_step(dim);
- switch (step_res) {
- case NET_DIM_ON_EDGE:
- net_dim_park_on_top(dim);
- break;
- case NET_DIM_TOO_TIRED:
- net_dim_park_tired(dim);
- break;
- }
-
- break;
- }
-
- if ((prev_state != NET_DIM_PARKING_ON_TOP) ||
- (dim->tune_state != NET_DIM_PARKING_ON_TOP))
- dim->prev_stats = *curr_stats;
-
- return dim->profile_ix != prev_ix;
-}
-
-static inline void net_dim_sample(u16 event_ctr,
- u64 packets,
- u64 bytes,
- struct net_dim_sample *s)
-{
- s->time = ktime_get();
- s->pkt_ctr = packets;
- s->byte_ctr = bytes;
- s->event_ctr = event_ctr;
-}
-
-#define NET_DIM_NEVENTS 64
-#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1))
-
-static inline void net_dim_calc_stats(struct net_dim_sample *start,
- struct net_dim_sample *end,
- struct net_dim_stats *curr_stats)
-{
- /* u32 holds up to 71 minutes, should be enough */
- u32 delta_us = ktime_us_delta(end->time, start->time);
- u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr);
- u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr,
- start->byte_ctr);
-
- if (!delta_us)
- return;
-
- curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us);
- curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us);
- curr_stats->epms = DIV_ROUND_UP(NET_DIM_NEVENTS * USEC_PER_MSEC,
- delta_us);
-}
-
-static inline void net_dim(struct net_dim *dim,
- struct net_dim_sample end_sample)
-{
- struct net_dim_stats curr_stats;
- u16 nevents;
-
- switch (dim->state) {
- case NET_DIM_MEASURE_IN_PROGRESS:
- nevents = BIT_GAP(BITS_PER_TYPE(u16),
- end_sample.event_ctr,
- dim->start_sample.event_ctr);
- if (nevents < NET_DIM_NEVENTS)
- break;
- net_dim_calc_stats(&dim->start_sample, &end_sample,
- &curr_stats);
- if (net_dim_decision(&curr_stats, dim)) {
- dim->state = NET_DIM_APPLY_NEW_PROFILE;
- schedule_work(&dim->work);
- break;
- }
- /* fall through */
- case NET_DIM_START_MEASURE:
- net_dim_sample(end_sample.event_ctr, end_sample.pkt_ctr, end_sample.byte_ctr,
- &dim->start_sample);
- dim->state = NET_DIM_MEASURE_IN_PROGRESS;
- break;
- case NET_DIM_APPLY_NEW_PROFILE:
- break;
- }
-}
-
-#endif /* NET_DIM_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index eeacebd7debb..88292953aa6f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4870,4 +4870,6 @@ do { \
#define PTYPE_HASH_SIZE (16)
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
+extern struct net_device *blackhole_netdev;
+
#endif /* _LINUX_NETDEVICE_H */
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 593d1b9c33a8..205fa7b1f07a 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -192,7 +192,14 @@ struct netlink_callback {
bool strict_check;
u16 answer_flags;
unsigned int prev_seq, seq;
- long args[6];
+ union {
+ u8 ctx[48];
+
+ /* args is deprecated. Cast a struct over ctx instead
+ * for proper type safety.
+ */
+ long args[6];
+ };
};
struct netlink_notify {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b5d427b149c9..7ece49d5f8ef 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3919,18 +3919,16 @@ static inline bool __skb_checksum_convert_check(struct sk_buff *skb)
return (skb->ip_summed == CHECKSUM_NONE && skb->csum_valid);
}
-static inline void __skb_checksum_convert(struct sk_buff *skb,
- __sum16 check, __wsum pseudo)
+static inline void __skb_checksum_convert(struct sk_buff *skb, __wsum pseudo)
{
skb->csum = ~pseudo;
skb->ip_summed = CHECKSUM_COMPLETE;
}
-#define skb_checksum_try_convert(skb, proto, check, compute_pseudo) \
+#define skb_checksum_try_convert(skb, proto, compute_pseudo) \
do { \
if (__skb_checksum_convert_check(skb)) \
- __skb_checksum_convert(skb, check, \
- compute_pseudo(skb, proto)); \
+ __skb_checksum_convert(skb, compute_pseudo(skb, proto)); \
} while (0)
static inline void skb_remcsum_adjust_partial(struct sk_buff *skb, void *ptr,
diff --git a/include/net/bond_options.h b/include/net/bond_options.h
index 2a05cc349018..9d382f2f0bc5 100644
--- a/include/net/bond_options.h
+++ b/include/net/bond_options.h
@@ -63,6 +63,7 @@ enum {
BOND_OPT_AD_ACTOR_SYSTEM,
BOND_OPT_AD_USER_PORT_KEY,
BOND_OPT_NUM_PEER_NOTIF_ALIAS,
+ BOND_OPT_PEER_NOTIF_DELAY,
BOND_OPT_LAST
};
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 676e7fae05a3..f7fe45689142 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -123,6 +123,7 @@ struct bond_params {
int fail_over_mac;
int updelay;
int downdelay;
+ int peer_notif_delay;
int lacp_fast;
unsigned int min_links;
int ad_select;
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 6c51e864336a..6625ea068d5e 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -528,8 +528,10 @@ struct devlink_ops {
int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode);
int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode,
struct netlink_ext_ack *extack);
- int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode);
- int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode,
+ int (*eswitch_encap_mode_get)(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode *p_encap_mode);
+ int (*eswitch_encap_mode_set)(struct devlink *devlink,
+ enum devlink_eswitch_encap_mode encap_mode,
struct netlink_ext_ack *extack);
int (*info_get)(struct devlink *devlink, struct devlink_info_req *req,
struct netlink_ext_ack *extack);
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 975901a95c0f..ae2ba897675c 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -25,6 +25,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags);
int inet_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern);
+int inet_send_prepare(struct sock *sk);
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index c8bba0c28286..b69c16cbbf71 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -281,8 +281,8 @@ static inline bool ip6_sk_ignore_df(const struct sock *sk)
inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
}
-static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
- struct in6_addr *daddr)
+static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt,
+ const struct in6_addr *daddr)
{
if (rt->rt6i_flags & RTF_GATEWAY)
return &rt->rt6i_gateway;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index b41f6a0fa903..8eca5fb30376 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -301,6 +301,13 @@ struct ipv6_txoptions {
/* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */
};
+/* flowlabel_reflect sysctl values */
+enum flowlabel_reflect {
+ FLOWLABEL_REFLECT_ESTABLISHED = 1,
+ FLOWLABEL_REFLECT_TCP_RESET = 2,
+ FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES = 4,
+};
+
struct ip6_flowlabel {
struct ip6_flowlabel __rcu *next;
__be32 label;
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index f07c518ef8a5..ee9c871d2043 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -112,6 +112,15 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
return page_pool_alloc_pages(pool, gfp);
}
+/* get the stored dma direction. A driver might decide to treat this locally and
+ * avoid the extra cache line from page_pool to determine the direction
+ */
+static
+inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
+{
+ return pool->p.dma_dir;
+}
+
struct page_pool *page_pool_create(const struct page_pool_params *params);
void __page_pool_free(struct page_pool *pool);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 720f2b32fc2f..1a7596ba0dbe 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -10,7 +10,7 @@
#include <net/net_namespace.h>
/* TC action not accessible from user space */
-#define TC_ACT_REINSERT (TC_ACT_VALUE_MAX + 1)
+#define TC_ACT_CONSUMED (TC_ACT_VALUE_MAX + 1)
/* Basic packet classifier frontend definitions. */
diff --git a/include/net/route.h b/include/net/route.h
index cfcd0f5980f9..630a0493f1f3 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -221,6 +221,7 @@ void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
struct rtable *rt_dst_alloc(struct net_device *dev,
unsigned int flags, u16 type,
bool nopolicy, bool noxfrm, bool will_cache);
+struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt);
struct in_ifaddr;
void fib_add_ifaddr(struct in_ifaddr *);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 21f434f3ac9e..855167bbc372 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -279,7 +279,7 @@ struct tcf_result {
};
const struct tcf_proto *goto_tp;
- /* used by the TC_ACT_REINSERT action */
+ /* used in the skb_tc_reinsert function */
struct {
bool ingress;
struct gnet_stats_queue *qstats;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9d36cc88d043..e16d8a3fd3b4 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2221,6 +2221,14 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
}
+static inline void tcp_bpf_rtt(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTT_CB_FLAG))
+ tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL);
+}
+
#if IS_ENABLED(CONFIG_SMC)
extern struct static_key_false tcp_have_smc;
#endif
diff --git a/include/net/tls.h b/include/net/tls.h
index 63e473420b00..0279938386ab 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -407,21 +407,6 @@ static inline bool tls_is_partially_sent_record(struct tls_context *ctx)
return !!ctx->partially_sent_record;
}
-static inline int tls_complete_pending_work(struct sock *sk,
- struct tls_context *ctx,
- int flags, long *timeo)
-{
- int rc = 0;
-
- if (unlikely(sk->sk_write_pending))
- rc = wait_on_pending_writer(sk, timeo);
-
- if (!rc && tls_is_partially_sent_record(ctx))
- rc = tls_push_partial_record(sk, ctx, flags);
-
- return rc;
-}
-
static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
{
return tls_ctx->pending_open_record_frags;
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index ae0f368a62bb..057b159ff8b9 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -77,10 +77,11 @@ int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
void xsk_flush(struct xdp_sock *xs);
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
/* Used from netdev driver */
+bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
void xsk_umem_discard_addr(struct xdp_umem *umem);
void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
-bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len);
+bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc);
void xsk_umem_consume_tx_done(struct xdp_umem *umem);
struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries);
struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
@@ -99,6 +100,16 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
}
/* Reuse-queue aware version of FILL queue helpers */
+static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
+{
+ struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
+
+ if (rq->length >= cnt)
+ return true;
+
+ return xsk_umem_has_addrs(umem, cnt - rq->length);
+}
+
static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
{
struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
@@ -146,6 +157,11 @@ static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
return false;
}
+static inline bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
+{
+ return false;
+}
+
static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
{
return NULL;
@@ -159,8 +175,8 @@ static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
{
}
-static inline bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma,
- u32 *len)
+static inline bool xsk_umem_consume_tx(struct xdp_umem *umem,
+ struct xdp_desc *desc)
{
return false;
}
@@ -200,6 +216,11 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
return 0;
}
+static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
+{
+ return false;
+}
+
static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
{
return NULL;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a2907873ed56..b22db30c3d88 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -346,22 +346,19 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
int __xfrm_state_delete(struct xfrm_state *x);
struct xfrm_state_afinfo {
- unsigned int family;
- unsigned int proto;
- __be16 eth_proto;
- struct module *owner;
- const struct xfrm_type *type_map[IPPROTO_MAX];
- const struct xfrm_type_offload *type_offload_map[IPPROTO_MAX];
-
- int (*init_flags)(struct xfrm_state *x);
- void (*init_tempsel)(struct xfrm_selector *sel,
- const struct flowi *fl);
- void (*init_temprop)(struct xfrm_state *x,
- const struct xfrm_tmpl *tmpl,
- const xfrm_address_t *daddr,
- const xfrm_address_t *saddr);
- int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n);
- int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n);
+ u8 family;
+ u8 proto;
+
+ const struct xfrm_type_offload *type_offload_esp;
+
+ const struct xfrm_type *type_esp;
+ const struct xfrm_type *type_ipip;
+ const struct xfrm_type *type_ipip6;
+ const struct xfrm_type *type_comp;
+ const struct xfrm_type *type_ah;
+ const struct xfrm_type *type_routing;
+ const struct xfrm_type *type_dstopts;
+
int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
int (*output_finish)(struct sock *sk, struct sk_buff *skb);
int (*extract_input)(struct xfrm_state *x,
@@ -407,12 +404,10 @@ struct xfrm_type {
int (*reject)(struct xfrm_state *, struct sk_buff *,
const struct flowi *);
int (*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **);
- /* Estimate maximal size of result of transformation of a dgram */
- u32 (*get_mtu)(struct xfrm_state *, int size);
};
int xfrm_register_type(const struct xfrm_type *type, unsigned short family);
-int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family);
+void xfrm_unregister_type(const struct xfrm_type *type, unsigned short family);
struct xfrm_type_offload {
char *description;
@@ -424,7 +419,7 @@ struct xfrm_type_offload {
};
int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
-int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
+void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
static inline int xfrm_af2proto(unsigned int family)
{
@@ -1508,21 +1503,19 @@ struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark,
u8 proto,
unsigned short family);
#ifdef CONFIG_XFRM_SUB_POLICY
-int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
- unsigned short family, struct net *net);
-int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
+void xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
unsigned short family);
+void xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
+ unsigned short family);
#else
-static inline int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src,
- int n, unsigned short family, struct net *net)
+static inline void xfrm_tmpl_sort(struct xfrm_tmpl **d, struct xfrm_tmpl **s,
+ int n, unsigned short family)
{
- return -ENOSYS;
}
-static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src,
- int n, unsigned short family)
+static inline void xfrm_state_sort(struct xfrm_state **d, struct xfrm_state **s,
+ int n, unsigned short family)
{
- return -ENOSYS;
}
#endif
@@ -1551,7 +1544,7 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
int xfrm_init_replay(struct xfrm_state *x);
-int xfrm_state_mtu(struct xfrm_state *x, int mtu);
+u32 xfrm_state_mtu(struct xfrm_state *x, int mtu);
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload);
int xfrm_init_state(struct xfrm_state *x);
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index bb5e380e2ef3..68899fdc985b 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -50,6 +50,35 @@ TRACE_EVENT(xdp_exception,
__entry->ifindex)
);
+TRACE_EVENT(xdp_bulk_tx,
+
+ TP_PROTO(const struct net_device *dev,
+ int sent, int drops, int err),
+
+ TP_ARGS(dev, sent, drops, err),
+
+ TP_STRUCT__entry(
+ __field(int, ifindex)
+ __field(u32, act)
+ __field(int, drops)
+ __field(int, sent)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __entry->ifindex = dev->ifindex;
+ __entry->act = XDP_TX;
+ __entry->drops = drops;
+ __entry->sent = sent;
+ __entry->err = err;
+ ),
+
+ TP_printk("ifindex=%d action=%s sent=%d drops=%d err=%d",
+ __entry->ifindex,
+ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+ __entry->sent, __entry->drops, __entry->err)
+);
+
DECLARE_EVENT_CLASS(xdp_redirect_template,
TP_PROTO(const struct net_device *dev,
@@ -146,9 +175,8 @@ struct _bpf_dtab_netdev {
#endif /* __DEVMAP_OBJ_TYPE */
#define devmap_ifindex(fwd, map) \
- (!fwd ? 0 : \
- ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \
- ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0))
+ ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \
+ ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)
#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \
diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h
index 4ebc2135e950..2a15f01c2243 100644
--- a/include/uapi/linux/batadv_packet.h
+++ b/include/uapi/linux/batadv_packet.h
@@ -107,12 +107,20 @@ enum batadv_icmp_packettype {
* @BATADV_MCAST_WANT_ALL_UNSNOOPABLES: we want all packets destined for
* 224.0.0.0/24 or ff02::1
* @BATADV_MCAST_WANT_ALL_IPV4: we want all IPv4 multicast packets
+ * (both link-local and routable ones)
* @BATADV_MCAST_WANT_ALL_IPV6: we want all IPv6 multicast packets
+ * (both link-local and routable ones)
+ * @BATADV_MCAST_WANT_NO_RTR4: we have no IPv4 multicast router and therefore
+ * only need routable IPv4 multicast packets we signed up for explicitly
+ * @BATADV_MCAST_WANT_NO_RTR6: we have no IPv6 multicast router and therefore
+ * only need routable IPv6 multicast packets we signed up for explicitly
*/
enum batadv_mcast_flags {
BATADV_MCAST_WANT_ALL_UNSNOOPABLES = 1UL << 0,
BATADV_MCAST_WANT_ALL_IPV4 = 1UL << 1,
BATADV_MCAST_WANT_ALL_IPV6 = 1UL << 2,
+ BATADV_MCAST_WANT_NO_RTR4 = 1UL << 3,
+ BATADV_MCAST_WANT_NO_RTR6 = 1UL << 4,
};
/* tt data subtypes */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b077507efa3f..ead27aebf491 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -170,6 +170,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_FLOW_DISSECTOR,
BPF_PROG_TYPE_CGROUP_SYSCTL,
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+ BPF_PROG_TYPE_CGROUP_SOCKOPT,
};
enum bpf_attach_type {
@@ -194,6 +195,8 @@ enum bpf_attach_type {
BPF_CGROUP_SYSCTL,
BPF_CGROUP_UDP4_RECVMSG,
BPF_CGROUP_UDP6_RECVMSG,
+ BPF_CGROUP_GETSOCKOPT,
+ BPF_CGROUP_SETSOCKOPT,
__MAX_BPF_ATTACH_TYPE
};
@@ -1568,8 +1571,11 @@ union bpf_attr {
* but this is only implemented for native XDP (with driver
* support) as of this writing).
*
- * All values for *flags* are reserved for future usage, and must
- * be left at zero.
+ * The lower two bits of *flags* are used as the return code if
+ * the map lookup fails. This is so that the return value can be
+ * one of the XDP program return codes up to XDP_TX, as chosen by
+ * the caller. Any higher bits in the *flags* argument must be
+ * unset.
*
* When used to redirect packets to net devices, this helper
* provides a high performance increase over **bpf_redirect**\ ().
@@ -1764,6 +1770,7 @@ union bpf_attr {
* * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
* * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
* * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT)
*
* Therefore, this function can be used to clear a callback flag by
* setting the appropriate bit to zero. e.g. to disable the RTO
@@ -3066,6 +3073,12 @@ struct bpf_tcp_sock {
* sum(delta(snd_una)), or how many bytes
* were acked.
*/
+ __u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups
+ * total number of DSACK blocks received
+ */
+ __u32 delivered; /* Total data packets delivered incl. rexmits */
+ __u32 delivered_ce; /* Like the above but only ECE marked packets */
+ __u32 icsk_retransmits; /* Number of unrecovered [RTO] timeouts */
};
struct bpf_sock_tuple {
@@ -3308,7 +3321,8 @@ struct bpf_sock_ops {
#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
-#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently
* supported cb flags
*/
@@ -3363,6 +3377,8 @@ enum {
BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after
* socket transition to LISTEN state.
*/
+ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT.
+ */
};
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -3541,4 +3557,15 @@ struct bpf_sysctl {
*/
};
+struct bpf_sockopt {
+ __bpf_md_ptr(struct bpf_sock *, sk);
+ __bpf_md_ptr(void *, optval);
+ __bpf_md_ptr(void *, optval_end);
+
+ __s32 level;
+ __s32 optname;
+ __s32 optlen;
+ __s32 retval;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 6f75bda2c2d7..4a8c02cafa9a 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -636,6 +636,7 @@ enum {
IFLA_BOND_AD_USER_PORT_KEY,
IFLA_BOND_AD_ACTOR_SYSTEM,
IFLA_BOND_TLB_DYNAMIC_LB,
+ IFLA_BOND_PEER_NOTIF_DELAY,
__IFLA_BOND_MAX,
};
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index caed8b1614ff..faaa5ca2a117 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -46,6 +46,7 @@ struct xdp_mmap_offsets {
#define XDP_UMEM_FILL_RING 5
#define XDP_UMEM_COMPLETION_RING 6
#define XDP_STATISTICS 7
+#define XDP_OPTIONS 8
struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */
@@ -60,6 +61,13 @@ struct xdp_statistics {
__u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
};
+struct xdp_options {
+ __u32 flags;
+};
+
+/* Flags for the flags field of struct xdp_options */
+#define XDP_OPTIONS_ZEROCOPY (1 << 0)
+
/* Pgoff for mmaping the rings */
#define XDP_PGOFF_RX_RING 0
#define XDP_PGOFF_TX_RING 0x80000000
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 8b2f993cbb77..390efb54b2e0 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -988,8 +988,9 @@ struct tc_etf_qopt {
__s32 delta;
__s32 clockid;
__u32 flags;
-#define TC_ETF_DEADLINE_MODE_ON BIT(0)
-#define TC_ETF_OFFLOAD_ON BIT(1)
+#define TC_ETF_DEADLINE_MODE_ON _BITUL(0)
+#define TC_ETF_OFFLOAD_ON _BITUL(1)
+#define TC_ETF_SKIP_SOCK_CHECK _BITUL(2)
};
enum {
@@ -1158,6 +1159,8 @@ enum {
* [TCA_TAPRIO_ATTR_SCHED_ENTRY_INTERVAL]
*/
+#define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST 0x1
+
enum {
TCA_TAPRIO_ATTR_UNSPEC,
TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */
@@ -1169,6 +1172,8 @@ enum {
TCA_TAPRIO_ATTR_ADMIN_SCHED, /* The admin sched, only used in dump */
TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, /* s64 */
TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */
+ TCA_TAPRIO_ATTR_FLAGS, /* u32 */
+ TCA_TAPRIO_ATTR_TXTIME_DELAY, /* s32 */
__TCA_TAPRIO_ATTR_MAX,
};
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index c225c42e114a..76fa0076f20d 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -15,6 +15,9 @@
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <net/sock.h>
+#include <net/bpf_sk_storage.h>
+
+#include "../cgroup/cgroup-internal.h"
DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
@@ -38,6 +41,8 @@ static void cgroup_bpf_release(struct work_struct *work)
struct bpf_prog_array *old_array;
unsigned int type;
+ mutex_lock(&cgroup_mutex);
+
for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
struct list_head *progs = &cgrp->bpf.progs[type];
struct bpf_prog_list *pl, *tmp;
@@ -54,10 +59,12 @@ static void cgroup_bpf_release(struct work_struct *work)
}
old_array = rcu_dereference_protected(
cgrp->bpf.effective[type],
- percpu_ref_is_dying(&cgrp->bpf.refcnt));
+ lockdep_is_held(&cgroup_mutex));
bpf_prog_array_free(old_array);
}
+ mutex_unlock(&cgroup_mutex);
+
percpu_ref_exit(&cgrp->bpf.refcnt);
cgroup_put(cgrp);
}
@@ -229,6 +236,9 @@ static int update_effective_progs(struct cgroup *cgrp,
css_for_each_descendant_pre(css, &cgrp->self) {
struct cgroup *desc = container_of(css, struct cgroup, self);
+ if (percpu_ref_is_zero(&desc->bpf.refcnt))
+ continue;
+
err = compute_effective_progs(desc, type, &desc->bpf.inactive);
if (err)
goto cleanup;
@@ -238,6 +248,14 @@ static int update_effective_progs(struct cgroup *cgrp,
css_for_each_descendant_pre(css, &cgrp->self) {
struct cgroup *desc = container_of(css, struct cgroup, self);
+ if (percpu_ref_is_zero(&desc->bpf.refcnt)) {
+ if (unlikely(desc->bpf.inactive)) {
+ bpf_prog_array_free(desc->bpf.inactive);
+ desc->bpf.inactive = NULL;
+ }
+ continue;
+ }
+
activate_effective_progs(desc, type, desc->bpf.inactive);
desc->bpf.inactive = NULL;
}
@@ -921,6 +939,188 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
+static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
+ enum bpf_attach_type attach_type)
+{
+ struct bpf_prog_array *prog_array;
+ bool empty;
+
+ rcu_read_lock();
+ prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]);
+ empty = bpf_prog_array_is_empty(prog_array);
+ rcu_read_unlock();
+
+ return empty;
+}
+
+static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
+{
+ if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0)
+ return -EINVAL;
+
+ ctx->optval = kzalloc(max_optlen, GFP_USER);
+ if (!ctx->optval)
+ return -ENOMEM;
+
+ ctx->optval_end = ctx->optval + max_optlen;
+ ctx->optlen = max_optlen;
+
+ return 0;
+}
+
+static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
+{
+ kfree(ctx->optval);
+}
+
+int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
+ int *optname, char __user *optval,
+ int *optlen, char **kernel_optval)
+{
+ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ struct bpf_sockopt_kern ctx = {
+ .sk = sk,
+ .level = *level,
+ .optname = *optname,
+ };
+ int ret;
+
+ /* Opportunistic check to see whether we have any BPF program
+ * attached to the hook so we don't waste time allocating
+ * memory and locking the socket.
+ */
+ if (!cgroup_bpf_enabled ||
+ __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT))
+ return 0;
+
+ ret = sockopt_alloc_buf(&ctx, *optlen);
+ if (ret)
+ return ret;
+
+ if (copy_from_user(ctx.optval, optval, *optlen) != 0) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ lock_sock(sk);
+ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
+ &ctx, BPF_PROG_RUN);
+ release_sock(sk);
+
+ if (!ret) {
+ ret = -EPERM;
+ goto out;
+ }
+
+ if (ctx.optlen == -1) {
+ /* optlen set to -1, bypass kernel */
+ ret = 1;
+ } else if (ctx.optlen > *optlen || ctx.optlen < -1) {
+ /* optlen is out of bounds */
+ ret = -EFAULT;
+ } else {
+ /* optlen within bounds, run kernel handler */
+ ret = 0;
+
+ /* export any potential modifications */
+ *level = ctx.level;
+ *optname = ctx.optname;
+ *optlen = ctx.optlen;
+ *kernel_optval = ctx.optval;
+ }
+
+out:
+ if (ret)
+ sockopt_free_buf(&ctx);
+ return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_setsockopt);
+
+int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
+ int optname, char __user *optval,
+ int __user *optlen, int max_optlen,
+ int retval)
+{
+ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ struct bpf_sockopt_kern ctx = {
+ .sk = sk,
+ .level = level,
+ .optname = optname,
+ .retval = retval,
+ };
+ int ret;
+
+ /* Opportunistic check to see whether we have any BPF program
+ * attached to the hook so we don't waste time allocating
+ * memory and locking the socket.
+ */
+ if (!cgroup_bpf_enabled ||
+ __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
+ return retval;
+
+ ret = sockopt_alloc_buf(&ctx, max_optlen);
+ if (ret)
+ return ret;
+
+ if (!retval) {
+ /* If kernel getsockopt finished successfully,
+ * copy whatever was returned to the user back
+ * into our temporary buffer. Set optlen to the
+ * one that kernel returned as well to let
+ * BPF programs inspect the value.
+ */
+
+ if (get_user(ctx.optlen, optlen)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (ctx.optlen > max_optlen)
+ ctx.optlen = max_optlen;
+
+ if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) {
+ ret = -EFAULT;
+ goto out;
+ }
+ }
+
+ lock_sock(sk);
+ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
+ &ctx, BPF_PROG_RUN);
+ release_sock(sk);
+
+ if (!ret) {
+ ret = -EPERM;
+ goto out;
+ }
+
+ if (ctx.optlen > max_optlen) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ /* BPF programs only allowed to set retval to 0, not some
+ * arbitrary value.
+ */
+ if (ctx.retval != 0 && ctx.retval != retval) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
+ put_user(ctx.optlen, optlen)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret = ctx.retval;
+
+out:
+ sockopt_free_buf(&ctx);
+ return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_getsockopt);
+
static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
size_t *lenp)
{
@@ -1181,3 +1381,153 @@ const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
const struct bpf_prog_ops cg_sysctl_prog_ops = {
};
+
+static const struct bpf_func_proto *
+cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_sk_storage_get:
+ return &bpf_sk_storage_get_proto;
+ case BPF_FUNC_sk_storage_delete:
+ return &bpf_sk_storage_delete_proto;
+#ifdef CONFIG_INET
+ case BPF_FUNC_tcp_sock:
+ return &bpf_tcp_sock_proto;
+#endif
+ default:
+ return cgroup_base_func_proto(func_id, prog);
+ }
+}
+
+static bool cg_sockopt_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ const int size_default = sizeof(__u32);
+
+ if (off < 0 || off >= sizeof(struct bpf_sockopt))
+ return false;
+
+ if (off % size != 0)
+ return false;
+
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case offsetof(struct bpf_sockopt, retval):
+ if (size != size_default)
+ return false;
+ return prog->expected_attach_type ==
+ BPF_CGROUP_GETSOCKOPT;
+ case offsetof(struct bpf_sockopt, optname):
+ /* fallthrough */
+ case offsetof(struct bpf_sockopt, level):
+ if (size != size_default)
+ return false;
+ return prog->expected_attach_type ==
+ BPF_CGROUP_SETSOCKOPT;
+ case offsetof(struct bpf_sockopt, optlen):
+ return size == size_default;
+ default:
+ return false;
+ }
+ }
+
+ switch (off) {
+ case offsetof(struct bpf_sockopt, sk):
+ if (size != sizeof(__u64))
+ return false;
+ info->reg_type = PTR_TO_SOCKET;
+ break;
+ case offsetof(struct bpf_sockopt, optval):
+ if (size != sizeof(__u64))
+ return false;
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case offsetof(struct bpf_sockopt, optval_end):
+ if (size != sizeof(__u64))
+ return false;
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ case offsetof(struct bpf_sockopt, retval):
+ if (size != size_default)
+ return false;
+ return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT;
+ default:
+ if (size != size_default)
+ return false;
+ break;
+ }
+ return true;
+}
+
+#define CG_SOCKOPT_ACCESS_FIELD(T, F) \
+ T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \
+ si->dst_reg, si->src_reg, \
+ offsetof(struct bpf_sockopt_kern, F))
+
+static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct bpf_sockopt, sk):
+ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk);
+ break;
+ case offsetof(struct bpf_sockopt, level):
+ if (type == BPF_WRITE)
+ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level);
+ else
+ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level);
+ break;
+ case offsetof(struct bpf_sockopt, optname):
+ if (type == BPF_WRITE)
+ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname);
+ else
+ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname);
+ break;
+ case offsetof(struct bpf_sockopt, optlen):
+ if (type == BPF_WRITE)
+ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen);
+ else
+ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
+ break;
+ case offsetof(struct bpf_sockopt, retval):
+ if (type == BPF_WRITE)
+ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
+ else
+ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
+ break;
+ case offsetof(struct bpf_sockopt, optval):
+ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
+ break;
+ case offsetof(struct bpf_sockopt, optval_end):
+ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end);
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
+static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf,
+ bool direct_write,
+ const struct bpf_prog *prog)
+{
+ /* Nothing to do for sockopt argument. The data is kzalloc'ated.
+ */
+ return 0;
+}
+
+const struct bpf_verifier_ops cg_sockopt_verifier_ops = {
+ .get_func_proto = cg_sockopt_func_proto,
+ .is_valid_access = cg_sockopt_is_valid_access,
+ .convert_ctx_access = cg_sockopt_convert_ctx_access,
+ .gen_prologue = cg_sockopt_get_prologue,
+};
+
+const struct bpf_prog_ops cg_sockopt_prog_ops = {
+};
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ad3be85f1411..e2c1b43728da 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1809,6 +1809,15 @@ int bpf_prog_array_length(struct bpf_prog_array *array)
return cnt;
}
+bool bpf_prog_array_is_empty(struct bpf_prog_array *array)
+{
+ struct bpf_prog_array_item *item;
+
+ for (item = array->items; item->prog; item++)
+ if (item->prog != &dummy_bpf_prog.prog)
+ return false;
+ return true;
+}
static bool bpf_prog_array_copy_core(struct bpf_prog_array *array,
u32 *prog_ids,
@@ -2101,3 +2110,4 @@ EXPORT_SYMBOL(bpf_stats_enabled_key);
#include <linux/bpf_trace.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
+EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx);
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 8dff08768087..ef49e17ae47c 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -32,14 +32,19 @@
/* General idea: XDP packets getting XDP redirected to another CPU,
* will maximum be stored/queued for one driver ->poll() call. It is
- * guaranteed that setting flush bit and flush operation happen on
+ * guaranteed that queueing the frame and the flush operation happen on
* same CPU. Thus, cpu_map_flush operation can deduct via this_cpu_ptr()
* which queue in bpf_cpu_map_entry contains packets.
*/
#define CPU_MAP_BULK_SIZE 8 /* 8 == one cacheline on 64-bit archs */
+struct bpf_cpu_map_entry;
+struct bpf_cpu_map;
+
struct xdp_bulk_queue {
void *q[CPU_MAP_BULK_SIZE];
+ struct list_head flush_node;
+ struct bpf_cpu_map_entry *obj;
unsigned int count;
};
@@ -52,6 +57,8 @@ struct bpf_cpu_map_entry {
/* XDP can run multiple RX-ring queues, need __percpu enqueue store */
struct xdp_bulk_queue __percpu *bulkq;
+ struct bpf_cpu_map *cmap;
+
/* Queue with potential multi-producers, and single-consumer kthread */
struct ptr_ring *queue;
struct task_struct *kthread;
@@ -65,23 +72,17 @@ struct bpf_cpu_map {
struct bpf_map map;
/* Below members specific for map type */
struct bpf_cpu_map_entry **cpu_map;
- unsigned long __percpu *flush_needed;
+ struct list_head __percpu *flush_list;
};
-static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
- struct xdp_bulk_queue *bq, bool in_napi_ctx);
-
-static u64 cpu_map_bitmap_size(const union bpf_attr *attr)
-{
- return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
-}
+static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx);
static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
{
struct bpf_cpu_map *cmap;
int err = -ENOMEM;
+ int ret, cpu;
u64 cost;
- int ret;
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
@@ -105,7 +106,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
/* make sure page count doesn't overflow */
cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
- cost += cpu_map_bitmap_size(attr) * num_possible_cpus();
+ cost += sizeof(struct list_head) * num_possible_cpus();
/* Notice returns -EPERM on if map size is larger than memlock limit */
ret = bpf_map_charge_init(&cmap->map.memory, cost);
@@ -114,12 +115,13 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
goto free_cmap;
}
- /* A per cpu bitfield with a bit per possible CPU in map */
- cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
- __alignof__(unsigned long));
- if (!cmap->flush_needed)
+ cmap->flush_list = alloc_percpu(struct list_head);
+ if (!cmap->flush_list)
goto free_charge;
+ for_each_possible_cpu(cpu)
+ INIT_LIST_HEAD(per_cpu_ptr(cmap->flush_list, cpu));
+
/* Alloc array for possible remote "destination" CPUs */
cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
sizeof(struct bpf_cpu_map_entry *),
@@ -129,7 +131,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
return &cmap->map;
free_percpu:
- free_percpu(cmap->flush_needed);
+ free_percpu(cmap->flush_list);
free_charge:
bpf_map_charge_finish(&cmap->map.memory);
free_cmap:
@@ -334,7 +336,8 @@ static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
{
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
struct bpf_cpu_map_entry *rcpu;
- int numa, err;
+ struct xdp_bulk_queue *bq;
+ int numa, err, i;
/* Have map->numa_node, but choose node of redirect target CPU */
numa = cpu_to_node(cpu);
@@ -349,6 +352,11 @@ static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
if (!rcpu->bulkq)
goto free_rcu;
+ for_each_possible_cpu(i) {
+ bq = per_cpu_ptr(rcpu->bulkq, i);
+ bq->obj = rcpu;
+ }
+
/* Alloc queue */
rcpu->queue = kzalloc_node(sizeof(*rcpu->queue), gfp, numa);
if (!rcpu->queue)
@@ -405,7 +413,7 @@ static void __cpu_map_entry_free(struct rcu_head *rcu)
struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
/* No concurrent bq_enqueue can run at this point */
- bq_flush_to_queue(rcpu, bq, false);
+ bq_flush_to_queue(bq, false);
}
free_percpu(rcpu->bulkq);
/* Cannot kthread_stop() here, last put free rcpu resources */
@@ -488,6 +496,7 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id);
if (!rcpu)
return -ENOMEM;
+ rcpu->cmap = cmap;
}
rcu_read_lock();
__cpu_map_entry_replace(cmap, key_cpu, rcpu);
@@ -514,14 +523,14 @@ static void cpu_map_free(struct bpf_map *map)
synchronize_rcu();
/* To ensure all pending flush operations have completed wait for flush
- * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
- * Because the above synchronize_rcu() ensures the map is disconnected
- * from the program we can assume no new bits will be set.
+ * list be empty on _all_ cpus. Because the above synchronize_rcu()
+ * ensures the map is disconnected from the program we can assume no new
+ * items will be added to the list.
*/
for_each_online_cpu(cpu) {
- unsigned long *bitmap = per_cpu_ptr(cmap->flush_needed, cpu);
+ struct list_head *flush_list = per_cpu_ptr(cmap->flush_list, cpu);
- while (!bitmap_empty(bitmap, cmap->map.max_entries))
+ while (!list_empty(flush_list))
cond_resched();
}
@@ -538,7 +547,7 @@ static void cpu_map_free(struct bpf_map *map)
/* bq flush and cleanup happens after RCU graze-period */
__cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */
}
- free_percpu(cmap->flush_needed);
+ free_percpu(cmap->flush_list);
bpf_map_area_free(cmap->cpu_map);
kfree(cmap);
}
@@ -590,9 +599,9 @@ const struct bpf_map_ops cpu_map_ops = {
.map_check_btf = map_check_no_btf,
};
-static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
- struct xdp_bulk_queue *bq, bool in_napi_ctx)
+static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx)
{
+ struct bpf_cpu_map_entry *rcpu = bq->obj;
unsigned int processed = 0, drops = 0;
const int to_cpu = rcpu->cpu;
struct ptr_ring *q;
@@ -621,6 +630,8 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
bq->count = 0;
spin_unlock(&q->producer_lock);
+ __list_del_clearprev(&bq->flush_node);
+
/* Feedback loop via tracepoints */
trace_xdp_cpumap_enqueue(rcpu->map_id, processed, drops, to_cpu);
return 0;
@@ -631,10 +642,11 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
*/
static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
{
+ struct list_head *flush_list = this_cpu_ptr(rcpu->cmap->flush_list);
struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
- bq_flush_to_queue(rcpu, bq, true);
+ bq_flush_to_queue(bq, true);
/* Notice, xdp_buff/page MUST be queued here, long enough for
* driver to code invoking us to finished, due to driver
@@ -646,6 +658,10 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
* operation, when completing napi->poll call.
*/
bq->q[bq->count++] = xdpf;
+
+ if (!bq->flush_node.prev)
+ list_add(&bq->flush_node, flush_list);
+
return 0;
}
@@ -665,41 +681,16 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
return 0;
}
-void __cpu_map_insert_ctx(struct bpf_map *map, u32 bit)
-{
- struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
- unsigned long *bitmap = this_cpu_ptr(cmap->flush_needed);
-
- __set_bit(bit, bitmap);
-}
-
void __cpu_map_flush(struct bpf_map *map)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
- unsigned long *bitmap = this_cpu_ptr(cmap->flush_needed);
- u32 bit;
-
- /* The napi->poll softirq makes sure __cpu_map_insert_ctx()
- * and __cpu_map_flush() happen on same CPU. Thus, the percpu
- * bitmap indicate which percpu bulkq have packets.
- */
- for_each_set_bit(bit, bitmap, map->max_entries) {
- struct bpf_cpu_map_entry *rcpu = READ_ONCE(cmap->cpu_map[bit]);
- struct xdp_bulk_queue *bq;
-
- /* This is possible if entry is removed by user space
- * between xdp redirect and flush op.
- */
- if (unlikely(!rcpu))
- continue;
-
- __clear_bit(bit, bitmap);
+ struct list_head *flush_list = this_cpu_ptr(cmap->flush_list);
+ struct xdp_bulk_queue *bq, *tmp;
- /* Flush all frames in bulkq to real queue */
- bq = this_cpu_ptr(rcpu->bulkq);
- bq_flush_to_queue(rcpu, bq, true);
+ list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
+ bq_flush_to_queue(bq, true);
/* If already running, costs spin_lock_irqsave + smb_mb */
- wake_up_process(rcpu->kthread);
+ wake_up_process(bq->obj->kthread);
}
}
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 40e86a7e0ef0..d83cf8ccc872 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -17,9 +17,8 @@
* datapath always has a valid copy. However, the datapath does a "flush"
* operation that pushes any pending packets in the driver outside the RCU
* critical section. Each bpf_dtab_netdev tracks these pending operations using
- * an atomic per-cpu bitmap. The bpf_dtab_netdev object will not be destroyed
- * until all bits are cleared indicating outstanding flush operations have
- * completed.
+ * a per-cpu flush list. The bpf_dtab_netdev object will not be destroyed until
+ * this list is empty, indicating outstanding flush operations have completed.
*
* BPF syscalls may race with BPF program calls on any of the update, delete
* or lookup operations. As noted above the xchg() operation also keep the
@@ -48,9 +47,13 @@
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
#define DEV_MAP_BULK_SIZE 16
+struct bpf_dtab_netdev;
+
struct xdp_bulk_queue {
struct xdp_frame *q[DEV_MAP_BULK_SIZE];
+ struct list_head flush_node;
struct net_device *dev_rx;
+ struct bpf_dtab_netdev *obj;
unsigned int count;
};
@@ -65,23 +68,18 @@ struct bpf_dtab_netdev {
struct bpf_dtab {
struct bpf_map map;
struct bpf_dtab_netdev **netdev_map;
- unsigned long __percpu *flush_needed;
+ struct list_head __percpu *flush_list;
struct list_head list;
};
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);
-static u64 dev_map_bitmap_size(const union bpf_attr *attr)
-{
- return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long);
-}
-
static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
{
struct bpf_dtab *dtab;
+ int err, cpu;
u64 cost;
- int err;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
@@ -91,6 +89,11 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
+ /* Lookup returns a pointer straight to dev->ifindex, so make sure the
+ * verifier prevents writes from the BPF side
+ */
+ attr->map_flags |= BPF_F_RDONLY_PROG;
+
dtab = kzalloc(sizeof(*dtab), GFP_USER);
if (!dtab)
return ERR_PTR(-ENOMEM);
@@ -99,7 +102,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
/* make sure page count doesn't overflow */
cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
- cost += dev_map_bitmap_size(attr) * num_possible_cpus();
+ cost += sizeof(struct list_head) * num_possible_cpus();
/* if map size is larger than memlock limit, reject it */
err = bpf_map_charge_init(&dtab->map.memory, cost);
@@ -108,28 +111,30 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
err = -ENOMEM;
- /* A per cpu bitfield with a bit per possible net device */
- dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
- __alignof__(unsigned long),
- GFP_KERNEL | __GFP_NOWARN);
- if (!dtab->flush_needed)
+ dtab->flush_list = alloc_percpu(struct list_head);
+ if (!dtab->flush_list)
goto free_charge;
+ for_each_possible_cpu(cpu)
+ INIT_LIST_HEAD(per_cpu_ptr(dtab->flush_list, cpu));
+
dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
sizeof(struct bpf_dtab_netdev *),
dtab->map.numa_node);
if (!dtab->netdev_map)
- goto free_charge;
+ goto free_percpu;
spin_lock(&dev_map_lock);
list_add_tail_rcu(&dtab->list, &dev_map_list);
spin_unlock(&dev_map_lock);
return &dtab->map;
+
+free_percpu:
+ free_percpu(dtab->flush_list);
free_charge:
bpf_map_charge_finish(&dtab->map.memory);
free_dtab:
- free_percpu(dtab->flush_needed);
kfree(dtab);
return ERR_PTR(err);
}
@@ -158,14 +163,14 @@ static void dev_map_free(struct bpf_map *map)
rcu_barrier();
/* To ensure all pending flush operations have completed wait for flush
- * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
+ * list to empty on _all_ cpus.
* Because the above synchronize_rcu() ensures the map is disconnected
- * from the program we can assume no new bits will be set.
+ * from the program we can assume no new items will be added.
*/
for_each_online_cpu(cpu) {
- unsigned long *bitmap = per_cpu_ptr(dtab->flush_needed, cpu);
+ struct list_head *flush_list = per_cpu_ptr(dtab->flush_list, cpu);
- while (!bitmap_empty(bitmap, dtab->map.max_entries))
+ while (!list_empty(flush_list))
cond_resched();
}
@@ -181,7 +186,7 @@ static void dev_map_free(struct bpf_map *map)
kfree(dev);
}
- free_percpu(dtab->flush_needed);
+ free_percpu(dtab->flush_list);
bpf_map_area_free(dtab->netdev_map);
kfree(dtab);
}
@@ -203,18 +208,10 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
return 0;
}
-void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
-{
- struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
- unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
-
- __set_bit(bit, bitmap);
-}
-
-static int bq_xmit_all(struct bpf_dtab_netdev *obj,
- struct xdp_bulk_queue *bq, u32 flags,
+static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags,
bool in_napi_ctx)
{
+ struct bpf_dtab_netdev *obj = bq->obj;
struct net_device *dev = obj->dev;
int sent = 0, drops = 0, err = 0;
int i;
@@ -241,6 +238,7 @@ out:
trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
sent, drops, bq->dev_rx, dev, err);
bq->dev_rx = NULL;
+ __list_del_clearprev(&bq->flush_node);
return 0;
error:
/* If ndo_xdp_xmit fails with an errno, no frames have been
@@ -263,31 +261,18 @@ error:
* from the driver before returning from its napi->poll() routine. The poll()
* routine is called either from busy_poll context or net_rx_action signaled
* from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
- * net device can be torn down. On devmap tear down we ensure the ctx bitmap
- * is zeroed before completing to ensure all flush operations have completed.
+ * net device can be torn down. On devmap tear down we ensure the flush list
+ * is empty before completing to ensure all flush operations have completed.
*/
void __dev_map_flush(struct bpf_map *map)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
- unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
- u32 bit;
+ struct list_head *flush_list = this_cpu_ptr(dtab->flush_list);
+ struct xdp_bulk_queue *bq, *tmp;
rcu_read_lock();
- for_each_set_bit(bit, bitmap, map->max_entries) {
- struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
- struct xdp_bulk_queue *bq;
-
- /* This is possible if the dev entry is removed by user space
- * between xdp redirect and flush op.
- */
- if (unlikely(!dev))
- continue;
-
- bq = this_cpu_ptr(dev->bulkq);
- bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, true);
-
- __clear_bit(bit, bitmap);
- }
+ list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
+ bq_xmit_all(bq, XDP_XMIT_FLUSH, true);
rcu_read_unlock();
}
@@ -314,10 +299,11 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
+ struct list_head *flush_list = this_cpu_ptr(obj->dtab->flush_list);
struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
- bq_xmit_all(obj, bq, 0, true);
+ bq_xmit_all(bq, 0, true);
/* Ingress dev_rx will be the same for all xdp_frame's in
* bulk_queue, because bq stored per-CPU and must be flushed
@@ -327,6 +313,10 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
bq->dev_rx = dev_rx;
bq->q[bq->count++] = xdpf;
+
+ if (!bq->flush_node.prev)
+ list_add(&bq->flush_node, flush_list);
+
return 0;
}
@@ -377,17 +367,12 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
{
if (dev->dev->netdev_ops->ndo_xdp_xmit) {
struct xdp_bulk_queue *bq;
- unsigned long *bitmap;
-
int cpu;
rcu_read_lock();
for_each_online_cpu(cpu) {
- bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
- __clear_bit(dev->bit, bitmap);
-
bq = per_cpu_ptr(dev->bulkq, cpu);
- bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, false);
+ bq_xmit_all(bq, XDP_XMIT_FLUSH, false);
}
rcu_read_unlock();
}
@@ -434,8 +419,10 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
struct net *net = current->nsproxy->net_ns;
gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
struct bpf_dtab_netdev *dev, *old_dev;
- u32 i = *(u32 *)key;
u32 ifindex = *(u32 *)value;
+ struct xdp_bulk_queue *bq;
+ u32 i = *(u32 *)key;
+ int cpu;
if (unlikely(map_flags > BPF_EXIST))
return -EINVAL;
@@ -458,6 +445,11 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
return -ENOMEM;
}
+ for_each_possible_cpu(cpu) {
+ bq = per_cpu_ptr(dev->bulkq, cpu);
+ bq->obj = dev;
+ }
+
dev->dev = dev_get_by_index(net, ifindex);
if (!dev->dev) {
free_percpu(dev->bulkq);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7713cf39795a..b0f545e07425 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1590,6 +1590,14 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
default:
return -EINVAL;
}
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+ switch (expected_attach_type) {
+ case BPF_CGROUP_SETSOCKOPT:
+ case BPF_CGROUP_GETSOCKOPT:
+ return 0;
+ default:
+ return -EINVAL;
+ }
default:
return 0;
}
@@ -1840,6 +1848,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
switch (prog->type) {
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
case BPF_PROG_TYPE_CGROUP_SKB:
return prog->enforce_expected_attach_type &&
@@ -1912,6 +1921,10 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_SYSCTL:
ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
break;
+ case BPF_CGROUP_GETSOCKOPT:
+ case BPF_CGROUP_SETSOCKOPT:
+ ptype = BPF_PROG_TYPE_CGROUP_SOCKOPT;
+ break;
default:
return -EINVAL;
}
@@ -1995,6 +2008,10 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_SYSCTL:
ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
break;
+ case BPF_CGROUP_GETSOCKOPT:
+ case BPF_CGROUP_SETSOCKOPT:
+ ptype = BPF_PROG_TYPE_CGROUP_SOCKOPT;
+ break;
default:
return -EINVAL;
}
@@ -2031,6 +2048,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
case BPF_CGROUP_SYSCTL:
+ case BPF_CGROUP_GETSOCKOPT:
+ case BPF_CGROUP_SETSOCKOPT:
break;
case BPF_LIRC_MODE2:
return lirc_prog_query(attr, uattr);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0e079b2298f8..a2e763703c30 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1659,16 +1659,18 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env,
}
}
-static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
+static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
+ int spi)
{
struct bpf_verifier_state *st = env->cur_state;
int first_idx = st->first_insn_idx;
int last_idx = env->insn_idx;
struct bpf_func_state *func;
struct bpf_reg_state *reg;
- u32 reg_mask = 1u << regno;
- u64 stack_mask = 0;
+ u32 reg_mask = regno >= 0 ? 1u << regno : 0;
+ u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
bool skip_first = true;
+ bool new_marks = false;
int i, err;
if (!env->allow_ptr_leaks)
@@ -1676,18 +1678,43 @@ static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
return 0;
func = st->frame[st->curframe];
- reg = &func->regs[regno];
- if (reg->type != SCALAR_VALUE) {
- WARN_ONCE(1, "backtracing misuse");
- return -EFAULT;
+ if (regno >= 0) {
+ reg = &func->regs[regno];
+ if (reg->type != SCALAR_VALUE) {
+ WARN_ONCE(1, "backtracing misuse");
+ return -EFAULT;
+ }
+ if (!reg->precise)
+ new_marks = true;
+ else
+ reg_mask = 0;
+ reg->precise = true;
+ }
+
+ while (spi >= 0) {
+ if (func->stack[spi].slot_type[0] != STACK_SPILL) {
+ stack_mask = 0;
+ break;
+ }
+ reg = &func->stack[spi].spilled_ptr;
+ if (reg->type != SCALAR_VALUE) {
+ stack_mask = 0;
+ break;
+ }
+ if (!reg->precise)
+ new_marks = true;
+ else
+ stack_mask = 0;
+ reg->precise = true;
+ break;
}
- if (reg->precise)
- return 0;
- func->regs[regno].precise = true;
+ if (!new_marks)
+ return 0;
+ if (!reg_mask && !stack_mask)
+ return 0;
for (;;) {
DECLARE_BITMAP(mask, 64);
- bool new_marks = false;
u32 history = st->jmp_history_cnt;
if (env->log.level & BPF_LOG_LEVEL)
@@ -1730,12 +1757,15 @@ static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
if (!st)
break;
+ new_marks = false;
func = st->frame[st->curframe];
bitmap_from_u64(mask, reg_mask);
for_each_set_bit(i, mask, 32) {
reg = &func->regs[i];
- if (reg->type != SCALAR_VALUE)
+ if (reg->type != SCALAR_VALUE) {
+ reg_mask &= ~(1u << i);
continue;
+ }
if (!reg->precise)
new_marks = true;
reg->precise = true;
@@ -1756,11 +1786,15 @@ static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
return -EFAULT;
}
- if (func->stack[i].slot_type[0] != STACK_SPILL)
+ if (func->stack[i].slot_type[0] != STACK_SPILL) {
+ stack_mask &= ~(1ull << i);
continue;
+ }
reg = &func->stack[i].spilled_ptr;
- if (reg->type != SCALAR_VALUE)
+ if (reg->type != SCALAR_VALUE) {
+ stack_mask &= ~(1ull << i);
continue;
+ }
if (!reg->precise)
new_marks = true;
reg->precise = true;
@@ -1772,6 +1806,8 @@ static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
reg_mask, stack_mask);
}
+ if (!reg_mask && !stack_mask)
+ break;
if (!new_marks)
break;
@@ -1781,6 +1817,15 @@ static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
return 0;
}
+static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
+{
+ return __mark_chain_precision(env, regno, -1);
+}
+
+static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
+{
+ return __mark_chain_precision(env, -1, spi);
+}
static bool is_spillable_regtype(enum bpf_reg_type type)
{
@@ -2215,6 +2260,13 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
env->seen_direct_write = true;
return true;
+
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+ if (t == BPF_WRITE)
+ env->seen_direct_write = true;
+
+ return true;
+
default:
return false;
}
@@ -3407,12 +3459,9 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (func_id != BPF_FUNC_get_local_storage)
goto error;
break;
- /* devmap returns a pointer to a live net_device ifindex that we cannot
- * allow to be modified from bpf side. So do not allow lookup elements
- * for now.
- */
case BPF_MAP_TYPE_DEVMAP:
- if (func_id != BPF_FUNC_redirect_map)
+ if (func_id != BPF_FUNC_redirect_map &&
+ func_id != BPF_FUNC_map_lookup_elem)
goto error;
break;
/* Restrict bpf side of cpumap and xskmap, open when use-cases
@@ -6066,6 +6115,7 @@ static int check_return_code(struct bpf_verifier_env *env)
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
break;
default:
return 0;
@@ -7106,6 +7156,46 @@ static int propagate_liveness(struct bpf_verifier_env *env,
return 0;
}
+/* find precise scalars in the previous equivalent state and
+ * propagate them into the current state
+ */
+static int propagate_precision(struct bpf_verifier_env *env,
+ const struct bpf_verifier_state *old)
+{
+ struct bpf_reg_state *state_reg;
+ struct bpf_func_state *state;
+ int i, err = 0;
+
+ state = old->frame[old->curframe];
+ state_reg = state->regs;
+ for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
+ if (state_reg->type != SCALAR_VALUE ||
+ !state_reg->precise)
+ continue;
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "propagating r%d\n", i);
+ err = mark_chain_precision(env, i);
+ if (err < 0)
+ return err;
+ }
+
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ if (state->stack[i].slot_type[0] != STACK_SPILL)
+ continue;
+ state_reg = &state->stack[i].spilled_ptr;
+ if (state_reg->type != SCALAR_VALUE ||
+ !state_reg->precise)
+ continue;
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "propagating fp%d\n",
+ (-i - 1) * BPF_REG_SIZE);
+ err = mark_chain_precision_stack(env, i);
+ if (err < 0)
+ return err;
+ }
+ return 0;
+}
+
static bool states_maybe_looping(struct bpf_verifier_state *old,
struct bpf_verifier_state *cur)
{
@@ -7198,6 +7288,14 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
* this state and will pop a new one.
*/
err = propagate_liveness(env, &sl->state, cur);
+
+ /* if previous state reached the exit with precision and
+ * current state is equivalent to it (except precsion marks)
+ * the precision needs to be propagated back in
+ * the current state.
+ */
+ err = err ? : push_jmp_history(env, cur);
+ err = err ? : propagate_precision(env, &sl->state);
if (err)
return err;
return 1;
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index ef7338cebd18..9bb96ace9fa1 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -145,8 +145,7 @@ void __xsk_map_flush(struct bpf_map *map)
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
xsk_flush(xs);
- __list_del(xs->flush_node.prev, xs->flush_node.next);
- xs->flush_node.prev = NULL;
+ __list_del_clearprev(&xs->flush_node);
}
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 75675b9bf6df..399aca51ff75 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1712,31 +1712,6 @@ const struct file_operations pidfd_fops = {
#endif
};
-/**
- * pidfd_create() - Create a new pid file descriptor.
- *
- * @pid: struct pid that the pidfd will reference
- *
- * This creates a new pid file descriptor with the O_CLOEXEC flag set.
- *
- * Note, that this function can only be called after the fd table has
- * been unshared to avoid leaking the pidfd to the new process.
- *
- * Return: On success, a cloexec pidfd is returned.
- * On error, a negative errno number will be returned.
- */
-static int pidfd_create(struct pid *pid)
-{
- int fd;
-
- fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
- O_RDWR | O_CLOEXEC);
- if (fd < 0)
- put_pid(pid);
-
- return fd;
-}
-
static void __delayed_free_task(struct rcu_head *rhp)
{
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
@@ -1774,6 +1749,7 @@ static __latent_entropy struct task_struct *copy_process(
int pidfd = -1, retval;
struct task_struct *p;
struct multiprocess_signals delayed;
+ struct file *pidfile = NULL;
/*
* Don't allow sharing the root directory with processes in a different
@@ -1822,8 +1798,6 @@ static __latent_entropy struct task_struct *copy_process(
}
if (clone_flags & CLONE_PIDFD) {
- int reserved;
-
/*
* - CLONE_PARENT_SETTID is useless for pidfds and also
* parent_tidptr is used to return pidfds.
@@ -1834,16 +1808,6 @@ static __latent_entropy struct task_struct *copy_process(
if (clone_flags &
(CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
return ERR_PTR(-EINVAL);
-
- /*
- * Verify that parent_tidptr is sane so we can potentially
- * reuse it later.
- */
- if (get_user(reserved, parent_tidptr))
- return ERR_PTR(-EFAULT);
-
- if (reserved != 0)
- return ERR_PTR(-EINVAL);
}
/*
@@ -2058,11 +2022,20 @@ static __latent_entropy struct task_struct *copy_process(
* if the fd table isn't shared).
*/
if (clone_flags & CLONE_PIDFD) {
- retval = pidfd_create(pid);
+ retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
if (retval < 0)
goto bad_fork_free_pid;
pidfd = retval;
+
+ pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
+ O_RDWR | O_CLOEXEC);
+ if (IS_ERR(pidfile)) {
+ put_unused_fd(pidfd);
+ goto bad_fork_free_pid;
+ }
+ get_pid(pid); /* held by pidfile now */
+
retval = put_user(pidfd, parent_tidptr);
if (retval)
goto bad_fork_put_pidfd;
@@ -2180,6 +2153,9 @@ static __latent_entropy struct task_struct *copy_process(
goto bad_fork_cancel_cgroup;
}
+ /* past the last point of failure */
+ if (pidfile)
+ fd_install(pidfd, pidfile);
init_task_pid_links(p);
if (likely(p->pid)) {
@@ -2246,8 +2222,10 @@ bad_fork_cancel_cgroup:
bad_fork_cgroup_threadgroup_change_end:
cgroup_threadgroup_change_end(current);
bad_fork_put_pidfd:
- if (clone_flags & CLONE_PIDFD)
- ksys_close(pidfd);
+ if (clone_flags & CLONE_PIDFD) {
+ fput(pidfile);
+ put_unused_fd(pidfd);
+ }
bad_fork_free_pid:
if (pid != &init_struct_pid)
free_pid(pid);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c102c240bb0b..ca1255d14576 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1431,6 +1431,20 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
return err;
}
+static int __init send_signal_irq_work_init(void)
+{
+ int cpu;
+ struct send_signal_irq_work *work;
+
+ for_each_possible_cpu(cpu) {
+ work = per_cpu_ptr(&send_signal_work, cpu);
+ init_irq_work(&work->irq_work, do_bpf_send_signal);
+ }
+ return 0;
+}
+
+subsys_initcall(send_signal_irq_work_init);
+
#ifdef CONFIG_MODULES
static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
void *module)
@@ -1478,18 +1492,5 @@ static int __init bpf_event_init(void)
return 0;
}
-static int __init send_signal_irq_work_init(void)
-{
- int cpu;
- struct send_signal_irq_work *work;
-
- for_each_possible_cpu(cpu) {
- work = per_cpu_ptr(&send_signal_work, cpu);
- init_irq_work(&work->irq_work, do_bpf_send_signal);
- }
- return 0;
-}
-
fs_initcall(bpf_event_init);
-subsys_initcall(send_signal_irq_work_init);
#endif /* CONFIG_MODULES */
diff --git a/lib/Kconfig b/lib/Kconfig
index 90623a0e1942..78ddb9526b62 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -562,6 +562,14 @@ config SIGNATURE
Digital signature verification. Currently only RSA is supported.
Implementation is done using GnuPG MPI library
+config DIMLIB
+ bool "DIM library"
+ default y
+ help
+ Dynamic Interrupt Moderation library.
+ Implements an algorithm for dynamically change CQ modertion values
+ according to run time performance.
+
#
# libfdt files, only selected if needed.
#
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cbdfae379896..99272b5dd980 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1909,6 +1909,15 @@ config TEST_BPF
If unsure, say N.
+config TEST_BLACKHOLE_DEV
+ tristate "Test blackhole netdev functionality"
+ depends on m && NET
+ help
+ This builds the "test_blackhole_dev" module that validates the
+ data path through this blackhole netdev.
+
+ If unsure, say N.
+
config FIND_BIT_BENCHMARK
tristate "Test find_bit functions"
help
diff --git a/lib/Makefile b/lib/Makefile
index fb7697031a79..6ac44fe2a37f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -91,6 +91,7 @@ obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o
obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o
obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o
obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o
+obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o
obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/
@@ -202,6 +203,7 @@ obj-$(CONFIG_GLOB) += glob.o
obj-$(CONFIG_GLOB_SELFTEST) += globtest.o
obj-$(CONFIG_MPILIB) += mpi/
+obj-$(CONFIG_DIMLIB) += dim/
obj-$(CONFIG_SIGNATURE) += digsig.o
lib-$(CONFIG_CLZ_TAB) += clz_tab.o
diff --git a/lib/dim/Makefile b/lib/dim/Makefile
new file mode 100644
index 000000000000..160afe288df0
--- /dev/null
+++ b/lib/dim/Makefile
@@ -0,0 +1,9 @@
+#
+# DIM Dynamic Interrupt Moderation library
+#
+
+obj-$(CONFIG_DIMLIB) = net_dim.o
+
+net_dim-y = \
+ dim.o \
+ net_dim.o
diff --git a/lib/dim/dim.c b/lib/dim/dim.c
new file mode 100644
index 000000000000..439d641ec796
--- /dev/null
+++ b/lib/dim/dim.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <linux/dim.h>
+
+bool dim_on_top(struct dim *dim)
+{
+ switch (dim->tune_state) {
+ case DIM_PARKING_ON_TOP:
+ case DIM_PARKING_TIRED:
+ return true;
+ case DIM_GOING_RIGHT:
+ return (dim->steps_left > 1) && (dim->steps_right == 1);
+ default: /* DIM_GOING_LEFT */
+ return (dim->steps_right > 1) && (dim->steps_left == 1);
+ }
+}
+EXPORT_SYMBOL(dim_on_top);
+
+void dim_turn(struct dim *dim)
+{
+ switch (dim->tune_state) {
+ case DIM_PARKING_ON_TOP:
+ case DIM_PARKING_TIRED:
+ break;
+ case DIM_GOING_RIGHT:
+ dim->tune_state = DIM_GOING_LEFT;
+ dim->steps_left = 0;
+ break;
+ case DIM_GOING_LEFT:
+ dim->tune_state = DIM_GOING_RIGHT;
+ dim->steps_right = 0;
+ break;
+ }
+}
+EXPORT_SYMBOL(dim_turn);
+
+void dim_park_on_top(struct dim *dim)
+{
+ dim->steps_right = 0;
+ dim->steps_left = 0;
+ dim->tired = 0;
+ dim->tune_state = DIM_PARKING_ON_TOP;
+}
+EXPORT_SYMBOL(dim_park_on_top);
+
+void dim_park_tired(struct dim *dim)
+{
+ dim->steps_right = 0;
+ dim->steps_left = 0;
+ dim->tune_state = DIM_PARKING_TIRED;
+}
+EXPORT_SYMBOL(dim_park_tired);
+
+void dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
+ struct dim_stats *curr_stats)
+{
+ /* u32 holds up to 71 minutes, should be enough */
+ u32 delta_us = ktime_us_delta(end->time, start->time);
+ u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr);
+ u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr,
+ start->byte_ctr);
+ u32 ncomps = BIT_GAP(BITS_PER_TYPE(u32), end->comp_ctr,
+ start->comp_ctr);
+
+ if (!delta_us)
+ return;
+
+ curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us);
+ curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us);
+ curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC,
+ delta_us);
+ curr_stats->cpms = DIV_ROUND_UP(ncomps * USEC_PER_MSEC, delta_us);
+ if (curr_stats->epms != 0)
+ curr_stats->cpe_ratio =
+ (curr_stats->cpms * 100) / curr_stats->epms;
+ else
+ curr_stats->cpe_ratio = 0;
+
+}
+EXPORT_SYMBOL(dim_calc_stats);
diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c
new file mode 100644
index 000000000000..5bcc902c5388
--- /dev/null
+++ b/lib/dim/net_dim.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <linux/dim.h>
+
+struct dim_cq_moder
+net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
+{
+ struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix];
+
+ cq_moder.cq_period_mode = cq_period_mode;
+ return cq_moder;
+}
+EXPORT_SYMBOL(net_dim_get_rx_moderation);
+
+struct dim_cq_moder
+net_dim_get_def_rx_moderation(u8 cq_period_mode)
+{
+ u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
+ NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
+
+ return net_dim_get_rx_moderation(cq_period_mode, profile_ix);
+}
+EXPORT_SYMBOL(net_dim_get_def_rx_moderation);
+
+struct dim_cq_moder
+net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
+{
+ struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix];
+
+ cq_moder.cq_period_mode = cq_period_mode;
+ return cq_moder;
+}
+EXPORT_SYMBOL(net_dim_get_tx_moderation);
+
+struct dim_cq_moder
+net_dim_get_def_tx_moderation(u8 cq_period_mode)
+{
+ u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
+ NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
+
+ return net_dim_get_tx_moderation(cq_period_mode, profile_ix);
+}
+EXPORT_SYMBOL(net_dim_get_def_tx_moderation);
+
+static int net_dim_step(struct dim *dim)
+{
+ if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))
+ return DIM_TOO_TIRED;
+
+ switch (dim->tune_state) {
+ case DIM_PARKING_ON_TOP:
+ case DIM_PARKING_TIRED:
+ break;
+ case DIM_GOING_RIGHT:
+ if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1))
+ return DIM_ON_EDGE;
+ dim->profile_ix++;
+ dim->steps_right++;
+ break;
+ case DIM_GOING_LEFT:
+ if (dim->profile_ix == 0)
+ return DIM_ON_EDGE;
+ dim->profile_ix--;
+ dim->steps_left++;
+ break;
+ }
+
+ dim->tired++;
+ return DIM_STEPPED;
+}
+
+static void net_dim_exit_parking(struct dim *dim)
+{
+ dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT;
+ net_dim_step(dim);
+}
+
+static int net_dim_stats_compare(struct dim_stats *curr,
+ struct dim_stats *prev)
+{
+ if (!prev->bpms)
+ return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME;
+
+ if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
+ return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER :
+ DIM_STATS_WORSE;
+
+ if (!prev->ppms)
+ return curr->ppms ? DIM_STATS_BETTER :
+ DIM_STATS_SAME;
+
+ if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
+ return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER :
+ DIM_STATS_WORSE;
+
+ if (!prev->epms)
+ return DIM_STATS_SAME;
+
+ if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
+ return (curr->epms < prev->epms) ? DIM_STATS_BETTER :
+ DIM_STATS_WORSE;
+
+ return DIM_STATS_SAME;
+}
+
+static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
+{
+ int prev_state = dim->tune_state;
+ int prev_ix = dim->profile_ix;
+ int stats_res;
+ int step_res;
+
+ switch (dim->tune_state) {
+ case DIM_PARKING_ON_TOP:
+ stats_res = net_dim_stats_compare(curr_stats,
+ &dim->prev_stats);
+ if (stats_res != DIM_STATS_SAME)
+ net_dim_exit_parking(dim);
+ break;
+
+ case DIM_PARKING_TIRED:
+ dim->tired--;
+ if (!dim->tired)
+ net_dim_exit_parking(dim);
+ break;
+
+ case DIM_GOING_RIGHT:
+ case DIM_GOING_LEFT:
+ stats_res = net_dim_stats_compare(curr_stats,
+ &dim->prev_stats);
+ if (stats_res != DIM_STATS_BETTER)
+ dim_turn(dim);
+
+ if (dim_on_top(dim)) {
+ dim_park_on_top(dim);
+ break;
+ }
+
+ step_res = net_dim_step(dim);
+ switch (step_res) {
+ case DIM_ON_EDGE:
+ dim_park_on_top(dim);
+ break;
+ case DIM_TOO_TIRED:
+ dim_park_tired(dim);
+ break;
+ }
+
+ break;
+ }
+
+ if (prev_state != DIM_PARKING_ON_TOP ||
+ dim->tune_state != DIM_PARKING_ON_TOP)
+ dim->prev_stats = *curr_stats;
+
+ return dim->profile_ix != prev_ix;
+}
+
+void net_dim(struct dim *dim, struct dim_sample end_sample)
+{
+ struct dim_stats curr_stats;
+ u16 nevents;
+
+ switch (dim->state) {
+ case DIM_MEASURE_IN_PROGRESS:
+ nevents = BIT_GAP(BITS_PER_TYPE(u16),
+ end_sample.event_ctr,
+ dim->start_sample.event_ctr);
+ if (nevents < DIM_NEVENTS)
+ break;
+ dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats);
+ if (net_dim_decision(&curr_stats, dim)) {
+ dim->state = DIM_APPLY_NEW_PROFILE;
+ schedule_work(&dim->work);
+ break;
+ }
+ /* fall through */
+ case DIM_START_MEASURE:
+ dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr,
+ end_sample.byte_ctr, &dim->start_sample);
+ dim->state = DIM_MEASURE_IN_PROGRESS;
+ break;
+ case DIM_APPLY_NEW_PROFILE:
+ break;
+ }
+}
+EXPORT_SYMBOL(net_dim);
diff --git a/lib/test_blackhole_dev.c b/lib/test_blackhole_dev.c
new file mode 100644
index 000000000000..4c40580a99a3
--- /dev/null
+++ b/lib/test_blackhole_dev.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This module tests the blackhole_dev that is created during the
+ * net subsystem initialization. The test this module performs is
+ * by injecting an skb into the stack with skb->dev as the
+ * blackhole_dev and expects kernel to behave in a sane manner
+ * (in other words, *not crash*)!
+ *
+ * Copyright (c) 2018, Mahesh Bandewar <[email protected]>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/udp.h>
+#include <linux/ipv6.h>
+
+#include <net/dst.h>
+
+#define SKB_SIZE 256
+#define HEAD_SIZE (14+40+8) /* Ether + IPv6 + UDP */
+#define TAIL_SIZE 32 /* random tail-room */
+
+#define UDP_PORT 1234
+
+static int __init test_blackholedev_init(void)
+{
+ struct ipv6hdr *ip6h;
+ struct sk_buff *skb;
+ struct ethhdr *ethh;
+ struct udphdr *uh;
+ int data_len;
+ int ret;
+
+ skb = alloc_skb(SKB_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ /* Reserve head-room for the headers */
+ skb_reserve(skb, HEAD_SIZE);
+
+ /* Add data to the skb */
+ data_len = SKB_SIZE - (HEAD_SIZE + TAIL_SIZE);
+ memset(__skb_put(skb, data_len), 0xf, data_len);
+
+ /* Add protocol data */
+ /* (Transport) UDP */
+ uh = (struct udphdr *)skb_push(skb, sizeof(struct udphdr));
+ skb_set_transport_header(skb, 0);
+ uh->source = uh->dest = htons(UDP_PORT);
+ uh->len = htons(data_len);
+ uh->check = 0;
+ /* (Network) IPv6 */
+ ip6h = (struct ipv6hdr *)skb_push(skb, sizeof(struct ipv6hdr));
+ skb_set_network_header(skb, 0);
+ ip6h->hop_limit = 32;
+ ip6h->payload_len = data_len + sizeof(struct udphdr);
+ ip6h->nexthdr = IPPROTO_UDP;
+ ip6h->saddr = in6addr_loopback;
+ ip6h->daddr = in6addr_loopback;
+ /* Ether */
+ ethh = (struct ethhdr *)skb_push(skb, sizeof(struct ethhdr));
+ skb_set_mac_header(skb, 0);
+
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->pkt_type = PACKET_HOST;
+ skb->dev = blackhole_netdev;
+
+ /* Now attempt to send the packet */
+ ret = dev_queue_xmit(skb);
+
+ switch (ret) {
+ case NET_XMIT_SUCCESS:
+ pr_warn("dev_queue_xmit() returned NET_XMIT_SUCCESS\n");
+ break;
+ case NET_XMIT_DROP:
+ pr_warn("dev_queue_xmit() returned NET_XMIT_DROP\n");
+ break;
+ case NET_XMIT_CN:
+ pr_warn("dev_queue_xmit() returned NET_XMIT_CN\n");
+ break;
+ default:
+ pr_err("dev_queue_xmit() returned UNKNOWN(%d)\n", ret);
+ }
+
+ return 0;
+}
+
+static void __exit test_blackholedev_exit(void)
+{
+ pr_warn("test_blackholedev module terminating.\n");
+}
+
+module_init(test_blackholedev_init);
+module_exit(test_blackholedev_exit);
+
+MODULE_AUTHOR("Mahesh Bandewar <[email protected]>");
+MODULE_LICENSE("GPL");
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index cb7d57d16c9d..37898da8ad48 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -9,12 +9,11 @@
#include "main.h"
+#include <linux/netlink.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
#include <linux/types.h>
-struct netlink_callback;
-struct seq_file;
-struct sk_buff;
-
extern char batadv_routing_algo[];
extern struct list_head batadv_hardif_list;
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 231b4aab4d8d..22672cb3e25d 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -21,6 +21,7 @@
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
+#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/types.h>
@@ -41,8 +42,6 @@
#include "netlink.h"
#include "originator.h"
-struct sk_buff;
-
static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index bb3d40f73bfe..1a29505f4f66 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -9,8 +9,8 @@
#include "main.h"
-struct sk_buff;
-struct work_struct;
+#include <linux/skbuff.h>
+#include <linux/workqueue.h>
int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface);
void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface);
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index 616bf2ea8755..2a50df7fc2bf 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -9,10 +9,9 @@
#include "main.h"
+#include <linux/skbuff.h>
#include <linux/types.h>
-struct sk_buff;
-
int batadv_v_ogm_init(struct batadv_priv *bat_priv);
void batadv_v_ogm_free(struct batadv_priv *bat_priv);
int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface);
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 012d72c8d064..02b24a861a85 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -10,14 +10,13 @@
#include "main.h"
#include <linux/compiler.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
#include <linux/stddef.h>
#include <linux/types.h>
-struct net_device;
-struct netlink_callback;
-struct seq_file;
-struct sk_buff;
-
/**
* batadv_bla_is_loopdetect_mac() - check if the mac address is from a loop
* detect frame sent by bridge loop avoidance
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index d38d70ccdd5a..38c4d8e51155 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -10,7 +10,6 @@
#include <asm/current.h>
#include <linux/dcache.h>
#include <linux/debugfs.h>
-#include <linux/err.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/fs.h>
@@ -293,31 +292,13 @@ static struct batadv_debuginfo *batadv_hardif_debuginfos[] = {
void batadv_debugfs_init(void)
{
struct batadv_debuginfo **bat_debug;
- struct dentry *file;
batadv_debugfs = debugfs_create_dir(BATADV_DEBUGFS_SUBDIR, NULL);
- if (batadv_debugfs == ERR_PTR(-ENODEV))
- batadv_debugfs = NULL;
-
- if (!batadv_debugfs)
- goto err;
-
- for (bat_debug = batadv_general_debuginfos; *bat_debug; ++bat_debug) {
- file = debugfs_create_file(((*bat_debug)->attr).name,
- S_IFREG | ((*bat_debug)->attr).mode,
- batadv_debugfs, NULL,
- &(*bat_debug)->fops);
- if (!file) {
- pr_err("Can't add general debugfs file: %s\n",
- ((*bat_debug)->attr).name);
- goto err;
- }
- }
- return;
-err:
- debugfs_remove_recursive(batadv_debugfs);
- batadv_debugfs = NULL;
+ for (bat_debug = batadv_general_debuginfos; *bat_debug; ++bat_debug)
+ debugfs_create_file(((*bat_debug)->attr).name,
+ S_IFREG | ((*bat_debug)->attr).mode,
+ batadv_debugfs, NULL, &(*bat_debug)->fops);
}
/**
@@ -333,42 +314,23 @@ void batadv_debugfs_destroy(void)
* batadv_debugfs_add_hardif() - creates the base directory for a hard interface
* in debugfs.
* @hard_iface: hard interface which should be added.
- *
- * Return: 0 on success or negative error number in case of failure
*/
-int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
+void batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
{
struct net *net = dev_net(hard_iface->net_dev);
struct batadv_debuginfo **bat_debug;
- struct dentry *file;
-
- if (!batadv_debugfs)
- goto out;
if (net != &init_net)
- return 0;
+ return;
hard_iface->debug_dir = debugfs_create_dir(hard_iface->net_dev->name,
batadv_debugfs);
- if (!hard_iface->debug_dir)
- goto out;
-
- for (bat_debug = batadv_hardif_debuginfos; *bat_debug; ++bat_debug) {
- file = debugfs_create_file(((*bat_debug)->attr).name,
- S_IFREG | ((*bat_debug)->attr).mode,
- hard_iface->debug_dir,
- hard_iface->net_dev,
- &(*bat_debug)->fops);
- if (!file)
- goto rem_attr;
- }
- return 0;
-rem_attr:
- debugfs_remove_recursive(hard_iface->debug_dir);
- hard_iface->debug_dir = NULL;
-out:
- return -ENOMEM;
+ for (bat_debug = batadv_hardif_debuginfos; *bat_debug; ++bat_debug)
+ debugfs_create_file(((*bat_debug)->attr).name,
+ S_IFREG | ((*bat_debug)->attr).mode,
+ hard_iface->debug_dir, hard_iface->net_dev,
+ &(*bat_debug)->fops);
}
/**
@@ -379,15 +341,12 @@ void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface)
{
const char *name = hard_iface->net_dev->name;
struct dentry *dir;
- struct dentry *d;
dir = hard_iface->debug_dir;
if (!dir)
return;
- d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
- if (!d)
- pr_err("Can't rename debugfs dir to %s\n", name);
+ debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
}
/**
@@ -419,44 +378,29 @@ int batadv_debugfs_add_meshif(struct net_device *dev)
struct batadv_priv *bat_priv = netdev_priv(dev);
struct batadv_debuginfo **bat_debug;
struct net *net = dev_net(dev);
- struct dentry *file;
-
- if (!batadv_debugfs)
- goto out;
if (net != &init_net)
return 0;
bat_priv->debug_dir = debugfs_create_dir(dev->name, batadv_debugfs);
- if (!bat_priv->debug_dir)
- goto out;
- if (batadv_socket_setup(bat_priv) < 0)
- goto rem_attr;
+ batadv_socket_setup(bat_priv);
if (batadv_debug_log_setup(bat_priv) < 0)
goto rem_attr;
- for (bat_debug = batadv_mesh_debuginfos; *bat_debug; ++bat_debug) {
- file = debugfs_create_file(((*bat_debug)->attr).name,
- S_IFREG | ((*bat_debug)->attr).mode,
- bat_priv->debug_dir,
- dev, &(*bat_debug)->fops);
- if (!file) {
- batadv_err(dev, "Can't add debugfs file: %s/%s\n",
- dev->name, ((*bat_debug)->attr).name);
- goto rem_attr;
- }
- }
+ for (bat_debug = batadv_mesh_debuginfos; *bat_debug; ++bat_debug)
+ debugfs_create_file(((*bat_debug)->attr).name,
+ S_IFREG | ((*bat_debug)->attr).mode,
+ bat_priv->debug_dir, dev,
+ &(*bat_debug)->fops);
- if (batadv_nc_init_debugfs(bat_priv) < 0)
- goto rem_attr;
+ batadv_nc_init_debugfs(bat_priv);
return 0;
rem_attr:
debugfs_remove_recursive(bat_priv->debug_dir);
bat_priv->debug_dir = NULL;
-out:
return -ENOMEM;
}
@@ -469,15 +413,12 @@ void batadv_debugfs_rename_meshif(struct net_device *dev)
struct batadv_priv *bat_priv = netdev_priv(dev);
const char *name = dev->name;
struct dentry *dir;
- struct dentry *d;
dir = bat_priv->debug_dir;
if (!dir)
return;
- d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
- if (!d)
- pr_err("Can't rename debugfs dir to %s\n", name);
+ debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
}
/**
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 7fac680cf740..1c5afd301ce9 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -9,8 +9,8 @@
#include "main.h"
-struct file;
-struct net_device;
+#include <linux/fs.h>
+#include <linux/netdevice.h>
#define BATADV_DEBUGFS_SUBDIR "batman_adv"
@@ -22,7 +22,7 @@ void batadv_debugfs_destroy(void);
int batadv_debugfs_add_meshif(struct net_device *dev);
void batadv_debugfs_rename_meshif(struct net_device *dev);
void batadv_debugfs_del_meshif(struct net_device *dev);
-int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface);
+void batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface);
void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface);
void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface);
@@ -54,9 +54,8 @@ static inline void batadv_debugfs_del_meshif(struct net_device *dev)
}
static inline
-int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
+void batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
{
- return 0;
}
static inline
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 110c27447d70..67c7729add55 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -11,15 +11,14 @@
#include <linux/compiler.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
#include <linux/types.h>
#include <uapi/linux/batadv_packet.h>
#include "originator.h"
-struct netlink_callback;
-struct seq_file;
-struct sk_buff;
-
#ifdef CONFIG_BATMAN_ADV_DAT
/* BATADV_DAT_ADDR_MAX - maximum address value in the DHT space */
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index d6074ba2ada7..abfe8c6556de 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -11,11 +11,10 @@
#include <linux/compiler.h>
#include <linux/list.h>
+#include <linux/skbuff.h>
#include <linux/stddef.h>
#include <linux/types.h>
-struct sk_buff;
-
void batadv_frag_purge_orig(struct batadv_orig_node *orig,
bool (*check_cb)(struct batadv_frag_table_entry *));
bool batadv_frag_skb_fwd(struct sk_buff *skb,
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 0e14026feebd..0be8e7178ec7 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -9,12 +9,11 @@
#include "main.h"
+#include <linux/netlink.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
#include <linux/types.h>
-
-struct batadv_tvlv_gateway_data;
-struct netlink_callback;
-struct seq_file;
-struct sk_buff;
+#include <uapi/linux/batadv_packet.h>
void batadv_gw_check_client_stop(struct batadv_priv *bat_priv);
void batadv_gw_reselect(struct batadv_priv *bat_priv);
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index dac097f9be03..fc55750542e4 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -11,6 +11,7 @@
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/kernel.h>
+#include <linux/limits.h>
#include <linux/math64.h>
#include <linux/netdevice.h>
#include <linux/stddef.h>
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 5cf50736c635..211b14b37db8 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -9,10 +9,9 @@
#include "main.h"
+#include <linux/netdevice.h>
#include <linux/types.h>
-struct net_device;
-
/**
* enum batadv_bandwidth_units - bandwidth unit types
*/
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 79d1731b8306..b5465e6e380d 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -16,6 +16,7 @@
#include <linux/if_ether.h>
#include <linux/kernel.h>
#include <linux/kref.h>
+#include <linux/limits.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
@@ -920,9 +921,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
hard_iface->soft_iface = NULL;
hard_iface->if_status = BATADV_IF_NOT_IN_USE;
- ret = batadv_debugfs_add_hardif(hard_iface);
- if (ret)
- goto free_sysfs;
+ batadv_debugfs_add_hardif(hard_iface);
INIT_LIST_HEAD(&hard_iface->list);
INIT_HLIST_HEAD(&hard_iface->neigh_list);
@@ -944,8 +943,6 @@ batadv_hardif_add_interface(struct net_device *net_dev)
return hard_iface;
-free_sysfs:
- batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
free_if:
kfree(hard_iface);
release_dev:
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index c8ef6aa0e865..bbb8a6f18d6b 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -11,13 +11,12 @@
#include <linux/compiler.h>
#include <linux/kref.h>
+#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/stddef.h>
#include <linux/types.h>
-
-struct net_device;
-struct net;
+#include <net/net_namespace.h>
/**
* enum batadv_hard_if_state - State of a hard interface
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index ceef171f7f98..57877f0b78e0 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -12,13 +12,12 @@
#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/list.h>
+#include <linux/lockdep.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/types.h>
-struct lock_class_key;
-
/* callback to a compare function. should compare 2 element datas for their
* keys
*
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 0a91c8661357..0a70b66e8770 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -314,25 +314,11 @@ static const struct file_operations batadv_fops = {
/**
* batadv_socket_setup() - Create debugfs "socket" file
* @bat_priv: the bat priv with all the soft interface information
- *
- * Return: 0 on success or negative error number in case of failure
*/
-int batadv_socket_setup(struct batadv_priv *bat_priv)
+void batadv_socket_setup(struct batadv_priv *bat_priv)
{
- struct dentry *d;
-
- if (!bat_priv->debug_dir)
- goto err;
-
- d = debugfs_create_file(BATADV_ICMP_SOCKET, 0600, bat_priv->debug_dir,
- bat_priv, &batadv_fops);
- if (!d)
- goto err;
-
- return 0;
-
-err:
- return -ENOMEM;
+ debugfs_create_file(BATADV_ICMP_SOCKET, 0600, bat_priv->debug_dir,
+ bat_priv, &batadv_fops);
}
/**
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 35eecbfd2e65..27fafff586df 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -10,12 +10,11 @@
#include "main.h"
#include <linux/types.h>
-
-struct batadv_icmp_header;
+#include <uapi/linux/batadv_packet.h>
#define BATADV_ICMP_SOCKET "socket"
-int batadv_socket_setup(struct batadv_priv *bat_priv);
+void batadv_socket_setup(struct batadv_priv *bat_priv);
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index f79ebd5b46e9..11941cf1adcc 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -190,27 +190,16 @@ static const struct file_operations batadv_log_fops = {
*/
int batadv_debug_log_setup(struct batadv_priv *bat_priv)
{
- struct dentry *d;
-
- if (!bat_priv->debug_dir)
- goto err;
-
bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC);
if (!bat_priv->debug_log)
- goto err;
+ return -ENOMEM;
spin_lock_init(&bat_priv->debug_log->lock);
init_waitqueue_head(&bat_priv->debug_log->queue_wait);
- d = debugfs_create_file("log", 0400, bat_priv->debug_dir, bat_priv,
- &batadv_log_fops);
- if (!d)
- goto err;
-
+ debugfs_create_file("log", 0400, bat_priv->debug_dir, bat_priv,
+ &batadv_log_fops);
return 0;
-
-err:
- return -ENOMEM;
}
/**
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 5504637e63d8..741cfa3719ff 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -9,6 +9,7 @@
#include "main.h"
+#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/printk.h>
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index c59afcba31e0..3d4c04d87ff3 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2019.2"
+#define BATADV_SOURCE_VERSION "2019.3"
#endif
/* B.A.T.M.A.N. parameters */
@@ -205,20 +205,20 @@ enum batadv_uev_type {
/* Kernel headers */
+#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/jiffies.h>
+#include <linux/netdevice.h>
#include <linux/percpu.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
#include <linux/types.h>
#include <uapi/linux/batadv_packet.h>
#include "types.h"
-
-struct net_device;
-struct packet_type;
-struct seq_file;
-struct sk_buff;
+#include "main.h"
/**
* batadv_print_vid() - return printable version of vid information
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index ec54e236e345..67d7f83009ae 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -20,6 +20,7 @@
#include <linux/igmp.h>
#include <linux/in.h>
#include <linux/in6.h>
+#include <linux/inetdevice.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/jiffies.h>
@@ -98,69 +99,307 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
}
/**
- * batadv_mcast_addr_is_ipv4() - check if multicast MAC is IPv4
- * @addr: the MAC address to check
+ * batadv_mcast_mla_rtr_flags_softif_get_ipv4() - get mcast router flags from
+ * node for IPv4
+ * @dev: the interface to check
*
- * Return: True, if MAC address is one reserved for IPv4 multicast, false
- * otherwise.
+ * Checks the presence of an IPv4 multicast router on this node.
+ *
+ * Caller needs to hold rcu read lock.
+ *
+ * Return: BATADV_NO_FLAGS if present, BATADV_MCAST_WANT_NO_RTR4 otherwise.
*/
-static bool batadv_mcast_addr_is_ipv4(const u8 *addr)
+static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv4(struct net_device *dev)
{
- static const u8 prefix[] = {0x01, 0x00, 0x5E};
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
- return memcmp(prefix, addr, sizeof(prefix)) == 0;
+ if (in_dev && IN_DEV_MFORWARD(in_dev))
+ return BATADV_NO_FLAGS;
+ else
+ return BATADV_MCAST_WANT_NO_RTR4;
}
/**
- * batadv_mcast_addr_is_ipv6() - check if multicast MAC is IPv6
- * @addr: the MAC address to check
+ * batadv_mcast_mla_rtr_flags_softif_get_ipv6() - get mcast router flags from
+ * node for IPv6
+ * @dev: the interface to check
*
- * Return: True, if MAC address is one reserved for IPv6 multicast, false
- * otherwise.
+ * Checks the presence of an IPv6 multicast router on this node.
+ *
+ * Caller needs to hold rcu read lock.
+ *
+ * Return: BATADV_NO_FLAGS if present, BATADV_MCAST_WANT_NO_RTR6 otherwise.
*/
-static bool batadv_mcast_addr_is_ipv6(const u8 *addr)
+#if IS_ENABLED(CONFIG_IPV6_MROUTE)
+static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev)
{
- static const u8 prefix[] = {0x33, 0x33};
+ struct inet6_dev *in6_dev = __in6_dev_get(dev);
- return memcmp(prefix, addr, sizeof(prefix)) == 0;
+ if (in6_dev && in6_dev->cnf.mc_forwarding)
+ return BATADV_NO_FLAGS;
+ else
+ return BATADV_MCAST_WANT_NO_RTR6;
+}
+#else
+static inline u8
+batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev)
+{
+ return BATADV_MCAST_WANT_NO_RTR6;
}
+#endif
/**
- * batadv_mcast_mla_softif_get() - get softif multicast listeners
+ * batadv_mcast_mla_rtr_flags_softif_get() - get mcast router flags from node
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bridge: bridge interface on top of the soft_iface if present,
+ * otherwise pass NULL
+ *
+ * Checks the presence of IPv4 and IPv6 multicast routers on this
+ * node.
+ *
+ * Return:
+ * BATADV_NO_FLAGS: Both an IPv4 and IPv6 multicast router is present
+ * BATADV_MCAST_WANT_NO_RTR4: No IPv4 multicast router is present
+ * BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present
+ * The former two OR'd: no multicast router is present
+ */
+static u8 batadv_mcast_mla_rtr_flags_softif_get(struct batadv_priv *bat_priv,
+ struct net_device *bridge)
+{
+ struct net_device *dev = bridge ? bridge : bat_priv->soft_iface;
+ u8 flags = BATADV_NO_FLAGS;
+
+ rcu_read_lock();
+
+ flags |= batadv_mcast_mla_rtr_flags_softif_get_ipv4(dev);
+ flags |= batadv_mcast_mla_rtr_flags_softif_get_ipv6(dev);
+
+ rcu_read_unlock();
+
+ return flags;
+}
+
+/**
+ * batadv_mcast_mla_rtr_flags_bridge_get() - get mcast router flags from bridge
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bridge: bridge interface on top of the soft_iface if present,
+ * otherwise pass NULL
+ *
+ * Checks the presence of IPv4 and IPv6 multicast routers behind a bridge.
+ *
+ * Return:
+ * BATADV_NO_FLAGS: Both an IPv4 and IPv6 multicast router is present
+ * BATADV_MCAST_WANT_NO_RTR4: No IPv4 multicast router is present
+ * BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present
+ * The former two OR'd: no multicast router is present
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+static u8 batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv,
+ struct net_device *bridge)
+{
+ struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
+ struct net_device *dev = bat_priv->soft_iface;
+ struct br_ip_list *br_ip_entry, *tmp;
+ u8 flags = BATADV_MCAST_WANT_NO_RTR6;
+ int ret;
+
+ if (!bridge)
+ return BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6;
+
+ /* TODO: ask the bridge if a multicast router is present (the bridge
+ * is capable of performing proper RFC4286 multicast multicast router
+ * discovery) instead of searching for a ff02::2 listener here
+ */
+ ret = br_multicast_list_adjacent(dev, &bridge_mcast_list);
+ if (ret < 0)
+ return BATADV_NO_FLAGS;
+
+ list_for_each_entry_safe(br_ip_entry, tmp, &bridge_mcast_list, list) {
+ /* the bridge snooping does not maintain IPv4 link-local
+ * addresses - therefore we won't find any IPv4 multicast router
+ * address here, only IPv6 ones
+ */
+ if (br_ip_entry->addr.proto == htons(ETH_P_IPV6) &&
+ ipv6_addr_is_ll_all_routers(&br_ip_entry->addr.u.ip6))
+ flags &= ~BATADV_MCAST_WANT_NO_RTR6;
+
+ list_del(&br_ip_entry->list);
+ kfree(br_ip_entry);
+ }
+
+ return flags;
+}
+#else
+static inline u8
+batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv,
+ struct net_device *bridge)
+{
+ if (bridge)
+ return BATADV_NO_FLAGS;
+ else
+ return BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6;
+}
+#endif
+
+/**
+ * batadv_mcast_mla_rtr_flags_get() - get multicast router flags
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bridge: bridge interface on top of the soft_iface if present,
+ * otherwise pass NULL
+ *
+ * Checks the presence of IPv4 and IPv6 multicast routers on this
+ * node or behind its bridge.
+ *
+ * Return:
+ * BATADV_NO_FLAGS: Both an IPv4 and IPv6 multicast router is present
+ * BATADV_MCAST_WANT_NO_RTR4: No IPv4 multicast router is present
+ * BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present
+ * The former two OR'd: no multicast router is present
+ */
+static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv,
+ struct net_device *bridge)
+{
+ u8 flags = BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6;
+
+ flags &= batadv_mcast_mla_rtr_flags_softif_get(bat_priv, bridge);
+ flags &= batadv_mcast_mla_rtr_flags_bridge_get(bat_priv, bridge);
+
+ return flags;
+}
+
+/**
+ * batadv_mcast_mla_flags_get() - get the new multicast flags
* @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: A set of flags for the current/next TVLV, querier and
+ * bridge state.
+ */
+static struct batadv_mcast_mla_flags
+batadv_mcast_mla_flags_get(struct batadv_priv *bat_priv)
+{
+ struct net_device *dev = bat_priv->soft_iface;
+ struct batadv_mcast_querier_state *qr4, *qr6;
+ struct batadv_mcast_mla_flags mla_flags;
+ struct net_device *bridge;
+
+ bridge = batadv_mcast_get_bridge(dev);
+
+ memset(&mla_flags, 0, sizeof(mla_flags));
+ mla_flags.enabled = 1;
+ mla_flags.tvlv_flags |= batadv_mcast_mla_rtr_flags_get(bat_priv,
+ bridge);
+
+ if (!bridge)
+ return mla_flags;
+
+ dev_put(bridge);
+
+ mla_flags.bridged = 1;
+ qr4 = &mla_flags.querier_ipv4;
+ qr6 = &mla_flags.querier_ipv6;
+
+ if (!IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING))
+ pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n");
+
+ qr4->exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP);
+ qr4->shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP);
+
+ qr6->exists = br_multicast_has_querier_anywhere(dev, ETH_P_IPV6);
+ qr6->shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IPV6);
+
+ mla_flags.tvlv_flags |= BATADV_MCAST_WANT_ALL_UNSNOOPABLES;
+
+ /* 1) If no querier exists at all, then multicast listeners on
+ * our local TT clients behind the bridge will keep silent.
+ * 2) If the selected querier is on one of our local TT clients,
+ * behind the bridge, then this querier might shadow multicast
+ * listeners on our local TT clients, behind this bridge.
+ *
+ * In both cases, we will signalize other batman nodes that
+ * we need all multicast traffic of the according protocol.
+ */
+ if (!qr4->exists || qr4->shadowing) {
+ mla_flags.tvlv_flags |= BATADV_MCAST_WANT_ALL_IPV4;
+ mla_flags.tvlv_flags &= ~BATADV_MCAST_WANT_NO_RTR4;
+ }
+
+ if (!qr6->exists || qr6->shadowing) {
+ mla_flags.tvlv_flags |= BATADV_MCAST_WANT_ALL_IPV6;
+ mla_flags.tvlv_flags &= ~BATADV_MCAST_WANT_NO_RTR6;
+ }
+
+ return mla_flags;
+}
+
+/**
+ * batadv_mcast_mla_is_duplicate() - check whether an address is in a list
+ * @mcast_addr: the multicast address to check
+ * @mcast_list: the list with multicast addresses to search in
+ *
+ * Return: true if the given address is already in the given list.
+ * Otherwise returns false.
+ */
+static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr,
+ struct hlist_head *mcast_list)
+{
+ struct batadv_hw_addr *mcast_entry;
+
+ hlist_for_each_entry(mcast_entry, mcast_list, list)
+ if (batadv_compare_eth(mcast_entry->addr, mcast_addr))
+ return true;
+
+ return false;
+}
+
+/**
+ * batadv_mcast_mla_softif_get_ipv4() - get softif IPv4 multicast listeners
* @dev: the device to collect multicast addresses from
* @mcast_list: a list to put found addresses into
+ * @flags: flags indicating the new multicast state
*
- * Collects multicast addresses of multicast listeners residing
+ * Collects multicast addresses of IPv4 multicast listeners residing
* on this kernel on the given soft interface, dev, in
* the given mcast_list. In general, multicast listeners provided by
* your multicast receiving applications run directly on this node.
*
- * If there is a bridge interface on top of dev, collects from that one
- * instead. Just like with IP addresses and routes, multicast listeners
- * will(/should) register to the bridge interface instead of an
- * enslaved bat0.
- *
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
-static int batadv_mcast_mla_softif_get(struct batadv_priv *bat_priv,
- struct net_device *dev,
- struct hlist_head *mcast_list)
+static int
+batadv_mcast_mla_softif_get_ipv4(struct net_device *dev,
+ struct hlist_head *mcast_list,
+ struct batadv_mcast_mla_flags *flags)
{
- bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
- bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
- struct net_device *bridge = batadv_mcast_get_bridge(dev);
- struct netdev_hw_addr *mc_list_entry;
struct batadv_hw_addr *new;
+ struct in_device *in_dev;
+ u8 mcast_addr[ETH_ALEN];
+ struct ip_mc_list *pmc;
int ret = 0;
- netif_addr_lock_bh(bridge ? bridge : dev);
- netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) {
- if (all_ipv4 && batadv_mcast_addr_is_ipv4(mc_list_entry->addr))
+ if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_IPV4)
+ return 0;
+
+ rcu_read_lock();
+
+ in_dev = __in_dev_get_rcu(dev);
+ if (!in_dev) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ for (pmc = rcu_dereference(in_dev->mc_list); pmc;
+ pmc = rcu_dereference(pmc->next_rcu)) {
+ if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
+ ipv4_is_local_multicast(pmc->multiaddr))
+ continue;
+
+ if (!(flags->tvlv_flags & BATADV_MCAST_WANT_NO_RTR4) &&
+ !ipv4_is_local_multicast(pmc->multiaddr))
continue;
- if (all_ipv6 && batadv_mcast_addr_is_ipv6(mc_list_entry->addr))
+ ip_eth_mc_map(pmc->multiaddr, mcast_addr);
+
+ if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
continue;
new = kmalloc(sizeof(*new), GFP_ATOMIC);
@@ -169,36 +408,142 @@ static int batadv_mcast_mla_softif_get(struct batadv_priv *bat_priv,
break;
}
- ether_addr_copy(new->addr, mc_list_entry->addr);
+ ether_addr_copy(new->addr, mcast_addr);
hlist_add_head(&new->list, mcast_list);
ret++;
}
- netif_addr_unlock_bh(bridge ? bridge : dev);
+ rcu_read_unlock();
- if (bridge)
- dev_put(bridge);
+ return ret;
+}
+
+/**
+ * batadv_mcast_mla_softif_get_ipv6() - get softif IPv6 multicast listeners
+ * @dev: the device to collect multicast addresses from
+ * @mcast_list: a list to put found addresses into
+ * @flags: flags indicating the new multicast state
+ *
+ * Collects multicast addresses of IPv6 multicast listeners residing
+ * on this kernel on the given soft interface, dev, in
+ * the given mcast_list. In general, multicast listeners provided by
+ * your multicast receiving applications run directly on this node.
+ *
+ * Return: -ENOMEM on memory allocation error or the number of
+ * items added to the mcast_list otherwise.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+static int
+batadv_mcast_mla_softif_get_ipv6(struct net_device *dev,
+ struct hlist_head *mcast_list,
+ struct batadv_mcast_mla_flags *flags)
+{
+ struct batadv_hw_addr *new;
+ struct inet6_dev *in6_dev;
+ u8 mcast_addr[ETH_ALEN];
+ struct ifmcaddr6 *pmc6;
+ int ret = 0;
+
+ if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_IPV6)
+ return 0;
+
+ rcu_read_lock();
+
+ in6_dev = __in6_dev_get(dev);
+ if (!in6_dev) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ read_lock_bh(&in6_dev->lock);
+ for (pmc6 = in6_dev->mc_list; pmc6; pmc6 = pmc6->next) {
+ if (IPV6_ADDR_MC_SCOPE(&pmc6->mca_addr) <
+ IPV6_ADDR_SCOPE_LINKLOCAL)
+ continue;
+
+ if (flags->tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
+ ipv6_addr_is_ll_all_nodes(&pmc6->mca_addr))
+ continue;
+
+ if (!(flags->tvlv_flags & BATADV_MCAST_WANT_NO_RTR6) &&
+ IPV6_ADDR_MC_SCOPE(&pmc6->mca_addr) >
+ IPV6_ADDR_SCOPE_LINKLOCAL)
+ continue;
+
+ ipv6_eth_mc_map(&pmc6->mca_addr, mcast_addr);
+
+ if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
+ continue;
+
+ new = kmalloc(sizeof(*new), GFP_ATOMIC);
+ if (!new) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ ether_addr_copy(new->addr, mcast_addr);
+ hlist_add_head(&new->list, mcast_list);
+ ret++;
+ }
+ read_unlock_bh(&in6_dev->lock);
+ rcu_read_unlock();
return ret;
}
+#else
+static inline int
+batadv_mcast_mla_softif_get_ipv6(struct net_device *dev,
+ struct hlist_head *mcast_list,
+ struct batadv_mcast_mla_flags *flags)
+{
+ return 0;
+}
+#endif
/**
- * batadv_mcast_mla_is_duplicate() - check whether an address is in a list
- * @mcast_addr: the multicast address to check
- * @mcast_list: the list with multicast addresses to search in
+ * batadv_mcast_mla_softif_get() - get softif multicast listeners
+ * @dev: the device to collect multicast addresses from
+ * @mcast_list: a list to put found addresses into
+ * @flags: flags indicating the new multicast state
*
- * Return: true if the given address is already in the given list.
- * Otherwise returns false.
+ * Collects multicast addresses of multicast listeners residing
+ * on this kernel on the given soft interface, dev, in
+ * the given mcast_list. In general, multicast listeners provided by
+ * your multicast receiving applications run directly on this node.
+ *
+ * If there is a bridge interface on top of dev, collects from that one
+ * instead. Just like with IP addresses and routes, multicast listeners
+ * will(/should) register to the bridge interface instead of an
+ * enslaved bat0.
+ *
+ * Return: -ENOMEM on memory allocation error or the number of
+ * items added to the mcast_list otherwise.
*/
-static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr,
- struct hlist_head *mcast_list)
+static int
+batadv_mcast_mla_softif_get(struct net_device *dev,
+ struct hlist_head *mcast_list,
+ struct batadv_mcast_mla_flags *flags)
{
- struct batadv_hw_addr *mcast_entry;
+ struct net_device *bridge = batadv_mcast_get_bridge(dev);
+ int ret4, ret6 = 0;
- hlist_for_each_entry(mcast_entry, mcast_list, list)
- if (batadv_compare_eth(mcast_entry->addr, mcast_addr))
- return true;
+ if (bridge)
+ dev = bridge;
- return false;
+ ret4 = batadv_mcast_mla_softif_get_ipv4(dev, mcast_list, flags);
+ if (ret4 < 0)
+ goto out;
+
+ ret6 = batadv_mcast_mla_softif_get_ipv6(dev, mcast_list, flags);
+ if (ret6 < 0) {
+ ret4 = 0;
+ goto out;
+ }
+
+out:
+ if (bridge)
+ dev_put(bridge);
+
+ return ret4 + ret6;
}
/**
@@ -227,9 +572,9 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
/**
* batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners
- * @bat_priv: the bat priv with all the soft interface information
* @dev: a bridge slave whose bridge to collect multicast addresses from
* @mcast_list: a list to put found addresses into
+ * @flags: flags indicating the new multicast state
*
* Collects multicast addresses of multicast listeners residing
* on foreign, non-mesh devices which we gave access to our mesh via
@@ -239,14 +584,13 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
-static int batadv_mcast_mla_bridge_get(struct batadv_priv *bat_priv,
- struct net_device *dev,
- struct hlist_head *mcast_list)
+static int batadv_mcast_mla_bridge_get(struct net_device *dev,
+ struct hlist_head *mcast_list,
+ struct batadv_mcast_mla_flags *flags)
{
struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
- bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
- bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
struct br_ip_list *br_ip_entry, *tmp;
+ u8 tvlv_flags = flags->tvlv_flags;
struct batadv_hw_addr *new;
u8 mcast_addr[ETH_ALEN];
int ret;
@@ -259,11 +603,34 @@ static int batadv_mcast_mla_bridge_get(struct batadv_priv *bat_priv,
goto out;
list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) {
- if (all_ipv4 && br_ip_entry->addr.proto == htons(ETH_P_IP))
- continue;
+ if (br_ip_entry->addr.proto == htons(ETH_P_IP)) {
+ if (tvlv_flags & BATADV_MCAST_WANT_ALL_IPV4)
+ continue;
- if (all_ipv6 && br_ip_entry->addr.proto == htons(ETH_P_IPV6))
- continue;
+ if (tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
+ ipv4_is_local_multicast(br_ip_entry->addr.u.ip4))
+ continue;
+
+ if (!(tvlv_flags & BATADV_MCAST_WANT_NO_RTR4) &&
+ !ipv4_is_local_multicast(br_ip_entry->addr.u.ip4))
+ continue;
+ }
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (br_ip_entry->addr.proto == htons(ETH_P_IPV6)) {
+ if (tvlv_flags & BATADV_MCAST_WANT_ALL_IPV6)
+ continue;
+
+ if (tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
+ ipv6_addr_is_ll_all_nodes(&br_ip_entry->addr.u.ip6))
+ continue;
+
+ if (!(tvlv_flags & BATADV_MCAST_WANT_NO_RTR6) &&
+ IPV6_ADDR_MC_SCOPE(&br_ip_entry->addr.u.ip6) >
+ IPV6_ADDR_SCOPE_LINKLOCAL)
+ continue;
+ }
+#endif
batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr);
if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
@@ -370,27 +737,6 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_has_bridge() - check whether the soft-iface is bridged
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Checks whether there is a bridge on top of our soft interface.
- *
- * Return: true if there is a bridge, false otherwise.
- */
-static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv)
-{
- struct net_device *upper = bat_priv->soft_iface;
-
- rcu_read_lock();
- do {
- upper = netdev_master_upper_dev_get_rcu(upper);
- } while (upper && !(upper->priv_flags & IFF_EBRIDGE));
- rcu_read_unlock();
-
- return upper;
-}
-
-/**
* batadv_mcast_querier_log() - debug output regarding the querier status on
* link
* @bat_priv: the bat priv with all the soft interface information
@@ -424,7 +770,7 @@ batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto,
batadv_info(bat_priv->soft_iface,
"%s Querier disappeared - multicast optimizations disabled\n",
str_proto);
- else if (!bat_priv->mcast.bridged && !new_state->exists)
+ else if (!bat_priv->mcast.mla_flags.bridged && !new_state->exists)
batadv_info(bat_priv->soft_iface,
"No %s Querier present - multicast optimizations disabled\n",
str_proto);
@@ -446,9 +792,7 @@ batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto,
* batadv_mcast_bridge_log() - debug output for topology changes in bridged
* setups
* @bat_priv: the bat priv with all the soft interface information
- * @bridged: a flag about whether the soft interface is currently bridged or not
- * @querier_ipv4: (maybe) new status of a potential, selected IGMP querier
- * @querier_ipv6: (maybe) new status of a potential, selected MLD querier
+ * @new_flags: flags indicating the new multicast state
*
* If no bridges are ever used on this node, then this function does nothing.
*
@@ -461,126 +805,86 @@ batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto,
* multicast flags this node is going to set.
*/
static void
-batadv_mcast_bridge_log(struct batadv_priv *bat_priv, bool bridged,
- struct batadv_mcast_querier_state *querier_ipv4,
- struct batadv_mcast_querier_state *querier_ipv6)
+batadv_mcast_bridge_log(struct batadv_priv *bat_priv,
+ struct batadv_mcast_mla_flags *new_flags)
{
- if (!bat_priv->mcast.bridged && bridged)
+ struct batadv_mcast_mla_flags *old_flags = &bat_priv->mcast.mla_flags;
+
+ if (!old_flags->bridged && new_flags->bridged)
batadv_dbg(BATADV_DBG_MCAST, bat_priv,
"Bridge added: Setting Unsnoopables(U)-flag\n");
- else if (bat_priv->mcast.bridged && !bridged)
+ else if (old_flags->bridged && !new_flags->bridged)
batadv_dbg(BATADV_DBG_MCAST, bat_priv,
"Bridge removed: Unsetting Unsnoopables(U)-flag\n");
- if (bridged) {
+ if (new_flags->bridged) {
batadv_mcast_querier_log(bat_priv, "IGMP",
- &bat_priv->mcast.querier_ipv4,
- querier_ipv4);
+ &old_flags->querier_ipv4,
+ &new_flags->querier_ipv4);
batadv_mcast_querier_log(bat_priv, "MLD",
- &bat_priv->mcast.querier_ipv6,
- querier_ipv6);
+ &old_flags->querier_ipv6,
+ &new_flags->querier_ipv6);
}
}
/**
* batadv_mcast_flags_logs() - output debug information about mcast flag changes
* @bat_priv: the bat priv with all the soft interface information
- * @flags: flags indicating the new multicast state
+ * @flags: TVLV flags indicating the new multicast state
*
- * Whenever the multicast flags this nodes announces changes (@mcast_flags vs.
- * bat_priv->mcast.flags), this notifies userspace via the 'mcast' log level.
+ * Whenever the multicast TVLV flags this nodes announces change this notifies
+ * userspace via the 'mcast' log level.
*/
static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags)
{
- u8 old_flags = bat_priv->mcast.flags;
- char str_old_flags[] = "[...]";
+ bool old_enabled = bat_priv->mcast.mla_flags.enabled;
+ u8 old_flags = bat_priv->mcast.mla_flags.tvlv_flags;
+ char str_old_flags[] = "[.... . ]";
- sprintf(str_old_flags, "[%c%c%c]",
+ sprintf(str_old_flags, "[%c%c%c%s%s]",
(old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
(old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
- (old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.');
+ (old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
+ !(old_flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
+ !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
batadv_dbg(BATADV_DBG_MCAST, bat_priv,
- "Changing multicast flags from '%s' to '[%c%c%c]'\n",
- bat_priv->mcast.enabled ? str_old_flags : "<undefined>",
+ "Changing multicast flags from '%s' to '[%c%c%c%s%s]'\n",
+ old_enabled ? str_old_flags : "<undefined>",
(flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
(flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
- (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.');
+ (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
+ !(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
+ !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
}
/**
- * batadv_mcast_mla_tvlv_update() - update multicast tvlv
+ * batadv_mcast_mla_flags_update() - update multicast flags
* @bat_priv: the bat priv with all the soft interface information
+ * @flags: flags indicating the new multicast state
*
* Updates the own multicast tvlv with our current multicast related settings,
* capabilities and inabilities.
- *
- * Return: false if we want all IPv4 && IPv6 multicast traffic and true
- * otherwise.
*/
-static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv)
+static void
+batadv_mcast_mla_flags_update(struct batadv_priv *bat_priv,
+ struct batadv_mcast_mla_flags *flags)
{
struct batadv_tvlv_mcast_data mcast_data;
- struct batadv_mcast_querier_state querier4 = {false, false};
- struct batadv_mcast_querier_state querier6 = {false, false};
- struct net_device *dev = bat_priv->soft_iface;
- bool bridged;
-
- mcast_data.flags = BATADV_NO_FLAGS;
- memset(mcast_data.reserved, 0, sizeof(mcast_data.reserved));
-
- bridged = batadv_mcast_has_bridge(bat_priv);
- if (!bridged)
- goto update;
-
- if (!IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING))
- pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n");
-
- querier4.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP);
- querier4.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP);
- querier6.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IPV6);
- querier6.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IPV6);
-
- mcast_data.flags |= BATADV_MCAST_WANT_ALL_UNSNOOPABLES;
-
- /* 1) If no querier exists at all, then multicast listeners on
- * our local TT clients behind the bridge will keep silent.
- * 2) If the selected querier is on one of our local TT clients,
- * behind the bridge, then this querier might shadow multicast
- * listeners on our local TT clients, behind this bridge.
- *
- * In both cases, we will signalize other batman nodes that
- * we need all multicast traffic of the according protocol.
- */
- if (!querier4.exists || querier4.shadowing)
- mcast_data.flags |= BATADV_MCAST_WANT_ALL_IPV4;
-
- if (!querier6.exists || querier6.shadowing)
- mcast_data.flags |= BATADV_MCAST_WANT_ALL_IPV6;
-
-update:
- batadv_mcast_bridge_log(bat_priv, bridged, &querier4, &querier6);
-
- bat_priv->mcast.querier_ipv4.exists = querier4.exists;
- bat_priv->mcast.querier_ipv4.shadowing = querier4.shadowing;
+ if (!memcmp(flags, &bat_priv->mcast.mla_flags, sizeof(*flags)))
+ return;
- bat_priv->mcast.querier_ipv6.exists = querier6.exists;
- bat_priv->mcast.querier_ipv6.shadowing = querier6.shadowing;
+ batadv_mcast_bridge_log(bat_priv, flags);
+ batadv_mcast_flags_log(bat_priv, flags->tvlv_flags);
- bat_priv->mcast.bridged = bridged;
+ mcast_data.flags = flags->tvlv_flags;
+ memset(mcast_data.reserved, 0, sizeof(mcast_data.reserved));
- if (!bat_priv->mcast.enabled ||
- mcast_data.flags != bat_priv->mcast.flags) {
- batadv_mcast_flags_log(bat_priv, mcast_data.flags);
- batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 2,
- &mcast_data, sizeof(mcast_data));
- bat_priv->mcast.flags = mcast_data.flags;
- bat_priv->mcast.enabled = true;
- }
+ batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 2,
+ &mcast_data, sizeof(mcast_data));
- return !(mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV4 &&
- mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV6);
+ bat_priv->mcast.mla_flags = *flags;
}
/**
@@ -599,22 +903,24 @@ static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv)
{
struct net_device *soft_iface = bat_priv->soft_iface;
struct hlist_head mcast_list = HLIST_HEAD_INIT;
+ struct batadv_mcast_mla_flags flags;
int ret;
- if (!batadv_mcast_mla_tvlv_update(bat_priv))
- goto update;
+ flags = batadv_mcast_mla_flags_get(bat_priv);
- ret = batadv_mcast_mla_softif_get(bat_priv, soft_iface, &mcast_list);
+ ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list, &flags);
if (ret < 0)
goto out;
- ret = batadv_mcast_mla_bridge_get(bat_priv, soft_iface, &mcast_list);
+ ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list, &flags);
if (ret < 0)
goto out;
-update:
+ spin_lock(&bat_priv->mcast.mla_lock);
batadv_mcast_mla_tt_retract(bat_priv, &mcast_list);
batadv_mcast_mla_tt_add(bat_priv, &mcast_list);
+ batadv_mcast_mla_flags_update(bat_priv, &flags);
+ spin_unlock(&bat_priv->mcast.mla_lock);
out:
batadv_mcast_mla_list_free(&mcast_list);
@@ -639,10 +945,7 @@ static void batadv_mcast_mla_update(struct work_struct *work)
priv_mcast = container_of(delayed_work, struct batadv_priv_mcast, work);
bat_priv = container_of(priv_mcast, struct batadv_priv, mcast);
- spin_lock(&bat_priv->mcast.mla_lock);
__batadv_mcast_mla_update(bat_priv);
- spin_unlock(&bat_priv->mcast.mla_lock);
-
batadv_mcast_start_timer(bat_priv);
}
@@ -677,6 +980,7 @@ static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb)
* @bat_priv: the bat priv with all the soft interface information
* @skb: the IPv4 packet to check
* @is_unsnoopable: stores whether the destination is snoopable
+ * @is_routable: stores whether the destination is routable
*
* Checks whether the given IPv4 packet has the potential to be forwarded with a
* mode more optimal than classic flooding.
@@ -686,7 +990,8 @@ static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb)
*/
static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
struct sk_buff *skb,
- bool *is_unsnoopable)
+ bool *is_unsnoopable,
+ int *is_routable)
{
struct iphdr *iphdr;
@@ -699,16 +1004,13 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
iphdr = ip_hdr(skb);
- /* TODO: Implement Multicast Router Discovery (RFC4286),
- * then allow scope > link local, too
- */
- if (!ipv4_is_local_multicast(iphdr->daddr))
- return -EINVAL;
-
/* link-local multicast listeners behind a bridge are
* not snoopable (see RFC4541, section 2.1.2.2)
*/
- *is_unsnoopable = true;
+ if (ipv4_is_local_multicast(iphdr->daddr))
+ *is_unsnoopable = true;
+ else
+ *is_routable = ETH_P_IP;
return 0;
}
@@ -743,6 +1045,7 @@ static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb)
* @bat_priv: the bat priv with all the soft interface information
* @skb: the IPv6 packet to check
* @is_unsnoopable: stores whether the destination is snoopable
+ * @is_routable: stores whether the destination is routable
*
* Checks whether the given IPv6 packet has the potential to be forwarded with a
* mode more optimal than classic flooding.
@@ -751,7 +1054,8 @@ static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb)
*/
static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
struct sk_buff *skb,
- bool *is_unsnoopable)
+ bool *is_unsnoopable,
+ int *is_routable)
{
struct ipv6hdr *ip6hdr;
@@ -764,10 +1068,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
ip6hdr = ipv6_hdr(skb);
- /* TODO: Implement Multicast Router Discovery (RFC4286),
- * then allow scope > link local, too
- */
- if (IPV6_ADDR_MC_SCOPE(&ip6hdr->daddr) != IPV6_ADDR_SCOPE_LINKLOCAL)
+ if (IPV6_ADDR_MC_SCOPE(&ip6hdr->daddr) < IPV6_ADDR_SCOPE_LINKLOCAL)
return -EINVAL;
/* link-local-all-nodes multicast listeners behind a bridge are
@@ -775,6 +1076,8 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
*/
if (ipv6_addr_is_ll_all_nodes(&ip6hdr->daddr))
*is_unsnoopable = true;
+ else if (IPV6_ADDR_MC_SCOPE(&ip6hdr->daddr) > IPV6_ADDR_SCOPE_LINKLOCAL)
+ *is_routable = ETH_P_IPV6;
return 0;
}
@@ -784,6 +1087,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
* @bat_priv: the bat priv with all the soft interface information
* @skb: the multicast frame to check
* @is_unsnoopable: stores whether the destination is snoopable
+ * @is_routable: stores whether the destination is routable
*
* Checks whether the given multicast ethernet frame has the potential to be
* forwarded with a mode more optimal than classic flooding.
@@ -792,7 +1096,8 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
*/
static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
struct sk_buff *skb,
- bool *is_unsnoopable)
+ bool *is_unsnoopable,
+ int *is_routable)
{
struct ethhdr *ethhdr = eth_hdr(skb);
@@ -802,13 +1107,15 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
switch (ntohs(ethhdr->h_proto)) {
case ETH_P_IP:
return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb,
- is_unsnoopable);
+ is_unsnoopable,
+ is_routable);
case ETH_P_IPV6:
if (!IS_ENABLED(CONFIG_IPV6))
return -EINVAL;
return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb,
- is_unsnoopable);
+ is_unsnoopable,
+ is_routable);
default:
return -EINVAL;
}
@@ -839,6 +1146,29 @@ static int batadv_mcast_forw_want_all_ip_count(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_forw_rtr_count() - count nodes with a multicast router
+ * @bat_priv: the bat priv with all the soft interface information
+ * @protocol: the ethernet protocol type to count multicast routers for
+ *
+ * Return: the number of nodes which want all routable IPv4 multicast traffic
+ * if the protocol is ETH_P_IP or the number of nodes which want all routable
+ * IPv6 traffic if the protocol is ETH_P_IPV6. Otherwise returns 0.
+ */
+
+static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv,
+ int protocol)
+{
+ switch (protocol) {
+ case ETH_P_IP:
+ return atomic_read(&bat_priv->mcast.num_want_all_rtr4);
+ case ETH_P_IPV6:
+ return atomic_read(&bat_priv->mcast.num_want_all_rtr6);
+ default:
+ return 0;
+ }
+}
+
+/**
* batadv_mcast_forw_tt_node_get() - get a multicast tt node
* @bat_priv: the bat priv with all the soft interface information
* @ethhdr: the ether header containing the multicast destination
@@ -960,6 +1290,84 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv)
}
/**
+ * batadv_mcast_forw_rtr4_node_get() - get a node with an ipv4 mcast router flag
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: an orig_node which has the BATADV_MCAST_WANT_NO_RTR4 flag unset and
+ * increases its refcount.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_rtr4_node_get(struct batadv_priv *bat_priv)
+{
+ struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tmp_orig_node,
+ &bat_priv->mcast.want_all_rtr4_list,
+ mcast_want_all_rtr4_node) {
+ if (!kref_get_unless_zero(&tmp_orig_node->refcount))
+ continue;
+
+ orig_node = tmp_orig_node;
+ break;
+ }
+ rcu_read_unlock();
+
+ return orig_node;
+}
+
+/**
+ * batadv_mcast_forw_rtr6_node_get() - get a node with an ipv6 mcast router flag
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: an orig_node which has the BATADV_MCAST_WANT_NO_RTR6 flag unset
+ * and increases its refcount.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_rtr6_node_get(struct batadv_priv *bat_priv)
+{
+ struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tmp_orig_node,
+ &bat_priv->mcast.want_all_rtr6_list,
+ mcast_want_all_rtr6_node) {
+ if (!kref_get_unless_zero(&tmp_orig_node->refcount))
+ continue;
+
+ orig_node = tmp_orig_node;
+ break;
+ }
+ rcu_read_unlock();
+
+ return orig_node;
+}
+
+/**
+ * batadv_mcast_forw_rtr_node_get() - get a node with an ipv4/ipv6 router flag
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ethhdr: an ethernet header to determine the protocol family from
+ *
+ * Return: an orig_node which has no BATADV_MCAST_WANT_NO_RTR4 or
+ * BATADV_MCAST_WANT_NO_RTR6 flag, depending on the provided ethhdr, set and
+ * increases its refcount.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_rtr_node_get(struct batadv_priv *bat_priv,
+ struct ethhdr *ethhdr)
+{
+ switch (ntohs(ethhdr->h_proto)) {
+ case ETH_P_IP:
+ return batadv_mcast_forw_rtr4_node_get(bat_priv);
+ case ETH_P_IPV6:
+ return batadv_mcast_forw_rtr6_node_get(bat_priv);
+ default:
+ /* we shouldn't be here... */
+ return NULL;
+ }
+}
+
+/**
* batadv_mcast_forw_mode() - check on how to forward a multicast packet
* @bat_priv: the bat priv with all the soft interface information
* @skb: The multicast packet to check
@@ -977,8 +1385,11 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
bool is_unsnoopable = false;
unsigned int mcast_fanout;
struct ethhdr *ethhdr;
+ int is_routable = 0;
+ int rtr_count = 0;
- ret = batadv_mcast_forw_mode_check(bat_priv, skb, &is_unsnoopable);
+ ret = batadv_mcast_forw_mode_check(bat_priv, skb, &is_unsnoopable,
+ &is_routable);
if (ret == -ENOMEM)
return BATADV_FORW_NONE;
else if (ret < 0)
@@ -991,8 +1402,9 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
ip_count = batadv_mcast_forw_want_all_ip_count(bat_priv, ethhdr);
unsnoop_count = !is_unsnoopable ? 0 :
atomic_read(&bat_priv->mcast.num_want_all_unsnoopables);
+ rtr_count = batadv_mcast_forw_rtr_count(bat_priv, is_routable);
- total_count = tt_count + ip_count + unsnoop_count;
+ total_count = tt_count + ip_count + unsnoop_count + rtr_count;
switch (total_count) {
case 1:
@@ -1002,6 +1414,9 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
*orig = batadv_mcast_forw_ip_node_get(bat_priv, ethhdr);
else if (unsnoop_count)
*orig = batadv_mcast_forw_unsnoop_node_get(bat_priv);
+ else if (rtr_count)
+ *orig = batadv_mcast_forw_rtr_node_get(bat_priv,
+ ethhdr);
if (*orig)
return BATADV_FORW_SINGLE;
@@ -1173,6 +1588,111 @@ batadv_mcast_forw_want_all(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_forw_want_all_rtr4() - forward to nodes with want-all-rtr4
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to transmit
+ * @vid: the vlan identifier
+ *
+ * Sends copies of a frame with multicast destination to any node with a
+ * BATADV_MCAST_WANT_NO_RTR4 flag unset. A transmission is performed via a
+ * batman-adv unicast packet for each such destination node.
+ *
+ * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS
+ * otherwise.
+ */
+static int
+batadv_mcast_forw_want_all_rtr4(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, unsigned short vid)
+{
+ struct batadv_orig_node *orig_node;
+ int ret = NET_XMIT_SUCCESS;
+ struct sk_buff *newskb;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node,
+ &bat_priv->mcast.want_all_rtr4_list,
+ mcast_want_all_rtr4_node) {
+ newskb = skb_copy(skb, GFP_ATOMIC);
+ if (!newskb) {
+ ret = NET_XMIT_DROP;
+ break;
+ }
+
+ batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
+ orig_node, vid);
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+/**
+ * batadv_mcast_forw_want_all_rtr6() - forward to nodes with want-all-rtr6
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: The multicast packet to transmit
+ * @vid: the vlan identifier
+ *
+ * Sends copies of a frame with multicast destination to any node with a
+ * BATADV_MCAST_WANT_NO_RTR6 flag unset. A transmission is performed via a
+ * batman-adv unicast packet for each such destination node.
+ *
+ * Return: NET_XMIT_DROP on memory allocation failure, NET_XMIT_SUCCESS
+ * otherwise.
+ */
+static int
+batadv_mcast_forw_want_all_rtr6(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, unsigned short vid)
+{
+ struct batadv_orig_node *orig_node;
+ int ret = NET_XMIT_SUCCESS;
+ struct sk_buff *newskb;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node,
+ &bat_priv->mcast.want_all_rtr6_list,
+ mcast_want_all_rtr6_node) {
+ newskb = skb_copy(skb, GFP_ATOMIC);
+ if (!newskb) {
+ ret = NET_XMIT_DROP;
+ break;
+ }
+
+ batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
+ orig_node, vid);
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+/**
+ * batadv_mcast_forw_want_rtr() - forward packet to nodes in a want-all-rtr list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to transmit
+ * @vid: the vlan identifier
+ *
+ * Sends copies of a frame with multicast destination to any node with a
+ * BATADV_MCAST_WANT_NO_RTR4 or BATADV_MCAST_WANT_NO_RTR6 flag unset. A
+ * transmission is performed via a batman-adv unicast packet for each such
+ * destination node.
+ *
+ * Return: NET_XMIT_DROP on memory allocation failure or if the protocol family
+ * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise.
+ */
+static int
+batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, unsigned short vid)
+{
+ switch (ntohs(eth_hdr(skb)->h_proto)) {
+ case ETH_P_IP:
+ return batadv_mcast_forw_want_all_rtr4(bat_priv, skb, vid);
+ case ETH_P_IPV6:
+ return batadv_mcast_forw_want_all_rtr6(bat_priv, skb, vid);
+ default:
+ /* we shouldn't be here... */
+ return NET_XMIT_DROP;
+ }
+}
+
+/**
* batadv_mcast_forw_send() - send packet to any detected multicast recpient
* @bat_priv: the bat priv with all the soft interface information
* @skb: the multicast packet to transmit
@@ -1205,6 +1725,12 @@ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
return ret;
}
+ ret = batadv_mcast_forw_want_rtr(bat_priv, skb, vid);
+ if (ret != NET_XMIT_SUCCESS) {
+ kfree_skb(skb);
+ return ret;
+ }
+
consume_skb(skb);
return ret;
}
@@ -1345,6 +1871,127 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_want_rtr4_update() - update want-all-rtr4 counter and list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node which multicast state might have changed of
+ * @mcast_flags: flags indicating the new multicast state
+ *
+ * If the BATADV_MCAST_WANT_NO_RTR4 flag of this originator, orig, has
+ * toggled then this method updates counter and list accordingly.
+ *
+ * Caller needs to hold orig->mcast_handler_lock.
+ */
+static void batadv_mcast_want_rtr4_update(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ u8 mcast_flags)
+{
+ struct hlist_node *node = &orig->mcast_want_all_rtr4_node;
+ struct hlist_head *head = &bat_priv->mcast.want_all_rtr4_list;
+
+ lockdep_assert_held(&orig->mcast_handler_lock);
+
+ /* switched from flag set to unset */
+ if (!(mcast_flags & BATADV_MCAST_WANT_NO_RTR4) &&
+ orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR4) {
+ atomic_inc(&bat_priv->mcast.num_want_all_rtr4);
+
+ spin_lock_bh(&bat_priv->mcast.want_lists_lock);
+ /* flag checks above + mcast_handler_lock prevents this */
+ WARN_ON(!hlist_unhashed(node));
+
+ hlist_add_head_rcu(node, head);
+ spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
+ /* switched from flag unset to set */
+ } else if (mcast_flags & BATADV_MCAST_WANT_NO_RTR4 &&
+ !(orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR4)) {
+ atomic_dec(&bat_priv->mcast.num_want_all_rtr4);
+
+ spin_lock_bh(&bat_priv->mcast.want_lists_lock);
+ /* flag checks above + mcast_handler_lock prevents this */
+ WARN_ON(hlist_unhashed(node));
+
+ hlist_del_init_rcu(node);
+ spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
+ }
+}
+
+/**
+ * batadv_mcast_want_rtr6_update() - update want-all-rtr6 counter and list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node which multicast state might have changed of
+ * @mcast_flags: flags indicating the new multicast state
+ *
+ * If the BATADV_MCAST_WANT_NO_RTR6 flag of this originator, orig, has
+ * toggled then this method updates counter and list accordingly.
+ *
+ * Caller needs to hold orig->mcast_handler_lock.
+ */
+static void batadv_mcast_want_rtr6_update(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ u8 mcast_flags)
+{
+ struct hlist_node *node = &orig->mcast_want_all_rtr6_node;
+ struct hlist_head *head = &bat_priv->mcast.want_all_rtr6_list;
+
+ lockdep_assert_held(&orig->mcast_handler_lock);
+
+ /* switched from flag set to unset */
+ if (!(mcast_flags & BATADV_MCAST_WANT_NO_RTR6) &&
+ orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR6) {
+ atomic_inc(&bat_priv->mcast.num_want_all_rtr6);
+
+ spin_lock_bh(&bat_priv->mcast.want_lists_lock);
+ /* flag checks above + mcast_handler_lock prevents this */
+ WARN_ON(!hlist_unhashed(node));
+
+ hlist_add_head_rcu(node, head);
+ spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
+ /* switched from flag unset to set */
+ } else if (mcast_flags & BATADV_MCAST_WANT_NO_RTR6 &&
+ !(orig->mcast_flags & BATADV_MCAST_WANT_NO_RTR6)) {
+ atomic_dec(&bat_priv->mcast.num_want_all_rtr6);
+
+ spin_lock_bh(&bat_priv->mcast.want_lists_lock);
+ /* flag checks above + mcast_handler_lock prevents this */
+ WARN_ON(hlist_unhashed(node));
+
+ hlist_del_init_rcu(node);
+ spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
+ }
+}
+
+/**
+ * batadv_mcast_tvlv_flags_get() - get multicast flags from an OGM TVLV
+ * @enabled: whether the originator has multicast TVLV support enabled
+ * @tvlv_value: tvlv buffer containing the multicast flags
+ * @tvlv_value_len: tvlv buffer length
+ *
+ * Return: multicast flags for the given tvlv buffer
+ */
+static u8
+batadv_mcast_tvlv_flags_get(bool enabled, void *tvlv_value, u16 tvlv_value_len)
+{
+ u8 mcast_flags = BATADV_NO_FLAGS;
+
+ if (enabled && tvlv_value && tvlv_value_len >= sizeof(mcast_flags))
+ mcast_flags = *(u8 *)tvlv_value;
+
+ if (!enabled) {
+ mcast_flags |= BATADV_MCAST_WANT_ALL_IPV4;
+ mcast_flags |= BATADV_MCAST_WANT_ALL_IPV6;
+ }
+
+ /* remove redundant flags to avoid sending duplicate packets later */
+ if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV4)
+ mcast_flags |= BATADV_MCAST_WANT_NO_RTR4;
+
+ if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV6)
+ mcast_flags |= BATADV_MCAST_WANT_NO_RTR6;
+
+ return mcast_flags;
+}
+
+/**
* batadv_mcast_tvlv_ogm_handler() - process incoming multicast tvlv container
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node of the ogm
@@ -1359,16 +2006,10 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
u16 tvlv_value_len)
{
bool orig_mcast_enabled = !(flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
- u8 mcast_flags = BATADV_NO_FLAGS;
+ u8 mcast_flags;
- if (orig_mcast_enabled && tvlv_value &&
- tvlv_value_len >= sizeof(mcast_flags))
- mcast_flags = *(u8 *)tvlv_value;
-
- if (!orig_mcast_enabled) {
- mcast_flags |= BATADV_MCAST_WANT_ALL_IPV4;
- mcast_flags |= BATADV_MCAST_WANT_ALL_IPV6;
- }
+ mcast_flags = batadv_mcast_tvlv_flags_get(orig_mcast_enabled,
+ tvlv_value, tvlv_value_len);
spin_lock_bh(&orig->mcast_handler_lock);
@@ -1385,6 +2026,8 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
batadv_mcast_want_unsnoop_update(bat_priv, orig, mcast_flags);
batadv_mcast_want_ipv4_update(bat_priv, orig, mcast_flags);
batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags);
+ batadv_mcast_want_rtr4_update(bat_priv, orig, mcast_flags);
+ batadv_mcast_want_rtr6_update(bat_priv, orig, mcast_flags);
orig->mcast_flags = mcast_flags;
spin_unlock_bh(&orig->mcast_handler_lock);
@@ -1417,15 +2060,16 @@ void batadv_mcast_init(struct batadv_priv *bat_priv)
static void batadv_mcast_flags_print_header(struct batadv_priv *bat_priv,
struct seq_file *seq)
{
- u8 flags = bat_priv->mcast.flags;
+ struct batadv_mcast_mla_flags *mla_flags = &bat_priv->mcast.mla_flags;
char querier4, querier6, shadowing4, shadowing6;
- bool bridged = bat_priv->mcast.bridged;
+ bool bridged = mla_flags->bridged;
+ u8 flags = mla_flags->tvlv_flags;
if (bridged) {
- querier4 = bat_priv->mcast.querier_ipv4.exists ? '.' : '4';
- querier6 = bat_priv->mcast.querier_ipv6.exists ? '.' : '6';
- shadowing4 = bat_priv->mcast.querier_ipv4.shadowing ? '4' : '.';
- shadowing6 = bat_priv->mcast.querier_ipv6.shadowing ? '6' : '.';
+ querier4 = mla_flags->querier_ipv4.exists ? '.' : '4';
+ querier6 = mla_flags->querier_ipv6.exists ? '.' : '6';
+ shadowing4 = mla_flags->querier_ipv4.shadowing ? '4' : '.';
+ shadowing6 = mla_flags->querier_ipv6.shadowing ? '6' : '.';
} else {
querier4 = '?';
querier6 = '?';
@@ -1433,10 +2077,12 @@ static void batadv_mcast_flags_print_header(struct batadv_priv *bat_priv,
shadowing6 = '?';
}
- seq_printf(seq, "Multicast flags (own flags: [%c%c%c])\n",
+ seq_printf(seq, "Multicast flags (own flags: [%c%c%c%s%s])\n",
(flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
(flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
- (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.');
+ (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
+ !(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
+ !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
seq_printf(seq, "* Bridged [U]\t\t\t\t%c\n", bridged ? 'U' : '.');
seq_printf(seq, "* No IGMP/MLD Querier [4/6]:\t\t%c/%c\n",
querier4, querier6);
@@ -1490,13 +2136,17 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
flags = orig_node->mcast_flags;
- seq_printf(seq, "%pM [%c%c%c]\n", orig_node->orig,
+ seq_printf(seq, "%pM [%c%c%c%s%s]\n", orig_node->orig,
(flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES)
? 'U' : '.',
(flags & BATADV_MCAST_WANT_ALL_IPV4)
? '4' : '.',
(flags & BATADV_MCAST_WANT_ALL_IPV6)
- ? '6' : '.');
+ ? '6' : '.',
+ !(flags & BATADV_MCAST_WANT_NO_RTR4)
+ ? "R4" : ". ",
+ !(flags & BATADV_MCAST_WANT_NO_RTR6)
+ ? "R6" : ". ");
}
rcu_read_unlock();
}
@@ -1517,19 +2167,19 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
int batadv_mcast_mesh_info_put(struct sk_buff *msg,
struct batadv_priv *bat_priv)
{
- u32 flags = bat_priv->mcast.flags;
+ u32 flags = bat_priv->mcast.mla_flags.tvlv_flags;
u32 flags_priv = BATADV_NO_FLAGS;
- if (bat_priv->mcast.bridged) {
+ if (bat_priv->mcast.mla_flags.bridged) {
flags_priv |= BATADV_MCAST_FLAGS_BRIDGED;
- if (bat_priv->mcast.querier_ipv4.exists)
+ if (bat_priv->mcast.mla_flags.querier_ipv4.exists)
flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS;
- if (bat_priv->mcast.querier_ipv6.exists)
+ if (bat_priv->mcast.mla_flags.querier_ipv6.exists)
flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS;
- if (bat_priv->mcast.querier_ipv4.shadowing)
+ if (bat_priv->mcast.mla_flags.querier_ipv4.shadowing)
flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING;
- if (bat_priv->mcast.querier_ipv6.shadowing)
+ if (bat_priv->mcast.mla_flags.querier_ipv6.shadowing)
flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING;
}
@@ -1770,6 +2420,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig)
batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS);
batadv_mcast_want_ipv4_update(bat_priv, orig, BATADV_NO_FLAGS);
batadv_mcast_want_ipv6_update(bat_priv, orig, BATADV_NO_FLAGS);
+ batadv_mcast_want_rtr4_update(bat_priv, orig, BATADV_NO_FLAGS);
+ batadv_mcast_want_rtr6_update(bat_priv, orig, BATADV_NO_FLAGS);
spin_unlock_bh(&orig->mcast_handler_lock);
}
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 653b9b76fabe..5d9e2bb29c97 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -9,9 +9,9 @@
#include "main.h"
-struct netlink_callback;
-struct seq_file;
-struct sk_buff;
+#include <linux/netlink.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
/**
* enum batadv_forw_mode - the way a packet should be forwarded as
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index a67720fad46c..6f08fd122a8d 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -21,6 +21,7 @@
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/limits.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
@@ -30,6 +31,7 @@
#include <linux/stddef.h>
#include <linux/types.h>
#include <net/genetlink.h>
+#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/sock.h>
#include <uapi/linux/batadv_packet.h>
@@ -49,8 +51,6 @@
#include "tp_meter.h"
#include "translation-table.h"
-struct net;
-
struct genl_family batadv_netlink_family;
/* multicast groups */
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index d1e0681b8743..ddc674e47dbb 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -9,11 +9,10 @@
#include "main.h"
+#include <linux/netlink.h>
#include <linux/types.h>
#include <net/genetlink.h>
-struct nlmsghdr;
-
void batadv_netlink_register(void);
void batadv_netlink_unregister(void);
int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype);
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index c5e7906045f3..580609389f0f 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1951,34 +1951,19 @@ out:
/**
* batadv_nc_init_debugfs() - create nc folder and related files in debugfs
* @bat_priv: the bat priv with all the soft interface information
- *
- * Return: 0 on success or negative error number in case of failure
*/
-int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
+void batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
{
- struct dentry *nc_dir, *file;
+ struct dentry *nc_dir;
nc_dir = debugfs_create_dir("nc", bat_priv->debug_dir);
- if (!nc_dir)
- goto out;
- file = debugfs_create_u8("min_tq", 0644, nc_dir, &bat_priv->nc.min_tq);
- if (!file)
- goto out;
+ debugfs_create_u8("min_tq", 0644, nc_dir, &bat_priv->nc.min_tq);
- file = debugfs_create_u32("max_fwd_delay", 0644, nc_dir,
- &bat_priv->nc.max_fwd_delay);
- if (!file)
- goto out;
+ debugfs_create_u32("max_fwd_delay", 0644, nc_dir,
+ &bat_priv->nc.max_fwd_delay);
- file = debugfs_create_u32("max_buffer_time", 0644, nc_dir,
- &bat_priv->nc.max_buffer_time);
- if (!file)
- goto out;
-
- return 0;
-
-out:
- return -ENOMEM;
+ debugfs_create_u32("max_buffer_time", 0644, nc_dir,
+ &bat_priv->nc.max_buffer_time);
}
#endif
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 74f56113a5d0..753fa49723cf 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -9,12 +9,11 @@
#include "main.h"
+#include <linux/netdevice.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
#include <linux/types.h>
-
-struct batadv_ogm_packet;
-struct net_device;
-struct seq_file;
-struct sk_buff;
+#include <uapi/linux/batadv_packet.h>
#ifdef CONFIG_BATMAN_ADV_NC
@@ -40,7 +39,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
struct sk_buff *skb);
int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset);
-int batadv_nc_init_debugfs(struct batadv_priv *bat_priv);
+void batadv_nc_init_debugfs(struct batadv_priv *bat_priv);
#else /* ifdef CONFIG_BATMAN_ADV_NC */
@@ -111,9 +110,8 @@ static inline int batadv_nc_nodes_seq_print_text(struct seq_file *seq,
return 0;
}
-static inline int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
+static inline void batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
{
- return 0;
}
#endif /* ifdef CONFIG_BATMAN_ADV_NC */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 45db798a7297..38613487fb1b 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -27,6 +27,7 @@
#include <linux/stddef.h>
#include <linux/workqueue.h>
#include <net/sock.h>
+#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
#include "bat_algo.h"
@@ -1043,7 +1044,8 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
orig_node->bcast_seqno_reset = reset_time;
#ifdef CONFIG_BATMAN_ADV_MCAST
- orig_node->mcast_flags = BATADV_NO_FLAGS;
+ orig_node->mcast_flags = BATADV_MCAST_WANT_NO_RTR4;
+ orig_node->mcast_flags |= BATADV_MCAST_WANT_NO_RTR6;
INIT_HLIST_NODE(&orig_node->mcast_want_all_unsnoopables_node);
INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv4_node);
INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv6_node);
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 3829e26f9c5d..512a1f99dd75 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -12,12 +12,11 @@
#include <linux/compiler.h>
#include <linux/if_ether.h>
#include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
#include <linux/types.h>
-struct netlink_callback;
-struct seq_file;
-struct sk_buff;
-
bool batadv_compare_orig(const struct hlist_node *node, const void *data2);
int batadv_originator_init(struct batadv_priv *bat_priv);
void batadv_originator_free(struct batadv_priv *bat_priv);
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index b96c6d06d188..c20feac95107 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -9,10 +9,9 @@
#include "main.h"
+#include <linux/skbuff.h>
#include <linux/types.h>
-struct sk_buff;
-
bool batadv_check_management_packet(struct sk_buff *skb,
struct batadv_hard_iface *hard_iface,
int header_len);
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 5921ee4e107c..5fc0fd1e5d08 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -10,12 +10,11 @@
#include "main.h"
#include <linux/compiler.h>
+#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <uapi/linux/batadv_packet.h>
-struct sk_buff;
-
void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
bool dropped);
struct batadv_forw_packet *
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index a7677e1d000f..c7a2e77ca1da 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -24,6 +24,7 @@
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/random.h>
@@ -803,11 +804,6 @@ static int batadv_softif_init_late(struct net_device *dev)
atomic_set(&bat_priv->distributed_arp_table, 1);
#endif
#ifdef CONFIG_BATMAN_ADV_MCAST
- bat_priv->mcast.querier_ipv4.exists = false;
- bat_priv->mcast.querier_ipv4.shadowing = false;
- bat_priv->mcast.querier_ipv6.exists = false;
- bat_priv->mcast.querier_ipv6.shadowing = false;
- bat_priv->mcast.flags = BATADV_NO_FLAGS;
atomic_set(&bat_priv->multicast_mode, 1);
atomic_set(&bat_priv->multicast_fanout, 16);
atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0);
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 275442a7acb6..29139ad769fe 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -9,13 +9,12 @@
#include "main.h"
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
#include <linux/types.h>
+#include <net/net_namespace.h>
#include <net/rtnetlink.h>
-struct net_device;
-struct net;
-struct sk_buff;
-
int batadv_skb_head_push(struct sk_buff *skb, unsigned int len);
void batadv_interface_rx(struct net_device *soft_iface,
struct sk_buff *skb, int hdr_size,
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 80fc3253c336..1efcb97039cd 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -18,6 +18,7 @@
#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/kref.h>
+#include <linux/limits.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
#include <linux/rculist.h>
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index 83fa808b1871..5e466093dfa5 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -9,12 +9,11 @@
#include "main.h"
+#include <linux/kobject.h>
+#include <linux/netdevice.h>
#include <linux/sysfs.h>
#include <linux/types.h>
-struct kobject;
-struct net_device;
-
#define BATADV_SYSFS_IF_MESH_SUBDIR "mesh"
#define BATADV_SYSFS_IF_BAT_SUBDIR "batman_adv"
/**
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 820392146249..dd6a9a40dbb9 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -21,6 +21,7 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/kthread.h>
+#include <linux/limits.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/param.h>
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index 604b3799c972..78d310da0ad3 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -9,10 +9,9 @@
#include "main.h"
+#include <linux/skbuff.h>
#include <linux/types.h>
-struct sk_buff;
-
void batadv_tp_meter_init(void);
void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
u32 test_length, u32 *cookie);
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index c8c48d62a430..4a98860d7f0e 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -9,13 +9,12 @@
#include "main.h"
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
#include <linux/types.h>
-struct netlink_callback;
-struct net_device;
-struct seq_file;
-struct sk_buff;
-
int batadv_tt_init(struct batadv_priv *bat_priv);
bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
unsigned short vid, int ifindex, u32 mark);
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index 114ac01e06af..36985000a0a8 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -10,8 +10,7 @@
#include "main.h"
#include <linux/types.h>
-
-struct batadv_ogm_packet;
+#include <uapi/linux/batadv_packet.h>
void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
u8 type, u8 version,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 74b644738a36..c2996296b953 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -14,20 +14,22 @@
#include <linux/average.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
+#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/sched.h> /* for linux/wait.h */
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
#include <linux/spinlock.h>
+#include <linux/timer.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
-struct seq_file;
-
#ifdef CONFIG_BATMAN_ADV_DAT
/**
@@ -402,6 +404,17 @@ struct batadv_orig_node {
* list
*/
struct hlist_node mcast_want_all_ipv6_node;
+
+ /**
+ * @mcast_want_all_rtr4_node: a list node for the mcast.want_all_rtr4
+ * list
+ */
+ struct hlist_node mcast_want_all_rtr4_node;
+ /**
+ * @mcast_want_all_rtr6_node: a list node for the mcast.want_all_rtr6
+ * list
+ */
+ struct hlist_node mcast_want_all_rtr6_node;
#endif
/** @capabilities: announced capabilities of this originator */
@@ -1169,6 +1182,26 @@ struct batadv_mcast_querier_state {
};
/**
+ * struct batadv_mcast_mla_flags - flags for the querier, bridge and tvlv state
+ */
+struct batadv_mcast_mla_flags {
+ /** @querier_ipv4: the current state of an IGMP querier in the mesh */
+ struct batadv_mcast_querier_state querier_ipv4;
+
+ /** @querier_ipv6: the current state of an MLD querier in the mesh */
+ struct batadv_mcast_querier_state querier_ipv6;
+
+ /** @enabled: whether the multicast tvlv is currently enabled */
+ unsigned char enabled:1;
+
+ /** @bridged: whether the soft interface has a bridge on top */
+ unsigned char bridged:1;
+
+ /** @tvlv_flags: the flags we have last sent in our mcast tvlv */
+ u8 tvlv_flags;
+};
+
+/**
* struct batadv_priv_mcast - per mesh interface mcast data
*/
struct batadv_priv_mcast {
@@ -1196,20 +1229,22 @@ struct batadv_priv_mcast {
*/
struct hlist_head want_all_ipv6_list;
- /** @querier_ipv4: the current state of an IGMP querier in the mesh */
- struct batadv_mcast_querier_state querier_ipv4;
-
- /** @querier_ipv6: the current state of an MLD querier in the mesh */
- struct batadv_mcast_querier_state querier_ipv6;
-
- /** @flags: the flags we have last sent in our mcast tvlv */
- u8 flags;
+ /**
+ * @want_all_rtr4_list: a list of orig_nodes wanting all routable IPv4
+ * multicast traffic
+ */
+ struct hlist_head want_all_rtr4_list;
- /** @enabled: whether the multicast tvlv is currently enabled */
- unsigned char enabled:1;
+ /**
+ * @want_all_rtr6_list: a list of orig_nodes wanting all routable IPv6
+ * multicast traffic
+ */
+ struct hlist_head want_all_rtr6_list;
- /** @bridged: whether the soft interface has a bridge on top */
- unsigned char bridged:1;
+ /**
+ * @mla_flags: flags for the querier, bridge and tvlv state
+ */
+ struct batadv_mcast_mla_flags mla_flags;
/**
* @mla_lock: a lock protecting mla_list and mla_flags
@@ -1228,6 +1263,12 @@ struct batadv_priv_mcast {
/** @num_want_all_ipv6: counter for items in want_all_ipv6_list */
atomic_t num_want_all_ipv6;
+ /** @num_want_all_rtr4: counter for items in want_all_rtr4_list */
+ atomic_t num_want_all_rtr4;
+
+ /** @num_want_all_rtr6: counter for items in want_all_rtr6_list */
+ atomic_t num_want_all_rtr6;
+
/**
* @want_lists_lock: lock for protecting modifications to mcasts
* want_all_{unsnoopables,ipv4,ipv6}_list (traversals are rcu-locked)
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 19d27bee285e..1555b0c6f7ec 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -160,10 +160,10 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev,
struct in6_addr *daddr,
struct sk_buff *skb)
{
- struct lowpan_peer *peer;
- struct in6_addr *nexthop;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
int count = atomic_read(&dev->peer_count);
+ const struct in6_addr *nexthop;
+ struct lowpan_peer *peer;
BT_DBG("peers %d addr %pI6c rt %p", count, daddr, rt);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 3cf0764d5793..15d1cb5aee18 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1276,14 +1276,6 @@ int hci_conn_check_link_mode(struct hci_conn *conn)
!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
return 0;
- /* The minimum encryption key size needs to be enforced by the
- * host stack before establishing any L2CAP connections. The
- * specification in theory allows a minimum of 1, but to align
- * BR/EDR and LE transports, a minimum of 7 is chosen.
- */
- if (conn->enc_key_size < HCI_MIN_ENC_KEY_SIZE)
- return 0;
-
return 1;
}
@@ -1400,8 +1392,16 @@ auth:
return 0;
encrypt:
- if (test_bit(HCI_CONN_ENCRYPT, &conn->flags))
+ if (test_bit(HCI_CONN_ENCRYPT, &conn->flags)) {
+ /* Ensure that the encryption key size has been read,
+ * otherwise stall the upper layer responses.
+ */
+ if (!conn->enc_key_size)
+ return 0;
+
+ /* Nothing else needed, all requirements are met */
return 1;
+ }
hci_conn_encrypt(conn);
return 0;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index b53acd6c9a3d..9f77432dbe38 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1341,6 +1341,21 @@ static void l2cap_request_info(struct l2cap_conn *conn)
sizeof(req), &req);
}
+static bool l2cap_check_enc_key_size(struct hci_conn *hcon)
+{
+ /* The minimum encryption key size needs to be enforced by the
+ * host stack before establishing any L2CAP connections. The
+ * specification in theory allows a minimum of 1, but to align
+ * BR/EDR and LE transports, a minimum of 7 is chosen.
+ *
+ * This check might also be called for unencrypted connections
+ * that have no key size requirements. Ensure that the link is
+ * actually encrypted before enforcing a key size.
+ */
+ return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) ||
+ hcon->enc_key_size > HCI_MIN_ENC_KEY_SIZE);
+}
+
static void l2cap_do_start(struct l2cap_chan *chan)
{
struct l2cap_conn *conn = chan->conn;
@@ -1358,9 +1373,14 @@ static void l2cap_do_start(struct l2cap_chan *chan)
if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE))
return;
- if (l2cap_chan_check_security(chan, true) &&
- __l2cap_no_conn_pending(chan))
+ if (!l2cap_chan_check_security(chan, true) ||
+ !__l2cap_no_conn_pending(chan))
+ return;
+
+ if (l2cap_check_enc_key_size(conn->hcon))
l2cap_start_connection(chan);
+ else
+ __set_chan_timer(chan, L2CAP_DISC_TIMEOUT);
}
static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
@@ -1439,7 +1459,10 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
continue;
}
- l2cap_start_connection(chan);
+ if (l2cap_check_enc_key_size(conn->hcon))
+ l2cap_start_connection(chan);
+ else
+ l2cap_chan_close(chan, ECONNREFUSED);
} else if (chan->state == BT_CONNECT2) {
struct l2cap_conn_rsp rsp;
@@ -7490,7 +7513,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
}
if (chan->state == BT_CONNECT) {
- if (!status)
+ if (!status && l2cap_check_enc_key_size(hcon))
l2cap_start_connection(chan);
else
__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);
@@ -7499,7 +7522,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
struct l2cap_conn_rsp rsp;
__u16 res, stat;
- if (!status) {
+ if (!status && l2cap_check_enc_key_size(hcon)) {
if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) {
res = L2CAP_CR_PEND;
stat = L2CAP_CS_AUTHOR_PEND;
diff --git a/net/core/dev.c b/net/core/dev.c
index d6edd218babd..58529318b3a9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4689,9 +4689,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
__skb_push(skb, skb->mac_len);
skb_do_redirect(skb);
return NULL;
- case TC_ACT_REINSERT:
- /* this does not scrub the packet, and updates stats on error */
- skb_tc_reinsert(skb, &cl_res);
+ case TC_ACT_CONSUMED:
return NULL;
default:
break;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4baf716e535e..89c533778135 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1549,7 +1549,8 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
u32 seq, int flags)
{
const struct devlink_ops *ops = devlink->ops;
- u8 inline_mode, encap_mode;
+ enum devlink_eswitch_encap_mode encap_mode;
+ u8 inline_mode;
void *hdr;
int err = 0;
u16 mode;
@@ -1625,7 +1626,8 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
{
struct devlink *devlink = info->user_ptr[0];
const struct devlink_ops *ops = devlink->ops;
- u8 inline_mode, encap_mode;
+ enum devlink_eswitch_encap_mode encap_mode;
+ u8 inline_mode;
int err = 0;
u16 mode;
diff --git a/net/core/dst.c b/net/core/dst.c
index e46366228eaf..1325316d9eab 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -160,7 +160,7 @@ void dst_dev_put(struct dst_entry *dst)
dst->ops->ifdown(dst, dev, true);
dst->input = dst_discard;
dst->output = dst_discard_out;
- dst->dev = dev_net(dst->dev)->loopback_dev;
+ dst->dev = blackhole_netdev;
dev_hold(dst->dev);
dev_put(dev);
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 4d1011b2e24f..6288e69e94fc 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -2883,6 +2883,30 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
match->mask.basic.n_proto = htons(0xffff);
switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) {
+ case ETHER_FLOW: {
+ const struct ethhdr *ether_spec, *ether_m_spec;
+
+ ether_spec = &fs->h_u.ether_spec;
+ ether_m_spec = &fs->m_u.ether_spec;
+
+ if (!is_zero_ether_addr(ether_m_spec->h_source)) {
+ ether_addr_copy(match->key.eth_addrs.src,
+ ether_spec->h_source);
+ ether_addr_copy(match->mask.eth_addrs.src,
+ ether_m_spec->h_source);
+ }
+ if (!is_zero_ether_addr(ether_m_spec->h_dest)) {
+ ether_addr_copy(match->key.eth_addrs.dst,
+ ether_spec->h_dest);
+ ether_addr_copy(match->mask.eth_addrs.dst,
+ ether_m_spec->h_dest);
+ }
+ if (ether_m_spec->h_proto) {
+ match->key.basic.n_proto = ether_spec->h_proto;
+ match->mask.basic.n_proto = ether_m_spec->h_proto;
+ }
+ }
+ break;
case TCP_V4_FLOW:
case UDP_V4_FLOW: {
const struct ethtool_tcpip4_spec *v4_spec, *v4_m_spec;
diff --git a/net/core/filter.c b/net/core/filter.c
index 2014d76e0d2a..089aaea0ccc6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2158,8 +2158,8 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
if (unlikely(flags & ~(BPF_F_INGRESS)))
return TC_ACT_SHOT;
- ri->ifindex = ifindex;
ri->flags = flags;
+ ri->tgt_index = ifindex;
return TC_ACT_REDIRECT;
}
@@ -2169,8 +2169,8 @@ int skb_do_redirect(struct sk_buff *skb)
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct net_device *dev;
- dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
- ri->ifindex = 0;
+ dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
+ ri->tgt_index = 0;
if (unlikely(!dev)) {
kfree_skb(skb);
return -EINVAL;
@@ -3488,11 +3488,11 @@ xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog, struct bpf_redirect_info *ri)
{
struct net_device *fwd;
- u32 index = ri->ifindex;
+ u32 index = ri->tgt_index;
int err;
fwd = dev_get_by_index_rcu(dev_net(dev), index);
- ri->ifindex = 0;
+ ri->tgt_index = 0;
if (unlikely(!fwd)) {
err = -EINVAL;
goto err;
@@ -3523,7 +3523,6 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
err = dev_map_enqueue(dst, xdp, dev_rx);
if (unlikely(err))
return err;
- __dev_map_insert_ctx(map, index);
break;
}
case BPF_MAP_TYPE_CPUMAP: {
@@ -3532,7 +3531,6 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
err = cpu_map_enqueue(rcpu, xdp, dev_rx);
if (unlikely(err))
return err;
- __cpu_map_insert_ctx(map, index);
break;
}
case BPF_MAP_TYPE_XSKMAP: {
@@ -3606,18 +3604,14 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog, struct bpf_map *map,
struct bpf_redirect_info *ri)
{
- u32 index = ri->ifindex;
- void *fwd = NULL;
+ u32 index = ri->tgt_index;
+ void *fwd = ri->tgt_value;
int err;
- ri->ifindex = 0;
+ ri->tgt_index = 0;
+ ri->tgt_value = NULL;
WRITE_ONCE(ri->map, NULL);
- fwd = __xdp_map_lookup_elem(map, index);
- if (unlikely(!fwd)) {
- err = -EINVAL;
- goto err;
- }
if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
xdp_do_flush_map();
@@ -3653,19 +3647,14 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
struct bpf_map *map)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
- u32 index = ri->ifindex;
- void *fwd = NULL;
+ u32 index = ri->tgt_index;
+ void *fwd = ri->tgt_value;
int err = 0;
- ri->ifindex = 0;
+ ri->tgt_index = 0;
+ ri->tgt_value = NULL;
WRITE_ONCE(ri->map, NULL);
- fwd = __xdp_map_lookup_elem(map, index);
- if (unlikely(!fwd)) {
- err = -EINVAL;
- goto err;
- }
-
if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
struct bpf_dtab_netdev *dst = fwd;
@@ -3697,14 +3686,14 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map = READ_ONCE(ri->map);
- u32 index = ri->ifindex;
+ u32 index = ri->tgt_index;
struct net_device *fwd;
int err = 0;
if (map)
return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
map);
- ri->ifindex = 0;
+ ri->tgt_index = 0;
fwd = dev_get_by_index_rcu(dev_net(dev), index);
if (unlikely(!fwd)) {
err = -EINVAL;
@@ -3732,8 +3721,9 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
if (unlikely(flags))
return XDP_ABORTED;
- ri->ifindex = ifindex;
ri->flags = flags;
+ ri->tgt_index = ifindex;
+ ri->tgt_value = NULL;
WRITE_ONCE(ri->map, NULL);
return XDP_REDIRECT;
@@ -3752,11 +3742,23 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
- if (unlikely(flags))
+ /* Lower bits of the flags are used as return code on lookup failure */
+ if (unlikely(flags > XDP_TX))
return XDP_ABORTED;
- ri->ifindex = ifindex;
+ ri->tgt_value = __xdp_map_lookup_elem(map, ifindex);
+ if (unlikely(!ri->tgt_value)) {
+ /* If the lookup fails we want to clear out the state in the
+ * redirect_info struct completely, so that if an eBPF program
+ * performs multiple lookups, the last one always takes
+ * precedence.
+ */
+ WRITE_ONCE(ri->map, NULL);
+ return flags;
+ }
+
ri->flags = flags;
+ ri->tgt_index = ifindex;
WRITE_ONCE(ri->map, map);
return XDP_REDIRECT;
@@ -5192,54 +5194,6 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
};
#endif /* CONFIG_IPV6_SEG6_BPF */
-#define CONVERT_COMMON_TCP_SOCK_FIELDS(md_type, CONVERT) \
-do { \
- switch (si->off) { \
- case offsetof(md_type, snd_cwnd): \
- CONVERT(snd_cwnd); break; \
- case offsetof(md_type, srtt_us): \
- CONVERT(srtt_us); break; \
- case offsetof(md_type, snd_ssthresh): \
- CONVERT(snd_ssthresh); break; \
- case offsetof(md_type, rcv_nxt): \
- CONVERT(rcv_nxt); break; \
- case offsetof(md_type, snd_nxt): \
- CONVERT(snd_nxt); break; \
- case offsetof(md_type, snd_una): \
- CONVERT(snd_una); break; \
- case offsetof(md_type, mss_cache): \
- CONVERT(mss_cache); break; \
- case offsetof(md_type, ecn_flags): \
- CONVERT(ecn_flags); break; \
- case offsetof(md_type, rate_delivered): \
- CONVERT(rate_delivered); break; \
- case offsetof(md_type, rate_interval_us): \
- CONVERT(rate_interval_us); break; \
- case offsetof(md_type, packets_out): \
- CONVERT(packets_out); break; \
- case offsetof(md_type, retrans_out): \
- CONVERT(retrans_out); break; \
- case offsetof(md_type, total_retrans): \
- CONVERT(total_retrans); break; \
- case offsetof(md_type, segs_in): \
- CONVERT(segs_in); break; \
- case offsetof(md_type, data_segs_in): \
- CONVERT(data_segs_in); break; \
- case offsetof(md_type, segs_out): \
- CONVERT(segs_out); break; \
- case offsetof(md_type, data_segs_out): \
- CONVERT(data_segs_out); break; \
- case offsetof(md_type, lost_out): \
- CONVERT(lost_out); break; \
- case offsetof(md_type, sacked_out): \
- CONVERT(sacked_out); break; \
- case offsetof(md_type, bytes_received): \
- CONVERT(bytes_received); break; \
- case offsetof(md_type, bytes_acked): \
- CONVERT(bytes_acked); break; \
- } \
-} while (0)
-
#ifdef CONFIG_INET
static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
int dif, int sdif, u8 family, u8 proto)
@@ -5590,7 +5544,8 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
- if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked))
+ if (off < 0 || off >= offsetofend(struct bpf_tcp_sock,
+ icsk_retransmits))
return false;
if (off % size != 0)
@@ -5621,8 +5576,19 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
offsetof(struct tcp_sock, FIELD)); \
} while (0)
- CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock,
- BPF_TCP_SOCK_GET_COMMON);
+#define BPF_INET_SOCK_GET_COMMON(FIELD) \
+ do { \
+ BUILD_BUG_ON(FIELD_SIZEOF(struct inet_connection_sock, \
+ FIELD) > \
+ FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct inet_connection_sock, \
+ FIELD), \
+ si->dst_reg, si->src_reg, \
+ offsetof( \
+ struct inet_connection_sock, \
+ FIELD)); \
+ } while (0)
if (insn > insn_buf)
return insn - insn_buf;
@@ -5638,6 +5604,81 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
offsetof(struct tcp_sock, rtt_min) +
offsetof(struct minmax_sample, v));
break;
+ case offsetof(struct bpf_tcp_sock, snd_cwnd):
+ BPF_TCP_SOCK_GET_COMMON(snd_cwnd);
+ break;
+ case offsetof(struct bpf_tcp_sock, srtt_us):
+ BPF_TCP_SOCK_GET_COMMON(srtt_us);
+ break;
+ case offsetof(struct bpf_tcp_sock, snd_ssthresh):
+ BPF_TCP_SOCK_GET_COMMON(snd_ssthresh);
+ break;
+ case offsetof(struct bpf_tcp_sock, rcv_nxt):
+ BPF_TCP_SOCK_GET_COMMON(rcv_nxt);
+ break;
+ case offsetof(struct bpf_tcp_sock, snd_nxt):
+ BPF_TCP_SOCK_GET_COMMON(snd_nxt);
+ break;
+ case offsetof(struct bpf_tcp_sock, snd_una):
+ BPF_TCP_SOCK_GET_COMMON(snd_una);
+ break;
+ case offsetof(struct bpf_tcp_sock, mss_cache):
+ BPF_TCP_SOCK_GET_COMMON(mss_cache);
+ break;
+ case offsetof(struct bpf_tcp_sock, ecn_flags):
+ BPF_TCP_SOCK_GET_COMMON(ecn_flags);
+ break;
+ case offsetof(struct bpf_tcp_sock, rate_delivered):
+ BPF_TCP_SOCK_GET_COMMON(rate_delivered);
+ break;
+ case offsetof(struct bpf_tcp_sock, rate_interval_us):
+ BPF_TCP_SOCK_GET_COMMON(rate_interval_us);
+ break;
+ case offsetof(struct bpf_tcp_sock, packets_out):
+ BPF_TCP_SOCK_GET_COMMON(packets_out);
+ break;
+ case offsetof(struct bpf_tcp_sock, retrans_out):
+ BPF_TCP_SOCK_GET_COMMON(retrans_out);
+ break;
+ case offsetof(struct bpf_tcp_sock, total_retrans):
+ BPF_TCP_SOCK_GET_COMMON(total_retrans);
+ break;
+ case offsetof(struct bpf_tcp_sock, segs_in):
+ BPF_TCP_SOCK_GET_COMMON(segs_in);
+ break;
+ case offsetof(struct bpf_tcp_sock, data_segs_in):
+ BPF_TCP_SOCK_GET_COMMON(data_segs_in);
+ break;
+ case offsetof(struct bpf_tcp_sock, segs_out):
+ BPF_TCP_SOCK_GET_COMMON(segs_out);
+ break;
+ case offsetof(struct bpf_tcp_sock, data_segs_out):
+ BPF_TCP_SOCK_GET_COMMON(data_segs_out);
+ break;
+ case offsetof(struct bpf_tcp_sock, lost_out):
+ BPF_TCP_SOCK_GET_COMMON(lost_out);
+ break;
+ case offsetof(struct bpf_tcp_sock, sacked_out):
+ BPF_TCP_SOCK_GET_COMMON(sacked_out);
+ break;
+ case offsetof(struct bpf_tcp_sock, bytes_received):
+ BPF_TCP_SOCK_GET_COMMON(bytes_received);
+ break;
+ case offsetof(struct bpf_tcp_sock, bytes_acked):
+ BPF_TCP_SOCK_GET_COMMON(bytes_acked);
+ break;
+ case offsetof(struct bpf_tcp_sock, dsack_dups):
+ BPF_TCP_SOCK_GET_COMMON(dsack_dups);
+ break;
+ case offsetof(struct bpf_tcp_sock, delivered):
+ BPF_TCP_SOCK_GET_COMMON(delivered);
+ break;
+ case offsetof(struct bpf_tcp_sock, delivered_ce):
+ BPF_TCP_SOCK_GET_COMMON(delivered_ce);
+ break;
+ case offsetof(struct bpf_tcp_sock, icsk_retransmits):
+ BPF_INET_SOCK_GET_COMMON(icsk_retransmits);
+ break;
}
return insn - insn_buf;
@@ -5651,7 +5692,7 @@ BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
return (unsigned long)NULL;
}
-static const struct bpf_func_proto bpf_tcp_sock_proto = {
+const struct bpf_func_proto bpf_tcp_sock_proto = {
.func = bpf_tcp_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL,
@@ -7911,9 +7952,6 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
} while (0)
- CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_sock_ops,
- SOCK_OPS_GET_TCP_SOCK_FIELD);
-
if (insn > insn_buf)
return insn - insn_buf;
@@ -8083,6 +8121,69 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
struct sock, type);
break;
+ case offsetof(struct bpf_sock_ops, snd_cwnd):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(snd_cwnd);
+ break;
+ case offsetof(struct bpf_sock_ops, srtt_us):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(srtt_us);
+ break;
+ case offsetof(struct bpf_sock_ops, snd_ssthresh):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(snd_ssthresh);
+ break;
+ case offsetof(struct bpf_sock_ops, rcv_nxt):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(rcv_nxt);
+ break;
+ case offsetof(struct bpf_sock_ops, snd_nxt):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(snd_nxt);
+ break;
+ case offsetof(struct bpf_sock_ops, snd_una):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(snd_una);
+ break;
+ case offsetof(struct bpf_sock_ops, mss_cache):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(mss_cache);
+ break;
+ case offsetof(struct bpf_sock_ops, ecn_flags):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(ecn_flags);
+ break;
+ case offsetof(struct bpf_sock_ops, rate_delivered):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(rate_delivered);
+ break;
+ case offsetof(struct bpf_sock_ops, rate_interval_us):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(rate_interval_us);
+ break;
+ case offsetof(struct bpf_sock_ops, packets_out):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(packets_out);
+ break;
+ case offsetof(struct bpf_sock_ops, retrans_out):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(retrans_out);
+ break;
+ case offsetof(struct bpf_sock_ops, total_retrans):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(total_retrans);
+ break;
+ case offsetof(struct bpf_sock_ops, segs_in):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(segs_in);
+ break;
+ case offsetof(struct bpf_sock_ops, data_segs_in):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_in);
+ break;
+ case offsetof(struct bpf_sock_ops, segs_out):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(segs_out);
+ break;
+ case offsetof(struct bpf_sock_ops, data_segs_out):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_out);
+ break;
+ case offsetof(struct bpf_sock_ops, lost_out):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(lost_out);
+ break;
+ case offsetof(struct bpf_sock_ops, sacked_out):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(sacked_out);
+ break;
+ case offsetof(struct bpf_sock_ops, bytes_received):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_received);
+ break;
+ case offsetof(struct bpf_sock_ops, bytes_acked):
+ SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked);
+ break;
case offsetof(struct bpf_sock_ops, sk):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern,
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 04fdc9535772..f153e0601838 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -163,9 +163,16 @@ static void linkwatch_do_dev(struct net_device *dev)
static void __linkwatch_run_queue(int urgent_only)
{
+#define MAX_DO_DEV_PER_LOOP 100
+
+ int do_dev = MAX_DO_DEV_PER_LOOP;
struct net_device *dev;
LIST_HEAD(wrk);
+ /* Give urgent case more budget */
+ if (urgent_only)
+ do_dev += MAX_DO_DEV_PER_LOOP;
+
/*
* Limit the number of linkwatch events to one
* per second so that a runaway driver does not
@@ -184,7 +191,7 @@ static void __linkwatch_run_queue(int urgent_only)
spin_lock_irq(&lweventlist_lock);
list_splice_init(&lweventlist, &wrk);
- while (!list_empty(&wrk)) {
+ while (!list_empty(&wrk) && do_dev > 0) {
dev = list_first_entry(&wrk, struct net_device, link_watch_list);
list_del_init(&dev->link_watch_list);
@@ -195,9 +202,13 @@ static void __linkwatch_run_queue(int urgent_only)
}
spin_unlock_irq(&lweventlist_lock);
linkwatch_do_dev(dev);
+ do_dev--;
spin_lock_irq(&lweventlist_lock);
}
+ /* Add the remaining work back to lweventlist */
+ list_splice_init(&wrk, &lweventlist);
+
if (!list_empty(&lweventlist))
linkwatch_schedule_work(0);
spin_unlock_irq(&lweventlist_lock);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index b29d7b513a18..829377cc83db 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -85,7 +85,7 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
kfree(xa);
}
-bool __mem_id_disconnect(int id, bool force)
+static bool __mem_id_disconnect(int id, bool force)
{
struct xdp_mem_allocator *xa;
bool safe_to_remove = true;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 52bdb881a506..ed2301ef872e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -784,10 +784,8 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
}
EXPORT_SYMBOL(inet_getname);
-int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+int inet_send_prepare(struct sock *sk)
{
- struct sock *sk = sock->sk;
-
sock_rps_record_flow(sk);
/* We may need to bind the socket. */
@@ -795,7 +793,19 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
inet_autobind(sk))
return -EAGAIN;
- return sk->sk_prot->sendmsg(sk, msg, size);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(inet_send_prepare);
+
+int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+{
+ struct sock *sk = sock->sk;
+
+ if (unlikely(inet_send_prepare(sk)))
+ return -EAGAIN;
+
+ return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udp_sendmsg,
+ sk, msg, size);
}
EXPORT_SYMBOL(inet_sendmsg);
@@ -804,11 +814,7 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
{
struct sock *sk = sock->sk;
- sock_rps_record_flow(sk);
-
- /* We may need to bind the socket. */
- if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
- inet_autobind(sk))
+ if (unlikely(inet_send_prepare(sk)))
return -EAGAIN;
if (sk->sk_prot->sendpage)
@@ -817,6 +823,8 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
}
EXPORT_SYMBOL(inet_sendpage);
+INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *,
+ size_t, int, int, int *));
int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
@@ -827,8 +835,9 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (likely(!(flags & MSG_ERRQUEUE)))
sock_rps_record_flow(sk);
- err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, &addr_len);
+ err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg,
+ sk, msg, size, flags & MSG_DONTWAIT,
+ flags & ~MSG_DONTWAIT, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
return err;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 9c3afd550612..974179b3b314 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -590,8 +590,7 @@ static void __exit ah4_fini(void)
{
if (xfrm4_protocol_deregister(&ah4_protocol, IPPROTO_AH) < 0)
pr_info("%s: can't remove protocol\n", __func__);
- if (xfrm_unregister_type(&ah_type, AF_INET) < 0)
- pr_info("%s: can't remove xfrm type\n", __func__);
+ xfrm_unregister_type(&ah_type, AF_INET);
}
module_init(ah4_init);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7874303220c5..137d1892395d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -428,8 +428,9 @@ no_promotions:
if (prev_prom) {
struct in_ifaddr *last_sec;
- last_sec = rtnl_dereference(last_prim->ifa_next);
rcu_assign_pointer(prev_prom->ifa_next, next_sec);
+
+ last_sec = rtnl_dereference(last_prim->ifa_next);
rcu_assign_pointer(promote->ifa_next, last_sec);
rcu_assign_pointer(last_prim->ifa_next, promote);
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b9ae95576084..5c967764041f 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -33,8 +33,6 @@ struct esp_output_extra {
#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
-static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
-
/*
* Allocate an AEAD request structure with extra space for SG and IV.
*
@@ -506,7 +504,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
u32 padto;
- padto = min(x->tfcpad, esp4_get_mtu(x, dst->child_mtu_cached));
+ padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
if (skb->len < padto)
esp.tfclen = padto - skb->len;
}
@@ -788,28 +786,6 @@ out:
return err;
}
-static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
-{
- struct crypto_aead *aead = x->data;
- u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4);
- unsigned int net_adj;
-
- switch (x->props.mode) {
- case XFRM_MODE_TRANSPORT:
- case XFRM_MODE_BEET:
- net_adj = sizeof(struct iphdr);
- break;
- case XFRM_MODE_TUNNEL:
- net_adj = 0;
- break;
- default:
- BUG();
- }
-
- return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
- net_adj) & ~(blksize - 1)) + net_adj - 2;
-}
-
static int esp4_err(struct sk_buff *skb, u32 info)
{
struct net *net = dev_net(skb->dev);
@@ -1035,7 +1011,6 @@ static const struct xfrm_type esp_type =
.flags = XFRM_TYPE_REPLAY_PROT,
.init_state = esp_init_state,
.destructor = esp_destroy,
- .get_mtu = esp4_get_mtu,
.input = esp_input,
.output = esp_output,
};
@@ -1066,8 +1041,7 @@ static void __exit esp4_fini(void)
{
if (xfrm4_protocol_deregister(&esp4_protocol, IPPROTO_ESP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
- if (xfrm_unregister_type(&esp_type, AF_INET) < 0)
- pr_info("%s: can't remove xfrm type\n", __func__);
+ xfrm_unregister_type(&esp_type, AF_INET);
}
module_init(esp4_init);
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 2e5e377f50a1..0e4a7cf6bc87 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -312,9 +312,7 @@ static int __init esp4_offload_init(void)
static void __exit esp4_offload_exit(void)
{
- if (xfrm_unregister_type_offload(&esp_type_offload, AF_INET) < 0)
- pr_info("%s: can't remove xfrm type offload\n", __func__);
-
+ xfrm_unregister_type_offload(&esp_type_offload, AF_INET);
inet_del_offload(&esp4_offload, IPPROTO_ESP);
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4400f5051977..2b2b3d291ab0 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2126,14 +2126,20 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
goto next;
}
- if (filter->dump_routes && !s_fa) {
- err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWROUTE,
- tb->tb_id, fa->fa_type,
- xkey, KEYLENGTH - fa->fa_slen,
- fa->fa_tos, fi, flags);
- if (err < 0)
- goto stop;
+ if (filter->dump_routes) {
+ if (!s_fa) {
+ err = fib_dump_info(skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWROUTE,
+ tb->tb_id, fa->fa_type,
+ xkey,
+ KEYLENGTH - fa->fa_slen,
+ fa->fa_tos, fi, flags);
+ if (err < 0)
+ goto stop;
+ }
+
i_fa++;
}
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 293acfb36376..44bfeecac33e 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -83,7 +83,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
options = (__be32 *)(greh + 1);
if (greh->flags & GRE_CSUM) {
if (!skb_checksum_simple_validate(skb)) {
- skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
+ skb_checksum_try_convert(skb, IPPROTO_GRE,
null_compute_pseudo);
} else if (csum_err) {
*csum_err = true;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index cdd6c3418b9e..cc7ef0d05bbd 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -327,18 +327,35 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
static int ip_mc_finish_output(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- int ret;
+ struct rtable *new_rt;
+ bool do_cn = false;
+ int ret, err;
ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
switch (ret) {
- case NET_XMIT_SUCCESS:
- return dev_loopback_xmit(net, sk, skb);
case NET_XMIT_CN:
- return dev_loopback_xmit(net, sk, skb) ? : ret;
+ do_cn = true;
+ /* fall through */
+ case NET_XMIT_SUCCESS:
+ break;
default:
kfree_skb(skb);
return ret;
}
+
+ /* Reset rt_iif so that inet_iif() will return skb->skb_iif. Setting
+ * this to non-zero causes ipi_ifindex in in_pktinfo to be overwritten,
+ * see ipv4_pktinfo_prepare().
+ */
+ new_rt = rt_dst_clone(net->loopback_dev, skb_rtable(skb));
+ if (new_rt) {
+ new_rt->rt_iif = 0;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &new_rt->dst);
+ }
+
+ err = dev_loopback_xmit(net, sk, skb);
+ return (do_cn && err) ? ret : err;
}
int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 2f4cdcc13d53..59bfa3825810 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -186,8 +186,7 @@ static void __exit ipcomp4_fini(void)
{
if (xfrm4_protocol_deregister(&ipcomp4_protocol, IPPROTO_COMP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
- if (xfrm_unregister_type(&ipcomp_type, AF_INET) < 0)
- pr_info("%s: can't remove xfrm type\n", __func__);
+ xfrm_unregister_type(&ipcomp_type, AF_INET);
}
module_init(ipcomp4_init);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 0b8e06ca75d6..40a6abbc9cf6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -197,7 +197,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
}
sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
iph->saddr, iph->daddr,
- skb->dev->ifindex, sdif);
+ dif, sdif);
}
out:
read_unlock(&raw_v4_hashinfo.lock);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 59670fafcd26..dc1f510a7c81 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1532,7 +1532,6 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
void rt_flush_dev(struct net_device *dev)
{
- struct net *net = dev_net(dev);
struct rtable *rt;
int cpu;
@@ -1543,7 +1542,7 @@ void rt_flush_dev(struct net_device *dev)
list_for_each_entry(rt, &ul->head, rt_uncached) {
if (rt->dst.dev != dev)
continue;
- rt->dst.dev = net->loopback_dev;
+ rt->dst.dev = blackhole_netdev;
dev_hold(rt->dst.dev);
dev_put(dev);
}
@@ -1648,6 +1647,39 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
}
EXPORT_SYMBOL(rt_dst_alloc);
+struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
+{
+ struct rtable *new_rt;
+
+ new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+ rt->dst.flags);
+
+ if (new_rt) {
+ new_rt->rt_genid = rt_genid_ipv4(dev_net(dev));
+ new_rt->rt_flags = rt->rt_flags;
+ new_rt->rt_type = rt->rt_type;
+ new_rt->rt_is_input = rt->rt_is_input;
+ new_rt->rt_iif = rt->rt_iif;
+ new_rt->rt_pmtu = rt->rt_pmtu;
+ new_rt->rt_mtu_locked = rt->rt_mtu_locked;
+ new_rt->rt_gw_family = rt->rt_gw_family;
+ if (rt->rt_gw_family == AF_INET)
+ new_rt->rt_gw4 = rt->rt_gw4;
+ else if (rt->rt_gw_family == AF_INET6)
+ new_rt->rt_gw6 = rt->rt_gw6;
+ INIT_LIST_HEAD(&new_rt->rt_uncached);
+
+ new_rt->dst.flags |= DST_HOST;
+ new_rt->dst.input = rt->dst.input;
+ new_rt->dst.output = rt->dst.output;
+ new_rt->dst.error = rt->dst.error;
+ new_rt->dst.lastuse = jiffies;
+ new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
+ }
+ return new_rt;
+}
+EXPORT_SYMBOL(rt_dst_clone);
+
/* called in rcu_read_lock() section */
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev,
@@ -3293,9 +3325,11 @@ static struct ctl_table ipv4_route_table[] = {
{ }
};
+static const char ipv4_route_flush_procname[] = "flush";
+
static struct ctl_table ipv4_route_flush_table[] = {
{
- .procname = "flush",
+ .procname = ipv4_route_flush_procname,
.maxlen = sizeof(int),
.mode = 0200,
.proc_handler = ipv4_sysctl_rtcache_flush,
@@ -3313,9 +3347,11 @@ static __net_init int sysctl_route_net_init(struct net *net)
if (!tbl)
goto err_dup;
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- tbl[0].procname = NULL;
+ /* Don't export non-whitelisted sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns) {
+ if (tbl[0].procname != ipv4_route_flush_procname)
+ tbl[0].procname = NULL;
+ }
}
tbl[0].extra1 = net;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b71efeb0ae5b..c21e8a22fb3b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -778,6 +778,8 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
tp->rtt_seq = tp->snd_nxt;
tp->mdev_max_us = tcp_rto_min_us(sk);
+
+ tcp_bpf_rtt(sk);
}
} else {
/* no previous measure. */
@@ -786,6 +788,8 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
tp->mdev_max_us = tp->rttvar_us;
tp->rtt_seq = tp->snd_nxt;
+
+ tcp_bpf_rtt(sk);
}
tp->srtt_us = max(1U, srtt);
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1b971bd95786..c21862ba9c02 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2224,8 +2224,7 @@ static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
int ret;
if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
- skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
- inet_compute_pseudo);
+ skb_checksum_try_convert(skb, IPPROTO_UDP, inet_compute_pseudo);
ret = udp_queue_rcv_skb(sk, skb);
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 80c40b4981bb..f8ed3c3bb928 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -15,46 +15,6 @@
#include <linux/netfilter_ipv4.h>
#include <linux/export.h>
-static int xfrm4_init_flags(struct xfrm_state *x)
-{
- if (xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
- x->props.flags |= XFRM_STATE_NOPMTUDISC;
- return 0;
-}
-
-static void
-__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
-{
- const struct flowi4 *fl4 = &fl->u.ip4;
-
- sel->daddr.a4 = fl4->daddr;
- sel->saddr.a4 = fl4->saddr;
- sel->dport = xfrm_flowi_dport(fl, &fl4->uli);
- sel->dport_mask = htons(0xffff);
- sel->sport = xfrm_flowi_sport(fl, &fl4->uli);
- sel->sport_mask = htons(0xffff);
- sel->family = AF_INET;
- sel->prefixlen_d = 32;
- sel->prefixlen_s = 32;
- sel->proto = fl4->flowi4_proto;
- sel->ifindex = fl4->flowi4_oif;
-}
-
-static void
-xfrm4_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
- const xfrm_address_t *daddr, const xfrm_address_t *saddr)
-{
- x->id = tmpl->id;
- if (x->id.daddr.a4 == 0)
- x->id.daddr.a4 = daddr->a4;
- x->props.saddr = tmpl->saddr;
- if (x->props.saddr.a4 == 0)
- x->props.saddr.a4 = saddr->a4;
- x->props.mode = tmpl->mode;
- x->props.reqid = tmpl->reqid;
- x->props.family = AF_INET;
-}
-
int xfrm4_extract_header(struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
@@ -74,11 +34,6 @@ int xfrm4_extract_header(struct sk_buff *skb)
static struct xfrm_state_afinfo xfrm4_state_afinfo = {
.family = AF_INET,
.proto = IPPROTO_IPIP,
- .eth_proto = htons(ETH_P_IP),
- .owner = THIS_MODULE,
- .init_flags = xfrm4_init_flags,
- .init_tempsel = __xfrm4_init_tempsel,
- .init_temprop = xfrm4_init_temprop,
.output = xfrm4_output,
.output_finish = xfrm4_output_finish,
.extract_input = xfrm4_extract_input,
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 5d00e54cd319..dc19aff7c2e0 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -108,8 +108,7 @@ static void __exit ipip_fini(void)
if (xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET))
pr_info("%s: can't remove xfrm handler for AF_INET\n",
__func__);
- if (xfrm_unregister_type(&ipip_type, AF_INET) < 0)
- pr_info("%s: can't remove xfrm type\n", __func__);
+ xfrm_unregister_type(&ipip_type, AF_INET);
}
module_init(ipip_init);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 7382a927d1eb..ef37e0574f54 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -208,7 +208,7 @@ lookup_protocol:
np->mc_loop = 1;
np->mc_all = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->repflow = net->ipv6.sysctl.flowlabel_reflect & 1;
+ np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED;
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
/* Init the ipv4 part of the socket since we can have sockets
@@ -564,6 +564,39 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
}
EXPORT_SYMBOL(inet6_ioctl);
+INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *,
+ size_t));
+int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+{
+ struct sock *sk = sock->sk;
+
+ if (unlikely(inet_send_prepare(sk)))
+ return -EAGAIN;
+
+ return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udpv6_sendmsg,
+ sk, msg, size);
+}
+
+INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *,
+ size_t, int, int, int *));
+int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
+{
+ struct sock *sk = sock->sk;
+ int addr_len = 0;
+ int err;
+
+ if (likely(!(flags & MSG_ERRQUEUE)))
+ sock_rps_record_flow(sk);
+
+ err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
+ sk, msg, size, flags & MSG_DONTWAIT,
+ flags & ~MSG_DONTWAIT, &addr_len);
+ if (err >= 0)
+ msg->msg_namelen = addr_len;
+ return err;
+}
+
const struct proto_ops inet6_stream_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
@@ -580,8 +613,8 @@ const struct proto_ops inet6_stream_ops = {
.shutdown = inet_shutdown, /* ok */
.setsockopt = sock_common_setsockopt, /* ok */
.getsockopt = sock_common_getsockopt, /* ok */
- .sendmsg = inet_sendmsg, /* ok */
- .recvmsg = inet_recvmsg, /* ok */
+ .sendmsg = inet6_sendmsg, /* retpoline's sake */
+ .recvmsg = inet6_recvmsg, /* retpoline's sake */
#ifdef CONFIG_MMU
.mmap = tcp_mmap,
#endif
@@ -614,8 +647,8 @@ const struct proto_ops inet6_dgram_ops = {
.shutdown = inet_shutdown, /* ok */
.setsockopt = sock_common_setsockopt, /* ok */
.getsockopt = sock_common_getsockopt, /* ok */
- .sendmsg = inet_sendmsg, /* ok */
- .recvmsg = inet_recvmsg, /* ok */
+ .sendmsg = inet6_sendmsg, /* retpoline's sake */
+ .recvmsg = inet6_recvmsg, /* retpoline's sake */
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.set_peek_off = sk_set_peek_off,
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 68b9e92e469e..25e1172fd1c3 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -793,9 +793,7 @@ static void __exit ah6_fini(void)
if (xfrm6_protocol_deregister(&ah6_protocol, IPPROTO_AH) < 0)
pr_info("%s: can't remove protocol\n", __func__);
- if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
- pr_info("%s: can't remove xfrm type\n", __func__);
-
+ xfrm_unregister_type(&ah6_type, AF_INET6);
}
module_init(ah6_init);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ae6a739c5f52..a3b403ba8f8f 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -41,8 +41,6 @@ struct esp_skb_cb {
#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
-static u32 esp6_get_mtu(struct xfrm_state *x, int mtu);
-
/*
* Allocate an AEAD request structure with extra space for SG and IV.
*
@@ -447,7 +445,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
u32 padto;
- padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached));
+ padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
if (skb->len < padto)
esp.tfclen = padto - skb->len;
}
@@ -687,21 +685,6 @@ out:
return ret;
}
-static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
-{
- struct crypto_aead *aead = x->data;
- u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4);
- unsigned int net_adj;
-
- if (x->props.mode != XFRM_MODE_TUNNEL)
- net_adj = sizeof(struct ipv6hdr);
- else
- net_adj = 0;
-
- return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
- net_adj) & ~(blksize - 1)) + net_adj - 2;
-}
-
static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
@@ -919,7 +902,6 @@ static const struct xfrm_type esp6_type = {
.flags = XFRM_TYPE_REPLAY_PROT,
.init_state = esp6_init_state,
.destructor = esp6_destroy,
- .get_mtu = esp6_get_mtu,
.input = esp6_input,
.output = esp6_output,
.hdr_offset = xfrm6_find_1stfragopt,
@@ -951,8 +933,7 @@ static void __exit esp6_fini(void)
{
if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
- if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
- pr_info("%s: can't remove xfrm type\n", __func__);
+ xfrm_unregister_type(&esp6_type, AF_INET6);
}
module_init(esp6_init);
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index d0d8528b294a..e31626ffccd1 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -336,9 +336,7 @@ static int __init esp6_offload_init(void)
static void __exit esp6_offload_exit(void)
{
- if (xfrm_unregister_type_offload(&esp6_type_offload, AF_INET6) < 0)
- pr_info("%s: can't remove xfrm type offload\n", __func__);
-
+ xfrm_unregister_type_offload(&esp6_type_offload, AF_INET6);
inet6_del_offload(&esp6_offload, IPPROTO_ESP);
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 12906301ec7b..62c997201970 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -703,6 +703,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
memset(&fl6, 0, sizeof(fl6));
+ if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
+ fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
+
fl6.flowi6_proto = IPPROTO_ICMPV6;
fl6.daddr = ipv6_hdr(skb)->saddr;
if (saddr)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5e3a7963b3cb..8e49fd62eea9 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -59,8 +59,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
+ const struct in6_addr *nexthop;
struct neighbour *neigh;
- struct in6_addr *nexthop;
int ret;
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 51fd33294c7c..3752bd3e92ce 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -206,8 +206,7 @@ static void __exit ipcomp6_fini(void)
{
if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
- if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
- pr_info("%s: can't remove xfrm type\n", __func__);
+ xfrm_unregister_type(&ipcomp6_type, AF_INET6);
}
module_init(ipcomp6_init);
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 91801432878c..878fcec14949 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -499,10 +499,8 @@ static void __exit mip6_fini(void)
{
if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0)
pr_info("%s: can't remove rawv6 mh filter\n", __func__);
- if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
- pr_info("%s: can't remove xfrm type(rthdr)\n", __func__);
- if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
- pr_info("%s: can't remove xfrm type(destopt)\n", __func__);
+ xfrm_unregister_type(&mip6_rthdr_type, AF_INET6);
+ xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
}
module_init(mip6_init);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c59e97cf9d25..39361f57351a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -176,7 +176,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
}
if (rt_dev == dev) {
- rt->dst.dev = loopback_dev;
+ rt->dst.dev = blackhole_netdev;
dev_hold(rt->dst.dev);
dev_put(rt_dev);
}
@@ -218,7 +218,8 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
{
const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
- return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
+ return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
+ dst->dev, skb, daddr);
}
static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
@@ -3144,10 +3145,9 @@ out:
return entries > rt_max_size;
}
-static struct rt6_info *ip6_nh_lookup_table(struct net *net,
- struct fib6_config *cfg,
- const struct in6_addr *gw_addr,
- u32 tbid, int flags)
+static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
+ const struct in6_addr *gw_addr, u32 tbid,
+ int flags, struct fib6_result *res)
{
struct flowi6 fl6 = {
.flowi6_oif = cfg->fc_ifindex,
@@ -3155,25 +3155,23 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
.saddr = cfg->fc_prefsrc,
};
struct fib6_table *table;
- struct rt6_info *rt;
+ int err;
table = fib6_get_table(net, tbid);
if (!table)
- return NULL;
+ return -EINVAL;
if (!ipv6_addr_any(&cfg->fc_prefsrc))
flags |= RT6_LOOKUP_F_HAS_SADDR;
flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
- rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
- /* if table lookup failed, fall back to full lookup */
- if (rt == net->ipv6.ip6_null_entry) {
- ip6_rt_put(rt);
- rt = NULL;
- }
+ err = fib6_table_lookup(net, table, cfg->fc_ifindex, &fl6, res, flags);
+ if (!err && res->f6i != net->ipv6.fib6_null_entry)
+ fib6_select_path(net, res, &fl6, cfg->fc_ifindex,
+ cfg->fc_ifindex != 0, NULL, flags);
- return rt;
+ return err;
}
static int ip6_route_check_nh_onlink(struct net *net,
@@ -3181,29 +3179,19 @@ static int ip6_route_check_nh_onlink(struct net *net,
const struct net_device *dev,
struct netlink_ext_ack *extack)
{
- u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
+ u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
const struct in6_addr *gw_addr = &cfg->fc_gateway;
- u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
- struct fib6_info *from;
- struct rt6_info *grt;
+ struct fib6_result res = {};
int err;
- err = 0;
- grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
- if (grt) {
- rcu_read_lock();
- from = rcu_dereference(grt->from);
- if (!grt->dst.error &&
- /* ignore match if it is the default route */
- from && !ipv6_addr_any(&from->fib6_dst.addr) &&
- (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
- NL_SET_ERR_MSG(extack,
- "Nexthop has invalid gateway or device mismatch");
- err = -EINVAL;
- }
- rcu_read_unlock();
-
- ip6_rt_put(grt);
+ err = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0, &res);
+ if (!err && !(res.fib6_flags & RTF_REJECT) &&
+ /* ignore match if it is the default route */
+ !ipv6_addr_any(&res.f6i->fib6_dst.addr) &&
+ (res.fib6_type != RTN_UNICAST || dev != res.nh->fib_nh_dev)) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop has invalid gateway or device mismatch");
+ err = -EINVAL;
}
return err;
@@ -3216,47 +3204,50 @@ static int ip6_route_check_nh(struct net *net,
{
const struct in6_addr *gw_addr = &cfg->fc_gateway;
struct net_device *dev = _dev ? *_dev : NULL;
- struct rt6_info *grt = NULL;
+ int flags = RT6_LOOKUP_F_IFACE;
+ struct fib6_result res = {};
int err = -EHOSTUNREACH;
if (cfg->fc_table) {
- int flags = RT6_LOOKUP_F_IFACE;
-
- grt = ip6_nh_lookup_table(net, cfg, gw_addr,
- cfg->fc_table, flags);
- if (grt) {
- if (grt->rt6i_flags & RTF_GATEWAY ||
- (dev && dev != grt->dst.dev)) {
- ip6_rt_put(grt);
- grt = NULL;
- }
- }
+ err = ip6_nh_lookup_table(net, cfg, gw_addr,
+ cfg->fc_table, flags, &res);
+ /* gw_addr can not require a gateway or resolve to a reject
+ * route. If a device is given, it must match the result.
+ */
+ if (err || res.fib6_flags & RTF_REJECT ||
+ res.nh->fib_nh_gw_family ||
+ (dev && dev != res.nh->fib_nh_dev))
+ err = -EHOSTUNREACH;
}
- if (!grt)
- grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
+ if (err < 0) {
+ struct flowi6 fl6 = {
+ .flowi6_oif = cfg->fc_ifindex,
+ .daddr = *gw_addr,
+ };
- if (!grt)
- goto out;
+ err = fib6_lookup(net, cfg->fc_ifindex, &fl6, &res, flags);
+ if (err || res.fib6_flags & RTF_REJECT ||
+ res.nh->fib_nh_gw_family)
+ err = -EHOSTUNREACH;
+ if (err)
+ return err;
+
+ fib6_select_path(net, &res, &fl6, cfg->fc_ifindex,
+ cfg->fc_ifindex != 0, NULL, flags);
+ }
+
+ err = 0;
if (dev) {
- if (dev != grt->dst.dev) {
- ip6_rt_put(grt);
- goto out;
- }
+ if (dev != res.nh->fib_nh_dev)
+ err = -EHOSTUNREACH;
} else {
- *_dev = dev = grt->dst.dev;
- *idev = grt->rt6i_idev;
+ *_dev = dev = res.nh->fib_nh_dev;
dev_hold(dev);
- in6_dev_hold(grt->rt6i_idev);
+ *idev = in6_dev_get(dev);
}
- if (!(grt->rt6i_flags & RTF_GATEWAY))
- err = 0;
-
- ip6_rt_put(grt);
-
-out:
return err;
}
@@ -3297,11 +3288,15 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
goto out;
}
+ rcu_read_lock();
+
if (cfg->fc_flags & RTNH_F_ONLINK)
err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
else
err = ip6_route_check_nh(net, cfg, _dev, idev);
+ rcu_read_unlock();
+
if (err)
goto out;
}
@@ -6079,7 +6074,7 @@ static struct ctl_table ipv6_route_table_template[] = {
.data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 6d86fac472e7..dc4c91e0bfb8 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -23,7 +23,7 @@
static int zero;
static int one = 1;
-static int three = 3;
+static int flowlabel_reflect_max = 0x7;
static int auto_flowlabels_min;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
@@ -114,9 +114,9 @@ static struct ctl_table ipv6_table_template[] = {
.data = &init_net.ipv6.sysctl.flowlabel_reflect,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
- .extra2 = &three,
+ .extra2 = &flowlabel_reflect_max,
},
{
.procname = "max_dst_opts_number",
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 408d9ec26971..4f3f99b39820 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -989,7 +989,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_TIME_WAIT)
label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
} else {
- if (net->ipv6.sysctl.flowlabel_reflect & 2)
+ if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
label = ip6_flowlabel(ipv6h);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 66ca5a4b17c4..4406e059da68 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -826,8 +826,7 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
int ret;
if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
- skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
- ip6_compute_pseudo);
+ skb_checksum_try_convert(skb, IPPROTO_UDP, ip6_compute_pseudo);
ret = udpv6_queue_rcv_skb(sk, skb);
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 5bdca3d5d6b7..78daadecbdef 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -21,137 +21,6 @@
#include <net/ipv6.h>
#include <net/addrconf.h>
-static void
-__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
-{
- const struct flowi6 *fl6 = &fl->u.ip6;
-
- /* Initialize temporary selector matching only
- * to current session. */
- *(struct in6_addr *)&sel->daddr = fl6->daddr;
- *(struct in6_addr *)&sel->saddr = fl6->saddr;
- sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
- sel->dport_mask = htons(0xffff);
- sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
- sel->sport_mask = htons(0xffff);
- sel->family = AF_INET6;
- sel->prefixlen_d = 128;
- sel->prefixlen_s = 128;
- sel->proto = fl6->flowi6_proto;
- sel->ifindex = fl6->flowi6_oif;
-}
-
-static void
-xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
- const xfrm_address_t *daddr, const xfrm_address_t *saddr)
-{
- x->id = tmpl->id;
- if (ipv6_addr_any((struct in6_addr *)&x->id.daddr))
- memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
- memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
- if (ipv6_addr_any((struct in6_addr *)&x->props.saddr))
- memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
- x->props.mode = tmpl->mode;
- x->props.reqid = tmpl->reqid;
- x->props.family = AF_INET6;
-}
-
-/* distribution counting sort function for xfrm_state and xfrm_tmpl */
-static int
-__xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass)
-{
- int count[XFRM_MAX_DEPTH] = { };
- int class[XFRM_MAX_DEPTH];
- int i;
-
- for (i = 0; i < n; i++) {
- int c;
- class[i] = c = cmp(src[i]);
- count[c]++;
- }
-
- for (i = 2; i < maxclass; i++)
- count[i] += count[i - 1];
-
- for (i = 0; i < n; i++) {
- dst[count[class[i] - 1]++] = src[i];
- src[i] = NULL;
- }
-
- return 0;
-}
-
-/*
- * Rule for xfrm_state:
- *
- * rule 1: select IPsec transport except AH
- * rule 2: select MIPv6 RO or inbound trigger
- * rule 3: select IPsec transport AH
- * rule 4: select IPsec tunnel
- * rule 5: others
- */
-static int __xfrm6_state_sort_cmp(void *p)
-{
- struct xfrm_state *v = p;
-
- switch (v->props.mode) {
- case XFRM_MODE_TRANSPORT:
- if (v->id.proto != IPPROTO_AH)
- return 1;
- else
- return 3;
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
- case XFRM_MODE_ROUTEOPTIMIZATION:
- case XFRM_MODE_IN_TRIGGER:
- return 2;
-#endif
- case XFRM_MODE_TUNNEL:
- case XFRM_MODE_BEET:
- return 4;
- }
- return 5;
-}
-
-static int
-__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
-{
- return __xfrm6_sort((void **)dst, (void **)src, n,
- __xfrm6_state_sort_cmp, 6);
-}
-
-/*
- * Rule for xfrm_tmpl:
- *
- * rule 1: select IPsec transport
- * rule 2: select MIPv6 RO or inbound trigger
- * rule 3: select IPsec tunnel
- * rule 4: others
- */
-static int __xfrm6_tmpl_sort_cmp(void *p)
-{
- struct xfrm_tmpl *v = p;
- switch (v->mode) {
- case XFRM_MODE_TRANSPORT:
- return 1;
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
- case XFRM_MODE_ROUTEOPTIMIZATION:
- case XFRM_MODE_IN_TRIGGER:
- return 2;
-#endif
- case XFRM_MODE_TUNNEL:
- case XFRM_MODE_BEET:
- return 3;
- }
- return 4;
-}
-
-static int
-__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
-{
- return __xfrm6_sort((void **)dst, (void **)src, n,
- __xfrm6_tmpl_sort_cmp, 5);
-}
-
int xfrm6_extract_header(struct sk_buff *skb)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -171,12 +40,6 @@ int xfrm6_extract_header(struct sk_buff *skb)
static struct xfrm_state_afinfo xfrm6_state_afinfo = {
.family = AF_INET6,
.proto = IPPROTO_IPV6,
- .eth_proto = htons(ETH_P_IPV6),
- .owner = THIS_MODULE,
- .init_tempsel = __xfrm6_init_tempsel,
- .init_temprop = xfrm6_init_temprop,
- .tmpl_sort = __xfrm6_tmpl_sort,
- .state_sort = __xfrm6_state_sort,
.output = xfrm6_output,
.output_finish = xfrm6_output_finish,
.extract_input = xfrm6_extract_input,
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 241317473114..cdfc33517e85 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -439,9 +439,9 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
struct nf_flowtable *flow_table = priv;
struct flow_offload_tuple tuple = {};
enum flow_offload_tuple_dir dir;
+ const struct in6_addr *nexthop;
struct flow_offload *flow;
struct net_device *outdev;
- struct in6_addr *nexthop;
struct ipv6hdr *ip6h;
struct rt6_info *rt;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8c27e198268a..8d54f3047768 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2412,6 +2412,9 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
ts = __packet_set_timestamp(po, ph, skb);
__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
+
+ if (!packet_read_pending(&po->tx_ring))
+ complete(&po->skb_completion);
}
sock_wfree(skb);
@@ -2596,7 +2599,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
struct net_device *dev;
struct virtio_net_hdr *vnet_hdr = NULL;
struct sockcm_cookie sockc;
@@ -2611,6 +2614,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
int hlen, tlen, copylen = 0;
+ long timeo = 0;
mutex_lock(&po->pg_vec_lock);
@@ -2657,12 +2661,21 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
size_max = dev->mtu + reserve + VLAN_HLEN;
+ reinit_completion(&po->skb_completion);
+
do {
ph = packet_current_frame(po, &po->tx_ring,
TP_STATUS_SEND_REQUEST);
if (unlikely(ph == NULL)) {
- if (need_wait && need_resched())
- schedule();
+ if (need_wait && skb) {
+ timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
+ timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
+ if (timeo <= 0) {
+ err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
+ goto out_put;
+ }
+ }
+ /* check for additional frames */
continue;
}
@@ -3218,6 +3231,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
sock_init_data(sock, sk);
po = pkt_sk(sk);
+ init_completion(&po->skb_completion);
sk->sk_family = PF_PACKET;
po->num = proto;
po->xmit = dev_queue_xmit;
@@ -4327,7 +4341,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
req3->tp_sizeof_priv ||
req3->tp_feature_req_word) {
err = -EINVAL;
- goto out;
+ goto out_free_pg_vec;
}
}
break;
@@ -4391,6 +4405,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
prb_shutdown_retire_blk_timer(po, rb_queue);
}
+out_free_pg_vec:
if (pg_vec)
free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
diff --git a/net/packet/internal.h b/net/packet/internal.h
index b5bcff2b7a43..82fb2b10f790 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -128,6 +128,7 @@ struct packet_sock {
unsigned int tp_hdrlen;
unsigned int tp_reserve;
unsigned int tp_tstamp;
+ struct completion skb_completion;
struct net_device __rcu *cached_dev;
int (*xmit)(struct sk_buff *skb);
struct packet_type prot_hook ____cacheline_aligned_in_smp;
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index a0b6abfbd277..948e3fe249ec 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -519,6 +519,9 @@ send_fragmentable:
}
break;
#endif
+
+ default:
+ BUG();
}
if (ret < 0)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 58e7573dded4..055faa298c8e 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -27,6 +27,9 @@
static LIST_HEAD(mirred_list);
static DEFINE_SPINLOCK(mirred_list_lock);
+#define MIRRED_RECURSION_LIMIT 4
+static DEFINE_PER_CPU(unsigned int, mirred_rec_level);
+
static bool tcf_mirred_is_act_redirect(int action)
{
return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR;
@@ -210,6 +213,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
struct sk_buff *skb2 = skb;
bool m_mac_header_xmit;
struct net_device *dev;
+ unsigned int rec_level;
int retval, err = 0;
bool use_reinsert;
bool want_ingress;
@@ -217,6 +221,14 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
int m_eaction;
int mac_len;
+ rec_level = __this_cpu_inc_return(mirred_rec_level);
+ if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) {
+ net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
+ netdev_name(skb->dev));
+ __this_cpu_dec(mirred_rec_level);
+ return TC_ACT_SHOT;
+ }
+
tcf_lastuse_update(&m->tcf_tm);
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
@@ -277,7 +289,9 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
if (use_reinsert) {
res->ingress = want_ingress;
res->qstats = this_cpu_ptr(m->common.cpu_qstats);
- return TC_ACT_REINSERT;
+ skb_tc_reinsert(skb, res);
+ __this_cpu_dec(mirred_rec_level);
+ return TC_ACT_CONSUMED;
}
}
@@ -292,6 +306,7 @@ out:
if (tcf_mirred_is_act_redirect(m_eaction))
retval = TC_ACT_SHOT;
}
+ __this_cpu_dec(mirred_rec_level);
return retval;
}
@@ -411,6 +426,11 @@ static void tcf_mirred_put_dev(struct net_device *dev)
dev_put(dev);
}
+static size_t tcf_mirred_get_fill_size(const struct tc_action *act)
+{
+ return nla_total_size(sizeof(struct tc_mirred));
+}
+
static struct tc_action_ops act_mirred_ops = {
.kind = "mirred",
.id = TCA_ID_MIRRED,
@@ -422,6 +442,7 @@ static struct tc_action_ops act_mirred_ops = {
.init = tcf_mirred_init,
.walk = tcf_mirred_walker,
.lookup = tcf_mirred_search,
+ .get_fill_size = tcf_mirred_get_fill_size,
.size = sizeof(struct tcf_mirred),
.get_dev = tcf_mirred_get_dev,
.put_dev = tcf_mirred_put_dev,
diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
index 243fd22f2248..9fff6480acc6 100644
--- a/net/sched/em_ipt.c
+++ b/net/sched/em_ipt.c
@@ -21,6 +21,7 @@
struct em_ipt_match {
const struct xt_match *match;
u32 hook;
+ u8 nfproto;
u8 match_data[0] __aligned(8);
};
@@ -71,11 +72,25 @@ static int policy_validate_match_data(struct nlattr **tb, u8 mrev)
return 0;
}
+static int addrtype_validate_match_data(struct nlattr **tb, u8 mrev)
+{
+ if (mrev != 1) {
+ pr_err("only addrtype match revision 1 supported");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static const struct em_ipt_xt_match em_ipt_xt_matches[] = {
{
.match_name = "policy",
.validate_match_data = policy_validate_match_data
},
+ {
+ .match_name = "addrtype",
+ .validate_match_data = addrtype_validate_match_data
+ },
{}
};
@@ -115,6 +130,7 @@ static int em_ipt_change(struct net *net, void *data, int data_len,
struct em_ipt_match *im = NULL;
struct xt_match *match;
int mdata_len, ret;
+ u8 nfproto;
ret = nla_parse_deprecated(tb, TCA_EM_IPT_MAX, data, data_len,
em_ipt_policy, NULL);
@@ -125,6 +141,15 @@ static int em_ipt_change(struct net *net, void *data, int data_len,
!tb[TCA_EM_IPT_MATCH_DATA] || !tb[TCA_EM_IPT_NFPROTO])
return -EINVAL;
+ nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]);
+ switch (nfproto) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ break;
+ default:
+ return -EINVAL;
+ }
+
match = get_xt_match(tb);
if (IS_ERR(match)) {
pr_err("unable to load match\n");
@@ -140,6 +165,7 @@ static int em_ipt_change(struct net *net, void *data, int data_len,
im->match = match;
im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]);
+ im->nfproto = nfproto;
nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len);
ret = check_match(net, im, mdata_len);
@@ -182,15 +208,33 @@ static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
const struct em_ipt_match *im = (const void *)em->data;
struct xt_action_param acpar = {};
struct net_device *indev = NULL;
+ u8 nfproto = im->match->family;
struct nf_hook_state state;
int ret;
+ switch (tc_skb_protocol(skb)) {
+ case htons(ETH_P_IP):
+ if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+ return 0;
+ if (nfproto == NFPROTO_UNSPEC)
+ nfproto = NFPROTO_IPV4;
+ break;
+ case htons(ETH_P_IPV6):
+ if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+ return 0;
+ if (nfproto == NFPROTO_UNSPEC)
+ nfproto = NFPROTO_IPV6;
+ break;
+ default:
+ return 0;
+ }
+
rcu_read_lock();
if (skb->skb_iif)
indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
- nf_hook_state_init(&state, im->hook, im->match->family,
+ nf_hook_state_init(&state, im->hook, nfproto,
indev ?: skb->dev, skb->dev, NULL, em->net, NULL);
acpar.match = im->match;
@@ -213,7 +257,7 @@ static int em_ipt_dump(struct sk_buff *skb, struct tcf_ematch *em)
return -EMSGSIZE;
if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0)
return -EMSGSIZE;
- if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->match->family) < 0)
+ if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->nfproto) < 0)
return -EMSGSIZE;
if (nla_put(skb, TCA_EM_IPT_MATCH_DATA,
im->match->usersize ?: im->match->matchsize,
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index e16a3d37d2bc..732e109c3055 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -549,12 +549,17 @@ static struct notifier_block cbs_device_notifier = {
static int __init cbs_module_init(void)
{
- int err = register_netdevice_notifier(&cbs_device_notifier);
+ int err;
+ err = register_netdevice_notifier(&cbs_device_notifier);
if (err)
return err;
- return register_qdisc(&cbs_qdisc_ops);
+ err = register_qdisc(&cbs_qdisc_ops);
+ if (err)
+ unregister_netdevice_notifier(&cbs_device_notifier);
+
+ return err;
}
static void __exit cbs_module_exit(void)
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index db0c2ba1d156..cebfb65d8556 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -22,10 +22,12 @@
#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
+#define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK)
struct etf_sched_data {
bool offload;
bool deadline_mode;
+ bool skip_sock_check;
int clockid;
int queue;
s32 delta; /* in ns */
@@ -77,6 +79,9 @@ static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
struct sock *sk = nskb->sk;
ktime_t now;
+ if (q->skip_sock_check)
+ goto skip;
+
if (!sk)
return false;
@@ -92,6 +97,7 @@ static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
if (sk->sk_txtime_deadline_mode != q->deadline_mode)
return false;
+skip:
now = q->get_time();
if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
return false;
@@ -385,6 +391,7 @@ static int etf_init(struct Qdisc *sch, struct nlattr *opt,
q->clockid = qopt->clockid;
q->offload = OFFLOAD_IS_ON(qopt);
q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
+ q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt);
switch (q->clockid) {
case CLOCK_REALTIME:
@@ -473,6 +480,9 @@ static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
if (q->deadline_mode)
opt.flags |= TC_ETF_DEADLINE_MODE_ON;
+ if (q->skip_sock_check)
+ opt.flags |= TC_ETF_SKIP_SOCK_CHECK;
+
if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 9ecfb8f5902a..388750ddc57a 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -21,12 +21,17 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>
+#include <net/sock.h>
+#include <net/tcp.h>
static LIST_HEAD(taprio_list);
static DEFINE_SPINLOCK(taprio_list_lock);
#define TAPRIO_ALL_GATES_OPEN -1
+#define FLAGS_VALID(flags) (!((flags) & ~TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST))
+#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
+
struct sched_entry {
struct list_head list;
@@ -35,6 +40,7 @@ struct sched_entry {
* packet leaves after this time.
*/
ktime_t close_time;
+ ktime_t next_txtime;
atomic_t budget;
int index;
u32 gate_mask;
@@ -55,6 +61,8 @@ struct sched_gate_list {
struct taprio_sched {
struct Qdisc **qdiscs;
struct Qdisc *root;
+ u32 flags;
+ enum tk_offsets tk_offset;
int clockid;
atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
* speeds it's sub-nanoseconds per byte
@@ -65,9 +73,9 @@ struct taprio_sched {
struct sched_entry __rcu *current_entry;
struct sched_gate_list __rcu *oper_sched;
struct sched_gate_list __rcu *admin_sched;
- ktime_t (*get_time)(void);
struct hrtimer advance_timer;
struct list_head taprio_list;
+ int txtime_delay;
};
static ktime_t sched_base_time(const struct sched_gate_list *sched)
@@ -78,6 +86,20 @@ static ktime_t sched_base_time(const struct sched_gate_list *sched)
return ns_to_ktime(sched->base_time);
}
+static ktime_t taprio_get_time(struct taprio_sched *q)
+{
+ ktime_t mono = ktime_get();
+
+ switch (q->tk_offset) {
+ case TK_OFFS_MAX:
+ return mono;
+ default:
+ return ktime_mono_to_any(mono, q->tk_offset);
+ }
+
+ return KTIME_MAX;
+}
+
static void taprio_free_sched_cb(struct rcu_head *head)
{
struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
@@ -108,20 +130,263 @@ static void switch_schedules(struct taprio_sched *q,
*admin = NULL;
}
-static ktime_t get_cycle_time(struct sched_gate_list *sched)
+/* Get how much time has been already elapsed in the current cycle. */
+static s32 get_cycle_time_elapsed(struct sched_gate_list *sched, ktime_t time)
+{
+ ktime_t time_since_sched_start;
+ s32 time_elapsed;
+
+ time_since_sched_start = ktime_sub(time, sched->base_time);
+ div_s64_rem(time_since_sched_start, sched->cycle_time, &time_elapsed);
+
+ return time_elapsed;
+}
+
+static ktime_t get_interval_end_time(struct sched_gate_list *sched,
+ struct sched_gate_list *admin,
+ struct sched_entry *entry,
+ ktime_t intv_start)
+{
+ s32 cycle_elapsed = get_cycle_time_elapsed(sched, intv_start);
+ ktime_t intv_end, cycle_ext_end, cycle_end;
+
+ cycle_end = ktime_add_ns(intv_start, sched->cycle_time - cycle_elapsed);
+ intv_end = ktime_add_ns(intv_start, entry->interval);
+ cycle_ext_end = ktime_add(cycle_end, sched->cycle_time_extension);
+
+ if (ktime_before(intv_end, cycle_end))
+ return intv_end;
+ else if (admin && admin != sched &&
+ ktime_after(admin->base_time, cycle_end) &&
+ ktime_before(admin->base_time, cycle_ext_end))
+ return admin->base_time;
+ else
+ return cycle_end;
+}
+
+static int length_to_duration(struct taprio_sched *q, int len)
+{
+ return div_u64(len * atomic64_read(&q->picos_per_byte), 1000);
+}
+
+/* Returns the entry corresponding to next available interval. If
+ * validate_interval is set, it only validates whether the timestamp occurs
+ * when the gate corresponding to the skb's traffic class is open.
+ */
+static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb,
+ struct Qdisc *sch,
+ struct sched_gate_list *sched,
+ struct sched_gate_list *admin,
+ ktime_t time,
+ ktime_t *interval_start,
+ ktime_t *interval_end,
+ bool validate_interval)
+{
+ ktime_t curr_intv_start, curr_intv_end, cycle_end, packet_transmit_time;
+ ktime_t earliest_txtime = KTIME_MAX, txtime, cycle, transmit_end_time;
+ struct sched_entry *entry = NULL, *entry_found = NULL;
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ bool entry_available = false;
+ s32 cycle_elapsed;
+ int tc, n;
+
+ tc = netdev_get_prio_tc_map(dev, skb->priority);
+ packet_transmit_time = length_to_duration(q, qdisc_pkt_len(skb));
+
+ *interval_start = 0;
+ *interval_end = 0;
+
+ if (!sched)
+ return NULL;
+
+ cycle = sched->cycle_time;
+ cycle_elapsed = get_cycle_time_elapsed(sched, time);
+ curr_intv_end = ktime_sub_ns(time, cycle_elapsed);
+ cycle_end = ktime_add_ns(curr_intv_end, cycle);
+
+ list_for_each_entry(entry, &sched->entries, list) {
+ curr_intv_start = curr_intv_end;
+ curr_intv_end = get_interval_end_time(sched, admin, entry,
+ curr_intv_start);
+
+ if (ktime_after(curr_intv_start, cycle_end))
+ break;
+
+ if (!(entry->gate_mask & BIT(tc)) ||
+ packet_transmit_time > entry->interval)
+ continue;
+
+ txtime = entry->next_txtime;
+
+ if (ktime_before(txtime, time) || validate_interval) {
+ transmit_end_time = ktime_add_ns(time, packet_transmit_time);
+ if ((ktime_before(curr_intv_start, time) &&
+ ktime_before(transmit_end_time, curr_intv_end)) ||
+ (ktime_after(curr_intv_start, time) && !validate_interval)) {
+ entry_found = entry;
+ *interval_start = curr_intv_start;
+ *interval_end = curr_intv_end;
+ break;
+ } else if (!entry_available && !validate_interval) {
+ /* Here, we are just trying to find out the
+ * first available interval in the next cycle.
+ */
+ entry_available = 1;
+ entry_found = entry;
+ *interval_start = ktime_add_ns(curr_intv_start, cycle);
+ *interval_end = ktime_add_ns(curr_intv_end, cycle);
+ }
+ } else if (ktime_before(txtime, earliest_txtime) &&
+ !entry_available) {
+ earliest_txtime = txtime;
+ entry_found = entry;
+ n = div_s64(ktime_sub(txtime, curr_intv_start), cycle);
+ *interval_start = ktime_add(curr_intv_start, n * cycle);
+ *interval_end = ktime_add(curr_intv_end, n * cycle);
+ }
+ }
+
+ return entry_found;
+}
+
+static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct sched_gate_list *sched, *admin;
+ ktime_t interval_start, interval_end;
struct sched_entry *entry;
- ktime_t cycle = 0;
- if (sched->cycle_time != 0)
- return sched->cycle_time;
+ rcu_read_lock();
+ sched = rcu_dereference(q->oper_sched);
+ admin = rcu_dereference(q->admin_sched);
+
+ entry = find_entry_to_transmit(skb, sch, sched, admin, skb->tstamp,
+ &interval_start, &interval_end, true);
+ rcu_read_unlock();
- list_for_each_entry(entry, &sched->entries, list)
- cycle = ktime_add_ns(cycle, entry->interval);
+ return entry;
+}
- sched->cycle_time = cycle;
+/* This returns the tstamp value set by TCP in terms of the set clock. */
+static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
+{
+ unsigned int offset = skb_network_offset(skb);
+ const struct ipv6hdr *ipv6h;
+ const struct iphdr *iph;
+ struct ipv6hdr _ipv6h;
- return cycle;
+ ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
+ if (!ipv6h)
+ return 0;
+
+ if (ipv6h->version == 4) {
+ iph = (struct iphdr *)ipv6h;
+ offset += iph->ihl * 4;
+
+ /* special-case 6in4 tunnelling, as that is a common way to get
+ * v6 connectivity in the home
+ */
+ if (iph->protocol == IPPROTO_IPV6) {
+ ipv6h = skb_header_pointer(skb, offset,
+ sizeof(_ipv6h), &_ipv6h);
+
+ if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
+ return 0;
+ } else if (iph->protocol != IPPROTO_TCP) {
+ return 0;
+ }
+ } else if (ipv6h->version == 6 && ipv6h->nexthdr != IPPROTO_TCP) {
+ return 0;
+ }
+
+ return ktime_mono_to_any(skb->skb_mstamp_ns, q->tk_offset);
+}
+
+/* There are a few scenarios where we will have to modify the txtime from
+ * what is read from next_txtime in sched_entry. They are:
+ * 1. If txtime is in the past,
+ * a. The gate for the traffic class is currently open and packet can be
+ * transmitted before it closes, schedule the packet right away.
+ * b. If the gate corresponding to the traffic class is going to open later
+ * in the cycle, set the txtime of packet to the interval start.
+ * 2. If txtime is in the future, there are packets corresponding to the
+ * current traffic class waiting to be transmitted. So, the following
+ * possibilities exist:
+ * a. We can transmit the packet before the window containing the txtime
+ * closes.
+ * b. The window might close before the transmission can be completed
+ * successfully. So, schedule the packet in the next open window.
+ */
+static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
+{
+ ktime_t transmit_end_time, interval_end, interval_start, tcp_tstamp;
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct sched_gate_list *sched, *admin;
+ ktime_t minimum_time, now, txtime;
+ int len, packet_transmit_time;
+ struct sched_entry *entry;
+ bool sched_changed;
+
+ now = taprio_get_time(q);
+ minimum_time = ktime_add_ns(now, q->txtime_delay);
+
+ tcp_tstamp = get_tcp_tstamp(q, skb);
+ minimum_time = max_t(ktime_t, minimum_time, tcp_tstamp);
+
+ rcu_read_lock();
+ admin = rcu_dereference(q->admin_sched);
+ sched = rcu_dereference(q->oper_sched);
+ if (admin && ktime_after(minimum_time, admin->base_time))
+ switch_schedules(q, &admin, &sched);
+
+ /* Until the schedule starts, all the queues are open */
+ if (!sched || ktime_before(minimum_time, sched->base_time)) {
+ txtime = minimum_time;
+ goto done;
+ }
+
+ len = qdisc_pkt_len(skb);
+ packet_transmit_time = length_to_duration(q, len);
+
+ do {
+ sched_changed = 0;
+
+ entry = find_entry_to_transmit(skb, sch, sched, admin,
+ minimum_time,
+ &interval_start, &interval_end,
+ false);
+ if (!entry) {
+ txtime = 0;
+ goto done;
+ }
+
+ txtime = entry->next_txtime;
+ txtime = max_t(ktime_t, txtime, minimum_time);
+ txtime = max_t(ktime_t, txtime, interval_start);
+
+ if (admin && admin != sched &&
+ ktime_after(txtime, admin->base_time)) {
+ sched = admin;
+ sched_changed = 1;
+ continue;
+ }
+
+ transmit_end_time = ktime_add(txtime, packet_transmit_time);
+ minimum_time = transmit_end_time;
+
+ /* Update the txtime of current entry to the next time it's
+ * interval starts.
+ */
+ if (ktime_after(transmit_end_time, interval_end))
+ entry->next_txtime = ktime_add(interval_start, sched->cycle_time);
+ } while (sched_changed || ktime_after(transmit_end_time, interval_end));
+
+ entry->next_txtime = transmit_end_time;
+
+done:
+ rcu_read_unlock();
+ return txtime;
}
static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
@@ -137,6 +402,15 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (unlikely(!child))
return qdisc_drop(skb, sch, to_free);
+ if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) {
+ if (!is_valid_interval(skb, sch))
+ return qdisc_drop(skb, sch, to_free);
+ } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
+ skb->tstamp = get_packet_txtime(skb, sch);
+ if (!skb->tstamp)
+ return qdisc_drop(skb, sch, to_free);
+ }
+
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
@@ -172,6 +446,9 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch)
if (!skb)
continue;
+ if (TXTIME_ASSIST_IS_ENABLED(q->flags))
+ return skb;
+
prio = skb->priority;
tc = netdev_get_prio_tc_map(dev, prio);
@@ -184,11 +461,6 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch)
return NULL;
}
-static inline int length_to_duration(struct taprio_sched *q, int len)
-{
- return div_u64(len * atomic64_read(&q->picos_per_byte), 1000);
-}
-
static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
{
atomic_set(&entry->budget,
@@ -232,6 +504,13 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
if (unlikely(!child))
continue;
+ if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
+ skb = child->ops->dequeue(child);
+ if (!skb)
+ continue;
+ goto skb_found;
+ }
+
skb = child->ops->peek(child);
if (!skb)
continue;
@@ -243,7 +522,7 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
continue;
len = qdisc_pkt_len(skb);
- guard = ktime_add_ns(q->get_time(),
+ guard = ktime_add_ns(taprio_get_time(q),
length_to_duration(q, len));
/* In the case that there's no gate entry, there's no
@@ -262,6 +541,7 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
if (unlikely(!skb))
goto done;
+skb_found:
qdisc_bstats_update(sch, skb);
qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
@@ -524,12 +804,22 @@ static int parse_taprio_schedule(struct nlattr **tb,
if (err < 0)
return err;
+ if (!new->cycle_time) {
+ struct sched_entry *entry;
+ ktime_t cycle = 0;
+
+ list_for_each_entry(entry, &new->entries, list)
+ cycle = ktime_add_ns(cycle, entry->interval);
+ new->cycle_time = cycle;
+ }
+
return 0;
}
static int taprio_parse_mqprio_opt(struct net_device *dev,
struct tc_mqprio_qopt *qopt,
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ u32 taprio_flags)
{
int i, j;
@@ -577,6 +867,9 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
return -EINVAL;
}
+ if (TXTIME_ASSIST_IS_ENABLED(taprio_flags))
+ continue;
+
/* Verify that the offset and counts do not overlap */
for (j = i + 1; j < qopt->num_tc; j++) {
if (last > qopt->offset[j]) {
@@ -598,14 +891,14 @@ static int taprio_get_start_time(struct Qdisc *sch,
s64 n;
base = sched_base_time(sched);
- now = q->get_time();
+ now = taprio_get_time(q);
if (ktime_after(base, now)) {
*start = base;
return 0;
}
- cycle = get_cycle_time(sched);
+ cycle = sched->cycle_time;
/* The qdisc is expected to have at least one sched_entry. Moreover,
* any entry must have 'interval' > 0. Thus if the cycle time is zero,
@@ -632,7 +925,7 @@ static void setup_first_close_time(struct taprio_sched *q,
first = list_first_entry(&sched->entries,
struct sched_entry, list);
- cycle = get_cycle_time(sched);
+ cycle = sched->cycle_time;
/* FIXME: find a better place to do this */
sched->cycle_close_time = ktime_add_ns(base, cycle);
@@ -707,6 +1000,18 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
return NOTIFY_DONE;
}
+static void setup_txtime(struct taprio_sched *q,
+ struct sched_gate_list *sched, ktime_t base)
+{
+ struct sched_entry *entry;
+ u32 interval = 0;
+
+ list_for_each_entry(entry, &sched->entries, list) {
+ entry->next_txtime = ktime_add_ns(base, interval);
+ interval += entry->interval;
+ }
+}
+
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -715,6 +1020,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
+ u32 taprio_flags = 0;
int i, err, clockid;
unsigned long flags;
ktime_t start;
@@ -727,7 +1033,21 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
- err = taprio_parse_mqprio_opt(dev, mqprio, extack);
+ if (tb[TCA_TAPRIO_ATTR_FLAGS]) {
+ taprio_flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]);
+
+ if (q->flags != 0 && q->flags != taprio_flags) {
+ NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
+ return -EOPNOTSUPP;
+ } else if (!FLAGS_VALID(taprio_flags)) {
+ NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
+ return -EINVAL;
+ }
+
+ q->flags = taprio_flags;
+ }
+
+ err = taprio_parse_mqprio_opt(dev, mqprio, extack, taprio_flags);
if (err < 0)
return err;
@@ -786,7 +1106,18 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
/* Protects against enqueue()/dequeue() */
spin_lock_bh(qdisc_lock(sch));
- if (!hrtimer_active(&q->advance_timer)) {
+ if (tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]) {
+ if (!TXTIME_ASSIST_IS_ENABLED(q->flags)) {
+ NL_SET_ERR_MSG_MOD(extack, "txtime-delay can only be set when txtime-assist mode is enabled");
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ q->txtime_delay = nla_get_s32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
+ }
+
+ if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) &&
+ !hrtimer_active(&q->advance_timer)) {
hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
q->advance_timer.function = advance_sched;
}
@@ -806,16 +1137,16 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
switch (q->clockid) {
case CLOCK_REALTIME:
- q->get_time = ktime_get_real;
+ q->tk_offset = TK_OFFS_REAL;
break;
case CLOCK_MONOTONIC:
- q->get_time = ktime_get;
+ q->tk_offset = TK_OFFS_MAX;
break;
case CLOCK_BOOTTIME:
- q->get_time = ktime_get_boottime;
+ q->tk_offset = TK_OFFS_BOOT;
break;
case CLOCK_TAI:
- q->get_time = ktime_get_clocktai;
+ q->tk_offset = TK_OFFS_TAI;
break;
default:
NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
@@ -829,20 +1160,35 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto unlock;
}
- setup_first_close_time(q, new_admin, start);
+ if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) {
+ setup_txtime(q, new_admin, start);
- /* Protects against advance_sched() */
- spin_lock_irqsave(&q->current_entry_lock, flags);
+ if (!oper) {
+ rcu_assign_pointer(q->oper_sched, new_admin);
+ err = 0;
+ new_admin = NULL;
+ goto unlock;
+ }
- taprio_start_sched(sch, start, new_admin);
+ rcu_assign_pointer(q->admin_sched, new_admin);
+ if (admin)
+ call_rcu(&admin->rcu, taprio_free_sched_cb);
+ } else {
+ setup_first_close_time(q, new_admin, start);
- rcu_assign_pointer(q->admin_sched, new_admin);
- if (admin)
- call_rcu(&admin->rcu, taprio_free_sched_cb);
- new_admin = NULL;
+ /* Protects against advance_sched() */
+ spin_lock_irqsave(&q->current_entry_lock, flags);
- spin_unlock_irqrestore(&q->current_entry_lock, flags);
+ taprio_start_sched(sch, start, new_admin);
+ rcu_assign_pointer(q->admin_sched, new_admin);
+ if (admin)
+ call_rcu(&admin->rcu, taprio_free_sched_cb);
+
+ spin_unlock_irqrestore(&q->current_entry_lock, flags);
+ }
+
+ new_admin = NULL;
err = 0;
unlock:
@@ -1080,6 +1426,13 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
goto options_error;
+ if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags))
+ goto options_error;
+
+ if (q->txtime_delay &&
+ nla_put_s32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
+ goto options_error;
+
if (oper && dump_schedule(skb, oper))
goto options_error;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index e358437ba29b..69cebb2c998b 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -118,10 +118,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
/* Initialize the bind addr area */
sctp_bind_addr_init(&ep->base.bind_addr, 0);
- /* Remember who we are attached to. */
- ep->base.sk = sk;
- sock_hold(ep->base.sk);
-
/* Create the lists of associations. */
INIT_LIST_HEAD(&ep->asocs);
@@ -154,6 +150,10 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
ep->prsctp_enable = net->sctp.prsctp_enable;
ep->reconf_enable = net->sctp.reconf_enable;
+ /* Remember who we are attached to. */
+ ep->base.sk = sk;
+ sock_hold(ep->base.sk);
+
return ep;
nomem_shkey:
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 0c874e996f85..302e355f2ebc 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -123,30 +123,11 @@ struct proto smc_proto6 = {
};
EXPORT_SYMBOL_GPL(smc_proto6);
-static int smc_release(struct socket *sock)
+static int __smc_release(struct smc_sock *smc)
{
- struct sock *sk = sock->sk;
- struct smc_sock *smc;
+ struct sock *sk = &smc->sk;
int rc = 0;
- if (!sk)
- goto out;
-
- smc = smc_sk(sk);
-
- /* cleanup for a dangling non-blocking connect */
- if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
- tcp_abort(smc->clcsock->sk, ECONNABORTED);
- flush_work(&smc->connect_work);
-
- if (sk->sk_state == SMC_LISTEN)
- /* smc_close_non_accepted() is called and acquires
- * sock lock for child sockets again
- */
- lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
- else
- lock_sock(sk);
-
if (!smc->use_fallback) {
rc = smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
@@ -174,6 +155,35 @@ static int smc_release(struct socket *sock)
smc_conn_free(&smc->conn);
}
+ return rc;
+}
+
+static int smc_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct smc_sock *smc;
+ int rc = 0;
+
+ if (!sk)
+ goto out;
+
+ smc = smc_sk(sk);
+
+ /* cleanup for a dangling non-blocking connect */
+ if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
+ tcp_abort(smc->clcsock->sk, ECONNABORTED);
+ flush_work(&smc->connect_work);
+
+ if (sk->sk_state == SMC_LISTEN)
+ /* smc_close_non_accepted() is called and acquires
+ * sock lock for child sockets again
+ */
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+ else
+ lock_sock(sk);
+
+ rc = __smc_release(smc);
+
/* detach socket */
sock_orphan(sk);
sock->sk = NULL;
@@ -964,26 +974,7 @@ void smc_close_non_accepted(struct sock *sk)
if (!sk->sk_lingertime)
/* wait for peer closing */
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
- if (!smc->use_fallback) {
- smc_close_active(smc);
- sock_set_flag(sk, SOCK_DEAD);
- sk->sk_shutdown |= SHUTDOWN_MASK;
- }
- sk->sk_prot->unhash(sk);
- if (smc->clcsock) {
- struct socket *tcp;
-
- tcp = smc->clcsock;
- smc->clcsock = NULL;
- sock_release(tcp);
- }
- if (smc->use_fallback) {
- sock_put(sk); /* passive closing */
- sk->sk_state = SMC_CLOSED;
- } else {
- if (sk->sk_state == SMC_CLOSED)
- smc_conn_free(&smc->conn);
- }
+ __smc_release(smc);
release_sock(sk);
sock_put(sk); /* final sock_put */
}
@@ -2029,7 +2020,7 @@ static int __init smc_init(void)
rc = smc_pnet_init();
if (rc)
- return rc;
+ goto out_pernet_subsys;
rc = smc_llc_init();
if (rc) {
@@ -2080,6 +2071,9 @@ out_proto:
proto_unregister(&smc_proto);
out_pnet:
smc_pnet_exit();
+out_pernet_subsys:
+ unregister_pernet_subsys(&smc_net_ops);
+
return rc;
}
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2d2850adc2a3..4ca50ddf8d16 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -652,7 +652,10 @@ create:
rc = smc_lgr_create(smc, ini);
if (rc)
goto out;
+ lgr = conn->lgr;
+ write_lock_bh(&lgr->conns_lock);
smc_lgr_register_conn(conn); /* add smc conn to lgr */
+ write_unlock_bh(&lgr->conns_lock);
}
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
diff --git a/net/socket.c b/net/socket.c
index 963df5dbdd54..d97b74f762e8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -103,13 +103,6 @@
#include <net/busy_poll.h>
#include <linux/errqueue.h>
-/* proto_ops for ipv4 and ipv6 use the same {recv,send}msg function */
-#if IS_ENABLED(CONFIG_INET)
-#define INDIRECT_CALL_INET4(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
-#else
-#define INDIRECT_CALL_INET4(f, f1, ...) f(__VA_ARGS__)
-#endif
-
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
@@ -641,10 +634,13 @@ EXPORT_SYMBOL(__sock_tx_timestamp);
INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
size_t));
+INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
+ size_t));
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
- int ret = INDIRECT_CALL_INET4(sock->ops->sendmsg, inet_sendmsg, sock,
- msg, msg_data_left(msg));
+ int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
+ inet_sendmsg, sock, msg,
+ msg_data_left(msg));
BUG_ON(ret == -EIOCBQUEUED);
return ret;
}
@@ -870,12 +866,15 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
- size_t , int ));
+ size_t, int));
+INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
+ size_t, int));
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
int flags)
{
- return INDIRECT_CALL_INET4(sock->ops->recvmsg, inet_recvmsg, sock, msg,
- msg_data_left(msg), flags);
+ return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
+ inet_recvmsg, sock, msg, msg_data_left(msg),
+ flags);
}
/**
@@ -2051,6 +2050,8 @@ SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
static int __sys_setsockopt(int fd, int level, int optname,
char __user *optval, int optlen)
{
+ mm_segment_t oldfs = get_fs();
+ char *kernel_optval = NULL;
int err, fput_needed;
struct socket *sock;
@@ -2063,6 +2064,22 @@ static int __sys_setsockopt(int fd, int level, int optname,
if (err)
goto out_put;
+ err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
+ &optname, optval, &optlen,
+ &kernel_optval);
+
+ if (err < 0) {
+ goto out_put;
+ } else if (err > 0) {
+ err = 0;
+ goto out_put;
+ }
+
+ if (kernel_optval) {
+ set_fs(KERNEL_DS);
+ optval = (char __user __force *)kernel_optval;
+ }
+
if (level == SOL_SOCKET)
err =
sock_setsockopt(sock, level, optname, optval,
@@ -2071,6 +2088,11 @@ static int __sys_setsockopt(int fd, int level, int optname,
err =
sock->ops->setsockopt(sock, level, optname, optval,
optlen);
+
+ if (kernel_optval) {
+ set_fs(oldfs);
+ kfree(kernel_optval);
+ }
out_put:
fput_light(sock->file, fput_needed);
}
@@ -2093,6 +2115,7 @@ static int __sys_getsockopt(int fd, int level, int optname,
{
int err, fput_needed;
struct socket *sock;
+ int max_optlen;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) {
@@ -2100,6 +2123,8 @@ static int __sys_getsockopt(int fd, int level, int optname,
if (err)
goto out_put;
+ max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
+
if (level == SOL_SOCKET)
err =
sock_getsockopt(sock, level, optname, optval,
@@ -2108,6 +2133,10 @@ static int __sys_getsockopt(int fd, int level, int optname,
err =
sock->ops->getsockopt(sock, level, optname, optval,
optlen);
+
+ err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
+ optval, optlen,
+ max_optlen, err);
out_put:
fput_light(sock->file, fput_needed);
}
diff --git a/net/tipc/core.c b/net/tipc/core.c
index ed536c05252a..c8370722f0bb 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -134,7 +134,7 @@ static int __init tipc_init(void)
if (err)
goto out_sysctl;
- err = register_pernet_subsys(&tipc_net_ops);
+ err = register_pernet_device(&tipc_net_ops);
if (err)
goto out_pernet;
@@ -142,7 +142,7 @@ static int __init tipc_init(void)
if (err)
goto out_socket;
- err = register_pernet_subsys(&tipc_topsrv_net_ops);
+ err = register_pernet_device(&tipc_topsrv_net_ops);
if (err)
goto out_pernet_topsrv;
@@ -153,11 +153,11 @@ static int __init tipc_init(void)
pr_info("Started in single node mode\n");
return 0;
out_bearer:
- unregister_pernet_subsys(&tipc_topsrv_net_ops);
+ unregister_pernet_device(&tipc_topsrv_net_ops);
out_pernet_topsrv:
tipc_socket_stop();
out_socket:
- unregister_pernet_subsys(&tipc_net_ops);
+ unregister_pernet_device(&tipc_net_ops);
out_pernet:
tipc_unregister_sysctl();
out_sysctl:
@@ -172,9 +172,9 @@ out_netlink:
static void __exit tipc_exit(void)
{
tipc_bearer_cleanup();
- unregister_pernet_subsys(&tipc_topsrv_net_ops);
+ unregister_pernet_device(&tipc_topsrv_net_ops);
tipc_socket_stop();
- unregister_pernet_subsys(&tipc_net_ops);
+ unregister_pernet_device(&tipc_net_ops);
tipc_netlink_stop();
tipc_netlink_compat_stop();
tipc_unregister_sysctl();
diff --git a/net/tipc/link.c b/net/tipc/link.c
index f8bf63befe1f..66d3a07bc571 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -207,7 +207,7 @@ enum {
BC_NACK_SND_SUPPRESS,
};
-#define TIPC_BC_RETR_LIM msecs_to_jiffies(10) /* [ms] */
+#define TIPC_BC_RETR_LIM (jiffies + msecs_to_jiffies(10))
#define TIPC_UC_RETR_TIME (jiffies + msecs_to_jiffies(1))
/*
@@ -976,8 +976,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
__skb_queue_tail(transmq, skb);
/* next retransmit attempt */
if (link_is_bc_sndlink(l))
- TIPC_SKB_CB(skb)->nxt_retr =
- jiffies + TIPC_BC_RETR_LIM;
+ TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
__skb_queue_tail(xmitq, _skb);
TIPC_SKB_CB(skb)->ackers = l->ackers;
l->rcv_unacked = 0;
@@ -1027,7 +1026,7 @@ static void tipc_link_advance_backlog(struct tipc_link *l,
__skb_queue_tail(&l->transmq, skb);
/* next retransmit attempt */
if (link_is_bc_sndlink(l))
- TIPC_SKB_CB(skb)->nxt_retr = jiffies + TIPC_BC_RETR_LIM;
+ TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
__skb_queue_tail(xmitq, _skb);
TIPC_SKB_CB(skb)->ackers = l->ackers;
@@ -1123,7 +1122,7 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
if (link_is_bc_sndlink(l)) {
if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
continue;
- TIPC_SKB_CB(skb)->nxt_retr = jiffies + TIPC_BC_RETR_LIM;
+ TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
}
_skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, GFP_ATOMIC);
if (!_skb)
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 20783ccab794..d86030ef1232 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -445,7 +445,11 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd,
if (!bearer)
return -EMSGSIZE;
- len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME);
+ len = TLV_GET_DATA_LEN(msg->req);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(name, len))
return -EINVAL;
@@ -539,7 +543,11 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
name = (char *)TLV_DATA(msg->req);
- len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+ len = TLV_GET_DATA_LEN(msg->req);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(name, len))
return -EINVAL;
@@ -807,7 +815,11 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd,
if (!link)
return -EMSGSIZE;
- len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+ len = TLV_GET_DATA_LEN(msg->req);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(name, len))
return -EINVAL;
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 1405ccc9101c..c0f694ae57ab 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -76,6 +76,7 @@ struct udp_media_addr {
/* struct udp_replicast - container for UDP remote addresses */
struct udp_replicast {
struct udp_media_addr addr;
+ struct dst_cache dst_cache;
struct rcu_head rcu;
struct list_head list;
};
@@ -158,22 +159,27 @@ static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a)
/* tipc_send_msg - enqueue a send request */
static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
struct udp_bearer *ub, struct udp_media_addr *src,
- struct udp_media_addr *dst)
+ struct udp_media_addr *dst, struct dst_cache *cache)
{
+ struct dst_entry *ndst = dst_cache_get(cache);
int ttl, err = 0;
- struct rtable *rt;
if (dst->proto == htons(ETH_P_IP)) {
- struct flowi4 fl = {
- .daddr = dst->ipv4.s_addr,
- .saddr = src->ipv4.s_addr,
- .flowi4_mark = skb->mark,
- .flowi4_proto = IPPROTO_UDP
- };
- rt = ip_route_output_key(net, &fl);
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- goto tx_error;
+ struct rtable *rt = (struct rtable *)ndst;
+
+ if (!rt) {
+ struct flowi4 fl = {
+ .daddr = dst->ipv4.s_addr,
+ .saddr = src->ipv4.s_addr,
+ .flowi4_mark = skb->mark,
+ .flowi4_proto = IPPROTO_UDP
+ };
+ rt = ip_route_output_key(net, &fl);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ goto tx_error;
+ }
+ dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
}
ttl = ip4_dst_hoplimit(&rt->dst);
@@ -182,17 +188,19 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
dst->port, false, true);
#if IS_ENABLED(CONFIG_IPV6)
} else {
- struct dst_entry *ndst;
- struct flowi6 fl6 = {
- .flowi6_oif = ub->ifindex,
- .daddr = dst->ipv6,
- .saddr = src->ipv6,
- .flowi6_proto = IPPROTO_UDP
- };
- err = ipv6_stub->ipv6_dst_lookup(net, ub->ubsock->sk, &ndst,
- &fl6);
- if (err)
- goto tx_error;
+ if (!ndst) {
+ struct flowi6 fl6 = {
+ .flowi6_oif = ub->ifindex,
+ .daddr = dst->ipv6,
+ .saddr = src->ipv6,
+ .flowi6_proto = IPPROTO_UDP
+ };
+ err = ipv6_stub->ipv6_dst_lookup(net, ub->ubsock->sk,
+ &ndst, &fl6);
+ if (err)
+ goto tx_error;
+ dst_cache_set_ip6(cache, ndst, &fl6.saddr);
+ }
ttl = ip6_dst_hoplimit(ndst);
err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL,
&src->ipv6, &dst->ipv6, 0, ttl, 0,
@@ -230,7 +238,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
}
if (addr->broadcast != TIPC_REPLICAST_SUPPORT)
- return tipc_udp_xmit(net, skb, ub, src, dst);
+ return tipc_udp_xmit(net, skb, ub, src, dst,
+ &ub->rcast.dst_cache);
/* Replicast, send an skb to each configured IP address */
list_for_each_entry_rcu(rcast, &ub->rcast.list, list) {
@@ -242,7 +251,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
goto out;
}
- err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr);
+ err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr,
+ &rcast->dst_cache);
if (err)
goto out;
}
@@ -286,6 +296,11 @@ static int tipc_udp_rcast_add(struct tipc_bearer *b,
if (!rcast)
return -ENOMEM;
+ if (dst_cache_init(&rcast->dst_cache, GFP_ATOMIC)) {
+ kfree(rcast);
+ return -ENOMEM;
+ }
+
memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr));
if (ntohs(addr->proto) == ETH_P_IP)
@@ -742,6 +757,10 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
tuncfg.encap_destroy = NULL;
setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
+ err = dst_cache_init(&ub->rcast.dst_cache, GFP_ATOMIC);
+ if (err)
+ goto free;
+
/**
* The bcast media address port is used for all peers and the ip
* is used if it's a multicast address.
@@ -752,12 +771,14 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
else
err = tipc_udp_rcast_add(b, &remote);
if (err)
- goto err;
+ goto free;
return 0;
+
+free:
+ dst_cache_destroy(&ub->rcast.dst_cache);
+ udp_tunnel_sock_release(ub->ubsock);
err:
- if (ub->ubsock)
- udp_tunnel_sock_release(ub->ubsock);
kfree(ub);
return err;
}
@@ -769,12 +790,13 @@ static void cleanup_bearer(struct work_struct *work)
struct udp_replicast *rcast, *tmp;
list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
+ dst_cache_destroy(&rcast->dst_cache);
list_del_rcu(&rcast->list);
kfree_rcu(rcast, rcu);
}
- if (ub->ubsock)
- udp_tunnel_sock_release(ub->ubsock);
+ dst_cache_destroy(&ub->rcast.dst_cache);
+ udp_tunnel_sock_release(ub->ubsock);
synchronize_net();
kfree(ub);
}
@@ -789,8 +811,7 @@ static void tipc_udp_disable(struct tipc_bearer *b)
pr_err("UDP bearer instance not found\n");
return;
}
- if (ub->ubsock)
- sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
+ sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
RCU_INIT_POINTER(ub->bearer, NULL);
/* sock_release need to be done outside of rtnl lock */
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index fc81ae18cc44..e2b69e805d46 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -279,7 +279,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
goto skip_tx_cleanup;
}
- if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
+ if (unlikely(sk->sk_write_pending) &&
+ !wait_on_pending_writer(sk, &timeo))
tls_handle_open_record(sk, 0);
/* We need these for tls_sw_fallback handling of other packets */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index a14e8864e4fa..74417a851ed5 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -37,6 +37,12 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
READ_ONCE(xs->umem->fq);
}
+bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
+{
+ return xskq_has_addrs(umem->fq, cnt);
+}
+EXPORT_SYMBOL(xsk_umem_has_addrs);
+
u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
{
return xskq_peek_addr(umem->fq, addr);
@@ -166,22 +172,18 @@ void xsk_umem_consume_tx_done(struct xdp_umem *umem)
}
EXPORT_SYMBOL(xsk_umem_consume_tx_done);
-bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len)
+bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
{
- struct xdp_desc desc;
struct xdp_sock *xs;
rcu_read_lock();
list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
- if (!xskq_peek_desc(xs->tx, &desc))
+ if (!xskq_peek_desc(xs->tx, desc))
continue;
- if (xskq_produce_addr_lazy(umem->cq, desc.addr))
+ if (xskq_produce_addr_lazy(umem->cq, desc->addr))
goto out;
- *dma = xdp_umem_get_dma(umem, desc.addr);
- *len = desc.len;
-
xskq_discard_desc(xs->tx);
rcu_read_unlock();
return true;
@@ -644,6 +646,26 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
+ case XDP_OPTIONS:
+ {
+ struct xdp_options opts = {};
+
+ if (len < sizeof(opts))
+ return -EINVAL;
+
+ mutex_lock(&xs->mutex);
+ if (xs->zc)
+ opts.flags |= XDP_OPTIONS_ZEROCOPY;
+ mutex_unlock(&xs->mutex);
+
+ len = sizeof(opts);
+ if (copy_to_user(optval, &opts, len))
+ return -EFAULT;
+ if (put_user(len, optlen))
+ return -EFAULT;
+
+ return 0;
+ }
default:
break;
}
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 88b9ae24658d..12b49784a6d5 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -117,6 +117,20 @@ static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
return q->nentries - (producer - q->cons_tail);
}
+static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)
+{
+ u32 entries = q->prod_tail - q->cons_tail;
+
+ if (entries >= cnt)
+ return true;
+
+ /* Refresh the local pointer. */
+ q->prod_tail = READ_ONCE(q->ring->producer);
+ entries = q->prod_tail - q->cons_tail;
+
+ return entries >= cnt;
+}
+
/* UMEM queue */
static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index ff654306d836..189ef15acbbc 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -271,9 +271,8 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
return false;
if ((!dev || (dev == xfrm_dst_path(dst)->dev)) &&
- (!xdst->child->xfrm && x->type->get_mtu)) {
- mtu = x->type->get_mtu(x, xdst->child_mtu_cached);
-
+ (!xdst->child->xfrm)) {
+ mtu = xfrm_state_mtu(x, xdst->child_mtu_cached);
if (skb->len <= mtu)
goto ok;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 314973aaa414..6088bc2dc11e 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -359,28 +359,29 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
if (likely(afinfo))
err = afinfo->extract_input(x, skb);
+ rcu_read_unlock();
- if (err) {
- rcu_read_unlock();
+ if (err)
return err;
- }
if (x->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
- if (!inner_mode) {
- rcu_read_unlock();
+ if (!inner_mode)
return -EAFNOSUPPORT;
- }
}
- afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
- if (unlikely(!afinfo)) {
- rcu_read_unlock();
- return -EAFNOSUPPORT;
+ switch (inner_mode->family) {
+ case AF_INET:
+ skb->protocol = htons(ETH_P_IP);
+ break;
+ case AF_INET6:
+ skb->protocol = htons(ETH_P_IPV6);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
}
- skb->protocol = afinfo->eth_proto;
- rcu_read_unlock();
return xfrm_inner_mode_encap_remove(x, inner_mode, skb);
}
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index ad3a2555c517..f8eb9e342173 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -793,11 +793,6 @@ static void __net_exit xfrmi_destroy_interfaces(struct xfrmi_net *xfrmn)
unregister_netdevice_many(&list);
}
-static int __net_init xfrmi_init_net(struct net *net)
-{
- return 0;
-}
-
static void __net_exit xfrmi_exit_net(struct net *net)
{
struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
@@ -808,7 +803,6 @@ static void __net_exit xfrmi_exit_net(struct net *net)
}
static struct pernet_operations xfrmi_net_ops = {
- .init = xfrmi_init_net,
.exit = xfrmi_exit_net,
.id = &xfrmi_net_id,
.size = sizeof(struct xfrmi_net),
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b1694d5d15d3..1070dfece76b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3628,7 +3628,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
xfrm_nr = ti;
if (npols > 1) {
- xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);
+ xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
tpp = stp;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 50621d982970..c6f3c4a1bd99 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -27,6 +27,8 @@
#include <linux/interrupt.h>
#include <linux/kernel.h>
+#include <crypto/aead.h>
+
#include "xfrm_hash.h"
#define xfrm_state_deref_prot(table, net) \
@@ -177,63 +179,132 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
static bool km_is_alive(const struct km_event *c);
void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
-static DEFINE_SPINLOCK(xfrm_type_lock);
int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
{
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- const struct xfrm_type **typemap;
int err = 0;
- if (unlikely(afinfo == NULL))
+ if (!afinfo)
return -EAFNOSUPPORT;
- typemap = afinfo->type_map;
- spin_lock_bh(&xfrm_type_lock);
- if (likely(typemap[type->proto] == NULL))
- typemap[type->proto] = type;
- else
- err = -EEXIST;
- spin_unlock_bh(&xfrm_type_lock);
+#define X(afi, T, name) do { \
+ WARN_ON((afi)->type_ ## name); \
+ (afi)->type_ ## name = (T); \
+ } while (0)
+
+ switch (type->proto) {
+ case IPPROTO_COMP:
+ X(afinfo, type, comp);
+ break;
+ case IPPROTO_AH:
+ X(afinfo, type, ah);
+ break;
+ case IPPROTO_ESP:
+ X(afinfo, type, esp);
+ break;
+ case IPPROTO_IPIP:
+ X(afinfo, type, ipip);
+ break;
+ case IPPROTO_DSTOPTS:
+ X(afinfo, type, dstopts);
+ break;
+ case IPPROTO_ROUTING:
+ X(afinfo, type, routing);
+ break;
+ case IPPROTO_IPV6:
+ X(afinfo, type, ipip6);
+ break;
+ default:
+ WARN_ON(1);
+ err = -EPROTONOSUPPORT;
+ break;
+ }
+#undef X
rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(xfrm_register_type);
-int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
+void xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
{
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- const struct xfrm_type **typemap;
- int err = 0;
if (unlikely(afinfo == NULL))
- return -EAFNOSUPPORT;
- typemap = afinfo->type_map;
- spin_lock_bh(&xfrm_type_lock);
+ return;
- if (unlikely(typemap[type->proto] != type))
- err = -ENOENT;
- else
- typemap[type->proto] = NULL;
- spin_unlock_bh(&xfrm_type_lock);
+#define X(afi, T, name) do { \
+ WARN_ON((afi)->type_ ## name != (T)); \
+ (afi)->type_ ## name = NULL; \
+ } while (0)
+
+ switch (type->proto) {
+ case IPPROTO_COMP:
+ X(afinfo, type, comp);
+ break;
+ case IPPROTO_AH:
+ X(afinfo, type, ah);
+ break;
+ case IPPROTO_ESP:
+ X(afinfo, type, esp);
+ break;
+ case IPPROTO_IPIP:
+ X(afinfo, type, ipip);
+ break;
+ case IPPROTO_DSTOPTS:
+ X(afinfo, type, dstopts);
+ break;
+ case IPPROTO_ROUTING:
+ X(afinfo, type, routing);
+ break;
+ case IPPROTO_IPV6:
+ X(afinfo, type, ipip6);
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+#undef X
rcu_read_unlock();
- return err;
}
EXPORT_SYMBOL(xfrm_unregister_type);
static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
{
+ const struct xfrm_type *type = NULL;
struct xfrm_state_afinfo *afinfo;
- const struct xfrm_type **typemap;
- const struct xfrm_type *type;
int modload_attempted = 0;
retry:
afinfo = xfrm_state_get_afinfo(family);
if (unlikely(afinfo == NULL))
return NULL;
- typemap = afinfo->type_map;
- type = READ_ONCE(typemap[proto]);
+ switch (proto) {
+ case IPPROTO_COMP:
+ type = afinfo->type_comp;
+ break;
+ case IPPROTO_AH:
+ type = afinfo->type_ah;
+ break;
+ case IPPROTO_ESP:
+ type = afinfo->type_esp;
+ break;
+ case IPPROTO_IPIP:
+ type = afinfo->type_ipip;
+ break;
+ case IPPROTO_DSTOPTS:
+ type = afinfo->type_dstopts;
+ break;
+ case IPPROTO_ROUTING:
+ type = afinfo->type_routing;
+ break;
+ case IPPROTO_IPV6:
+ type = afinfo->type_ipip6;
+ break;
+ default:
+ break;
+ }
+
if (unlikely(type && !try_module_get(type->owner)))
type = NULL;
@@ -253,65 +324,71 @@ static void xfrm_put_type(const struct xfrm_type *type)
module_put(type->owner);
}
-static DEFINE_SPINLOCK(xfrm_type_offload_lock);
int xfrm_register_type_offload(const struct xfrm_type_offload *type,
unsigned short family)
{
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- const struct xfrm_type_offload **typemap;
int err = 0;
if (unlikely(afinfo == NULL))
return -EAFNOSUPPORT;
- typemap = afinfo->type_offload_map;
- spin_lock_bh(&xfrm_type_offload_lock);
- if (likely(typemap[type->proto] == NULL))
- typemap[type->proto] = type;
- else
- err = -EEXIST;
- spin_unlock_bh(&xfrm_type_offload_lock);
+ switch (type->proto) {
+ case IPPROTO_ESP:
+ WARN_ON(afinfo->type_offload_esp);
+ afinfo->type_offload_esp = type;
+ break;
+ default:
+ WARN_ON(1);
+ err = -EPROTONOSUPPORT;
+ break;
+ }
+
rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(xfrm_register_type_offload);
-int xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
- unsigned short family)
+void xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
+ unsigned short family)
{
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- const struct xfrm_type_offload **typemap;
- int err = 0;
if (unlikely(afinfo == NULL))
- return -EAFNOSUPPORT;
- typemap = afinfo->type_offload_map;
- spin_lock_bh(&xfrm_type_offload_lock);
+ return;
- if (unlikely(typemap[type->proto] != type))
- err = -ENOENT;
- else
- typemap[type->proto] = NULL;
- spin_unlock_bh(&xfrm_type_offload_lock);
+ switch (type->proto) {
+ case IPPROTO_ESP:
+ WARN_ON(afinfo->type_offload_esp != type);
+ afinfo->type_offload_esp = NULL;
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
rcu_read_unlock();
- return err;
}
EXPORT_SYMBOL(xfrm_unregister_type_offload);
static const struct xfrm_type_offload *
xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
{
+ const struct xfrm_type_offload *type = NULL;
struct xfrm_state_afinfo *afinfo;
- const struct xfrm_type_offload **typemap;
- const struct xfrm_type_offload *type;
retry:
afinfo = xfrm_state_get_afinfo(family);
if (unlikely(afinfo == NULL))
return NULL;
- typemap = afinfo->type_offload_map;
- type = typemap[proto];
+ switch (proto) {
+ case IPPROTO_ESP:
+ type = afinfo->type_offload_esp;
+ break;
+ default:
+ break;
+ }
+
if ((type && !try_module_get(type->owner)))
type = NULL;
@@ -770,24 +847,79 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
EXPORT_SYMBOL(xfrm_sad_getinfo);
static void
+__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
+{
+ const struct flowi4 *fl4 = &fl->u.ip4;
+
+ sel->daddr.a4 = fl4->daddr;
+ sel->saddr.a4 = fl4->saddr;
+ sel->dport = xfrm_flowi_dport(fl, &fl4->uli);
+ sel->dport_mask = htons(0xffff);
+ sel->sport = xfrm_flowi_sport(fl, &fl4->uli);
+ sel->sport_mask = htons(0xffff);
+ sel->family = AF_INET;
+ sel->prefixlen_d = 32;
+ sel->prefixlen_s = 32;
+ sel->proto = fl4->flowi4_proto;
+ sel->ifindex = fl4->flowi4_oif;
+}
+
+static void
+__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
+{
+ const struct flowi6 *fl6 = &fl->u.ip6;
+
+ /* Initialize temporary selector matching only to current session. */
+ *(struct in6_addr *)&sel->daddr = fl6->daddr;
+ *(struct in6_addr *)&sel->saddr = fl6->saddr;
+ sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
+ sel->dport_mask = htons(0xffff);
+ sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
+ sel->sport_mask = htons(0xffff);
+ sel->family = AF_INET6;
+ sel->prefixlen_d = 128;
+ sel->prefixlen_s = 128;
+ sel->proto = fl6->flowi6_proto;
+ sel->ifindex = fl6->flowi6_oif;
+}
+
+static void
xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
const struct xfrm_tmpl *tmpl,
const xfrm_address_t *daddr, const xfrm_address_t *saddr,
unsigned short family)
{
- struct xfrm_state_afinfo *afinfo = xfrm_state_afinfo_get_rcu(family);
-
- if (!afinfo)
- return;
+ switch (family) {
+ case AF_INET:
+ __xfrm4_init_tempsel(&x->sel, fl);
+ break;
+ case AF_INET6:
+ __xfrm6_init_tempsel(&x->sel, fl);
+ break;
+ }
- afinfo->init_tempsel(&x->sel, fl);
+ x->id = tmpl->id;
- if (family != tmpl->encap_family) {
- afinfo = xfrm_state_afinfo_get_rcu(tmpl->encap_family);
- if (!afinfo)
- return;
+ switch (tmpl->encap_family) {
+ case AF_INET:
+ if (x->id.daddr.a4 == 0)
+ x->id.daddr.a4 = daddr->a4;
+ x->props.saddr = tmpl->saddr;
+ if (x->props.saddr.a4 == 0)
+ x->props.saddr.a4 = saddr->a4;
+ break;
+ case AF_INET6:
+ if (ipv6_addr_any((struct in6_addr *)&x->id.daddr))
+ memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
+ memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
+ if (ipv6_addr_any((struct in6_addr *)&x->props.saddr))
+ memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
+ break;
}
- afinfo->init_temprop(x, tmpl, daddr, saddr);
+
+ x->props.mode = tmpl->mode;
+ x->props.reqid = tmpl->reqid;
+ x->props.family = tmpl->encap_family;
}
static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
@@ -1633,51 +1765,129 @@ xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
EXPORT_SYMBOL(xfrm_find_acq);
#ifdef CONFIG_XFRM_SUB_POLICY
-int
+#if IS_ENABLED(CONFIG_IPV6)
+/* distribution counting sort function for xfrm_state and xfrm_tmpl */
+static void
+__xfrm6_sort(void **dst, void **src, int n,
+ int (*cmp)(const void *p), int maxclass)
+{
+ int count[XFRM_MAX_DEPTH] = { };
+ int class[XFRM_MAX_DEPTH];
+ int i;
+
+ for (i = 0; i < n; i++) {
+ int c = cmp(src[i]);
+
+ class[i] = c;
+ count[c]++;
+ }
+
+ for (i = 2; i < maxclass; i++)
+ count[i] += count[i - 1];
+
+ for (i = 0; i < n; i++) {
+ dst[count[class[i] - 1]++] = src[i];
+ src[i] = NULL;
+ }
+}
+
+/* Rule for xfrm_state:
+ *
+ * rule 1: select IPsec transport except AH
+ * rule 2: select MIPv6 RO or inbound trigger
+ * rule 3: select IPsec transport AH
+ * rule 4: select IPsec tunnel
+ * rule 5: others
+ */
+static int __xfrm6_state_sort_cmp(const void *p)
+{
+ const struct xfrm_state *v = p;
+
+ switch (v->props.mode) {
+ case XFRM_MODE_TRANSPORT:
+ if (v->id.proto != IPPROTO_AH)
+ return 1;
+ else
+ return 3;
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+ case XFRM_MODE_ROUTEOPTIMIZATION:
+ case XFRM_MODE_IN_TRIGGER:
+ return 2;
+#endif
+ case XFRM_MODE_TUNNEL:
+ case XFRM_MODE_BEET:
+ return 4;
+ }
+ return 5;
+}
+
+/* Rule for xfrm_tmpl:
+ *
+ * rule 1: select IPsec transport
+ * rule 2: select MIPv6 RO or inbound trigger
+ * rule 3: select IPsec tunnel
+ * rule 4: others
+ */
+static int __xfrm6_tmpl_sort_cmp(const void *p)
+{
+ const struct xfrm_tmpl *v = p;
+
+ switch (v->mode) {
+ case XFRM_MODE_TRANSPORT:
+ return 1;
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+ case XFRM_MODE_ROUTEOPTIMIZATION:
+ case XFRM_MODE_IN_TRIGGER:
+ return 2;
+#endif
+ case XFRM_MODE_TUNNEL:
+ case XFRM_MODE_BEET:
+ return 3;
+ }
+ return 4;
+}
+#else
+static inline int __xfrm6_state_sort_cmp(const void *p) { return 5; }
+static inline int __xfrm6_tmpl_sort_cmp(const void *p) { return 4; }
+
+static inline void
+__xfrm6_sort(void **dst, void **src, int n,
+ int (*cmp)(const void *p), int maxclass)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ dst[i] = src[i];
+}
+#endif /* CONFIG_IPV6 */
+
+void
xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
- unsigned short family, struct net *net)
+ unsigned short family)
{
int i;
- int err = 0;
- struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- if (!afinfo)
- return -EAFNOSUPPORT;
- spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/
- if (afinfo->tmpl_sort)
- err = afinfo->tmpl_sort(dst, src, n);
+ if (family == AF_INET6)
+ __xfrm6_sort((void **)dst, (void **)src, n,
+ __xfrm6_tmpl_sort_cmp, 5);
else
for (i = 0; i < n; i++)
dst[i] = src[i];
- spin_unlock_bh(&net->xfrm.xfrm_state_lock);
- rcu_read_unlock();
- return err;
}
-EXPORT_SYMBOL(xfrm_tmpl_sort);
-int
+void
xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
unsigned short family)
{
int i;
- int err = 0;
- struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- struct net *net = xs_net(*src);
-
- if (!afinfo)
- return -EAFNOSUPPORT;
- spin_lock_bh(&net->xfrm.xfrm_state_lock);
- if (afinfo->state_sort)
- err = afinfo->state_sort(dst, src, n);
+ if (family == AF_INET6)
+ __xfrm6_sort((void **)dst, (void **)src, n,
+ __xfrm6_state_sort_cmp, 6);
else
for (i = 0; i < n; i++)
dst[i] = src[i];
- spin_unlock_bh(&net->xfrm.xfrm_state_lock);
- rcu_read_unlock();
- return err;
}
-EXPORT_SYMBOL(xfrm_state_sort);
#endif
/* Silly enough, but I'm lazy to build resolution list */
@@ -2195,38 +2405,49 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
-int xfrm_state_mtu(struct xfrm_state *x, int mtu)
+u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
const struct xfrm_type *type = READ_ONCE(x->type);
+ struct crypto_aead *aead;
+ u32 blksize, net_adj = 0;
+
+ if (x->km.state != XFRM_STATE_VALID ||
+ !type || type->proto != IPPROTO_ESP)
+ return mtu - x->props.header_len;
+
+ aead = x->data;
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
- if (x->km.state == XFRM_STATE_VALID &&
- type && type->get_mtu)
- return type->get_mtu(x, mtu);
+ switch (x->props.mode) {
+ case XFRM_MODE_TRANSPORT:
+ case XFRM_MODE_BEET:
+ if (x->props.family == AF_INET)
+ net_adj = sizeof(struct iphdr);
+ else if (x->props.family == AF_INET6)
+ net_adj = sizeof(struct ipv6hdr);
+ break;
+ case XFRM_MODE_TUNNEL:
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
- return mtu - x->props.header_len;
+ return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
+ net_adj) & ~(blksize - 1)) + net_adj - 2;
}
+EXPORT_SYMBOL_GPL(xfrm_state_mtu);
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
{
- const struct xfrm_state_afinfo *afinfo;
const struct xfrm_mode *inner_mode;
const struct xfrm_mode *outer_mode;
int family = x->props.family;
int err;
- err = -EAFNOSUPPORT;
- afinfo = xfrm_state_get_afinfo(family);
- if (!afinfo)
- goto error;
-
- err = 0;
- if (afinfo->init_flags)
- err = afinfo->init_flags(x);
-
- rcu_read_unlock();
-
- if (err)
- goto error;
+ if (family == AF_INET &&
+ xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
+ x->props.flags |= XFRM_STATE_NOPMTUDISC;
err = -EPROTONOSUPPORT;
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 0917f8cf4fab..f90daadfbc89 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -154,6 +154,7 @@ always += tcp_iw_kern.o
always += tcp_clamp_kern.o
always += tcp_basertt_kern.o
always += tcp_tos_reflect_kern.o
+always += tcp_dumpstats_kern.o
always += xdp_redirect_kern.o
always += xdp_redirect_map_kern.o
always += xdp_redirect_cpu_kern.o
@@ -168,6 +169,7 @@ always += task_fd_query_kern.o
always += xdp_sample_pkts_kern.o
always += ibumad_kern.o
always += hbm_out_kern.o
+always += hbm_edt_kern.o
KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/
@@ -272,6 +274,7 @@ $(src)/*.c: verify_target_bpf $(LIBBPF)
$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
$(obj)/hbm.o: $(src)/hbm.h
+$(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
# But, there is no easy way to fix it, so just exclude it since it is
diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh
index e48b047d4646..ffe4c0607341 100755
--- a/samples/bpf/do_hbm_test.sh
+++ b/samples/bpf/do_hbm_test.sh
@@ -14,7 +14,7 @@ Usage() {
echo "loads. The output is the goodput in Mbps (unless -D was used)."
echo ""
echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
- echo " [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E]"
+ echo " [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]"
echo " [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id >]"
echo " [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]"
echo " [-q=<qdisc>] [-R] [-s=<server>|--server=<server]"
@@ -30,6 +30,7 @@ Usage() {
echo " other detailed information. This information is"
echo " test dependent (i.e. iperf3 or netperf)."
echo " -E enable ECN (not required for dctcp)"
+ echo " --edt use fq's Earliest Departure Time (requires fq)"
echo " -f or --flows number of concurrent flows (default=1)"
echo " -i or --id cgroup id (an integer, default is 1)"
echo " -N use netperf instead of iperf3"
@@ -130,13 +131,12 @@ processArgs () {
details=1
;;
-E)
- ecn=1
+ ecn=1
+ ;;
+ --edt)
+ flags="$flags --edt"
+ qdisc="fq"
;;
- # Support for upcomming fq Early Departure Time egress rate limiting
- #--edt)
- # prog="hbm_out_edt_kern.o"
- # qdisc="fq"
- # ;;
-f=*|--flows=*)
flows="${i#*=}"
;;
@@ -228,8 +228,8 @@ if [ "$netem" -ne "0" ] ; then
tc qdisc del dev lo root > /dev/null 2>&1
tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
- tc qdisc del dev lo root > /dev/null 2>&1
- tc qdisc add dev lo root $qdisc > /dev/null 2>&1
+ tc qdisc del dev eth0 root > /dev/null 2>&1
+ tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1
fi
n=0
@@ -399,7 +399,9 @@ fi
if [ "$netem" -ne "0" ] ; then
tc qdisc del dev lo root > /dev/null 2>&1
fi
-
+if [ "$qdisc" != "" ] ; then
+ tc qdisc del dev eth0 root > /dev/null 2>&1
+fi
sleep 2
hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
index b905b32ff185..e0fbab9bec83 100644
--- a/samples/bpf/hbm.c
+++ b/samples/bpf/hbm.c
@@ -62,6 +62,7 @@ bool loopback_flag;
bool debugFlag;
bool work_conserving_flag;
bool no_cn_flag;
+bool edt_flag;
static void Usage(void);
static void read_trace_pipe2(void);
@@ -372,9 +373,14 @@ static int run_bpf_prog(char *prog, int cg_id)
fprintf(fout, "avg rtt:%d\n",
(int)(qstats.sum_rtt / (qstats.pkts_total + 1)));
// Average credit
- fprintf(fout, "avg credit:%d\n",
- (int)(qstats.sum_credit /
- (1500 * ((int)qstats.pkts_total) + 1)));
+ if (edt_flag)
+ fprintf(fout, "avg credit_ms:%.03f\n",
+ (qstats.sum_credit /
+ (qstats.pkts_total + 1.0)) / 1000000.0);
+ else
+ fprintf(fout, "avg credit:%d\n",
+ (int)(qstats.sum_credit /
+ (1500 * ((int)qstats.pkts_total ) + 1)));
// Return values stats
for (k = 0; k < RET_VAL_COUNT; k++) {
@@ -408,6 +414,7 @@ static void Usage(void)
" Where:\n"
" -o indicates egress direction (default)\n"
" -d print BPF trace debug buffer\n"
+ " --edt use fq's Earliest Departure Time\n"
" -l also limit flows using loopback\n"
" -n <#> to create cgroup \"/hbm#\" and attach prog\n"
" Default is /hbm1\n"
@@ -433,6 +440,7 @@ int main(int argc, char **argv)
char *optstring = "iodln:r:st:wh";
struct option loptions[] = {
{"no_cn", 0, NULL, 1},
+ {"edt", 0, NULL, 2},
{NULL, 0, NULL, 0}
};
@@ -441,6 +449,10 @@ int main(int argc, char **argv)
case 1:
no_cn_flag = true;
break;
+ case 2:
+ prog = "hbm_edt_kern.o";
+ edt_flag = true;
+ break;
case'o':
break;
case 'd':
diff --git a/samples/bpf/hbm_edt_kern.c b/samples/bpf/hbm_edt_kern.c
new file mode 100644
index 000000000000..a65b677acdb0
--- /dev/null
+++ b/samples/bpf/hbm_edt_kern.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Sample Host Bandwidth Manager (HBM) BPF program.
+ *
+ * A cgroup skb BPF egress program to limit cgroup output bandwidth.
+ * It uses a modified virtual token bucket queue to limit average
+ * egress bandwidth. The implementation uses credits instead of tokens.
+ * Negative credits imply that queueing would have happened (this is
+ * a virtual queue, so no queueing is done by it. However, queueing may
+ * occur at the actual qdisc (which is not used for rate limiting).
+ *
+ * This implementation uses 3 thresholds, one to start marking packets and
+ * the other two to drop packets:
+ * CREDIT
+ * - <--------------------------|------------------------> +
+ * | | | 0
+ * | Large pkt |
+ * | drop thresh |
+ * Small pkt drop Mark threshold
+ * thresh
+ *
+ * The effect of marking depends on the type of packet:
+ * a) If the packet is ECN enabled and it is a TCP packet, then the packet
+ * is ECN marked.
+ * b) If the packet is a TCP packet, then we probabilistically call tcp_cwr
+ * to reduce the congestion window. The current implementation uses a linear
+ * distribution (0% probability at marking threshold, 100% probability
+ * at drop threshold).
+ * c) If the packet is not a TCP packet, then it is dropped.
+ *
+ * If the credit is below the drop threshold, the packet is dropped. If it
+ * is a TCP packet, then it also calls tcp_cwr since packets dropped by
+ * by a cgroup skb BPF program do not automatically trigger a call to
+ * tcp_cwr in the current kernel code.
+ *
+ * This BPF program actually uses 2 drop thresholds, one threshold
+ * for larger packets (>= 120 bytes) and another for smaller packets. This
+ * protects smaller packets such as SYNs, ACKs, etc.
+ *
+ * The default bandwidth limit is set at 1Gbps but this can be changed by
+ * a user program through a shared BPF map. In addition, by default this BPF
+ * program does not limit connections using loopback. This behavior can be
+ * overwritten by the user program. There is also an option to calculate
+ * some statistics, such as percent of packets marked or dropped, which
+ * a user program, such as hbm, can access.
+ */
+
+#include "hbm_kern.h"
+
+SEC("cgroup_skb/egress")
+int _hbm_out_cg(struct __sk_buff *skb)
+{
+ long long delta = 0, delta_send;
+ unsigned long long curtime, sendtime;
+ struct hbm_queue_stats *qsp = NULL;
+ unsigned int queue_index = 0;
+ bool congestion_flag = false;
+ bool ecn_ce_flag = false;
+ struct hbm_pkt_info pkti = {};
+ struct hbm_vqueue *qdp;
+ bool drop_flag = false;
+ bool cwr_flag = false;
+ int len = skb->len;
+ int rv = ALLOW_PKT;
+
+ qsp = bpf_map_lookup_elem(&queue_stats, &queue_index);
+
+ // Check if we should ignore loopback traffic
+ if (qsp != NULL && !qsp->loopback && (skb->ifindex == 1))
+ return ALLOW_PKT;
+
+ hbm_get_pkt_info(skb, &pkti);
+
+ // We may want to account for the length of headers in len
+ // calculation, like ETH header + overhead, specially if it
+ // is a gso packet. But I am not doing it right now.
+
+ qdp = bpf_get_local_storage(&queue_state, 0);
+ if (!qdp)
+ return ALLOW_PKT;
+ if (qdp->lasttime == 0)
+ hbm_init_edt_vqueue(qdp, 1024);
+
+ curtime = bpf_ktime_get_ns();
+
+ // Begin critical section
+ bpf_spin_lock(&qdp->lock);
+ delta = qdp->lasttime - curtime;
+ // bound bursts to 100us
+ if (delta < -BURST_SIZE_NS) {
+ // negative delta is a credit that allows bursts
+ qdp->lasttime = curtime - BURST_SIZE_NS;
+ delta = -BURST_SIZE_NS;
+ }
+ sendtime = qdp->lasttime;
+ delta_send = BYTES_TO_NS(len, qdp->rate);
+ __sync_add_and_fetch(&(qdp->lasttime), delta_send);
+ bpf_spin_unlock(&qdp->lock);
+ // End critical section
+
+ // Set EDT of packet
+ skb->tstamp = sendtime;
+
+ // Check if we should update rate
+ if (qsp != NULL && (qsp->rate * 128) != qdp->rate)
+ qdp->rate = qsp->rate * 128;
+
+ // Set flags (drop, congestion, cwr)
+ // last packet will be sent in the future, bound latency
+ if (delta > DROP_THRESH_NS || (delta > LARGE_PKT_DROP_THRESH_NS &&
+ len > LARGE_PKT_THRESH)) {
+ drop_flag = true;
+ if (pkti.is_tcp && pkti.ecn == 0)
+ cwr_flag = true;
+ } else if (delta > MARK_THRESH_NS) {
+ if (pkti.is_tcp)
+ congestion_flag = true;
+ else
+ drop_flag = true;
+ }
+
+ if (congestion_flag) {
+ if (bpf_skb_ecn_set_ce(skb)) {
+ ecn_ce_flag = true;
+ } else {
+ if (pkti.is_tcp) {
+ unsigned int rand = bpf_get_prandom_u32();
+
+ if (delta >= MARK_THRESH_NS +
+ (rand % MARK_REGION_SIZE_NS)) {
+ // Do congestion control
+ cwr_flag = true;
+ }
+ } else if (len > LARGE_PKT_THRESH) {
+ // Problem if too many small packets?
+ drop_flag = true;
+ congestion_flag = false;
+ }
+ }
+ }
+
+ if (pkti.is_tcp && drop_flag && pkti.packets_out <= 1) {
+ drop_flag = false;
+ cwr_flag = true;
+ congestion_flag = false;
+ }
+
+ if (qsp != NULL && qsp->no_cn)
+ cwr_flag = false;
+
+ hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag,
+ cwr_flag, ecn_ce_flag, &pkti, (int) delta);
+
+ if (drop_flag) {
+ __sync_add_and_fetch(&(qdp->lasttime), -delta_send);
+ rv = DROP_PKT;
+ }
+
+ if (cwr_flag)
+ rv |= CWR;
+ return rv;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h
index be19cf1d5cd5..aa207a2eebbd 100644
--- a/samples/bpf/hbm_kern.h
+++ b/samples/bpf/hbm_kern.h
@@ -29,6 +29,7 @@
#define DROP_PKT 0
#define ALLOW_PKT 1
#define TCP_ECN_OK 1
+#define CWR 2
#ifndef HBM_DEBUG // Define HBM_DEBUG to enable debugging
#undef bpf_printk
@@ -45,8 +46,18 @@
#define MAX_CREDIT (100 * MAX_BYTES_PER_PACKET)
#define INIT_CREDIT (INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)
+// Time base accounting for fq's EDT
+#define BURST_SIZE_NS 100000 // 100us
+#define MARK_THRESH_NS 50000 // 50us
+#define DROP_THRESH_NS 500000 // 500us
+// Reserve 20us of queuing for small packets (less than 120 bytes)
+#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000)
+#define MARK_REGION_SIZE_NS (LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)
+
// rate in bytes per ns << 20
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
+#define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
+#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
struct bpf_map_def SEC("maps") queue_state = {
.type = BPF_MAP_TYPE_CGROUP_STORAGE,
@@ -67,6 +78,7 @@ BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats);
struct hbm_pkt_info {
int cwnd;
int rtt;
+ int packets_out;
bool is_ip;
bool is_tcp;
short ecn;
@@ -86,16 +98,20 @@ static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
if (tp) {
pkti->cwnd = tp->snd_cwnd;
pkti->rtt = tp->srtt_us >> 3;
+ pkti->packets_out = tp->packets_out;
return 0;
}
}
}
}
+ pkti->cwnd = 0;
+ pkti->rtt = 0;
+ pkti->packets_out = 0;
return 1;
}
-static __always_inline void hbm_get_pkt_info(struct __sk_buff *skb,
- struct hbm_pkt_info *pkti)
+static void hbm_get_pkt_info(struct __sk_buff *skb,
+ struct hbm_pkt_info *pkti)
{
struct iphdr iph;
struct ipv6hdr *ip6h;
@@ -123,10 +139,22 @@ static __always_inline void hbm_get_pkt_info(struct __sk_buff *skb,
static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
{
- bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
- qdp->lasttime = bpf_ktime_get_ns();
- qdp->credit = INIT_CREDIT;
- qdp->rate = rate * 128;
+ bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
+ qdp->lasttime = bpf_ktime_get_ns();
+ qdp->credit = INIT_CREDIT;
+ qdp->rate = rate * 128;
+}
+
+static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp,
+ int rate)
+{
+ unsigned long long curtime;
+
+ curtime = bpf_ktime_get_ns();
+ bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
+ qdp->lasttime = curtime - BURST_SIZE_NS; // support initial burst
+ qdp->credit = 0; // not used
+ qdp->rate = rate * 128;
}
static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c
index 38b2b3f22049..f281df7e0089 100644
--- a/samples/bpf/ibumad_kern.c
+++ b/samples/bpf/ibumad_kern.c
@@ -31,15 +31,9 @@ struct bpf_map_def SEC("maps") write_count = {
};
#undef DEBUG
-#ifdef DEBUG
-#define bpf_debug(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-#else
-#define bpf_debug(fmt, ...)
+#ifndef DEBUG
+#undef bpf_printk
+#define bpf_printk(fmt, ...)
#endif
/* Taken from the current format defined in
@@ -86,7 +80,7 @@ int on_ib_umad_read_recv(struct ib_umad_rw_args *ctx)
u64 zero = 0, *val;
u8 class = ctx->mgmt_class;
- bpf_debug("ib_umad read recv : class 0x%x\n", class);
+ bpf_printk("ib_umad read recv : class 0x%x\n", class);
val = bpf_map_lookup_elem(&read_count, &class);
if (!val) {
@@ -106,7 +100,7 @@ int on_ib_umad_read_send(struct ib_umad_rw_args *ctx)
u64 zero = 0, *val;
u8 class = ctx->mgmt_class;
- bpf_debug("ib_umad read send : class 0x%x\n", class);
+ bpf_printk("ib_umad read send : class 0x%x\n", class);
val = bpf_map_lookup_elem(&read_count, &class);
if (!val) {
@@ -126,7 +120,7 @@ int on_ib_umad_write(struct ib_umad_rw_args *ctx)
u64 zero = 0, *val;
u8 class = ctx->mgmt_class;
- bpf_debug("ib_umad write : class 0x%x\n", class);
+ bpf_printk("ib_umad write : class 0x%x\n", class);
val = bpf_map_lookup_elem(&write_count, &class);
if (!val) {
diff --git a/samples/bpf/tcp_bpf.readme b/samples/bpf/tcp_bpf.readme
index fee746621aec..78e247f62108 100644
--- a/samples/bpf/tcp_bpf.readme
+++ b/samples/bpf/tcp_bpf.readme
@@ -25,4 +25,4 @@ attached to the cgroupv2).
To remove (unattach) a socket_ops BPF program from a cgroupv2:
- bpftool cgroup attach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog
+ bpftool cgroup detach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog
diff --git a/samples/bpf/tcp_dumpstats_kern.c b/samples/bpf/tcp_dumpstats_kern.c
new file mode 100644
index 000000000000..8557913106a0
--- /dev/null
+++ b/samples/bpf/tcp_dumpstats_kern.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Refer to samples/bpf/tcp_bpf.readme for the instructions on
+ * how to run this sample program.
+ */
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define INTERVAL 1000000000ULL
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __u32 type;
+ __u32 map_flags;
+ int *key;
+ __u64 *value;
+} bpf_next_dump SEC(".maps") = {
+ .type = BPF_MAP_TYPE_SK_STORAGE,
+ .map_flags = BPF_F_NO_PREALLOC,
+};
+
+SEC("sockops")
+int _sockops(struct bpf_sock_ops *ctx)
+{
+ struct bpf_tcp_sock *tcp_sk;
+ struct bpf_sock *sk;
+ __u64 *next_dump;
+ __u64 now;
+
+ switch (ctx->op) {
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_RTT_CB_FLAG);
+ return 1;
+ case BPF_SOCK_OPS_RTT_CB:
+ break;
+ default:
+ return 1;
+ }
+
+ sk = ctx->sk;
+ if (!sk)
+ return 1;
+
+ next_dump = bpf_sk_storage_get(&bpf_next_dump, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!next_dump)
+ return 1;
+
+ now = bpf_ktime_get_ns();
+ if (now < *next_dump)
+ return 1;
+
+ tcp_sk = bpf_tcp_sock(sk);
+ if (!tcp_sk)
+ return 1;
+
+ *next_dump = now + INTERVAL;
+
+ bpf_printk("dsack_dups=%u delivered=%u\n",
+ tcp_sk->dsack_dups, tcp_sk->delivered);
+ bpf_printk("delivered_ce=%u icsk_retransmits=%u\n",
+ tcp_sk->delivered_ce, tcp_sk->icsk_retransmits);
+
+ return 1;
+}
diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
index 586ff751aba9..a3596b617c4c 100644
--- a/samples/bpf/xdp_adjust_tail_user.c
+++ b/samples/bpf/xdp_adjust_tail_user.c
@@ -13,6 +13,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <net/if.h>
#include <sys/resource.h>
#include <arpa/inet.h>
#include <netinet/ether.h>
@@ -69,7 +70,7 @@ static void usage(const char *cmd)
printf("Start a XDP prog which send ICMP \"packet too big\" \n"
"messages if ingress packet is bigger then MAX_SIZE bytes\n");
printf("Usage: %s [...]\n", cmd);
- printf(" -i <ifindex> Interface Index\n");
+ printf(" -i <ifname|ifindex> Interface\n");
printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n");
printf(" -S use skb-mode\n");
printf(" -N enforce native mode\n");
@@ -102,7 +103,9 @@ int main(int argc, char **argv)
switch (opt) {
case 'i':
- ifindex = atoi(optarg);
+ ifindex = if_nametoindex(optarg);
+ if (!ifindex)
+ ifindex = atoi(optarg);
break;
case 'T':
kill_after_s = atoi(optarg);
@@ -136,6 +139,11 @@ int main(int argc, char **argv)
return 1;
}
+ if (!ifindex) {
+ fprintf(stderr, "Invalid ifname\n");
+ return 1;
+ }
+
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index 15bb6f67f9c3..f70ee33907fd 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -10,6 +10,7 @@
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
+#include <net/if.h>
#include <unistd.h>
#include <libgen.h>
#include <sys/resource.h>
@@ -85,7 +86,7 @@ static void poll_stats(int interval, int ifindex)
static void usage(const char *prog)
{
fprintf(stderr,
- "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n"
+ "usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n"
"OPTS:\n"
" -S use skb-mode\n"
" -N enforce native mode\n"
@@ -127,7 +128,7 @@ int main(int argc, char **argv)
}
if (optind == argc) {
- printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]);
+ printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
return 1;
}
@@ -136,8 +137,14 @@ int main(int argc, char **argv)
return 1;
}
- ifindex_in = strtoul(argv[optind], NULL, 0);
- ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+ ifindex_in = if_nametoindex(argv[optind]);
+ if (!ifindex_in)
+ ifindex_in = strtoul(argv[optind], NULL, 0);
+
+ ifindex_out = if_nametoindex(argv[optind + 1]);
+ if (!ifindex_out)
+ ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+
printf("input: %d output: %d\n", ifindex_in, ifindex_out);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index ce71be187205..39de06f3ec25 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -10,6 +10,7 @@
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
+#include <net/if.h>
#include <unistd.h>
#include <libgen.h>
#include <sys/resource.h>
@@ -85,7 +86,7 @@ static void poll_stats(int interval, int ifindex)
static void usage(const char *prog)
{
fprintf(stderr,
- "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n"
+ "usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n"
"OPTS:\n"
" -S use skb-mode\n"
" -N enforce native mode\n"
@@ -128,7 +129,7 @@ int main(int argc, char **argv)
}
if (optind == argc) {
- printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]);
+ printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
return 1;
}
@@ -137,8 +138,14 @@ int main(int argc, char **argv)
return 1;
}
- ifindex_in = strtoul(argv[optind], NULL, 0);
- ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+ ifindex_in = if_nametoindex(argv[optind]);
+ if (!ifindex_in)
+ ifindex_in = strtoul(argv[optind], NULL, 0);
+
+ ifindex_out = if_nametoindex(argv[optind + 1]);
+ if (!ifindex_out)
+ ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+
printf("input: %d output: %d\n", ifindex_in, ifindex_out);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index 394896430712..dfb68582e243 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -9,6 +9,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <net/if.h>
#include <sys/resource.h>
#include <arpa/inet.h>
#include <netinet/ether.h>
@@ -83,7 +84,7 @@ static void usage(const char *cmd)
"in an IPv4/v6 header and XDP_TX it out. The dst <VIP:PORT>\n"
"is used to select packets to encapsulate\n\n");
printf("Usage: %s [...]\n", cmd);
- printf(" -i <ifindex> Interface Index\n");
+ printf(" -i <ifname|ifindex> Interface\n");
printf(" -a <vip-service-address> IPv4 or IPv6\n");
printf(" -p <vip-service-port> A port range (e.g. 433-444) is also allowed\n");
printf(" -s <source-ip> Used in the IPTunnel header\n");
@@ -181,7 +182,9 @@ int main(int argc, char **argv)
switch (opt) {
case 'i':
- ifindex = atoi(optarg);
+ ifindex = if_nametoindex(optarg);
+ if (!ifindex)
+ ifindex = atoi(optarg);
break;
case 'a':
vip.family = parse_ipstr(optarg, vip.daddr.v6);
@@ -253,6 +256,11 @@ int main(int argc, char **argv)
return 1;
}
+ if (!ifindex) {
+ fprintf(stderr, "Invalid ifname\n");
+ return 1;
+ }
+
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
prog_load_attr.file = filename;
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 0f5eb0d7f2df..93eaaf7239b2 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -68,6 +68,7 @@ static int opt_queue;
static int opt_poll;
static int opt_interval = 1;
static u32 opt_xdp_bind_flags;
+static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
static __u32 prog_id;
struct xsk_umem_info {
@@ -276,6 +277,12 @@ static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
{
struct xsk_umem_info *umem;
+ struct xsk_umem_config cfg = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = opt_xsk_frame_size,
+ .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
+ };
int ret;
umem = calloc(1, sizeof(*umem));
@@ -283,7 +290,7 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
exit_with_error(errno);
ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
- NULL);
+ &cfg);
if (ret)
exit_with_error(-ret);
@@ -323,11 +330,9 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem)
&idx);
if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
exit_with_error(-ret);
- for (i = 0;
- i < XSK_RING_PROD__DEFAULT_NUM_DESCS *
- XSK_UMEM__DEFAULT_FRAME_SIZE;
- i += XSK_UMEM__DEFAULT_FRAME_SIZE)
- *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = i;
+ for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
+ *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) =
+ i * opt_xsk_frame_size;
xsk_ring_prod__submit(&xsk->umem->fq,
XSK_RING_PROD__DEFAULT_NUM_DESCS);
@@ -346,6 +351,7 @@ static struct option long_options[] = {
{"interval", required_argument, 0, 'n'},
{"zero-copy", no_argument, 0, 'z'},
{"copy", no_argument, 0, 'c'},
+ {"frame-size", required_argument, 0, 'f'},
{0, 0, 0, 0}
};
@@ -365,8 +371,9 @@ static void usage(const char *prog)
" -n, --interval=n Specify statistics update interval (default 1 sec).\n"
" -z, --zero-copy Force zero-copy mode.\n"
" -c, --copy Force copy mode.\n"
+ " -f, --frame-size=n Set the frame size (must be a power of two, default is %d).\n"
"\n";
- fprintf(stderr, str, prog);
+ fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
exit(EXIT_FAILURE);
}
@@ -377,7 +384,7 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "Frtli:q:psSNn:cz", long_options,
+ c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:", long_options,
&option_index);
if (c == -1)
break;
@@ -420,6 +427,9 @@ static void parse_command_line(int argc, char **argv)
case 'F':
opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
+ case 'f':
+ opt_xsk_frame_size = atoi(optarg);
+ break;
default:
usage(basename(argv[0]));
}
@@ -432,6 +442,11 @@ static void parse_command_line(int argc, char **argv)
usage(basename(argv[0]));
}
+ if (opt_xsk_frame_size & (opt_xsk_frame_size - 1)) {
+ fprintf(stderr, "--frame-size=%d is not a power of two\n",
+ opt_xsk_frame_size);
+ usage(basename(argv[0]));
+ }
}
static void kick_tx(struct xsk_socket_info *xsk)
@@ -583,8 +598,7 @@ static void tx_only(struct xsk_socket_info *xsk)
for (i = 0; i < BATCH_SIZE; i++) {
xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr
- = (frame_nb + i) <<
- XSK_UMEM__DEFAULT_FRAME_SHIFT;
+ = (frame_nb + i) * opt_xsk_frame_size;
xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
sizeof(pkt_data) - 1;
}
@@ -661,21 +675,19 @@ int main(int argc, char **argv)
}
ret = posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
- NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE);
+ NUM_FRAMES * opt_xsk_frame_size);
if (ret)
exit_with_error(ret);
/* Create sockets... */
- umem = xsk_configure_umem(bufs,
- NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE);
+ umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
xsks[num_socks++] = xsk_configure_socket(umem);
if (opt_bench == BENCH_TXONLY) {
int i;
- for (i = 0; i < NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
- i += XSK_UMEM__DEFAULT_FRAME_SIZE)
- (void)gen_eth_frame(umem, i);
+ for (i = 0; i < NUM_FRAMES; i++)
+ (void)gen_eth_frame(umem, i * opt_xsk_frame_size);
}
signal(SIGINT, int_exit);
diff --git a/samples/pidfd/pidfd-metadata.c b/samples/pidfd/pidfd-metadata.c
index 14b454448429..c459155daf9a 100644
--- a/samples/pidfd/pidfd-metadata.c
+++ b/samples/pidfd/pidfd-metadata.c
@@ -83,7 +83,7 @@ static int pidfd_metadata_fd(pid_t pid, int pidfd)
int main(int argc, char *argv[])
{
- int pidfd = 0, ret = EXIT_FAILURE;
+ int pidfd = -1, ret = EXIT_FAILURE;
char buf[4096] = { 0 };
pid_t pid;
int procfd, statusfd;
@@ -91,7 +91,11 @@ int main(int argc, char *argv[])
pid = pidfd_clone(CLONE_PIDFD, &pidfd);
if (pid < 0)
- exit(ret);
+ err(ret, "CLONE_PIDFD");
+ if (pidfd == -1) {
+ warnx("CLONE_PIDFD is not supported by the kernel");
+ goto out;
+ }
procfd = pidfd_metadata_fd(pid, pidfd);
close(pidfd);
diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst
index ff8929da61c5..fd39215db508 100644
--- a/samples/pktgen/README.rst
+++ b/samples/pktgen/README.rst
@@ -20,6 +20,7 @@ across the sample scripts. Usage example is printed on errors::
-s : ($PKT_SIZE) packet size
-d : ($DEST_IP) destination IP
-m : ($DST_MAC) destination MAC-addr
+ -p : ($DST_PORT) destination PORT range (e.g. 433-444) is also allowed
-t : ($THREADS) threads to start
-f : ($F_THREAD) index of first thread (zero indexed CPU number)
-c : ($SKB_CLONE) SKB clones send before alloc new SKB
diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh
index f8bb3cd0f4ce..4af4046d71be 100644
--- a/samples/pktgen/functions.sh
+++ b/samples/pktgen/functions.sh
@@ -162,3 +162,37 @@ function get_node_cpus()
echo $node_cpu_list
}
+
+# Given a single or range of port(s), return minimum and maximum port number.
+function parse_ports()
+{
+ local port_str=$1
+ local port_list
+ local min_port
+ local max_port
+
+ IFS="-" read -ra port_list <<< $port_str
+
+ min_port=${port_list[0]}
+ max_port=${port_list[1]:-$min_port}
+
+ echo $min_port $max_port
+}
+
+# Given a minimum and maximum port, verify port number.
+function validate_ports()
+{
+ local min_port=$1
+ local max_port=$2
+
+ # 0 < port < 65536
+ if [[ $min_port -gt 0 && $min_port -lt 65536 ]]; then
+ if [[ $max_port -gt 0 && $max_port -lt 65536 ]]; then
+ if [[ $min_port -le $max_port ]]; then
+ return 0
+ fi
+ fi
+ fi
+
+ err 5 "Invalid port(s): $min_port-$max_port"
+}
diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh
index 72fc562876e2..a06b00a0c7b6 100644
--- a/samples/pktgen/parameters.sh
+++ b/samples/pktgen/parameters.sh
@@ -10,6 +10,7 @@ function usage() {
echo " -s : (\$PKT_SIZE) packet size"
echo " -d : (\$DEST_IP) destination IP"
echo " -m : (\$DST_MAC) destination MAC-addr"
+ echo " -p : (\$DST_PORT) destination PORT range (e.g. 433-444) is also allowed"
echo " -t : (\$THREADS) threads to start"
echo " -f : (\$F_THREAD) index of first thread (zero indexed CPU number)"
echo " -c : (\$SKB_CLONE) SKB clones send before alloc new SKB"
@@ -23,7 +24,7 @@ function usage() {
## --- Parse command line arguments / parameters ---
## echo "Commandline options:"
-while getopts "s:i:d:m:f:t:c:n:b:vxh6" option; do
+while getopts "s:i:d:m:p:f:t:c:n:b:vxh6" option; do
case $option in
i) # interface
export DEV=$OPTARG
@@ -41,6 +42,10 @@ while getopts "s:i:d:m:f:t:c:n:b:vxh6" option; do
export DST_MAC=$OPTARG
info "Destination MAC set to: DST_MAC=$DST_MAC"
;;
+ p) # PORT
+ export DST_PORT=$OPTARG
+ info "Destination PORT set to: DST_PORT=$DST_PORT"
+ ;;
f)
export F_THREAD=$OPTARG
info "Index of first thread (zero indexed CPU number): $F_THREAD"
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
index 2839f7d315cf..e14b1a9144d9 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
@@ -41,6 +41,10 @@ fi
[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
[ -z "$BURST" ] && BURST=1024
[ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely
+if [ -n "$DST_PORT" ]; then
+ read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $DST_MIN $DST_MAX
+fi
# Base Config
DELAY="0" # Zero means max speed
@@ -69,6 +73,13 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "dst_mac $DST_MAC"
pg_set $dev "dst$IP6 $DEST_IP"
+ if [ -n "$DST_PORT" ]; then
+ # Single destination port or random port range
+ pg_set $dev "flag UDPDST_RND"
+ pg_set $dev "udp_dst_min $DST_MIN"
+ pg_set $dev "udp_dst_max $DST_MAX"
+ fi
+
# Inject packet into RX path of stack
pg_set $dev "xmit_mode netif_receive"
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
index e1ee54465def..82c3e504e056 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
@@ -24,6 +24,10 @@ if [[ -n "$BURST" ]]; then
err 1 "Bursting not supported for this mode"
fi
[ -z "$COUNT" ] && COUNT="10000000" # Zero means indefinitely
+if [ -n "$DST_PORT" ]; then
+ read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $DST_MIN $DST_MAX
+fi
# Base Config
DELAY="0" # Zero means max speed
@@ -52,6 +56,13 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "dst_mac $DST_MAC"
pg_set $dev "dst$IP6 $DEST_IP"
+ if [ -n "$DST_PORT" ]; then
+ # Single destination port or random port range
+ pg_set $dev "flag UDPDST_RND"
+ pg_set $dev "udp_dst_min $DST_MIN"
+ pg_set $dev "udp_dst_max $DST_MAX"
+ fi
+
# Inject packet into TX qdisc egress path of stack
pg_set $dev "xmit_mode queue_xmit"
done
diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh
index e9ab4edba2d7..d1702fdde8f3 100755
--- a/samples/pktgen/pktgen_sample01_simple.sh
+++ b/samples/pktgen/pktgen_sample01_simple.sh
@@ -22,6 +22,10 @@ fi
# Example enforce param "-m" for dst_mac
[ -z "$DST_MAC" ] && usage && err 2 "Must specify -m dst_mac"
[ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely
+if [ -n "$DST_PORT" ]; then
+ read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $DST_MIN $DST_MAX
+fi
# Base Config
DELAY="0" # Zero means max speed
@@ -59,6 +63,13 @@ pg_set $DEV "flag NO_TIMESTAMP"
pg_set $DEV "dst_mac $DST_MAC"
pg_set $DEV "dst$IP6 $DEST_IP"
+if [ -n "$DST_PORT" ]; then
+ # Single destination port or random port range
+ pg_set $DEV "flag UDPDST_RND"
+ pg_set $DEV "udp_dst_min $DST_MIN"
+ pg_set $DEV "udp_dst_max $DST_MAX"
+fi
+
# Setup random UDP port src range
pg_set $DEV "flag UDPSRC_RND"
pg_set $DEV "udp_src_min $UDP_MIN"
diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh
index 99f740ae9857..7f7a9a27548f 100755
--- a/samples/pktgen/pktgen_sample02_multiqueue.sh
+++ b/samples/pktgen/pktgen_sample02_multiqueue.sh
@@ -29,6 +29,10 @@ if [ -z "$DEST_IP" ]; then
[ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
fi
[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
+if [ -n "$DST_PORT" ]; then
+ read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $DST_MIN $DST_MAX
+fi
# General cleanup everything since last run
pg_ctrl "reset"
@@ -60,6 +64,13 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "dst_mac $DST_MAC"
pg_set $dev "dst$IP6 $DEST_IP"
+ if [ -n "$DST_PORT" ]; then
+ # Single destination port or random port range
+ pg_set $dev "flag UDPDST_RND"
+ pg_set $dev "udp_dst_min $DST_MIN"
+ pg_set $dev "udp_dst_max $DST_MAX"
+ fi
+
# Setup random UDP port src range
pg_set $dev "flag UDPSRC_RND"
pg_set $dev "udp_src_min $UDP_MIN"
diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
index 8fdd36722d9e..b520637817ce 100755
--- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh
+++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
@@ -33,6 +33,10 @@ fi
[ -z "$BURST" ] && BURST=32
[ -z "$CLONE_SKB" ] && CLONE_SKB="0" # No need for clones when bursting
[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely
+if [ -n "$DST_PORT" ]; then
+ read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $DST_MIN $DST_MAX
+fi
# Base Config
DELAY="0" # Zero means max speed
@@ -60,6 +64,13 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "dst_mac $DST_MAC"
pg_set $dev "dst$IP6 $DEST_IP"
+ if [ -n "$DST_PORT" ]; then
+ # Single destination port or random port range
+ pg_set $dev "flag UDPDST_RND"
+ pg_set $dev "udp_dst_min $DST_MIN"
+ pg_set $dev "udp_dst_max $DST_MAX"
+ fi
+
# Setup burst, for easy testing -b 0 disable bursting
# (internally in pktgen default and minimum burst=1)
if [[ ${BURST} -ne 0 ]]; then
diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh
index 4df92b7176da..5b6e9d9cb5b5 100755
--- a/samples/pktgen/pktgen_sample04_many_flows.sh
+++ b/samples/pktgen/pktgen_sample04_many_flows.sh
@@ -17,6 +17,10 @@ source ${basedir}/parameters.sh
[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely
+if [ -n "$DST_PORT" ]; then
+ read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $DST_MIN $DST_MAX
+fi
# NOTICE: Script specific settings
# =======
@@ -56,6 +60,13 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "dst_mac $DST_MAC"
pg_set $dev "dst $DEST_IP"
+ if [ -n "$DST_PORT" ]; then
+ # Single destination port or random port range
+ pg_set $dev "flag UDPDST_RND"
+ pg_set $dev "udp_dst_min $DST_MIN"
+ pg_set $dev "udp_dst_max $DST_MAX"
+ fi
+
# Randomize source IP-addresses
pg_set $dev "flag IPSRC_RND"
pg_set $dev "src_min 198.18.0.0"
diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
index 7f8b5e59f01e..0c06e63fbe97 100755
--- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh
+++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
@@ -22,7 +22,10 @@ source ${basedir}/parameters.sh
[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
[ -z "$BURST" ] && BURST=32
[ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely
-
+if [ -n "$DST_PORT" ]; then
+ read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $DST_MIN $DST_MAX
+fi
# Base Config
DELAY="0" # Zero means max speed
@@ -50,6 +53,13 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
pg_set $dev "dst_mac $DST_MAC"
pg_set $dev "dst $DEST_IP"
+ if [ -n "$DST_PORT" ]; then
+ # Single destination port or random port range
+ pg_set $dev "flag UDPDST_RND"
+ pg_set $dev "udp_dst_min $DST_MIN"
+ pg_set $dev "udp_dst_max $DST_MAX"
+ fi
+
# Setup source IP-addresses based on thread number
pg_set $dev "src_min 198.18.$((thread+1)).1"
pg_set $dev "src_max 198.18.$((thread+1)).1"
diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
index 353adc17205e..97f0266c0356 100755
--- a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
+++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
@@ -35,6 +35,10 @@ if [ -z "$DEST_IP" ]; then
[ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
fi
[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
+if [ -n "$DST_PORT" ]; then
+ read -r DST_MIN DST_MAX <<< $(parse_ports $DST_PORT)
+ validate_ports $DST_MIN $DST_MAX
+fi
# General cleanup everything since last run
pg_ctrl "reset"
@@ -77,6 +81,13 @@ for ((i = 0; i < $THREADS; i++)); do
pg_set $dev "dst_mac $DST_MAC"
pg_set $dev "dst$IP6 $DEST_IP"
+ if [ -n "$DST_PORT" ]; then
+ # Single destination port or random port range
+ pg_set $dev "flag UDPDST_RND"
+ pg_set $dev "udp_dst_min $DST_MIN"
+ pg_set $dev "udp_dst_max $DST_MAX"
+ fi
+
# Setup random UDP port src range
pg_set $dev "flag UDPSRC_RND"
pg_set $dev "udp_src_min $UDP_MIN"
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index d80fdde79c22..585f270c2d25 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -29,7 +29,8 @@ CGROUP COMMANDS
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
-| **sendmsg4** | **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** }
+| **sendmsg4** | **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** |
+| **getsockopt** | **setsockopt** }
| *ATTACH_FLAGS* := { **multi** | **override** }
DESCRIPTION
@@ -90,7 +91,9 @@ DESCRIPTION
an unconnected udp4 socket (since 5.2);
**recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for
an unconnected udp6 socket (since 5.2);
- **sysctl** sysctl access (since 5.2).
+ **sysctl** sysctl access (since 5.2);
+ **getsockopt** call to getsockopt (since 5.3);
+ **setsockopt** call to setsockopt (since 5.3).
**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
Detach *PROG* from the cgroup *CGROUP* and attach type
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 55dd06517a3b..1df637f85f94 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -40,7 +40,8 @@ PROG COMMANDS
| **lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** |
| **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
| **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
-| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl**
+| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
+| **cgroup/getsockopt** | **cgroup/setsockopt**
| }
| *ATTACH_TYPE* := {
| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index a17e84c67498..ba37095e1f62 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -379,7 +379,8 @@ _bpftool()
cgroup/sendmsg4 cgroup/sendmsg6 \
cgroup/recvmsg4 cgroup/recvmsg6 \
cgroup/post_bind4 cgroup/post_bind6 \
- cgroup/sysctl" -- \
+ cgroup/sysctl cgroup/getsockopt \
+ cgroup/setsockopt" -- \
"$cur" ) )
return 0
;;
@@ -689,7 +690,8 @@ _bpftool()
attach|detach)
local ATTACH_TYPES='ingress egress sock_create sock_ops \
device bind4 bind6 post_bind4 post_bind6 connect4 \
- connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl'
+ connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl \
+ getsockopt setsockopt'
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag'
case $prev in
@@ -699,7 +701,8 @@ _bpftool()
;;
ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
post_bind4|post_bind6|connect4|connect6|sendmsg4|\
- sendmsg6|recvmsg4|recvmsg6|sysctl)
+ sendmsg6|recvmsg4|recvmsg6|sysctl|getsockopt|\
+ setsockopt)
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
"$cur" ) )
return 0
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 73ec8ea33fb4..390b89a224f1 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -26,7 +26,8 @@
" sock_ops | device | bind4 | bind6 |\n" \
" post_bind4 | post_bind6 | connect4 |\n" \
" connect6 | sendmsg4 | sendmsg6 |\n" \
- " recvmsg4 | recvmsg6 | sysctl }"
+ " recvmsg4 | recvmsg6 | sysctl |\n" \
+ " getsockopt | setsockopt }"
static const char * const attach_type_strings[] = {
[BPF_CGROUP_INET_INGRESS] = "ingress",
@@ -45,6 +46,8 @@ static const char * const attach_type_strings[] = {
[BPF_CGROUP_SYSCTL] = "sysctl",
[BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
[BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
+ [BPF_CGROUP_GETSOCKOPT] = "getsockopt",
+ [BPF_CGROUP_SETSOCKOPT] = "setsockopt",
[__MAX_BPF_ATTACH_TYPE] = NULL,
};
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 28a2a5857e14..9c5d9c80f71e 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -74,6 +74,7 @@ static const char * const prog_type_name[] = {
[BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
[BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
[BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
+ [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
};
extern const char * const map_type_name[];
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f1a831f05010..9b0db5d14e31 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1071,7 +1071,8 @@ static int do_help(int argc, char **argv)
" cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
" cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
" cgroup/sendmsg4 | cgroup/sendmsg6 | cgroup/recvmsg4 |\n"
- " cgroup/recvmsg6 }\n"
+ " cgroup/recvmsg6 | cgroup/getsockopt |\n"
+ " cgroup/setsockopt }\n"
" ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
" flow_dissector }\n"
" " HELP_SPEC_OPTIONS "\n"
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b077507efa3f..cecf42c871d4 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -170,6 +170,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_FLOW_DISSECTOR,
BPF_PROG_TYPE_CGROUP_SYSCTL,
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+ BPF_PROG_TYPE_CGROUP_SOCKOPT,
};
enum bpf_attach_type {
@@ -194,6 +195,8 @@ enum bpf_attach_type {
BPF_CGROUP_SYSCTL,
BPF_CGROUP_UDP4_RECVMSG,
BPF_CGROUP_UDP6_RECVMSG,
+ BPF_CGROUP_GETSOCKOPT,
+ BPF_CGROUP_SETSOCKOPT,
__MAX_BPF_ATTACH_TYPE
};
@@ -1764,6 +1767,7 @@ union bpf_attr {
* * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
* * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
* * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT)
*
* Therefore, this function can be used to clear a callback flag by
* setting the appropriate bit to zero. e.g. to disable the RTO
@@ -3066,6 +3070,12 @@ struct bpf_tcp_sock {
* sum(delta(snd_una)), or how many bytes
* were acked.
*/
+ __u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups
+ * total number of DSACK blocks received
+ */
+ __u32 delivered; /* Total data packets delivered incl. rexmits */
+ __u32 delivered_ce; /* Like the above but only ECE marked packets */
+ __u32 icsk_retransmits; /* Number of unrecovered [RTO] timeouts */
};
struct bpf_sock_tuple {
@@ -3308,7 +3318,8 @@ struct bpf_sock_ops {
#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
-#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently
* supported cb flags
*/
@@ -3363,6 +3374,8 @@ enum {
BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after
* socket transition to LISTEN state.
*/
+ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT.
+ */
};
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -3541,4 +3554,15 @@ struct bpf_sysctl {
*/
};
+struct bpf_sockopt {
+ __bpf_md_ptr(struct bpf_sock *, sk);
+ __bpf_md_ptr(void *, optval);
+ __bpf_md_ptr(void *, optval_end);
+
+ __s32 level;
+ __s32 optname;
+ __s32 optlen;
+ __s32 retval;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 5b225ff63b48..7d113a9602f0 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -636,6 +636,7 @@ enum {
IFLA_BOND_AD_USER_PORT_KEY,
IFLA_BOND_AD_ACTOR_SYSTEM,
IFLA_BOND_TLB_DYNAMIC_LB,
+ IFLA_BOND_PEER_NOTIF_DELAY,
__IFLA_BOND_MAX,
};
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index caed8b1614ff..faaa5ca2a117 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -46,6 +46,7 @@ struct xdp_mmap_offsets {
#define XDP_UMEM_FILL_RING 5
#define XDP_UMEM_COMPLETION_RING 6
#define XDP_STATISTICS 7
+#define XDP_OPTIONS 8
struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */
@@ -60,6 +61,13 @@ struct xdp_statistics {
__u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
};
+struct xdp_options {
+ __u32 flags;
+};
+
+/* Flags for the flags field of struct xdp_options */
+#define XDP_OPTIONS_ZEROCOPY (1 << 0)
+
/* Pgoff for mmaping the rings */
#define XDP_PGOFF_RX_RING 0
#define XDP_PGOFF_TX_RING 0x80000000
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4259c9f0cfe7..4907997289e9 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -778,7 +778,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
if (obj->nr_maps < obj->maps_cap)
return &obj->maps[obj->nr_maps++];
- new_cap = max(4ul, obj->maps_cap * 3 / 2);
+ new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps));
if (!new_maps) {
pr_warning("alloc maps for object failed\n");
@@ -1169,7 +1169,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
pr_debug("map '%s': found key_size = %u.\n",
map_name, sz);
if (map->def.key_size && map->def.key_size != sz) {
- pr_warning("map '%s': conflictling key size %u != %u.\n",
+ pr_warning("map '%s': conflicting key size %u != %u.\n",
map_name, map->def.key_size, sz);
return -EINVAL;
}
@@ -1197,7 +1197,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
pr_debug("map '%s': found key [%u], sz = %lld.\n",
map_name, t->type, sz);
if (map->def.key_size && map->def.key_size != sz) {
- pr_warning("map '%s': conflictling key size %u != %lld.\n",
+ pr_warning("map '%s': conflicting key size %u != %lld.\n",
map_name, map->def.key_size, sz);
return -EINVAL;
}
@@ -1212,7 +1212,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
pr_debug("map '%s': found value_size = %u.\n",
map_name, sz);
if (map->def.value_size && map->def.value_size != sz) {
- pr_warning("map '%s': conflictling value size %u != %u.\n",
+ pr_warning("map '%s': conflicting value size %u != %u.\n",
map_name, map->def.value_size, sz);
return -EINVAL;
}
@@ -1240,7 +1240,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
pr_debug("map '%s': found value [%u], sz = %lld.\n",
map_name, t->type, sz);
if (map->def.value_size && map->def.value_size != sz) {
- pr_warning("map '%s': conflictling value size %u != %lld.\n",
+ pr_warning("map '%s': conflicting value size %u != %lld.\n",
map_name, map->def.value_size, sz);
return -EINVAL;
}
@@ -2646,6 +2646,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
return false;
case BPF_PROG_TYPE_KPROBE:
default:
@@ -3604,6 +3605,10 @@ static const struct {
BPF_CGROUP_UDP6_RECVMSG),
BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL,
BPF_CGROUP_SYSCTL),
+ BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ BPF_CGROUP_GETSOCKOPT),
+ BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ BPF_CGROUP_SETSOCKOPT),
};
#undef BPF_PROG_SEC_IMPL
@@ -3867,10 +3872,7 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
struct bpf_object **pobj, int *prog_fd)
{
- struct bpf_object_open_attr open_attr = {
- .file = attr->file,
- .prog_type = attr->prog_type,
- };
+ struct bpf_object_open_attr open_attr = {};
struct bpf_program *prog, *first_prog = NULL;
enum bpf_attach_type expected_attach_type;
enum bpf_prog_type prog_type;
@@ -3883,6 +3885,9 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
if (!attr->file)
return -EINVAL;
+ open_attr.file = attr->file;
+ open_attr.prog_type = attr->prog_type;
+
obj = bpf_object__open_xattr(&open_attr);
if (IS_ERR_OR_NULL(obj))
return -ENOENT;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 6635a31a7a16..ace1a0708d99 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -101,6 +101,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
case BPF_PROG_TYPE_SK_REUSEPORT:
case BPF_PROG_TYPE_FLOW_DISSECTOR:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
default:
break;
}
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 7ef6293b4fd7..b33740221b7e 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -65,6 +65,7 @@ struct xsk_socket {
int xsks_map_fd;
__u32 queue_id;
char ifname[IFNAMSIZ];
+ bool zc;
};
struct xsk_nl_info {
@@ -326,7 +327,8 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
channels.cmd = ETHTOOL_GCHANNELS;
ifr.ifr_data = (void *)&channels;
- strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ);
+ strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1);
+ ifr.ifr_name[IFNAMSIZ - 1] = '\0';
err = ioctl(fd, SIOCETHTOOL, &ifr);
if (err && errno != EOPNOTSUPP) {
ret = -errno;
@@ -480,6 +482,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
void *rx_map = NULL, *tx_map = NULL;
struct sockaddr_xdp sxdp = {};
struct xdp_mmap_offsets off;
+ struct xdp_options opts;
struct xsk_socket *xsk;
socklen_t optlen;
int err;
@@ -597,6 +600,16 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
}
xsk->prog_fd = -1;
+
+ optlen = sizeof(opts);
+ err = getsockopt(xsk->fd, SOL_XDP, XDP_OPTIONS, &opts, &optlen);
+ if (err) {
+ err = -errno;
+ goto out_mmap_tx;
+ }
+
+ xsk->zc = opts.flags & XDP_OPTIONS_ZEROCOPY;
+
if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
err = xsk_setup_xdp_prog(xsk);
if (err)
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 82ea71a0f3ec..833a6e60d065 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -167,7 +167,7 @@ LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048
#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048
-#define XSK_UMEM__DEFAULT_FRAME_SHIFT 11 /* 2048 bytes */
+#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */
#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 7470327edcfe..a2f7f79c7908 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -39,3 +39,6 @@ libbpf.so.*
test_hashmap
test_btf_dump
xdping
+test_sockopt
+test_sockopt_sk
+test_sockopt_multi
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fb5ce43e28b3..2620406a53ec 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -15,7 +15,7 @@ LLC ?= llc
LLVM_OBJCOPY ?= llvm-objcopy
LLVM_READELF ?= llvm-readelf
BTF_PAHOLE ?= pahole
-CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \
+CFLAGS += -g -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \
-Dbpf_prog_load=bpf_prog_test_load \
-Dbpf_load_program=bpf_test_load_program
LDLIBS += -lcap -lelf -lrt -lpthread
@@ -26,7 +26,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
test_cgroup_storage test_select_reuseport test_section_names \
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
- test_btf_dump test_cgroup_attach xdping
+ test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk \
+ test_sockopt_multi test_tcp_rtt
BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -46,6 +47,7 @@ TEST_PROGS := test_kmod.sh \
test_libbpf.sh \
test_xdp_redirect.sh \
test_xdp_meta.sh \
+ test_xdp_veth.sh \
test_offload.py \
test_sock_addr.sh \
test_tunnel.sh \
@@ -102,6 +104,10 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c
$(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
+$(OUTPUT)/test_sockopt: cgroup_helpers.c
+$(OUTPUT)/test_sockopt_sk: cgroup_helpers.c
+$(OUTPUT)/test_sockopt_multi: cgroup_helpers.c
+$(OUTPUT)/test_tcp_rtt: cgroup_helpers.c
.PHONY: force
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 6b0781391be5..abf6224649be 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -75,8 +75,7 @@ typedef struct {
void* co_name; // PyCodeObject.co_name
} FrameData;
-static inline __attribute__((__always_inline__)) void*
-get_thread_state(void* tls_base, PidData* pidData)
+static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
{
void* thread_state;
int key;
@@ -87,8 +86,8 @@ get_thread_state(void* tls_base, PidData* pidData)
return thread_state;
}
-static inline __attribute__((__always_inline__)) bool
-get_frame_data(void* frame_ptr, PidData* pidData, FrameData* frame, Symbol* symbol)
+static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
+ FrameData *frame, Symbol *symbol)
{
// read data from PyFrameObject
bpf_probe_read(&frame->f_back,
@@ -161,7 +160,7 @@ struct bpf_elf_map SEC("maps") stackmap = {
.max_elem = 1000,
};
-static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *ctx)
+static __always_inline int __on_event(struct pt_regs *ctx)
{
uint64_t pid_tgid = bpf_get_current_pid_tgid();
pid_t pid = (pid_t)(pid_tgid >> 32);
diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c
new file mode 100644
index 000000000000..4afd2595c08e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <netinet/in.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
+
+SEC("cgroup/getsockopt/child")
+int _getsockopt_child(struct bpf_sockopt *ctx)
+{
+ __u8 *optval_end = ctx->optval_end;
+ __u8 *optval = ctx->optval;
+
+ if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
+ return 1;
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ if (optval[0] != 0x80)
+ return 0; /* EPERM, unexpected optval from the kernel */
+
+ ctx->retval = 0; /* Reset system call return value to zero */
+
+ optval[0] = 0x90;
+ ctx->optlen = 1;
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt/parent")
+int _getsockopt_parent(struct bpf_sockopt *ctx)
+{
+ __u8 *optval_end = ctx->optval_end;
+ __u8 *optval = ctx->optval;
+
+ if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
+ return 1;
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ if (optval[0] != 0x90)
+ return 0; /* EPERM, unexpected optval from the kernel */
+
+ ctx->retval = 0; /* Reset system call return value to zero */
+
+ optval[0] = 0xA0;
+ ctx->optlen = 1;
+
+ return 1;
+}
+
+SEC("cgroup/setsockopt")
+int _setsockopt(struct bpf_sockopt *ctx)
+{
+ __u8 *optval_end = ctx->optval_end;
+ __u8 *optval = ctx->optval;
+
+ if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
+ return 1;
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ optval[0] += 0x10;
+ ctx->optlen = 1;
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
new file mode 100644
index 000000000000..076122c898e9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <netinet/in.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
+
+#define SOL_CUSTOM 0xdeadbeef
+
+struct sockopt_sk {
+ __u8 val;
+};
+
+struct bpf_map_def SEC("maps") socket_storage_map = {
+ .type = BPF_MAP_TYPE_SK_STORAGE,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct sockopt_sk),
+ .map_flags = BPF_F_NO_PREALLOC,
+};
+BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct sockopt_sk);
+
+SEC("cgroup/getsockopt")
+int _getsockopt(struct bpf_sockopt *ctx)
+{
+ __u8 *optval_end = ctx->optval_end;
+ __u8 *optval = ctx->optval;
+ struct sockopt_sk *storage;
+
+ if (ctx->level == SOL_IP && ctx->optname == IP_TOS)
+ /* Not interested in SOL_IP:IP_TOS;
+ * let next BPF program in the cgroup chain or kernel
+ * handle it.
+ */
+ return 1;
+
+ if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
+ /* Not interested in SOL_SOCKET:SO_SNDBUF;
+ * let next BPF program in the cgroup chain or kernel
+ * handle it.
+ */
+ return 1;
+ }
+
+ if (ctx->level != SOL_CUSTOM)
+ return 0; /* EPERM, deny everything except custom level */
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0; /* EPERM, couldn't get sk storage */
+
+ if (!ctx->retval)
+ return 0; /* EPERM, kernel should not have handled
+ * SOL_CUSTOM, something is wrong!
+ */
+ ctx->retval = 0; /* Reset system call return value to zero */
+
+ optval[0] = storage->val;
+ ctx->optlen = 1;
+
+ return 1;
+}
+
+SEC("cgroup/setsockopt")
+int _setsockopt(struct bpf_sockopt *ctx)
+{
+ __u8 *optval_end = ctx->optval_end;
+ __u8 *optval = ctx->optval;
+ struct sockopt_sk *storage;
+
+ if (ctx->level == SOL_IP && ctx->optname == IP_TOS)
+ /* Not interested in SOL_IP:IP_TOS;
+ * let next BPF program in the cgroup chain or kernel
+ * handle it.
+ */
+ return 1;
+
+ if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
+ /* Overwrite SO_SNDBUF value */
+
+ if (optval + sizeof(__u32) > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ *(__u32 *)optval = 0x55AA;
+ ctx->optlen = 4;
+
+ return 1;
+ }
+
+ if (ctx->level != SOL_CUSTOM)
+ return 0; /* EPERM, deny everything except custom level */
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0; /* EPERM, couldn't get sk storage */
+
+ storage->val = optval[0];
+ ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
+ * setsockopt handler.
+ */
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index 1ff73f60a3e4..553bc3b62e89 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -266,8 +266,8 @@ struct tls_index {
uint64_t offset;
};
-static inline __attribute__((always_inline))
-void *calc_location(struct strobe_value_loc *loc, void *tls_base)
+static __always_inline void *calc_location(struct strobe_value_loc *loc,
+ void *tls_base)
{
/*
* tls_mode value is:
@@ -327,10 +327,10 @@ void *calc_location(struct strobe_value_loc *loc, void *tls_base)
: NULL;
}
-static inline __attribute__((always_inline))
-void read_int_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base,
- struct strobe_value_generic *value,
- struct strobemeta_payload *data)
+static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
+ size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload *data)
{
void *location = calc_location(&cfg->int_locs[idx], tls_base);
if (!location)
@@ -342,10 +342,11 @@ void read_int_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base,
data->int_vals_set_mask |= (1 << idx);
}
-static inline __attribute__((always_inline))
-uint64_t read_str_var(struct strobemeta_cfg* cfg, size_t idx, void *tls_base,
- struct strobe_value_generic *value,
- struct strobemeta_payload *data, void *payload)
+static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
+ size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload *data,
+ void *payload)
{
void *location;
uint32_t len;
@@ -371,10 +372,11 @@ uint64_t read_str_var(struct strobemeta_cfg* cfg, size_t idx, void *tls_base,
return len;
}
-static inline __attribute__((always_inline))
-void *read_map_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base,
- struct strobe_value_generic *value,
- struct strobemeta_payload* data, void *payload)
+static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
+ size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload *data,
+ void *payload)
{
struct strobe_map_descr* descr = &data->map_descrs[idx];
struct strobe_map_raw map;
@@ -435,9 +437,9 @@ void *read_map_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base,
* read_strobe_meta returns NULL, if no metadata was read; otherwise returns
* pointer to *right after* payload ends
*/
-static inline __attribute__((always_inline))
-void *read_strobe_meta(struct task_struct* task,
- struct strobemeta_payload* data) {
+static __always_inline void *read_strobe_meta(struct task_struct *task,
+ struct strobemeta_payload *data)
+{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
struct strobe_value_generic value = {0};
struct strobemeta_cfg *cfg;
diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c
new file mode 100644
index 000000000000..233bdcb1659e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
+
+struct tcp_rtt_storage {
+ __u32 invoked;
+ __u32 dsack_dups;
+ __u32 delivered;
+ __u32 delivered_ce;
+ __u32 icsk_retransmits;
+};
+
+struct bpf_map_def SEC("maps") socket_storage_map = {
+ .type = BPF_MAP_TYPE_SK_STORAGE,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct tcp_rtt_storage),
+ .map_flags = BPF_F_NO_PREALLOC,
+};
+BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct tcp_rtt_storage);
+
+SEC("sockops")
+int _sockops(struct bpf_sock_ops *ctx)
+{
+ struct tcp_rtt_storage *storage;
+ struct bpf_tcp_sock *tcp_sk;
+ int op = (int) ctx->op;
+ struct bpf_sock *sk;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 1;
+
+ storage = bpf_sk_storage_get(&socket_storage_map, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 1;
+
+ if (op == BPF_SOCK_OPS_TCP_CONNECT_CB) {
+ bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_RTT_CB_FLAG);
+ return 1;
+ }
+
+ if (op != BPF_SOCK_OPS_RTT_CB)
+ return 1;
+
+ tcp_sk = bpf_tcp_sock(sk);
+ if (!tcp_sk)
+ return 1;
+
+ storage->invoked++;
+
+ storage->dsack_dups = tcp_sk->dsack_dups;
+ storage->delivered = tcp_sk->delivered;
+ storage->delivered_ce = tcp_sk->delivered_ce;
+ storage->icsk_retransmits = tcp_sk->icsk_retransmits;
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_jhash.h b/tools/testing/selftests/bpf/progs/test_jhash.h
index 3d12c11a8d47..c300734d26f6 100644
--- a/tools/testing/selftests/bpf/progs/test_jhash.h
+++ b/tools/testing/selftests/bpf/progs/test_jhash.h
@@ -1,9 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
+#include <features.h>
typedef unsigned int u32;
-static __attribute__((always_inline)) u32 rol32(u32 word, unsigned int shift)
+static __always_inline u32 rol32(u32 word, unsigned int shift)
{
return (word << shift) | (word >> ((-shift) & 31));
}
diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
index 463964d79f73..1dbe1d4d467e 100644
--- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
@@ -54,7 +54,7 @@ struct sr6_tlv_t {
unsigned char value[0];
} BPF_PACKET_HEADER;
-static __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
+static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb)
{
void *cursor, *data_end;
struct ip6_srh_t *srh;
@@ -88,9 +88,9 @@ static __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff
return srh;
}
-static __attribute__((always_inline))
-int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
- uint32_t old_pad, uint32_t pad_off)
+static __always_inline int update_tlv_pad(struct __sk_buff *skb,
+ uint32_t new_pad, uint32_t old_pad,
+ uint32_t pad_off)
{
int err;
@@ -118,10 +118,11 @@ int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
return 0;
}
-static __attribute__((always_inline))
-int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
- uint32_t *tlv_off, uint32_t *pad_size,
- uint32_t *pad_off)
+static __always_inline int is_valid_tlv_boundary(struct __sk_buff *skb,
+ struct ip6_srh_t *srh,
+ uint32_t *tlv_off,
+ uint32_t *pad_size,
+ uint32_t *pad_off)
{
uint32_t srh_off, cur_off;
int offset_valid = 0;
@@ -177,9 +178,9 @@ int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
return 0;
}
-static __attribute__((always_inline))
-int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
- struct sr6_tlv_t *itlv, uint8_t tlv_size)
+static __always_inline int add_tlv(struct __sk_buff *skb,
+ struct ip6_srh_t *srh, uint32_t tlv_off,
+ struct sr6_tlv_t *itlv, uint8_t tlv_size)
{
uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
uint8_t len_remaining, new_pad;
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
index 77830693eccb..9897150ed516 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
@@ -2,7 +2,7 @@
// Copyright (c) 2019 Facebook
#include <linux/bpf.h>
#include "bpf_helpers.h"
-#define ATTR __attribute__((always_inline))
+#define ATTR __always_inline
#include "test_jhash.h"
SEC("scale90_inline")
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
new file mode 100644
index 000000000000..e87a985b9df9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") tx_port = {
+ .type = BPF_MAP_TYPE_DEVMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 8,
+};
+
+SEC("redirect_map_0")
+int xdp_redirect_map_0(struct xdp_md *xdp)
+{
+ return bpf_redirect_map(&tx_port, 0, 0);
+}
+
+SEC("redirect_map_1")
+int xdp_redirect_map_1(struct xdp_md *xdp)
+{
+ return bpf_redirect_map(&tx_port, 1, 0);
+}
+
+SEC("redirect_map_2")
+int xdp_redirect_map_2(struct xdp_md *xdp)
+{
+ return bpf_redirect_map(&tx_port, 2, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c
new file mode 100644
index 000000000000..57912e7c94b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_tx.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+SEC("tx")
+int xdp_tx(struct xdp_md *xdp)
+{
+ return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/test_section_names.c
index dee2f2eceb0f..29833aeaf0de 100644
--- a/tools/testing/selftests/bpf/test_section_names.c
+++ b/tools/testing/selftests/bpf/test_section_names.c
@@ -134,6 +134,16 @@ static struct sec_name_test tests[] = {
{0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL},
{0, BPF_CGROUP_SYSCTL},
},
+ {
+ "cgroup/getsockopt",
+ {0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT},
+ {0, BPF_CGROUP_GETSOCKOPT},
+ },
+ {
+ "cgroup/setsockopt",
+ {0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT},
+ {0, BPF_CGROUP_SETSOCKOPT},
+ },
};
static int test_prog_type_by_name(const struct sec_name_test *test)
diff --git a/tools/testing/selftests/bpf/test_sockopt.c b/tools/testing/selftests/bpf/test_sockopt.c
new file mode 100644
index 000000000000..23bd0819382d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockopt.c
@@ -0,0 +1,1021 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH "/sockopt"
+
+static char bpf_log_buf[4096];
+static bool verbose;
+
+enum sockopt_test_error {
+ OK = 0,
+ DENY_LOAD,
+ DENY_ATTACH,
+ EPERM_GETSOCKOPT,
+ EFAULT_GETSOCKOPT,
+ EPERM_SETSOCKOPT,
+ EFAULT_SETSOCKOPT,
+};
+
+static struct sockopt_test {
+ const char *descr;
+ const struct bpf_insn insns[64];
+ enum bpf_attach_type attach_type;
+ enum bpf_attach_type expected_attach_type;
+
+ int set_optname;
+ int set_level;
+ const char set_optval[64];
+ socklen_t set_optlen;
+
+ int get_optname;
+ int get_level;
+ const char get_optval[64];
+ socklen_t get_optlen;
+ socklen_t get_optlen_ret;
+
+ enum sockopt_test_error error;
+} tests[] = {
+
+ /* ==================== getsockopt ==================== */
+
+ {
+ .descr = "getsockopt: no expected_attach_type",
+ .insns = {
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = 0,
+ .error = DENY_LOAD,
+ },
+ {
+ .descr = "getsockopt: wrong expected_attach_type",
+ .insns = {
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ .error = DENY_ATTACH,
+ },
+ {
+ .descr = "getsockopt: bypass bpf hook",
+ .insns = {
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+ .get_level = SOL_IP,
+ .set_level = SOL_IP,
+
+ .get_optname = IP_TOS,
+ .set_optname = IP_TOS,
+
+ .set_optval = { 1 << 3 },
+ .set_optlen = 1,
+
+ .get_optval = { 1 << 3 },
+ .get_optlen = 1,
+ },
+ {
+ .descr = "getsockopt: return EPERM from bpf hook",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+ .get_level = SOL_IP,
+ .get_optname = IP_TOS,
+
+ .get_optlen = 1,
+ .error = EPERM_GETSOCKOPT,
+ },
+ {
+ .descr = "getsockopt: no optval bounds check, deny loading",
+ .insns = {
+ /* r6 = ctx->optval */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval)),
+
+ /* ctx->optval[0] = 0x80 */
+ BPF_MOV64_IMM(BPF_REG_0, 0x80),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_0, 0),
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ .error = DENY_LOAD,
+ },
+ {
+ .descr = "getsockopt: read ctx->level",
+ .insns = {
+ /* r6 = ctx->level */
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, level)),
+
+ /* if (ctx->level == 123) { */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4),
+ /* ctx->retval = 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, retval)),
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+ /* } else { */
+ /* return 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* } */
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+ .get_level = 123,
+
+ .get_optlen = 1,
+ },
+ {
+ .descr = "getsockopt: deny writing to ctx->level",
+ .insns = {
+ /* ctx->level = 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, level)),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+ .error = DENY_LOAD,
+ },
+ {
+ .descr = "getsockopt: read ctx->optname",
+ .insns = {
+ /* r6 = ctx->optname */
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optname)),
+
+ /* if (ctx->optname == 123) { */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4),
+ /* ctx->retval = 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, retval)),
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+ /* } else { */
+ /* return 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* } */
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+ .get_optname = 123,
+
+ .get_optlen = 1,
+ },
+ {
+ .descr = "getsockopt: read ctx->retval",
+ .insns = {
+ /* r6 = ctx->retval */
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, retval)),
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+ .get_level = SOL_IP,
+ .get_optname = IP_TOS,
+ .get_optlen = 1,
+ },
+ {
+ .descr = "getsockopt: deny writing to ctx->optname",
+ .insns = {
+ /* ctx->optname = 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optname)),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+ .error = DENY_LOAD,
+ },
+ {
+ .descr = "getsockopt: read ctx->optlen",
+ .insns = {
+ /* r6 = ctx->optlen */
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optlen)),
+
+ /* if (ctx->optlen == 64) { */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 64, 4),
+ /* ctx->retval = 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, retval)),
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+ /* } else { */
+ /* return 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* } */
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+ .get_optlen = 64,
+ },
+ {
+ .descr = "getsockopt: deny bigger ctx->optlen",
+ .insns = {
+ /* ctx->optlen = 65 */
+ BPF_MOV64_IMM(BPF_REG_0, 65),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optlen)),
+
+ /* ctx->retval = 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, retval)),
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+ .get_optlen = 64,
+
+ .error = EFAULT_GETSOCKOPT,
+ },
+ {
+ .descr = "getsockopt: deny arbitrary ctx->retval",
+ .insns = {
+ /* ctx->retval = 123 */
+ BPF_MOV64_IMM(BPF_REG_0, 123),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, retval)),
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+ .get_optlen = 64,
+
+ .error = EFAULT_GETSOCKOPT,
+ },
+ {
+ .descr = "getsockopt: support smaller ctx->optlen",
+ .insns = {
+ /* ctx->optlen = 32 */
+ BPF_MOV64_IMM(BPF_REG_0, 32),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optlen)),
+ /* ctx->retval = 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, retval)),
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+ .get_optlen = 64,
+ .get_optlen_ret = 32,
+ },
+ {
+ .descr = "getsockopt: deny writing to ctx->optval",
+ .insns = {
+ /* ctx->optval = 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optval)),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+ .error = DENY_LOAD,
+ },
+ {
+ .descr = "getsockopt: deny writing to ctx->optval_end",
+ .insns = {
+ /* ctx->optval_end = 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optval_end)),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+ .error = DENY_LOAD,
+ },
+ {
+ .descr = "getsockopt: rewrite value",
+ .insns = {
+ /* r6 = ctx->optval */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval)),
+ /* r2 = ctx->optval */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+ /* r6 = ctx->optval + 1 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+
+ /* r7 = ctx->optval_end */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval_end)),
+
+ /* if (ctx->optval + 1 <= ctx->optval_end) { */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+ /* ctx->optval[0] = 0xF0 */
+ BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xF0),
+ /* } */
+
+ /* ctx->retval = 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, retval)),
+
+ /* return 1*/
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+ .get_level = SOL_IP,
+ .get_optname = IP_TOS,
+
+ .get_optval = { 0xF0 },
+ .get_optlen = 1,
+ },
+
+ /* ==================== setsockopt ==================== */
+
+ {
+ .descr = "setsockopt: no expected_attach_type",
+ .insns = {
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = 0,
+ .error = DENY_LOAD,
+ },
+ {
+ .descr = "setsockopt: wrong expected_attach_type",
+ .insns = {
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ .error = DENY_ATTACH,
+ },
+ {
+ .descr = "setsockopt: bypass bpf hook",
+ .insns = {
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .get_level = SOL_IP,
+ .set_level = SOL_IP,
+
+ .get_optname = IP_TOS,
+ .set_optname = IP_TOS,
+
+ .set_optval = { 1 << 3 },
+ .set_optlen = 1,
+
+ .get_optval = { 1 << 3 },
+ .get_optlen = 1,
+ },
+ {
+ .descr = "setsockopt: return EPERM from bpf hook",
+ .insns = {
+ /* return 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .set_level = SOL_IP,
+ .set_optname = IP_TOS,
+
+ .set_optlen = 1,
+ .error = EPERM_SETSOCKOPT,
+ },
+ {
+ .descr = "setsockopt: no optval bounds check, deny loading",
+ .insns = {
+ /* r6 = ctx->optval */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval)),
+
+ /* r0 = ctx->optval[0] */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ .error = DENY_LOAD,
+ },
+ {
+ .descr = "setsockopt: read ctx->level",
+ .insns = {
+ /* r6 = ctx->level */
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, level)),
+
+ /* if (ctx->level == 123) { */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4),
+ /* ctx->optlen = -1 */
+ BPF_MOV64_IMM(BPF_REG_0, -1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optlen)),
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+ /* } else { */
+ /* return 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* } */
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .set_level = 123,
+
+ .set_optlen = 1,
+ },
+ {
+ .descr = "setsockopt: allow changing ctx->level",
+ .insns = {
+ /* ctx->level = SOL_IP */
+ BPF_MOV64_IMM(BPF_REG_0, SOL_IP),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, level)),
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .get_level = SOL_IP,
+ .set_level = 234, /* should be rewritten to SOL_IP */
+
+ .get_optname = IP_TOS,
+ .set_optname = IP_TOS,
+
+ .set_optval = { 1 << 3 },
+ .set_optlen = 1,
+ .get_optval = { 1 << 3 },
+ .get_optlen = 1,
+ },
+ {
+ .descr = "setsockopt: read ctx->optname",
+ .insns = {
+ /* r6 = ctx->optname */
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optname)),
+
+ /* if (ctx->optname == 123) { */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4),
+ /* ctx->optlen = -1 */
+ BPF_MOV64_IMM(BPF_REG_0, -1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optlen)),
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+ /* } else { */
+ /* return 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* } */
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .set_optname = 123,
+
+ .set_optlen = 1,
+ },
+ {
+ .descr = "setsockopt: allow changing ctx->optname",
+ .insns = {
+ /* ctx->optname = IP_TOS */
+ BPF_MOV64_IMM(BPF_REG_0, IP_TOS),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optname)),
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .get_level = SOL_IP,
+ .set_level = SOL_IP,
+
+ .get_optname = IP_TOS,
+ .set_optname = 456, /* should be rewritten to IP_TOS */
+
+ .set_optval = { 1 << 3 },
+ .set_optlen = 1,
+ .get_optval = { 1 << 3 },
+ .get_optlen = 1,
+ },
+ {
+ .descr = "setsockopt: read ctx->optlen",
+ .insns = {
+ /* r6 = ctx->optlen */
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optlen)),
+
+ /* if (ctx->optlen == 64) { */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 64, 4),
+ /* ctx->optlen = -1 */
+ BPF_MOV64_IMM(BPF_REG_0, -1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optlen)),
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+ /* } else { */
+ /* return 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* } */
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .set_optlen = 64,
+ },
+ {
+ .descr = "setsockopt: ctx->optlen == -1 is ok",
+ .insns = {
+ /* ctx->optlen = -1 */
+ BPF_MOV64_IMM(BPF_REG_0, -1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optlen)),
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .set_optlen = 64,
+ },
+ {
+ .descr = "setsockopt: deny ctx->optlen < 0 (except -1)",
+ .insns = {
+ /* ctx->optlen = -2 */
+ BPF_MOV64_IMM(BPF_REG_0, -2),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optlen)),
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .set_optlen = 4,
+
+ .error = EFAULT_SETSOCKOPT,
+ },
+ {
+ .descr = "setsockopt: deny ctx->optlen > input optlen",
+ .insns = {
+ /* ctx->optlen = 65 */
+ BPF_MOV64_IMM(BPF_REG_0, 65),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optlen)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .set_optlen = 64,
+
+ .error = EFAULT_SETSOCKOPT,
+ },
+ {
+ .descr = "setsockopt: allow changing ctx->optlen within bounds",
+ .insns = {
+ /* r6 = ctx->optval */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval)),
+ /* r2 = ctx->optval */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+ /* r6 = ctx->optval + 1 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+
+ /* r7 = ctx->optval_end */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval_end)),
+
+ /* if (ctx->optval + 1 <= ctx->optval_end) { */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+ /* ctx->optval[0] = 1 << 3 */
+ BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 1 << 3),
+ /* } */
+
+ /* ctx->optlen = 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optlen)),
+
+ /* return 1*/
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .get_level = SOL_IP,
+ .set_level = SOL_IP,
+
+ .get_optname = IP_TOS,
+ .set_optname = IP_TOS,
+
+ .set_optval = { 1, 1, 1, 1 },
+ .set_optlen = 4,
+ .get_optval = { 1 << 3 },
+ .get_optlen = 1,
+ },
+ {
+ .descr = "setsockopt: deny write ctx->retval",
+ .insns = {
+ /* ctx->retval = 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, retval)),
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .error = DENY_LOAD,
+ },
+ {
+ .descr = "setsockopt: deny read ctx->retval",
+ .insns = {
+ /* r6 = ctx->retval */
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, retval)),
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .error = DENY_LOAD,
+ },
+ {
+ .descr = "setsockopt: deny writing to ctx->optval",
+ .insns = {
+ /* ctx->optval = 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optval)),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .error = DENY_LOAD,
+ },
+ {
+ .descr = "setsockopt: deny writing to ctx->optval_end",
+ .insns = {
+ /* ctx->optval_end = 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct bpf_sockopt, optval_end)),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .error = DENY_LOAD,
+ },
+ {
+ .descr = "setsockopt: allow IP_TOS <= 128",
+ .insns = {
+ /* r6 = ctx->optval */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval)),
+ /* r7 = ctx->optval + 1 */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1),
+
+ /* r8 = ctx->optval_end */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval_end)),
+
+ /* if (ctx->optval + 1 <= ctx->optval_end) { */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 4),
+
+ /* r9 = ctx->optval[0] */
+ BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_6, 0),
+
+ /* if (ctx->optval[0] < 128) */
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_9, 128, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+ /* } */
+
+ /* } else { */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* } */
+
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .get_level = SOL_IP,
+ .set_level = SOL_IP,
+
+ .get_optname = IP_TOS,
+ .set_optname = IP_TOS,
+
+ .set_optval = { 0x80 },
+ .set_optlen = 1,
+ .get_optval = { 0x80 },
+ .get_optlen = 1,
+ },
+ {
+ .descr = "setsockopt: deny IP_TOS > 128",
+ .insns = {
+ /* r6 = ctx->optval */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval)),
+ /* r7 = ctx->optval + 1 */
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1),
+
+ /* r8 = ctx->optval_end */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval_end)),
+
+ /* if (ctx->optval + 1 <= ctx->optval_end) { */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 4),
+
+ /* r9 = ctx->optval[0] */
+ BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_6, 0),
+
+ /* if (ctx->optval[0] < 128) */
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_9, 128, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+ /* } */
+
+ /* } else { */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* } */
+
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .get_level = SOL_IP,
+ .set_level = SOL_IP,
+
+ .get_optname = IP_TOS,
+ .set_optname = IP_TOS,
+
+ .set_optval = { 0x81 },
+ .set_optlen = 1,
+ .get_optval = { 0x00 },
+ .get_optlen = 1,
+
+ .error = EPERM_SETSOCKOPT,
+ },
+};
+
+static int load_prog(const struct bpf_insn *insns,
+ enum bpf_attach_type expected_attach_type)
+{
+ struct bpf_load_program_attr attr = {
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ .expected_attach_type = expected_attach_type,
+ .insns = insns,
+ .license = "GPL",
+ .log_level = 2,
+ };
+ int fd;
+
+ for (;
+ insns[attr.insns_cnt].code != (BPF_JMP | BPF_EXIT);
+ attr.insns_cnt++) {
+ }
+ attr.insns_cnt++;
+
+ fd = bpf_load_program_xattr(&attr, bpf_log_buf, sizeof(bpf_log_buf));
+ if (verbose && fd < 0)
+ fprintf(stderr, "%s\n", bpf_log_buf);
+
+ return fd;
+}
+
+static int run_test(int cgroup_fd, struct sockopt_test *test)
+{
+ int sock_fd, err, prog_fd;
+ void *optval = NULL;
+ int ret = 0;
+
+ prog_fd = load_prog(test->insns, test->expected_attach_type);
+ if (prog_fd < 0) {
+ if (test->error == DENY_LOAD)
+ return 0;
+
+ log_err("Failed to load BPF program");
+ return -1;
+ }
+
+ err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0);
+ if (err < 0) {
+ if (test->error == DENY_ATTACH)
+ goto close_prog_fd;
+
+ log_err("Failed to attach BPF program");
+ ret = -1;
+ goto close_prog_fd;
+ }
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock_fd < 0) {
+ log_err("Failed to create AF_INET socket");
+ ret = -1;
+ goto detach_prog;
+ }
+
+ if (test->set_optlen) {
+ err = setsockopt(sock_fd, test->set_level, test->set_optname,
+ test->set_optval, test->set_optlen);
+ if (err) {
+ if (errno == EPERM && test->error == EPERM_SETSOCKOPT)
+ goto close_sock_fd;
+ if (errno == EFAULT && test->error == EFAULT_SETSOCKOPT)
+ goto free_optval;
+
+ log_err("Failed to call setsockopt");
+ ret = -1;
+ goto close_sock_fd;
+ }
+ }
+
+ if (test->get_optlen) {
+ optval = malloc(test->get_optlen);
+ socklen_t optlen = test->get_optlen;
+ socklen_t expected_get_optlen = test->get_optlen_ret ?:
+ test->get_optlen;
+
+ err = getsockopt(sock_fd, test->get_level, test->get_optname,
+ optval, &optlen);
+ if (err) {
+ if (errno == EPERM && test->error == EPERM_GETSOCKOPT)
+ goto free_optval;
+ if (errno == EFAULT && test->error == EFAULT_GETSOCKOPT)
+ goto free_optval;
+
+ log_err("Failed to call getsockopt");
+ ret = -1;
+ goto free_optval;
+ }
+
+ if (optlen != expected_get_optlen) {
+ errno = 0;
+ log_err("getsockopt returned unexpected optlen");
+ ret = -1;
+ goto free_optval;
+ }
+
+ if (memcmp(optval, test->get_optval, optlen) != 0) {
+ errno = 0;
+ log_err("getsockopt returned unexpected optval");
+ ret = -1;
+ goto free_optval;
+ }
+ }
+
+ ret = test->error != OK;
+
+free_optval:
+ free(optval);
+close_sock_fd:
+ close(sock_fd);
+detach_prog:
+ bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type);
+close_prog_fd:
+ close(prog_fd);
+ return ret;
+}
+
+int main(int args, char **argv)
+{
+ int err = EXIT_FAILURE, error_cnt = 0;
+ int cgroup_fd, i;
+
+ if (setup_cgroup_environment())
+ goto cleanup_obj;
+
+ cgroup_fd = create_and_get_cgroup(CG_PATH);
+ if (cgroup_fd < 0)
+ goto cleanup_cgroup_env;
+
+ if (join_cgroup(CG_PATH))
+ goto cleanup_cgroup;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ int err = run_test(cgroup_fd, &tests[i]);
+
+ if (err)
+ error_cnt++;
+
+ printf("#%d %s: %s\n", i, err ? "FAIL" : "PASS",
+ tests[i].descr);
+ }
+
+ printf("Summary: %ld PASSED, %d FAILED\n",
+ ARRAY_SIZE(tests) - error_cnt, error_cnt);
+ err = error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
+
+cleanup_cgroup:
+ close(cgroup_fd);
+cleanup_cgroup_env:
+ cleanup_cgroup_environment();
+cleanup_obj:
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sockopt_multi.c b/tools/testing/selftests/bpf/test_sockopt_multi.c
new file mode 100644
index 000000000000..4be3441db867
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockopt_multi.c
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <error.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+{
+ enum bpf_attach_type attach_type;
+ enum bpf_prog_type prog_type;
+ struct bpf_program *prog;
+ int err;
+
+ err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
+ if (err) {
+ log_err("Failed to deduct types for %s BPF program", title);
+ return -1;
+ }
+
+ prog = bpf_object__find_program_by_title(obj, title);
+ if (!prog) {
+ log_err("Failed to find %s BPF program", title);
+ return -1;
+ }
+
+ err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
+ attach_type, BPF_F_ALLOW_MULTI);
+ if (err) {
+ log_err("Failed to attach %s BPF program", title);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title)
+{
+ enum bpf_attach_type attach_type;
+ enum bpf_prog_type prog_type;
+ struct bpf_program *prog;
+ int err;
+
+ err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
+ if (err)
+ return -1;
+
+ prog = bpf_object__find_program_by_title(obj, title);
+ if (!prog)
+ return -1;
+
+ err = bpf_prog_detach2(bpf_program__fd(prog), cgroup_fd,
+ attach_type);
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
+ int cg_child, int sock_fd)
+{
+ socklen_t optlen;
+ __u8 buf;
+ int err;
+
+ /* Set IP_TOS to the expected value (0x80). */
+
+ buf = 0x80;
+ err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+ if (err < 0) {
+ log_err("Failed to call setsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ buf = 0x00;
+ optlen = 1;
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (err) {
+ log_err("Failed to call getsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ if (buf != 0x80) {
+ log_err("Unexpected getsockopt 0x%x != 0x80 without BPF", buf);
+ err = -1;
+ goto detach;
+ }
+
+ /* Attach child program and make sure it returns new value:
+ * - kernel: -> 0x80
+ * - child: 0x80 -> 0x90
+ */
+
+ err = prog_attach(obj, cg_child, "cgroup/getsockopt/child");
+ if (err)
+ goto detach;
+
+ buf = 0x00;
+ optlen = 1;
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (err) {
+ log_err("Failed to call getsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ if (buf != 0x90) {
+ log_err("Unexpected getsockopt 0x%x != 0x90", buf);
+ err = -1;
+ goto detach;
+ }
+
+ /* Attach parent program and make sure it returns new value:
+ * - kernel: -> 0x80
+ * - child: 0x80 -> 0x90
+ * - parent: 0x90 -> 0xA0
+ */
+
+ err = prog_attach(obj, cg_parent, "cgroup/getsockopt/parent");
+ if (err)
+ goto detach;
+
+ buf = 0x00;
+ optlen = 1;
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (err) {
+ log_err("Failed to call getsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ if (buf != 0xA0) {
+ log_err("Unexpected getsockopt 0x%x != 0xA0", buf);
+ err = -1;
+ goto detach;
+ }
+
+ /* Setting unexpected initial sockopt should return EPERM:
+ * - kernel: -> 0x40
+ * - child: unexpected 0x40, EPERM
+ * - parent: unexpected 0x40, EPERM
+ */
+
+ buf = 0x40;
+ if (setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1) < 0) {
+ log_err("Failed to call setsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ buf = 0x00;
+ optlen = 1;
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!err) {
+ log_err("Unexpected success from getsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ /* Detach child program and make sure we still get EPERM:
+ * - kernel: -> 0x40
+ * - parent: unexpected 0x40, EPERM
+ */
+
+ err = prog_detach(obj, cg_child, "cgroup/getsockopt/child");
+ if (err) {
+ log_err("Failed to detach child program");
+ goto detach;
+ }
+
+ buf = 0x00;
+ optlen = 1;
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!err) {
+ log_err("Unexpected success from getsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ /* Set initial value to the one the parent program expects:
+ * - kernel: -> 0x90
+ * - parent: 0x90 -> 0xA0
+ */
+
+ buf = 0x90;
+ err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+ if (err < 0) {
+ log_err("Failed to call setsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ buf = 0x00;
+ optlen = 1;
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (err) {
+ log_err("Failed to call getsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ if (buf != 0xA0) {
+ log_err("Unexpected getsockopt 0x%x != 0xA0", buf);
+ err = -1;
+ goto detach;
+ }
+
+detach:
+ prog_detach(obj, cg_child, "cgroup/getsockopt/child");
+ prog_detach(obj, cg_parent, "cgroup/getsockopt/parent");
+
+ return err;
+}
+
+static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
+ int cg_child, int sock_fd)
+{
+ socklen_t optlen;
+ __u8 buf;
+ int err;
+
+ /* Set IP_TOS to the expected value (0x80). */
+
+ buf = 0x80;
+ err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+ if (err < 0) {
+ log_err("Failed to call setsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ buf = 0x00;
+ optlen = 1;
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (err) {
+ log_err("Failed to call getsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ if (buf != 0x80) {
+ log_err("Unexpected getsockopt 0x%x != 0x80 without BPF", buf);
+ err = -1;
+ goto detach;
+ }
+
+ /* Attach child program and make sure it adds 0x10. */
+
+ err = prog_attach(obj, cg_child, "cgroup/setsockopt");
+ if (err)
+ goto detach;
+
+ buf = 0x80;
+ err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+ if (err < 0) {
+ log_err("Failed to call setsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ buf = 0x00;
+ optlen = 1;
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (err) {
+ log_err("Failed to call getsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ if (buf != 0x80 + 0x10) {
+ log_err("Unexpected getsockopt 0x%x != 0x80 + 0x10", buf);
+ err = -1;
+ goto detach;
+ }
+
+ /* Attach parent program and make sure it adds another 0x10. */
+
+ err = prog_attach(obj, cg_parent, "cgroup/setsockopt");
+ if (err)
+ goto detach;
+
+ buf = 0x80;
+ err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+ if (err < 0) {
+ log_err("Failed to call setsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ buf = 0x00;
+ optlen = 1;
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (err) {
+ log_err("Failed to call getsockopt(IP_TOS)");
+ goto detach;
+ }
+
+ if (buf != 0x80 + 2 * 0x10) {
+ log_err("Unexpected getsockopt 0x%x != 0x80 + 2 * 0x10", buf);
+ err = -1;
+ goto detach;
+ }
+
+detach:
+ prog_detach(obj, cg_child, "cgroup/setsockopt");
+ prog_detach(obj, cg_parent, "cgroup/setsockopt");
+
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ struct bpf_prog_load_attr attr = {
+ .file = "./sockopt_multi.o",
+ };
+ int cg_parent = -1, cg_child = -1;
+ struct bpf_object *obj = NULL;
+ int sock_fd = -1;
+ int err = -1;
+ int ignored;
+
+ if (setup_cgroup_environment()) {
+ log_err("Failed to setup cgroup environment\n");
+ goto out;
+ }
+
+ cg_parent = create_and_get_cgroup("/parent");
+ if (cg_parent < 0) {
+ log_err("Failed to create cgroup /parent\n");
+ goto out;
+ }
+
+ cg_child = create_and_get_cgroup("/parent/child");
+ if (cg_child < 0) {
+ log_err("Failed to create cgroup /parent/child\n");
+ goto out;
+ }
+
+ if (join_cgroup("/parent/child")) {
+ log_err("Failed to join cgroup /parent/child\n");
+ goto out;
+ }
+
+ err = bpf_prog_load_xattr(&attr, &obj, &ignored);
+ if (err) {
+ log_err("Failed to load BPF object");
+ goto out;
+ }
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock_fd < 0) {
+ log_err("Failed to create socket");
+ goto out;
+ }
+
+ if (run_getsockopt_test(obj, cg_parent, cg_child, sock_fd))
+ err = -1;
+ printf("test_sockopt_multi: getsockopt %s\n",
+ err ? "FAILED" : "PASSED");
+
+ if (run_setsockopt_test(obj, cg_parent, cg_child, sock_fd))
+ err = -1;
+ printf("test_sockopt_multi: setsockopt %s\n",
+ err ? "FAILED" : "PASSED");
+
+out:
+ close(sock_fd);
+ bpf_object__close(obj);
+ close(cg_child);
+ close(cg_parent);
+
+ printf("test_sockopt_multi: %s\n", err ? "FAILED" : "PASSED");
+ return err ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/bpf/test_sockopt_sk.c b/tools/testing/selftests/bpf/test_sockopt_sk.c
new file mode 100644
index 000000000000..036b652e5ca9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockopt_sk.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH "/sockopt"
+
+#define SOL_CUSTOM 0xdeadbeef
+
+static int getsetsockopt(void)
+{
+ int fd, err;
+ union {
+ char u8[4];
+ __u32 u32;
+ } buf = {};
+ socklen_t optlen;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0) {
+ log_err("Failed to create socket");
+ return -1;
+ }
+
+ /* IP_TOS - BPF bypass */
+
+ buf.u8[0] = 0x08;
+ err = setsockopt(fd, SOL_IP, IP_TOS, &buf, 1);
+ if (err) {
+ log_err("Failed to call setsockopt(IP_TOS)");
+ goto err;
+ }
+
+ buf.u8[0] = 0x00;
+ optlen = 1;
+ err = getsockopt(fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (err) {
+ log_err("Failed to call getsockopt(IP_TOS)");
+ goto err;
+ }
+
+ if (buf.u8[0] != 0x08) {
+ log_err("Unexpected getsockopt(IP_TOS) buf[0] 0x%02x != 0x08",
+ buf.u8[0]);
+ goto err;
+ }
+
+ /* IP_TTL - EPERM */
+
+ buf.u8[0] = 1;
+ err = setsockopt(fd, SOL_IP, IP_TTL, &buf, 1);
+ if (!err || errno != EPERM) {
+ log_err("Unexpected success from setsockopt(IP_TTL)");
+ goto err;
+ }
+
+ /* SOL_CUSTOM - handled by BPF */
+
+ buf.u8[0] = 0x01;
+ err = setsockopt(fd, SOL_CUSTOM, 0, &buf, 1);
+ if (err) {
+ log_err("Failed to call setsockopt");
+ goto err;
+ }
+
+ buf.u32 = 0x00;
+ optlen = 4;
+ err = getsockopt(fd, SOL_CUSTOM, 0, &buf, &optlen);
+ if (err) {
+ log_err("Failed to call getsockopt");
+ goto err;
+ }
+
+ if (optlen != 1) {
+ log_err("Unexpected optlen %d != 1", optlen);
+ goto err;
+ }
+ if (buf.u8[0] != 0x01) {
+ log_err("Unexpected buf[0] 0x%02x != 0x01", buf.u8[0]);
+ goto err;
+ }
+
+ /* SO_SNDBUF is overwritten */
+
+ buf.u32 = 0x01010101;
+ err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf, 4);
+ if (err) {
+ log_err("Failed to call setsockopt(SO_SNDBUF)");
+ goto err;
+ }
+
+ buf.u32 = 0x00;
+ optlen = 4;
+ err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf, &optlen);
+ if (err) {
+ log_err("Failed to call getsockopt(SO_SNDBUF)");
+ goto err;
+ }
+
+ if (buf.u32 != 0x55AA*2) {
+ log_err("Unexpected getsockopt(SO_SNDBUF) 0x%x != 0x55AA*2",
+ buf.u32);
+ goto err;
+ }
+
+ close(fd);
+ return 0;
+err:
+ close(fd);
+ return -1;
+}
+
+static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+{
+ enum bpf_attach_type attach_type;
+ enum bpf_prog_type prog_type;
+ struct bpf_program *prog;
+ int err;
+
+ err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
+ if (err) {
+ log_err("Failed to deduct types for %s BPF program", title);
+ return -1;
+ }
+
+ prog = bpf_object__find_program_by_title(obj, title);
+ if (!prog) {
+ log_err("Failed to find %s BPF program", title);
+ return -1;
+ }
+
+ err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
+ attach_type, 0);
+ if (err) {
+ log_err("Failed to attach %s BPF program", title);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int run_test(int cgroup_fd)
+{
+ struct bpf_prog_load_attr attr = {
+ .file = "./sockopt_sk.o",
+ };
+ struct bpf_object *obj;
+ int ignored;
+ int err;
+
+ err = bpf_prog_load_xattr(&attr, &obj, &ignored);
+ if (err) {
+ log_err("Failed to load BPF object");
+ return -1;
+ }
+
+ err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
+ if (err)
+ goto close_bpf_object;
+
+ err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
+ if (err)
+ goto close_bpf_object;
+
+ err = getsetsockopt();
+
+close_bpf_object:
+ bpf_object__close(obj);
+ return err;
+}
+
+int main(int args, char **argv)
+{
+ int cgroup_fd;
+ int err = EXIT_SUCCESS;
+
+ if (setup_cgroup_environment())
+ goto cleanup_obj;
+
+ cgroup_fd = create_and_get_cgroup(CG_PATH);
+ if (cgroup_fd < 0)
+ goto cleanup_cgroup_env;
+
+ if (join_cgroup(CG_PATH))
+ goto cleanup_cgroup;
+
+ if (run_test(cgroup_fd))
+ err = EXIT_FAILURE;
+
+ printf("test_sockopt_sk: %s\n",
+ err == EXIT_SUCCESS ? "PASSED" : "FAILED");
+
+cleanup_cgroup:
+ close(cgroup_fd);
+cleanup_cgroup_env:
+ cleanup_cgroup_environment();
+cleanup_obj:
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_tcp_rtt.c b/tools/testing/selftests/bpf/test_tcp_rtt.c
new file mode 100644
index 000000000000..90c3862f74a8
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_rtt.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <pthread.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH "/tcp_rtt"
+
+struct tcp_rtt_storage {
+ __u32 invoked;
+ __u32 dsack_dups;
+ __u32 delivered;
+ __u32 delivered_ce;
+ __u32 icsk_retransmits;
+};
+
+static void send_byte(int fd)
+{
+ char b = 0x55;
+
+ if (write(fd, &b, sizeof(b)) != 1)
+ error(1, errno, "Failed to send single byte");
+}
+
+static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
+ __u32 dsack_dups, __u32 delivered, __u32 delivered_ce,
+ __u32 icsk_retransmits)
+{
+ int err = 0;
+ struct tcp_rtt_storage val;
+
+ if (bpf_map_lookup_elem(map_fd, &client_fd, &val) < 0)
+ error(1, errno, "Failed to read socket storage");
+
+ if (val.invoked != invoked) {
+ log_err("%s: unexpected bpf_tcp_sock.invoked %d != %d",
+ msg, val.invoked, invoked);
+ err++;
+ }
+
+ if (val.dsack_dups != dsack_dups) {
+ log_err("%s: unexpected bpf_tcp_sock.dsack_dups %d != %d",
+ msg, val.dsack_dups, dsack_dups);
+ err++;
+ }
+
+ if (val.delivered != delivered) {
+ log_err("%s: unexpected bpf_tcp_sock.delivered %d != %d",
+ msg, val.delivered, delivered);
+ err++;
+ }
+
+ if (val.delivered_ce != delivered_ce) {
+ log_err("%s: unexpected bpf_tcp_sock.delivered_ce %d != %d",
+ msg, val.delivered_ce, delivered_ce);
+ err++;
+ }
+
+ if (val.icsk_retransmits != icsk_retransmits) {
+ log_err("%s: unexpected bpf_tcp_sock.icsk_retransmits %d != %d",
+ msg, val.icsk_retransmits, icsk_retransmits);
+ err++;
+ }
+
+ return err;
+}
+
+static int connect_to_server(int server_fd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int fd;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ goto out;
+ }
+
+ if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
+ log_err("Fail to connect to server");
+ goto out;
+ }
+
+ return fd;
+
+out:
+ close(fd);
+ return -1;
+}
+
+static int run_test(int cgroup_fd, int server_fd)
+{
+ struct bpf_prog_load_attr attr = {
+ .prog_type = BPF_PROG_TYPE_SOCK_OPS,
+ .file = "./tcp_rtt.o",
+ .expected_attach_type = BPF_CGROUP_SOCK_OPS,
+ };
+ struct bpf_object *obj;
+ struct bpf_map *map;
+ int client_fd;
+ int prog_fd;
+ int map_fd;
+ int err;
+
+ err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+ if (err) {
+ log_err("Failed to load BPF object");
+ return -1;
+ }
+
+ map = bpf_map__next(NULL, obj);
+ map_fd = bpf_map__fd(map);
+
+ err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
+ if (err) {
+ log_err("Failed to attach BPF program");
+ goto close_bpf_object;
+ }
+
+ client_fd = connect_to_server(server_fd);
+ if (client_fd < 0) {
+ err = -1;
+ goto close_bpf_object;
+ }
+
+ err += verify_sk(map_fd, client_fd, "syn-ack",
+ /*invoked=*/1,
+ /*dsack_dups=*/0,
+ /*delivered=*/1,
+ /*delivered_ce=*/0,
+ /*icsk_retransmits=*/0);
+
+ send_byte(client_fd);
+
+ err += verify_sk(map_fd, client_fd, "first payload byte",
+ /*invoked=*/2,
+ /*dsack_dups=*/0,
+ /*delivered=*/2,
+ /*delivered_ce=*/0,
+ /*icsk_retransmits=*/0);
+
+ close(client_fd);
+
+close_bpf_object:
+ bpf_object__close(obj);
+ return err;
+}
+
+static int start_server(void)
+{
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ };
+ int fd;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0) {
+ log_err("Failed to create server socket");
+ return -1;
+ }
+
+ if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ log_err("Failed to bind socket");
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+static void *server_thread(void *arg)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int fd = *(int *)arg;
+ int client_fd;
+
+ if (listen(fd, 1) < 0)
+ error(1, errno, "Failed to listed on socket");
+
+ client_fd = accept(fd, (struct sockaddr *)&addr, &len);
+ if (client_fd < 0)
+ error(1, errno, "Failed to accept client");
+
+ /* Wait for the next connection (that never arrives)
+ * to keep this thread alive to prevent calling
+ * close() on client_fd.
+ */
+ if (accept(fd, (struct sockaddr *)&addr, &len) >= 0)
+ error(1, errno, "Unexpected success in second accept");
+
+ close(client_fd);
+
+ return NULL;
+}
+
+int main(int args, char **argv)
+{
+ int server_fd, cgroup_fd;
+ int err = EXIT_SUCCESS;
+ pthread_t tid;
+
+ if (setup_cgroup_environment())
+ goto cleanup_obj;
+
+ cgroup_fd = create_and_get_cgroup(CG_PATH);
+ if (cgroup_fd < 0)
+ goto cleanup_cgroup_env;
+
+ if (join_cgroup(CG_PATH))
+ goto cleanup_cgroup;
+
+ server_fd = start_server();
+ if (server_fd < 0) {
+ err = EXIT_FAILURE;
+ goto cleanup_cgroup;
+ }
+
+ pthread_create(&tid, NULL, server_thread, (void *)&server_fd);
+
+ if (run_test(cgroup_fd, server_fd))
+ err = EXIT_FAILURE;
+
+ close(server_fd);
+
+ printf("test_sockopt_sk: %s\n",
+ err == EXIT_SUCCESS ? "PASSED" : "FAILED");
+
+cleanup_cgroup:
+ close(cgroup_fd);
+cleanup_cgroup_env:
+ cleanup_cgroup_environment();
+cleanup_obj:
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
new file mode 100755
index 000000000000..ba8ffcdaac30
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
@@ -0,0 +1,118 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Create 3 namespaces with 3 veth peers, and
+# forward packets in-between using native XDP
+#
+# XDP_TX
+# NS1(veth11) NS2(veth22) NS3(veth33)
+# | | |
+# | | |
+# (veth1, (veth2, (veth3,
+# id:111) id:122) id:133)
+# ^ | ^ | ^ |
+# | | XDP_REDIRECT | | XDP_REDIRECT | |
+# | ------------------ ------------------ |
+# -----------------------------------------
+# XDP_REDIRECT
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TESTNAME=xdp_veth
+BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
+BPF_DIR=$BPF_FS/test_$TESTNAME
+
+_cleanup()
+{
+ set +e
+ ip link del veth1 2> /dev/null
+ ip link del veth2 2> /dev/null
+ ip link del veth3 2> /dev/null
+ ip netns del ns1 2> /dev/null
+ ip netns del ns2 2> /dev/null
+ ip netns del ns3 2> /dev/null
+ rm -rf $BPF_DIR 2> /dev/null
+}
+
+cleanup_skip()
+{
+ echo "selftests: $TESTNAME [SKIP]"
+ _cleanup
+
+ exit $ksft_skip
+}
+
+cleanup()
+{
+ if [ "$?" = 0 ]; then
+ echo "selftests: $TESTNAME [PASS]"
+ else
+ echo "selftests: $TESTNAME [FAILED]"
+ fi
+ _cleanup
+}
+
+if [ $(id -u) -ne 0 ]; then
+ echo "selftests: $TESTNAME [SKIP] Need root privileges"
+ exit $ksft_skip
+fi
+
+if ! ip link set dev lo xdp off > /dev/null 2>&1; then
+ echo "selftests: $TESTNAME [SKIP] Could not run test without the ip xdp support"
+ exit $ksft_skip
+fi
+
+if [ -z "$BPF_FS" ]; then
+ echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted"
+ exit $ksft_skip
+fi
+
+if ! bpftool version > /dev/null 2>&1; then
+ echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool"
+ exit $ksft_skip
+fi
+
+set -e
+
+trap cleanup_skip EXIT
+
+ip netns add ns1
+ip netns add ns2
+ip netns add ns3
+
+ip link add veth1 index 111 type veth peer name veth11 netns ns1
+ip link add veth2 index 122 type veth peer name veth22 netns ns2
+ip link add veth3 index 133 type veth peer name veth33 netns ns3
+
+ip link set veth1 up
+ip link set veth2 up
+ip link set veth3 up
+
+ip -n ns1 addr add 10.1.1.11/24 dev veth11
+ip -n ns3 addr add 10.1.1.33/24 dev veth33
+
+ip -n ns1 link set dev veth11 up
+ip -n ns2 link set dev veth22 up
+ip -n ns3 link set dev veth33 up
+
+mkdir $BPF_DIR
+bpftool prog loadall \
+ xdp_redirect_map.o $BPF_DIR/progs type xdp \
+ pinmaps $BPF_DIR/maps
+bpftool map update pinned $BPF_DIR/maps/tx_port key 0 0 0 0 value 122 0 0 0
+bpftool map update pinned $BPF_DIR/maps/tx_port key 1 0 0 0 value 133 0 0 0
+bpftool map update pinned $BPF_DIR/maps/tx_port key 2 0 0 0 value 111 0 0 0
+ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0
+ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
+ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
+
+ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
+ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx
+ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
+
+trap cleanup EXIT
+
+ip netns exec ns1 ping -c 1 -W 1 10.1.1.33
+
+exit 0
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 9a275d932fd5..1b24e36b4047 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,7 +5,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
- rtnetlink.sh xfrm_policy.sh
+ rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh
TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 89f84b5118bf..e4b878d95ba0 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -27,3 +27,4 @@ CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_ETF=m
+CONFIG_TEST_BLACKHOLE_DEV=m
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index ed606a2e3865..bdbf4b3125b6 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -269,6 +269,25 @@ kci_test_addrlft()
echo "PASS: preferred_lft addresses have expired"
}
+kci_test_promote_secondaries()
+{
+ promote=$(sysctl -n net.ipv4.conf.$devdummy.promote_secondaries)
+
+ sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=1
+
+ for i in $(seq 2 254);do
+ IP="10.23.11.$i"
+ ip -f inet addr add $IP/16 brd + dev "$devdummy"
+ ifconfig "$devdummy" $IP netmask 255.255.0.0
+ done
+
+ ip addr flush dev "$devdummy"
+
+ [ $promote -eq 0 ] && sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=0
+
+ echo "PASS: promote_secondaries complete"
+}
+
kci_test_addrlabel()
{
ret=0
@@ -719,13 +738,17 @@ kci_test_ipsec_offload()
sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/
sysfsf=$sysfsd/ipsec
sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
+ probed=false
# setup netdevsim since dummydev doesn't have offload support
- modprobe netdevsim
- check_err $?
- if [ $ret -ne 0 ]; then
- echo "FAIL: ipsec_offload can't load netdevsim"
- return 1
+ if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+ modprobe -q netdevsim
+ check_err $?
+ if [ $ret -ne 0 ]; then
+ echo "SKIP: ipsec_offload can't load netdevsim"
+ return $ksft_skip
+ fi
+ probed=true
fi
echo "0" > /sys/bus/netdevsim/new_device
@@ -805,7 +828,7 @@ EOF
fi
# clean up any leftovers
- rmmod netdevsim
+ $probed && rmmod netdevsim
if [ $ret -ne 0 ]; then
echo "FAIL: ipsec_offload"
@@ -1161,6 +1184,7 @@ kci_test_rtnl()
kci_test_polrouting
kci_test_route_get
kci_test_addrlft
+ kci_test_promote_secondaries
kci_test_tc
kci_test_gre
kci_test_gretap
diff --git a/tools/testing/selftests/net/test_blackhole_dev.sh b/tools/testing/selftests/net/test_blackhole_dev.sh
new file mode 100755
index 000000000000..3119b80e711f
--- /dev/null
+++ b/tools/testing/selftests/net/test_blackhole_dev.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs blackhole-dev test using blackhole-dev kernel module
+
+if /sbin/modprobe -q test_blackhole_dev ; then
+ /sbin/modprobe -q -r test_blackhole_dev;
+ echo "test_blackhole_dev: ok";
+else
+ echo "test_blackhole_dev: [FAIL]";
+ exit 1;
+fi
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index ba919308fe30..d503b8764a8e 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -3,4 +3,5 @@ subpage_prot
tempfile
prot_sao
segv_errors
-wild_bctr \ No newline at end of file
+wild_bctr
+large_vm_fork_separation \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 43d68420e363..f1fbc15800c4 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,7 +2,8 @@
noarg:
$(MAKE) -C ../
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
+ large_vm_fork_separation
TEST_GEN_FILES := tempfile
top_srcdir = ../../../../..
@@ -13,6 +14,7 @@ $(TEST_GEN_PROGS): ../harness.c
$(OUTPUT)/prot_sao: ../utils.c
$(OUTPUT)/wild_bctr: CFLAGS += -m64
+$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
$(OUTPUT)/tempfile:
dd if=/dev/zero of=$@ bs=64k count=1
diff --git a/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c b/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c
new file mode 100644
index 000000000000..2363a7f3ab0d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019, Michael Ellerman, IBM Corp.
+//
+// Test that allocating memory beyond the memory limit and then forking is
+// handled correctly, ie. the child is able to access the mappings beyond the
+// memory limit and the child's writes are not visible to the parent.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE MAP_FIXED // "Should be safe" above 512TB
+#endif
+
+
+static int test(void)
+{
+ int p2c[2], c2p[2], rc, status, c, *p;
+ unsigned long page_size;
+ pid_t pid;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ SKIP_IF(page_size != 65536);
+
+ // Create a mapping at 512TB to allocate an extended_id
+ p = mmap((void *)(512ul << 40), page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
+ if (p == MAP_FAILED) {
+ perror("mmap");
+ printf("Error: couldn't mmap(), confirm kernel has 4TB support?\n");
+ return 1;
+ }
+
+ printf("parent writing %p = 1\n", p);
+ *p = 1;
+
+ FAIL_IF(pipe(p2c) == -1 || pipe(c2p) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ FAIL_IF(read(p2c[0], &c, 1) != 1);
+
+ pid = getpid();
+ printf("child writing %p = %d\n", p, pid);
+ *p = pid;
+
+ FAIL_IF(write(c2p[1], &c, 1) != 1);
+ FAIL_IF(read(p2c[0], &c, 1) != 1);
+ exit(0);
+ }
+
+ c = 0;
+ FAIL_IF(write(p2c[1], &c, 1) != 1);
+ FAIL_IF(read(c2p[0], &c, 1) != 1);
+
+ // Prevent compiler optimisation
+ barrier();
+
+ rc = 0;
+ printf("parent reading %p = %d\n", p, *p);
+ if (*p != 1) {
+ printf("Error: BUG! parent saw child's write! *p = %d\n", *p);
+ rc = 1;
+ }
+
+ FAIL_IF(write(p2c[1], &c, 1) != 1);
+ FAIL_IF(waitpid(pid, &status, 0) == -1);
+ FAIL_IF(!WIFEXITED(status) || WEXITSTATUS(status));
+
+ if (rc == 0)
+ printf("success: test completed OK\n");
+
+ return rc;
+}
+
+int main(void)
+{
+ return test_harness(test, "large_vm_fork_separation");
+}
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index 6e5fb3d25681..2232b21e2510 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -459,5 +459,99 @@
"teardown": [
"$TC actions flush action mirred"
]
+ },
+ {
+ "id": "4749",
+ "name": "Add batch of 32 mirred redirect egress actions with cookie",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred egress redirect dev lo index \\$i cookie aabbccddeeff112233445566778800a1 \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "5c69",
+ "name": "Delete batch of 32 mirred redirect egress actions",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred egress redirect dev lo index \\$i \\\"; args=\\\"\\$args\\$cmd\\\"; done && $TC actions add \\$args\""
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "d3c0",
+ "name": "Add batch of 32 mirred mirror ingress actions with cookie",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred ingress mirror dev lo index \\$i cookie aabbccddeeff112233445566778800a1 \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "e684",
+ "name": "Delete batch of 32 mirred mirror ingress actions",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred ingress mirror dev lo index \\$i \\\"; args=\\\"\\$args\\$cmd\\\"; done && $TC actions add \\$args\""
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "0",
+ "teardown": []
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
new file mode 100644
index 000000000000..9c792fa8ca23
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
@@ -0,0 +1,276 @@
+[
+ {
+ "id": "ddd9",
+ "name": "Add prio qdisc on egress",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "setup": [
+ "$IP link add dev $DEV1 type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc prio 1: root",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 handle 1: root prio",
+ "$IP link del dev $DEV1 type dummy"
+ ]
+ },
+ {
+ "id": "aa71",
+ "name": "Add prio qdisc on egress with handle of maximum value",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "setup": [
+ "$IP link add dev $DEV1 type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle ffff: prio",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc prio ffff: root",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DEV1 type dummy"
+ ]
+ },
+ {
+ "id": "db37",
+ "name": "Add prio qdisc on egress with invalid handle exceeding maximum value",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "setup": [
+ "$IP link add dev $DEV1 type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle 10000: prio",
+ "expExitCode": "255",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc prio 10000: root",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DEV1 type dummy"
+ ]
+ },
+ {
+ "id": "39d8",
+ "name": "Add prio qdisc on egress with unsupported argument",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "setup": [
+ "$IP link add dev $DEV1 type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio foorbar",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc prio 1: root",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DEV1 type dummy"
+ ]
+ },
+ {
+ "id": "5769",
+ "name": "Add prio qdisc on egress with 4 bands and new priomap",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "setup": [
+ "$IP link add dev $DEV1 type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 handle 1: root prio",
+ "$IP link del dev $DEV1 type dummy"
+ ]
+ },
+ {
+ "id": "fe0f",
+ "name": "Add prio qdisc on egress with 4 bands and priomap exceeding TC_PRIO_MAX entries",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "setup": [
+ "$IP link add dev $DEV1 type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0 1 1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0 1 1",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DEV1 type dummy"
+ ]
+ },
+ {
+ "id": "1f91",
+ "name": "Add prio qdisc on egress with 4 bands and priomap's values exceeding bands number",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "setup": [
+ "$IP link add dev $DEV1 type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 7 5 0 0 1 2 3 0 0 0 0 0",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 7 5 0 0 1 2 3 0 0 0 0 0",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DEV1 type dummy"
+ ]
+ },
+ {
+ "id": "d248",
+ "name": "Add prio qdisc on egress with invalid bands value (< 2)",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "setup": [
+ "$IP link add dev $DEV1 type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 1 priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc prio 1: root.*bands 1 priomap.*0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DEV1 type dummy"
+ ]
+ },
+ {
+ "id": "1d0e",
+ "name": "Add prio qdisc on egress with invalid bands value exceeding TCQ_PRIO_BANDS",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "setup": [
+ "$IP link add dev $DEV1 type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 1024 priomap 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc prio 1: root.*bands 1024 priomap.*1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DEV1 type dummy"
+ ]
+ },
+ {
+ "id": "1971",
+ "name": "Replace default prio qdisc on egress with 8 bands and new priomap",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "setup": [
+ "$IP link add dev $DEV1 type dummy || /bin/true",
+ "$TC qdisc add dev $DEV1 handle 1: root prio"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $DEV1 handle 1: root prio bands 8 priomap 1 1 2 2 3 3 4 4 5 5 6 6 7 7 0 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc prio 1: root.*bands 8 priomap.*1 1 2 2 3 3 4 4 5 5 6 6 7 7 0 0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 handle 1: root prio",
+ "$IP link del dev $DEV1 type dummy"
+ ]
+ },
+ {
+ "id": "d88a",
+ "name": "Add duplicate prio qdisc on egress",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "setup": [
+ "$IP link add dev $DEV1 type dummy || /bin/true",
+ "$TC qdisc add dev $DEV1 handle 1: root prio"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc prio 1: root",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 handle 1: root prio",
+ "$IP link del dev $DEV1 type dummy"
+ ]
+ },
+ {
+ "id": "5948",
+ "name": "Delete nonexistent prio qdisc",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "setup": [
+ "$IP link add dev $DEV1 type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DEV1 root handle 1: prio",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc prio 1: root",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DEV1 type dummy"
+ ]
+ },
+ {
+ "id": "6c0a",
+ "name": "Add prio qdisc on egress with invalid format for handles",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "setup": [
+ "$IP link add dev $DEV1 type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle 123^ prio",
+ "expExitCode": "255",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc prio 123 root",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DEV1 type dummy"
+ ]
+ },
+ {
+ "id": "0175",
+ "name": "Delete prio qdisc twice",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "setup": [
+ "$IP link add dev $DEV1 type dummy || /bin/true",
+ "$TC qdisc add dev $DEV1 root handle 1: prio",
+ "$TC qdisc del dev $DEV1 root handle 1: prio"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DEV1 handle 1: root prio",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc ingress ffff:",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DEV1 type dummy"
+ ]
+ }
+]