-rw-r--r--.mailmap1
-rw-r--r--Documentation/devicetree/bindings/net/can/tcan4x5x.txt7
-rw-r--r--Documentation/devicetree/bindings/net/dsa/mt7530.txt214
-rw-r--r--Documentation/networking/af_xdp.rst10
-rw-r--r--Documentation/networking/device_drivers/index.rst1
-rw-r--r--Documentation/networking/device_drivers/mellanox/mlx5.rst33
-rw-r--r--Documentation/networking/device_drivers/pensando/ionic.rst43
-rw-r--r--Documentation/networking/devlink-info-versions.rst16
-rw-r--r--Documentation/networking/index.rst1
-rw-r--r--Documentation/networking/j1939.rst422
-rw-r--r--MAINTAINERS18
-rw-r--r--arch/arm64/net/bpf_jit.h3
-rw-r--r--arch/arm64/net/bpf_jit_comp.c6
-rw-r--r--arch/s390/net/bpf_jit_comp.c67
-rw-r--r--drivers/crypto/chelsio/chtls/chtls_main.c6
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c130
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h4
-rw-r--r--drivers/infiniband/hw/mlx5/flow.c21
-rw-r--r--drivers/infiniband/hw/mlx5/main.c109
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h7
-rw-r--r--drivers/net/can/dev.c155
-rw-r--r--drivers/net/can/kvaser_pciefd.c6
-rw-r--r--drivers/net/can/m_can/tcan4x5x.c24
-rw-r--r--drivers/net/can/slcan.c6
-rw-r--r--drivers/net/can/spi/mcp251x.c68
-rw-r--r--drivers/net/can/vcan.c7
-rw-r--r--drivers/net/can/vxcan.c4
-rw-r--r--drivers/net/dsa/mt7530.c371
-rw-r--r--drivers/net/dsa/mt7530.h61
-rw-r--r--drivers/net/ethernet/Kconfig1
-rw-r--r--drivers/net/ethernet/Makefile1
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c31
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c37
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpni.c2
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpni.h40
-rw-r--r--drivers/net/ethernet/freescale/gianfar.c2759
-rw-r--r--drivers/net/ethernet/freescale/gianfar.h45
-rw-r--r--drivers/net/ethernet/freescale/gianfar_ethtool.c13
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.c16
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c59
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c5
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.c52
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c14
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.c16
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c88
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c43
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c30
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c9
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c5
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c112
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c87
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c116
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c160
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c223
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/rdma.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c1588
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c480
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c98
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c395
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c93
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c570
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c770
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c1243
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c976
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c2308
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c294
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h1060
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c600
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h60
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h604
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h212
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c3
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/cmsg.c187
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/fw.h1
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.c33
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h24
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/offload.c3
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net.h2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c9
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_main.c4
-rw-r--r--drivers/net/ethernet/pensando/Kconfig32
-rw-r--r--drivers/net/ethernet/pensando/Makefile6
-rw-r--r--drivers/net/ethernet/pensando/ionic/Makefile8
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic.h73
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_bus.h16
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c292
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_debugfs.c248
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_debugfs.h34
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.c500
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.h299
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_devlink.c99
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_devlink.h14
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_ethtool.c779
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_ethtool.h9
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_if.h2482
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c2274
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.h277
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_main.c549
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_regs.h136
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c150
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h35
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_stats.c310
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_stats.h53
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_txrx.c925
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_txrx.h15
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iwarp.c2
-rw-r--r--drivers/net/ethernet/renesas/ravb.h9
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c21
-rw-r--r--drivers/net/ethernet/rocker/rocker_main.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h33
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c205
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.h19
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h12
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c21
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c18
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c483
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c247
-rw-r--r--drivers/net/geneve.c2
-rw-r--r--drivers/net/phy/phy.c6
-rw-r--r--drivers/net/phy/xilinx_gmii2rgmii.c4
-rw-r--r--drivers/net/usb/r8152.c384
-rw-r--r--include/linux/bpf.h5
-rw-r--r--include/linux/bpf_verifier.h1
-rw-r--r--include/linux/can/can-ml.h68
-rw-r--r--include/linux/can/core.h8
-rw-r--r--include/linux/can/dev.h3
-rw-r--r--include/linux/can/rx-offload.h13
-rw-r--r--include/linux/mdio.h11
-rw-r--r--include/linux/mlx5/device.h7
-rw-r--r--include/linux/mlx5/driver.h14
-rw-r--r--include/linux/mlx5/eswitch.h8
-rw-r--r--include/linux/mlx5/fs.h33
-rw-r--r--include/linux/mlx5/mlx5_ifc.h235
-rw-r--r--include/linux/netdevice.h14
-rw-r--r--include/linux/skbuff.h13
-rw-r--r--include/linux/tnum.h6
-rw-r--r--include/net/bpf_sk_storage.h10
-rw-r--r--include/net/devlink.h7
-rw-r--r--include/net/netns/can.h14
-rw-r--r--include/net/tls.h48
-rw-r--r--include/net/vxlan.h4
-rw-r--r--include/net/xdp_sock.h122
-rw-r--r--include/uapi/linux/bpf.h15
-rw-r--r--include/uapi/linux/can.h20
-rw-r--r--include/uapi/linux/can/j1939.h99
-rw-r--r--include/uapi/linux/if_xdp.h22
-rw-r--r--include/uapi/linux/openvswitch.h3
-rw-r--r--include/uapi/linux/pkt_cls.h2
-rw-r--r--kernel/bpf/btf.c16
-rw-r--r--kernel/bpf/syscall.c21
-rw-r--r--kernel/bpf/sysfs_btf.c9
-rw-r--r--kernel/bpf/verifier.c5
-rw-r--r--kernel/bpf/xskmap.c133
-rw-r--r--kernel/trace/Kconfig3
-rw-r--r--lib/test_bpf.c2
-rw-r--r--net/atm/mpoa_caches.c6
-rw-r--r--net/atm/pppoatm.c4
-rw-r--r--net/can/Kconfig2
-rw-r--r--net/can/Makefile2
-rw-r--r--net/can/af_can.c302
-rw-r--r--net/can/af_can.h19
-rw-r--r--net/can/bcm.c4
-rw-r--r--net/can/j1939/Kconfig15
-rw-r--r--net/can/j1939/Makefile10
-rw-r--r--net/can/j1939/address-claim.c230
-rw-r--r--net/can/j1939/bus.c333
-rw-r--r--net/can/j1939/j1939-priv.h338
-rw-r--r--net/can/j1939/main.c403
-rw-r--r--net/can/j1939/socket.c1160
-rw-r--r--net/can/j1939/transport.c2027
-rw-r--r--net/can/proc.c163
-rw-r--r--net/can/raw.c4
-rw-r--r--net/core/bpf_sk_storage.c104
-rw-r--r--net/core/dev.c15
-rw-r--r--net/core/filter.c2
-rw-r--r--net/core/skbuff.c6
-rw-r--r--net/core/sock.c9
-rw-r--r--net/openvswitch/datapath.c38
-rw-r--r--net/openvswitch/datapath.h2
-rw-r--r--net/openvswitch/flow.c13
-rw-r--r--net/rds/af_rds.c4
-rw-r--r--net/rds/bind.c4
-rw-r--r--net/rds/send.c4
-rw-r--r--net/sched/Kconfig13
-rw-r--r--net/sched/act_police.c27
-rw-r--r--net/sched/cls_api.c12
-rw-r--r--net/sched/sch_cbs.c2
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_keys.c9
-rw-r--r--net/tls/tls_device.c78
-rw-r--r--net/tls/tls_main.c46
-rw-r--r--net/tls/tls_sw.c6
-rw-r--r--net/vmw_vsock/virtio_transport_common.c9
-rw-r--r--net/xdp/xdp_umem.c67
-rw-r--r--net/xdp/xsk.c349
-rw-r--r--net/xdp/xsk.h13
-rw-r--r--net/xdp/xsk_diag.c5
-rw-r--r--net/xdp/xsk_queue.h71
-rw-r--r--samples/bpf/syscall_nrs.c6
-rw-r--r--samples/bpf/tracex5_kern.c13
-rw-r--r--samples/bpf/xdpsock_user.c243
-rwxr-xr-xscripts/link-vmlinux.sh6
-rw-r--r--tools/bpf/.gitignore1
-rw-r--r--tools/bpf/Makefile5
-rw-r--r--tools/bpf/bpftool/.gitignore2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-btf.rst7
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst9
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-net.rst57
-rw-r--r--tools/bpf/bpftool/Makefile31
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool89
-rw-r--r--tools/bpf/bpftool/btf.c344
-rw-r--r--tools/bpf/bpftool/btf_dumper.c8
-rw-r--r--tools/bpf/bpftool/cgroup.c2
-rw-r--r--tools/bpf/bpftool/common.c4
-rw-r--r--tools/bpf/bpftool/json_writer.c6
-rw-r--r--tools/bpf/bpftool/json_writer.h6
-rw-r--r--tools/bpf/bpftool/main.c2
-rw-r--r--tools/bpf/bpftool/main.h4
-rw-r--r--tools/bpf/bpftool/map.c64
-rw-r--r--tools/bpf/bpftool/map_perf_ring.c4
-rw-r--r--tools/bpf/bpftool/net.c178
-rw-r--r--tools/bpf/bpftool/perf.c4
-rw-r--r--tools/include/linux/compiler-gcc.h2
-rw-r--r--tools/include/uapi/linux/bpf.h15
-rw-r--r--tools/include/uapi/linux/if_xdp.h22
-rw-r--r--tools/lib/bpf/Makefile26
-rw-r--r--tools/lib/bpf/bpf.c24
-rw-r--r--tools/lib/bpf/bpf.h1
-rw-r--r--tools/lib/bpf/libbpf.map6
-rw-r--r--tools/lib/bpf/xsk.c86
-rw-r--r--tools/lib/bpf/xsk.h33
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile6
-rw-r--r--tools/testing/selftests/bpf/bpf_endian.h16
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/l4lb_all.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_lock.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pkt_access.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pkt_md_access.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/queue_stack_map.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reference_tracking.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spinlock.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_estats.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_noinline.c8
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_inherit.c97
-rw-r--r--tools/testing/selftests/bpf/progs/test_lwt_seg6local.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_seg6_loop.c8
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh143
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py2
-rw-r--r--tools/testing/selftests/bpf/test_progs.c42
-rw-r--r--tools/testing/selftests/bpf/test_progs.h19
-rw-r--r--tools/testing/selftests/bpf/test_sockopt_inherit.c253
-rw-r--r--tools/testing/selftests/bpf/test_sysctl.c130
-rw-r--r--tools/testing/selftests/bpf/test_tcp_rtt.c31
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c68
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c194
304 files changed, 34897 insertions, 3526 deletions
diff --git a/.mailmap b/.mailmap
index afaad605284a..dfd7fedbf578 100644
--- a/.mailmap
+++ b/.mailmap
@@ -63,6 +63,7 @@ Dengcheng Zhu <[email protected]> <[email protected]>
Dmitry Eremin-Solenikov <[email protected]>
diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
index c388f7d9feb1..27e1b4cebfbd 100644
--- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
+++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
@@ -10,8 +10,10 @@ Required properties:
- #size-cells: 0
- spi-max-frequency: Maximum frequency of the SPI bus the chip can
operate at should be less than or equal to 18 MHz.
- - data-ready-gpios: Interrupt GPIO for data and error reporting.
- device-wake-gpios: Wake up GPIO to wake up the TCAN device.
+ - interrupt-parent: the phandle to the interrupt controller which provides
+ the interrupt.
+ - interrupts: interrupt specification for data-ready.
See Documentation/devicetree/bindings/net/can/m_can.txt for additional
required property details.
@@ -30,7 +32,8 @@ tcan4x5x: tcan4x5x@0 {
#size-cells = <1>;
spi-max-frequency = <10000000>;
bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>;
- data-ready-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <14 GPIO_ACTIVE_LOW>;
device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>;
reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>;
diff --git a/Documentation/devicetree/bindings/net/dsa/mt7530.txt b/Documentation/devicetree/bindings/net/dsa/mt7530.txt
index 47aa205ee0bd..c5ed5d25f642 100644
--- a/Documentation/devicetree/bindings/net/dsa/mt7530.txt
+++ b/Documentation/devicetree/bindings/net/dsa/mt7530.txt
@@ -35,6 +35,42 @@ Required properties for the child nodes within ports container:
- phy-mode: String, must be either "trgmii" or "rgmii" for port labeled
"cpu".
+Port 5 of the switch is muxed between:
+1. GMAC5: GMAC5 can interface with another external MAC or PHY.
+2. PHY of port 0 or port 4: PHY interfaces with an external MAC like 2nd GMAC
+ of the SOC. Used in many setups where port 0/4 becomes the WAN port.
+ Note: On a MT7621 SOC with integrated switch, the 2nd GMAC can only be
+ connected to GMAC5 when the GPIOs for RGMII2 (GPIO 22-33) are not used and
+ not connected to an external component.
+
+Port 5 modes/configurations:
+1. Port 5 is disabled and isolated: An external phy can interface to the 2nd
+ GMAC of the SOC.
+ In the case of a built-in MT7530 switch, port 5 shares the RGMII bus with the
+ 2nd GMAC and an optional external phy. Mind the GPIO/pinctrl settings of the SOC!
+2. Port 5 is muxed to PHY of port 0/4: Port 0/4 interfaces with 2nd GMAC.
+ It is a simple MAC to PHY interface; port 5 needs to be set up for xMII mode
+ and RGMII delay.
+3. Port 5 is muxed to GMAC5 and can interface to an external phy.
+ Port 5 becomes an extra switch port.
+ This only works on platforms where the external phy TX<->RX lines are
+ swapped, like on the Ubiquiti ER-X-SFP.
+4. Port 5 is muxed to GMAC5 and interfaces with the 2nd GMAC as a 2nd CPU port.
+ Currently a 2nd CPU port is not supported by DSA code.
+
+Depending on how the external PHY is wired:
+1. normal: The PHY can only connect to the 2nd GMAC, but not to the switch.
+2. swapped: RGMII TX, RX are swapped; the external phy interfaces with the switch
+ as an ethernet port, but cannot interface with the 2nd GMAC.
+
+The port 5 mode is configured based on the devicetree.
+
+The driver tries to look up the phy-handle of the 2nd GMAC of the master device.
+When the phy-handle matches the PHY of port 0 or 4, port 5 is set up as mode 2.
+phy-mode must be set, see also example 2 below:
+ * mt7621: phy-mode = "rgmii-txid";
+ * mt7623: phy-mode = "rgmii";
+
See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional
required, optional properties and how the integrated switch subnodes must
be specified.
@@ -94,3 +130,181 @@ Example:
};
};
};
+
+Example 2: MT7621: Port 4 is WAN port: 2nd GMAC -> Port 5 -> PHY port 4.
+
+&eth {
+ gmac0: mac@0 {
+ compatible = "mediatek,eth-mac";
+ reg = <0>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+
+ gmac1: mac@1 {
+ compatible = "mediatek,eth-mac";
+ reg = <1>;
+ phy-mode = "rgmii-txid";
+ phy-handle = <&phy4>;
+ };
+
+ mdio: mdio-bus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* Internal phy */
+ phy4: ethernet-phy@4 {
+ reg = <4>;
+ };
+
+ mt7530: switch@1f {
+ compatible = "mediatek,mt7621";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x1f>;
+ pinctrl-names = "default";
+ mediatek,mcm;
+
+ resets = <&rstctrl 2>;
+ reset-names = "mcm";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ label = "lan0";
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "lan1";
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "lan2";
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "lan3";
+ };
+
+/* Commented out. Port 4 is handled by 2nd GMAC.
+ port@4 {
+ reg = <4>;
+ label = "lan4";
+ };
+*/
+
+ cpu_port0: port@6 {
+ reg = <6>;
+ label = "cpu";
+ ethernet = <&gmac0>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+ };
+ };
+ };
+};
+
+Example 3: MT7621: Port 5 is connected to external PHY: Port 5 -> external PHY.
+
+&eth {
+ gmac0: mac@0 {
+ compatible = "mediatek,eth-mac";
+ reg = <0>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+
+ mdio: mdio-bus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* External phy */
+ ephy5: ethernet-phy@7 {
+ reg = <7>;
+ };
+
+ mt7530: switch@1f {
+ compatible = "mediatek,mt7621";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x1f>;
+ pinctrl-names = "default";
+ mediatek,mcm;
+
+ resets = <&rstctrl 2>;
+ reset-names = "mcm";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ label = "lan0";
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "lan1";
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "lan2";
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "lan3";
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "lan4";
+ };
+
+ port@5 {
+ reg = <5>;
+ label = "lan5";
+ phy-mode = "rgmii";
+ phy-handle = <&ephy5>;
+ };
+
+ cpu_port0: port@6 {
+ reg = <6>;
+ label = "cpu";
+ ethernet = <&gmac0>;
+ phy-mode = "rgmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+ };
+ };
+ };
+ };
+};
diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index eeedc2e826aa..83f7ae5fc045 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -153,10 +153,12 @@ an example, if the UMEM is 64k and each chunk is 4k, then the UMEM has
Frames passed to the kernel are used for the ingress path (RX rings).
-The user application produces UMEM addrs to this ring. Note that the
-kernel will mask the incoming addr. E.g. for a chunk size of 2k, the
-log2(2048) LSB of the addr will be masked off, meaning that 2048, 2050
-and 3000 refers to the same chunk.
+The user application produces UMEM addrs to this ring. Note that, if
+running the application with aligned chunk mode, the kernel will mask
+the incoming addr. E.g. for a chunk size of 2k, the log2(2048) LSB of
+the addr will be masked off, meaning that 2048, 2050 and 3000 refer
+to the same chunk. If the user application is run in unaligned
+chunks mode, then the incoming addr will be left untouched.
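+
+For reference, a minimal user space sketch of the aligned-mode masking (the
+helper name below is illustrative only and not part of any API)::
+
+    /* base address of the chunk containing addr; chunk_size is a power of two */
+    static inline __u64 umem_chunk_base(__u64 addr, __u64 chunk_size)
+    {
+            return addr & ~(chunk_size - 1);
+    }
+
+    /* umem_chunk_base(2048, 2048), umem_chunk_base(2050, 2048) and
+     * umem_chunk_base(3000, 2048) all evaluate to 2048, i.e. the same chunk.
+     */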
UMEM Completion Ring
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 2b7fefe72351..57fec66c5419 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -23,6 +23,7 @@ Contents:
intel/ice
google/gve
mellanox/mlx5
+ pensando/ionic
.. only:: subproject
diff --git a/Documentation/networking/device_drivers/mellanox/mlx5.rst b/Documentation/networking/device_drivers/mellanox/mlx5.rst
index b30a63dbf4b7..d071c6b49e1f 100644
--- a/Documentation/networking/device_drivers/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/mellanox/mlx5.rst
@@ -11,6 +11,7 @@ Contents
- `Enabling the driver and kconfig options`_
- `Devlink info`_
+- `Devlink parameters`_
- `Devlink health reporters`_
- `mlx5 tracepoints`_
@@ -122,6 +123,38 @@ User command example::
stored:
fw.version 16.26.0100
+Devlink parameters
+==================
+
+flow_steering_mode: Device flow steering mode
+---------------------------------------------
+The flow steering mode parameter controls the flow steering mode of the driver.
+Two modes are supported:
+1. 'dmfs' - Device managed flow steering.
+2. 'smfs' - Software/Driver managed flow steering.
+
+In DMFS mode, the HW steering entities are created and managed through the
+Firmware.
+In SMFS mode, the HW steering entities are created and managed by the driver
+directly in hardware, without firmware intervention.
+
+SMFS mode is faster and provides a better rule insertion rate than the default DMFS mode.
+
+User command examples:
+
+- Set SMFS flow steering mode::
+
+ $ devlink dev param set pci/0000:06:00.0 name flow_steering_mode value "smfs" cmode runtime
+
+- Read device flow steering mode::
+
+ $ devlink dev param show pci/0000:06:00.0 name flow_steering_mode
+ pci/0000:06:00.0:
+ name flow_steering_mode type driver-specific
+ values:
+ cmode runtime value smfs
+
+
Devlink health reporters
========================
diff --git a/Documentation/networking/device_drivers/pensando/ionic.rst b/Documentation/networking/device_drivers/pensando/ionic.rst
new file mode 100644
index 000000000000..67b6839d516b
--- /dev/null
+++ b/Documentation/networking/device_drivers/pensando/ionic.rst
@@ -0,0 +1,43 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+==========================================================
+Linux* Driver for the Pensando(R) Ethernet adapter family
+==========================================================
+
+Pensando Linux Ethernet driver.
+Copyright(c) 2019 Pensando Systems, Inc
+
+Contents
+========
+
+- Identifying the Adapter
+- Support
+
+Identifying the Adapter
+=======================
+
+To find if one or more Pensando PCI Ethernet devices are installed on the
+host, check for the PCI devices::
+
+ $ lspci -d 1dd8:
+ b5:00.0 Ethernet controller: Device 1dd8:1002
+ b6:00.0 Ethernet controller: Device 1dd8:1002
+
+If such devices are listed as above, then the ionic.ko driver should find
+and configure them for use. There should be log entries in the kernel
+messages such as these::
+
+ $ dmesg | grep ionic
+ ionic Pensando Ethernet NIC Driver, ver 0.15.0-k
+ ionic 0000:b5:00.0 enp181s0: renamed from eth0
+ ionic 0000:b6:00.0 enp182s0: renamed from eth0
+
+Support
+=======
+For general Linux networking support, please use the netdev mailing
+list, which is monitored by Pensando personnel::
+
+For more specific support needs, please use the Pensando driver support
+email::
diff --git a/Documentation/networking/devlink-info-versions.rst b/Documentation/networking/devlink-info-versions.rst
index 4316342b7746..4914f581b1fd 100644
--- a/Documentation/networking/devlink-info-versions.rst
+++ b/Documentation/networking/devlink-info-versions.rst
@@ -14,11 +14,27 @@ board.rev
Board design revision.
+asic.id
+=======
+
+ASIC design identifier.
+
+asic.rev
+========
+
+ASIC design revision.
+
board.manufacture
=================
An identifier of the company or the facility which produced the part.
+fw
+==
+
+Overall firmware version, often representing the collection of
+fw.mgmt, fw.app, etc.
+
fw.mgmt
=======
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 37eabc17894c..0481d0ffebed 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -17,6 +17,7 @@ Contents:
devlink-trap
devlink-trap-netdevsim
ieee802154
+ j1939
kapi
z8530book
msg_zerocopy
diff --git a/Documentation/networking/j1939.rst b/Documentation/networking/j1939.rst
new file mode 100644
index 000000000000..ce7e7a044e08
--- /dev/null
+++ b/Documentation/networking/j1939.rst
@@ -0,0 +1,422 @@
+.. SPDX-License-Identifier: (GPL-2.0 OR MIT)
+
+===================
+J1939 Documentation
+===================
+
+Overview / What Is J1939
+========================
+
+SAE J1939 defines a higher layer protocol on CAN. It implements a more
+sophisticated addressing scheme and extends the maximum packet size above 8
+bytes. Several derived specifications exist, which differ from the original
+J1939 on the application level, like MilCAN A, NMEA2000 and especially
+ISO-11783 (ISOBUS). This last one specifies the so-called ETP (Extended
+Transport Protocol) which is has been included in this implementation. This
+results in a maximum packet size of ((2 ^ 24) - 1) * 7 bytes == 111 MiB.
+
+Specifications used
+-------------------
+
+* SAE J1939-21 : data link layer
+* SAE J1939-81 : network management
+* ISO 11783-6 : Virtual Terminal (Extended Transport Protocol)
+
+.. _j1939-motivation:
+
+Motivation
+==========
+
+Given the fact there's something like SocketCAN with an API similar to BSD
+sockets, we found some reasons to justify a kernel implementation for the
+addressing and transport methods used by J1939.
+
+* **Addressing:** when a process on an ECU communicates via J1939, it should
+ not necessarily know its source address. Although at least one process per
+ ECU should know the source address. Other processes should be able to reuse
+ that address. This way, address parameters for different processes
+ cooperating for the same ECU, are not duplicated. This way of working is
+ closely related to the UNIX concept where programs do just one thing, and do
+ it well.
+
+* **Dynamic addressing:** Address Claiming in J1939 is time critical.
+ Furthermore data transport should be handled properly during the address
+ negotiation. Putting this functionality in the kernel eliminates it as a
+ requirement for _every_ user space process that communicates via J1939. This
+ results in a consistent J1939 bus with proper addressing.
+
+* **Transport:** both TP & ETP reuse some PGNs to relay big packets over them.
+ Different processes may thus use the same TP & ETP PGNs without actually
+ knowing it. The individual TP & ETP sessions _must_ be serialized
+ (synchronized) between different processes. The kernel solves this problem
+ properly and eliminates the serialization (synchronization) as a requirement
+ for _every_ user space process that communicates via J1939.
+
+J1939 defines some other features (relaying, gateway, fast packet transport,
+...). In-kernel code for these would not contribute to protocol stability.
+Therefore, these parts are left to user space.
+
+The J1939 sockets operate on CAN network devices (see SocketCAN). Any J1939
+user space library operating on CAN raw sockets will still operate properly.
+Since such library does not communicate with the in-kernel implementation, care
+must be taken that these two do not interfere. In practice, this means they
+cannot share ECU addresses. A single ECU (or virtual ECU) address is used by
+the library exclusively, or by the in-kernel system exclusively.
+
+J1939 concepts
+==============
+
+PGN
+---
+
+The PGN (Parameter Group Number) is a number to identify a packet. The PGN
+is composed as follows:
+1 bit : Reserved Bit
+1 bit : Data Page
+8 bits : PF (PDU Format)
+8 bits : PS (PDU Specific)
+
+In J1939-21 distinction is made between PDU1 format (where PF < 240) and PDU2
+format (where PF >= 240). Furthermore, when using PDU2 format, the PS-field
+contains a so-called Group Extension, which is part of the PGN. When using PDU2
+format, the Group Extension is set in the PS-field.
+
+On the other hand, when using PDU1 format, the PS-field contains a so-called
+Destination Address, which is _not_ part of the PGN. When communicating a PGN
+from user space to kernel (or vice versa) and PDU1 format is used, the PS-field
+of the PGN shall be set to zero. The Destination Address shall be set
+elsewhere.
+
+When mapping a PGN to the 29-bit CAN identifier, the kernel gets/sets the
+Destination Address from/to the appropriate bits of the identifier.
+
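+As an illustration only (this helper is not part of the kernel API), the PGN
+value used by this interface can be composed from its fields like this:
+
+.. code-block:: C
+
+    /* reserved (1 bit), data page (1 bit), PF (8 bits), PS (8 bits) */
+    static inline __u32 j1939_pgn(__u8 res, __u8 dp, __u8 pf, __u8 ps)
+    {
+            /* in PDU1 format (pf < 240) the PS field of the PGN must be 0 */
+            return ((__u32)res << 17) | ((__u32)dp << 16) |
+                   ((__u32)pf << 8) | (pf < 240 ? 0 : ps);
+    }
+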
+
+Addressing
+----------
+
+Both static and dynamic addressing methods can be used.
+
+For static addresses, no extra checks are made by the kernel, and provided
+addresses are considered right. This responsibility is for the OEM or system
+integrator.
+
+For dynamic addressing, so-called Address Claiming, extra support is foreseen
+in the kernel. In J1939 any ECU is known by its 64-bit NAME. At the moment of
+a successful address claim, the kernel keeps track of both NAME and source
+address being claimed. This serves as a base for filter schemes. By default,
+packets with a destination that is not local will be rejected.
+
+Mixed mode packets (from a static to a dynamic address or vice versa) are
+allowed. The BSD sockets define separate API calls for getting/setting the
+local & remote address and are applicable for J1939 sockets.
+
+Filtering
+---------
+
+J1939 defines white list filters per socket that a user can set in order to
+receive a subset of the J1939 traffic. Filtering can be based on:
+
+* SA
+* SOURCE_NAME
+* PGN
+
+When multiple filters are in place for a single socket, and a packet comes in
+that matches several of those filters, the packet is only received once for
+that socket.
+
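+A minimal sketch of installing a single-PGN filter (error handling omitted;
+the constants come from <linux/can/j1939.h>):
+
+.. code-block:: C
+
+    struct j1939_filter filt = {
+            .pgn = 0x12300,                 /* example PGN */
+            .pgn_mask = J1939_PGN_MAX,      /* match all PGN bits */
+    };
+
+    setsockopt(sock, SOL_CAN_J1939, SO_J1939_FILTER, &filt, sizeof(filt));
+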
+How to Use J1939
+================
+
+API Calls
+---------
+
+On CAN, you first need to open a socket for communicating over a CAN network.
+To use J1939, #include <linux/can/j1939.h>. From there, <linux/can.h> will be
+included too. To open a socket, use:
+
+.. code-block:: C
+
+ s = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);
+
+J1939 does use SOCK_DGRAM sockets. In the J1939 specification, connections are
+mentioned in the context of transport protocol sessions. These still deliver
+packets to the other end (using several CAN packets). SOCK_STREAM is not
+supported.
+
+After the successful creation of the socket, you would normally use the bind(2)
+and/or connect(2) system call to bind the socket to a CAN interface. After
+binding and/or connecting the socket, you can read(2) and write(2) from/to the
+socket or use send(2), sendto(2), sendmsg(2) and the recv*() counterpart
+operations on the socket as usual. There are also J1939 specific socket options
+described below.
+
+In order to send data, a bind(2) must have been successful. bind(2) assigns a
+local address to a socket.
+
+Different from CAN raw sockets, the payload data is just the data that gets
+sent, without its header info. The header info is derived from the sockaddr supplied
+to bind(2), connect(2), sendto(2) and recvfrom(2). A write(2) with size 4 will
+result in a packet with 4 bytes.
+
+The sockaddr structure has extensions for use with J1939 as specified below:
+
+.. code-block:: C
+
+ struct sockaddr_can {
+ sa_family_t can_family;
+ int can_ifindex;
+ union {
+ struct {
+ __u64 name;
+ /* pgn:
+ * 8 bit: PS in PDU2 case, else 0
+ * 8 bit: PF
+ * 1 bit: DP
+ * 1 bit: reserved
+ */
+ __u32 pgn;
+ __u8 addr;
+ } j1939;
+ } can_addr;
+ }
+
+can_family & can_ifindex serve the same purpose as for other SocketCAN sockets.
+
+can_addr.j1939.pgn specifies the PGN (max 0x3ffff). Individual bits are
+specified above.
+
+can_addr.j1939.name contains the 64-bit J1939 NAME.
+
+can_addr.j1939.addr contains the address.
+
+The bind(2) system call assigns the local address, i.e. the source address when
+sending packets. If a PGN is set during bind(2), it is used as a RX filter.
+I.e. only packets with a matching PGN are received. If an ADDR or NAME is set
+it is used as a receive filter, too. It will match the destination NAME or ADDR
+of the incoming packet. The NAME filter will work only if appropriate Address
+Claiming for this name was done on the CAN bus and registered/cached by the
+kernel.
+
+On the other hand connect(2) assigns the remote address, i.e. the destination
+address. The PGN from connect(2) is used as the default PGN when sending
+packets. If ADDR or NAME is set it will be used as the default destination ADDR
+or NAME. Further a set ADDR or NAME during connect(2) is used as a receive
+filter. It will match the source NAME or ADDR of the incoming packet.
+
+Both write(2) and send(2) will send a packet with local address from bind(2) and
+the remote address from connect(2). Use sendto(2) to overwrite the destination
+address.
+
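+As a short sketch (error handling omitted), setting a default destination with
+connect(2) and then using write(2) could look like:
+
+.. code-block:: C
+
+    struct sockaddr_can raddr = {
+            .can_family = AF_CAN,
+            .can_addr.j1939 = {
+                    .name = J1939_NO_NAME,
+                    .addr = 0x30,           /* default destination address */
+                    .pgn = 0x12300,         /* default PGN */
+            },
+            .can_ifindex = if_nametoindex("can0"),
+    };
+    uint8_t dat[4] = { 0x01, 0x02, 0x03, 0x04 };
+
+    connect(sock, (struct sockaddr *)&raddr, sizeof(raddr));
+    write(sock, dat, sizeof(dat));  /* 4 byte packet to DA 0x30, PGN 0x12300 */
+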
+If can_addr.j1939.name is set (!= 0) the NAME is looked up by the kernel and
+the corresponding ADDR is used. If can_addr.j1939.name is not set (== 0),
+can_addr.j1939.addr is used.
+
+When creating a socket, reasonable defaults are set. Some options can be
+modified with setsockopt(2) & getsockopt(2).
+
+RX path related options:
+
+- SO_J1939_FILTER - configure array of filters
+- SO_J1939_PROMISC - disable filters set by bind(2) and connect(2)
+
+By default no broadcast packets can be sent or received. To enable sending or
+receiving broadcast packets use the socket option SO_BROADCAST:
+
+.. code-block:: C
+
+ int value = 1;
+ setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &value, sizeof(value));
+
+The following diagram illustrates the RX path:
+
+.. code::
+
+ +--------------------+
+ | incoming packet |
+ +--------------------+
+ |
+ V
+ +--------------------+
+ | SO_J1939_PROMISC? |
+ +--------------------+
+ | |
+ no | | yes
+ | |
+ .---------' `---------.
+ | |
+ +---------------------------+ |
+ | bind() + connect() + | |
+ | SOCK_BROADCAST filter | |
+ +---------------------------+ |
+ | |
+ |<---------------------'
+ V
+ +---------------------------+
+ | SO_J1939_FILTER |
+ +---------------------------+
+ |
+ V
+ +---------------------------+
+ | socket recv() |
+ +---------------------------+
+
+TX path related options:
+
+- SO_J1939_SEND_PRIO - change the default send priority for the socket (see
+  the example below)
+
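+A short example of changing the send priority on a socket (sketch; J1939
+priorities range from 0, highest, to 7, lowest, with 6 as the default):
+
+.. code-block:: C
+
+    int prio = 3;
+
+    setsockopt(sock, SOL_CAN_J1939, SO_J1939_SEND_PRIO, &prio, sizeof(prio));
+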
+Message Flags during send() and Related System Calls
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+send(2), sendto(2) and sendmsg(2) take a 'flags' argument. Currently
+supported flags are:
+
+* MSG_DONTWAIT, i.e. non-blocking operation.
+
+recvmsg(2)
+^^^^^^^^^^
+
+In most cases recvmsg(2) is needed if you want to extract more information than
+recvfrom(2) can provide, for example the packet priority and timestamp. The
+Destination Address, name and packet priority (if applicable) are attached to
+the msghdr in the recvmsg(2) call. They can be extracted using cmsg(3) macros,
+with cmsg_level == SOL_CAN_J1939 && cmsg_type == SCM_J1939_DEST_ADDR,
+SCM_J1939_DEST_NAME or SCM_J1939_PRIO. The returned data is a uint8_t for
+priority and dst_addr, and uint64_t for dst_name.
+
+.. code-block:: C
+
+ uint8_t priority, dst_addr;
+ uint64_t dst_name;
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ switch (cmsg->cmsg_level) {
+ case SOL_CAN_J1939:
+ if (cmsg->cmsg_type == SCM_J1939_DEST_ADDR)
+ dst_addr = *CMSG_DATA(cmsg);
+ else if (cmsg->cmsg_type == SCM_J1939_DEST_NAME)
+ memcpy(&dst_name, CMSG_DATA(cmsg), cmsg->cmsg_len - CMSG_LEN(0));
+ else if (cmsg->cmsg_type == SCM_J1939_PRIO)
+ priority = *CMSG_DATA(cmsg);
+ break;
+ }
+ }
+
+Dynamic Addressing
+------------------
+
+Distinction has to be made between using the claimed address and doing an
+address claim. To use an already claimed address, one has to fill in the
+j1939.name member and provide it to bind(2). If the name had claimed an address
+earlier, all further messages being sent will use that address, and the
+j1939.addr member will be ignored.
+
+An exception to this is PGN 0x0ee00. This is the "Address Claim/Cannot Claim
+Address" message and the kernel will use the j1939.addr member for that PGN if
+necessary.
+
+To claim an address, the following code example can be used:
+
+.. code-block:: C
+
+ struct sockaddr_can baddr = {
+ .can_family = AF_CAN,
+ .can_addr.j1939 = {
+ .name = name,
+ .addr = J1939_IDLE_ADDR,
+ .pgn = J1939_NO_PGN, /* to disable bind() rx filter for PGN */
+ },
+ .can_ifindex = if_nametoindex("can0"),
+ };
+
+ bind(sock, (struct sockaddr *)&baddr, sizeof(baddr));
+
+ /* for Address Claiming broadcast must be allowed */
+ int value = 1;
+ setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &value, sizeof(value));
+
+ /* configure an advanced RX filter with the PGNs needed for Address Claiming */
+ const struct j1939_filter filt[] = {
+ {
+ .pgn = J1939_PGN_ADDRESS_CLAIMED,
+ .pgn_mask = J1939_PGN_PDU1_MAX,
+ }, {
+ .pgn = J1939_PGN_ADDRESS_REQUEST,
+ .pgn_mask = J1939_PGN_PDU1_MAX,
+ }, {
+ .pgn = J1939_PGN_ADDRESS_COMMANDED,
+ .pgn_mask = J1939_PGN_MAX,
+ },
+ };
+
+ setsockopt(sock, SOL_CAN_J1939, SO_J1939_FILTER, &filt, sizeof(filt));
+
+ uint64_t dat = htole64(name);
+ const struct sockaddr_can saddr = {
+ .can_family = AF_CAN,
+ .can_addr.j1939 = {
+ .pgn = J1939_PGN_ADDRESS_CLAIMED,
+ .addr = J1939_NO_ADDR,
+ },
+ };
+
+ /* Afterwards do a sendto(2) with data set to the NAME (Little Endian). If the
+ * NAME provided does not match the j1939.name provided to bind(2), EPROTO
+ * will be returned.
+ */
+ sendto(sock, &dat, sizeof(dat), 0, (const struct sockaddr *)&saddr, sizeof(saddr));
+
+If no-one else contests the address claim within 250ms after transmission, the
+kernel marks the NAME-SA assignment as valid. The valid assignment will be kept
+among other valid NAME-SA assignments. From that point, any socket bound to the
+NAME can send packets.
+
+If another ECU claims the address, the kernel will mark the NAME-SA expired.
+No socket bound to the NAME can send packets (other than address claims). To
+claim another address, a socket bound to the NAME must bind(2) again, but with
+only j1939.addr changed to the new SA, and must then send a valid address claim
+packet. This restarts the state machine in the kernel (and any other
+participant on the bus) for this NAME.
+
+can-utils also includes the jacd tool, which can be used as a code example or
+as a default Address Claiming daemon.
+
+Send Examples
+-------------
+
+Static Addressing
+^^^^^^^^^^^^^^^^^
+
+This example will send a PGN (0x12300) from SA 0x20 to DA 0x30.
+
+Bind:
+
+.. code-block:: C
+
+ struct sockaddr_can baddr = {
+ .can_family = AF_CAN,
+ .can_addr.j1939 = {
+ .name = J1939_NO_NAME,
+ .addr = 0x20,
+ .pgn = J1939_NO_PGN,
+ },
+ .can_ifindex = if_nametoindex("can0"),
+ };
+
+ bind(sock, (struct sockaddr *)&baddr, sizeof(baddr));
+
+Now, the socket 'sock' is bound to the SA 0x20. Since no connect(2) was called,
+at this point we can use only sendto(2) or sendmsg(2).
+
+Send:
+
+.. code-block:: C
+
+ const struct sockaddr_can saddr = {
+ .can_family = AF_CAN,
+ .can_addr.j1939 = {
+ .name = J1939_NO_NAME,
+ .pgn = 0x12300,
+ .addr = 0x30,
+ },
+ };
+
+ sendto(sock, dat, sizeof(dat), 0, (const struct sockaddr *)&saddr, sizeof(saddr));
diff --git a/MAINTAINERS b/MAINTAINERS
index a081c477d1d1..84bb34727f81 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3669,6 +3669,16 @@ F: include/uapi/linux/can/bcm.h
F: include/uapi/linux/can/raw.h
F: include/uapi/linux/can/gw.h
+CAN-J1939 NETWORK LAYER
+M: Robin van der Gracht <[email protected]>
+M: Oleksij Rempel <[email protected]>
+R: Pengutronix Kernel Team <[email protected]>
+S: Maintained
+F: Documentation/networking/j1939.rst
+F: net/can/j1939/
+F: include/uapi/linux/can/j1939.h
+
CAPABILITIES
M: Serge Hallyn <[email protected]>
@@ -12608,6 +12618,14 @@ L: [email protected]
S: Maintained
F: drivers/platform/x86/peaq-wmi.c
+PENSANDO ETHERNET DRIVERS
+M: Shannon Nelson <[email protected]>
+M: Pensando Drivers <[email protected]>
+S: Supported
+F: Documentation/networking/device_drivers/pensando/ionic.rst
+F: drivers/net/ethernet/pensando/
+
PER-CPU MEMORY ALLOCATOR
M: Dennis Zhou <[email protected]>
M: Tejun Heo <[email protected]>
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index cb7ab50b7657..eb73f9f72c46 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -171,6 +171,9 @@
/* Rd = Ra + Rn * Rm */
#define A64_MADD(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \
A64_VARIANT(sf), AARCH64_INSN_DATA3_MADD)
+/* Rd = Ra - Rn * Rm */
+#define A64_MSUB(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \
+ A64_VARIANT(sf), AARCH64_INSN_DATA3_MSUB)
/* Rd = Rn * Rm */
#define A64_MUL(sf, Rd, Rn, Rm) A64_MADD(sf, Rd, A64_ZR, Rn, Rm)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index f5b437f8a22b..cdc79de0c794 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -409,8 +409,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_MOD:
emit(A64_UDIV(is64, tmp, dst, src), ctx);
- emit(A64_MUL(is64, tmp, tmp, src), ctx);
- emit(A64_SUB(is64, dst, dst, tmp), ctx);
+ emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
break;
}
break;
@@ -516,8 +515,7 @@ emit_bswap_uxt:
case BPF_ALU64 | BPF_MOD | BPF_K:
emit_a64_mov_i(is64, tmp2, imm, ctx);
emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
- emit(A64_MUL(is64, tmp, tmp, tmp2), ctx);
- emit(A64_SUB(is64, dst, dst, tmp), ctx);
+ emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx);
break;
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU64 | BPF_LSH | BPF_K:
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 955eb355c2fd..ce88211b9c6c 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -502,7 +502,8 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
* NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
* stack space for the large switch statement.
*/
-static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
+static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
+ int i, bool extra_pass)
{
struct bpf_insn *insn = &fp->insnsi[i];
int jmp_off, last, insn_count = 1;
@@ -1011,10 +1012,14 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
*/
case BPF_JMP | BPF_CALL:
{
- /*
- * b0 = (__bpf_call_base + imm)(b1, b2, b3, b4, b5)
- */
- const u64 func = (u64)__bpf_call_base + imm;
+ u64 func;
+ bool func_addr_fixed;
+ int ret;
+
+ ret = bpf_jit_get_func_addr(fp, insn, extra_pass,
+ &func, &func_addr_fixed);
+ if (ret < 0)
+ return -1;
REG_SET_SEEN(BPF_REG_5);
jit->seen |= SEEN_FUNC;
@@ -1283,7 +1288,8 @@ branch_oc:
/*
* Compile eBPF program into s390x code
*/
-static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
+static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
+ bool extra_pass)
{
int i, insn_count;
@@ -1292,7 +1298,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
bpf_jit_prologue(jit, fp->aux->stack_depth);
for (i = 0; i < fp->len; i += insn_count) {
- insn_count = bpf_jit_insn(jit, fp, i);
+ insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
if (insn_count < 0)
return -1;
/* Next instruction address */
@@ -1311,6 +1317,12 @@ bool bpf_jit_needs_zext(void)
return true;
}
+struct s390_jit_data {
+ struct bpf_binary_header *header;
+ struct bpf_jit ctx;
+ int pass;
+};
+
/*
* Compile eBPF program "fp"
*/
@@ -1318,7 +1330,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
struct bpf_prog *tmp, *orig_fp = fp;
struct bpf_binary_header *header;
+ struct s390_jit_data *jit_data;
bool tmp_blinded = false;
+ bool extra_pass = false;
struct bpf_jit jit;
int pass;
@@ -1337,6 +1351,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp = tmp;
}
+ jit_data = fp->aux->jit_data;
+ if (!jit_data) {
+ jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+ if (!jit_data) {
+ fp = orig_fp;
+ goto out;
+ }
+ fp->aux->jit_data = jit_data;
+ }
+ if (jit_data->ctx.addrs) {
+ jit = jit_data->ctx;
+ header = jit_data->header;
+ extra_pass = true;
+ pass = jit_data->pass + 1;
+ goto skip_init_ctx;
+ }
+
memset(&jit, 0, sizeof(jit));
jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
if (jit.addrs == NULL) {
@@ -1349,7 +1380,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
* - 3: Calculate program size and addrs arrray
*/
for (pass = 1; pass <= 3; pass++) {
- if (bpf_jit_prog(&jit, fp)) {
+ if (bpf_jit_prog(&jit, fp, extra_pass)) {
fp = orig_fp;
goto free_addrs;
}
@@ -1361,12 +1392,14 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp = orig_fp;
goto free_addrs;
}
+
header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
if (!header) {
fp = orig_fp;
goto free_addrs;
}
- if (bpf_jit_prog(&jit, fp)) {
+skip_init_ctx:
+ if (bpf_jit_prog(&jit, fp, extra_pass)) {
bpf_jit_binary_free(header);
fp = orig_fp;
goto free_addrs;
@@ -1375,12 +1408,24 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
print_fn_code(jit.prg_buf, jit.size_prg);
}
- bpf_jit_binary_lock_ro(header);
+ if (!fp->is_func || extra_pass) {
+ bpf_jit_binary_lock_ro(header);
+ } else {
+ jit_data->header = header;
+ jit_data->ctx = jit;
+ jit_data->pass = pass;
+ }
fp->bpf_func = (void *) jit.prg_buf;
fp->jited = 1;
fp->jited_len = jit.size;
+
+ if (!fp->is_func || extra_pass) {
+ bpf_prog_fill_jited_linfo(fp, jit.addrs + 1);
free_addrs:
- kfree(jit.addrs);
+ kfree(jit.addrs);
+ kfree(jit_data);
+ fp->aux->jit_data = NULL;
+ }
out:
if (tmp_blinded)
bpf_jit_prog_release_other(fp, fp == orig_fp ?
diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c
index 635bb4b447fb..e6df5b95ed47 100644
--- a/drivers/crypto/chelsio/chtls/chtls_main.c
+++ b/drivers/crypto/chelsio/chtls/chtls_main.c
@@ -474,7 +474,8 @@ static int chtls_getsockopt(struct sock *sk, int level, int optname,
struct tls_context *ctx = tls_get_ctx(sk);
if (level != SOL_TLS)
- return ctx->getsockopt(sk, level, optname, optval, optlen);
+ return ctx->sk_proto->getsockopt(sk, level,
+ optname, optval, optlen);
return do_chtls_getsockopt(sk, optval, optlen);
}
@@ -541,7 +542,8 @@ static int chtls_setsockopt(struct sock *sk, int level, int optname,
struct tls_context *ctx = tls_get_ctx(sk);
if (level != SOL_TLS)
- return ctx->setsockopt(sk, level, optname, optval, optlen);
+ return ctx->sk_proto->setsockopt(sk, level,
+ optname, optval, optlen);
return do_chtls_setsockopt(sk, optname, optval, optlen);
}
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 6c8645033102..4937947400cd 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -186,136 +186,6 @@ int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
return err;
}
-int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
- u16 uid, phys_addr_t *addr, u32 *obj_id)
-{
- struct mlx5_core_dev *dev = dm->dev;
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
- u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {};
- unsigned long *block_map;
- u64 icm_start_addr;
- u32 log_icm_size;
- u32 num_blocks;
- u32 max_blocks;
- u64 block_idx;
- void *sw_icm;
- int ret;
-
- MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
- MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
- MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
-
- switch (type) {
- case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
- steering_sw_icm_start_address);
- log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size);
- block_map = dm->steering_sw_icm_alloc_blocks;
- break;
- case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
- header_modify_sw_icm_start_address);
- log_icm_size = MLX5_CAP_DEV_MEM(dev,
- log_header_modify_sw_icm_size);
- block_map = dm->header_modify_sw_icm_alloc_blocks;
- break;
- default:
- return -EINVAL;
- }
-
- num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >>
- MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
- max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
- spin_lock(&dm->lock);
- block_idx = bitmap_find_next_zero_area(block_map,
- max_blocks,
- 0,
- num_blocks, 0);
-
- if (block_idx < max_blocks)
- bitmap_set(block_map,
- block_idx, num_blocks);
-
- spin_unlock(&dm->lock);
-
- if (block_idx >= max_blocks)
- return -ENOMEM;
-
- sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm);
- icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
- MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr,
- icm_start_addr);
- MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length));
-
- ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (ret) {
- spin_lock(&dm->lock);
- bitmap_clear(block_map,
- block_idx, num_blocks);
- spin_unlock(&dm->lock);
-
- return ret;
- }
-
- *addr = icm_start_addr;
- *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
-
- return 0;
-}
-
-int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
- u16 uid, phys_addr_t addr, u32 obj_id)
-{
- struct mlx5_core_dev *dev = dm->dev;
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
- u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
- unsigned long *block_map;
- u32 num_blocks;
- u64 start_idx;
- int err;
-
- num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >>
- MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
-
- switch (type) {
- case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- start_idx =
- (addr - MLX5_CAP64_DEV_MEM(
- dev, steering_sw_icm_start_address)) >>
- MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
- block_map = dm->steering_sw_icm_alloc_blocks;
- break;
- case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- start_idx =
- (addr -
- MLX5_CAP64_DEV_MEM(
- dev, header_modify_sw_icm_start_address)) >>
- MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
- block_map = dm->header_modify_sw_icm_alloc_blocks;
- break;
- default:
- return -EINVAL;
- }
-
- MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
- MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
-
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (err)
- return err;
-
- spin_lock(&dm->lock);
- bitmap_clear(block_map,
- start_idx, num_blocks);
- spin_unlock(&dm->lock);
-
- return 0;
-}
-
int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
{
u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 0572dcba6eae..169cab4915e3 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -65,8 +65,4 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
u16 uid);
int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
u16 opmod, u8 port);
-int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
- u16 uid, phys_addr_t *addr, u32 *obj_id);
-int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
- u16 uid, phys_addr_t addr, u32 obj_id);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index b8841355fcd5..1c8f04abee0c 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -322,11 +322,11 @@ void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
switch (maction->flow_action_raw.sub_type) {
case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
- maction->flow_action_raw.action_id);
+ maction->flow_action_raw.modify_hdr);
break;
case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
- maction->flow_action_raw.action_id);
+ maction->flow_action_raw.pkt_reformat);
break;
case MLX5_IB_FLOW_ACTION_DECAP:
break;
@@ -352,10 +352,11 @@ mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
if (!maction)
return ERR_PTR(-ENOMEM);
- ret = mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in,
- &maction->flow_action_raw.action_id);
+ maction->flow_action_raw.modify_hdr =
+ mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
- if (ret) {
+ if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
+ ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
kfree(maction);
return ERR_PTR(ret);
}
@@ -479,11 +480,13 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx(
if (ret)
return ret;
- ret = mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
- in, namespace,
- &maction->flow_action_raw.action_id);
- if (ret)
+ maction->flow_action_raw.pkt_reformat =
+ mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
+ in, namespace);
+ if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
+ ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
return ret;
+ }
maction->flow_action_raw.sub_type =
MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 0569bcab02d4..4e9f1507ffd9 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2280,6 +2280,7 @@ static inline int check_dm_type_support(struct mlx5_ib_dev *dev,
return -EOPNOTSUPP;
break;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
if (!capable(CAP_SYS_RAWIO) ||
!capable(CAP_NET_RAW))
return -EPERM;
@@ -2344,20 +2345,20 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
struct uverbs_attr_bundle *attrs,
int type)
{
- struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
+ struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev;
u64 act_size;
int err;
/* Allocation size must a multiple of the basic block size
* and a power of 2.
*/
- act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev));
+ act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dev));
act_size = roundup_pow_of_two(act_size);
dm->size = act_size;
- err = mlx5_cmd_alloc_sw_icm(dm_db, type, act_size,
- to_mucontext(ctx)->devx_uid, &dm->dev_addr,
- &dm->icm_dm.obj_id);
+ err = mlx5_dm_sw_icm_alloc(dev, type, act_size,
+ to_mucontext(ctx)->devx_uid, &dm->dev_addr,
+ &dm->icm_dm.obj_id);
if (err)
return err;
@@ -2365,9 +2366,9 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
&dm->dev_addr, sizeof(dm->dev_addr));
if (err)
- mlx5_cmd_dealloc_sw_icm(dm_db, type, dm->size,
- to_mucontext(ctx)->devx_uid,
- dm->dev_addr, dm->icm_dm.obj_id);
+ mlx5_dm_sw_icm_dealloc(dev, type, dm->size,
+ to_mucontext(ctx)->devx_uid, dm->dev_addr,
+ dm->icm_dm.obj_id);
return err;
}
@@ -2407,8 +2408,14 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
attrs);
break;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ err = handle_alloc_dm_sw_icm(context, dm,
+ attr, attrs,
+ MLX5_SW_ICM_TYPE_STEERING);
+ break;
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- err = handle_alloc_dm_sw_icm(context, dm, attr, attrs, type);
+ err = handle_alloc_dm_sw_icm(context, dm,
+ attr, attrs,
+ MLX5_SW_ICM_TYPE_HEADER_MODIFY);
break;
default:
err = -EOPNOTSUPP;
@@ -2428,6 +2435,7 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev;
struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm;
struct mlx5_ib_dm *dm = to_mdm(ibdm);
u32 page_idx;
@@ -2439,19 +2447,23 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
if (ret)
return ret;
- page_idx = (dm->dev_addr -
- pci_resource_start(dm_db->dev->pdev, 0) -
- MLX5_CAP64_DEV_MEM(dm_db->dev,
- memic_bar_start_addr)) >>
- PAGE_SHIFT;
+ page_idx = (dm->dev_addr - pci_resource_start(dev->pdev, 0) -
+ MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr)) >>
+ PAGE_SHIFT;
bitmap_clear(ctx->dm_pages, page_idx,
DIV_ROUND_UP(dm->size, PAGE_SIZE));
break;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING,
+ dm->size, ctx->devx_uid, dm->dev_addr,
+ dm->icm_dm.obj_id);
+ if (ret)
+ return ret;
+ break;
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- ret = mlx5_cmd_dealloc_sw_icm(dm_db, dm->type, dm->size,
- ctx->devx_uid, dm->dev_addr,
- dm->icm_dm.obj_id);
+ ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_HEADER_MODIFY,
+ dm->size, ctx->devx_uid, dm->dev_addr,
+ dm->icm_dm.obj_id);
if (ret)
return ret;
break;
@@ -2646,7 +2658,8 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
return -EINVAL;
action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- action->modify_id = maction->flow_action_raw.action_id;
+ action->modify_hdr =
+ maction->flow_action_raw.modify_hdr;
return 0;
}
if (maction->flow_action_raw.sub_type ==
@@ -2663,8 +2676,8 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
return -EINVAL;
action->action |=
MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
- action->reformat_id =
- maction->flow_action_raw.action_id;
+ action->pkt_reformat =
+ maction->flow_action_raw.pkt_reformat;
return 0;
}
/* fall through */
@@ -6096,8 +6109,6 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
- struct mlx5_core_dev *mdev = dev->mdev;
-
mlx5_ib_cleanup_multiport_master(dev);
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
srcu_barrier(&dev->mr_srcu);
@@ -6105,29 +6116,11 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
}
WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
-
- WARN_ON(dev->dm.steering_sw_icm_alloc_blocks &&
- !bitmap_empty(
- dev->dm.steering_sw_icm_alloc_blocks,
- BIT(MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size) -
- MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
-
- kfree(dev->dm.steering_sw_icm_alloc_blocks);
-
- WARN_ON(dev->dm.header_modify_sw_icm_alloc_blocks &&
- !bitmap_empty(dev->dm.header_modify_sw_icm_alloc_blocks,
- BIT(MLX5_CAP_DEV_MEM(
- mdev, log_header_modify_sw_icm_size) -
- MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
-
- kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
}
static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
- u64 header_modify_icm_blocks = 0;
- u64 steering_icm_blocks = 0;
int err;
int i;
@@ -6174,51 +6167,17 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
- if (MLX5_CAP_GEN_64(mdev, general_obj_types) &
- MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) {
- if (MLX5_CAP64_DEV_MEM(mdev, steering_sw_icm_start_address)) {
- steering_icm_blocks =
- BIT(MLX5_CAP_DEV_MEM(mdev,
- log_steering_sw_icm_size) -
- MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
-
- dev->dm.steering_sw_icm_alloc_blocks =
- kcalloc(BITS_TO_LONGS(steering_icm_blocks),
- sizeof(unsigned long), GFP_KERNEL);
- if (!dev->dm.steering_sw_icm_alloc_blocks)
- goto err_mp;
- }
-
- if (MLX5_CAP64_DEV_MEM(mdev,
- header_modify_sw_icm_start_address)) {
- header_modify_icm_blocks = BIT(
- MLX5_CAP_DEV_MEM(
- mdev, log_header_modify_sw_icm_size) -
- MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
-
- dev->dm.header_modify_sw_icm_alloc_blocks =
- kcalloc(BITS_TO_LONGS(header_modify_icm_blocks),
- sizeof(unsigned long), GFP_KERNEL);
- if (!dev->dm.header_modify_sw_icm_alloc_blocks)
- goto err_dm;
- }
- }
-
spin_lock_init(&dev->dm.lock);
dev->dm.dev = mdev;
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
err = init_srcu_struct(&dev->mr_srcu);
if (err)
- goto err_dm;
+ goto err_mp;
}
return 0;
-err_dm:
- kfree(dev->dm.steering_sw_icm_alloc_blocks);
- kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
-
err_mp:
mlx5_ib_cleanup_multiport_master(dev);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 9ae587b74b12..125a507c10ed 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -868,7 +868,10 @@ struct mlx5_ib_flow_action {
struct {
struct mlx5_ib_dev *dev;
u32 sub_type;
- u32 action_id;
+ union {
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5_pkt_reformat *pkt_reformat;
+ };
} flow_action_raw;
};
};
@@ -881,8 +884,6 @@ struct mlx5_dm {
*/
spinlock_t lock;
DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
- unsigned long *steering_sw_icm_alloc_blocks;
- unsigned long *header_modify_sw_icm_alloc_blocks;
};
struct mlx5_read_counters_attr {
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 483d270664cc..ac86be52b461 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2005 Marc Kleine-Budde, Pengutronix
+/* Copyright (C) 2005 Marc Kleine-Budde, Pengutronix
* Copyright (C) 2006 Andrey Volkov, Varma Electronics
* Copyright (C) 2008-2009 Wolfgang Grandegger <[email protected]>
*/
@@ -12,6 +11,7 @@
#include <linux/if_arp.h>
#include <linux/workqueue.h>
#include <linux/can.h>
+#include <linux/can/can-ml.h>
#include <linux/can/dev.h>
#include <linux/can/skb.h>
#include <linux/can/netlink.h>
@@ -62,8 +62,7 @@ EXPORT_SYMBOL_GPL(can_len2dlc);
#define CAN_CALC_MAX_ERROR 50 /* in one-tenth of a percent */
#define CAN_CALC_SYNC_SEG 1
-/*
- * Bit-timing calculation derived from:
+/* Bit-timing calculation derived from:
*
* Code based on LinCAN sources and H8S2638 project
* Copyright 2004-2006 Pavel Pisa - DCE FELK CVUT cz
@@ -75,10 +74,11 @@ EXPORT_SYMBOL_GPL(can_len2dlc);
* registers of the CAN controller. You can find more information
* in the header file linux/can/netlink.h.
*/
-static int can_update_sample_point(const struct can_bittiming_const *btc,
- unsigned int sample_point_nominal, unsigned int tseg,
- unsigned int *tseg1_ptr, unsigned int *tseg2_ptr,
- unsigned int *sample_point_error_ptr)
+static int
+can_update_sample_point(const struct can_bittiming_const *btc,
+ unsigned int sample_point_nominal, unsigned int tseg,
+ unsigned int *tseg1_ptr, unsigned int *tseg2_ptr,
+ unsigned int *sample_point_error_ptr)
{
unsigned int sample_point_error, best_sample_point_error = UINT_MAX;
unsigned int sample_point, best_sample_point = 0;
@@ -86,7 +86,9 @@ static int can_update_sample_point(const struct can_bittiming_const *btc,
int i;
for (i = 0; i <= 1; i++) {
- tseg2 = tseg + CAN_CALC_SYNC_SEG - (sample_point_nominal * (tseg + CAN_CALC_SYNC_SEG)) / 1000 - i;
+ tseg2 = tseg + CAN_CALC_SYNC_SEG -
+ (sample_point_nominal * (tseg + CAN_CALC_SYNC_SEG)) /
+ 1000 - i;
tseg2 = clamp(tseg2, btc->tseg2_min, btc->tseg2_max);
tseg1 = tseg - tseg2;
if (tseg1 > btc->tseg1_max) {
@@ -94,10 +96,12 @@ static int can_update_sample_point(const struct can_bittiming_const *btc,
tseg2 = tseg - tseg1;
}
- sample_point = 1000 * (tseg + CAN_CALC_SYNC_SEG - tseg2) / (tseg + CAN_CALC_SYNC_SEG);
+ sample_point = 1000 * (tseg + CAN_CALC_SYNC_SEG - tseg2) /
+ (tseg + CAN_CALC_SYNC_SEG);
sample_point_error = abs(sample_point_nominal - sample_point);
- if ((sample_point <= sample_point_nominal) && (sample_point_error < best_sample_point_error)) {
+ if (sample_point <= sample_point_nominal &&
+ sample_point_error < best_sample_point_error) {
best_sample_point = sample_point;
best_sample_point_error = sample_point_error;
*tseg1_ptr = tseg1;
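
The reflowed can_update_sample_point() lines above keep the same per-mille arithmetic; a small worked example (user-space, illustrative values only; the real limits come from the controller's can_bittiming_const) shows how tseg2 and the resulting sample point fall out:

#include <stdio.h>

int main(void)
{
	const unsigned int sync_seg = 1;	/* CAN_CALC_SYNC_SEG */
	unsigned int sp_nominal = 875;		/* 87.5 %, in tenths of a percent */
	unsigned int tseg = 15;			/* tseg1 + tseg2, hypothetical */
	unsigned int i = 0;
	unsigned int tseg1, tseg2, sample_point;

	tseg2 = tseg + sync_seg -
		(sp_nominal * (tseg + sync_seg)) / 1000 - i;	/* 16 - 14 - 0 = 2 */
	tseg1 = tseg - tseg2;					/* 13 */
	sample_point = 1000 * (tseg + sync_seg - tseg2) /
		       (tseg + sync_seg);			/* 14000 / 16 = 875 */

	printf("tseg1=%u tseg2=%u sample_point=%u\n",
	       tseg1, tseg2, sample_point);
	return 0;
}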
@@ -148,7 +152,7 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
/* choose brp step which is possible in system */
brp = (brp / btc->brp_inc) * btc->brp_inc;
- if ((brp < btc->brp_min) || (brp > btc->brp_max))
+ if (brp < btc->brp_min || brp > btc->brp_max)
continue;
bitrate = priv->clock.freq / (brp * tsegall);
@@ -162,7 +166,8 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
if (bitrate_error < best_bitrate_error)
best_sample_point_error = UINT_MAX;
- can_update_sample_point(btc, sample_point_nominal, tseg / 2, &tseg1, &tseg2, &sample_point_error);
+ can_update_sample_point(btc, sample_point_nominal, tseg / 2,
+ &tseg1, &tseg2, &sample_point_error);
if (sample_point_error > best_sample_point_error)
continue;
@@ -191,8 +196,9 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
}
/* real sample point */
- bt->sample_point = can_update_sample_point(btc, sample_point_nominal, best_tseg,
- &tseg1, &tseg2, NULL);
+ bt->sample_point = can_update_sample_point(btc, sample_point_nominal,
+ best_tseg, &tseg1, &tseg2,
+ NULL);
v64 = (u64)best_brp * 1000 * 1000 * 1000;
do_div(v64, priv->clock.freq);
@@ -216,7 +222,8 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
bt->brp = best_brp;
/* real bitrate */
- bt->bitrate = priv->clock.freq / (bt->brp * (CAN_CALC_SYNC_SEG + tseg1 + tseg2));
+ bt->bitrate = priv->clock.freq /
+ (bt->brp * (CAN_CALC_SYNC_SEG + tseg1 + tseg2));
return 0;
}
@@ -229,8 +236,7 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
}
#endif /* CONFIG_CAN_CALC_BITTIMING */
-/*
- * Checks the validity of the specified bit-timing parameters prop_seg,
+/* Checks the validity of the specified bit-timing parameters prop_seg,
* phase_seg1, phase_seg2 and sjw and tries to determine the bitrate
* prescaler value brp. You can find more information in the header
* file linux/can/netlink.h.
@@ -270,9 +276,10 @@ static int can_fixup_bittiming(struct net_device *dev, struct can_bittiming *bt,
}
/* Checks the validity of predefined bitrate settings */
-static int can_validate_bitrate(struct net_device *dev, struct can_bittiming *bt,
- const u32 *bitrate_const,
- const unsigned int bitrate_const_cnt)
+static int
+can_validate_bitrate(struct net_device *dev, struct can_bittiming *bt,
+ const u32 *bitrate_const,
+ const unsigned int bitrate_const_cnt)
{
struct can_priv *priv = netdev_priv(dev);
unsigned int i;
@@ -295,8 +302,7 @@ static int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt,
{
int err;
- /*
- * Depending on the given can_bittiming parameter structure the CAN
+ /* Depending on the given can_bittiming parameter structure the CAN
* timing parameters are calculated based on the provided bitrate OR
* alternatively the CAN timing parameters (tq, prop_seg, etc.) are
* provided directly which are then checked and fixed up.
@@ -397,8 +403,7 @@ void can_change_state(struct net_device *dev, struct can_frame *cf,
}
EXPORT_SYMBOL_GPL(can_change_state);
-/*
- * Local echo of CAN messages
+/* Local echo of CAN messages
*
* CAN network devices *should* support a local echo functionality
* (see Documentation/networking/can.rst). To test the handling of CAN
@@ -423,8 +428,7 @@ static void can_flush_echo_skb(struct net_device *dev)
}
}
-/*
- * Put the skb on the stack to be looped backed locally lateron
+/* Put the skb on the stack to be looped back locally later on
*
* The function is typically called in the start_xmit function
* of the device driver. The driver must protect access to
@@ -446,7 +450,6 @@ void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
}
if (!priv->echo_skb[idx]) {
-
skb = can_create_echo_skb(skb);
if (!skb)
return;
@@ -466,7 +469,8 @@ void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
}
EXPORT_SYMBOL_GPL(can_put_echo_skb);
-struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr)
+struct sk_buff *
+__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr)
{
struct can_priv *priv = netdev_priv(dev);
@@ -493,8 +497,7 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8
return NULL;
}
-/*
- * Get the skb from the stack and loop it back locally
+/* Get the skb from the stack and loop it back locally
*
* The function is typically called when the TX done interrupt
* is handled in the device driver. The driver must protect
@@ -515,11 +518,10 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx)
}
EXPORT_SYMBOL_GPL(can_get_echo_skb);
-/*
- * Remove the skb from the stack and free it.
- *
- * The function is typically called when TX failed.
- */
+/* Remove the skb from the stack and free it.
+ *
+ * The function is typically called when TX failed.
+ */
void can_free_echo_skb(struct net_device *dev, unsigned int idx)
{
struct can_priv *priv = netdev_priv(dev);
@@ -533,9 +535,7 @@ void can_free_echo_skb(struct net_device *dev, unsigned int idx)
}
EXPORT_SYMBOL_GPL(can_free_echo_skb);
-/*
- * CAN device restart for bus-off recovery
- */
+/* CAN device restart for bus-off recovery */
static void can_restart(struct net_device *dev)
{
struct can_priv *priv = netdev_priv(dev);
@@ -546,15 +546,14 @@ static void can_restart(struct net_device *dev)
BUG_ON(netif_carrier_ok(dev));
- /*
- * No synchronization needed because the device is bus-off and
+ /* No synchronization needed because the device is bus-off and
* no messages can come in or go out.
*/
can_flush_echo_skb(dev);
/* send restart message upstream */
skb = alloc_can_err_skb(dev, &cf);
- if (skb == NULL) {
+ if (!skb) {
err = -ENOMEM;
goto restart;
}
@@ -580,7 +579,8 @@ restart:
static void can_restart_work(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
- struct can_priv *priv = container_of(dwork, struct can_priv, restart_work);
+ struct can_priv *priv = container_of(dwork, struct can_priv,
+ restart_work);
can_restart(priv->dev);
}
@@ -589,8 +589,7 @@ int can_restart_now(struct net_device *dev)
{
struct can_priv *priv = netdev_priv(dev);
- /*
- * A manual restart is only permitted if automatic restart is
+ /* A manual restart is only permitted if automatic restart is
* disabled and the device is in the bus-off state
*/
if (priv->restart_ms)
@@ -604,8 +603,7 @@ int can_restart_now(struct net_device *dev)
return 0;
}
-/*
- * CAN bus-off
+/* CAN bus-off
*
 * This function should be called when the device goes bus-off to
* tell the netif layer that no more packets can be sent or received.
@@ -708,9 +706,7 @@ struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf)
}
EXPORT_SYMBOL_GPL(alloc_can_err_skb);
-/*
- * Allocate and setup space for the CAN network device
- */
+/* Allocate and setup space for the CAN network device */
struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
unsigned int txqs, unsigned int rxqs)
{
@@ -718,11 +714,24 @@ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
struct can_priv *priv;
int size;
+ /* We put the driver's priv, the CAN mid layer priv and the
+ * echo skb into the netdevice's priv. The memory layout for
+ * the netdev_priv is like this:
+ *
+ * +-------------------------+
+ * | driver's priv |
+ * +-------------------------+
+ * | struct can_ml_priv |
+ * +-------------------------+
+ * | array of struct sk_buff |
+ * +-------------------------+
+ */
+
+ size = ALIGN(sizeof_priv, NETDEV_ALIGN) + sizeof(struct can_ml_priv);
+
if (echo_skb_max)
- size = ALIGN(sizeof_priv, sizeof(struct sk_buff *)) +
+ size = ALIGN(size, sizeof(struct sk_buff *)) +
echo_skb_max * sizeof(struct sk_buff *);
- else
- size = sizeof_priv;
dev = alloc_netdev_mqs(size, "can%d", NET_NAME_UNKNOWN, can_setup,
txqs, rxqs);
@@ -732,10 +741,12 @@ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
priv = netdev_priv(dev);
priv->dev = dev;
+ dev->ml_priv = (void *)priv + ALIGN(sizeof_priv, NETDEV_ALIGN);
+
if (echo_skb_max) {
priv->echo_skb_max = echo_skb_max;
priv->echo_skb = (void *)priv +
- ALIGN(sizeof_priv, sizeof(struct sk_buff *));
+ (size - echo_skb_max * sizeof(struct sk_buff *));
}
priv->state = CAN_STATE_STOPPED;
@@ -746,18 +757,14 @@ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
}
EXPORT_SYMBOL_GPL(alloc_candev_mqs);
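
The memory-layout comment added to alloc_candev_mqs() above is easiest to follow with concrete numbers. A small user-space sketch of the same offset arithmetic (NETDEV_ALIGN is 32 in the kernel; the struct sizes below are made up purely for illustration):

#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	const size_t netdev_align = 32;		/* NETDEV_ALIGN */
	size_t sizeof_priv = 200;		/* driver's priv, hypothetical */
	size_t sizeof_ml_priv = 536;		/* struct can_ml_priv, hypothetical */
	size_t ptr_size = sizeof(void *);
	unsigned int echo_skb_max = 4;
	size_t size, ml_priv_off, echo_skb_off;

	size = ALIGN_UP(sizeof_priv, netdev_align) + sizeof_ml_priv;
	size = ALIGN_UP(size, ptr_size) + echo_skb_max * ptr_size;

	ml_priv_off = ALIGN_UP(sizeof_priv, netdev_align);
	echo_skb_off = size - echo_skb_max * ptr_size;

	printf("priv size=%zu, ml_priv at +%zu, echo_skb array at +%zu\n",
	       size, ml_priv_off, echo_skb_off);
	return 0;
}

With these numbers the can_ml_priv lands at offset 224 and the echo_skb pointer array at offset 760 of a 792-byte private area, matching the driver-priv / can_ml_priv / skb-array ordering in the comment above.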
-/*
- * Free space of the CAN network device
- */
+/* Free space of the CAN network device */
void free_candev(struct net_device *dev)
{
free_netdev(dev);
}
EXPORT_SYMBOL_GPL(free_candev);
-/*
- * changing MTU and control mode for CAN/CANFD devices
- */
+/* changing MTU and control mode for CAN/CANFD devices */
int can_change_mtu(struct net_device *dev, int new_mtu)
{
struct can_priv *priv = netdev_priv(dev);
@@ -794,8 +801,7 @@ int can_change_mtu(struct net_device *dev, int new_mtu)
}
EXPORT_SYMBOL_GPL(can_change_mtu);
-/*
- * Common open function when the device gets opened.
+/* Common open function when the device gets opened.
*
* This function should be called in the open function of the device
* driver.
@@ -812,7 +818,7 @@ int open_candev(struct net_device *dev)
/* For CAN FD the data bitrate has to be >= the arbitration bitrate */
if ((priv->ctrlmode & CAN_CTRLMODE_FD) &&
(!priv->data_bittiming.bitrate ||
- (priv->data_bittiming.bitrate < priv->bittiming.bitrate))) {
+ priv->data_bittiming.bitrate < priv->bittiming.bitrate)) {
netdev_err(dev, "incorrect/missing data bit-timing\n");
return -EINVAL;
}
@@ -848,8 +854,7 @@ void of_can_transceiver(struct net_device *dev)
EXPORT_SYMBOL_GPL(of_can_transceiver);
#endif
-/*
- * Common close function for cleanup before the device gets closed.
+/* Common close function for cleanup before the device gets closed.
*
* This function should be called in the close function of the device
* driver.
@@ -863,9 +868,7 @@ void close_candev(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(close_candev);
-/*
- * CAN netlink interface
- */
+/* CAN netlink interface */
static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = {
[IFLA_CAN_STATE] = { .type = NLA_U32 },
[IFLA_CAN_CTRLMODE] = { .len = sizeof(struct can_ctrlmode) },
@@ -1209,7 +1212,6 @@ static int can_newlink(struct net *src_net, struct net_device *dev,
static void can_dellink(struct net_device *dev, struct list_head *head)
{
- return;
}
static struct rtnl_link_ops can_link_ops __read_mostly = {
@@ -1227,9 +1229,7 @@ static struct rtnl_link_ops can_link_ops __read_mostly = {
.fill_xstats = can_fill_xstats,
};
-/*
- * Register the CAN network device
- */
+/* Register the CAN network device */
int register_candev(struct net_device *dev)
{
struct can_priv *priv = netdev_priv(dev);
@@ -1255,22 +1255,19 @@ int register_candev(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(register_candev);
-/*
- * Unregister the CAN network device
- */
+/* Unregister the CAN network device */
void unregister_candev(struct net_device *dev)
{
unregister_netdev(dev);
}
EXPORT_SYMBOL_GPL(unregister_candev);
-/*
- * Test if a network device is a candev based device
+/* Test if a network device is a candev based device
* and return the can_priv* if so.
*/
struct can_priv *safe_candev_priv(struct net_device *dev)
{
- if ((dev->type != ARPHRD_CAN) || (dev->rtnl_link_ops != &can_link_ops))
+ if (dev->type != ARPHRD_CAN || dev->rtnl_link_ops != &can_link_ops)
return NULL;
return netdev_priv(dev);
@@ -1285,7 +1282,7 @@ static __init int can_dev_init(void)
err = rtnl_link_register(&can_link_ops);
if (!err)
- printk(KERN_INFO MOD_DESC "\n");
+ pr_info(MOD_DESC "\n");
return err;
}
diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c
index f9815fda8840..6f766918211a 100644
--- a/drivers/net/can/kvaser_pciefd.c
+++ b/drivers/net/can/kvaser_pciefd.c
@@ -65,6 +65,7 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices");
#define KVASER_PCIEFD_SYSID_BASE 0x1f020
#define KVASER_PCIEFD_SYSID_VERSION_REG (KVASER_PCIEFD_SYSID_BASE + 0x8)
#define KVASER_PCIEFD_SYSID_CANFREQ_REG (KVASER_PCIEFD_SYSID_BASE + 0xc)
+#define KVASER_PCIEFD_SYSID_BUSFREQ_REG (KVASER_PCIEFD_SYSID_BASE + 0x10)
#define KVASER_PCIEFD_SYSID_BUILD_REG (KVASER_PCIEFD_SYSID_BASE + 0x14)
/* Shared receive buffer registers */
#define KVASER_PCIEFD_SRB_BASE 0x1f200
@@ -268,6 +269,7 @@ struct kvaser_pciefd {
struct kvaser_pciefd_can *can[KVASER_PCIEFD_MAX_CAN_CHANNELS];
void *dma_data[KVASER_PCIEFD_DMA_COUNT];
u8 nr_channels;
+ u32 bus_freq;
u32 freq;
u32 freq_to_ticks_div;
};
@@ -666,7 +668,7 @@ static void kvaser_pciefd_pwm_start(struct kvaser_pciefd_can *can)
spin_lock_irqsave(&can->lock, irq);
	/* Set frequency to 500 kHz */
- top = can->can.clock.freq / (2 * 500000) - 1;
+ top = can->kv_pcie->bus_freq / (2 * 500000) - 1;
pwm_ctrl = top & 0xff;
pwm_ctrl |= (top & 0xff) << KVASER_PCIEFD_KCAN_PWM_TOP_SHIFT;
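
Switching the PWM divider above from the CAN core clock to the bus clock only changes the input of the same formula; with a hypothetical 125 MHz bus frequency the result is:

#include <stdio.h>

int main(void)
{
	unsigned int bus_freq = 125 * 1000 * 1000;	/* hypothetical 125 MHz */
	unsigned int top = bus_freq / (2 * 500000) - 1;	/* 124 */

	printf("top=%u -> PWM frequency %u Hz\n",
	       top, bus_freq / (2 * (top + 1)));	/* 500000 Hz */
	return 0;
}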
@@ -1119,6 +1121,8 @@ static int kvaser_pciefd_setup_board(struct kvaser_pciefd *pcie)
return -ENODEV;
}
+ pcie->bus_freq = ioread32(pcie->reg_base +
+ KVASER_PCIEFD_SYSID_BUSFREQ_REG);
pcie->freq = ioread32(pcie->reg_base + KVASER_PCIEFD_SYSID_CANFREQ_REG);
pcie->freq_to_ticks_div = pcie->freq / 1000000;
if (pcie->freq_to_ticks_div == 0)
diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c
index a697996d81b4..3db619209fe1 100644
--- a/drivers/net/can/m_can/tcan4x5x.c
+++ b/drivers/net/can/m_can/tcan4x5x.c
@@ -117,7 +117,6 @@ struct tcan4x5x_priv {
struct m_can_classdev *mcan_dev;
struct gpio_desc *reset_gpio;
- struct gpio_desc *interrupt_gpio;
struct gpio_desc *device_wake_gpio;
struct gpio_desc *device_state_gpio;
struct regulator *power;
@@ -236,8 +235,6 @@ static u32 tcan4x5x_read_reg(struct m_can_classdev *cdev, int reg)
struct tcan4x5x_priv *priv = cdev->device_data;
u32 val;
- tcan4x5x_check_wake(priv);
-
regmap_read(priv->regmap, priv->reg_offset + reg, &val);
return val;
@@ -248,8 +245,6 @@ static u32 tcan4x5x_read_fifo(struct m_can_classdev *cdev, int addr_offset)
struct tcan4x5x_priv *priv = cdev->device_data;
u32 val;
- tcan4x5x_check_wake(priv);
-
regmap_read(priv->regmap, priv->mram_start + addr_offset, &val);
return val;
@@ -259,8 +254,6 @@ static int tcan4x5x_write_reg(struct m_can_classdev *cdev, int reg, int val)
{
struct tcan4x5x_priv *priv = cdev->device_data;
- tcan4x5x_check_wake(priv);
-
return regmap_write(priv->regmap, priv->reg_offset + reg, val);
}
@@ -269,8 +262,6 @@ static int tcan4x5x_write_fifo(struct m_can_classdev *cdev,
{
struct tcan4x5x_priv *priv = cdev->device_data;
- tcan4x5x_check_wake(priv);
-
return regmap_write(priv->regmap, priv->mram_start + addr_offset, val);
}
@@ -290,18 +281,13 @@ static int tcan4x5x_write_tcan_reg(struct m_can_classdev *cdev,
{
struct tcan4x5x_priv *priv = cdev->device_data;
- tcan4x5x_check_wake(priv);
-
return regmap_write(priv->regmap, reg, val);
}
static int tcan4x5x_clear_interrupts(struct m_can_classdev *cdev)
{
- struct tcan4x5x_priv *tcan4x5x = cdev->device_data;
int ret;
- tcan4x5x_check_wake(tcan4x5x);
-
ret = tcan4x5x_write_tcan_reg(cdev, TCAN4X5X_STATUS,
TCAN4X5X_CLEAR_ALL_INT);
if (ret)
@@ -356,13 +342,6 @@ static int tcan4x5x_parse_config(struct m_can_classdev *cdev)
{
struct tcan4x5x_priv *tcan4x5x = cdev->device_data;
- tcan4x5x->interrupt_gpio = devm_gpiod_get(cdev->dev, "data-ready",
- GPIOD_IN);
- if (IS_ERR(tcan4x5x->interrupt_gpio)) {
- dev_err(cdev->dev, "data-ready gpio not defined\n");
- return -EINVAL;
- }
-
tcan4x5x->device_wake_gpio = devm_gpiod_get(cdev->dev, "device-wake",
GPIOD_OUT_HIGH);
if (IS_ERR(tcan4x5x->device_wake_gpio)) {
@@ -381,8 +360,6 @@ static int tcan4x5x_parse_config(struct m_can_classdev *cdev)
if (IS_ERR(tcan4x5x->device_state_gpio))
tcan4x5x->device_state_gpio = NULL;
- cdev->net->irq = gpiod_to_irq(tcan4x5x->interrupt_gpio);
-
tcan4x5x->power = devm_regulator_get_optional(cdev->dev,
"vsup");
if (PTR_ERR(tcan4x5x->power) == -EPROBE_DEFER)
@@ -447,6 +424,7 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
mcan_class->is_peripheral = true;
mcan_class->bit_timing = &tcan4x5x_bittiming_const;
mcan_class->data_timing = &tcan4x5x_data_bittiming_const;
+ mcan_class->net->irq = spi->irq;
spi_set_drvdata(spi, priv);
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index aa97dbc797b6..bb6032211043 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -55,6 +55,7 @@
#include <linux/workqueue.h>
#include <linux/can.h>
#include <linux/can/skb.h>
+#include <linux/can/can-ml.h>
MODULE_ALIAS_LDISC(N_SLCAN);
MODULE_DESCRIPTION("serial line CAN interface");
@@ -514,6 +515,7 @@ static struct slcan *slc_alloc(void)
char name[IFNAMSIZ];
struct net_device *dev = NULL;
struct slcan *sl;
+ int size;
for (i = 0; i < maxdev; i++) {
dev = slcan_devs[i];
@@ -527,12 +529,14 @@ static struct slcan *slc_alloc(void)
return NULL;
sprintf(name, "slcan%d", i);
- dev = alloc_netdev(sizeof(*sl), name, NET_NAME_UNKNOWN, slc_setup);
+ size = ALIGN(sizeof(*sl), NETDEV_ALIGN) + sizeof(struct can_ml_priv);
+ dev = alloc_netdev(size, name, NET_NAME_UNKNOWN, slc_setup);
if (!dev)
return NULL;
dev->base_addr = i;
sl = netdev_priv(dev);
+ dev->ml_priv = (void *)sl + ALIGN(sizeof(*sl), NETDEV_ALIGN);
/* Initialize channel control data */
sl->magic = SLCAN_MAGIC;
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index 58992fd61cb9..bee9f7b8dad6 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -17,26 +17,6 @@
* - Sascha Hauer, Marc Kleine-Budde, Pengutronix
* - Simon Kallweit, intefo AG
* Copyright 2007
- *
- * Your platform definition file should specify something like:
- *
- * static struct mcp251x_platform_data mcp251x_info = {
- * .oscillator_frequency = 8000000,
- * };
- *
- * static struct spi_board_info spi_board_info[] = {
- * {
- * .modalias = "mcp2510",
- * // "mcp2515" or "mcp25625" depending on your controller
- * .platform_data = &mcp251x_info,
- * .irq = IRQ_EINT13,
- * .max_speed_hz = 2*1000*1000,
- * .chip_select = 2,
- * },
- * };
- *
- * Please see mcp251x.h for a description of the fields in
- * struct mcp251x_platform_data.
*/
#include <linux/can/core.h>
@@ -53,8 +33,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/property.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spi/spi.h>
@@ -914,7 +893,7 @@ static int mcp251x_open(struct net_device *net)
priv->tx_skb = NULL;
priv->tx_len = 0;
- if (!spi->dev.of_node)
+ if (!dev_fwnode(&spi->dev))
flags = IRQF_TRIGGER_FALLING;
ret = request_threaded_irq(spi->irq, NULL, mcp251x_can_ist,
@@ -1006,23 +985,20 @@ MODULE_DEVICE_TABLE(spi, mcp251x_id_table);
static int mcp251x_can_probe(struct spi_device *spi)
{
- const struct of_device_id *of_id = of_match_device(mcp251x_of_match,
- &spi->dev);
+ const void *match = device_get_match_data(&spi->dev);
struct mcp251x_platform_data *pdata = dev_get_platdata(&spi->dev);
struct net_device *net;
struct mcp251x_priv *priv;
struct clk *clk;
int freq, ret;
- clk = devm_clk_get(&spi->dev, NULL);
- if (IS_ERR(clk)) {
- if (pdata)
- freq = pdata->oscillator_frequency;
- else
- return PTR_ERR(clk);
- } else {
- freq = clk_get_rate(clk);
- }
+ clk = devm_clk_get_optional(&spi->dev, NULL);
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+
+ freq = clk_get_rate(clk);
+ if (freq == 0 && pdata)
+ freq = pdata->oscillator_frequency;
/* Sanity check */
if (freq < 1000000 || freq > 25000000)
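
The probe hunks above rely on the optional-clock idiom: devm_clk_get_optional() returns NULL, not an error, when no clock is described, and the clk API accepts a NULL clock, so the IS_ERR() guards around enable/rate can go away. A hedged sketch of that pattern (example_get_freq() and fallback_hz are made-up names, not the driver's API):

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

static int example_get_freq(struct device *dev, unsigned int fallback_hz,
			    unsigned int *freq_out)
{
	struct clk *clk;
	int ret;

	clk = devm_clk_get_optional(dev, NULL);
	if (IS_ERR(clk))
		return PTR_ERR(clk);		/* real error, e.g. -EPROBE_DEFER */

	ret = clk_prepare_enable(clk);		/* a NULL clk is a no-op, returns 0 */
	if (ret)
		return ret;

	*freq_out = clk_get_rate(clk);		/* a NULL clk reports 0 Hz */
	if (!*freq_out)
		*freq_out = fallback_hz;	/* e.g. platform data oscillator */
	return 0;
}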
@@ -1033,11 +1009,9 @@ static int mcp251x_can_probe(struct spi_device *spi)
if (!net)
return -ENOMEM;
- if (!IS_ERR(clk)) {
- ret = clk_prepare_enable(clk);
- if (ret)
- goto out_free;
- }
+ ret = clk_prepare_enable(clk);
+ if (ret)
+ goto out_free;
net->netdev_ops = &mcp251x_netdev_ops;
net->flags |= IFF_ECHO;
@@ -1048,8 +1022,8 @@ static int mcp251x_can_probe(struct spi_device *spi)
priv->can.clock.freq = freq / 2;
priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES |
CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY;
- if (of_id)
- priv->model = (enum mcp251x_model)of_id->data;
+ if (match)
+ priv->model = (enum mcp251x_model)match;
else
priv->model = spi_get_device_id(spi)->driver_data;
priv->net = net;
@@ -1122,8 +1096,7 @@ error_probe:
mcp251x_power_enable(priv->power, 0);
out_clk:
- if (!IS_ERR(clk))
- clk_disable_unprepare(clk);
+ clk_disable_unprepare(clk);
out_free:
free_candev(net);
@@ -1141,8 +1114,7 @@ static int mcp251x_can_remove(struct spi_device *spi)
mcp251x_power_enable(priv->power, 0);
- if (!IS_ERR(priv->clk))
- clk_disable_unprepare(priv->clk);
+ clk_disable_unprepare(priv->clk);
free_candev(net);
@@ -1170,10 +1142,8 @@ static int __maybe_unused mcp251x_can_suspend(struct device *dev)
priv->after_suspend = AFTER_SUSPEND_DOWN;
}
- if (!IS_ERR_OR_NULL(priv->power)) {
- regulator_disable(priv->power);
- priv->after_suspend |= AFTER_SUSPEND_POWER;
- }
+ mcp251x_power_enable(priv->power, 0);
+ priv->after_suspend |= AFTER_SUSPEND_POWER;
return 0;
}
diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c
index daf27133887b..39ca14b0585d 100644
--- a/drivers/net/can/vcan.c
+++ b/drivers/net/can/vcan.c
@@ -46,6 +46,7 @@
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/can.h>
+#include <linux/can/can-ml.h>
#include <linux/can/dev.h>
#include <linux/can/skb.h>
#include <linux/slab.h>
@@ -152,6 +153,7 @@ static void vcan_setup(struct net_device *dev)
dev->addr_len = 0;
dev->tx_queue_len = 0;
dev->flags = IFF_NOARP;
+ dev->ml_priv = netdev_priv(dev);
/* set flags according to driver capabilities */
if (echo)
@@ -162,8 +164,9 @@ static void vcan_setup(struct net_device *dev)
}
static struct rtnl_link_ops vcan_link_ops __read_mostly = {
- .kind = DRV_NAME,
- .setup = vcan_setup,
+ .kind = DRV_NAME,
+ .priv_size = sizeof(struct can_ml_priv),
+ .setup = vcan_setup,
};
static __init int vcan_init_module(void)
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
index b2106292230e..d6ba9426be4d 100644
--- a/drivers/net/can/vxcan.c
+++ b/drivers/net/can/vxcan.c
@@ -18,6 +18,7 @@
#include <linux/can/dev.h>
#include <linux/can/skb.h>
#include <linux/can/vxcan.h>
+#include <linux/can/can-ml.h>
#include <linux/slab.h>
#include <net/rtnetlink.h>
@@ -146,6 +147,7 @@ static void vxcan_setup(struct net_device *dev)
dev->flags = (IFF_NOARP|IFF_ECHO);
dev->netdev_ops = &vxcan_netdev_ops;
dev->needs_free_netdev = true;
+ dev->ml_priv = netdev_priv(dev) + ALIGN(sizeof(struct vxcan_priv), NETDEV_ALIGN);
}
/* forward declaration for rtnl_create_link() */
@@ -281,7 +283,7 @@ static struct net *vxcan_get_link_net(const struct net_device *dev)
static struct rtnl_link_ops vxcan_link_ops = {
.kind = DRV_NAME,
- .priv_size = sizeof(struct vxcan_priv),
+ .priv_size = ALIGN(sizeof(struct vxcan_priv), NETDEV_ALIGN) + sizeof(struct can_ml_priv),
.setup = vxcan_setup,
.newlink = vxcan_newlink,
.dellink = vxcan_dellink,
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index c48e29486b10..1d8d36de4d20 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -13,7 +13,7 @@
#include <linux/of_mdio.h>
#include <linux/of_net.h>
#include <linux/of_platform.h>
-#include <linux/phy.h>
+#include <linux/phylink.h>
#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
#include <linux/reset.h>
@@ -633,61 +633,75 @@ mt7530_get_sset_count(struct dsa_switch *ds, int port, int sset)
return ARRAY_SIZE(mt7530_mib);
}
-static void mt7530_adjust_link(struct dsa_switch *ds, int port,
- struct phy_device *phydev)
+static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface)
{
struct mt7530_priv *priv = ds->priv;
+ u8 tx_delay = 0;
+ int val;
- if (phy_is_pseudo_fixed_link(phydev)) {
- dev_dbg(priv->dev, "phy-mode for master device = %x\n",
- phydev->interface);
+ mutex_lock(&priv->reg_mutex);
- /* Setup TX circuit incluing relevant PAD and driving */
- mt7530_pad_clk_setup(ds, phydev->interface);
+ val = mt7530_read(priv, MT7530_MHWTRAP);
- if (priv->id == ID_MT7530) {
- /* Setup RX circuit, relevant PAD and driving on the
- * host which must be placed after the setup on the
- * device side is all finished.
- */
- mt7623_pad_clk_setup(ds);
- }
- } else {
- u16 lcl_adv = 0, rmt_adv = 0;
- u8 flowctrl;
- u32 mcr = PMCR_USERP_LINK | PMCR_FORCE_MODE;
+ val |= MHWTRAP_MANUAL | MHWTRAP_P5_MAC_SEL | MHWTRAP_P5_DIS;
+ val &= ~MHWTRAP_P5_RGMII_MODE & ~MHWTRAP_PHY0_SEL;
- switch (phydev->speed) {
- case SPEED_1000:
- mcr |= PMCR_FORCE_SPEED_1000;
- break;
- case SPEED_100:
- mcr |= PMCR_FORCE_SPEED_100;
- break;
- }
+ switch (priv->p5_intf_sel) {
+ case P5_INTF_SEL_PHY_P0:
+ /* MT7530_P5_MODE_GPHY_P0: 2nd GMAC -> P5 -> P0 */
+ val |= MHWTRAP_PHY0_SEL;
+ /* fall through */
+ case P5_INTF_SEL_PHY_P4:
+ /* MT7530_P5_MODE_GPHY_P4: 2nd GMAC -> P5 -> P4 */
+ val &= ~MHWTRAP_P5_MAC_SEL & ~MHWTRAP_P5_DIS;
+
+ /* Setup the MAC by default for the cpu port */
+ mt7530_write(priv, MT7530_PMCR_P(5), 0x56300);
+ break;
+ case P5_INTF_SEL_GMAC5:
+ /* MT7530_P5_MODE_GMAC: P5 -> External phy or 2nd GMAC */
+ val &= ~MHWTRAP_P5_DIS;
+ break;
+ case P5_DISABLED:
+ interface = PHY_INTERFACE_MODE_NA;
+ break;
+ default:
+ dev_err(ds->dev, "Unsupported p5_intf_sel %d\n",
+ priv->p5_intf_sel);
+ goto unlock_exit;
+ }
- if (phydev->link)
- mcr |= PMCR_FORCE_LNK;
+ /* Setup RGMII settings */
+ if (phy_interface_mode_is_rgmii(interface)) {
+ val |= MHWTRAP_P5_RGMII_MODE;
- if (phydev->duplex) {
- mcr |= PMCR_FORCE_FDX;
+ /* P5 RGMII RX Clock Control: delay setting for 1000M */
+ mt7530_write(priv, MT7530_P5RGMIIRXCR, CSR_RGMII_EDGE_ALIGN);
- if (phydev->pause)
- rmt_adv = LPA_PAUSE_CAP;
- if (phydev->asym_pause)
- rmt_adv |= LPA_PAUSE_ASYM;
+ /* Don't set delay in DSA mode */
+ if (!dsa_is_dsa_port(priv->ds, 5) &&
+ (interface == PHY_INTERFACE_MODE_RGMII_TXID ||
+ interface == PHY_INTERFACE_MODE_RGMII_ID))
+ tx_delay = 4; /* n * 0.5 ns */
- lcl_adv = linkmode_adv_to_lcl_adv_t(
- phydev->advertising);
- flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
+ /* P5 RGMII TX Clock Control: delay x */
+ mt7530_write(priv, MT7530_P5RGMIITXCR,
+ CSR_RGMII_TXC_CFG(0x10 + tx_delay));
- if (flowctrl & FLOW_CTRL_TX)
- mcr |= PMCR_TX_FC_EN;
- if (flowctrl & FLOW_CTRL_RX)
- mcr |= PMCR_RX_FC_EN;
- }
- mt7530_write(priv, MT7530_PMCR_P(port), mcr);
+ /* reduce P5 RGMII Tx driving, 8mA */
+ mt7530_write(priv, MT7530_IO_DRV_CR,
+ P5_IO_CLK_DRV(1) | P5_IO_DATA_DRV(1));
}
+
+ mt7530_write(priv, MT7530_MHWTRAP, val);
+
+ dev_dbg(ds->dev, "Setup P5, HWTRAP=0x%x, intf_sel=%s, phy-mode=%s\n",
+ val, p5_intf_modes(priv->p5_intf_sel), phy_modes(interface));
+
+ priv->p5_interface = interface;
+
+unlock_exit:
+ mutex_unlock(&priv->reg_mutex);
}
static int
@@ -698,9 +712,6 @@ mt7530_cpu_port_enable(struct mt7530_priv *priv,
mt7530_write(priv, MT7530_PVC_P(port),
PORT_SPEC_TAG);
- /* Setup the MAC by default for the cpu port */
- mt7530_write(priv, MT7530_PMCR_P(port), PMCR_CPUP_LINK);
-
/* Disable auto learning on the cpu port */
mt7530_set(priv, MT7530_PSC_P(port), SA_DIS);
@@ -731,9 +742,6 @@ mt7530_port_enable(struct dsa_switch *ds, int port,
mutex_lock(&priv->reg_mutex);
- /* Setup the MAC for the user port */
- mt7530_write(priv, MT7530_PMCR_P(port), PMCR_USERP_LINK);
-
	/* Allow the user port to get connected to the cpu port and also
* restore the port matrix if the port is the member of a certain
* bridge.
@@ -742,7 +750,7 @@ mt7530_port_enable(struct dsa_switch *ds, int port,
priv->ports[port].enable = true;
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK,
priv->ports[port].pm);
- mt7530_port_set_status(priv, port, 1);
+ mt7530_port_set_status(priv, port, 0);
mutex_unlock(&priv->reg_mutex);
@@ -1232,10 +1240,13 @@ static int
mt7530_setup(struct dsa_switch *ds)
{
struct mt7530_priv *priv = ds->priv;
- int ret, i;
- u32 id, val;
- struct device_node *dn;
+ struct device_node *phy_node;
+ struct device_node *mac_np;
struct mt7530_dummy_poll p;
+ phy_interface_t interface;
+ struct device_node *dn;
+ u32 id, val;
+ int ret, i;
/* The parent node of master netdev which holds the common system
* controller also is the container for two GMACs nodes representing
@@ -1305,6 +1316,8 @@ mt7530_setup(struct dsa_switch *ds)
val |= MHWTRAP_MANUAL;
mt7530_write(priv, MT7530_MHWTRAP, val);
+ priv->p6_interface = PHY_INTERFACE_MODE_NA;
+
/* Enable and reset MIB counters */
mt7530_mib_reset(ds);
@@ -1321,6 +1334,40 @@ mt7530_setup(struct dsa_switch *ds)
mt7530_port_disable(ds, i);
}
+ /* Setup port 5 */
+ priv->p5_intf_sel = P5_DISABLED;
+ interface = PHY_INTERFACE_MODE_NA;
+
+ if (!dsa_is_unused_port(ds, 5)) {
+ priv->p5_intf_sel = P5_INTF_SEL_GMAC5;
+ interface = of_get_phy_mode(ds->ports[5].dn);
+ } else {
+		/* Scan the ethernet nodes. Look for GMAC1, look up the used phy */
+ for_each_child_of_node(dn, mac_np) {
+ if (!of_device_is_compatible(mac_np,
+ "mediatek,eth-mac"))
+ continue;
+
+ ret = of_property_read_u32(mac_np, "reg", &id);
+ if (ret < 0 || id != 1)
+ continue;
+
+ phy_node = of_parse_phandle(mac_np, "phy-handle", 0);
+ if (phy_node->parent == priv->dev->of_node->parent) {
+ interface = of_get_phy_mode(mac_np);
+ id = of_mdio_parse_addr(ds->dev, phy_node);
+ if (id == 0)
+ priv->p5_intf_sel = P5_INTF_SEL_PHY_P0;
+ if (id == 4)
+ priv->p5_intf_sel = P5_INTF_SEL_PHY_P4;
+ }
+ of_node_put(phy_node);
+ break;
+ }
+ }
+
+ mt7530_setup_port5(ds, interface);
+
/* Flush the FDB table */
ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL);
if (ret < 0)
@@ -1329,6 +1376,216 @@ mt7530_setup(struct dsa_switch *ds)
return 0;
}
+static void mt7530_phylink_mac_config(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ const struct phylink_link_state *state)
+{
+ struct mt7530_priv *priv = ds->priv;
+ u32 mcr_cur, mcr_new;
+
+ switch (port) {
+ case 0: /* Internal phy */
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ if (state->interface != PHY_INTERFACE_MODE_GMII)
+ return;
+ break;
+ case 5: /* 2nd cpu port with phy of port 0 or 4 / external phy */
+ if (priv->p5_interface == state->interface)
+ break;
+ if (!phy_interface_mode_is_rgmii(state->interface) &&
+ state->interface != PHY_INTERFACE_MODE_MII &&
+ state->interface != PHY_INTERFACE_MODE_GMII)
+ return;
+
+ mt7530_setup_port5(ds, state->interface);
+ break;
+ case 6: /* 1st cpu port */
+ if (priv->p6_interface == state->interface)
+ break;
+
+ if (state->interface != PHY_INTERFACE_MODE_RGMII &&
+ state->interface != PHY_INTERFACE_MODE_TRGMII)
+ return;
+
+	/* Setup TX circuit including relevant PAD and driving */
+ mt7530_pad_clk_setup(ds, state->interface);
+
+ if (priv->id == ID_MT7530) {
+ /* Setup RX circuit, relevant PAD and driving on the
+ * host which must be placed after the setup on the
+ * device side is all finished.
+ */
+ mt7623_pad_clk_setup(ds);
+ }
+
+ priv->p6_interface = state->interface;
+ break;
+ default:
+ dev_err(ds->dev, "%s: unsupported port: %i\n", __func__, port);
+ return;
+ }
+
+ if (phylink_autoneg_inband(mode)) {
+ dev_err(ds->dev, "%s: in-band negotiation unsupported\n",
+ __func__);
+ return;
+ }
+
+ mcr_cur = mt7530_read(priv, MT7530_PMCR_P(port));
+ mcr_new = mcr_cur;
+ mcr_new &= ~(PMCR_FORCE_SPEED_1000 | PMCR_FORCE_SPEED_100 |
+ PMCR_FORCE_FDX | PMCR_TX_FC_EN | PMCR_RX_FC_EN);
+ mcr_new |= PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | PMCR_BACKOFF_EN |
+ PMCR_BACKPR_EN | PMCR_FORCE_MODE | PMCR_FORCE_LNK;
+
+ /* Are we connected to external phy */
+ if (port == 5 && dsa_is_user_port(ds, 5))
+ mcr_new |= PMCR_EXT_PHY;
+
+ switch (state->speed) {
+ case SPEED_1000:
+ mcr_new |= PMCR_FORCE_SPEED_1000;
+ break;
+ case SPEED_100:
+ mcr_new |= PMCR_FORCE_SPEED_100;
+ break;
+ }
+ if (state->duplex == DUPLEX_FULL) {
+ mcr_new |= PMCR_FORCE_FDX;
+ if (state->pause & MLO_PAUSE_TX)
+ mcr_new |= PMCR_TX_FC_EN;
+ if (state->pause & MLO_PAUSE_RX)
+ mcr_new |= PMCR_RX_FC_EN;
+ }
+
+ if (mcr_new != mcr_cur)
+ mt7530_write(priv, MT7530_PMCR_P(port), mcr_new);
+}
+
+static void mt7530_phylink_mac_link_down(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ phy_interface_t interface)
+{
+ struct mt7530_priv *priv = ds->priv;
+
+ mt7530_port_set_status(priv, port, 0);
+}
+
+static void mt7530_phylink_mac_link_up(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ phy_interface_t interface,
+ struct phy_device *phydev)
+{
+ struct mt7530_priv *priv = ds->priv;
+
+ mt7530_port_set_status(priv, port, 1);
+}
+
+static void mt7530_phylink_validate(struct dsa_switch *ds, int port,
+ unsigned long *supported,
+ struct phylink_link_state *state)
+{
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+
+ switch (port) {
+ case 0: /* Internal phy */
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ if (state->interface != PHY_INTERFACE_MODE_NA &&
+ state->interface != PHY_INTERFACE_MODE_GMII)
+ goto unsupported;
+ break;
+ case 5: /* 2nd cpu port with phy of port 0 or 4 / external phy */
+ if (state->interface != PHY_INTERFACE_MODE_NA &&
+ !phy_interface_mode_is_rgmii(state->interface) &&
+ state->interface != PHY_INTERFACE_MODE_MII &&
+ state->interface != PHY_INTERFACE_MODE_GMII)
+ goto unsupported;
+ break;
+ case 6: /* 1st cpu port */
+ if (state->interface != PHY_INTERFACE_MODE_NA &&
+ state->interface != PHY_INTERFACE_MODE_RGMII &&
+ state->interface != PHY_INTERFACE_MODE_TRGMII)
+ goto unsupported;
+ break;
+ default:
+ dev_err(ds->dev, "%s: unsupported port: %i\n", __func__, port);
+unsupported:
+ linkmode_zero(supported);
+ return;
+ }
+
+ phylink_set_port_modes(mask);
+ phylink_set(mask, Autoneg);
+
+ if (state->interface == PHY_INTERFACE_MODE_TRGMII) {
+ phylink_set(mask, 1000baseT_Full);
+ } else {
+ phylink_set(mask, 10baseT_Half);
+ phylink_set(mask, 10baseT_Full);
+ phylink_set(mask, 100baseT_Half);
+ phylink_set(mask, 100baseT_Full);
+
+ if (state->interface != PHY_INTERFACE_MODE_MII) {
+ phylink_set(mask, 1000baseT_Half);
+ phylink_set(mask, 1000baseT_Full);
+ if (port == 5)
+ phylink_set(mask, 1000baseX_Full);
+ }
+ }
+
+ phylink_set(mask, Pause);
+ phylink_set(mask, Asym_Pause);
+
+ linkmode_and(supported, supported, mask);
+ linkmode_and(state->advertising, state->advertising, mask);
+}
+
+static int
+mt7530_phylink_mac_link_state(struct dsa_switch *ds, int port,
+ struct phylink_link_state *state)
+{
+ struct mt7530_priv *priv = ds->priv;
+ u32 pmsr;
+
+ if (port < 0 || port >= MT7530_NUM_PORTS)
+ return -EINVAL;
+
+ pmsr = mt7530_read(priv, MT7530_PMSR_P(port));
+
+ state->link = (pmsr & PMSR_LINK);
+ state->an_complete = state->link;
+ state->duplex = !!(pmsr & PMSR_DPX);
+
+ switch (pmsr & PMSR_SPEED_MASK) {
+ case PMSR_SPEED_10:
+ state->speed = SPEED_10;
+ break;
+ case PMSR_SPEED_100:
+ state->speed = SPEED_100;
+ break;
+ case PMSR_SPEED_1000:
+ state->speed = SPEED_1000;
+ break;
+ default:
+ state->speed = SPEED_UNKNOWN;
+ break;
+ }
+
+ state->pause &= ~(MLO_PAUSE_RX | MLO_PAUSE_TX);
+ if (pmsr & PMSR_RX_FC)
+ state->pause |= MLO_PAUSE_RX;
+ if (pmsr & PMSR_TX_FC)
+ state->pause |= MLO_PAUSE_TX;
+
+ return 1;
+}
+
static const struct dsa_switch_ops mt7530_switch_ops = {
.get_tag_protocol = mtk_get_tag_protocol,
.setup = mt7530_setup,
@@ -1337,7 +1594,6 @@ static const struct dsa_switch_ops mt7530_switch_ops = {
.phy_write = mt7530_phy_write,
.get_ethtool_stats = mt7530_get_ethtool_stats,
.get_sset_count = mt7530_get_sset_count,
- .adjust_link = mt7530_adjust_link,
.port_enable = mt7530_port_enable,
.port_disable = mt7530_port_disable,
.port_stp_state_set = mt7530_stp_state_set,
@@ -1350,6 +1606,11 @@ static const struct dsa_switch_ops mt7530_switch_ops = {
.port_vlan_prepare = mt7530_port_vlan_prepare,
.port_vlan_add = mt7530_port_vlan_add,
.port_vlan_del = mt7530_port_vlan_del,
+ .phylink_validate = mt7530_phylink_validate,
+ .phylink_mac_link_state = mt7530_phylink_mac_link_state,
+ .phylink_mac_config = mt7530_phylink_mac_config,
+ .phylink_mac_link_down = mt7530_phylink_mac_link_down,
+ .phylink_mac_link_up = mt7530_phylink_mac_link_up,
};
static const struct of_device_id mt7530_of_match[] = {
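
For reference, here is how a raw MT7530_PMSR_P() word decodes under the new phylink link-state callback above (the register value is hypothetical; the bit layout follows the PMSR_* definitions added in mt7530.h below):

#include <stdio.h>

#define PMSR_RX_FC		(1 << 5)
#define PMSR_TX_FC		(1 << 4)
#define PMSR_SPEED_1000		(1 << 3)
#define PMSR_SPEED_100		(1 << 2)
#define PMSR_SPEED_MASK		(PMSR_SPEED_100 | PMSR_SPEED_1000)
#define PMSR_DPX		(1 << 1)
#define PMSR_LINK		(1 << 0)

int main(void)
{
	unsigned int pmsr = 0x2b;	/* hypothetical read of MT7530_PMSR_P(6) */
	int speed;

	switch (pmsr & PMSR_SPEED_MASK) {
	case PMSR_SPEED_1000:
		speed = 1000;
		break;
	case PMSR_SPEED_100:
		speed = 100;
		break;
	case 0:				/* PMSR_SPEED_10 */
		speed = 10;
		break;
	default:
		speed = -1;
		break;
	}

	printf("link=%u duplex=%s speed=%d rx-pause=%u tx-pause=%u\n",
	       pmsr & PMSR_LINK, pmsr & PMSR_DPX ? "full" : "half",
	       speed, !!(pmsr & PMSR_RX_FC), !!(pmsr & PMSR_TX_FC));
	return 0;
}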
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index bfac90f48102..ccb9da8cad0d 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -186,6 +186,7 @@ enum mt7530_vlan_port_attr {
/* Register for port MAC control register */
#define MT7530_PMCR_P(x) (0x3000 + ((x) * 0x100))
#define PMCR_IFG_XMIT(x) (((x) & 0x3) << 18)
+#define PMCR_EXT_PHY BIT(17)
#define PMCR_MAC_MODE BIT(16)
#define PMCR_FORCE_MODE BIT(15)
#define PMCR_TX_EN BIT(14)
@@ -198,26 +199,20 @@ enum mt7530_vlan_port_attr {
#define PMCR_FORCE_SPEED_100 BIT(2)
#define PMCR_FORCE_FDX BIT(1)
#define PMCR_FORCE_LNK BIT(0)
-#define PMCR_COMMON_LINK (PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | \
- PMCR_BACKOFF_EN | PMCR_BACKPR_EN | \
- PMCR_TX_EN | PMCR_RX_EN | \
- PMCR_TX_FC_EN | PMCR_RX_FC_EN)
-#define PMCR_CPUP_LINK (PMCR_COMMON_LINK | PMCR_FORCE_MODE | \
- PMCR_FORCE_SPEED_1000 | \
- PMCR_FORCE_FDX | \
- PMCR_FORCE_LNK)
-#define PMCR_USERP_LINK PMCR_COMMON_LINK
-#define PMCR_FIXED_LINK (PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | \
- PMCR_FORCE_MODE | PMCR_TX_EN | \
- PMCR_RX_EN | PMCR_BACKPR_EN | \
- PMCR_BACKOFF_EN | \
- PMCR_FORCE_SPEED_1000 | \
- PMCR_FORCE_FDX | \
- PMCR_FORCE_LNK)
-#define PMCR_FIXED_LINK_FC (PMCR_FIXED_LINK | \
- PMCR_TX_FC_EN | PMCR_RX_FC_EN)
+#define PMCR_SPEED_MASK (PMCR_FORCE_SPEED_100 | \
+ PMCR_FORCE_SPEED_1000)
#define MT7530_PMSR_P(x) (0x3008 + (x) * 0x100)
+#define PMSR_EEE1G BIT(7)
+#define PMSR_EEE100M BIT(6)
+#define PMSR_RX_FC BIT(5)
+#define PMSR_TX_FC BIT(4)
+#define PMSR_SPEED_1000 BIT(3)
+#define PMSR_SPEED_100 BIT(2)
+#define PMSR_SPEED_10 0x00
+#define PMSR_SPEED_MASK (PMSR_SPEED_100 | PMSR_SPEED_1000)
+#define PMSR_DPX BIT(1)
+#define PMSR_LINK BIT(0)
/* Register for MIB */
#define MT7530_PORT_MIB_COUNTER(x) (0x4000 + (x) * 0x100)
@@ -251,6 +246,7 @@ enum mt7530_vlan_port_attr {
/* Register for hw trap modification */
#define MT7530_MHWTRAP 0x7804
+#define MHWTRAP_PHY0_SEL BIT(20)
#define MHWTRAP_MANUAL BIT(16)
#define MHWTRAP_P5_MAC_SEL BIT(13)
#define MHWTRAP_P6_DIS BIT(8)
@@ -408,6 +404,30 @@ struct mt7530_port {
u16 pvid;
};
+/* Port 5 interface select definitions */
+enum p5_interface_select {
+ P5_DISABLED = 0,
+ P5_INTF_SEL_PHY_P0,
+ P5_INTF_SEL_PHY_P4,
+ P5_INTF_SEL_GMAC5,
+};
+
+static const char *p5_intf_modes(unsigned int p5_interface)
+{
+ switch (p5_interface) {
+ case P5_DISABLED:
+ return "DISABLED";
+ case P5_INTF_SEL_PHY_P0:
+ return "PHY P0";
+ case P5_INTF_SEL_PHY_P4:
+ return "PHY P4";
+ case P5_INTF_SEL_GMAC5:
+ return "GMAC5";
+ default:
+ return "unknown";
+ }
+}
+
/* struct mt7530_priv - This is the main data structure for holding the state
* of the driver
* @dev: The device pointer
@@ -423,6 +443,8 @@ struct mt7530_port {
* @ports: Holding the state among ports
* @reg_mutex: The lock for protecting among process accessing
* registers
+ * @p6_interface Holding the current port 6 interface
+ * @p5_intf_sel: Holding the current port 5 interface select
*/
struct mt7530_priv {
struct device *dev;
@@ -435,6 +457,9 @@ struct mt7530_priv {
struct gpio_desc *reset;
unsigned int id;
bool mcm;
+ phy_interface_t p6_interface;
+ phy_interface_t p5_interface;
+ unsigned int p5_intf_sel;
struct mt7530_port ports[MT7530_NUM_PORTS];
/* protect among processes for registers access*/
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 93a2d4deb27c..2830dc283ce6 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -168,6 +168,7 @@ config ETHOC
source "drivers/net/ethernet/packetengines/Kconfig"
source "drivers/net/ethernet/pasemi/Kconfig"
+source "drivers/net/ethernet/pensando/Kconfig"
source "drivers/net/ethernet/qlogic/Kconfig"
source "drivers/net/ethernet/qualcomm/Kconfig"
source "drivers/net/ethernet/rdc/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index fb9155cffcff..061edd22f507 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -97,3 +97,4 @@ obj-$(CONFIG_NET_VENDOR_WIZNET) += wiznet/
obj-$(CONFIG_NET_VENDOR_XILINX) += xilinx/
obj-$(CONFIG_NET_VENDOR_XIRCOM) += xircom/
obj-$(CONFIG_NET_VENDOR_SYNOPSYS) += synopsys/
+obj-$(CONFIG_NET_VENDOR_PENSANDO) += pensando/
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 5402867272be..162d7d8fb295 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1348,7 +1348,7 @@ static u32 ingress_fq_count(struct dpaa2_eth_priv *priv)
return total;
}
-static void wait_for_fq_empty(struct dpaa2_eth_priv *priv)
+static void wait_for_ingress_fq_empty(struct dpaa2_eth_priv *priv)
{
int retries = 10;
u32 pending;
@@ -1360,6 +1360,31 @@ static void wait_for_fq_empty(struct dpaa2_eth_priv *priv)
} while (pending && --retries);
}
+#define DPNI_TX_PENDING_VER_MAJOR 7
+#define DPNI_TX_PENDING_VER_MINOR 13
+static void wait_for_egress_fq_empty(struct dpaa2_eth_priv *priv)
+{
+ union dpni_statistics stats;
+ int retries = 10;
+ int err;
+
+ if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_TX_PENDING_VER_MAJOR,
+ DPNI_TX_PENDING_VER_MINOR) < 0)
+ goto out;
+
+ do {
+ err = dpni_get_statistics(priv->mc_io, 0, priv->mc_token, 6,
+ &stats);
+ if (err)
+ goto out;
+ if (stats.page_6.tx_pending_frames == 0)
+ return;
+ } while (--retries);
+
+out:
+ msleep(500);
+}
+
static int dpaa2_eth_stop(struct net_device *net_dev)
{
struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
@@ -1379,7 +1404,7 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
* on WRIOP. After it finishes, wait until all remaining frames on Rx
* and Tx conf queues are consumed on NAPI poll.
*/
- msleep(500);
+ wait_for_egress_fq_empty(priv);
do {
dpni_disable(priv->mc_io, 0, priv->mc_token);
@@ -1395,7 +1420,7 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
*/
}
- wait_for_fq_empty(priv);
+ wait_for_ingress_fq_empty(priv);
disable_ch_napi(priv);
/* Empty the buffer pool */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index 93076fe01b45..0aa1c34019bb 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -28,6 +28,11 @@ static char dpaa2_ethtool_stats[][ETH_GSTRING_LEN] = {
"[hw] rx nobuffer discards",
"[hw] tx discarded frames",
"[hw] tx confirmed frames",
+ "[hw] tx dequeued bytes",
+ "[hw] tx dequeued frames",
+ "[hw] tx rejected bytes",
+ "[hw] tx rejected frames",
+ "[hw] tx pending frames",
};
#define DPAA2_ETH_NUM_STATS ARRAY_SIZE(dpaa2_ethtool_stats)
@@ -188,27 +193,33 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
struct dpaa2_eth_drv_stats *extras;
struct dpaa2_eth_ch_stats *ch_stats;
+ int dpni_stats_page_size[DPNI_STATISTICS_CNT] = {
+ sizeof(dpni_stats.page_0),
+ sizeof(dpni_stats.page_1),
+ sizeof(dpni_stats.page_2),
+ sizeof(dpni_stats.page_3),
+ sizeof(dpni_stats.page_4),
+ sizeof(dpni_stats.page_5),
+ sizeof(dpni_stats.page_6),
+ };
memset(data, 0,
sizeof(u64) * (DPAA2_ETH_NUM_STATS + DPAA2_ETH_NUM_EXTRA_STATS));
/* Print standard counters, from DPNI statistics */
- for (j = 0; j <= 2; j++) {
+ for (j = 0; j <= 6; j++) {
+ /* We're not interested in pages 4 & 5 for now */
+ if (j == 4 || j == 5)
+ continue;
err = dpni_get_statistics(priv->mc_io, 0, priv->mc_token,
j, &dpni_stats);
- if (err != 0)
+ if (err == -EINVAL)
+ /* Older firmware versions don't support all pages */
+ memset(&dpni_stats, 0, sizeof(dpni_stats));
+ else
netdev_warn(net_dev, "dpni_get_stats(%d) failed\n", j);
- switch (j) {
- case 0:
- num_cnt = sizeof(dpni_stats.page_0) / sizeof(u64);
- break;
- case 1:
- num_cnt = sizeof(dpni_stats.page_1) / sizeof(u64);
- break;
- case 2:
- num_cnt = sizeof(dpni_stats.page_2) / sizeof(u64);
- break;
- }
+
+ num_cnt = dpni_stats_page_size[j] / sizeof(u64);
for (k = 0; k < num_cnt; k++)
*(data + i++) = dpni_stats.raw.counter[k];
}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c
index 05e30893dee6..dd54e6953aeb 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c
@@ -1470,7 +1470,7 @@ int dpni_get_queue(struct fsl_mc_io *mc_io,
* @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
* @token: Token of DPNI object
* @page: Selects the statistics page to retrieve, see
- * DPNI_GET_STATISTICS output. Pages are numbered 0 to 2.
+ * DPNI_GET_STATISTICS output. Pages are numbered 0 to 6.
* @stat: Structure containing the statistics
*
* Return: '0' on Success; Error code otherwise.
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h
index 3e8fc6c7a8da..fd583911b6c0 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h
@@ -416,6 +416,26 @@ int dpni_get_tx_data_offset(struct fsl_mc_io *mc_io,
* lack of buffers
* @page_2.egress_discarded_frames: Egress discarded frame count
* @page_2.egress_confirmed_frames: Egress confirmed frame count
+ * @page_3: Page_3 statistics structure
+ * @page_3.egress_dequeue_bytes: Cumulative count of the number of bytes
+ * dequeued from egress FQs
+ * @page_3.egress_dequeue_frames: Cumulative count of the number of frames
+ * dequeued from egress FQs
+ * @page_3.egress_reject_bytes: Cumulative count of the number of bytes in
+ * egress frames whose enqueue was rejected
+ * @page_3.egress_reject_frames: Cumulative count of the number of egress
+ * frames whose enqueue was rejected
+ * @page_4: Page_4 statistics structure: congestion points
+ * @page_4.cgr_reject_frames: number of rejected frames due to congestion point
+ * @page_4.cgr_reject_bytes: number of rejected bytes due to congestion point
+ * @page_5: Page_5 statistics structure: policer
+ * @page_5.policer_cnt_red: Number of red colored frames
+ * @page_5.policer_cnt_yellow: number of yellow colored frames
+ * @page_5.policer_cnt_green: number of green colored frames
+ * @page_5.policer_cnt_re_red: number of recolored red frames
+ * @page_5.policer_cnt_re_yellow: number of recolored yellow frames
+ * @page_6: Page_6 statistics structure
+ * @page_6.tx_pending_frames: total number of frames pending in egress FQs
* @raw: raw statistics structure, used to index counters
*/
union dpni_statistics {
@@ -443,6 +463,26 @@ union dpni_statistics {
u64 egress_confirmed_frames;
} page_2;
struct {
+ u64 egress_dequeue_bytes;
+ u64 egress_dequeue_frames;
+ u64 egress_reject_bytes;
+ u64 egress_reject_frames;
+ } page_3;
+ struct {
+ u64 cgr_reject_frames;
+ u64 cgr_reject_bytes;
+ } page_4;
+ struct {
+ u64 policer_cnt_red;
+ u64 policer_cnt_yellow;
+ u64 policer_cnt_green;
+ u64 policer_cnt_re_red;
+ u64 policer_cnt_re_yellow;
+ } page_5;
+ struct {
+ u64 tx_pending_frames;
+ } page_6;
+ struct {
u64 counter[DPNI_STATISTICS_CNT];
} raw;
};
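
The union added above lets the typed per-page views and the flat raw.counter[] array alias the same storage, which is what the ethtool code relies on when it copies counters page by page. A user-space sketch of that aliasing (the union here is a trimmed, hypothetical stand-in, not the full dpni_statistics layout):

#include <stdio.h>
#include <stdint.h>

#define STATS_CNT 7	/* stand-in for DPNI_STATISTICS_CNT */

union demo_statistics {
	struct {
		uint64_t tx_pending_frames;
	} page_6;
	struct {
		uint64_t counter[STATS_CNT];
	} raw;
};

int main(void)
{
	union demo_statistics stats = { 0 };

	stats.page_6.tx_pending_frames = 42;

	/* raw.counter[0] overlays the first u64 of whichever page was read */
	printf("raw view of page 6, counter 0: %llu\n",
	       (unsigned long long)stats.raw.counter[0]);
	return 0;
}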
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 412c0340fed9..24bf7f68375f 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -105,43 +105,6 @@
const char gfar_driver_version[] = "2.0";
-static int gfar_enet_open(struct net_device *dev);
-static netdev_tx_t gfar_start_xmit(struct sk_buff *skb, struct net_device *dev);
-static void gfar_reset_task(struct work_struct *work);
-static void gfar_timeout(struct net_device *dev);
-static int gfar_close(struct net_device *dev);
-static void gfar_alloc_rx_buffs(struct gfar_priv_rx_q *rx_queue,
- int alloc_cnt);
-static int gfar_set_mac_address(struct net_device *dev);
-static int gfar_change_mtu(struct net_device *dev, int new_mtu);
-static irqreturn_t gfar_error(int irq, void *dev_id);
-static irqreturn_t gfar_transmit(int irq, void *dev_id);
-static irqreturn_t gfar_interrupt(int irq, void *dev_id);
-static void adjust_link(struct net_device *dev);
-static noinline void gfar_update_link_state(struct gfar_private *priv);
-static int init_phy(struct net_device *dev);
-static int gfar_probe(struct platform_device *ofdev);
-static int gfar_remove(struct platform_device *ofdev);
-static void free_skb_resources(struct gfar_private *priv);
-static void gfar_set_multi(struct net_device *dev);
-static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr);
-static void gfar_configure_serdes(struct net_device *dev);
-static int gfar_poll_rx(struct napi_struct *napi, int budget);
-static int gfar_poll_tx(struct napi_struct *napi, int budget);
-static int gfar_poll_rx_sq(struct napi_struct *napi, int budget);
-static int gfar_poll_tx_sq(struct napi_struct *napi, int budget);
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static void gfar_netpoll(struct net_device *dev);
-#endif
-int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit);
-static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue);
-static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb);
-static void gfar_halt_nodisable(struct gfar_private *priv);
-static void gfar_clear_exact_match(struct net_device *dev);
-static void gfar_set_mac_for_addr(struct net_device *dev, int num,
- const u8 *addr);
-static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
-
MODULE_AUTHOR("Freescale Semiconductor, Inc");
MODULE_DESCRIPTION("Gianfar Ethernet Driver");
MODULE_LICENSE("GPL");
@@ -162,138 +125,6 @@ static void gfar_init_rxbdp(struct gfar_priv_rx_q *rx_queue, struct rxbd8 *bdp,
bdp->lstatus = cpu_to_be32(lstatus);
}
-static void gfar_init_bds(struct net_device *ndev)
-{
- struct gfar_private *priv = netdev_priv(ndev);
- struct gfar __iomem *regs = priv->gfargrp[0].regs;
- struct gfar_priv_tx_q *tx_queue = NULL;
- struct gfar_priv_rx_q *rx_queue = NULL;
- struct txbd8 *txbdp;
- u32 __iomem *rfbptr;
- int i, j;
-
- for (i = 0; i < priv->num_tx_queues; i++) {
- tx_queue = priv->tx_queue[i];
- /* Initialize some variables in our dev structure */
- tx_queue->num_txbdfree = tx_queue->tx_ring_size;
- tx_queue->dirty_tx = tx_queue->tx_bd_base;
- tx_queue->cur_tx = tx_queue->tx_bd_base;
- tx_queue->skb_curtx = 0;
- tx_queue->skb_dirtytx = 0;
-
- /* Initialize Transmit Descriptor Ring */
- txbdp = tx_queue->tx_bd_base;
- for (j = 0; j < tx_queue->tx_ring_size; j++) {
- txbdp->lstatus = 0;
- txbdp->bufPtr = 0;
- txbdp++;
- }
-
- /* Set the last descriptor in the ring to indicate wrap */
- txbdp--;
- txbdp->status = cpu_to_be16(be16_to_cpu(txbdp->status) |
- TXBD_WRAP);
- }
-
- rfbptr = &regs->rfbptr0;
- for (i = 0; i < priv->num_rx_queues; i++) {
- rx_queue = priv->rx_queue[i];
-
- rx_queue->next_to_clean = 0;
- rx_queue->next_to_use = 0;
- rx_queue->next_to_alloc = 0;
-
- /* make sure next_to_clean != next_to_use after this
- * by leaving at least 1 unused descriptor
- */
- gfar_alloc_rx_buffs(rx_queue, gfar_rxbd_unused(rx_queue));
-
- rx_queue->rfbptr = rfbptr;
- rfbptr += 2;
- }
-}
-
-static int gfar_alloc_skb_resources(struct net_device *ndev)
-{
- void *vaddr;
- dma_addr_t addr;
- int i, j;
- struct gfar_private *priv = netdev_priv(ndev);
- struct device *dev = priv->dev;
- struct gfar_priv_tx_q *tx_queue = NULL;
- struct gfar_priv_rx_q *rx_queue = NULL;
-
- priv->total_tx_ring_size = 0;
- for (i = 0; i < priv->num_tx_queues; i++)
- priv->total_tx_ring_size += priv->tx_queue[i]->tx_ring_size;
-
- priv->total_rx_ring_size = 0;
- for (i = 0; i < priv->num_rx_queues; i++)
- priv->total_rx_ring_size += priv->rx_queue[i]->rx_ring_size;
-
- /* Allocate memory for the buffer descriptors */
- vaddr = dma_alloc_coherent(dev,
- (priv->total_tx_ring_size *
- sizeof(struct txbd8)) +
- (priv->total_rx_ring_size *
- sizeof(struct rxbd8)),
- &addr, GFP_KERNEL);
- if (!vaddr)
- return -ENOMEM;
-
- for (i = 0; i < priv->num_tx_queues; i++) {
- tx_queue = priv->tx_queue[i];
- tx_queue->tx_bd_base = vaddr;
- tx_queue->tx_bd_dma_base = addr;
- tx_queue->dev = ndev;
- /* enet DMA only understands physical addresses */
- addr += sizeof(struct txbd8) * tx_queue->tx_ring_size;
- vaddr += sizeof(struct txbd8) * tx_queue->tx_ring_size;
- }
-
- /* Start the rx descriptor ring where the tx ring leaves off */
- for (i = 0; i < priv->num_rx_queues; i++) {
- rx_queue = priv->rx_queue[i];
- rx_queue->rx_bd_base = vaddr;
- rx_queue->rx_bd_dma_base = addr;
- rx_queue->ndev = ndev;
- rx_queue->dev = dev;
- addr += sizeof(struct rxbd8) * rx_queue->rx_ring_size;
- vaddr += sizeof(struct rxbd8) * rx_queue->rx_ring_size;
- }
-
- /* Setup the skbuff rings */
- for (i = 0; i < priv->num_tx_queues; i++) {
- tx_queue = priv->tx_queue[i];
- tx_queue->tx_skbuff =
- kmalloc_array(tx_queue->tx_ring_size,
- sizeof(*tx_queue->tx_skbuff),
- GFP_KERNEL);
- if (!tx_queue->tx_skbuff)
- goto cleanup;
-
- for (j = 0; j < tx_queue->tx_ring_size; j++)
- tx_queue->tx_skbuff[j] = NULL;
- }
-
- for (i = 0; i < priv->num_rx_queues; i++) {
- rx_queue = priv->rx_queue[i];
- rx_queue->rx_buff = kcalloc(rx_queue->rx_ring_size,
- sizeof(*rx_queue->rx_buff),
- GFP_KERNEL);
- if (!rx_queue->rx_buff)
- goto cleanup;
- }
-
- gfar_init_bds(ndev);
-
- return 0;
-
-cleanup:
- free_skb_resources(priv);
- return -ENOMEM;
-}
-
static void gfar_init_tx_rx_base(struct gfar_private *priv)
{
struct gfar __iomem *regs = priv->gfargrp[0].regs;
@@ -444,7 +275,7 @@ static void gfar_configure_coalescing(struct gfar_private *priv,
}
}
-void gfar_configure_coalescing_all(struct gfar_private *priv)
+static void gfar_configure_coalescing_all(struct gfar_private *priv)
{
gfar_configure_coalescing(priv, 0xFF, 0xFF);
}
@@ -477,6 +308,62 @@ static struct net_device_stats *gfar_get_stats(struct net_device *dev)
return &dev->stats;
}
+/* Set the appropriate hash bit for the given addr */
+/* The algorithm works like so:
+ * 1) Take the Destination Address (ie the multicast address), and
+ * do a CRC on it (little endian), and reverse the bits of the
+ * result.
+ * 2) Use the 8 most significant bits as a hash into a 256-entry
+ * table. The table is controlled through 8 32-bit registers:
+ * gaddr0-7. gaddr0's MSB is entry 0, and gaddr7's LSB is
+ * entry 255. This means that the 3 most significant bits of the
+ * hash indicate which gaddr register to use, and the 5 other bits
+ * indicate which bit (assuming an IBM numbering scheme, which
+ * for PowerPC (tm) is usually the case) in the register holds
+ * the entry.
+ */
+static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr)
+{
+ u32 tempval;
+ struct gfar_private *priv = netdev_priv(dev);
+ u32 result = ether_crc(ETH_ALEN, addr);
+ int width = priv->hash_width;
+ u8 whichbit = (result >> (32 - width)) & 0x1f;
+ u8 whichreg = result >> (32 - width + 5);
+ u32 value = (1 << (31-whichbit));
+
+ tempval = gfar_read(priv->hash_regs[whichreg]);
+ tempval |= value;
+ gfar_write(priv->hash_regs[whichreg], tempval);
+}
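A worked example of the mapping above, assuming a 256-entry table (hash_width == 8) and an ether_crc() result of 0xE1000000 chosen purely for illustration:

u32 result = 0xE1000000;			/* illustrative CRC value */
int width = 8;					/* 256 entries: 8 regs x 32 bits */
u8 whichbit = (result >> (32 - width)) & 0x1f;	/* == 1 */
u8 whichreg = result >> (32 - width + 5);	/* == 7 */
u32 value = 1 << (31 - whichbit);		/* == 0x40000000 */
/* hash index 0xE1 (entry 225) sets bit 1, IBM numbering, of gaddr7 */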
+
+/* There are multiple MAC Address register pairs on some controllers
+ * This function sets the numth pair to a given address
+ */
+static void gfar_set_mac_for_addr(struct net_device *dev, int num,
+ const u8 *addr)
+{
+ struct gfar_private *priv = netdev_priv(dev);
+ struct gfar __iomem *regs = priv->gfargrp[0].regs;
+ u32 tempval;
+ u32 __iomem *macptr = &regs->macstnaddr1;
+
+ macptr += num*2;
+
+ /* For a station address of 0x12345678ABCD in transmission
+ * order (BE), MACnADDR1 is set to 0xCDAB7856 and
+ * MACnADDR2 is set to 0x34120000.
+ */
+ tempval = (addr[5] << 24) | (addr[4] << 16) |
+ (addr[3] << 8) | addr[2];
+
+ gfar_write(macptr, tempval);
+
+ tempval = (addr[1] << 24) | (addr[0] << 16);
+
+ gfar_write(macptr+1, tempval);
+}
+
static int gfar_set_mac_addr(struct net_device *dev, void *p)
{
eth_mac_addr(dev, p);
@@ -486,24 +373,6 @@ static int gfar_set_mac_addr(struct net_device *dev, void *p)
return 0;
}
-static const struct net_device_ops gfar_netdev_ops = {
- .ndo_open = gfar_enet_open,
- .ndo_start_xmit = gfar_start_xmit,
- .ndo_stop = gfar_close,
- .ndo_change_mtu = gfar_change_mtu,
- .ndo_set_features = gfar_set_features,
- .ndo_set_rx_mode = gfar_set_multi,
- .ndo_tx_timeout = gfar_timeout,
- .ndo_do_ioctl = gfar_ioctl,
- .ndo_get_stats = gfar_get_stats,
- .ndo_change_carrier = fixed_phy_change_carrier,
- .ndo_set_mac_address = gfar_set_mac_addr,
- .ndo_validate_addr = eth_validate_addr,
-#ifdef CONFIG_NET_POLL_CONTROLLER
- .ndo_poll_controller = gfar_netpoll,
-#endif
-};
-
static void gfar_ints_disable(struct gfar_private *priv)
{
int i;
@@ -723,10 +592,53 @@ static int gfar_of_group_count(struct device_node *np)
return num;
}
+/* Reads the controller's registers to determine what interface
+ * connects it to the PHY.
+ */
+static phy_interface_t gfar_get_interface(struct net_device *dev)
+{
+ struct gfar_private *priv = netdev_priv(dev);
+ struct gfar __iomem *regs = priv->gfargrp[0].regs;
+ u32 ecntrl;
+
+ ecntrl = gfar_read(&regs->ecntrl);
+
+ if (ecntrl & ECNTRL_SGMII_MODE)
+ return PHY_INTERFACE_MODE_SGMII;
+
+ if (ecntrl & ECNTRL_TBI_MODE) {
+ if (ecntrl & ECNTRL_REDUCED_MODE)
+ return PHY_INTERFACE_MODE_RTBI;
+ else
+ return PHY_INTERFACE_MODE_TBI;
+ }
+
+ if (ecntrl & ECNTRL_REDUCED_MODE) {
+ if (ecntrl & ECNTRL_REDUCED_MII_MODE) {
+ return PHY_INTERFACE_MODE_RMII;
+ }
+ else {
+ phy_interface_t interface = priv->interface;
+
+ /* This isn't autodetected right now, so it must
+ * be set by the device tree or platform code.
+ */
+ if (interface == PHY_INTERFACE_MODE_RGMII_ID)
+ return PHY_INTERFACE_MODE_RGMII_ID;
+
+ return PHY_INTERFACE_MODE_RGMII;
+ }
+ }
+
+ if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT)
+ return PHY_INTERFACE_MODE_GMII;
+
+ return PHY_INTERFACE_MODE_MII;
+}
+
static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
{
const char *model;
- const char *ctype;
const void *mac_addr;
int err = 0, i;
struct net_device *dev = NULL;
@@ -889,13 +801,15 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
FSL_GIANFAR_DEV_HAS_TIMER |
FSL_GIANFAR_DEV_HAS_RX_FILER;
- err = of_property_read_string(np, "phy-connection-type", &ctype);
-
- /* We only care about rgmii-id. The rest are autodetected */
- if (err == 0 && !strcmp(ctype, "rgmii-id"))
- priv->interface = PHY_INTERFACE_MODE_RGMII_ID;
+ /* Use PHY connection type from the DT node if one is specified there.
+ * rgmii-id really needs to be specified. Other types can be
+ * detected by hardware
+ */
+ err = of_get_phy_mode(np);
+ if (err >= 0)
+ priv->interface = err;
else
- priv->interface = PHY_INTERFACE_MODE_MII;
+ priv->interface = gfar_get_interface(dev);
if (of_find_property(np, "fsl,magic-packet", NULL))
priv->device_flags |= FSL_GIANFAR_DEV_HAS_MAGIC_PACKET;
@@ -931,85 +845,6 @@ tx_alloc_failed:
return err;
}
-static int gfar_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr)
-{
- struct hwtstamp_config config;
- struct gfar_private *priv = netdev_priv(netdev);
-
- if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
- return -EFAULT;
-
- /* reserved for future extensions */
- if (config.flags)
- return -EINVAL;
-
- switch (config.tx_type) {
- case HWTSTAMP_TX_OFF:
- priv->hwts_tx_en = 0;
- break;
- case HWTSTAMP_TX_ON:
- if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER))
- return -ERANGE;
- priv->hwts_tx_en = 1;
- break;
- default:
- return -ERANGE;
- }
-
- switch (config.rx_filter) {
- case HWTSTAMP_FILTER_NONE:
- if (priv->hwts_rx_en) {
- priv->hwts_rx_en = 0;
- reset_gfar(netdev);
- }
- break;
- default:
- if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER))
- return -ERANGE;
- if (!priv->hwts_rx_en) {
- priv->hwts_rx_en = 1;
- reset_gfar(netdev);
- }
- config.rx_filter = HWTSTAMP_FILTER_ALL;
- break;
- }
-
- return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
- -EFAULT : 0;
-}
-
-static int gfar_hwtstamp_get(struct net_device *netdev, struct ifreq *ifr)
-{
- struct hwtstamp_config config;
- struct gfar_private *priv = netdev_priv(netdev);
-
- config.flags = 0;
- config.tx_type = priv->hwts_tx_en ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
- config.rx_filter = (priv->hwts_rx_en ?
- HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE);
-
- return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
- -EFAULT : 0;
-}
-
-static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
-{
- struct phy_device *phydev = dev->phydev;
-
- if (!netif_running(dev))
- return -EINVAL;
-
- if (cmd == SIOCSHWTSTAMP)
- return gfar_hwtstamp_set(dev, rq);
- if (cmd == SIOCGHWTSTAMP)
- return gfar_hwtstamp_get(dev, rq);
-
- if (!phydev)
- return -ENODEV;
-
- return phy_mii_ioctl(phydev, rq, cmd);
-}
-
static u32 cluster_entry_per_class(struct gfar_private *priv, u32 rqfar,
u32 class)
{
@@ -1133,135 +968,6 @@ static void gfar_detect_errata(struct gfar_private *priv)
priv->errata);
}
-void gfar_mac_reset(struct gfar_private *priv)
-{
- struct gfar __iomem *regs = priv->gfargrp[0].regs;
- u32 tempval;
-
- /* Reset MAC layer */
- gfar_write(&regs->maccfg1, MACCFG1_SOFT_RESET);
-
- /* We need to delay at least 3 TX clocks */
- udelay(3);
-
- /* the soft reset bit is not self-resetting, so we need to
- * clear it before resuming normal operation
- */
- gfar_write(&regs->maccfg1, 0);
-
- udelay(3);
-
- gfar_rx_offload_en(priv);
-
- /* Initialize the max receive frame/buffer lengths */
- gfar_write(&regs->maxfrm, GFAR_JUMBO_FRAME_SIZE);
- gfar_write(&regs->mrblr, GFAR_RXB_SIZE);
-
- /* Initialize the Minimum Frame Length Register */
- gfar_write(&regs->minflr, MINFLR_INIT_SETTINGS);
-
- /* Initialize MACCFG2. */
- tempval = MACCFG2_INIT_SETTINGS;
-
- /* eTSEC74 erratum: Rx frames of length MAXFRM or MAXFRM-1
- * are marked as truncated. Avoid this by MACCFG2[Huge Frame]=1,
- * and by checking RxBD[LG] and discarding larger than MAXFRM.
- */
- if (gfar_has_errata(priv, GFAR_ERRATA_74))
- tempval |= MACCFG2_HUGEFRAME | MACCFG2_LENGTHCHECK;
-
- gfar_write(&regs->maccfg2, tempval);
-
- /* Clear mac addr hash registers */
- gfar_write(&regs->igaddr0, 0);
- gfar_write(&regs->igaddr1, 0);
- gfar_write(&regs->igaddr2, 0);
- gfar_write(&regs->igaddr3, 0);
- gfar_write(&regs->igaddr4, 0);
- gfar_write(&regs->igaddr5, 0);
- gfar_write(&regs->igaddr6, 0);
- gfar_write(&regs->igaddr7, 0);
-
- gfar_write(&regs->gaddr0, 0);
- gfar_write(&regs->gaddr1, 0);
- gfar_write(&regs->gaddr2, 0);
- gfar_write(&regs->gaddr3, 0);
- gfar_write(&regs->gaddr4, 0);
- gfar_write(&regs->gaddr5, 0);
- gfar_write(&regs->gaddr6, 0);
- gfar_write(&regs->gaddr7, 0);
-
- if (priv->extended_hash)
- gfar_clear_exact_match(priv->ndev);
-
- gfar_mac_rx_config(priv);
-
- gfar_mac_tx_config(priv);
-
- gfar_set_mac_address(priv->ndev);
-
- gfar_set_multi(priv->ndev);
-
- /* clear ievent and imask before configuring coalescing */
- gfar_ints_disable(priv);
-
- /* Configure the coalescing support */
- gfar_configure_coalescing_all(priv);
-}
-
-static void gfar_hw_init(struct gfar_private *priv)
-{
- struct gfar __iomem *regs = priv->gfargrp[0].regs;
- u32 attrs;
-
- /* Stop the DMA engine now, in case it was running before
- * (The firmware could have used it, and left it running).
- */
- gfar_halt(priv);
-
- gfar_mac_reset(priv);
-
- /* Zero out the rmon mib registers if it has them */
- if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) {
- memset_io(&(regs->rmon), 0, sizeof(struct rmon_mib));
-
- /* Mask off the CAM interrupts */
- gfar_write(&regs->rmon.cam1, 0xffffffff);
- gfar_write(&regs->rmon.cam2, 0xffffffff);
- }
-
- /* Initialize ECNTRL */
- gfar_write(&regs->ecntrl, ECNTRL_INIT_SETTINGS);
-
- /* Set the extraction length and index */
- attrs = ATTRELI_EL(priv->rx_stash_size) |
- ATTRELI_EI(priv->rx_stash_index);
-
- gfar_write(&regs->attreli, attrs);
-
- /* Start with defaults, and add stashing
- * depending on driver parameters
- */
- attrs = ATTR_INIT_SETTINGS;
-
- if (priv->bd_stash_en)
- attrs |= ATTR_BDSTASH;
-
- if (priv->rx_stash_size != 0)
- attrs |= ATTR_BUFSTASH;
-
- gfar_write(&regs->attr, attrs);
-
- /* FIFO configs */
- gfar_write(&regs->fifo_tx_thr, DEFAULT_FIFO_TX_THR);
- gfar_write(&regs->fifo_tx_starve, DEFAULT_FIFO_TX_STARVE);
- gfar_write(&regs->fifo_tx_starve_shutoff, DEFAULT_FIFO_TX_STARVE_OFF);
-
- /* Program the interrupt steering regs, only for MG devices */
- if (priv->num_grps > 1)
- gfar_write_isrg(priv);
-}
-
static void gfar_init_addr_hash_table(struct gfar_private *priv)
{
struct gfar __iomem *regs = priv->gfargrp[0].regs;
@@ -1302,578 +1008,6 @@ static void gfar_init_addr_hash_table(struct gfar_private *priv)
}
}
-/* Set up the ethernet device structure, private data,
- * and anything else we need before we start
- */
-static int gfar_probe(struct platform_device *ofdev)
-{
- struct device_node *np = ofdev->dev.of_node;
- struct net_device *dev = NULL;
- struct gfar_private *priv = NULL;
- int err = 0, i;
-
- err = gfar_of_init(ofdev, &dev);
-
- if (err)
- return err;
-
- priv = netdev_priv(dev);
- priv->ndev = dev;
- priv->ofdev = ofdev;
- priv->dev = &ofdev->dev;
- SET_NETDEV_DEV(dev, &ofdev->dev);
-
- INIT_WORK(&priv->reset_task, gfar_reset_task);
-
- platform_set_drvdata(ofdev, priv);
-
- gfar_detect_errata(priv);
-
- /* Set the dev->base_addr to the gfar reg region */
- dev->base_addr = (unsigned long) priv->gfargrp[0].regs;
-
- /* Fill in the dev structure */
- dev->watchdog_timeo = TX_TIMEOUT;
- /* MTU range: 50 - 9586 */
- dev->mtu = 1500;
- dev->min_mtu = 50;
- dev->max_mtu = GFAR_JUMBO_FRAME_SIZE - ETH_HLEN;
- dev->netdev_ops = &gfar_netdev_ops;
- dev->ethtool_ops = &gfar_ethtool_ops;
-
- /* Register for napi ...We are registering NAPI for each grp */
- for (i = 0; i < priv->num_grps; i++) {
- if (priv->poll_mode == GFAR_SQ_POLLING) {
- netif_napi_add(dev, &priv->gfargrp[i].napi_rx,
- gfar_poll_rx_sq, GFAR_DEV_WEIGHT);
- netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx,
- gfar_poll_tx_sq, 2);
- } else {
- netif_napi_add(dev, &priv->gfargrp[i].napi_rx,
- gfar_poll_rx, GFAR_DEV_WEIGHT);
- netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx,
- gfar_poll_tx, 2);
- }
- }
-
- if (priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) {
- dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
- NETIF_F_RXCSUM;
- dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG |
- NETIF_F_RXCSUM | NETIF_F_HIGHDMA;
- }
-
- if (priv->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) {
- dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_CTAG_RX;
- dev->features |= NETIF_F_HW_VLAN_CTAG_RX;
- }
-
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
-
- gfar_init_addr_hash_table(priv);
-
- /* Insert receive time stamps into padding alignment bytes, and
- * plus 2 bytes padding to ensure the cpu alignment.
- */
- if (priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)
- priv->padding = 8 + DEFAULT_PADDING;
-
- if (dev->features & NETIF_F_IP_CSUM ||
- priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)
- dev->needed_headroom = GMAC_FCB_LEN;
-
- /* Initializing some of the rx/tx queue level parameters */
- for (i = 0; i < priv->num_tx_queues; i++) {
- priv->tx_queue[i]->tx_ring_size = DEFAULT_TX_RING_SIZE;
- priv->tx_queue[i]->num_txbdfree = DEFAULT_TX_RING_SIZE;
- priv->tx_queue[i]->txcoalescing = DEFAULT_TX_COALESCE;
- priv->tx_queue[i]->txic = DEFAULT_TXIC;
- }
-
- for (i = 0; i < priv->num_rx_queues; i++) {
- priv->rx_queue[i]->rx_ring_size = DEFAULT_RX_RING_SIZE;
- priv->rx_queue[i]->rxcoalescing = DEFAULT_RX_COALESCE;
- priv->rx_queue[i]->rxic = DEFAULT_RXIC;
- }
-
- /* Always enable rx filer if available */
- priv->rx_filer_enable =
- (priv->device_flags & FSL_GIANFAR_DEV_HAS_RX_FILER) ? 1 : 0;
- /* Enable most messages by default */
- priv->msg_enable = (NETIF_MSG_IFUP << 1 ) - 1;
- /* use priority h/w tx queue scheduling for single queue devices */
- if (priv->num_tx_queues == 1)
- priv->prio_sched_en = 1;
-
- set_bit(GFAR_DOWN, &priv->state);
-
- gfar_hw_init(priv);
-
- /* Carrier starts down, phylib will bring it up */
- netif_carrier_off(dev);
-
- err = register_netdev(dev);
-
- if (err) {
- pr_err("%s: Cannot register net device, aborting\n", dev->name);
- goto register_fail;
- }
-
- if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET)
- priv->wol_supported |= GFAR_WOL_MAGIC;
-
- if ((priv->device_flags & FSL_GIANFAR_DEV_HAS_WAKE_ON_FILER) &&
- priv->rx_filer_enable)
- priv->wol_supported |= GFAR_WOL_FILER_UCAST;
-
- device_set_wakeup_capable(&ofdev->dev, priv->wol_supported);
-
- /* fill out IRQ number and name fields */
- for (i = 0; i < priv->num_grps; i++) {
- struct gfar_priv_grp *grp = &priv->gfargrp[i];
- if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
- sprintf(gfar_irq(grp, TX)->name, "%s%s%c%s",
- dev->name, "_g", '0' + i, "_tx");
- sprintf(gfar_irq(grp, RX)->name, "%s%s%c%s",
- dev->name, "_g", '0' + i, "_rx");
- sprintf(gfar_irq(grp, ER)->name, "%s%s%c%s",
- dev->name, "_g", '0' + i, "_er");
- } else
- strcpy(gfar_irq(grp, TX)->name, dev->name);
- }
-
- /* Initialize the filer table */
- gfar_init_filer_table(priv);
-
- /* Print out the device info */
- netdev_info(dev, "mac: %pM\n", dev->dev_addr);
-
- /* Even more device info helps when determining which kernel
- * provided which set of benchmarks.
- */
- netdev_info(dev, "Running with NAPI enabled\n");
- for (i = 0; i < priv->num_rx_queues; i++)
- netdev_info(dev, "RX BD ring size for Q[%d]: %d\n",
- i, priv->rx_queue[i]->rx_ring_size);
- for (i = 0; i < priv->num_tx_queues; i++)
- netdev_info(dev, "TX BD ring size for Q[%d]: %d\n",
- i, priv->tx_queue[i]->tx_ring_size);
-
- return 0;
-
-register_fail:
- if (of_phy_is_fixed_link(np))
- of_phy_deregister_fixed_link(np);
- unmap_group_regs(priv);
- gfar_free_rx_queues(priv);
- gfar_free_tx_queues(priv);
- of_node_put(priv->phy_node);
- of_node_put(priv->tbi_node);
- free_gfar_dev(priv);
- return err;
-}
-
-static int gfar_remove(struct platform_device *ofdev)
-{
- struct gfar_private *priv = platform_get_drvdata(ofdev);
- struct device_node *np = ofdev->dev.of_node;
-
- of_node_put(priv->phy_node);
- of_node_put(priv->tbi_node);
-
- unregister_netdev(priv->ndev);
-
- if (of_phy_is_fixed_link(np))
- of_phy_deregister_fixed_link(np);
-
- unmap_group_regs(priv);
- gfar_free_rx_queues(priv);
- gfar_free_tx_queues(priv);
- free_gfar_dev(priv);
-
- return 0;
-}
-
-#ifdef CONFIG_PM
-
-static void __gfar_filer_disable(struct gfar_private *priv)
-{
- struct gfar __iomem *regs = priv->gfargrp[0].regs;
- u32 temp;
-
- temp = gfar_read(&regs->rctrl);
- temp &= ~(RCTRL_FILREN | RCTRL_PRSDEP_INIT);
- gfar_write(&regs->rctrl, temp);
-}
-
-static void __gfar_filer_enable(struct gfar_private *priv)
-{
- struct gfar __iomem *regs = priv->gfargrp[0].regs;
- u32 temp;
-
- temp = gfar_read(&regs->rctrl);
- temp |= RCTRL_FILREN | RCTRL_PRSDEP_INIT;
- gfar_write(&regs->rctrl, temp);
-}
-
-/* Filer rules implementing wol capabilities */
-static void gfar_filer_config_wol(struct gfar_private *priv)
-{
- unsigned int i;
- u32 rqfcr;
-
- __gfar_filer_disable(priv);
-
- /* clear the filer table, reject any packet by default */
- rqfcr = RQFCR_RJE | RQFCR_CMP_MATCH;
- for (i = 0; i <= MAX_FILER_IDX; i++)
- gfar_write_filer(priv, i, rqfcr, 0);
-
- i = 0;
- if (priv->wol_opts & GFAR_WOL_FILER_UCAST) {
- /* unicast packet, accept it */
- struct net_device *ndev = priv->ndev;
- /* get the default rx queue index */
- u8 qindex = (u8)priv->gfargrp[0].rx_queue->qindex;
- u32 dest_mac_addr = (ndev->dev_addr[0] << 16) |
- (ndev->dev_addr[1] << 8) |
- ndev->dev_addr[2];
-
- rqfcr = (qindex << 10) | RQFCR_AND |
- RQFCR_CMP_EXACT | RQFCR_PID_DAH;
-
- gfar_write_filer(priv, i++, rqfcr, dest_mac_addr);
-
- dest_mac_addr = (ndev->dev_addr[3] << 16) |
- (ndev->dev_addr[4] << 8) |
- ndev->dev_addr[5];
- rqfcr = (qindex << 10) | RQFCR_GPI |
- RQFCR_CMP_EXACT | RQFCR_PID_DAL;
- gfar_write_filer(priv, i++, rqfcr, dest_mac_addr);
- }
-
- __gfar_filer_enable(priv);
-}
-
-static void gfar_filer_restore_table(struct gfar_private *priv)
-{
- u32 rqfcr, rqfpr;
- unsigned int i;
-
- __gfar_filer_disable(priv);
-
- for (i = 0; i <= MAX_FILER_IDX; i++) {
- rqfcr = priv->ftp_rqfcr[i];
- rqfpr = priv->ftp_rqfpr[i];
- gfar_write_filer(priv, i, rqfcr, rqfpr);
- }
-
- __gfar_filer_enable(priv);
-}
-
-/* gfar_start() for Rx only and with the FGPI filer interrupt enabled */
-static void gfar_start_wol_filer(struct gfar_private *priv)
-{
- struct gfar __iomem *regs = priv->gfargrp[0].regs;
- u32 tempval;
- int i = 0;
-
- /* Enable Rx hw queues */
- gfar_write(&regs->rqueue, priv->rqueue);
-
- /* Initialize DMACTRL to have WWR and WOP */
- tempval = gfar_read(&regs->dmactrl);
- tempval |= DMACTRL_INIT_SETTINGS;
- gfar_write(&regs->dmactrl, tempval);
-
- /* Make sure we aren't stopped */
- tempval = gfar_read(&regs->dmactrl);
- tempval &= ~DMACTRL_GRS;
- gfar_write(&regs->dmactrl, tempval);
-
- for (i = 0; i < priv->num_grps; i++) {
- regs = priv->gfargrp[i].regs;
- /* Clear RHLT, so that the DMA starts polling now */
- gfar_write(&regs->rstat, priv->gfargrp[i].rstat);
- /* enable the Filer General Purpose Interrupt */
- gfar_write(&regs->imask, IMASK_FGPI);
- }
-
- /* Enable Rx DMA */
- tempval = gfar_read(&regs->maccfg1);
- tempval |= MACCFG1_RX_EN;
- gfar_write(&regs->maccfg1, tempval);
-}
-
-static int gfar_suspend(struct device *dev)
-{
- struct gfar_private *priv = dev_get_drvdata(dev);
- struct net_device *ndev = priv->ndev;
- struct gfar __iomem *regs = priv->gfargrp[0].regs;
- u32 tempval;
- u16 wol = priv->wol_opts;
-
- if (!netif_running(ndev))
- return 0;
-
- disable_napi(priv);
- netif_tx_lock(ndev);
- netif_device_detach(ndev);
- netif_tx_unlock(ndev);
-
- gfar_halt(priv);
-
- if (wol & GFAR_WOL_MAGIC) {
- /* Enable interrupt on Magic Packet */
- gfar_write(&regs->imask, IMASK_MAG);
-
- /* Enable Magic Packet mode */
- tempval = gfar_read(&regs->maccfg2);
- tempval |= MACCFG2_MPEN;
- gfar_write(&regs->maccfg2, tempval);
-
- /* re-enable the Rx block */
- tempval = gfar_read(&regs->maccfg1);
- tempval |= MACCFG1_RX_EN;
- gfar_write(&regs->maccfg1, tempval);
-
- } else if (wol & GFAR_WOL_FILER_UCAST) {
- gfar_filer_config_wol(priv);
- gfar_start_wol_filer(priv);
-
- } else {
- phy_stop(ndev->phydev);
- }
-
- return 0;
-}
-
-static int gfar_resume(struct device *dev)
-{
- struct gfar_private *priv = dev_get_drvdata(dev);
- struct net_device *ndev = priv->ndev;
- struct gfar __iomem *regs = priv->gfargrp[0].regs;
- u32 tempval;
- u16 wol = priv->wol_opts;
-
- if (!netif_running(ndev))
- return 0;
-
- if (wol & GFAR_WOL_MAGIC) {
- /* Disable Magic Packet mode */
- tempval = gfar_read(&regs->maccfg2);
- tempval &= ~MACCFG2_MPEN;
- gfar_write(&regs->maccfg2, tempval);
-
- } else if (wol & GFAR_WOL_FILER_UCAST) {
- /* need to stop rx only, tx is already down */
- gfar_halt(priv);
- gfar_filer_restore_table(priv);
-
- } else {
- phy_start(ndev->phydev);
- }
-
- gfar_start(priv);
-
- netif_device_attach(ndev);
- enable_napi(priv);
-
- return 0;
-}
-
-static int gfar_restore(struct device *dev)
-{
- struct gfar_private *priv = dev_get_drvdata(dev);
- struct net_device *ndev = priv->ndev;
-
- if (!netif_running(ndev)) {
- netif_device_attach(ndev);
-
- return 0;
- }
-
- gfar_init_bds(ndev);
-
- gfar_mac_reset(priv);
-
- gfar_init_tx_rx_base(priv);
-
- gfar_start(priv);
-
- priv->oldlink = 0;
- priv->oldspeed = 0;
- priv->oldduplex = -1;
-
- if (ndev->phydev)
- phy_start(ndev->phydev);
-
- netif_device_attach(ndev);
- enable_napi(priv);
-
- return 0;
-}
-
-static const struct dev_pm_ops gfar_pm_ops = {
- .suspend = gfar_suspend,
- .resume = gfar_resume,
- .freeze = gfar_suspend,
- .thaw = gfar_resume,
- .restore = gfar_restore,
-};
-
-#define GFAR_PM_OPS (&gfar_pm_ops)
-
-#else
-
-#define GFAR_PM_OPS NULL
-
-#endif
-
-/* Reads the controller's registers to determine what interface
- * connects it to the PHY.
- */
-static phy_interface_t gfar_get_interface(struct net_device *dev)
-{
- struct gfar_private *priv = netdev_priv(dev);
- struct gfar __iomem *regs = priv->gfargrp[0].regs;
- u32 ecntrl;
-
- ecntrl = gfar_read(&regs->ecntrl);
-
- if (ecntrl & ECNTRL_SGMII_MODE)
- return PHY_INTERFACE_MODE_SGMII;
-
- if (ecntrl & ECNTRL_TBI_MODE) {
- if (ecntrl & ECNTRL_REDUCED_MODE)
- return PHY_INTERFACE_MODE_RTBI;
- else
- return PHY_INTERFACE_MODE_TBI;
- }
-
- if (ecntrl & ECNTRL_REDUCED_MODE) {
- if (ecntrl & ECNTRL_REDUCED_MII_MODE) {
- return PHY_INTERFACE_MODE_RMII;
- }
- else {
- phy_interface_t interface = priv->interface;
-
- /* This isn't autodetected right now, so it must
- * be set by the device tree or platform code.
- */
- if (interface == PHY_INTERFACE_MODE_RGMII_ID)
- return PHY_INTERFACE_MODE_RGMII_ID;
-
- return PHY_INTERFACE_MODE_RGMII;
- }
- }
-
- if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT)
- return PHY_INTERFACE_MODE_GMII;
-
- return PHY_INTERFACE_MODE_MII;
-}
-
-
-/* Initializes driver's PHY state, and attaches to the PHY.
- * Returns 0 on success.
- */
-static int init_phy(struct net_device *dev)
-{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
- struct gfar_private *priv = netdev_priv(dev);
- phy_interface_t interface;
- struct phy_device *phydev;
- struct ethtool_eee edata;
-
- linkmode_set_bit_array(phy_10_100_features_array,
- ARRAY_SIZE(phy_10_100_features_array),
- mask);
- linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mask);
- linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask);
- if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT)
- linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mask);
-
- priv->oldlink = 0;
- priv->oldspeed = 0;
- priv->oldduplex = -1;
-
- interface = gfar_get_interface(dev);
-
- phydev = of_phy_connect(dev, priv->phy_node, &adjust_link, 0,
- interface);
- if (!phydev) {
- dev_err(&dev->dev, "could not attach to PHY\n");
- return -ENODEV;
- }
-
- if (interface == PHY_INTERFACE_MODE_SGMII)
- gfar_configure_serdes(dev);
-
- /* Remove any features not supported by the controller */
- linkmode_and(phydev->supported, phydev->supported, mask);
- linkmode_copy(phydev->advertising, phydev->supported);
-
- /* Add support for flow control */
- phy_support_asym_pause(phydev);
-
- /* disable EEE autoneg, EEE not supported by eTSEC */
- memset(&edata, 0, sizeof(struct ethtool_eee));
- phy_ethtool_set_eee(phydev, &edata);
-
- return 0;
-}
-
-/* Initialize TBI PHY interface for communicating with the
- * SERDES lynx PHY on the chip. We communicate with this PHY
- * through the MDIO bus on each controller, treating it as a
- * "normal" PHY at the address found in the TBIPA register. We assume
- * that the TBIPA register is valid. Either the MDIO bus code will set
- * it to a value that doesn't conflict with other PHYs on the bus, or the
- * value doesn't matter, as there are no other PHYs on the bus.
- */
-static void gfar_configure_serdes(struct net_device *dev)
-{
- struct gfar_private *priv = netdev_priv(dev);
- struct phy_device *tbiphy;
-
- if (!priv->tbi_node) {
- dev_warn(&dev->dev, "error: SGMII mode requires that the "
- "device tree specify a tbi-handle\n");
- return;
- }
-
- tbiphy = of_phy_find_device(priv->tbi_node);
- if (!tbiphy) {
- dev_err(&dev->dev, "error: Could not get TBI device\n");
- return;
- }
-
- /* If the link is already up, we must already be ok, and don't need to
- * configure and reset the TBI<->SerDes link. Maybe U-Boot configured
- * everything for us? Resetting it takes the link down and requires
- * several seconds for it to come back.
- */
- if (phy_read(tbiphy, MII_BMSR) & BMSR_LSTATUS) {
- put_device(&tbiphy->mdio.dev);
- return;
- }
-
- /* Single clk mode, mii mode off(for serdes communication) */
- phy_write(tbiphy, MII_TBICON, TBICON_CLK_SELECT);
-
- phy_write(tbiphy, MII_ADVERTISE,
- ADVERTISE_1000XFULL | ADVERTISE_1000XPAUSE |
- ADVERTISE_1000XPSE_ASYM);
-
- phy_write(tbiphy, MII_BMCR,
- BMCR_ANENABLE | BMCR_ANRESTART | BMCR_FULLDPLX |
- BMCR_SPEED1000);
-
- put_device(&tbiphy->mdio.dev);
-}
-
static int __gfar_is_rx_idle(struct gfar_private *priv)
{
u32 res;
@@ -1930,7 +1064,7 @@ retry:
}
/* Halt the receive and transmit queues */
-void gfar_halt(struct gfar_private *priv)
+static void gfar_halt(struct gfar_private *priv)
{
struct gfar __iomem *regs = priv->gfargrp[0].regs;
u32 tempval;
@@ -1949,26 +1083,6 @@ void gfar_halt(struct gfar_private *priv)
gfar_write(&regs->maccfg1, tempval);
}
-void stop_gfar(struct net_device *dev)
-{
- struct gfar_private *priv = netdev_priv(dev);
-
- netif_tx_stop_all_queues(dev);
-
- smp_mb__before_atomic();
- set_bit(GFAR_DOWN, &priv->state);
- smp_mb__after_atomic();
-
- disable_napi(priv);
-
- /* disable ints and gracefully shut down Rx/Tx DMA */
- gfar_halt(priv);
-
- phy_stop(dev->phydev);
-
- free_skb_resources(priv);
-}
-
static void free_skb_tx_queue(struct gfar_priv_tx_q *tx_queue)
{
struct txbd8 *txbdp;
@@ -2061,7 +1175,27 @@ static void free_skb_resources(struct gfar_private *priv)
priv->tx_queue[0]->tx_bd_dma_base);
}
-void gfar_start(struct gfar_private *priv)
+void stop_gfar(struct net_device *dev)
+{
+ struct gfar_private *priv = netdev_priv(dev);
+
+ netif_tx_stop_all_queues(dev);
+
+ smp_mb__before_atomic();
+ set_bit(GFAR_DOWN, &priv->state);
+ smp_mb__after_atomic();
+
+ disable_napi(priv);
+
+ /* disable ints and gracefully shut down Rx/Tx DMA */
+ gfar_halt(priv);
+
+ phy_stop(dev->phydev);
+
+ free_skb_resources(priv);
+}
+
+static void gfar_start(struct gfar_private *priv)
{
struct gfar __iomem *regs = priv->gfargrp[0].regs;
u32 tempval;
@@ -2098,103 +1232,207 @@ void gfar_start(struct gfar_private *priv)
netif_trans_update(priv->ndev); /* prevent tx timeout */
}
-static void free_grp_irqs(struct gfar_priv_grp *grp)
+static bool gfar_new_page(struct gfar_priv_rx_q *rxq, struct gfar_rx_buff *rxb)
{
- free_irq(gfar_irq(grp, TX)->irq, grp);
- free_irq(gfar_irq(grp, RX)->irq, grp);
- free_irq(gfar_irq(grp, ER)->irq, grp);
+ struct page *page;
+ dma_addr_t addr;
+
+ page = dev_alloc_page();
+ if (unlikely(!page))
+ return false;
+
+ addr = dma_map_page(rxq->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(rxq->dev, addr))) {
+ __free_page(page);
+
+ return false;
+ }
+
+ rxb->dma = addr;
+ rxb->page = page;
+ rxb->page_offset = 0;
+
+ return true;
}
-static int register_grp_irqs(struct gfar_priv_grp *grp)
+static void gfar_rx_alloc_err(struct gfar_priv_rx_q *rx_queue)
{
- struct gfar_private *priv = grp->priv;
- struct net_device *dev = priv->ndev;
- int err;
+ struct gfar_private *priv = netdev_priv(rx_queue->ndev);
+ struct gfar_extra_stats *estats = &priv->extra_stats;
- /* If the device has multiple interrupts, register for
- * them. Otherwise, only register for the one
- */
- if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
- /* Install our interrupt handlers for Error,
- * Transmit, and Receive
- */
- err = request_irq(gfar_irq(grp, ER)->irq, gfar_error, 0,
- gfar_irq(grp, ER)->name, grp);
- if (err < 0) {
- netif_err(priv, intr, dev, "Can't get IRQ %d\n",
- gfar_irq(grp, ER)->irq);
+ netdev_err(rx_queue->ndev, "Can't alloc RX buffers\n");
+ atomic64_inc(&estats->rx_alloc_err);
+}
- goto err_irq_fail;
- }
- enable_irq_wake(gfar_irq(grp, ER)->irq);
+static void gfar_alloc_rx_buffs(struct gfar_priv_rx_q *rx_queue,
+ int alloc_cnt)
+{
+ struct rxbd8 *bdp;
+ struct gfar_rx_buff *rxb;
+ int i;
- err = request_irq(gfar_irq(grp, TX)->irq, gfar_transmit, 0,
- gfar_irq(grp, TX)->name, grp);
- if (err < 0) {
- netif_err(priv, intr, dev, "Can't get IRQ %d\n",
- gfar_irq(grp, TX)->irq);
- goto tx_irq_fail;
+ i = rx_queue->next_to_use;
+ bdp = &rx_queue->rx_bd_base[i];
+ rxb = &rx_queue->rx_buff[i];
+
+ while (alloc_cnt--) {
+ /* try reuse page */
+ if (unlikely(!rxb->page)) {
+ if (unlikely(!gfar_new_page(rx_queue, rxb))) {
+ gfar_rx_alloc_err(rx_queue);
+ break;
+ }
}
- err = request_irq(gfar_irq(grp, RX)->irq, gfar_receive, 0,
- gfar_irq(grp, RX)->name, grp);
- if (err < 0) {
- netif_err(priv, intr, dev, "Can't get IRQ %d\n",
- gfar_irq(grp, RX)->irq);
- goto rx_irq_fail;
+
+ /* Setup the new RxBD */
+ gfar_init_rxbdp(rx_queue, bdp,
+ rxb->dma + rxb->page_offset + RXBUF_ALIGNMENT);
+
+ /* Update to the next pointer */
+ bdp++;
+ rxb++;
+
+ if (unlikely(++i == rx_queue->rx_ring_size)) {
+ i = 0;
+ bdp = rx_queue->rx_bd_base;
+ rxb = rx_queue->rx_buff;
}
- enable_irq_wake(gfar_irq(grp, RX)->irq);
+ }
- } else {
- err = request_irq(gfar_irq(grp, TX)->irq, gfar_interrupt, 0,
- gfar_irq(grp, TX)->name, grp);
- if (err < 0) {
- netif_err(priv, intr, dev, "Can't get IRQ %d\n",
- gfar_irq(grp, TX)->irq);
- goto err_irq_fail;
+ rx_queue->next_to_use = i;
+ rx_queue->next_to_alloc = i;
+}
+
+static void gfar_init_bds(struct net_device *ndev)
+{
+ struct gfar_private *priv = netdev_priv(ndev);
+ struct gfar __iomem *regs = priv->gfargrp[0].regs;
+ struct gfar_priv_tx_q *tx_queue = NULL;
+ struct gfar_priv_rx_q *rx_queue = NULL;
+ struct txbd8 *txbdp;
+ u32 __iomem *rfbptr;
+ int i, j;
+
+ for (i = 0; i < priv->num_tx_queues; i++) {
+ tx_queue = priv->tx_queue[i];
+ /* Initialize some variables in our dev structure */
+ tx_queue->num_txbdfree = tx_queue->tx_ring_size;
+ tx_queue->dirty_tx = tx_queue->tx_bd_base;
+ tx_queue->cur_tx = tx_queue->tx_bd_base;
+ tx_queue->skb_curtx = 0;
+ tx_queue->skb_dirtytx = 0;
+
+ /* Initialize Transmit Descriptor Ring */
+ txbdp = tx_queue->tx_bd_base;
+ for (j = 0; j < tx_queue->tx_ring_size; j++) {
+ txbdp->lstatus = 0;
+ txbdp->bufPtr = 0;
+ txbdp++;
}
- enable_irq_wake(gfar_irq(grp, TX)->irq);
+
+ /* Set the last descriptor in the ring to indicate wrap */
+ txbdp--;
+ txbdp->status = cpu_to_be16(be16_to_cpu(txbdp->status) |
+ TXBD_WRAP);
}
- return 0;
+ rfbptr = &regs->rfbptr0;
+ for (i = 0; i < priv->num_rx_queues; i++) {
+ rx_queue = priv->rx_queue[i];
-rx_irq_fail:
- free_irq(gfar_irq(grp, TX)->irq, grp);
-tx_irq_fail:
- free_irq(gfar_irq(grp, ER)->irq, grp);
-err_irq_fail:
- return err;
+ rx_queue->next_to_clean = 0;
+ rx_queue->next_to_use = 0;
+ rx_queue->next_to_alloc = 0;
+
+ /* make sure next_to_clean != next_to_use after this
+ * by leaving at least 1 unused descriptor
+ */
+ gfar_alloc_rx_buffs(rx_queue, gfar_rxbd_unused(rx_queue));
+ rx_queue->rfbptr = rfbptr;
+ rfbptr += 2;
+ }
}
-static void gfar_free_irq(struct gfar_private *priv)
+static int gfar_alloc_skb_resources(struct net_device *ndev)
{
- int i;
+ void *vaddr;
+ dma_addr_t addr;
+ int i, j;
+ struct gfar_private *priv = netdev_priv(ndev);
+ struct device *dev = priv->dev;
+ struct gfar_priv_tx_q *tx_queue = NULL;
+ struct gfar_priv_rx_q *rx_queue = NULL;
- /* Free the IRQs */
- if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
- for (i = 0; i < priv->num_grps; i++)
- free_grp_irqs(&priv->gfargrp[i]);
- } else {
- for (i = 0; i < priv->num_grps; i++)
- free_irq(gfar_irq(&priv->gfargrp[i], TX)->irq,
- &priv->gfargrp[i]);
+ priv->total_tx_ring_size = 0;
+ for (i = 0; i < priv->num_tx_queues; i++)
+ priv->total_tx_ring_size += priv->tx_queue[i]->tx_ring_size;
+
+ priv->total_rx_ring_size = 0;
+ for (i = 0; i < priv->num_rx_queues; i++)
+ priv->total_rx_ring_size += priv->rx_queue[i]->rx_ring_size;
+
+ /* Allocate memory for the buffer descriptors */
+ vaddr = dma_alloc_coherent(dev,
+ (priv->total_tx_ring_size *
+ sizeof(struct txbd8)) +
+ (priv->total_rx_ring_size *
+ sizeof(struct rxbd8)),
+ &addr, GFP_KERNEL);
+ if (!vaddr)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->num_tx_queues; i++) {
+ tx_queue = priv->tx_queue[i];
+ tx_queue->tx_bd_base = vaddr;
+ tx_queue->tx_bd_dma_base = addr;
+ tx_queue->dev = ndev;
+ /* enet DMA only understands physical addresses */
+ addr += sizeof(struct txbd8) * tx_queue->tx_ring_size;
+ vaddr += sizeof(struct txbd8) * tx_queue->tx_ring_size;
}
-}
-static int gfar_request_irq(struct gfar_private *priv)
-{
- int err, i, j;
+ /* Start the rx descriptor ring where the tx ring leaves off */
+ for (i = 0; i < priv->num_rx_queues; i++) {
+ rx_queue = priv->rx_queue[i];
+ rx_queue->rx_bd_base = vaddr;
+ rx_queue->rx_bd_dma_base = addr;
+ rx_queue->ndev = ndev;
+ rx_queue->dev = dev;
+ addr += sizeof(struct rxbd8) * rx_queue->rx_ring_size;
+ vaddr += sizeof(struct rxbd8) * rx_queue->rx_ring_size;
+ }
- for (i = 0; i < priv->num_grps; i++) {
- err = register_grp_irqs(&priv->gfargrp[i]);
- if (err) {
- for (j = 0; j < i; j++)
- free_grp_irqs(&priv->gfargrp[j]);
- return err;
- }
+ /* Setup the skbuff rings */
+ for (i = 0; i < priv->num_tx_queues; i++) {
+ tx_queue = priv->tx_queue[i];
+ tx_queue->tx_skbuff =
+ kmalloc_array(tx_queue->tx_ring_size,
+ sizeof(*tx_queue->tx_skbuff),
+ GFP_KERNEL);
+ if (!tx_queue->tx_skbuff)
+ goto cleanup;
+
+ for (j = 0; j < tx_queue->tx_ring_size; j++)
+ tx_queue->tx_skbuff[j] = NULL;
}
+ for (i = 0; i < priv->num_rx_queues; i++) {
+ rx_queue = priv->rx_queue[i];
+ rx_queue->rx_buff = kcalloc(rx_queue->rx_ring_size,
+ sizeof(*rx_queue->rx_buff),
+ GFP_KERNEL);
+ if (!rx_queue->rx_buff)
+ goto cleanup;
+ }
+
+ gfar_init_bds(ndev);
+
return 0;
+
+cleanup:
+ free_skb_resources(priv);
+ return -ENOMEM;
}
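For a rough sense of scale (illustration only, assuming one TX and one RX queue of 256 descriptors each and 8-byte buffer descriptors), the single dma_alloc_coherent() above covers both rings back to back:

/* 256 * sizeof(struct txbd8) + 256 * sizeof(struct rxbd8)
 *   == 256 * 8 + 256 * 8 == 4096 bytes in one coherent region,
 * with the RX BD ring starting where the TX BD ring ends.
 */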
/* Bring the controller up and running */
@@ -2232,27 +1470,245 @@ int startup_gfar(struct net_device *ndev)
return 0;
}
-/* Called when something needs to use the ethernet device
- * Returns 0 for success.
+static u32 gfar_get_flowctrl_cfg(struct gfar_private *priv)
+{
+ struct net_device *ndev = priv->ndev;
+ struct phy_device *phydev = ndev->phydev;
+ u32 val = 0;
+
+ if (!phydev->duplex)
+ return val;
+
+ if (!priv->pause_aneg_en) {
+ if (priv->tx_pause_en)
+ val |= MACCFG1_TX_FLOW;
+ if (priv->rx_pause_en)
+ val |= MACCFG1_RX_FLOW;
+ } else {
+ u16 lcl_adv, rmt_adv;
+ u8 flowctrl;
+ /* get link partner capabilities */
+ rmt_adv = 0;
+ if (phydev->pause)
+ rmt_adv = LPA_PAUSE_CAP;
+ if (phydev->asym_pause)
+ rmt_adv |= LPA_PAUSE_ASYM;
+
+ lcl_adv = linkmode_adv_to_lcl_adv_t(phydev->advertising);
+ flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
+ if (flowctrl & FLOW_CTRL_TX)
+ val |= MACCFG1_TX_FLOW;
+ if (flowctrl & FLOW_CTRL_RX)
+ val |= MACCFG1_RX_FLOW;
+ }
+
+ return val;
+}
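One concrete resolution of the autoneg branch above (a sketch, relying on the standard mii_resolve_flowctrl_fdx() helper): when both link partners advertise symmetric pause, flow control is enabled in both directions:

u16 lcl_adv = ADVERTISE_PAUSE_CAP;		/* local: symmetric pause */
u16 rmt_adv = LPA_PAUSE_CAP;			/* partner: symmetric pause */
u8 flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
/* flowctrl == FLOW_CTRL_TX | FLOW_CTRL_RX here, so the caller sets
 * both MACCFG1_TX_FLOW and MACCFG1_RX_FLOW in maccfg1.
 */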
+
+static noinline void gfar_update_link_state(struct gfar_private *priv)
+{
+ struct gfar __iomem *regs = priv->gfargrp[0].regs;
+ struct net_device *ndev = priv->ndev;
+ struct phy_device *phydev = ndev->phydev;
+ struct gfar_priv_rx_q *rx_queue = NULL;
+ int i;
+
+ if (unlikely(test_bit(GFAR_RESETTING, &priv->state)))
+ return;
+
+ if (phydev->link) {
+ u32 tempval1 = gfar_read(&regs->maccfg1);
+ u32 tempval = gfar_read(&regs->maccfg2);
+ u32 ecntrl = gfar_read(&regs->ecntrl);
+ u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW);
+
+ if (phydev->duplex != priv->oldduplex) {
+ if (!(phydev->duplex))
+ tempval &= ~(MACCFG2_FULL_DUPLEX);
+ else
+ tempval |= MACCFG2_FULL_DUPLEX;
+
+ priv->oldduplex = phydev->duplex;
+ }
+
+ if (phydev->speed != priv->oldspeed) {
+ switch (phydev->speed) {
+ case 1000:
+ tempval =
+ ((tempval & ~(MACCFG2_IF)) | MACCFG2_GMII);
+
+ ecntrl &= ~(ECNTRL_R100);
+ break;
+ case 100:
+ case 10:
+ tempval =
+ ((tempval & ~(MACCFG2_IF)) | MACCFG2_MII);
+
+ /* Reduced mode distinguishes
+ * between 10 and 100
+ */
+ if (phydev->speed == SPEED_100)
+ ecntrl |= ECNTRL_R100;
+ else
+ ecntrl &= ~(ECNTRL_R100);
+ break;
+ default:
+ netif_warn(priv, link, priv->ndev,
+ "Ack! Speed (%d) is not 10/100/1000!\n",
+ phydev->speed);
+ break;
+ }
+
+ priv->oldspeed = phydev->speed;
+ }
+
+ tempval1 &= ~(MACCFG1_TX_FLOW | MACCFG1_RX_FLOW);
+ tempval1 |= gfar_get_flowctrl_cfg(priv);
+
+ /* Turn last free buffer recording on */
+ if ((tempval1 & MACCFG1_TX_FLOW) && !tx_flow_oldval) {
+ for (i = 0; i < priv->num_rx_queues; i++) {
+ u32 bdp_dma;
+
+ rx_queue = priv->rx_queue[i];
+ bdp_dma = gfar_rxbd_dma_lastfree(rx_queue);
+ gfar_write(rx_queue->rfbptr, bdp_dma);
+ }
+
+ priv->tx_actual_en = 1;
+ }
+
+ if (unlikely(!(tempval1 & MACCFG1_TX_FLOW) && tx_flow_oldval))
+ priv->tx_actual_en = 0;
+
+ gfar_write(&regs->maccfg1, tempval1);
+ gfar_write(&regs->maccfg2, tempval);
+ gfar_write(&regs->ecntrl, ecntrl);
+
+ if (!priv->oldlink)
+ priv->oldlink = 1;
+
+ } else if (priv->oldlink) {
+ priv->oldlink = 0;
+ priv->oldspeed = 0;
+ priv->oldduplex = -1;
+ }
+
+ if (netif_msg_link(priv))
+ phy_print_status(phydev);
+}
+
+/* Called every time the controller might need to be made
+ * aware of new link state. The PHY code conveys this
+ * information through variables in the phydev structure, and this
+ * function converts those variables into the appropriate
+ * register values, and can bring down the device if needed.
*/
-static int gfar_enet_open(struct net_device *dev)
+static void adjust_link(struct net_device *dev)
{
struct gfar_private *priv = netdev_priv(dev);
- int err;
+ struct phy_device *phydev = dev->phydev;
- err = init_phy(dev);
- if (err)
- return err;
+ if (unlikely(phydev->link != priv->oldlink ||
+ (phydev->link && (phydev->duplex != priv->oldduplex ||
+ phydev->speed != priv->oldspeed))))
+ gfar_update_link_state(priv);
+}
- err = gfar_request_irq(priv);
- if (err)
- return err;
+/* Initialize TBI PHY interface for communicating with the
+ * SERDES lynx PHY on the chip. We communicate with this PHY
+ * through the MDIO bus on each controller, treating it as a
+ * "normal" PHY at the address found in the TBIPA register. We assume
+ * that the TBIPA register is valid. Either the MDIO bus code will set
+ * it to a value that doesn't conflict with other PHYs on the bus, or the
+ * value doesn't matter, as there are no other PHYs on the bus.
+ */
+static void gfar_configure_serdes(struct net_device *dev)
+{
+ struct gfar_private *priv = netdev_priv(dev);
+ struct phy_device *tbiphy;
- err = startup_gfar(dev);
- if (err)
- return err;
+ if (!priv->tbi_node) {
+ dev_warn(&dev->dev, "error: SGMII mode requires that the "
+ "device tree specify a tbi-handle\n");
+ return;
+ }
- return err;
+ tbiphy = of_phy_find_device(priv->tbi_node);
+ if (!tbiphy) {
+ dev_err(&dev->dev, "error: Could not get TBI device\n");
+ return;
+ }
+
+ /* If the link is already up, we must already be ok, and don't need to
+ * configure and reset the TBI<->SerDes link. Maybe U-Boot configured
+ * everything for us? Resetting it takes the link down and requires
+ * several seconds for it to come back.
+ */
+ if (phy_read(tbiphy, MII_BMSR) & BMSR_LSTATUS) {
+ put_device(&tbiphy->mdio.dev);
+ return;
+ }
+
+ /* Single clk mode, mii mode off (for serdes communication) */
+ phy_write(tbiphy, MII_TBICON, TBICON_CLK_SELECT);
+
+ phy_write(tbiphy, MII_ADVERTISE,
+ ADVERTISE_1000XFULL | ADVERTISE_1000XPAUSE |
+ ADVERTISE_1000XPSE_ASYM);
+
+ phy_write(tbiphy, MII_BMCR,
+ BMCR_ANENABLE | BMCR_ANRESTART | BMCR_FULLDPLX |
+ BMCR_SPEED1000);
+
+ put_device(&tbiphy->mdio.dev);
+}
+
+/* Initializes driver's PHY state, and attaches to the PHY.
+ * Returns 0 on success.
+ */
+static int init_phy(struct net_device *dev)
+{
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+ struct gfar_private *priv = netdev_priv(dev);
+ phy_interface_t interface = priv->interface;
+ struct phy_device *phydev;
+ struct ethtool_eee edata;
+
+ linkmode_set_bit_array(phy_10_100_features_array,
+ ARRAY_SIZE(phy_10_100_features_array),
+ mask);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mask);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask);
+ if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT)
+ linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mask);
+
+ priv->oldlink = 0;
+ priv->oldspeed = 0;
+ priv->oldduplex = -1;
+
+ phydev = of_phy_connect(dev, priv->phy_node, &adjust_link, 0,
+ interface);
+ if (!phydev) {
+ dev_err(&dev->dev, "could not attach to PHY\n");
+ return -ENODEV;
+ }
+
+ if (interface == PHY_INTERFACE_MODE_SGMII)
+ gfar_configure_serdes(dev);
+
+ /* Remove any features not supported by the controller */
+ linkmode_and(phydev->supported, phydev->supported, mask);
+ linkmode_copy(phydev->advertising, phydev->supported);
+
+ /* Add support for flow control */
+ phy_support_asym_pause(phydev);
+
+ /* disable EEE autoneg, EEE not supported by eTSEC */
+ memset(&edata, 0, sizeof(struct ethtool_eee));
+ phy_ethtool_set_eee(phydev, &edata);
+
+ return 0;
}
static inline struct txfcb *gfar_add_fcb(struct sk_buff *skb)
@@ -2583,22 +2039,6 @@ dma_map_err:
return NETDEV_TX_OK;
}
-/* Stops the kernel queue, and halts the controller */
-static int gfar_close(struct net_device *dev)
-{
- struct gfar_private *priv = netdev_priv(dev);
-
- cancel_work_sync(&priv->reset_task);
- stop_gfar(dev);
-
- /* Disconnect from the PHY */
- phy_disconnect(dev->phydev);
-
- gfar_free_irq(priv);
-
- return 0;
-}
-
/* Changes the mac address if the controller is not running. */
static int gfar_set_mac_address(struct net_device *dev)
{
@@ -2660,6 +2100,85 @@ static void gfar_timeout(struct net_device *dev)
schedule_work(&priv->reset_task);
}
+static int gfar_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr)
+{
+ struct hwtstamp_config config;
+ struct gfar_private *priv = netdev_priv(netdev);
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ /* reserved for future extensions */
+ if (config.flags)
+ return -EINVAL;
+
+ switch (config.tx_type) {
+ case HWTSTAMP_TX_OFF:
+ priv->hwts_tx_en = 0;
+ break;
+ case HWTSTAMP_TX_ON:
+ if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER))
+ return -ERANGE;
+ priv->hwts_tx_en = 1;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ switch (config.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ if (priv->hwts_rx_en) {
+ priv->hwts_rx_en = 0;
+ reset_gfar(netdev);
+ }
+ break;
+ default:
+ if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER))
+ return -ERANGE;
+ if (!priv->hwts_rx_en) {
+ priv->hwts_rx_en = 1;
+ reset_gfar(netdev);
+ }
+ config.rx_filter = HWTSTAMP_FILTER_ALL;
+ break;
+ }
+
+ return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+ -EFAULT : 0;
+}
+
+static int gfar_hwtstamp_get(struct net_device *netdev, struct ifreq *ifr)
+{
+ struct hwtstamp_config config;
+ struct gfar_private *priv = netdev_priv(netdev);
+
+ config.flags = 0;
+ config.tx_type = priv->hwts_tx_en ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+ config.rx_filter = (priv->hwts_rx_en ?
+ HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE);
+
+ return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+ -EFAULT : 0;
+}
+
+static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+ struct phy_device *phydev = dev->phydev;
+
+ if (!netif_running(dev))
+ return -EINVAL;
+
+ if (cmd == SIOCSHWTSTAMP)
+ return gfar_hwtstamp_set(dev, rq);
+ if (cmd == SIOCGHWTSTAMP)
+ return gfar_hwtstamp_get(dev, rq);
+
+ if (!phydev)
+ return -ENODEV;
+
+ return phy_mii_ioctl(phydev, rq, cmd);
+}
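A minimal userspace sketch (illustration only, "eth0"-style interface name is a placeholder) of how the SIOCSHWTSTAMP path above is reached:

#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

static int enable_hwts(const char *ifname)
{
	struct hwtstamp_config cfg = {
		.tx_type = HWTSTAMP_TX_ON,
		.rx_filter = HWTSTAMP_FILTER_ALL,
	};
	struct ifreq ifr = {};
	int fd, ret;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return -1;
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (char *)&cfg;
	ret = ioctl(fd, SIOCSHWTSTAMP, &ifr);	/* handled by gfar_hwtstamp_set() */
	close(fd);
	return ret;
}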
+
/* Interrupt Handler for Transmit complete */
static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
{
@@ -2767,77 +2286,6 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
netdev_tx_completed_queue(txq, howmany, bytes_sent);
}
-static bool gfar_new_page(struct gfar_priv_rx_q *rxq, struct gfar_rx_buff *rxb)
-{
- struct page *page;
- dma_addr_t addr;
-
- page = dev_alloc_page();
- if (unlikely(!page))
- return false;
-
- addr = dma_map_page(rxq->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(rxq->dev, addr))) {
- __free_page(page);
-
- return false;
- }
-
- rxb->dma = addr;
- rxb->page = page;
- rxb->page_offset = 0;
-
- return true;
-}
-
-static void gfar_rx_alloc_err(struct gfar_priv_rx_q *rx_queue)
-{
- struct gfar_private *priv = netdev_priv(rx_queue->ndev);
- struct gfar_extra_stats *estats = &priv->extra_stats;
-
- netdev_err(rx_queue->ndev, "Can't alloc RX buffers\n");
- atomic64_inc(&estats->rx_alloc_err);
-}
-
-static void gfar_alloc_rx_buffs(struct gfar_priv_rx_q *rx_queue,
- int alloc_cnt)
-{
- struct rxbd8 *bdp;
- struct gfar_rx_buff *rxb;
- int i;
-
- i = rx_queue->next_to_use;
- bdp = &rx_queue->rx_bd_base[i];
- rxb = &rx_queue->rx_buff[i];
-
- while (alloc_cnt--) {
- /* try reuse page */
- if (unlikely(!rxb->page)) {
- if (unlikely(!gfar_new_page(rx_queue, rxb))) {
- gfar_rx_alloc_err(rx_queue);
- break;
- }
- }
-
- /* Setup the new RxBD */
- gfar_init_rxbdp(rx_queue, bdp,
- rxb->dma + rxb->page_offset + RXBUF_ALIGNMENT);
-
- /* Update to the next pointer */
- bdp++;
- rxb++;
-
- if (unlikely(++i == rx_queue->rx_ring_size)) {
- i = 0;
- bdp = rx_queue->rx_bd_base;
- rxb = rx_queue->rx_buff;
- }
- }
-
- rx_queue->next_to_use = i;
- rx_queue->next_to_alloc = i;
-}
-
static void count_errors(u32 lstatus, struct net_device *ndev)
{
struct gfar_private *priv = netdev_priv(ndev);
@@ -2875,7 +2323,7 @@ static void count_errors(u32 lstatus, struct net_device *ndev)
}
}
-irqreturn_t gfar_receive(int irq, void *grp_id)
+static irqreturn_t gfar_receive(int irq, void *grp_id)
{
struct gfar_priv_grp *grp = (struct gfar_priv_grp *)grp_id;
unsigned long flags;
@@ -3077,7 +2525,8 @@ static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb)
* until the budget/quota has been reached. Returns the number
* of frames handled
*/
-int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
+static int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue,
+ int rx_work_limit)
{
struct net_device *ndev = rx_queue->ndev;
struct gfar_private *priv = netdev_priv(ndev);
@@ -3327,6 +2776,98 @@ static int gfar_poll_tx(struct napi_struct *napi, int budget)
return 0;
}
+/* GFAR error interrupt handler */
+static irqreturn_t gfar_error(int irq, void *grp_id)
+{
+ struct gfar_priv_grp *gfargrp = grp_id;
+ struct gfar __iomem *regs = gfargrp->regs;
+ struct gfar_private *priv = gfargrp->priv;
+ struct net_device *dev = priv->ndev;
+
+ /* Save ievent for future reference */
+ u32 events = gfar_read(&regs->ievent);
+
+ /* Clear IEVENT */
+ gfar_write(&regs->ievent, events & IEVENT_ERR_MASK);
+
+ /* Magic Packet is not an error. */
+ if ((priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) &&
+ (events & IEVENT_MAG))
+ events &= ~IEVENT_MAG;
+
+ /* Hmm... */
+ if (netif_msg_rx_err(priv) || netif_msg_tx_err(priv))
+ netdev_dbg(dev,
+ "error interrupt (ievent=0x%08x imask=0x%08x)\n",
+ events, gfar_read(&regs->imask));
+
+ /* Update the error counters */
+ if (events & IEVENT_TXE) {
+ dev->stats.tx_errors++;
+
+ if (events & IEVENT_LC)
+ dev->stats.tx_window_errors++;
+ if (events & IEVENT_CRL)
+ dev->stats.tx_aborted_errors++;
+ if (events & IEVENT_XFUN) {
+ netif_dbg(priv, tx_err, dev,
+ "TX FIFO underrun, packet dropped\n");
+ dev->stats.tx_dropped++;
+ atomic64_inc(&priv->extra_stats.tx_underrun);
+
+ schedule_work(&priv->reset_task);
+ }
+ netif_dbg(priv, tx_err, dev, "Transmit Error\n");
+ }
+ if (events & IEVENT_BSY) {
+ dev->stats.rx_over_errors++;
+ atomic64_inc(&priv->extra_stats.rx_bsy);
+
+ netif_dbg(priv, rx_err, dev, "busy error (rstat: %x)\n",
+ gfar_read(&regs->rstat));
+ }
+ if (events & IEVENT_BABR) {
+ dev->stats.rx_errors++;
+ atomic64_inc(&priv->extra_stats.rx_babr);
+
+ netif_dbg(priv, rx_err, dev, "babbling RX error\n");
+ }
+ if (events & IEVENT_EBERR) {
+ atomic64_inc(&priv->extra_stats.eberr);
+ netif_dbg(priv, rx_err, dev, "bus error\n");
+ }
+ if (events & IEVENT_RXC)
+ netif_dbg(priv, rx_status, dev, "control frame\n");
+
+ if (events & IEVENT_BABT) {
+ atomic64_inc(&priv->extra_stats.tx_babt);
+ netif_dbg(priv, tx_err, dev, "babbling TX error\n");
+ }
+ return IRQ_HANDLED;
+}
+
+/* The interrupt handler for devices with one interrupt */
+static irqreturn_t gfar_interrupt(int irq, void *grp_id)
+{
+ struct gfar_priv_grp *gfargrp = grp_id;
+
+ /* Save ievent for future reference */
+ u32 events = gfar_read(&gfargrp->regs->ievent);
+
+ /* Check for reception */
+ if (events & IEVENT_RX_MASK)
+ gfar_receive(irq, grp_id);
+
+ /* Check for transmit completion */
+ if (events & IEVENT_TX_MASK)
+ gfar_transmit(irq, grp_id);
+
+ /* Check for errors */
+ if (events & IEVENT_ERR_MASK)
+ gfar_error(irq, grp_id);
+
+ return IRQ_HANDLED;
+}
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Polling 'interrupt' - used by things like netconsole to send skbs
@@ -3363,44 +2904,154 @@ static void gfar_netpoll(struct net_device *dev)
}
#endif
-/* The interrupt handler for devices with one interrupt */
-static irqreturn_t gfar_interrupt(int irq, void *grp_id)
+static void free_grp_irqs(struct gfar_priv_grp *grp)
{
- struct gfar_priv_grp *gfargrp = grp_id;
+ free_irq(gfar_irq(grp, TX)->irq, grp);
+ free_irq(gfar_irq(grp, RX)->irq, grp);
+ free_irq(gfar_irq(grp, ER)->irq, grp);
+}
- /* Save ievent for future reference */
- u32 events = gfar_read(&gfargrp->regs->ievent);
+static int register_grp_irqs(struct gfar_priv_grp *grp)
+{
+ struct gfar_private *priv = grp->priv;
+ struct net_device *dev = priv->ndev;
+ int err;
- /* Check for reception */
- if (events & IEVENT_RX_MASK)
- gfar_receive(irq, grp_id);
+ /* If the device has multiple interrupts, register for
+ * them. Otherwise, only register for the one
+ */
+ if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
+ /* Install our interrupt handlers for Error,
+ * Transmit, and Receive
+ */
+ err = request_irq(gfar_irq(grp, ER)->irq, gfar_error, 0,
+ gfar_irq(grp, ER)->name, grp);
+ if (err < 0) {
+ netif_err(priv, intr, dev, "Can't get IRQ %d\n",
+ gfar_irq(grp, ER)->irq);
- /* Check for transmit completion */
- if (events & IEVENT_TX_MASK)
- gfar_transmit(irq, grp_id);
+ goto err_irq_fail;
+ }
+ enable_irq_wake(gfar_irq(grp, ER)->irq);
- /* Check for errors */
- if (events & IEVENT_ERR_MASK)
- gfar_error(irq, grp_id);
+ err = request_irq(gfar_irq(grp, TX)->irq, gfar_transmit, 0,
+ gfar_irq(grp, TX)->name, grp);
+ if (err < 0) {
+ netif_err(priv, intr, dev, "Can't get IRQ %d\n",
+ gfar_irq(grp, TX)->irq);
+ goto tx_irq_fail;
+ }
+ err = request_irq(gfar_irq(grp, RX)->irq, gfar_receive, 0,
+ gfar_irq(grp, RX)->name, grp);
+ if (err < 0) {
+ netif_err(priv, intr, dev, "Can't get IRQ %d\n",
+ gfar_irq(grp, RX)->irq);
+ goto rx_irq_fail;
+ }
+ enable_irq_wake(gfar_irq(grp, RX)->irq);
+
+ } else {
+ err = request_irq(gfar_irq(grp, TX)->irq, gfar_interrupt, 0,
+ gfar_irq(grp, TX)->name, grp);
+ if (err < 0) {
+ netif_err(priv, intr, dev, "Can't get IRQ %d\n",
+ gfar_irq(grp, TX)->irq);
+ goto err_irq_fail;
+ }
+ enable_irq_wake(gfar_irq(grp, TX)->irq);
+ }
+
+ return 0;
+
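+/* Error unwinding: free the IRQs in reverse order of the requests above */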
+rx_irq_fail:
+ free_irq(gfar_irq(grp, TX)->irq, grp);
+tx_irq_fail:
+ free_irq(gfar_irq(grp, ER)->irq, grp);
+err_irq_fail:
+ return err;
- return IRQ_HANDLED;
}
-/* Called every time the controller might need to be made
- * aware of new link state. The PHY code conveys this
- * information through variables in the phydev structure, and this
- * function converts those variables into the appropriate
- * register values, and can bring down the device if needed.
+static void gfar_free_irq(struct gfar_private *priv)
+{
+ int i;
+
+ /* Free the IRQs */
+ if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
+ for (i = 0; i < priv->num_grps; i++)
+ free_grp_irqs(&priv->gfargrp[i]);
+ } else {
+ for (i = 0; i < priv->num_grps; i++)
+ free_irq(gfar_irq(&priv->gfargrp[i], TX)->irq,
+ &priv->gfargrp[i]);
+ }
+}
+
+static int gfar_request_irq(struct gfar_private *priv)
+{
+ int err, i, j;
+
+ for (i = 0; i < priv->num_grps; i++) {
+ err = register_grp_irqs(&priv->gfargrp[i]);
+ if (err) {
+ for (j = 0; j < i; j++)
+ free_grp_irqs(&priv->gfargrp[j]);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+/* Called when something needs to use the ethernet device.
+ * Returns 0 for success.
*/
-static void adjust_link(struct net_device *dev)
+static int gfar_enet_open(struct net_device *dev)
{
struct gfar_private *priv = netdev_priv(dev);
- struct phy_device *phydev = dev->phydev;
+ int err;
- if (unlikely(phydev->link != priv->oldlink ||
- (phydev->link && (phydev->duplex != priv->oldduplex ||
- phydev->speed != priv->oldspeed))))
- gfar_update_link_state(priv);
+ err = init_phy(dev);
+ if (err)
+ return err;
+
+ err = gfar_request_irq(priv);
+ if (err)
+ return err;
+
+ err = startup_gfar(dev);
+ if (err)
+ return err;
+
+ return err;
+}
+
+/* Stops the kernel queue, and halts the controller */
+static int gfar_close(struct net_device *dev)
+{
+ struct gfar_private *priv = netdev_priv(dev);
+
+ cancel_work_sync(&priv->reset_task);
+ stop_gfar(dev);
+
+ /* Disconnect from the PHY */
+ phy_disconnect(dev->phydev);
+
+ gfar_free_irq(priv);
+
+ return 0;
+}
+
+/* Clears each of the exact match registers to zero, so they
+ * don't interfere with normal reception
+ */
+static void gfar_clear_exact_match(struct net_device *dev)
+{
+ int idx;
+ static const u8 zero_arr[ETH_ALEN] = {0, 0, 0, 0, 0, 0};
+
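+ /* entry 0 holds the station MAC address, so start clearing at index 1 */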
+ for (idx = 1; idx < GFAR_EM_NUM + 1; idx++)
+ gfar_set_mac_for_addr(dev, idx, zero_arr);
}
/* Update the hash table based on the current list of multicast
@@ -3494,274 +3145,582 @@ static void gfar_set_multi(struct net_device *dev)
}
}
-
-/* Clears each of the exact match registers to zero, so they
- * don't interfere with normal reception
- */
-static void gfar_clear_exact_match(struct net_device *dev)
+void gfar_mac_reset(struct gfar_private *priv)
{
- int idx;
- static const u8 zero_arr[ETH_ALEN] = {0, 0, 0, 0, 0, 0};
+ struct gfar __iomem *regs = priv->gfargrp[0].regs;
+ u32 tempval;
- for (idx = 1; idx < GFAR_EM_NUM + 1; idx++)
- gfar_set_mac_for_addr(dev, idx, zero_arr);
-}
+ /* Reset MAC layer */
+ gfar_write(&regs->maccfg1, MACCFG1_SOFT_RESET);
-/* Set the appropriate hash bit for the given addr */
-/* The algorithm works like so:
- * 1) Take the Destination Address (ie the multicast address), and
- * do a CRC on it (little endian), and reverse the bits of the
- * result.
- * 2) Use the 8 most significant bits as a hash into a 256-entry
- * table. The table is controlled through 8 32-bit registers:
- * gaddr0-7. gaddr0's MSB is entry 0, and gaddr7's LSB is
- * gaddr7. This means that the 3 most significant bits in the
- * hash index which gaddr register to use, and the 5 other bits
- * indicate which bit (assuming an IBM numbering scheme, which
- * for PowerPC (tm) is usually the case) in the register holds
- * the entry.
- */
-static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr)
-{
- u32 tempval;
- struct gfar_private *priv = netdev_priv(dev);
- u32 result = ether_crc(ETH_ALEN, addr);
- int width = priv->hash_width;
- u8 whichbit = (result >> (32 - width)) & 0x1f;
- u8 whichreg = result >> (32 - width + 5);
- u32 value = (1 << (31-whichbit));
+ /* We need to delay at least 3 TX clocks */
+ udelay(3);
- tempval = gfar_read(priv->hash_regs[whichreg]);
- tempval |= value;
- gfar_write(priv->hash_regs[whichreg], tempval);
-}
+ /* the soft reset bit is not self-resetting, so we need to
+ * clear it before resuming normal operation
+ */
+ gfar_write(&regs->maccfg1, 0);
+ udelay(3);
-/* There are multiple MAC Address register pairs on some controllers
- * This function sets the numth pair to a given address
- */
-static void gfar_set_mac_for_addr(struct net_device *dev, int num,
- const u8 *addr)
+ gfar_rx_offload_en(priv);
+
+ /* Initialize the max receive frame/buffer lengths */
+ gfar_write(&regs->maxfrm, GFAR_JUMBO_FRAME_SIZE);
+ gfar_write(&regs->mrblr, GFAR_RXB_SIZE);
+
+ /* Initialize the Minimum Frame Length Register */
+ gfar_write(&regs->minflr, MINFLR_INIT_SETTINGS);
+
+ /* Initialize MACCFG2. */
+ tempval = MACCFG2_INIT_SETTINGS;
+
+ /* eTSEC74 erratum: Rx frames of length MAXFRM or MAXFRM-1
+ * are marked as truncated. Avoid this by MACCFG2[Huge Frame]=1,
+ * and by checking RxBD[LG] and discarding larger than MAXFRM.
+ */
+ if (gfar_has_errata(priv, GFAR_ERRATA_74))
+ tempval |= MACCFG2_HUGEFRAME | MACCFG2_LENGTHCHECK;
+
+ gfar_write(&regs->maccfg2, tempval);
+
+ /* Clear mac addr hash registers */
+ gfar_write(&regs->igaddr0, 0);
+ gfar_write(&regs->igaddr1, 0);
+ gfar_write(&regs->igaddr2, 0);
+ gfar_write(&regs->igaddr3, 0);
+ gfar_write(&regs->igaddr4, 0);
+ gfar_write(&regs->igaddr5, 0);
+ gfar_write(&regs->igaddr6, 0);
+ gfar_write(&regs->igaddr7, 0);
+
+ gfar_write(&regs->gaddr0, 0);
+ gfar_write(&regs->gaddr1, 0);
+ gfar_write(&regs->gaddr2, 0);
+ gfar_write(&regs->gaddr3, 0);
+ gfar_write(&regs->gaddr4, 0);
+ gfar_write(&regs->gaddr5, 0);
+ gfar_write(&regs->gaddr6, 0);
+ gfar_write(&regs->gaddr7, 0);
+
+ if (priv->extended_hash)
+ gfar_clear_exact_match(priv->ndev);
+
+ gfar_mac_rx_config(priv);
+
+ gfar_mac_tx_config(priv);
+
+ gfar_set_mac_address(priv->ndev);
+
+ gfar_set_multi(priv->ndev);
+
+ /* clear ievent and imask before configuring coalescing */
+ gfar_ints_disable(priv);
+
+ /* Configure the coalescing support */
+ gfar_configure_coalescing_all(priv);
+}
+
+static void gfar_hw_init(struct gfar_private *priv)
{
- struct gfar_private *priv = netdev_priv(dev);
struct gfar __iomem *regs = priv->gfargrp[0].regs;
- u32 tempval;
- u32 __iomem *macptr = &regs->macstnaddr1;
+ u32 attrs;
- macptr += num*2;
+ /* Stop the DMA engine now, in case it was running before
+ * (The firmware could have used it, and left it running).
+ */
+ gfar_halt(priv);
- /* For a station address of 0x12345678ABCD in transmission
- * order (BE), MACnADDR1 is set to 0xCDAB7856 and
- * MACnADDR2 is set to 0x34120000.
+ gfar_mac_reset(priv);
+
+ /* Zero out the rmon mib registers if it has them */
+ if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) {
+ memset_io(&(regs->rmon), 0, sizeof(struct rmon_mib));
+
+ /* Mask off the CAM interrupts */
+ gfar_write(&regs->rmon.cam1, 0xffffffff);
+ gfar_write(&regs->rmon.cam2, 0xffffffff);
+ }
+
+ /* Initialize ECNTRL */
+ gfar_write(&regs->ecntrl, ECNTRL_INIT_SETTINGS);
+
+ /* Set the extraction length and index */
+ attrs = ATTRELI_EL(priv->rx_stash_size) |
+ ATTRELI_EI(priv->rx_stash_index);
+
+ gfar_write(&regs->attreli, attrs);
+
+ /* Start with defaults, and add stashing
+ * depending on driver parameters
*/
- tempval = (addr[5] << 24) | (addr[4] << 16) |
- (addr[3] << 8) | addr[2];
+ attrs = ATTR_INIT_SETTINGS;
- gfar_write(macptr, tempval);
+ if (priv->bd_stash_en)
+ attrs |= ATTR_BDSTASH;
- tempval = (addr[1] << 24) | (addr[0] << 16);
+ if (priv->rx_stash_size != 0)
+ attrs |= ATTR_BUFSTASH;
- gfar_write(macptr+1, tempval);
+ gfar_write(&regs->attr, attrs);
+
+ /* FIFO configs */
+ gfar_write(&regs->fifo_tx_thr, DEFAULT_FIFO_TX_THR);
+ gfar_write(&regs->fifo_tx_starve, DEFAULT_FIFO_TX_STARVE);
+ gfar_write(&regs->fifo_tx_starve_shutoff, DEFAULT_FIFO_TX_STARVE_OFF);
+
+ /* Program the interrupt steering regs, only for MG devices */
+ if (priv->num_grps > 1)
+ gfar_write_isrg(priv);
}
-/* GFAR error interrupt handler */
-static irqreturn_t gfar_error(int irq, void *grp_id)
+static const struct net_device_ops gfar_netdev_ops = {
+ .ndo_open = gfar_enet_open,
+ .ndo_start_xmit = gfar_start_xmit,
+ .ndo_stop = gfar_close,
+ .ndo_change_mtu = gfar_change_mtu,
+ .ndo_set_features = gfar_set_features,
+ .ndo_set_rx_mode = gfar_set_multi,
+ .ndo_tx_timeout = gfar_timeout,
+ .ndo_do_ioctl = gfar_ioctl,
+ .ndo_get_stats = gfar_get_stats,
+ .ndo_change_carrier = fixed_phy_change_carrier,
+ .ndo_set_mac_address = gfar_set_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = gfar_netpoll,
+#endif
+};
+
+/* Set up the ethernet device structure, private data,
+ * and anything else we need before we start
+ */
+static int gfar_probe(struct platform_device *ofdev)
{
- struct gfar_priv_grp *gfargrp = grp_id;
- struct gfar __iomem *regs = gfargrp->regs;
- struct gfar_private *priv= gfargrp->priv;
- struct net_device *dev = priv->ndev;
+ struct device_node *np = ofdev->dev.of_node;
+ struct net_device *dev = NULL;
+ struct gfar_private *priv = NULL;
+ int err = 0, i;
- /* Save ievent for future reference */
- u32 events = gfar_read(&regs->ievent);
+ err = gfar_of_init(ofdev, &dev);
- /* Clear IEVENT */
- gfar_write(&regs->ievent, events & IEVENT_ERR_MASK);
+ if (err)
+ return err;
- /* Magic Packet is not an error. */
- if ((priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) &&
- (events & IEVENT_MAG))
- events &= ~IEVENT_MAG;
+ priv = netdev_priv(dev);
+ priv->ndev = dev;
+ priv->ofdev = ofdev;
+ priv->dev = &ofdev->dev;
+ SET_NETDEV_DEV(dev, &ofdev->dev);
- /* Hmm... */
- if (netif_msg_rx_err(priv) || netif_msg_tx_err(priv))
- netdev_dbg(dev,
- "error interrupt (ievent=0x%08x imask=0x%08x)\n",
- events, gfar_read(&regs->imask));
+ INIT_WORK(&priv->reset_task, gfar_reset_task);
- /* Update the error counters */
- if (events & IEVENT_TXE) {
- dev->stats.tx_errors++;
+ platform_set_drvdata(ofdev, priv);
- if (events & IEVENT_LC)
- dev->stats.tx_window_errors++;
- if (events & IEVENT_CRL)
- dev->stats.tx_aborted_errors++;
- if (events & IEVENT_XFUN) {
- netif_dbg(priv, tx_err, dev,
- "TX FIFO underrun, packet dropped\n");
- dev->stats.tx_dropped++;
- atomic64_inc(&priv->extra_stats.tx_underrun);
+ gfar_detect_errata(priv);
- schedule_work(&priv->reset_task);
+ /* Set the dev->base_addr to the gfar reg region */
+ dev->base_addr = (unsigned long) priv->gfargrp[0].regs;
+
+ /* Fill in the dev structure */
+ dev->watchdog_timeo = TX_TIMEOUT;
+ /* MTU range: 50 - 9586 */
+ dev->mtu = 1500;
+ dev->min_mtu = 50;
+ dev->max_mtu = GFAR_JUMBO_FRAME_SIZE - ETH_HLEN;
+ dev->netdev_ops = &gfar_netdev_ops;
+ dev->ethtool_ops = &gfar_ethtool_ops;
+
+ /* Register NAPI for each interrupt group */
+ for (i = 0; i < priv->num_grps; i++) {
+ if (priv->poll_mode == GFAR_SQ_POLLING) {
+ netif_napi_add(dev, &priv->gfargrp[i].napi_rx,
+ gfar_poll_rx_sq, GFAR_DEV_WEIGHT);
+ netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx,
+ gfar_poll_tx_sq, 2);
+ } else {
+ netif_napi_add(dev, &priv->gfargrp[i].napi_rx,
+ gfar_poll_rx, GFAR_DEV_WEIGHT);
+ netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx,
+ gfar_poll_tx, 2);
}
- netif_dbg(priv, tx_err, dev, "Transmit Error\n");
}
- if (events & IEVENT_BSY) {
- dev->stats.rx_over_errors++;
- atomic64_inc(&priv->extra_stats.rx_bsy);
- netif_dbg(priv, rx_err, dev, "busy error (rstat: %x)\n",
- gfar_read(&regs->rstat));
+ if (priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) {
+ dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
+ NETIF_F_RXCSUM;
+ dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG |
+ NETIF_F_RXCSUM | NETIF_F_HIGHDMA;
}
- if (events & IEVENT_BABR) {
- dev->stats.rx_errors++;
- atomic64_inc(&priv->extra_stats.rx_babr);
- netif_dbg(priv, rx_err, dev, "babbling RX error\n");
+ if (priv->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) {
+ dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_CTAG_RX;
+ dev->features |= NETIF_F_HW_VLAN_CTAG_RX;
}
- if (events & IEVENT_EBERR) {
- atomic64_inc(&priv->extra_stats.eberr);
- netif_dbg(priv, rx_err, dev, "bus error\n");
+
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+
+ gfar_init_addr_hash_table(priv);
+
+ /* Insert receive timestamps into the padding alignment bytes, plus
+ * 2 bytes of padding to ensure CPU alignment.
+ */
+ if (priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)
+ priv->padding = 8 + DEFAULT_PADDING;
+
+ if (dev->features & NETIF_F_IP_CSUM ||
+ priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)
+ dev->needed_headroom = GMAC_FCB_LEN;
+
+ /* Initialize some of the rx/tx queue-level parameters */
+ for (i = 0; i < priv->num_tx_queues; i++) {
+ priv->tx_queue[i]->tx_ring_size = DEFAULT_TX_RING_SIZE;
+ priv->tx_queue[i]->num_txbdfree = DEFAULT_TX_RING_SIZE;
+ priv->tx_queue[i]->txcoalescing = DEFAULT_TX_COALESCE;
+ priv->tx_queue[i]->txic = DEFAULT_TXIC;
}
- if (events & IEVENT_RXC)
- netif_dbg(priv, rx_status, dev, "control frame\n");
- if (events & IEVENT_BABT) {
- atomic64_inc(&priv->extra_stats.tx_babt);
- netif_dbg(priv, tx_err, dev, "babbling TX error\n");
+ for (i = 0; i < priv->num_rx_queues; i++) {
+ priv->rx_queue[i]->rx_ring_size = DEFAULT_RX_RING_SIZE;
+ priv->rx_queue[i]->rxcoalescing = DEFAULT_RX_COALESCE;
+ priv->rx_queue[i]->rxic = DEFAULT_RXIC;
}
- return IRQ_HANDLED;
+
+ /* Always enable rx filer if available */
+ priv->rx_filer_enable =
+ (priv->device_flags & FSL_GIANFAR_DEV_HAS_RX_FILER) ? 1 : 0;
+ /* Enable most messages by default */
+ priv->msg_enable = (NETIF_MSG_IFUP << 1) - 1;
+ /* use priority h/w tx queue scheduling for single queue devices */
+ if (priv->num_tx_queues == 1)
+ priv->prio_sched_en = 1;
+
+ set_bit(GFAR_DOWN, &priv->state);
+
+ gfar_hw_init(priv);
+
+ /* Carrier starts down, phylib will bring it up */
+ netif_carrier_off(dev);
+
+ err = register_netdev(dev);
+
+ if (err) {
+ pr_err("%s: Cannot register net device, aborting\n", dev->name);
+ goto register_fail;
+ }
+
+ if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET)
+ priv->wol_supported |= GFAR_WOL_MAGIC;
+
+ if ((priv->device_flags & FSL_GIANFAR_DEV_HAS_WAKE_ON_FILER) &&
+ priv->rx_filer_enable)
+ priv->wol_supported |= GFAR_WOL_FILER_UCAST;
+
+ device_set_wakeup_capable(&ofdev->dev, priv->wol_supported);
+
+ /* fill out IRQ number and name fields */
+ for (i = 0; i < priv->num_grps; i++) {
+ struct gfar_priv_grp *grp = &priv->gfargrp[i];
+ if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
+ sprintf(gfar_irq(grp, TX)->name, "%s%s%c%s",
+ dev->name, "_g", '0' + i, "_tx");
+ sprintf(gfar_irq(grp, RX)->name, "%s%s%c%s",
+ dev->name, "_g", '0' + i, "_rx");
+ sprintf(gfar_irq(grp, ER)->name, "%s%s%c%s",
+ dev->name, "_g", '0' + i, "_er");
+ } else
+ strcpy(gfar_irq(grp, TX)->name, dev->name);
+ }
+
+ /* Initialize the filer table */
+ gfar_init_filer_table(priv);
+
+ /* Print out the device info */
+ netdev_info(dev, "mac: %pM\n", dev->dev_addr);
+
+ /* Even more device info helps when determining which kernel
+ * provided which set of benchmarks.
+ */
+ netdev_info(dev, "Running with NAPI enabled\n");
+ for (i = 0; i < priv->num_rx_queues; i++)
+ netdev_info(dev, "RX BD ring size for Q[%d]: %d\n",
+ i, priv->rx_queue[i]->rx_ring_size);
+ for (i = 0; i < priv->num_tx_queues; i++)
+ netdev_info(dev, "TX BD ring size for Q[%d]: %d\n",
+ i, priv->tx_queue[i]->tx_ring_size);
+
+ return 0;
+
+register_fail:
+ if (of_phy_is_fixed_link(np))
+ of_phy_deregister_fixed_link(np);
+ unmap_group_regs(priv);
+ gfar_free_rx_queues(priv);
+ gfar_free_tx_queues(priv);
+ of_node_put(priv->phy_node);
+ of_node_put(priv->tbi_node);
+ free_gfar_dev(priv);
+ return err;
}
-static u32 gfar_get_flowctrl_cfg(struct gfar_private *priv)
+static int gfar_remove(struct platform_device *ofdev)
{
- struct net_device *ndev = priv->ndev;
- struct phy_device *phydev = ndev->phydev;
- u32 val = 0;
+ struct gfar_private *priv = platform_get_drvdata(ofdev);
+ struct device_node *np = ofdev->dev.of_node;
- if (!phydev->duplex)
- return val;
+ of_node_put(priv->phy_node);
+ of_node_put(priv->tbi_node);
- if (!priv->pause_aneg_en) {
- if (priv->tx_pause_en)
- val |= MACCFG1_TX_FLOW;
- if (priv->rx_pause_en)
- val |= MACCFG1_RX_FLOW;
- } else {
- u16 lcl_adv, rmt_adv;
- u8 flowctrl;
- /* get link partner capabilities */
- rmt_adv = 0;
- if (phydev->pause)
- rmt_adv = LPA_PAUSE_CAP;
- if (phydev->asym_pause)
- rmt_adv |= LPA_PAUSE_ASYM;
+ unregister_netdev(priv->ndev);
- lcl_adv = linkmode_adv_to_lcl_adv_t(phydev->advertising);
- flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
- if (flowctrl & FLOW_CTRL_TX)
- val |= MACCFG1_TX_FLOW;
- if (flowctrl & FLOW_CTRL_RX)
- val |= MACCFG1_RX_FLOW;
+ if (of_phy_is_fixed_link(np))
+ of_phy_deregister_fixed_link(np);
+
+ unmap_group_regs(priv);
+ gfar_free_rx_queues(priv);
+ gfar_free_tx_queues(priv);
+ free_gfar_dev(priv);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+
+static void __gfar_filer_disable(struct gfar_private *priv)
+{
+ struct gfar __iomem *regs = priv->gfargrp[0].regs;
+ u32 temp;
+
+ temp = gfar_read(&regs->rctrl);
+ temp &= ~(RCTRL_FILREN | RCTRL_PRSDEP_INIT);
+ gfar_write(&regs->rctrl, temp);
+}
+
+static void __gfar_filer_enable(struct gfar_private *priv)
+{
+ struct gfar __iomem *regs = priv->gfargrp[0].regs;
+ u32 temp;
+
+ temp = gfar_read(&regs->rctrl);
+ temp |= RCTRL_FILREN | RCTRL_PRSDEP_INIT;
+ gfar_write(&regs->rctrl, temp);
+}
+
+/* Filer rules implementing wol capabilities */
+static void gfar_filer_config_wol(struct gfar_private *priv)
+{
+ unsigned int i;
+ u32 rqfcr;
+
+ __gfar_filer_disable(priv);
+
+ /* clear the filer table, reject any packet by default */
+ rqfcr = RQFCR_RJE | RQFCR_CMP_MATCH;
+ for (i = 0; i <= MAX_FILER_IDX; i++)
+ gfar_write_filer(priv, i, rqfcr, 0);
+
+ i = 0;
+ if (priv->wol_opts & GFAR_WOL_FILER_UCAST) {
+ /* unicast packet, accept it */
+ struct net_device *ndev = priv->ndev;
+ /* get the default rx queue index */
+ u8 qindex = (u8)priv->gfargrp[0].rx_queue->qindex;
+ u32 dest_mac_addr = (ndev->dev_addr[0] << 16) |
+ (ndev->dev_addr[1] << 8) |
+ ndev->dev_addr[2];
+
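+ /* the filer matches the destination MAC in two halves:
+ * DAH holds the upper three bytes, DAL the lower three
+ */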
+ rqfcr = (qindex << 10) | RQFCR_AND |
+ RQFCR_CMP_EXACT | RQFCR_PID_DAH;
+
+ gfar_write_filer(priv, i++, rqfcr, dest_mac_addr);
+
+ dest_mac_addr = (ndev->dev_addr[3] << 16) |
+ (ndev->dev_addr[4] << 8) |
+ ndev->dev_addr[5];
+ rqfcr = (qindex << 10) | RQFCR_GPI |
+ RQFCR_CMP_EXACT | RQFCR_PID_DAL;
+ gfar_write_filer(priv, i++, rqfcr, dest_mac_addr);
}
- return val;
+ __gfar_filer_enable(priv);
}
-static noinline void gfar_update_link_state(struct gfar_private *priv)
+static void gfar_filer_restore_table(struct gfar_private *priv)
+{
+ u32 rqfcr, rqfpr;
+ unsigned int i;
+
+ __gfar_filer_disable(priv);
+
+ for (i = 0; i <= MAX_FILER_IDX; i++) {
+ rqfcr = priv->ftp_rqfcr[i];
+ rqfpr = priv->ftp_rqfpr[i];
+ gfar_write_filer(priv, i, rqfcr, rqfpr);
+ }
+
+ __gfar_filer_enable(priv);
+}
+
+/* gfar_start() for Rx only and with the FGPI filer interrupt enabled */
+static void gfar_start_wol_filer(struct gfar_private *priv)
{
struct gfar __iomem *regs = priv->gfargrp[0].regs;
- struct net_device *ndev = priv->ndev;
- struct phy_device *phydev = ndev->phydev;
- struct gfar_priv_rx_q *rx_queue = NULL;
- int i;
+ u32 tempval;
+ int i = 0;
- if (unlikely(test_bit(GFAR_RESETTING, &priv->state)))
- return;
+ /* Enable Rx hw queues */
+ gfar_write(&regs->rqueue, priv->rqueue);
- if (phydev->link) {
- u32 tempval1 = gfar_read(&regs->maccfg1);
- u32 tempval = gfar_read(&regs->maccfg2);
- u32 ecntrl = gfar_read(&regs->ecntrl);
- u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW);
+ /* Initialize DMACTRL to have WWR and WOP */
+ tempval = gfar_read(&regs->dmactrl);
+ tempval |= DMACTRL_INIT_SETTINGS;
+ gfar_write(&regs->dmactrl, tempval);
- if (phydev->duplex != priv->oldduplex) {
- if (!(phydev->duplex))
- tempval &= ~(MACCFG2_FULL_DUPLEX);
- else
- tempval |= MACCFG2_FULL_DUPLEX;
+ /* Make sure we aren't stopped */
+ tempval = gfar_read(&regs->dmactrl);
+ tempval &= ~DMACTRL_GRS;
+ gfar_write(&regs->dmactrl, tempval);
- priv->oldduplex = phydev->duplex;
- }
+ for (i = 0; i < priv->num_grps; i++) {
+ regs = priv->gfargrp[i].regs;
+ /* Clear RHLT, so that the DMA starts polling now */
+ gfar_write(&regs->rstat, priv->gfargrp[i].rstat);
+ /* enable the Filer General Purpose Interrupt */
+ gfar_write(&regs->imask, IMASK_FGPI);
+ }
- if (phydev->speed != priv->oldspeed) {
- switch (phydev->speed) {
- case 1000:
- tempval =
- ((tempval & ~(MACCFG2_IF)) | MACCFG2_GMII);
+ /* Enable Rx DMA */
+ tempval = gfar_read(&regs->maccfg1);
+ tempval |= MACCFG1_RX_EN;
+ gfar_write(&regs->maccfg1, tempval);
+}
- ecntrl &= ~(ECNTRL_R100);
- break;
- case 100:
- case 10:
- tempval =
- ((tempval & ~(MACCFG2_IF)) | MACCFG2_MII);
+static int gfar_suspend(struct device *dev)
+{
+ struct gfar_private *priv = dev_get_drvdata(dev);
+ struct net_device *ndev = priv->ndev;
+ struct gfar __iomem *regs = priv->gfargrp[0].regs;
+ u32 tempval;
+ u16 wol = priv->wol_opts;
- /* Reduced mode distinguishes
- * between 10 and 100
- */
- if (phydev->speed == SPEED_100)
- ecntrl |= ECNTRL_R100;
- else
- ecntrl &= ~(ECNTRL_R100);
- break;
- default:
- netif_warn(priv, link, priv->ndev,
- "Ack! Speed (%d) is not 10/100/1000!\n",
- phydev->speed);
- break;
- }
+ if (!netif_running(ndev))
+ return 0;
- priv->oldspeed = phydev->speed;
- }
+ disable_napi(priv);
+ netif_tx_lock(ndev);
+ netif_device_detach(ndev);
+ netif_tx_unlock(ndev);
- tempval1 &= ~(MACCFG1_TX_FLOW | MACCFG1_RX_FLOW);
- tempval1 |= gfar_get_flowctrl_cfg(priv);
+ gfar_halt(priv);
- /* Turn last free buffer recording on */
- if ((tempval1 & MACCFG1_TX_FLOW) && !tx_flow_oldval) {
- for (i = 0; i < priv->num_rx_queues; i++) {
- u32 bdp_dma;
+ if (wol & GFAR_WOL_MAGIC) {
+ /* Enable interrupt on Magic Packet */
+ gfar_write(&regs->imask, IMASK_MAG);
- rx_queue = priv->rx_queue[i];
- bdp_dma = gfar_rxbd_dma_lastfree(rx_queue);
- gfar_write(rx_queue->rfbptr, bdp_dma);
- }
+ /* Enable Magic Packet mode */
+ tempval = gfar_read(&regs->maccfg2);
+ tempval |= MACCFG2_MPEN;
+ gfar_write(&regs->maccfg2, tempval);
- priv->tx_actual_en = 1;
- }
+ /* re-enable the Rx block */
+ tempval = gfar_read(&regs->maccfg1);
+ tempval |= MACCFG1_RX_EN;
+ gfar_write(&regs->maccfg1, tempval);
- if (unlikely(!(tempval1 & MACCFG1_TX_FLOW) && tx_flow_oldval))
- priv->tx_actual_en = 0;
+ } else if (wol & GFAR_WOL_FILER_UCAST) {
+ gfar_filer_config_wol(priv);
+ gfar_start_wol_filer(priv);
- gfar_write(&regs->maccfg1, tempval1);
+ } else {
+ phy_stop(ndev->phydev);
+ }
+
+ return 0;
+}
+
+static int gfar_resume(struct device *dev)
+{
+ struct gfar_private *priv = dev_get_drvdata(dev);
+ struct net_device *ndev = priv->ndev;
+ struct gfar __iomem *regs = priv->gfargrp[0].regs;
+ u32 tempval;
+ u16 wol = priv->wol_opts;
+
+ if (!netif_running(ndev))
+ return 0;
+
+ if (wol & GFAR_WOL_MAGIC) {
+ /* Disable Magic Packet mode */
+ tempval = gfar_read(&regs->maccfg2);
+ tempval &= ~MACCFG2_MPEN;
gfar_write(&regs->maccfg2, tempval);
- gfar_write(&regs->ecntrl, ecntrl);
- if (!priv->oldlink)
- priv->oldlink = 1;
+ } else if (wol & GFAR_WOL_FILER_UCAST) {
+ /* need to stop rx only, tx is already down */
+ gfar_halt(priv);
+ gfar_filer_restore_table(priv);
- } else if (priv->oldlink) {
- priv->oldlink = 0;
- priv->oldspeed = 0;
- priv->oldduplex = -1;
+ } else {
+ phy_start(ndev->phydev);
}
- if (netif_msg_link(priv))
- phy_print_status(phydev);
+ gfar_start(priv);
+
+ netif_device_attach(ndev);
+ enable_napi(priv);
+
+ return 0;
+}
+
+static int gfar_restore(struct device *dev)
+{
+ struct gfar_private *priv = dev_get_drvdata(dev);
+ struct net_device *ndev = priv->ndev;
+
+ if (!netif_running(ndev)) {
+ netif_device_attach(ndev);
+
+ return 0;
+ }
+
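+ /* the restore callback runs after hibernation, so reprogram the hardware from scratch */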
+ gfar_init_bds(ndev);
+
+ gfar_mac_reset(priv);
+
+ gfar_init_tx_rx_base(priv);
+
+ gfar_start(priv);
+
+ priv->oldlink = 0;
+ priv->oldspeed = 0;
+ priv->oldduplex = -1;
+
+ if (ndev->phydev)
+ phy_start(ndev->phydev);
+
+ netif_device_attach(ndev);
+ enable_napi(priv);
+
+ return 0;
}
+static const struct dev_pm_ops gfar_pm_ops = {
+ .suspend = gfar_suspend,
+ .resume = gfar_resume,
+ .freeze = gfar_suspend,
+ .thaw = gfar_resume,
+ .restore = gfar_restore,
+};
+
+#define GFAR_PM_OPS (&gfar_pm_ops)
+
+#else
+
+#define GFAR_PM_OPS NULL
+
+#endif
+
static const struct of_device_id gfar_match[] =
{
{
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index f2af96349c7b..f472a6dbbe6f 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -67,8 +67,6 @@ struct ethtool_rx_list {
/* Number of bytes to align the rx bufs to */
#define RXBUF_ALIGNMENT 64
-#define PHY_INIT_TIMEOUT 100000
-
#define DRV_NAME "gfar-enet"
extern const char gfar_driver_version[];
@@ -88,10 +86,6 @@ extern const char gfar_driver_version[];
#define GFAR_RX_MAX_RING_SIZE 256
#define GFAR_TX_MAX_RING_SIZE 256
-#define GFAR_MAX_FIFO_THRESHOLD 511
-#define GFAR_MAX_FIFO_STARVE 511
-#define GFAR_MAX_FIFO_STARVE_OFF 511
-
#define FBTHR_SHIFT 24
#define DEFAULT_RX_LFC_THR 16
#define DEFAULT_LFC_PTVVAL 4
@@ -109,9 +103,6 @@ extern const char gfar_driver_version[];
#define DEFAULT_FIFO_TX_THR 0x100
#define DEFAULT_FIFO_TX_STARVE 0x40
#define DEFAULT_FIFO_TX_STARVE_OFF 0x80
-#define DEFAULT_BD_STASH 1
-#define DEFAULT_STASH_LENGTH 96
-#define DEFAULT_STASH_INDEX 0
/* The number of Exact Match registers */
#define GFAR_EM_NUM 15
@@ -139,15 +130,6 @@ extern const char gfar_driver_version[];
#define DEFAULT_RX_COALESCE 0
#define DEFAULT_RXCOUNT 0
-#define GFAR_SUPPORTED (SUPPORTED_10baseT_Half \
- | SUPPORTED_10baseT_Full \
- | SUPPORTED_100baseT_Half \
- | SUPPORTED_100baseT_Full \
- | SUPPORTED_Autoneg \
- | SUPPORTED_MII)
-
-#define GFAR_SUPPORTED_GBIT SUPPORTED_1000baseT_Full
-
/* TBI register addresses */
#define MII_TBICON 0x11
@@ -185,8 +167,6 @@ extern const char gfar_driver_version[];
#define ECNTRL_REDUCED_MII_MODE 0x00000004
#define ECNTRL_SGMII_MODE 0x00000002
-#define MRBLR_INIT_SETTINGS DEFAULT_RX_BUFFER_SIZE
-
#define MINFLR_INIT_SETTINGS 0x00000040
/* Tqueue control */
@@ -266,12 +246,6 @@ extern const char gfar_driver_version[];
#define DEFAULT_TXIC mk_ic_value(DEFAULT_TXCOUNT, DEFAULT_TXTIME)
#define DEFAULT_RXIC mk_ic_value(DEFAULT_RXCOUNT, DEFAULT_RXTIME)
-#define skip_bd(bdp, stride, base, ring_size) ({ \
- typeof(bdp) new_bd = (bdp) + (stride); \
- (new_bd >= (base) + (ring_size)) ? (new_bd - (ring_size)) : new_bd; })
-
-#define next_bd(bdp, base, ring_size) skip_bd(bdp, 1, base, ring_size)
-
#define RCTRL_TS_ENABLE 0x01000000
#define RCTRL_PAL_MASK 0x001f0000
#define RCTRL_LFC 0x00004000
@@ -385,11 +359,6 @@ extern const char gfar_driver_version[];
#define IMASK_RX_DISABLED ((~(IMASK_RX_DEFAULT)) & IMASK_DEFAULT)
#define IMASK_TX_DISABLED ((~(IMASK_TX_DEFAULT)) & IMASK_DEFAULT)
-/* Fifo management */
-#define FIFO_TX_THR_MASK 0x01ff
-#define FIFO_TX_STARVE_MASK 0x01ff
-#define FIFO_TX_STARVE_OFF_MASK 0x01ff
-
/* Attribute fields */
/* This enables rx snooping for buffers and descriptors */
@@ -1326,16 +1295,9 @@ static inline u32 gfar_rxbd_dma_lastfree(struct gfar_priv_rx_q *rxq)
return bdp_dma;
}
-irqreturn_t gfar_receive(int irq, void *dev_id);
int startup_gfar(struct net_device *dev);
void stop_gfar(struct net_device *dev);
-void reset_gfar(struct net_device *dev);
void gfar_mac_reset(struct gfar_private *priv);
-void gfar_halt(struct gfar_private *priv);
-void gfar_start(struct gfar_private *priv);
-void gfar_phy_test(struct mii_bus *bus, struct phy_device *phydev, int enable,
- u32 regnum, u32 read);
-void gfar_configure_coalescing_all(struct gfar_private *priv);
int gfar_set_features(struct net_device *dev, netdev_features_t features);
extern const struct ethtool_ops gfar_ethtool_ops;
@@ -1348,13 +1310,6 @@ extern const struct ethtool_ops gfar_ethtool_ops;
#define RQFCR_PID_PORT_MASK 0xFFFF0000
#define RQFCR_PID_MAC_MASK 0xFF000000
-struct gfar_mask_entry {
- unsigned int mask; /* The mask value which is valid form start to end */
- unsigned int start;
- unsigned int end;
- unsigned int block; /* Same block values indicate depended entries */
-};
-
/* Represents a receive filer table entry */
struct gfar_filer_entry {
u32 ctrl;
diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index 3433b46b90c1..3c8e4e2efc07 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -45,19 +45,6 @@
#define GFAR_MAX_COAL_USECS 0xffff
#define GFAR_MAX_COAL_FRAMES 0xff
-static void gfar_fill_stats(struct net_device *dev, struct ethtool_stats *dummy,
- u64 *buf);
-static void gfar_gstrings(struct net_device *dev, u32 stringset, u8 * buf);
-static int gfar_gcoalesce(struct net_device *dev,
- struct ethtool_coalesce *cvals);
-static int gfar_scoalesce(struct net_device *dev,
- struct ethtool_coalesce *cvals);
-static void gfar_gringparam(struct net_device *dev,
- struct ethtool_ringparam *rvals);
-static int gfar_sringparam(struct net_device *dev,
- struct ethtool_ringparam *rvals);
-static void gfar_gdrvinfo(struct net_device *dev,
- struct ethtool_drvinfo *drvinfo);
static const char stat_gstrings[][ETH_GSTRING_LEN] = {
/* extra stats */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index c1eba421ba82..3a14bbc26ea2 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -24,7 +24,7 @@
#include "hns_dsaf_rcb.h"
#include "hns_dsaf_misc.h"
-const static char *g_dsaf_mode_match[DSAF_MODE_MAX] = {
+static const char *g_dsaf_mode_match[DSAF_MODE_MAX] = {
[DSAF_MODE_DISABLE_2PORT_64VM] = "2port-64vf",
[DSAF_MODE_DISABLE_6PORT_0VM] = "6port-16rss",
[DSAF_MODE_DISABLE_6PORT_16VM] = "6port-16vf",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
index 528f6243cdc6..03ca7d925e8e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
@@ -138,12 +138,28 @@ EXPORT_SYMBOL(hnae3_register_client);
void hnae3_unregister_client(struct hnae3_client *client)
{
+ struct hnae3_client *client_tmp;
struct hnae3_ae_dev *ae_dev;
+ bool existed = false;
if (!client)
return;
mutex_lock(&hnae3_common_lock);
+
+ list_for_each_entry(client_tmp, &hnae3_client_list, node) {
+ if (client_tmp->type == client->type) {
+ existed = true;
+ break;
+ }
+ }
+
+ if (!existed) {
+ mutex_unlock(&hnae3_common_lock);
+ pr_err("client %s does not exist!\n", client->name);
+ return;
+ }
+
/* un-initialize the client on every matched port */
list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
hnae3_uninit_client_instance(client, ae_dev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index c52eccc1621d..aa692b13b6f3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -908,9 +908,11 @@ static struct hns3_enet_ring *hns3_backup_ringparam(struct hns3_nic_priv *priv)
if (!tmp_rings)
return NULL;
- for (i = 0; i < handle->kinfo.num_tqps * 2; i++)
+ for (i = 0; i < handle->kinfo.num_tqps * 2; i++) {
memcpy(&tmp_rings[i], priv->ring_data[i].ring,
sizeof(struct hns3_enet_ring));
+ tmp_rings[i].skb = NULL;
+ }
return tmp_rings;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 1c6b501fb7ca..6dcce489fdc5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -949,7 +949,7 @@ static void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
hdev->rst_stats.reset_cnt);
}
-void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev)
+static void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev)
{
struct hclge_desc *desc_src, *desc_tmp;
struct hclge_get_m7_bd_cmd *req;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 2b65f2799846..8d4dc1b019c1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -66,6 +66,7 @@ static void hclge_rfs_filter_expire(struct hclge_dev *hdev);
static void hclge_clear_arfs_rules(struct hnae3_handle *handle);
static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
unsigned long *addr);
+static int hclge_set_default_loopback(struct hclge_dev *hdev);
static struct hnae3_ae_algo ae_algo;
@@ -2599,6 +2600,10 @@ static int hclge_mac_init(struct hclge_dev *hdev)
return ret;
}
+ ret = hclge_set_default_loopback(hdev);
+ if (ret)
+ return ret;
+
ret = hclge_buffer_alloc(hdev);
if (ret)
dev_err(&hdev->pdev->dev,
@@ -3619,6 +3624,7 @@ static int hclge_reset_stack(struct hclge_dev *hdev)
static void hclge_reset(struct hclge_dev *hdev)
{
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+ enum hnae3_reset_type reset_level;
int ret;
/* Initialize ae_dev reset status as well, in case enet layer wants to
@@ -3697,10 +3703,10 @@ static void hclge_reset(struct hclge_dev *hdev)
* it should be handled as soon as possible. since some errors
* need this kind of reset to fix.
*/
- hdev->reset_level = hclge_get_reset_level(ae_dev,
- &hdev->default_reset_request);
- if (hdev->reset_level != HNAE3_NONE_RESET)
- set_bit(hdev->reset_level, &hdev->reset_request);
+ reset_level = hclge_get_reset_level(ae_dev,
+ &hdev->default_reset_request);
+ if (reset_level != HNAE3_NONE_RESET)
+ set_bit(reset_level, &hdev->reset_request);
return;
@@ -6173,7 +6179,7 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable)
bool clear;
hdev->fd_en = enable;
- clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE ? true : false;
+ clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE;
if (!enable)
hclge_del_all_fd_entries(handle, clear);
else
@@ -6330,7 +6336,7 @@ static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en)
return ret;
}
-static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en,
+static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en,
enum hnae3_loop loop_mode)
{
#define HCLGE_SERDES_RETRY_MS 10
@@ -6391,6 +6397,17 @@ static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en,
dev_err(&hdev->pdev->dev, "serdes loopback set failed in fw\n");
return -EIO;
}
+ return ret;
+}
+
+static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en,
+ enum hnae3_loop loop_mode)
+{
+ int ret;
+
+ ret = hclge_cfg_serdes_loopback(hdev, en, loop_mode);
+ if (ret)
+ return ret;
hclge_cfg_mac_mode(hdev, en);
@@ -6534,6 +6551,22 @@ static int hclge_set_loopback(struct hnae3_handle *handle,
return 0;
}
+static int hclge_set_default_loopback(struct hclge_dev *hdev)
+{
+ int ret;
+
+ ret = hclge_set_app_loopback(hdev, false);
+ if (ret)
+ return ret;
+
+ ret = hclge_cfg_serdes_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES);
+ if (ret)
+ return ret;
+
+ return hclge_cfg_serdes_loopback(hdev, false,
+ HNAE3_LOOP_PARALLEL_SERDES);
+}
+
static void hclge_reset_tqp_stats(struct hnae3_handle *handle)
{
struct hclge_vport *vport = hclge_get_vport(handle);
@@ -7691,6 +7724,7 @@ static int hclge_set_vlan_tx_offload_cfg(struct hclge_vport *vport)
struct hclge_vport_vtag_tx_cfg_cmd *req;
struct hclge_dev *hdev = vport->back;
struct hclge_desc desc;
+ u16 bmap_index;
int status;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_PORT_TX_CFG, false);
@@ -7713,8 +7747,10 @@ static int hclge_set_vlan_tx_offload_cfg(struct hclge_vport *vport)
hnae3_set_bit(req->vport_vlan_cfg, HCLGE_CFG_NIC_ROCE_SEL_B, 0);
req->vf_offset = vport->vport_id / HCLGE_VF_NUM_PER_CMD;
- req->vf_bitmap[req->vf_offset] =
- 1 << (vport->vport_id % HCLGE_VF_NUM_PER_BYTE);
+ bmap_index = vport->vport_id % HCLGE_VF_NUM_PER_CMD /
+ HCLGE_VF_NUM_PER_BYTE;
+ req->vf_bitmap[bmap_index] =
+ 1U << (vport->vport_id % HCLGE_VF_NUM_PER_BYTE);
status = hclge_cmd_send(&hdev->hw, &desc, 1);
if (status)
@@ -7731,6 +7767,7 @@ static int hclge_set_vlan_rx_offload_cfg(struct hclge_vport *vport)
struct hclge_vport_vtag_rx_cfg_cmd *req;
struct hclge_dev *hdev = vport->back;
struct hclge_desc desc;
+ u16 bmap_index;
int status;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_PORT_RX_CFG, false);
@@ -7746,8 +7783,10 @@ static int hclge_set_vlan_rx_offload_cfg(struct hclge_vport *vport)
vcfg->vlan2_vlan_prionly ? 1 : 0);
req->vf_offset = vport->vport_id / HCLGE_VF_NUM_PER_CMD;
- req->vf_bitmap[req->vf_offset] =
- 1 << (vport->vport_id % HCLGE_VF_NUM_PER_BYTE);
+ bmap_index = vport->vport_id % HCLGE_VF_NUM_PER_CMD /
+ HCLGE_VF_NUM_PER_BYTE;
+ req->vf_bitmap[bmap_index] =
+ 1U << (vport->vport_id % HCLGE_VF_NUM_PER_BYTE);
status = hclge_cmd_send(&hdev->hw, &desc, 1);
if (status)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index fdf43d87e983..3c8a2f55c43a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -12530,7 +12530,8 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
if (need_reset && prog)
for (i = 0; i < vsi->num_queue_pairs; i++)
if (vsi->xdp_rings[i]->xsk_umem)
- (void)i40e_xsk_async_xmit(vsi->netdev, i);
+ (void)i40e_xsk_wakeup(vsi->netdev, i,
+ XDP_WAKEUP_RX);
return 0;
}
@@ -12852,7 +12853,7 @@ static const struct net_device_ops i40e_netdev_ops = {
.ndo_bridge_setlink = i40e_ndo_bridge_setlink,
.ndo_bpf = i40e_xdp,
.ndo_xdp_xmit = i40e_xdp_xmit,
- .ndo_xsk_async_xmit = i40e_xsk_async_xmit,
+ .ndo_xsk_wakeup = i40e_xsk_wakeup,
.ndo_dfwd_add_station = i40e_fwd_add,
.ndo_dfwd_del_station = i40e_fwd_del,
};
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 32bad014d76c..0373bc6c7e61 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -116,7 +116,7 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
return err;
/* Kick start the NAPI context so that receiving will start */
- err = i40e_xsk_async_xmit(vsi->netdev, qid);
+ err = i40e_xsk_wakeup(vsi->netdev, qid, XDP_WAKEUP_RX);
if (err)
return err;
}
@@ -190,7 +190,9 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
**/
static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
{
+ struct xdp_umem *umem = rx_ring->xsk_umem;
int err, result = I40E_XDP_PASS;
+ u64 offset = umem->headroom;
struct i40e_ring *xdp_ring;
struct bpf_prog *xdp_prog;
u32 act;
@@ -201,7 +203,10 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
*/
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
act = bpf_prog_run_xdp(xdp_prog, xdp);
- xdp->handle += xdp->data - xdp->data_hard_start;
+ offset += xdp->data - xdp->data_hard_start;
+
+ xdp->handle = xsk_umem_adjust_offset(umem, xdp->handle, offset);
+
switch (act) {
case XDP_PASS:
break;
@@ -262,7 +267,7 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
bi->addr = xdp_umem_get_data(umem, handle);
bi->addr += hr;
- bi->handle = handle + umem->headroom;
+ bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
xsk_umem_discard_addr(umem);
return true;
@@ -299,7 +304,7 @@ static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
bi->addr = xdp_umem_get_data(umem, handle);
bi->addr += hr;
- bi->handle = handle + umem->headroom;
+ bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
xsk_umem_discard_addr_rq(umem);
return true;
@@ -420,8 +425,6 @@ static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *old_bi)
{
struct i40e_rx_buffer *new_bi = &rx_ring->rx_bi[rx_ring->next_to_alloc];
- unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
- u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
u16 nta = rx_ring->next_to_alloc;
/* update, and store next to alloc */
@@ -429,14 +432,9 @@ static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
/* transfer page from old buffer to new buffer */
- new_bi->dma = old_bi->dma & mask;
- new_bi->dma += hr;
-
- new_bi->addr = (void *)((unsigned long)old_bi->addr & mask);
- new_bi->addr += hr;
-
- new_bi->handle = old_bi->handle & mask;
- new_bi->handle += rx_ring->xsk_umem->headroom;
+ new_bi->dma = old_bi->dma;
+ new_bi->addr = old_bi->addr;
+ new_bi->handle = old_bi->handle;
old_bi->addr = NULL;
}
@@ -471,7 +469,8 @@ void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
bi->addr += hr;
- bi->handle = (u64)handle + rx_ring->xsk_umem->headroom;
+ bi->handle = xsk_umem_adjust_offset(rx_ring->xsk_umem, (u64)handle,
+ rx_ring->xsk_umem->headroom);
}
/**
@@ -626,6 +625,15 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
+
+ if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) {
+ if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
+ xsk_set_rx_need_wakeup(rx_ring->xsk_umem);
+ else
+ xsk_clear_rx_need_wakeup(rx_ring->xsk_umem);
+
+ return (int)total_rx_packets;
+ }
return failure ? budget : (int)total_rx_packets;
}
@@ -681,6 +689,8 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
i40e_xdp_ring_update_tail(xdp_ring);
xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+ if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem))
+ xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem);
}
return !!budget && work_done;
@@ -759,19 +769,27 @@ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
i40e_update_tx_stats(tx_ring, completed_frames, total_bytes);
out_xmit:
+ if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) {
+ if (tx_ring->next_to_clean == tx_ring->next_to_use)
+ xsk_set_tx_need_wakeup(tx_ring->xsk_umem);
+ else
+ xsk_clear_tx_need_wakeup(tx_ring->xsk_umem);
+ }
+
xmit_done = i40e_xmit_zc(tx_ring, budget);
return work_done && xmit_done;
}
/**
- * i40e_xsk_async_xmit - Implements the ndo_xsk_async_xmit
+ * i40e_xsk_wakeup - Implements the ndo_xsk_wakeup
* @dev: the netdevice
* @queue_id: queue id to wake up
+ * @flags: ignored in our case since we have Rx and Tx in the same NAPI.
*
* Returns <0 for errors, 0 otherwise.
**/
-int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id)
+int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
index 8cc0a2e7d9a2..9ed59c14eb55 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -18,6 +18,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
struct i40e_ring *tx_ring, int napi_budget);
-int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id);
+int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
#endif /* _I40E_XSK_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index bf9aa533a7c6..4da0cde9695b 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -35,9 +35,9 @@ struct ice_aqc_get_ver {
/* Queue Shutdown (direct 0x0003) */
struct ice_aqc_q_shutdown {
- __le32 driver_unloading;
+ u8 driver_unloading;
#define ICE_AQC_DRIVER_UNLOADING BIT(0)
- u8 reserved[12];
+ u8 reserved[15];
};
/* Request resource ownership (direct 0x0008)
@@ -91,6 +91,7 @@ struct ice_aqc_list_caps_elem {
#define ICE_AQC_CAPS_SRIOV 0x0012
#define ICE_AQC_CAPS_VF 0x0013
#define ICE_AQC_CAPS_VSI 0x0017
+#define ICE_AQC_CAPS_DCB 0x0018
#define ICE_AQC_CAPS_RSS 0x0040
#define ICE_AQC_CAPS_RXQS 0x0041
#define ICE_AQC_CAPS_TXQS 0x0042
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 302ad981129c..9492cd34b09d 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1275,7 +1275,7 @@ enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading)
ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_q_shutdown);
if (unloading)
- cmd->driver_unloading = cpu_to_le32(ICE_AQC_DRIVER_UNLOADING);
+ cmd->driver_unloading = ICE_AQC_DRIVER_UNLOADING;
return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
}
@@ -1594,6 +1594,18 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
prefix, func_p->guar_num_vsi);
}
break;
+ case ICE_AQC_CAPS_DCB:
+ caps->dcb = (number == 1);
+ caps->active_tc_bitmap = logical_id;
+ caps->maxtc = phys_id;
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: DCB = %d\n", prefix, caps->dcb);
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: active TC bitmap = %d\n", prefix,
+ caps->active_tc_bitmap);
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: TC max = %d\n", prefix, caps->maxtc);
+ break;
case ICE_AQC_CAPS_RSS:
caps->rss_table_size = number;
caps->rss_table_entry_width = logical_id;
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
index d60c942249e8..c5ee8d930611 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -444,9 +444,15 @@ ice_parse_cee_pgcfg_tlv(struct ice_cee_feat_tlv *tlv,
* |pg0|pg1|pg2|pg3|pg4|pg5|pg6|pg7|
* ---------------------------------
*/
- ice_for_each_traffic_class(i)
+ ice_for_each_traffic_class(i) {
etscfg->tcbwtable[i] = buf[offset++];
+ if (etscfg->prio_table[i] == ICE_CEE_PGID_STRICT)
+ dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_STRICT;
+ else
+ dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS;
+ }
+
/* Number of TCs supported (1 octet) */
etscfg->maxtcs = buf[offset];
}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index d9578919aad8..e922adf1fa15 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -413,7 +413,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
memset(&pi->local_dcbx_cfg, 0, sizeof(*dcbcfg));
dcbcfg->etscfg.willing = 1;
- dcbcfg->etscfg.maxtcs = 8;
+ dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc;
dcbcfg->etscfg.tcbwtable[0] = 100;
dcbcfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
@@ -422,7 +422,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
dcbcfg->etsrec.willing = 0;
dcbcfg->pfc.willing = 1;
- dcbcfg->pfc.pfccap = IEEE_8021QAZ_MAX_TCS;
+ dcbcfg->pfc.pfccap = hw->func_caps.common_cap.maxtc;
dcbcfg->numapps = 1;
dcbcfg->app[0].selector = ICE_APP_SEL_ETHTYPE;
@@ -452,11 +452,18 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
port_info = hw->port_info;
err = ice_init_dcb(hw);
+ if (err && !port_info->is_sw_lldp) {
+ dev_err(&pf->pdev->dev, "Error initializing DCB %d\n", err);
+ goto dcb_init_err;
+ }
+
+ dev_info(&pf->pdev->dev,
+ "DCB is enabled in the hardware, max number of TCs supported on this port are %d\n",
+ pf->hw.func_caps.common_cap.maxtc);
if (err) {
/* FW LLDP is disabled, activate SW DCBX/LLDP mode */
dev_info(&pf->pdev->dev,
"FW LLDP is disabled, DCBx/LLDP in SW mode.\n");
- port_info->is_sw_lldp = true;
clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
err = ice_dcb_sw_dflt_cfg(pf, locked);
if (err) {
@@ -468,11 +475,9 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
- set_bit(ICE_FLAG_DCB_ENA, pf->flags);
return 0;
}
- port_info->is_sw_lldp = false;
set_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
/* DCBX in FW and LLDP enabled in FW */
@@ -484,7 +489,6 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
if (err)
goto dcb_init_err;
- dev_info(&pf->pdev->dev, "DCBX offload supported\n");
return err;
dcb_init_err:
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index f7dd0bd03d39..edba5bd79097 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3253,25 +3253,25 @@ static int
ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
struct ice_ring_container *rc, struct ice_vsi *vsi)
{
+ const char *c_type_str = (c_type == ICE_RX_CONTAINER) ? "rx" : "tx";
+ u32 use_adaptive_coalesce, coalesce_usecs;
struct ice_pf *pf = vsi->back;
u16 itr_setting;
if (!rc->ring)
return -EINVAL;
- itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC;
-
switch (c_type) {
case ICE_RX_CONTAINER:
if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL ||
(ec->rx_coalesce_usecs_high &&
ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) {
netdev_info(vsi->netdev,
- "Invalid value, rx-usecs-high valid values are 0 (disabled), %d-%d\n",
- pf->hw.intrl_gran, ICE_MAX_INTRL);
+ "Invalid value, %s-usecs-high valid values are 0 (disabled), %d-%d\n",
+ c_type_str, pf->hw.intrl_gran,
+ ICE_MAX_INTRL);
return -EINVAL;
}
-
if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) {
rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high;
wr32(&pf->hw, GLINT_RATE(rc->ring->q_vector->reg_idx),
@@ -3279,60 +3279,60 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
pf->hw.intrl_gran));
}
- if (ec->rx_coalesce_usecs != itr_setting &&
- ec->use_adaptive_rx_coalesce) {
- netdev_info(vsi->netdev,
- "Rx interrupt throttling cannot be changed if adaptive-rx is enabled\n");
- return -EINVAL;
- }
+ use_adaptive_coalesce = ec->use_adaptive_rx_coalesce;
+ coalesce_usecs = ec->rx_coalesce_usecs;
- if (ec->rx_coalesce_usecs > ICE_ITR_MAX) {
- netdev_info(vsi->netdev,
- "Invalid value, rx-usecs range is 0-%d\n",
- ICE_ITR_MAX);
- return -EINVAL;
- }
-
- if (ec->use_adaptive_rx_coalesce) {
- rc->itr_setting |= ICE_ITR_DYNAMIC;
- } else {
- rc->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
- rc->target_itr = ITR_TO_REG(rc->itr_setting);
- }
break;
case ICE_TX_CONTAINER:
if (ec->tx_coalesce_usecs_high) {
netdev_info(vsi->netdev,
- "setting tx-usecs-high is not supported\n");
- return -EINVAL;
- }
-
- if (ec->tx_coalesce_usecs != itr_setting &&
- ec->use_adaptive_tx_coalesce) {
- netdev_info(vsi->netdev,
- "Tx interrupt throttling cannot be changed if adaptive-tx is enabled\n");
+ "setting %s-usecs-high is not supported\n",
+ c_type_str);
return -EINVAL;
}
- if (ec->tx_coalesce_usecs > ICE_ITR_MAX) {
- netdev_info(vsi->netdev,
- "Invalid value, tx-usecs range is 0-%d\n",
- ICE_ITR_MAX);
- return -EINVAL;
- }
+ use_adaptive_coalesce = ec->use_adaptive_tx_coalesce;
+ coalesce_usecs = ec->tx_coalesce_usecs;
- if (ec->use_adaptive_tx_coalesce) {
- rc->itr_setting |= ICE_ITR_DYNAMIC;
- } else {
- rc->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
- rc->target_itr = ITR_TO_REG(rc->itr_setting);
- }
break;
default:
dev_dbg(&pf->pdev->dev, "Invalid container type %d\n", c_type);
return -EINVAL;
}
+ itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC;
+ if (coalesce_usecs != itr_setting && use_adaptive_coalesce) {
+ netdev_info(vsi->netdev,
+ "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n",
+ c_type_str, c_type_str);
+ return -EINVAL;
+ }
+
+ if (coalesce_usecs > ICE_ITR_MAX) {
+ netdev_info(vsi->netdev,
+ "Invalid value, %s-usecs range is 0-%d\n",
+ c_type_str, ICE_ITR_MAX);
+ return -EINVAL;
+ }
+
+ /* hardware only supports an ITR granularity of 2us */
+ if (coalesce_usecs % 2 != 0) {
+ netdev_info(vsi->netdev,
+ "Invalid value, %s-usecs must be even\n",
+ c_type_str);
+ return -EINVAL;
+ }
+
+ if (use_adaptive_coalesce) {
+ rc->itr_setting |= ICE_ITR_DYNAMIC;
+ } else {
+ /* store the user-facing value as it was set */
+ rc->itr_setting = coalesce_usecs;
+ /* set to static and convert to value HW understands */
+ rc->target_itr =
+ ITR_TO_REG(ITR_REG_ALIGN(rc->itr_setting));
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index f029aee32913..50a17a0337be 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -567,6 +567,8 @@ static void ice_reset_subtask(struct ice_pf *pf)
reset_type = ICE_RESET_CORER;
if (test_and_clear_bit(__ICE_GLOBR_RECV, pf->state))
reset_type = ICE_RESET_GLOBR;
+ if (test_and_clear_bit(__ICE_EMPR_RECV, pf->state))
+ reset_type = ICE_RESET_EMPR;
/* return if no valid reset type requested */
if (reset_type == ICE_RESET_INVAL)
return;
@@ -612,6 +614,22 @@ static void ice_reset_subtask(struct ice_pf *pf)
}
/**
+ * ice_print_topo_conflict - print topology conflict message
+ * @vsi: the VSI whose topology status is being checked
+ */
+static void ice_print_topo_conflict(struct ice_vsi *vsi)
+{
+ switch (vsi->port_info->phy.link_info.topo_media_conflict) {
+ case ICE_AQ_LINK_TOPO_CONFLICT:
+ case ICE_AQ_LINK_MEDIA_CONFLICT:
+ netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n");
+ break;
+ default:
+ break;
+ }
+}
+
+/**
* ice_print_link_msg - print link up or down message
* @vsi: the VSI whose link status is being queried
* @isup: boolean for if the link is now up or down
@@ -624,6 +642,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
const char *speed;
const char *fec;
const char *fc;
+ const char *an;
if (!vsi)
return;
@@ -707,6 +726,12 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
break;
}
+ /* check if autoneg completed; may be false if autoneg is not supported */
+ if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
+ an = "True";
+ else
+ an = "False";
+
/* Get FEC mode requested based on PHY caps last SW configuration */
caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
if (!caps) {
@@ -731,8 +756,9 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
devm_kfree(&vsi->back->pdev->dev, caps);
done:
- netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Flow Control: %s\n",
- speed, fec_req, fec, fc);
+ netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Autoneg: %s, Flow Control: %s\n",
+ speed, fec_req, fec, an, fc);
+ ice_print_topo_conflict(vsi);
}
/**
@@ -2636,6 +2662,11 @@ static void ice_remove(struct pci_dev *pdev)
ice_deinit_pf(pf);
ice_deinit_hw(&pf->hw);
ice_clear_interrupt_scheme(pf);
+ /* Issue a PFR as part of the prescribed driver unload flow. Do not
+ * do it via ice_schedule_reset() since there is no need to rebuild
+ * and the service task is already stopped.
+ */
+ ice_reset(&pf->hw, ICE_RESET_PFR);
pci_disable_pcie_error_reporting(pdev);
}
@@ -3446,12 +3477,16 @@ void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
vsi_stats = &vsi->net_stats;
- if (test_bit(__ICE_DOWN, vsi->state) || !vsi->num_txq || !vsi->num_rxq)
+ if (!vsi->num_txq || !vsi->num_rxq)
return;
+
/* netdev packet/byte stats come from ring counter. These are obtained
* by summing up ring counters (done by ice_update_vsi_ring_stats).
+ * But, only call the update routine and read the registers if VSI is
+ * not down.
*/
- ice_update_vsi_ring_stats(vsi);
+ if (!test_bit(__ICE_DOWN, vsi->state))
+ ice_update_vsi_ring_stats(vsi);
stats->tx_packets = vsi_stats->tx_packets;
stats->tx_bytes = vsi_stats->tx_bytes;
stats->rx_packets = vsi_stats->rx_packets;
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 99cf527d2b1a..1acdd43a2edd 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -1623,12 +1623,13 @@ ice_remove_rule_internal(struct ice_hw *hw, u8 recp_id,
status = ice_aq_sw_rules(hw, s_rule,
ICE_SW_RULE_RX_TX_NO_HDR_SIZE, 1,
ice_aqc_opc_remove_sw_rules, NULL);
- if (status)
- goto exit;
	/* Remove the bookkeeping entry from the list */
devm_kfree(ice_hw_to_dev(hw), s_rule);
+ if (status)
+ goto exit;
+
list_del(&list_elem->list_entry);
devm_kfree(ice_hw_to_dev(hw), list_elem);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index b538d0b9eb80..4501d50a7dcc 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -4,15 +4,15 @@
#ifndef _ICE_TYPE_H_
#define _ICE_TYPE_H_
+#define ICE_BYTES_PER_WORD 2
+#define ICE_BYTES_PER_DWORD 4
+
#include "ice_status.h"
#include "ice_hw_autogen.h"
#include "ice_osdep.h"
#include "ice_controlq.h"
#include "ice_lan_tx_rx.h"
-#define ICE_BYTES_PER_WORD 2
-#define ICE_BYTES_PER_DWORD 4
-
static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc)
{
return test_bit(tc, &bitmap);
@@ -139,6 +139,9 @@ struct ice_phy_info {
/* Common HW capabilities for SW use */
struct ice_hw_common_caps {
u32 valid_functions;
+ /* DCB capabilities */
+ u32 active_tc_bitmap;
+ u32 maxtc;
/* Tx/Rx queues */
u16 num_rxq; /* Number/Total Rx queues */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index b93324e9f4bc..c38939b1d496 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1074,9 +1074,16 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
for (v = 0; v < pf->num_alloc_vfs; v++)
ice_trigger_vf_reset(&pf->vf[v], is_vflr);
- for (v = 0; v < pf->num_alloc_vfs; v++)
- if (test_bit(ICE_VF_STATE_QS_ENA, pf->vf[v].vf_states))
- ice_dis_vf_qs(&pf->vf[v]);
+ for (v = 0; v < pf->num_alloc_vfs; v++) {
+ struct ice_vsi *vsi;
+
+ vf = &pf->vf[v];
+ vsi = pf->vsi[vf->lan_vsi_idx];
+ if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states))
+ ice_dis_vf_qs(vf);
+ ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL,
+ NULL, ICE_VF_RESET, vf->vf_id, NULL);
+ }
/* HW requires some time to make sure it can flush the FIFO for a VF
* when it resets it. Poll the VPGEN_VFRSTAT register for each VF in
@@ -1171,12 +1178,12 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states))
ice_dis_vf_qs(vf);
- else
- /* Call Disable LAN Tx queue AQ call even when queues are not
- * enabled. This is needed for successful completion of VFR
- */
- ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL,
- NULL, ICE_VF_RESET, vf->vf_id, NULL);
+
+ /* Call Disable LAN Tx queue AQ whether or not queues are
+ * enabled. This is needed for successful completion of VFR.
+ */
+ ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL,
+ NULL, ICE_VF_RESET, vf->vf_id, NULL);
hw = &pf->hw;
/* poll VPGEN_VFRSTAT reg to make sure
@@ -2760,8 +2767,9 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
}
vf->num_vlan--;
- /* Disable VLAN pruning when removing VLAN */
- ice_cfg_vlan_pruning(vsi, false, false);
+ /* Disable VLAN pruning when the last VLAN is removed */
+ if (!vf->num_vlan)
+ ice_cfg_vlan_pruning(vsi, false, false);
/* Disable Unicast/Multicast VLAN promiscuous mode */
if (vlan_promisc) {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 31629fc7e820..113f6087c7c9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -960,11 +960,9 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
return 0;
err_aead:
- memset(xs->aead, 0, sizeof(*xs->aead));
- kfree(xs->aead);
+ kzfree(xs->aead);
err_xs:
- memset(xs, 0, sizeof(*xs));
- kfree(xs);
+ kzfree(xs);
err_out:
msgbuf[1] = err;
return err;
@@ -1049,8 +1047,7 @@ int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
ixgbe_ipsec_del_sa(xs);
/* remove the xs that was made-up in the add request */
- memset(xs, 0, sizeof(*xs));
- kfree(xs);
+ kzfree(xs);
return 0;
}
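
The memset()+kfree() pairs above are collapsed into kzfree(), which zeroes the buffer before freeing so the made-up SA and its key material do not linger in freed memory. A hedged user-space analogue of the same idea (secure_free() is a hypothetical helper, not a kernel or libc API):

#include <stdlib.h>
#include <string.h>

/* Zero a secret-bearing buffer before handing it back to the allocator,
 * mirroring what kzfree() does for kmalloc'ed memory in the kernel.
 * explicit_bzero() (glibc >= 2.25) keeps the compiler from eliding the wipe.
 */
static void secure_free(void *p, size_t len)
{
	if (!p)
		return;
	explicit_bzero(p, len);
	free(p);
}
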
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 17b7ae9f46ec..9bcae44e9883 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10260,7 +10260,8 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
if (need_reset && prog)
for (i = 0; i < adapter->num_rx_queues; i++)
if (adapter->xdp_ring[i]->xsk_umem)
- (void)ixgbe_xsk_async_xmit(adapter->netdev, i);
+ (void)ixgbe_xsk_wakeup(adapter->netdev, i,
+ XDP_WAKEUP_RX);
return 0;
}
@@ -10379,7 +10380,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
.ndo_features_check = ixgbe_features_check,
.ndo_bpf = ixgbe_xdp,
.ndo_xdp_xmit = ixgbe_xdp_xmit,
- .ndo_xsk_async_xmit = ixgbe_xsk_async_xmit,
+ .ndo_xsk_wakeup = ixgbe_xsk_wakeup,
};
static void ixgbe_disable_txr_hw(struct ixgbe_adapter *adapter,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
index d93a690aff74..6d01700b46bc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
@@ -42,7 +42,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring);
bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
struct ixgbe_ring *tx_ring, int napi_budget);
-int ixgbe_xsk_async_xmit(struct net_device *dev, u32 queue_id);
+int ixgbe_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring);
#endif /* #define _IXGBE_TXRX_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 6b609553329f..ad802a8909e0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -100,7 +100,7 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
ixgbe_txrx_ring_enable(adapter, qid);
/* Kick start the NAPI context so that receiving will start */
- err = ixgbe_xsk_async_xmit(adapter->netdev, qid);
+ err = ixgbe_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
if (err)
return err;
}
@@ -143,7 +143,9 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
struct ixgbe_ring *rx_ring,
struct xdp_buff *xdp)
{
+ struct xdp_umem *umem = rx_ring->xsk_umem;
int err, result = IXGBE_XDP_PASS;
+ u64 offset = umem->headroom;
struct bpf_prog *xdp_prog;
struct xdp_frame *xdpf;
u32 act;
@@ -151,7 +153,10 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
act = bpf_prog_run_xdp(xdp_prog, xdp);
- xdp->handle += xdp->data - xdp->data_hard_start;
+ offset += xdp->data - xdp->data_hard_start;
+
+ xdp->handle = xsk_umem_adjust_offset(umem, xdp->handle, offset);
+
switch (act) {
case XDP_PASS:
break;
@@ -201,8 +206,6 @@ ixgbe_rx_buffer *ixgbe_get_rx_buffer_zc(struct ixgbe_ring *rx_ring,
static void ixgbe_reuse_rx_buffer_zc(struct ixgbe_ring *rx_ring,
struct ixgbe_rx_buffer *obi)
{
- unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
- u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
u16 nta = rx_ring->next_to_alloc;
struct ixgbe_rx_buffer *nbi;
@@ -212,14 +215,9 @@ static void ixgbe_reuse_rx_buffer_zc(struct ixgbe_ring *rx_ring,
rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
/* transfer page from old buffer to new buffer */
- nbi->dma = obi->dma & mask;
- nbi->dma += hr;
-
- nbi->addr = (void *)((unsigned long)obi->addr & mask);
- nbi->addr += hr;
-
- nbi->handle = obi->handle & mask;
- nbi->handle += rx_ring->xsk_umem->headroom;
+ nbi->dma = obi->dma;
+ nbi->addr = obi->addr;
+ nbi->handle = obi->handle;
obi->addr = NULL;
obi->skb = NULL;
@@ -250,7 +248,8 @@ void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
bi->addr += hr;
- bi->handle = (u64)handle + rx_ring->xsk_umem->headroom;
+ bi->handle = xsk_umem_adjust_offset(rx_ring->xsk_umem, (u64)handle,
+ rx_ring->xsk_umem->headroom);
}
static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring,
@@ -276,7 +275,7 @@ static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring,
bi->addr = xdp_umem_get_data(umem, handle);
bi->addr += hr;
- bi->handle = handle + umem->headroom;
+ bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
xsk_umem_discard_addr(umem);
return true;
@@ -303,7 +302,7 @@ static bool ixgbe_alloc_buffer_slow_zc(struct ixgbe_ring *rx_ring,
bi->addr = xdp_umem_get_data(umem, handle);
bi->addr += hr;
- bi->handle = handle + umem->headroom;
+ bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
xsk_umem_discard_addr_rq(umem);
return true;
@@ -547,6 +546,14 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
q_vector->rx.total_packets += total_rx_packets;
q_vector->rx.total_bytes += total_rx_bytes;
+ if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) {
+ if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
+ xsk_set_rx_need_wakeup(rx_ring->xsk_umem);
+ else
+ xsk_clear_rx_need_wakeup(rx_ring->xsk_umem);
+
+ return (int)total_rx_packets;
+ }
return failure ? budget : (int)total_rx_packets;
}
@@ -615,6 +622,8 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
if (tx_desc) {
ixgbe_xdp_ring_update_tail(xdp_ring);
xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+ if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem))
+ xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem);
}
return !!budget && work_done;
@@ -688,11 +697,19 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
if (xsk_frames)
xsk_umem_complete_tx(umem, xsk_frames);
+ if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) {
+ if (tx_ring->next_to_clean == tx_ring->next_to_use)
+ xsk_set_tx_need_wakeup(tx_ring->xsk_umem);
+ else
+ xsk_clear_tx_need_wakeup(tx_ring->xsk_umem);
+ }
+
xmit_done = ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
+
return budget > 0 && xmit_done;
}
-int ixgbe_xsk_async_xmit(struct net_device *dev, u32 qid)
+int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
{
struct ixgbe_adapter *adapter = netdev_priv(dev);
struct ixgbe_ring *ring;
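
The need_wakeup flags set and cleared in the RX/TX cleanup paths above are consumed by the AF_XDP application: instead of unconditionally issuing syscalls, it kicks the driver only when the flag is raised. A hedged user-space sketch of that pattern (assumes libbpf's <bpf/xsk.h> helpers of that era; socket and ring setup omitted):

#include <sys/socket.h>
#include <bpf/xsk.h>	/* xsk_ring_prod__needs_wakeup(), xsk_socket__fd() */

/* Kick the kernel only when the driver asked for a wakeup on the TX ring;
 * otherwise completions are reaped without any syscall overhead.
 */
static void kick_tx_if_needed(struct xsk_socket *xsk, struct xsk_ring_prod *tx)
{
	if (xsk_ring_prod__needs_wakeup(tx))
		sendto(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
}
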
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 37fef8cd25e3..0d8dd885b7d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -154,3 +154,10 @@ config MLX5_EN_TLS
	  Build support for TLS cryptography-offload acceleration in the NIC.
Note: Support for hardware with this capability needs to be selected
for this option to become available.
+
+config MLX5_SW_STEERING
+ bool "Mellanox Technologies software-managed steering"
+ depends on MLX5_CORE_EN && MLX5_ESWITCH
+ default y
+ help
+ Build support for software-managed steering in the NIC.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index f4de9ccb5df1..5708fcc079ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -15,7 +15,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
- lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \
+ lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o
#
@@ -67,3 +67,10 @@ mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \
en_accel/ktls.o en_accel/ktls_tx.o
+
+mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \
+ steering/dr_matcher.o steering/dr_rule.o \
+ steering/dr_icm_pool.o steering/dr_crc32.o \
+ steering/dr_ste.o steering/dr_send.o \
+ steering/dr_cmd.o steering/dr_fw.o \
+ steering/dr_action.o steering/fs_dr.o
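
For reference, the software-steering objects above are only compiled when the new Kconfig symbol is enabled together with its stated dependencies; a hedged .config fragment (exact selection depends on the rest of the configuration) would contain:

CONFIG_MLX5_CORE_EN=y
CONFIG_MLX5_ESWITCH=y
CONFIG_MLX5_SW_STEERING=y
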
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index a400f4430c28..7bf7b6fbc776 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -4,6 +4,7 @@
#include <devlink.h>
#include "mlx5_core.h"
+#include "fs_core.h"
#include "eswitch.h"
static int mlx5_devlink_flash_update(struct devlink *devlink,
@@ -107,12 +108,121 @@ void mlx5_devlink_free(struct devlink *devlink)
devlink_free(devlink);
}
+static int mlx5_devlink_fs_mode_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ char *value = val.vstr;
+ int err = 0;
+
+ if (!strcmp(value, "dmfs")) {
+ return 0;
+ } else if (!strcmp(value, "smfs")) {
+ u8 eswitch_mode;
+ bool smfs_cap;
+
+ eswitch_mode = mlx5_eswitch_mode(dev->priv.eswitch);
+ smfs_cap = mlx5_fs_dr_is_supported(dev);
+
+ if (!smfs_cap) {
+ err = -EOPNOTSUPP;
+ NL_SET_ERR_MSG_MOD(extack,
+ "Software managed steering is not supported by current device");
+ }
+
+ else if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Software managed steering is not supported when eswitch offlaods enabled.");
+ err = -EOPNOTSUPP;
+ }
+ } else {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Bad parameter: supported values are [\"dmfs\", \"smfs\"]");
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int mlx5_devlink_fs_mode_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ enum mlx5_flow_steering_mode mode;
+
+ if (!strcmp(ctx->val.vstr, "smfs"))
+ mode = MLX5_FLOW_STEERING_MODE_SMFS;
+ else
+ mode = MLX5_FLOW_STEERING_MODE_DMFS;
+ dev->priv.steering->mode = mode;
+
+ return 0;
+}
+
+static int mlx5_devlink_fs_mode_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS)
+ strcpy(ctx->val.vstr, "smfs");
+ else
+ strcpy(ctx->val.vstr, "dmfs");
+ return 0;
+}
+
+enum mlx5_devlink_param_id {
+ MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+ MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE,
+};
+
+static const struct devlink_param mlx5_devlink_params[] = {
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE,
+ "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING,
+ BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ mlx5_devlink_fs_mode_get, mlx5_devlink_fs_mode_set,
+ mlx5_devlink_fs_mode_validate),
+};
+
+static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ union devlink_param_value value;
+
+ if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS)
+ strcpy(value.vstr, "dmfs");
+ else
+ strcpy(value.vstr, "smfs");
+ devlink_param_driverinit_value_set(devlink,
+ MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE,
+ value);
+}
+
int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
{
- return devlink_register(devlink, dev);
+ int err;
+
+ err = devlink_register(devlink, dev);
+ if (err)
+ return err;
+
+ err = devlink_params_register(devlink, mlx5_devlink_params,
+ ARRAY_SIZE(mlx5_devlink_params));
+ if (err)
+ goto params_reg_err;
+ mlx5_devlink_set_params_init_values(devlink);
+ devlink_params_publish(devlink);
+ return 0;
+
+params_reg_err:
+ devlink_unregister(devlink);
+ return err;
}
void mlx5_devlink_unregister(struct devlink *devlink)
{
+ devlink_params_unregister(devlink, mlx5_devlink_params,
+ ARRAY_SIZE(mlx5_devlink_params));
devlink_unregister(devlink);
}
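
Once registered and published, the string parameter can be read and flipped at runtime with the iproute2 devlink tool; a hedged usage example (the PCI address is a placeholder, and the validate callback above rejects "smfs" when the device lacks support or eswitch offloads are enabled):

devlink dev param show pci/0000:82:00.0 name flow_steering_mode
devlink dev param set pci/0000:82:00.0 name flow_steering_mode value smfs cmode runtime
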
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 79301d116667..eb2e1f2138e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -25,18 +25,33 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
return headroom;
}
-u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
- u32 frag_sz = linear_rq_headroom + hw_mtu;
+
+ return linear_rq_headroom + hw_mtu;
+}
+
+u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u32 frag_sz = mlx5e_rx_get_min_frag_sz(params, xsk);
/* AF_XDP doesn't build SKBs in place. */
if (!xsk)
frag_sz = MLX5_SKB_FRAG_SZ(frag_sz);
- /* XDP in mlx5e doesn't support multiple packets per page. */
+ /* XDP in mlx5e doesn't support multiple packets per page. AF_XDP is a
+ * special case. It can run with frames smaller than a page, as it
+ * doesn't allocate pages dynamically. However, here we pretend that
+	 * fragments are page-sized: it allows treating XSK frames like pages
+	 * by redirecting alloc and free operations to XSK rings and by relying
+	 * on the fact that there is never more than one packet per "page" (i.e.
+	 * per frame). The latter is important, because frames may come in a
+	 * random order, and we would have trouble assembling a real page from
+	 * multiple frames.
+ */
if (mlx5e_rx_is_xdp(params, xsk))
frag_sz = max_t(u32, frag_sz, PAGE_SIZE);
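
The split above exposes the bare minimum size (headroom plus hardware MTU) separately from the linear fragment size, which for regular (non-XSK) queues also reserves room for the skb_shared_info tail and, for XDP, is rounded up to a full page. A hedged standalone illustration with placeholder numbers (not the driver's real constants):

#include <stdio.h>

#define EXAMPLE_HEADROOM	256U	/* placeholder RQ headroom */
#define EXAMPLE_HW_MTU		1514U	/* placeholder SW MTU + L2 overhead */
#define EXAMPLE_SHINFO		320U	/* placeholder skb_shared_info cost */
#define EXAMPLE_PAGE_SIZE	4096U

int main(void)
{
	unsigned int min_frag = EXAMPLE_HEADROOM + EXAMPLE_HW_MTU;
	unsigned int linear_frag = min_frag + EXAMPLE_SHINFO;	/* non-XSK only */

	/* XDP path: pretend fragments are page-sized (see the comment above) */
	if (linear_frag < EXAMPLE_PAGE_SIZE)
		linear_frag = EXAMPLE_PAGE_SIZE;

	printf("min=%u linear=%u\n", min_frag, linear_frag);
	return 0;
}
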
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 3a615d663d84..989d8f429438 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -76,6 +76,8 @@ static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile,
u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
+u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 4c4620db3d31..f8ee18b4da6f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -291,14 +291,14 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
*/
goto out;
}
-
- err = mlx5_packet_reformat_alloc(priv->mdev,
- e->reformat_type,
- ipv4_encap_size, encap_header,
- MLX5_FLOW_NAMESPACE_FDB,
- &e->encap_id);
- if (err)
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ ipv4_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
goto destroy_neigh_entry;
+ }
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
@@ -407,13 +407,14 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
goto out;
}
- err = mlx5_packet_reformat_alloc(priv->mdev,
- e->reformat_type,
- ipv6_encap_size, encap_header,
- MLX5_FLOW_NAMESPACE_FDB,
- &e->encap_id);
- if (err)
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ ipv6_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ err = PTR_ERR(e->pkt_reformat);
goto destroy_neigh_entry;
+ }
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 1ed5c33e022f..f049e0ac308a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -122,6 +122,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
void *va, u16 *rx_headroom, u32 *len, bool xsk)
{
struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
+ struct xdp_umem *umem = rq->umem;
struct xdp_buff xdp;
u32 act;
int err;
@@ -138,8 +139,11 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
xdp.rxq = &rq->xdp_rxq;
act = bpf_prog_run_xdp(prog, &xdp);
- if (xsk)
- xdp.handle += xdp.data - xdp.data_hard_start;
+ if (xsk) {
+ u64 off = xdp.data - xdp.data_hard_start;
+
+ xdp.handle = xsk_umem_adjust_offset(umem, xdp.handle, off);
+ }
switch (act) {
case XDP_PASS:
*rx_headroom = xdp.data - xdp.data_hard_start;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index 6a55573ec8f2..475b6bd5d29b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -24,7 +24,8 @@ int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
if (!xsk_umem_peek_addr_rq(umem, &handle))
return -ENOMEM;
- dma_info->xsk.handle = handle + rq->buff.umem_headroom;
+ dma_info->xsk.handle = xsk_umem_adjust_offset(umem, handle,
+ rq->buff.umem_headroom);
dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle);
/* No need to add headroom to the DMA address. In striding RQ case, we
@@ -104,7 +105,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
/* head_offset is not used in this function, because di->xsk.data and
* di->addr point directly to the necessary place. Furthermore, in the
- * current implementation, one page = one packet = one frame, so
+ * current implementation, UMR pages are mapped to XSK frames, so
* head_offset should always be 0.
*/
WARN_ON_ONCE(head_offset);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
index 307b923a1361..cab0e93497ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -5,6 +5,7 @@
#define __MLX5_EN_XSK_RX_H__
#include "en.h"
+#include <net/xdp_sock.h>
/* RX data path */
@@ -24,4 +25,17 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
struct mlx5e_wqe_frag_info *wi,
u32 cqe_bcnt);
+static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err)
+{
+ if (!xsk_umem_uses_need_wakeup(rq->umem))
+ return alloc_err;
+
+ if (unlikely(alloc_err))
+ xsk_set_rx_need_wakeup(rq->umem);
+ else
+ xsk_clear_rx_need_wakeup(rq->umem);
+
+ return false;
+}
+
#endif /* __MLX5_EN_XSK_RX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index d360750b25b7..631af8dee517 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -4,18 +4,23 @@
#include "setup.h"
#include "en/params.h"
+/* This matches XDP_UMEM_MIN_CHUNK_SIZE, but since that constant is private
+ * and may change unexpectedly, and mlx5e has its own minimum valid stride
+ * size for striding RQ, keep this check in the driver.
+ */
+#define MLX5E_MIN_XSK_CHUNK_SIZE 2048
+
bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
struct mlx5_core_dev *mdev)
{
- /* AF_XDP doesn't support frames larger than PAGE_SIZE, and the current
- * mlx5e XDP implementation doesn't support multiple packets per page.
- */
- if (xsk->chunk_size != PAGE_SIZE)
+ /* AF_XDP doesn't support frames larger than PAGE_SIZE. */
+ if (xsk->chunk_size > PAGE_SIZE ||
+ xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE)
return false;
/* Current MTU and XSK headroom don't allow packets to fit the frames. */
- if (mlx5e_rx_get_linear_frag_sz(params, xsk) > xsk->chunk_size)
+ if (mlx5e_rx_get_min_frag_sz(params, xsk) > xsk->chunk_size)
return false;
/* frag_sz is different for regular and XSK RQs, so ensure that linear
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index fd2c75b4b519..87827477d38c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -7,7 +7,7 @@
#include "en/params.h"
#include <net/xdp_sock.h>
-int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid)
+int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_params *params = &priv->channels.params;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
index 7add18bf78d8..79b487d89757 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -5,11 +5,23 @@
#define __MLX5_EN_XSK_TX_H__
#include "en.h"
+#include <net/xdp_sock.h>
/* TX data path */
-int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid);
+int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
+static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq)
+{
+ if (!xsk_umem_uses_need_wakeup(sq->umem))
+ return;
+
+ if (sq->pc != sq->cc)
+ xsk_clear_tx_need_wakeup(sq->umem);
+ else
+ xsk_set_tx_need_wakeup(sq->umem);
+}
+
#endif /* __MLX5_EN_XSK_TX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index dadadf221087..1cacda1bc1b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4580,7 +4580,7 @@ const struct net_device_ops mlx5e_netdev_ops = {
.ndo_tx_timeout = mlx5e_tx_timeout,
.ndo_bpf = mlx5e_xdp,
.ndo_xdp_xmit = mlx5e_xdp_xmit,
- .ndo_xsk_async_xmit = mlx5e_xsk_async_xmit,
+ .ndo_xsk_wakeup = mlx5e_xsk_wakeup,
#ifdef CONFIG_MLX5_EN_ARFS
.ndo_rx_flow_steer = mlx5e_rx_flow_steer,
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index a0ae5069d8c3..8e512216deb8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -161,7 +161,7 @@ struct mlx5e_encap_entry {
*/
struct hlist_node encap_hlist;
struct list_head flows;
- u32 encap_id;
+ struct mlx5_pkt_reformat *pkt_reformat;
const struct ip_tunnel_info *tun_info;
unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 2fd2760d0bb7..d6a547238de0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -695,8 +695,11 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk;
rq->mpwqe.actual_wq_head = head;
- /* If XSK Fill Ring doesn't have enough frames, busy poll by
- * rescheduling the NAPI poll.
+ /* If XSK Fill Ring doesn't have enough frames, report the error, so
+ * that one of the actions can be performed:
+ * 1. If need_wakeup is used, signal that the application has to kick
+ * the driver when it refills the Fill Ring.
+ * 2. Otherwise, busy poll by rescheduling the NAPI poll.
*/
if (unlikely(alloc_err == -ENOMEM && rq->umem))
return true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 5581a8045ede..30d26eba75a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -61,7 +61,7 @@
struct mlx5_nic_flow_attr {
u32 action;
u32 flow_tag;
- u32 mod_hdr_id;
+ struct mlx5_modify_hdr *modify_hdr;
u32 hairpin_tirn;
u8 match_level;
struct mlx5_flow_table *hairpin_ft;
@@ -201,7 +201,7 @@ struct mlx5e_mod_hdr_entry {
struct mod_hdr_key key;
- u32 mod_hdr_id;
+ struct mlx5_modify_hdr *modify_hdr;
refcount_t refcnt;
struct completion res_ready;
@@ -334,7 +334,7 @@ static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
WARN_ON(!list_empty(&mh->flows));
if (mh->compl_result > 0)
- mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
+ mlx5_modify_header_dealloc(priv->mdev, mh->modify_hdr);
kfree(mh);
}
@@ -395,11 +395,11 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
mutex_unlock(&tbl->lock);
- err = mlx5_modify_header_alloc(priv->mdev, namespace,
- mh->key.num_actions,
- mh->key.actions,
- &mh->mod_hdr_id);
- if (err) {
+ mh->modify_hdr = mlx5_modify_header_alloc(priv->mdev, namespace,
+ mh->key.num_actions,
+ mh->key.actions);
+ if (IS_ERR(mh->modify_hdr)) {
+ err = PTR_ERR(mh->modify_hdr);
mh->compl_result = err;
goto alloc_header_err;
}
@@ -412,9 +412,9 @@ attach_flow:
list_add(&flow->mod_hdr, &mh->flows);
spin_unlock(&mh->flows_lock);
if (mlx5e_is_eswitch_flow(flow))
- flow->esw_attr->mod_hdr_id = mh->mod_hdr_id;
+ flow->esw_attr->modify_hdr = mh->modify_hdr;
else
- flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;
+ flow->nic_attr->modify_hdr = mh->modify_hdr;
return 0;
@@ -906,7 +906,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5_flow_destination dest[2] = {};
struct mlx5_flow_act flow_act = {
.action = attr->action,
- .reformat_id = 0,
.flags = FLOW_ACT_NO_APPEND,
};
struct mlx5_fc *counter = NULL;
@@ -947,7 +946,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
- flow_act.modify_id = attr->mod_hdr_id;
+ flow_act.modify_hdr = attr->modify_hdr;
kfree(parse_attr->mod_hdr_actions);
if (err)
return err;
@@ -1304,14 +1303,13 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow;
int err;
- err = mlx5_packet_reformat_alloc(priv->mdev,
- e->reformat_type,
- e->encap_size, e->encap_header,
- MLX5_FLOW_NAMESPACE_FDB,
- &e->encap_id);
- if (err) {
- mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n",
- err);
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ e->encap_size, e->encap_header,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
+ PTR_ERR(e->pkt_reformat));
return;
}
e->flags |= MLX5_ENCAP_ENTRY_VALID;
@@ -1326,7 +1324,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
esw_attr = flow->esw_attr;
spec = &esw_attr->parse_attr->spec;
- esw_attr->dests[flow->tmp_efi_index].encap_id = e->encap_id;
+ esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
/* Flow can be associated with multiple encap entries.
* Before offloading the flow verify that all of them have
@@ -1395,7 +1393,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
/* we know that the encap is valid */
e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
- mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}
static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
@@ -1561,7 +1559,7 @@ static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entr
mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
if (e->flags & MLX5_ENCAP_ENTRY_VALID)
- mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}
kfree(e->encap_header);
@@ -1896,7 +1894,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
*match_level = MLX5_MATCH_L2;
}
} else if (*match_level != MLX5_MATCH_NONE) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1);
+			/* cvlan_tag enabled in match criteria and
+			 * disabled in match value means that neither an S-tag
+			 * nor a C-tag is present (untagged for both)
+ */
MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
*match_level = MLX5_MATCH_L2;
}
@@ -3045,7 +3046,7 @@ attach_flow:
flow->encaps[out_index].index = out_index;
*encap_dev = e->out_dev;
if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
- attr->dests[out_index].encap_id = e->encap_id;
+ attr->dests[out_index].pkt_reformat = e->pkt_reformat;
attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
*encap_valid = true;
} else {
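
Throughout this file the raw u32 firmware IDs (mod_hdr_id, encap_id) are replaced with opaque handle structs returned via ERR_PTR(), so callers test IS_ERR() instead of combining an error code with an output parameter. A minimal sketch of that allocation pattern (hypothetical names, not the mlx5 API):

#include <linux/err.h>
#include <linux/slab.h>

struct example_handle {
	u32 hw_id;		/* object id programmed into the device */
};

/* Return either a valid handle or an ERR_PTR()-encoded error; the caller
 * checks IS_ERR() and never sees a half-initialized output parameter.
 */
static struct example_handle *example_alloc(u32 hw_id)
{
	struct example_handle *h;

	h = kzalloc(sizeof(*h), GFP_KERNEL);
	if (!h)
		return ERR_PTR(-ENOMEM);
	h->hw_id = hw_id;
	return h;
}
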
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 49b06b256c92..257a7c9f7a14 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -33,6 +33,7 @@
#include <linux/irq.h>
#include "en.h"
#include "en/xdp.h"
+#include "en/xsk/rx.h"
#include "en/xsk/tx.h"
static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
@@ -81,6 +82,29 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
}
+static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq)
+{
+ bool busy_xsk = false, xsk_rx_alloc_err;
+
+ /* Handle the race between the application querying need_wakeup and the
+ * driver setting it:
+ * 1. Update need_wakeup both before and after the TX. If it goes to
+ * "yes", it can only happen with the first update.
+ * 2. If the application queried need_wakeup before we set it, the
+ * packets will be transmitted anyway, even w/o a wakeup.
+ * 3. Give a chance to clear need_wakeup after new packets were queued
+ * for TX.
+ */
+ mlx5e_xsk_update_tx_wakeup(xsksq);
+ busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET);
+ mlx5e_xsk_update_tx_wakeup(xsksq);
+
+ xsk_rx_alloc_err = xskrq->post_wqes(xskrq);
+ busy_xsk |= mlx5e_xsk_update_rx_wakeup(xskrq, xsk_rx_alloc_err);
+
+ return busy_xsk;
+}
+
int mlx5e_napi_poll(struct napi_struct *napi, int budget)
{
struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
@@ -122,8 +146,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
if (xsk_open) {
mlx5e_poll_ico_cq(&c->xskicosq.cq);
busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq);
- busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET);
- busy_xsk |= xskrq->post_wqes(xskrq);
+ busy_xsk |= mlx5e_napi_xsk_post(xsksq, xskrq);
}
busy |= busy_xsk;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index aba9e7a6ad3c..6bd6f5895244 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -69,7 +69,7 @@ struct vport_ingress {
struct mlx5_flow_group *allow_spoofchk_only_grp;
struct mlx5_flow_group *allow_untagged_only_grp;
struct mlx5_flow_group *drop_grp;
- int modify_metadata_id;
+ struct mlx5_modify_hdr *modify_metadata;
struct mlx5_flow_handle *modify_metadata_rule;
struct mlx5_flow_handle *allow_rule;
struct mlx5_flow_handle *drop_rule;
@@ -153,6 +153,7 @@ struct mlx5_eswitch_fdb {
} legacy;
struct offloads_fdb {
+ struct mlx5_flow_namespace *ns;
struct mlx5_flow_table *slow_fdb;
struct mlx5_flow_group *send_to_vport_grp;
struct mlx5_flow_group *peer_miss_grp;
@@ -385,11 +386,11 @@ struct mlx5_esw_flow_attr {
struct {
u32 flags;
struct mlx5_eswitch_rep *rep;
+ struct mlx5_pkt_reformat *pkt_reformat;
struct mlx5_core_dev *mdev;
- u32 encap_id;
struct mlx5_termtbl_handle *termtbl;
} dests[MLX5_MAX_FLOW_FWD_VPORTS];
- u32 mod_hdr_id;
+ struct mlx5_modify_hdr *modify_hdr;
u8 inner_match_level;
u8 outer_match_level;
struct mlx5_fc *counter;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 7d3582ee66b7..afa623b15a38 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -190,10 +190,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
MLX5_FLOW_DEST_VPORT_VHCA_ID;
if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) {
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
- flow_act.reformat_id = attr->dests[j].encap_id;
+ flow_act.pkt_reformat = attr->dests[j].pkt_reformat;
dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
- dest[i].vport.reformat_id =
- attr->dests[j].encap_id;
+ dest[i].vport.pkt_reformat =
+ attr->dests[j].pkt_reformat;
}
i++;
}
@@ -213,7 +213,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
- flow_act.modify_id = attr->mod_hdr_id;
+ flow_act.modify_hdr = attr->modify_hdr;
fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split);
if (IS_ERR(fdb)) {
@@ -276,7 +276,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) {
dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
- dest[i].vport.reformat_id = attr->dests[i].encap_id;
+ dest[i].vport.pkt_reformat = attr->dests[i].pkt_reformat;
}
}
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
@@ -1068,6 +1068,13 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
err = -EOPNOTSUPP;
goto ns_err;
}
+ esw->fdb_table.offloads.ns = root_ns;
+ err = mlx5_flow_namespace_set_mode(root_ns,
+ esw->dev->priv.steering->mode);
+ if (err) {
+ esw_warn(dev, "Failed to set FDB namespace steering mode\n");
+ goto ns_err;
+ }
max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
MLX5_CAP_GEN(dev, max_flow_counter_15_0);
@@ -1207,6 +1214,8 @@ send_vport_err:
esw_destroy_offloads_fast_fdb_tables(esw);
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
slow_fdb_err:
+ /* Holds true only as long as DMFS is the default */
+ mlx5_flow_namespace_set_mode(root_ns, MLX5_FLOW_STEERING_MODE_DMFS);
ns_err:
kvfree(flow_group_in);
return err;
@@ -1226,6 +1235,9 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
esw_destroy_offloads_fast_fdb_tables(esw);
+ /* Holds true only as long as DMFS is the default */
+ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns,
+ MLX5_FLOW_STEERING_MODE_DMFS);
}
static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports)
@@ -1623,13 +1635,42 @@ static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
esw_del_fdb_peer_miss_rules(esw);
}
+static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch *peer_esw,
+ bool pair)
+{
+ struct mlx5_flow_root_namespace *peer_ns;
+ struct mlx5_flow_root_namespace *ns;
+ int err;
+
+ peer_ns = peer_esw->dev->priv.steering->fdb_root_ns;
+ ns = esw->dev->priv.steering->fdb_root_ns;
+
+ if (pair) {
+ err = mlx5_flow_namespace_set_peer(ns, peer_ns);
+ if (err)
+ return err;
+
+		err = mlx5_flow_namespace_set_peer(peer_ns, ns);
+ if (err) {
+ mlx5_flow_namespace_set_peer(ns, NULL);
+ return err;
+ }
+ } else {
+ mlx5_flow_namespace_set_peer(ns, NULL);
+ mlx5_flow_namespace_set_peer(peer_ns, NULL);
+ }
+
+ return 0;
+}
+
static int mlx5_esw_offloads_devcom_event(int event,
void *my_data,
void *event_data)
{
struct mlx5_eswitch *esw = my_data;
- struct mlx5_eswitch *peer_esw = event_data;
struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+ struct mlx5_eswitch *peer_esw = event_data;
int err;
switch (event) {
@@ -1638,9 +1679,12 @@ static int mlx5_esw_offloads_devcom_event(int event,
mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
break;
- err = mlx5_esw_offloads_pair(esw, peer_esw);
+ err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true);
if (err)
goto err_out;
+ err = mlx5_esw_offloads_pair(esw, peer_esw);
+ if (err)
+ goto err_peer;
err = mlx5_esw_offloads_pair(peer_esw, esw);
if (err)
@@ -1656,6 +1700,7 @@ static int mlx5_esw_offloads_devcom_event(int event,
mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
mlx5_esw_offloads_unpair(peer_esw);
mlx5_esw_offloads_unpair(esw);
+ mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
break;
}
@@ -1663,7 +1708,8 @@ static int mlx5_esw_offloads_devcom_event(int event,
err_pair:
mlx5_esw_offloads_unpair(esw);
-
+err_peer:
+ mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
err_out:
mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d",
event, err);
@@ -1734,7 +1780,7 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
if (vport->ingress.modify_metadata_rule) {
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- flow_act.modify_id = vport->ingress.modify_metadata_id;
+ flow_act.modify_hdr = vport->ingress.modify_metadata;
}
vport->ingress.allow_rule =
@@ -1770,9 +1816,11 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
MLX5_SET(set_action_in, action, data,
mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport));
- err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
- 1, action, &vport->ingress.modify_metadata_id);
- if (err) {
+ vport->ingress.modify_metadata =
+ mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ 1, action);
+ if (IS_ERR(vport->ingress.modify_metadata)) {
+ err = PTR_ERR(vport->ingress.modify_metadata);
esw_warn(esw->dev,
"failed to alloc modify header for vport %d ingress acl (%d)\n",
vport->vport, err);
@@ -1780,7 +1828,7 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
}
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- flow_act.modify_id = vport->ingress.modify_metadata_id;
+ flow_act.modify_hdr = vport->ingress.modify_metadata;
vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl,
&spec, &flow_act, NULL, 0);
if (IS_ERR(vport->ingress.modify_metadata_rule)) {
@@ -1794,7 +1842,7 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
out:
if (err)
- mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata);
return err;
}
@@ -1803,7 +1851,7 @@ void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
{
if (vport->ingress.modify_metadata_rule) {
mlx5_del_flow_rules(vport->ingress.modify_metadata_rule);
- mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata);
vport->ingress.modify_metadata_rule = NULL;
}
@@ -2113,9 +2161,10 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
else
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ mlx5_rdma_enable_roce(esw->dev);
err = esw_offloads_steering_init(esw);
if (err)
- return err;
+ goto err_steering_init;
err = esw_set_passing_vport_metadata(esw, true);
if (err)
@@ -2130,8 +2179,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
esw_offloads_devcom_init(esw);
mutex_init(&esw->offloads.termtbl_mutex);
- mlx5_rdma_enable_roce(esw->dev);
-
return 0;
err_reps:
@@ -2139,6 +2186,8 @@ err_reps:
esw_set_passing_vport_metadata(esw, false);
err_vport_metadata:
esw_offloads_steering_cleanup(esw);
+err_steering_init:
+ mlx5_rdma_disable_roce(esw->dev);
return err;
}
@@ -2163,12 +2212,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
void esw_offloads_disable(struct mlx5_eswitch *esw)
{
- mlx5_rdma_disable_roce(esw->dev);
esw_offloads_devcom_cleanup(esw);
esw_offloads_unload_all_reps(esw);
mlx5_eswitch_disable_pf_vf_vports(esw);
esw_set_passing_vport_metadata(esw, false);
esw_offloads_steering_cleanup(esw);
+ mlx5_rdma_disable_roce(esw->dev);
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 1e3381604b3d..579c306caa7b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -107,6 +107,50 @@ static int mlx5_cmd_stub_delete_fte(struct mlx5_flow_root_namespace *ns,
return 0;
}
+static int mlx5_cmd_stub_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
+ int reformat_type,
+ size_t size,
+ void *reformat_data,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+ return 0;
+}
+
+static void mlx5_cmd_stub_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+}
+
+static int mlx5_cmd_stub_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
+ u8 namespace, u8 num_actions,
+ void *modify_actions,
+ struct mlx5_modify_hdr *modify_hdr)
+{
+ return 0;
+}
+
+static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_modify_hdr *modify_hdr)
+{
+}
+
+static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_root_namespace *peer_ns)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_ns(struct mlx5_flow_root_namespace *ns)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns)
+{
+ return 0;
+}
+
static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft, u32 underlay_qpn,
bool disconnect)
@@ -412,11 +456,13 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
} else {
MLX5_SET(flow_context, in_flow_context, action,
fte->action.action);
- MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
- fte->action.reformat_id);
+ if (fte->action.pkt_reformat)
+ MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
+ fte->action.pkt_reformat->id);
}
- MLX5_SET(flow_context, in_flow_context, modify_header_id,
- fte->action.modify_id);
+ if (fte->action.modify_hdr)
+ MLX5_SET(flow_context, in_flow_context, modify_header_id,
+ fte->action.modify_hdr->id);
vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
@@ -468,7 +514,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_FLOW_DEST_VPORT_REFORMAT_ID));
MLX5_SET(extended_dest_format, in_dests,
packet_reformat_id,
- dst->dest_attr.vport.reformat_id);
+ dst->dest_attr.vport.pkt_reformat->id);
}
break;
default:
@@ -643,14 +689,15 @@ int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
}
-int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
- int reformat_type,
- size_t size,
- void *reformat_data,
- enum mlx5_flow_namespace_type namespace,
- u32 *packet_reformat_id)
+static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
+ int reformat_type,
+ size_t size,
+ void *reformat_data,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5_pkt_reformat *pkt_reformat)
{
u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)];
+ struct mlx5_core_dev *dev = ns->dev;
void *packet_reformat_context_in;
int max_encap_size;
void *reformat;
@@ -693,35 +740,36 @@ int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
memset(out, 0, sizeof(out));
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
- *packet_reformat_id = MLX5_GET(alloc_packet_reformat_context_out,
- out, packet_reformat_id);
+ pkt_reformat->id = MLX5_GET(alloc_packet_reformat_context_out,
+ out, packet_reformat_id);
kfree(in);
return err;
}
-EXPORT_SYMBOL(mlx5_packet_reformat_alloc);
-void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
- u32 packet_reformat_id)
+static void mlx5_cmd_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_pkt_reformat *pkt_reformat)
{
u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)];
u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)];
+ struct mlx5_core_dev *dev = ns->dev;
memset(in, 0, sizeof(in));
MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
- packet_reformat_id);
+ pkt_reformat->id);
mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
-int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
- u8 namespace, u8 num_actions,
- void *modify_actions, u32 *modify_header_id)
+static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
+ u8 namespace, u8 num_actions,
+ void *modify_actions,
+ struct mlx5_modify_hdr *modify_hdr)
{
u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)];
int max_actions, actions_size, inlen, err;
+ struct mlx5_core_dev *dev = ns->dev;
void *actions_in;
u8 table_type;
u32 *in;
@@ -772,26 +820,26 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
memset(out, 0, sizeof(out));
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
- *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id);
+ modify_hdr->id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id);
kfree(in);
return err;
}
-EXPORT_SYMBOL(mlx5_modify_header_alloc);
-void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
+static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_modify_hdr *modify_hdr)
{
u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)];
u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)];
+ struct mlx5_core_dev *dev = ns->dev;
memset(in, 0, sizeof(in));
MLX5_SET(dealloc_modify_header_context_in, in, opcode,
MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id,
- modify_header_id);
+ modify_hdr->id);
mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-EXPORT_SYMBOL(mlx5_modify_header_dealloc);
static const struct mlx5_flow_cmds mlx5_flow_cmds = {
.create_flow_table = mlx5_cmd_create_flow_table,
@@ -803,6 +851,13 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = {
.update_fte = mlx5_cmd_update_fte,
.delete_fte = mlx5_cmd_delete_fte,
.update_root_ft = mlx5_cmd_update_root_ft,
+ .packet_reformat_alloc = mlx5_cmd_packet_reformat_alloc,
+ .packet_reformat_dealloc = mlx5_cmd_packet_reformat_dealloc,
+ .modify_header_alloc = mlx5_cmd_modify_header_alloc,
+ .modify_header_dealloc = mlx5_cmd_modify_header_dealloc,
+ .set_peer = mlx5_cmd_stub_set_peer,
+ .create_ns = mlx5_cmd_stub_create_ns,
+ .destroy_ns = mlx5_cmd_stub_destroy_ns,
};
static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
@@ -815,9 +870,16 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
.update_fte = mlx5_cmd_stub_update_fte,
.delete_fte = mlx5_cmd_stub_delete_fte,
.update_root_ft = mlx5_cmd_stub_update_root_ft,
+ .packet_reformat_alloc = mlx5_cmd_stub_packet_reformat_alloc,
+ .packet_reformat_dealloc = mlx5_cmd_stub_packet_reformat_dealloc,
+ .modify_header_alloc = mlx5_cmd_stub_modify_header_alloc,
+ .modify_header_dealloc = mlx5_cmd_stub_modify_header_dealloc,
+ .set_peer = mlx5_cmd_stub_set_peer,
+ .create_ns = mlx5_cmd_stub_create_ns,
+ .destroy_ns = mlx5_cmd_stub_destroy_ns,
};
-static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
{
return &mlx5_flow_cmds;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index bc4606306009..d62de642eca9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -75,6 +75,30 @@ struct mlx5_flow_cmds {
struct mlx5_flow_table *ft,
u32 underlay_qpn,
bool disconnect);
+
+ int (*packet_reformat_alloc)(struct mlx5_flow_root_namespace *ns,
+ int reformat_type,
+ size_t size,
+ void *reformat_data,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5_pkt_reformat *pkt_reformat);
+
+ void (*packet_reformat_dealloc)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_pkt_reformat *pkt_reformat);
+
+ int (*modify_header_alloc)(struct mlx5_flow_root_namespace *ns,
+ u8 namespace, u8 num_actions,
+ void *modify_actions,
+ struct mlx5_modify_hdr *modify_hdr);
+
+ void (*modify_header_dealloc)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_modify_hdr *modify_hdr);
+
+ int (*set_peer)(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_root_namespace *peer_ns);
+
+ int (*create_ns)(struct mlx5_flow_root_namespace *ns);
+ int (*destroy_ns)(struct mlx5_flow_root_namespace *ns);
};
int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
@@ -90,5 +114,6 @@ int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
u32 *out);
const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type);
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 7bdec442f0ac..3bbb49354829 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1415,7 +1415,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
(d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
- (d1->vport.reformat_id == d2->vport.reformat_id) : true)) ||
+ (d1->vport.pkt_reformat->id ==
+ d2->vport.pkt_reformat->id) : true)) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
d1->ft == d2->ft) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
@@ -2888,3 +2889,160 @@ out:
return err;
}
EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
+
+static struct mlx5_flow_root_namespace
+*get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5_flow_namespace *ns;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS ||
+ ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS)
+ ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0);
+ else
+ ns = mlx5_get_flow_namespace(dev, ns_type);
+ if (!ns)
+ return NULL;
+
+ return find_root(&ns->node);
+}
+
+struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+ u8 ns_type, u8 num_actions,
+ void *modify_actions)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_modify_hdr *modify_hdr;
+ int err;
+
+ root = get_root_namespace(dev, ns_type);
+ if (!root)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ modify_hdr = kzalloc(sizeof(*modify_hdr), GFP_KERNEL);
+ if (!modify_hdr)
+ return ERR_PTR(-ENOMEM);
+
+ modify_hdr->ns_type = ns_type;
+ err = root->cmds->modify_header_alloc(root, ns_type, num_actions,
+ modify_actions, modify_hdr);
+ if (err) {
+ kfree(modify_hdr);
+ return ERR_PTR(err);
+ }
+
+ return modify_hdr;
+}
+EXPORT_SYMBOL(mlx5_modify_header_alloc);
+
+void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
+ struct mlx5_modify_hdr *modify_hdr)
+{
+ struct mlx5_flow_root_namespace *root;
+
+ root = get_root_namespace(dev, modify_hdr->ns_type);
+ if (WARN_ON(!root))
+ return;
+ root->cmds->modify_header_dealloc(root, modify_hdr);
+ kfree(modify_hdr);
+}
+EXPORT_SYMBOL(mlx5_modify_header_dealloc);
+
+struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
+ int reformat_type,
+ size_t size,
+ void *reformat_data,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5_pkt_reformat *pkt_reformat;
+ struct mlx5_flow_root_namespace *root;
+ int err;
+
+ root = get_root_namespace(dev, ns_type);
+ if (!root)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ pkt_reformat = kzalloc(sizeof(*pkt_reformat), GFP_KERNEL);
+ if (!pkt_reformat)
+ return ERR_PTR(-ENOMEM);
+
+ pkt_reformat->ns_type = ns_type;
+ pkt_reformat->reformat_type = reformat_type;
+ err = root->cmds->packet_reformat_alloc(root, reformat_type, size,
+ reformat_data, ns_type,
+ pkt_reformat);
+ if (err) {
+ kfree(pkt_reformat);
+ return ERR_PTR(err);
+ }
+
+ return pkt_reformat;
+}
+EXPORT_SYMBOL(mlx5_packet_reformat_alloc);
+
+void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+ struct mlx5_flow_root_namespace *root;
+
+ root = get_root_namespace(dev, pkt_reformat->ns_type);
+ if (WARN_ON(!root))
+ return;
+ root->cmds->packet_reformat_dealloc(root, pkt_reformat);
+ kfree(pkt_reformat);
+}
+EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
+
+int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_root_namespace *peer_ns)
+{
+ if (peer_ns && ns->mode != peer_ns->mode) {
+ mlx5_core_err(ns->dev,
+ "Can't peer namespace of different steering mode\n");
+ return -EINVAL;
+ }
+
+ return ns->cmds->set_peer(ns, peer_ns);
+}
+
+/* This function should be called only at the init stage of the namespace.
+ * It is not safe to call it while steering operations are being executed
+ * in the namespace.
+ */
+int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
+ enum mlx5_flow_steering_mode mode)
+{
+ struct mlx5_flow_root_namespace *root;
+ const struct mlx5_flow_cmds *cmds;
+ int err;
+
+ root = find_root(&ns->node);
+ if (&root->ns != ns)
+ /* Can't set cmds on a non-root namespace */
+ return -EINVAL;
+
+ if (root->table_type != FS_FT_FDB)
+ return -EOPNOTSUPP;
+
+ if (root->mode == mode)
+ return 0;
+
+ if (mode == MLX5_FLOW_STEERING_MODE_SMFS)
+ cmds = mlx5_fs_cmd_get_dr_cmds();
+ else
+ cmds = mlx5_fs_cmd_get_fw_cmds();
+ if (!cmds)
+ return -EOPNOTSUPP;
+
+ err = cmds->create_ns(root);
+ if (err) {
+ mlx5_core_err(root->dev, "Failed to create flow namespace (%d)\n",
+ err);
+ return err;
+ }
+
+ root->cmds->destroy_ns(root);
+ root->cmds = cmds;
+ root->mode = mode;
+
+ return 0;
+}
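
mlx5_flow_namespace_set_mode() only accepts the FDB root namespace and must run before any steering objects exist in it. A hedged sketch of how a caller (eswitch init is assumed here) could switch the FDB to SW-managed steering:

	struct mlx5_flow_namespace *ns;
	int err;

	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!ns)
		return -EOPNOTSUPP;

	err = mlx5_flow_namespace_set_mode(ns, MLX5_FLOW_STEERING_MODE_SMFS);
	if (err)
		return err;

On success the root namespace swaps its command set to the software-steering backend and records the new mode.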
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 0d16b4b5ab83..00717eba2256 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -37,6 +37,24 @@
#include <linux/mlx5/fs.h>
#include <linux/rhashtable.h>
#include <linux/llist.h>
+#include <steering/fs_dr.h>
+
+struct mlx5_modify_hdr {
+ enum mlx5_flow_namespace_type ns_type;
+ union {
+ struct mlx5_fs_dr_action action;
+ u32 id;
+ };
+};
+
+struct mlx5_pkt_reformat {
+ enum mlx5_flow_namespace_type ns_type;
+ int reformat_type; /* from mlx5_ifc */
+ union {
+ struct mlx5_fs_dr_action action;
+ u32 id;
+ };
+};
/* FS_TYPE_PRIO_CHAINS is a PRIO that will have namespaces only,
* and those are in parallel to one another when going over them to connect
@@ -80,9 +98,15 @@ enum fs_fte_status {
FS_FTE_STATUS_EXISTING = 1UL << 0,
};
+enum mlx5_flow_steering_mode {
+ MLX5_FLOW_STEERING_MODE_DMFS,
+ MLX5_FLOW_STEERING_MODE_SMFS
+};
+
struct mlx5_flow_steering {
struct mlx5_core_dev *dev;
- struct kmem_cache *fgs_cache;
+ enum mlx5_flow_steering_mode mode;
+ struct kmem_cache *fgs_cache;
struct kmem_cache *ftes_cache;
struct mlx5_flow_root_namespace *root_ns;
struct mlx5_flow_root_namespace *fdb_root_ns;
@@ -128,6 +152,7 @@ struct mlx5_flow_handle {
/* Type of children is mlx5_flow_group */
struct mlx5_flow_table {
struct fs_node node;
+ struct mlx5_fs_dr_table fs_dr_table;
u32 id;
u16 vport;
unsigned int max_fte;
@@ -168,6 +193,7 @@ struct mlx5_ft_underlay_qp {
/* Type of children is mlx5_flow_rule */
struct fs_fte {
struct fs_node node;
+ struct mlx5_fs_dr_rule fs_dr_rule;
u32 val[MLX5_ST_SZ_DW_MATCH_PARAM];
u32 dests_size;
u32 index;
@@ -203,6 +229,7 @@ struct mlx5_flow_group_mask {
/* Type of children is fs_fte */
struct mlx5_flow_group {
struct fs_node node;
+ struct mlx5_fs_dr_matcher fs_dr_matcher;
struct mlx5_flow_group_mask mask;
u32 start_index;
u32 max_ftes;
@@ -214,6 +241,8 @@ struct mlx5_flow_group {
struct mlx5_flow_root_namespace {
struct mlx5_flow_namespace ns;
+ enum mlx5_flow_steering_mode mode;
+ struct mlx5_fs_dr_domain fs_dr_domain;
enum fs_flow_table_type table_type;
struct mlx5_core_dev *dev;
struct mlx5_flow_table *root_ft;
@@ -231,6 +260,14 @@ void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev,
void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
unsigned long interval);
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void);
+
+int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_root_namespace *peer_ns);
+
+int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
+ enum mlx5_flow_steering_mode mode);
+
int mlx5_init_fs(struct mlx5_core_dev *dev);
void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
new file mode 100644
index 000000000000..e065c2f68f5a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+
+struct mlx5_dm {
+ /* protect access to icm bitmask */
+ spinlock_t lock;
+ unsigned long *steering_sw_icm_alloc_blocks;
+ unsigned long *header_modify_sw_icm_alloc_blocks;
+};
+
+struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
+{
+ u64 header_modify_icm_blocks = 0;
+ u64 steering_icm_blocks = 0;
+ struct mlx5_dm *dm;
+
+ if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM))
+ return NULL;
+
+ dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+ if (!dm)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&dm->lock);
+
+ if (MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address)) {
+ steering_icm_blocks =
+ BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+ dm->steering_sw_icm_alloc_blocks =
+ kcalloc(BITS_TO_LONGS(steering_icm_blocks),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!dm->steering_sw_icm_alloc_blocks)
+ goto err_steering;
+ }
+
+ if (MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address)) {
+ header_modify_icm_blocks =
+ BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+ dm->header_modify_sw_icm_alloc_blocks =
+ kcalloc(BITS_TO_LONGS(header_modify_icm_blocks),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!dm->header_modify_sw_icm_alloc_blocks)
+ goto err_modify_hdr;
+ }
+
+ return dm;
+
+err_modify_hdr:
+ kfree(dm->steering_sw_icm_alloc_blocks);
+
+err_steering:
+ kfree(dm);
+
+ return ERR_PTR(-ENOMEM);
+}
+
+void mlx5_dm_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_dm *dm = dev->dm;
+
+ if (!dev->dm)
+ return;
+
+ if (dm->steering_sw_icm_alloc_blocks) {
+ WARN_ON(!bitmap_empty(dm->steering_sw_icm_alloc_blocks,
+ BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+ kfree(dm->steering_sw_icm_alloc_blocks);
+ }
+
+ if (dm->header_modify_sw_icm_alloc_blocks) {
+ WARN_ON(!bitmap_empty(dm->header_modify_sw_icm_alloc_blocks,
+ BIT(MLX5_CAP_DEV_MEM(dev,
+ log_header_modify_sw_icm_size) -
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+ kfree(dm->header_modify_sw_icm_alloc_blocks);
+ }
+
+ kfree(dm);
+}
+
+int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
+ u64 length, u16 uid, phys_addr_t *addr, u32 *obj_id)
+{
+ u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {};
+ struct mlx5_dm *dm = dev->dm;
+ unsigned long *block_map;
+ u64 icm_start_addr;
+ u32 log_icm_size;
+ u32 max_blocks;
+ u64 block_idx;
+ void *sw_icm;
+ int ret;
+
+ if (!dev->dm)
+ return -EOPNOTSUPP;
+
+ if (!length || (length & (length - 1)) ||
+ length & (MLX5_SW_ICM_BLOCK_SIZE(dev) - 1))
+ return -EINVAL;
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
+ MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
+
+ switch (type) {
+ case MLX5_SW_ICM_TYPE_STEERING:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address);
+ log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size);
+ block_map = dm->steering_sw_icm_alloc_blocks;
+ break;
+ case MLX5_SW_ICM_TYPE_HEADER_MODIFY:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address);
+ log_icm_size = MLX5_CAP_DEV_MEM(dev,
+ log_header_modify_sw_icm_size);
+ block_map = dm->header_modify_sw_icm_alloc_blocks;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!block_map)
+ return -EOPNOTSUPP;
+
+ max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+ spin_lock(&dm->lock);
+ block_idx = bitmap_find_next_zero_area(block_map,
+ max_blocks,
+ 0,
+ num_blocks, 0);
+
+ if (block_idx < max_blocks)
+ bitmap_set(block_map,
+ block_idx, num_blocks);
+
+ spin_unlock(&dm->lock);
+
+ if (block_idx >= max_blocks)
+ return -ENOMEM;
+
+ sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm);
+ icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+ MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr,
+ icm_start_addr);
+ MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length));
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret) {
+ spin_lock(&dm->lock);
+ bitmap_clear(block_map,
+ block_idx, num_blocks);
+ spin_unlock(&dm->lock);
+
+ return ret;
+ }
+
+ *addr = icm_start_addr;
+ *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_alloc);
+
+int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
+ u64 length, u16 uid, phys_addr_t addr, u32 obj_id)
+{
+ u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ struct mlx5_dm *dm = dev->dm;
+ unsigned long *block_map;
+ u64 icm_start_addr;
+ u64 start_idx;
+ int err;
+
+ if (!dev->dm)
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case MLX5_SW_ICM_TYPE_STEERING:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address);
+ block_map = dm->steering_sw_icm_alloc_blocks;
+ break;
+ case MLX5_SW_ICM_TYPE_HEADER_MODIFY:
+ icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address);
+ block_map = dm->header_modify_sw_icm_alloc_blocks;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+ MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ start_idx = (addr - icm_start_addr) >> MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+ spin_lock(&dm->lock);
+ bitmap_clear(block_map,
+ start_idx, num_blocks);
+ spin_unlock(&dm->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_dealloc);
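
A hedged sketch of the SW ICM allocator usage; the single-block length and uid 0 are assumptions for illustration (the length must be a power of two and block aligned, as enforced above):

	u64 len = MLX5_SW_ICM_BLOCK_SIZE(dev);
	phys_addr_t icm_addr;
	u32 icm_obj_id;
	int err;

	err = mlx5_dm_sw_icm_alloc(dev, MLX5_SW_ICM_TYPE_STEERING, len,
				   0 /* uid */, &icm_addr, &icm_obj_id);
	if (err)
		return err;
	/* ... write steering entries into the region at icm_addr ... */
	mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING, len,
			       0 /* uid */, icm_addr, icm_obj_id);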
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index dee1a8658c87..9648c2297803 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -876,6 +876,10 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
goto err_eswitch_cleanup;
}
+ dev->dm = mlx5_dm_create(dev);
+ if (IS_ERR(dev->dm))
+ mlx5_core_warn(dev, "Failed to init device memory, err %ld\n", PTR_ERR(dev->dm));
+
dev->tracer = mlx5_fw_tracer_create(dev);
dev->hv_vhca = mlx5_hv_vhca_create(dev);
@@ -910,6 +914,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
mlx5_hv_vhca_destroy(dev->hv_vhca);
mlx5_fw_tracer_destroy(dev->tracer);
+ mlx5_dm_cleanup(dev);
mlx5_fpga_cleanup(dev);
mlx5_eswitch_cleanup(dev->priv.eswitch);
mlx5_sriov_cleanup(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 87b75b2207c4..b100489dc85c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -198,6 +198,9 @@ int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size);
int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode);
int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode);
+struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev);
+void mlx5_dm_cleanup(struct mlx5_core_dev *dev);
+
#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \
MLX5_CAP_GEN((mdev), pps_modify) && \
MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
index 18af6981e0be..0fc7de4aa572 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
@@ -14,9 +14,6 @@ static void mlx5_rdma_disable_roce_steering(struct mlx5_core_dev *dev)
{
struct mlx5_core_roce *roce = &dev->priv.roce;
- if (!roce->ft)
- return;
-
mlx5_del_flow_rules(roce->allow_rule);
mlx5_destroy_flow_group(roce->fg);
mlx5_destroy_flow_table(roce->ft);
@@ -145,6 +142,11 @@ static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev)
void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev)
{
+ struct mlx5_core_roce *roce = &dev->priv.roce;
+
+ if (!roce->ft)
+ return;
+
mlx5_rdma_disable_roce_steering(dev);
mlx5_rdma_del_roce_addr(dev);
mlx5_nic_vport_disable_roce(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile
new file mode 100644
index 000000000000..c78512eed8d7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
new file mode 100644
index 000000000000..a02f87f85c17
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -0,0 +1,1588 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+enum dr_action_domain {
+ DR_ACTION_DOMAIN_NIC_INGRESS,
+ DR_ACTION_DOMAIN_NIC_EGRESS,
+ DR_ACTION_DOMAIN_FDB_INGRESS,
+ DR_ACTION_DOMAIN_FDB_EGRESS,
+ DR_ACTION_DOMAIN_MAX,
+};
+
+enum dr_action_valid_state {
+ DR_ACTION_STATE_ERR,
+ DR_ACTION_STATE_NO_ACTION,
+ DR_ACTION_STATE_REFORMAT,
+ DR_ACTION_STATE_MODIFY_HDR,
+ DR_ACTION_STATE_MODIFY_VLAN,
+ DR_ACTION_STATE_NON_TERM,
+ DR_ACTION_STATE_TERM,
+ DR_ACTION_STATE_MAX,
+};
+
+static const enum dr_action_valid_state
+next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] = {
+ [DR_ACTION_DOMAIN_NIC_INGRESS] = {
+ [DR_ACTION_STATE_NO_ACTION] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ },
+ [DR_ACTION_STATE_REFORMAT] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ },
+ [DR_ACTION_STATE_MODIFY_HDR] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
+ },
+ [DR_ACTION_STATE_MODIFY_VLAN] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ },
+ [DR_ACTION_STATE_NON_TERM] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ },
+ [DR_ACTION_STATE_TERM] = {
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
+ },
+ },
+ [DR_ACTION_DOMAIN_NIC_EGRESS] = {
+ [DR_ACTION_STATE_NO_ACTION] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ },
+ [DR_ACTION_STATE_REFORMAT] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT,
+ },
+ [DR_ACTION_STATE_MODIFY_HDR] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ },
+ [DR_ACTION_STATE_MODIFY_VLAN] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
+ },
+ [DR_ACTION_STATE_NON_TERM] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ },
+ [DR_ACTION_STATE_TERM] = {
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
+ },
+ },
+ [DR_ACTION_DOMAIN_FDB_INGRESS] = {
+ [DR_ACTION_STATE_NO_ACTION] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ },
+ [DR_ACTION_STATE_REFORMAT] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ },
+ [DR_ACTION_STATE_MODIFY_HDR] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ },
+ [DR_ACTION_STATE_MODIFY_VLAN] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ },
+ [DR_ACTION_STATE_NON_TERM] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ },
+ [DR_ACTION_STATE_TERM] = {
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
+ },
+ },
+ [DR_ACTION_DOMAIN_FDB_EGRESS] = {
+ [DR_ACTION_STATE_NO_ACTION] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ },
+ [DR_ACTION_STATE_REFORMAT] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ },
+ [DR_ACTION_STATE_MODIFY_HDR] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ },
+ [DR_ACTION_STATE_MODIFY_VLAN] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ },
+ [DR_ACTION_STATE_NON_TERM] = {
+ [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM,
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM,
+ [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR,
+ [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT,
+ [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN,
+ [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM,
+ },
+ [DR_ACTION_STATE_TERM] = {
+ [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM,
+ },
+ },
+};
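
The table above is a per-domain state machine: the current state indexes the row, the next action type indexes the column, and any transition not listed defaults to DR_ACTION_STATE_ERR (entry zero). A small hedged walk-through of a valid NIC ingress ordering:

	u32 state = DR_ACTION_STATE_NO_ACTION;

	state = next_action_state[DR_ACTION_DOMAIN_NIC_INGRESS][state][DR_ACTION_TYP_CTR];        /* -> NON_TERM */
	state = next_action_state[DR_ACTION_DOMAIN_NIC_INGRESS][state][DR_ACTION_TYP_MODIFY_HDR]; /* -> MODIFY_HDR */
	state = next_action_state[DR_ACTION_DOMAIN_NIC_INGRESS][state][DR_ACTION_TYP_FT];         /* -> TERM */

Placing the forward-to-table action first would move the machine to TERM, after which MODIFY_HDR is unlisted and therefore rejected as DR_ACTION_STATE_ERR.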
+
+struct dr_action_modify_field_conv {
+ u16 hw_field;
+ u8 start;
+ u8 end;
+ u8 l3_type;
+ u8 l4_type;
+};
+
+static const struct dr_action_modify_field_conv dr_action_conv_arr[] = {
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_1, .start = 16, .end = 47,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_1, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_2, .start = 32, .end = 47,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_0, .start = 16, .end = 47,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_0, .start = 0, .end = 15,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 0, .end = 5,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 48, .end = 56,
+ .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 0, .end = 15,
+ .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 16, .end = 31,
+ .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 8, .end = 15,
+ .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 8, .end = 15,
+ .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 0, .end = 15,
+ .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 16, .end = 31,
+ .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_3, .start = 32, .end = 63,
+ .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_3, .start = 0, .end = 31,
+ .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_4, .start = 32, .end = 63,
+ .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_4, .start = 0, .end = 31,
+ .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 32, .end = 63,
+ .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 0, .end = 31,
+ .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_2, .start = 32, .end = 63,
+ .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_2, .start = 0, .end = 31,
+ .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 0, .end = 31,
+ .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 32, .end = 63,
+ .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_METADATA, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_METADATA, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_0, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_0, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_1, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_2, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_2, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_1, .start = 32, .end = 63,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_1, .start = 0, .end = 31,
+ },
+ [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = {
+ .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_2, .start = 0, .end = 15,
+ },
+};
+
+#define MAX_VLANS 2
+struct dr_action_vlan_info {
+ int count;
+ u32 headers[MAX_VLANS];
+};
+
+struct dr_action_apply_attr {
+ u32 modify_index;
+ u16 modify_actions;
+ u32 decap_index;
+ u16 decap_actions;
+ u8 decap_with_vlan:1;
+ u64 final_icm_addr;
+ u32 flow_tag;
+ u32 ctr_id;
+ u16 gvmi;
+ u16 hit_gvmi;
+ u32 reformat_id;
+ u32 reformat_size;
+ struct dr_action_vlan_info vlans;
+};
+
+static int
+dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type,
+ enum mlx5dr_action_type *action_type)
+{
+ switch (reformat_type) {
+ case DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2:
+ *action_type = DR_ACTION_TYP_TNL_L2_TO_L2;
+ break;
+ case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2:
+ *action_type = DR_ACTION_TYP_L2_TO_TNL_L2;
+ break;
+ case DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2:
+ *action_type = DR_ACTION_TYP_TNL_L3_TO_L2;
+ break;
+ case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3:
+ *action_type = DR_ACTION_TYP_L2_TO_TNL_L3;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void dr_actions_init_next_ste(u8 **last_ste,
+ u32 *added_stes,
+ enum mlx5dr_ste_entry_type entry_type,
+ u16 gvmi)
+{
+ (*added_stes)++;
+ *last_ste += DR_STE_SIZE;
+ mlx5dr_ste_init(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE, entry_type, gvmi);
+}
+
+static void dr_actions_apply_tx(struct mlx5dr_domain *dmn,
+ u8 *action_type_set,
+ u8 *last_ste,
+ struct dr_action_apply_attr *attr,
+ u32 *added_stes)
+{
+ bool encap = action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2] ||
+ action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3];
+
+ /* We want to make sure the modify header comes before L2
+ * encapsulation. The reason for that is that we support
+ * modify headers for outer headers only
+ */
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+ mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
+ mlx5dr_ste_set_rewrite_actions(last_ste,
+ attr->modify_actions,
+ attr->modify_index);
+ }
+
+ if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) {
+ int i;
+
+ for (i = 0; i < attr->vlans.count; i++) {
+ if (i || action_type_set[DR_ACTION_TYP_MODIFY_HDR])
+ dr_actions_init_next_ste(&last_ste,
+ added_stes,
+ MLX5DR_STE_TYPE_TX,
+ attr->gvmi);
+
+ mlx5dr_ste_set_tx_push_vlan(last_ste,
+ attr->vlans.headers[i],
+ encap);
+ }
+ }
+
+ if (encap) {
+ /* Modify header and encapsulation require different STEs,
+ * since the modify header STE format doesn't support the
+ * encapsulation tunneling_action.
+ */
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] ||
+ action_type_set[DR_ACTION_TYP_PUSH_VLAN])
+ dr_actions_init_next_ste(&last_ste,
+ added_stes,
+ MLX5DR_STE_TYPE_TX,
+ attr->gvmi);
+
+ mlx5dr_ste_set_tx_encap(last_ste,
+ attr->reformat_id,
+ attr->reformat_size,
+ action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]);
+ /* Whenever prio_tag_required is enabled, we can be sure that the
+ * previous table (ACL) has already pushed a vlan onto our packet.
+ * Due to a HW limitation we need to set this bit, otherwise
+ * push vlan + reformat will not work.
+ */
+ if (MLX5_CAP_GEN(dmn->mdev, prio_tag_required))
+ mlx5dr_ste_set_go_back_bit(last_ste);
+ }
+
+ if (action_type_set[DR_ACTION_TYP_CTR])
+ mlx5dr_ste_set_counter_id(last_ste, attr->ctr_id);
+}
+
+static void dr_actions_apply_rx(u8 *action_type_set,
+ u8 *last_ste,
+ struct dr_action_apply_attr *attr,
+ u32 *added_stes)
+{
+ if (action_type_set[DR_ACTION_TYP_CTR])
+ mlx5dr_ste_set_counter_id(last_ste, attr->ctr_id);
+
+ if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) {
+ mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
+ mlx5dr_ste_set_rx_decap_l3(last_ste, attr->decap_with_vlan);
+ mlx5dr_ste_set_rewrite_actions(last_ste,
+ attr->decap_actions,
+ attr->decap_index);
+ }
+
+ if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2])
+ mlx5dr_ste_set_rx_decap(last_ste);
+
+ if (action_type_set[DR_ACTION_TYP_POP_VLAN]) {
+ int i;
+
+ for (i = 0; i < attr->vlans.count; i++) {
+ if (i ||
+ action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2] ||
+ action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2])
+ dr_actions_init_next_ste(&last_ste,
+ added_stes,
+ MLX5DR_STE_TYPE_RX,
+ attr->gvmi);
+
+ mlx5dr_ste_set_rx_pop_vlan(last_ste);
+ }
+ }
+
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+ if (mlx5dr_ste_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT)
+ dr_actions_init_next_ste(&last_ste,
+ added_stes,
+ MLX5DR_STE_TYPE_MODIFY_PKT,
+ attr->gvmi);
+ else
+ mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
+
+ mlx5dr_ste_set_rewrite_actions(last_ste,
+ attr->modify_actions,
+ attr->modify_index);
+ }
+
+ if (action_type_set[DR_ACTION_TYP_TAG]) {
+ if (mlx5dr_ste_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT)
+ dr_actions_init_next_ste(&last_ste,
+ added_stes,
+ MLX5DR_STE_TYPE_RX,
+ attr->gvmi);
+
+ mlx5dr_ste_rx_set_flow_tag(last_ste, attr->flow_tag);
+ }
+}
+
+/* Apply the actions on the rule STE array starting from the last_ste.
+ * Actions might require more than one STE; new_num_stes returns
+ * the new size of the STE array for the rule including its actions.
+ */
+static void dr_actions_apply(struct mlx5dr_domain *dmn,
+ enum mlx5dr_ste_entry_type ste_type,
+ u8 *action_type_set,
+ u8 *last_ste,
+ struct dr_action_apply_attr *attr,
+ u32 *new_num_stes)
+{
+ u32 added_stes = 0;
+
+ if (ste_type == MLX5DR_STE_TYPE_RX)
+ dr_actions_apply_rx(action_type_set, last_ste, attr, &added_stes);
+ else
+ dr_actions_apply_tx(dmn, action_type_set, last_ste, attr, &added_stes);
+
+ last_ste += added_stes * DR_STE_SIZE;
+ *new_num_stes += added_stes;
+
+ mlx5dr_ste_set_hit_gvmi(last_ste, attr->hit_gvmi);
+ mlx5dr_ste_set_hit_addr(last_ste, attr->final_icm_addr, 1);
+}
+
+static enum dr_action_domain
+dr_action_get_action_domain(enum mlx5dr_domain_type domain,
+ enum mlx5dr_ste_entry_type ste_type)
+{
+ switch (domain) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ return DR_ACTION_DOMAIN_NIC_INGRESS;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ return DR_ACTION_DOMAIN_NIC_EGRESS;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ if (ste_type == MLX5DR_STE_TYPE_RX)
+ return DR_ACTION_DOMAIN_FDB_INGRESS;
+ return DR_ACTION_DOMAIN_FDB_EGRESS;
+ default:
+ WARN_ON(true);
+ return DR_ACTION_DOMAIN_MAX;
+ }
+}
+
+static
+int dr_action_validate_and_get_next_state(enum dr_action_domain action_domain,
+ u32 action_type,
+ u32 *state)
+{
+ u32 cur_state = *state;
+
+ /* Check action state machine is valid */
+ *state = next_action_state[action_domain][cur_state][action_type];
+
+ if (*state == DR_ACTION_STATE_ERR)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action *dest_action,
+ u64 *final_icm_addr)
+{
+ int ret;
+
+ switch (dest_action->action_type) {
+ case DR_ACTION_TYP_FT:
+ /* Allow destination flow table only if table is a terminating
+ * table, since there is an *assumption* that in such a case the
+ * FW will recalculate the CS.
+ */
+ if (dest_action->dest_tbl.is_fw_tbl) {
+ *final_icm_addr = dest_action->dest_tbl.fw_tbl.rx_icm_addr;
+ } else {
+ mlx5dr_dbg(dmn,
+ "Destination FT should be terminating when modify TTL is used\n");
+ return -EINVAL;
+ }
+ break;
+
+ case DR_ACTION_TYP_VPORT:
+ /* If destination is vport we will get the FW flow table
+ * that recalculates the CS and forwards to the vport.
+ */
+ ret = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dest_action->vport.dmn,
+ dest_action->vport.num,
+ final_icm_addr);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed to get FW cs recalc flow table\n");
+ return ret;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+#define WITH_VLAN_NUM_HW_ACTIONS 6
+
+int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_action *actions[],
+ u32 num_actions,
+ u8 *ste_arr,
+ u32 *new_hw_ste_arr_sz)
+{
+ struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+ bool rx_rule = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ u8 action_type_set[DR_ACTION_TYP_MAX] = {};
+ struct mlx5dr_action *dest_action = NULL;
+ u32 state = DR_ACTION_STATE_NO_ACTION;
+ struct dr_action_apply_attr attr = {};
+ enum dr_action_domain action_domain;
+ bool recalc_cs_required = false;
+ u8 *last_ste;
+ int i, ret;
+
+ attr.gvmi = dmn->info.caps.gvmi;
+ attr.hit_gvmi = dmn->info.caps.gvmi;
+ attr.final_icm_addr = nic_dmn->default_icm_addr;
+ action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->ste_type);
+
+ for (i = 0; i < num_actions; i++) {
+ struct mlx5dr_action *action;
+ int max_actions_type = 1;
+ u32 action_type;
+
+ action = actions[i];
+ action_type = action->action_type;
+
+ switch (action_type) {
+ case DR_ACTION_TYP_DROP:
+ attr.final_icm_addr = nic_dmn->drop_icm_addr;
+ break;
+ case DR_ACTION_TYP_FT:
+ dest_action = action;
+ if (!action->dest_tbl.is_fw_tbl) {
+ if (action->dest_tbl.tbl->dmn != dmn) {
+ mlx5dr_dbg(dmn,
+ "Destination table belongs to a different domain\n");
+ goto out_invalid_arg;
+ }
+ if (action->dest_tbl.tbl->level <= matcher->tbl->level) {
+ mlx5dr_dbg(dmn,
+ "Destination table level should be higher than source table\n");
+ goto out_invalid_arg;
+ }
+ attr.final_icm_addr = rx_rule ?
+ action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr :
+ action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr;
+ } else {
+ struct mlx5dr_cmd_query_flow_table_details output;
+ int ret;
+
+ /* get the relevant addresses */
+ if (!action->dest_tbl.fw_tbl.rx_icm_addr) {
+ ret = mlx5dr_cmd_query_flow_table(action->dest_tbl.fw_tbl.mdev,
+ action->dest_tbl.fw_tbl.ft->type,
+ action->dest_tbl.fw_tbl.ft->id,
+ &output);
+ if (!ret) {
+ action->dest_tbl.fw_tbl.tx_icm_addr =
+ output.sw_owner_icm_root_1;
+ action->dest_tbl.fw_tbl.rx_icm_addr =
+ output.sw_owner_icm_root_0;
+ } else {
+ mlx5dr_dbg(dmn,
+ "Failed mlx5_cmd_query_flow_table ret: %d\n",
+ ret);
+ return ret;
+ }
+ }
+ attr.final_icm_addr = rx_rule ?
+ action->dest_tbl.fw_tbl.rx_icm_addr :
+ action->dest_tbl.fw_tbl.tx_icm_addr;
+ }
+ break;
+ case DR_ACTION_TYP_QP:
+ mlx5dr_info(dmn, "Domain doesn't support QP\n");
+ goto out_invalid_arg;
+ case DR_ACTION_TYP_CTR:
+ attr.ctr_id = action->ctr.ctr_id +
+ action->ctr.offeset;
+ break;
+ case DR_ACTION_TYP_TAG:
+ attr.flow_tag = action->flow_tag;
+ break;
+ case DR_ACTION_TYP_TNL_L2_TO_L2:
+ break;
+ case DR_ACTION_TYP_TNL_L3_TO_L2:
+ attr.decap_index = action->rewrite.index;
+ attr.decap_actions = action->rewrite.num_of_actions;
+ attr.decap_with_vlan =
+ attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS;
+ break;
+ case DR_ACTION_TYP_MODIFY_HDR:
+ attr.modify_index = action->rewrite.index;
+ attr.modify_actions = action->rewrite.num_of_actions;
+ recalc_cs_required = action->rewrite.modify_ttl;
+ break;
+ case DR_ACTION_TYP_L2_TO_TNL_L2:
+ case DR_ACTION_TYP_L2_TO_TNL_L3:
+ attr.reformat_size = action->reformat.reformat_size;
+ attr.reformat_id = action->reformat.reformat_id;
+ break;
+ case DR_ACTION_TYP_VPORT:
+ attr.hit_gvmi = action->vport.caps->vhca_gvmi;
+ dest_action = action;
+ if (rx_rule) {
+ /* Loopback on WIRE vport is not supported */
+ if (action->vport.num == WIRE_PORT)
+ goto out_invalid_arg;
+
+ attr.final_icm_addr = action->vport.caps->icm_address_rx;
+ } else {
+ attr.final_icm_addr = action->vport.caps->icm_address_tx;
+ }
+ break;
+ case DR_ACTION_TYP_POP_VLAN:
+ max_actions_type = MAX_VLANS;
+ attr.vlans.count++;
+ break;
+ case DR_ACTION_TYP_PUSH_VLAN:
+ max_actions_type = MAX_VLANS;
+ if (attr.vlans.count == MAX_VLANS)
+ return -EINVAL;
+
+ attr.vlans.headers[attr.vlans.count++] = action->push_vlan.vlan_hdr;
+ break;
+ default:
+ goto out_invalid_arg;
+ }
+
+ /* Check action duplication */
+ if (++action_type_set[action_type] > max_actions_type) {
+ mlx5dr_dbg(dmn, "Action type %d supports only max %d time(s)\n",
+ action_type, max_actions_type);
+ goto out_invalid_arg;
+ }
+
+ /* Check action state machine is valid */
+ if (dr_action_validate_and_get_next_state(action_domain,
+ action_type,
+ &state)) {
+ mlx5dr_dbg(dmn, "Invalid action sequence provided\n");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ *new_hw_ste_arr_sz = nic_matcher->num_of_builders;
+ last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1);
+
+ /* Due to a HW bug, modifying TTL on RX flows will cause an incorrect
+ * checksum calculation. In this case we will use a FW table to
+ * recalculate.
+ */
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB &&
+ rx_rule && recalc_cs_required && dest_action) {
+ ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr);
+ if (ret) {
+ mlx5dr_dbg(dmn,
+ "Failed to handle checksum recalculation err %d\n",
+ ret);
+ return ret;
+ }
+ }
+
+ dr_actions_apply(dmn,
+ nic_dmn->ste_type,
+ action_type_set,
+ last_ste,
+ &attr,
+ new_hw_ste_arr_sz);
+
+ return 0;
+
+out_invalid_arg:
+ return -EINVAL;
+}
+
+#define CVLAN_ETHERTYPE 0x8100
+#define SVLAN_ETHERTYPE 0x88a8
+#define HDR_LEN_L2_ONLY 14
+#define HDR_LEN_L2_VLAN 18
+#define REWRITE_HW_ACTION_NUM 6
+
+static int dr_actions_l2_rewrite(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action *action,
+ void *data, size_t data_sz)
+{
+ struct mlx5_ifc_l2_hdr_bits *l2_hdr = data;
+ u64 ops[REWRITE_HW_ACTION_NUM] = {};
+ u32 hdr_fld_4b;
+ u16 hdr_fld_2b;
+ u16 vlan_type;
+ bool vlan;
+ int i = 0;
+ int ret;
+
+ vlan = (data_sz != HDR_LEN_L2_ONLY);
+
+ /* dmac_47_16 */
+ MLX5_SET(dr_action_hw_set, ops + i,
+ opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_length, 0);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_0);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_left_shifter, 16);
+ hdr_fld_4b = MLX5_GET(l2_hdr, l2_hdr, dmac_47_16);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ inline_data, hdr_fld_4b);
+ i++;
+
+ /* smac_47_16 */
+ MLX5_SET(dr_action_hw_set, ops + i,
+ opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_length, 0);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_1);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_left_shifter, 16);
+ hdr_fld_4b = (MLX5_GET(l2_hdr, l2_hdr, smac_31_0) >> 16 |
+ MLX5_GET(l2_hdr, l2_hdr, smac_47_32) << 16);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ inline_data, hdr_fld_4b);
+ i++;
+
+ /* dmac_15_0 */
+ MLX5_SET(dr_action_hw_set, ops + i,
+ opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_length, 16);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_0);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_left_shifter, 0);
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, dmac_15_0);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ inline_data, hdr_fld_2b);
+ i++;
+
+ /* ethertype + (optional) vlan */
+ MLX5_SET(dr_action_hw_set, ops + i,
+ opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_2);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_left_shifter, 32);
+ if (!vlan) {
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype);
+ MLX5_SET(dr_action_hw_set, ops + i, inline_data, hdr_fld_2b);
+ MLX5_SET(dr_action_hw_set, ops + i, destination_length, 16);
+ } else {
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype);
+ vlan_type = hdr_fld_2b == SVLAN_ETHERTYPE ? DR_STE_SVLAN : DR_STE_CVLAN;
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan);
+ hdr_fld_4b = (vlan_type << 16) | hdr_fld_2b;
+ MLX5_SET(dr_action_hw_set, ops + i, inline_data, hdr_fld_4b);
+ MLX5_SET(dr_action_hw_set, ops + i, destination_length, 18);
+ }
+ i++;
+
+ /* smac_15_0 */
+ MLX5_SET(dr_action_hw_set, ops + i,
+ opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_length, 16);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_1);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_left_shifter, 0);
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, smac_31_0);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ inline_data, hdr_fld_2b);
+ i++;
+
+ if (vlan) {
+ MLX5_SET(dr_action_hw_set, ops + i,
+ opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
+ hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan_type);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ inline_data, hdr_fld_2b);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_length, 16);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_2);
+ MLX5_SET(dr_action_hw_set, ops + i,
+ destination_left_shifter, 0);
+ i++;
+ }
+
+ action->rewrite.data = (void *)ops;
+ action->rewrite.num_of_actions = i;
+ action->rewrite.chunk->byte_size = i * sizeof(*ops);
+
+ ret = mlx5dr_send_postsend_action(dmn, action);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Writing encapsulation action to ICM failed\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static struct mlx5dr_action *
+dr_action_create_generic(enum mlx5dr_action_type action_type)
+{
+ struct mlx5dr_action *action;
+
+ action = kzalloc(sizeof(*action), GFP_KERNEL);
+ if (!action)
+ return NULL;
+
+ action->action_type = action_type;
+ refcount_set(&action->refcount, 1);
+
+ return action;
+}
+
+struct mlx5dr_action *mlx5dr_action_create_drop(void)
+{
+ return dr_action_create_generic(DR_ACTION_TYP_DROP);
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl)
+{
+ struct mlx5dr_action *action;
+
+ refcount_inc(&tbl->refcount);
+
+ action = dr_action_create_generic(DR_ACTION_TYP_FT);
+ if (!action)
+ goto dec_ref;
+
+ action->dest_tbl.tbl = tbl;
+
+ return action;
+
+dec_ref:
+ refcount_dec(&tbl->refcount);
+ return NULL;
+}
+
+struct mlx5dr_action *
+mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft,
+ struct mlx5_core_dev *mdev)
+{
+ struct mlx5dr_action *action;
+
+ action = dr_action_create_generic(DR_ACTION_TYP_FT);
+ if (!action)
+ return NULL;
+
+ action->dest_tbl.is_fw_tbl = 1;
+ action->dest_tbl.fw_tbl.ft = ft;
+ action->dest_tbl.fw_tbl.mdev = mdev;
+
+ return action;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_flow_counter(u32 counter_id)
+{
+ struct mlx5dr_action *action;
+
+ action = dr_action_create_generic(DR_ACTION_TYP_CTR);
+ if (!action)
+ return NULL;
+
+ action->ctr.ctr_id = counter_id;
+
+ return action;
+}
+
+struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value)
+{
+ struct mlx5dr_action *action;
+
+ action = dr_action_create_generic(DR_ACTION_TYP_TAG);
+ if (!action)
+ return NULL;
+
+ action->flow_tag = tag_value & 0xffffff;
+
+ return action;
+}
+
+static int
+dr_action_verify_reformat_params(enum mlx5dr_action_type reformat_type,
+ struct mlx5dr_domain *dmn,
+ size_t data_sz,
+ void *data)
+{
+ if ((!data && data_sz) || (data && !data_sz) || reformat_type >
+ DR_ACTION_TYP_L2_TO_TNL_L3) {
+ mlx5dr_dbg(dmn, "Invalid reformat parameter!\n");
+ goto out_err;
+ }
+
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB)
+ return 0;
+
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
+ if (reformat_type != DR_ACTION_TYP_TNL_L2_TO_L2 &&
+ reformat_type != DR_ACTION_TYP_TNL_L3_TO_L2) {
+ mlx5dr_dbg(dmn, "Action reformat type not support on RX domain\n");
+ goto out_err;
+ }
+ } else if (dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) {
+ if (reformat_type != DR_ACTION_TYP_L2_TO_TNL_L2 &&
+ reformat_type != DR_ACTION_TYP_L2_TO_TNL_L3) {
+ mlx5dr_dbg(dmn, "Action reformat type not support on TX domain\n");
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ return -EINVAL;
+}
+
+#define ACTION_CACHE_LINE_SIZE 64
+
+static int
+dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
+ size_t data_sz, void *data,
+ struct mlx5dr_action *action)
+{
+ u32 reformat_id;
+ int ret;
+
+ switch (action->action_type) {
+ case DR_ACTION_TYP_L2_TO_TNL_L2:
+ case DR_ACTION_TYP_L2_TO_TNL_L3:
+ {
+ enum mlx5dr_action_type rt;
+
+ if (action->action_type == DR_ACTION_TYP_L2_TO_TNL_L2)
+ rt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
+ else
+ rt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+
+ ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev, rt, data_sz, data,
+ &reformat_id);
+ if (ret)
+ return ret;
+
+ action->reformat.reformat_id = reformat_id;
+ action->reformat.reformat_size = data_sz;
+ return 0;
+ }
+ case DR_ACTION_TYP_TNL_L2_TO_L2:
+ {
+ return 0;
+ }
+ case DR_ACTION_TYP_TNL_L3_TO_L2:
+ {
+ /* Only an Ethernet frame is supported, with a VLAN header (18 bytes) or without one (14 bytes) */
+ if (data_sz != HDR_LEN_L2_ONLY && data_sz != HDR_LEN_L2_VLAN)
+ return -EINVAL;
+
+ action->rewrite.chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool,
+ DR_CHUNK_SIZE_8);
+ if (!action->rewrite.chunk)
+ return -ENOMEM;
+
+ action->rewrite.index = (action->rewrite.chunk->icm_addr -
+ dmn->info.caps.hdr_modify_icm_addr) /
+ ACTION_CACHE_LINE_SIZE;
+
+ ret = dr_actions_l2_rewrite(dmn, action, data, data_sz);
+ if (ret) {
+ mlx5dr_icm_free_chunk(action->rewrite.chunk);
+ return ret;
+ }
+ return 0;
+ }
+ default:
+ mlx5dr_info(dmn, "Reformat type is not supported %d\n", action->action_type);
+ return -EINVAL;
+ }
+}
+
+struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void)
+{
+ return dr_action_create_generic(DR_ACTION_TYP_POP_VLAN);
+}
+
+struct mlx5dr_action *mlx5dr_action_create_push_vlan(struct mlx5dr_domain *dmn,
+ __be32 vlan_hdr)
+{
+ u32 vlan_hdr_h = ntohl(vlan_hdr);
+ u16 ethertype = vlan_hdr_h >> 16;
+ struct mlx5dr_action *action;
+
+ if (ethertype != SVLAN_ETHERTYPE && ethertype != CVLAN_ETHERTYPE) {
+ mlx5dr_dbg(dmn, "Invalid vlan ethertype\n");
+ return NULL;
+ }
+
+ action = dr_action_create_generic(DR_ACTION_TYP_PUSH_VLAN);
+ if (!action)
+ return NULL;
+
+ action->push_vlan.vlan_hdr = vlan_hdr_h;
+ return action;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
+ enum mlx5dr_action_reformat_type reformat_type,
+ size_t data_sz,
+ void *data)
+{
+ enum mlx5dr_action_type action_type;
+ struct mlx5dr_action *action;
+ int ret;
+
+ refcount_inc(&dmn->refcount);
+
+ /* General checks */
+ ret = dr_action_reformat_to_action_type(reformat_type, &action_type);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Invalid reformat_type provided\n");
+ goto dec_ref;
+ }
+
+ ret = dr_action_verify_reformat_params(action_type, dmn, data_sz, data);
+ if (ret)
+ goto dec_ref;
+
+ action = dr_action_create_generic(action_type);
+ if (!action)
+ goto dec_ref;
+
+ action->reformat.dmn = dmn;
+
+ ret = dr_action_create_reformat_action(dmn,
+ data_sz,
+ data,
+ action);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Failed creating reformat action %d\n", ret);
+ goto free_action;
+ }
+
+ return action;
+
+free_action:
+ kfree(action);
+dec_ref:
+ refcount_dec(&dmn->refcount);
+ return NULL;
+}
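
A hedged usage sketch for the exported reformat-action constructor; the domain is assumed to be an RX or FDB domain, which is what the verification above requires for a decap action:

	struct mlx5dr_action *decap;

	decap = mlx5dr_action_create_packet_reformat(dmn,
						     DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2,
						     0, NULL);
	if (!decap)
		return -EINVAL;
	/* ... pass the action in the actions[] array given to the rule ... */
	mlx5dr_action_destroy(decap);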
+
+static const struct dr_action_modify_field_conv *
+dr_action_modify_get_hw_info(u16 sw_field)
+{
+ const struct dr_action_modify_field_conv *hw_action_info;
+
+ if (sw_field >= ARRAY_SIZE(dr_action_conv_arr))
+ goto not_found;
+
+ hw_action_info = &dr_action_conv_arr[sw_field];
+ if (!hw_action_info->end && !hw_action_info->start)
+ goto not_found;
+
+ return hw_action_info;
+
+not_found:
+ return NULL;
+}
+
+static int
+dr_action_modify_sw_to_hw(struct mlx5dr_domain *dmn,
+ __be64 *sw_action,
+ __be64 *hw_action,
+ const struct dr_action_modify_field_conv **ret_hw_info)
+{
+ const struct dr_action_modify_field_conv *hw_action_info;
+ u8 offset, length, max_length, action;
+ u16 sw_field;
+ u8 hw_opcode;
+ u32 data;
+
+ /* Get SW modify action data */
+ action = MLX5_GET(set_action_in, sw_action, action_type);
+ length = MLX5_GET(set_action_in, sw_action, length);
+ offset = MLX5_GET(set_action_in, sw_action, offset);
+ sw_field = MLX5_GET(set_action_in, sw_action, field);
+ data = MLX5_GET(set_action_in, sw_action, data);
+
+ /* Convert SW data to HW modify action format */
+ hw_action_info = dr_action_modify_get_hw_info(sw_field);
+ if (!hw_action_info) {
+ mlx5dr_dbg(dmn, "Modify action invalid field given\n");
+ return -EINVAL;
+ }
+
+ max_length = hw_action_info->end - hw_action_info->start + 1;
+
+ switch (action) {
+ case MLX5_ACTION_TYPE_SET:
+ hw_opcode = MLX5DR_ACTION_MDFY_HW_OP_SET;
+ /* The PRM defines that length zero means a length of 32 bits */
+ if (!length)
+ length = 32;
+
+ if (length + offset > max_length) {
+ mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n");
+ return -EINVAL;
+ }
+ break;
+
+ case MLX5_ACTION_TYPE_ADD:
+ hw_opcode = MLX5DR_ACTION_MDFY_HW_OP_ADD;
+ offset = 0;
+ length = max_length;
+ break;
+
+ default:
+ mlx5dr_info(dmn, "Unsupported action_type for modify action\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(dr_action_hw_set, hw_action, opcode, hw_opcode);
+
+ MLX5_SET(dr_action_hw_set, hw_action, destination_field_code,
+ hw_action_info->hw_field);
+
+ MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter,
+ hw_action_info->start + offset);
+
+ MLX5_SET(dr_action_hw_set, hw_action, destination_length,
+ length == 32 ? 0 : length);
+
+ MLX5_SET(dr_action_hw_set, hw_action, inline_data, data);
+
+ *ret_hw_info = hw_action_info;
+
+ return 0;
+}
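
As a concrete illustration of the conversion above, with the values read off dr_action_conv_arr and an arbitrary TTL of 64: a software SET of MLX5_ACTION_IN_FIELD_OUT_IP_TTL with offset 0 and length 8 ends up encoded as

	MLX5_SET(dr_action_hw_set, hw_action, opcode, MLX5DR_ACTION_MDFY_HW_OP_SET);
	MLX5_SET(dr_action_hw_set, hw_action, destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L3_1);
	MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, 8);	/* start (8) + offset (0) */
	MLX5_SET(dr_action_hw_set, hw_action, destination_length, 8);
	MLX5_SET(dr_action_hw_set, hw_action, inline_data, 64);

since the TTL entry in the conversion table maps to hw_field L3_1 with start 8 and end 15.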
+
+static int
+dr_action_modify_check_field_limitation(struct mlx5dr_domain *dmn,
+ const __be64 *sw_action)
+{
+ u16 sw_field;
+ u8 action;
+
+ sw_field = MLX5_GET(set_action_in, sw_action, field);
+ action = MLX5_GET(set_action_in, sw_action, action_type);
+
+ /* Check if SW field is supported in current domain (RX/TX) */
+ if (action == MLX5_ACTION_TYPE_SET) {
+ if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_A) {
+ if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) {
+ mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n",
+ sw_field);
+ return -EINVAL;
+ }
+ }
+
+ if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_B) {
+ if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) {
+ mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n",
+ sw_field);
+ return -EINVAL;
+ }
+ }
+ } else if (action == MLX5_ACTION_TYPE_ADD) {
+ if (sw_field != MLX5_ACTION_IN_FIELD_OUT_IP_TTL &&
+ sw_field != MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT &&
+ sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM &&
+ sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM) {
+ mlx5dr_dbg(dmn, "Unsupported field %d for add action\n", sw_field);
+ return -EINVAL;
+ }
+ } else {
+ mlx5dr_info(dmn, "Unsupported action %d modify action\n", action);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static bool
+dr_action_modify_check_is_ttl_modify(const u64 *sw_action)
+{
+ u16 sw_field = MLX5_GET(set_action_in, sw_action, field);
+
+ return sw_field == MLX5_ACTION_IN_FIELD_OUT_IP_TTL;
+}
+
+static int dr_actions_convert_modify_header(struct mlx5dr_domain *dmn,
+ u32 max_hw_actions,
+ u32 num_sw_actions,
+ __be64 sw_actions[],
+ __be64 hw_actions[],
+ u32 *num_hw_actions,
+ bool *modify_ttl)
+{
+ const struct dr_action_modify_field_conv *hw_action_info;
+ u16 hw_field = MLX5DR_ACTION_MDFY_HW_FLD_RESERVED;
+ u32 l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_NONE;
+ u32 l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_NONE;
+ int ret, i, hw_idx = 0;
+ __be64 *sw_action;
+ __be64 hw_action;
+
+ *modify_ttl = false;
+
+ for (i = 0; i < num_sw_actions; i++) {
+ sw_action = &sw_actions[i];
+
+ ret = dr_action_modify_check_field_limitation(dmn, sw_action);
+ if (ret)
+ return ret;
+
+ if (!(*modify_ttl))
+ *modify_ttl = dr_action_modify_check_is_ttl_modify(sw_action);
+
+ /* Convert SW action to HW action */
+ ret = dr_action_modify_sw_to_hw(dmn,
+ sw_action,
+ &hw_action,
+ &hw_action_info);
+ if (ret)
+ return ret;
+
+ /* Due to a HW limitation we cannot modify 2 different L3 types */
+ if (l3_type && hw_action_info->l3_type &&
+ hw_action_info->l3_type != l3_type) {
+ mlx5dr_dbg(dmn, "Action list can't support two different L3 types\n");
+ return -EINVAL;
+ }
+ if (hw_action_info->l3_type)
+ l3_type = hw_action_info->l3_type;
+
+ /* Due to a HW limitation we cannot modify two different L4 types */
+ if (l4_type && hw_action_info->l4_type &&
+ hw_action_info->l4_type != l4_type) {
+ mlx5dr_dbg(dmn, "Action list can't support two different L4 types\n");
+ return -EINVAL;
+ }
+ if (hw_action_info->l4_type)
+ l4_type = hw_action_info->l4_type;
+
+ /* HW reads and executes two actions at once; this means we
+ * need to create a gap if two consecutive actions access the
+ * same field.
+ */
+ if ((hw_idx % 2) && hw_field == hw_action_info->hw_field) {
+ /* Check that after gap insertion the total number of HW
+ * modify actions doesn't exceed the limit
+ */
+ hw_idx++;
+ if ((num_sw_actions + hw_idx - i) >= max_hw_actions) {
+ mlx5dr_dbg(dmn, "Modify header action number exceeds HW limit\n");
+ return -EINVAL;
+ }
+ }
+ hw_field = hw_action_info->hw_field;
+
+ hw_actions[hw_idx] = hw_action;
+ hw_idx++;
+ }
+
+ *num_hw_actions = hw_idx;
+
+ return 0;
+}
+
+static int dr_action_create_modify_action(struct mlx5dr_domain *dmn,
+ size_t actions_sz,
+ __be64 actions[],
+ struct mlx5dr_action *action)
+{
+ struct mlx5dr_icm_chunk *chunk;
+ u32 max_hw_actions;
+ u32 num_hw_actions;
+ u32 num_sw_actions;
+ __be64 *hw_actions;
+ bool modify_ttl;
+ int ret;
+
+ num_sw_actions = actions_sz / DR_MODIFY_ACTION_SIZE;
+ max_hw_actions = mlx5dr_icm_pool_chunk_size_to_entries(DR_CHUNK_SIZE_16);
+
+ if (num_sw_actions > max_hw_actions) {
+ mlx5dr_dbg(dmn, "Max number of actions %d exceeds limit %d\n",
+ num_sw_actions, max_hw_actions);
+ return -EINVAL;
+ }
+
+ chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, DR_CHUNK_SIZE_16);
+ if (!chunk)
+ return -ENOMEM;
+
+ hw_actions = kcalloc(1, max_hw_actions * DR_MODIFY_ACTION_SIZE, GFP_KERNEL);
+ if (!hw_actions) {
+ ret = -ENOMEM;
+ goto free_chunk;
+ }
+
+ ret = dr_actions_convert_modify_header(dmn,
+ max_hw_actions,
+ num_sw_actions,
+ actions,
+ hw_actions,
+ &num_hw_actions,
+ &modify_ttl);
+ if (ret)
+ goto free_hw_actions;
+
+ action->rewrite.chunk = chunk;
+ action->rewrite.modify_ttl = modify_ttl;
+ action->rewrite.data = (u8 *)hw_actions;
+ action->rewrite.num_of_actions = num_hw_actions;
+ action->rewrite.index = (chunk->icm_addr -
+ dmn->info.caps.hdr_modify_icm_addr) /
+ ACTION_CACHE_LINE_SIZE;
+
+ ret = mlx5dr_send_postsend_action(dmn, action);
+ if (ret)
+ goto free_hw_actions;
+
+ return 0;
+
+free_hw_actions:
+ kfree(hw_actions);
+free_chunk:
+ mlx5dr_icm_free_chunk(chunk);
+ return ret;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_modify_header(struct mlx5dr_domain *dmn,
+ u32 flags,
+ size_t actions_sz,
+ __be64 actions[])
+{
+ struct mlx5dr_action *action;
+ int ret = 0;
+
+ refcount_inc(&dmn->refcount);
+
+ if (actions_sz % DR_MODIFY_ACTION_SIZE) {
+ mlx5dr_dbg(dmn, "Invalid modify actions size provided\n");
+ goto dec_ref;
+ }
+
+ action = dr_action_create_generic(DR_ACTION_TYP_MODIFY_HDR);
+ if (!action)
+ goto dec_ref;
+
+ action->rewrite.dmn = dmn;
+
+ ret = dr_action_create_modify_action(dmn,
+ actions_sz,
+ actions,
+ action);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Failed creating modify header action %d\n", ret);
+ goto free_action;
+ }
+
+ return action;
+
+free_action:
+ kfree(action);
+dec_ref:
+ refcount_dec(&dmn->refcount);
+ return NULL;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn,
+ u32 vport, u8 vhca_id_valid,
+ u16 vhca_id)
+{
+ struct mlx5dr_cmd_vport_cap *vport_cap;
+ struct mlx5dr_domain *vport_dmn;
+ struct mlx5dr_action *action;
+ u8 peer_vport;
+
+ peer_vport = vhca_id_valid && (vhca_id != dmn->info.caps.gvmi);
+ vport_dmn = peer_vport ? dmn->peer_dmn : dmn;
+ if (!vport_dmn) {
+ mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n");
+ return NULL;
+ }
+
+ if (vport_dmn->type != MLX5DR_DOMAIN_TYPE_FDB) {
+ mlx5dr_dbg(dmn, "Domain doesn't support vport actions\n");
+ return NULL;
+ }
+
+ vport_cap = mlx5dr_get_vport_cap(&vport_dmn->info.caps, vport);
+ if (!vport_cap) {
+ mlx5dr_dbg(dmn, "Failed to get vport %d caps\n", vport);
+ return NULL;
+ }
+
+ action = dr_action_create_generic(DR_ACTION_TYP_VPORT);
+ if (!action)
+ return NULL;
+
+ action->vport.dmn = vport_dmn;
+ action->vport.caps = vport_cap;
+
+ return action;
+}
+
+int mlx5dr_action_destroy(struct mlx5dr_action *action)
+{
+ if (refcount_read(&action->refcount) > 1)
+ return -EBUSY;
+
+ switch (action->action_type) {
+ case DR_ACTION_TYP_FT:
+ if (!action->dest_tbl.is_fw_tbl)
+ refcount_dec(&action->dest_tbl.tbl->refcount);
+ break;
+ case DR_ACTION_TYP_TNL_L2_TO_L2:
+ refcount_dec(&action->reformat.dmn->refcount);
+ break;
+ case DR_ACTION_TYP_TNL_L3_TO_L2:
+ mlx5dr_icm_free_chunk(action->rewrite.chunk);
+ refcount_dec(&action->reformat.dmn->refcount);
+ break;
+ case DR_ACTION_TYP_L2_TO_TNL_L2:
+ case DR_ACTION_TYP_L2_TO_TNL_L3:
+ mlx5dr_cmd_destroy_reformat_ctx((action->reformat.dmn)->mdev,
+ action->reformat.reformat_id);
+ refcount_dec(&action->reformat.dmn->refcount);
+ break;
+ case DR_ACTION_TYP_MODIFY_HDR:
+ mlx5dr_icm_free_chunk(action->rewrite.chunk);
+ refcount_dec(&action->rewrite.dmn->refcount);
+ break;
+ default:
+ break;
+ }
+
+ kfree(action);
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
new file mode 100644
index 000000000000..41662c4e2664
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev,
+ bool other_vport,
+ u16 vport_number,
+ u64 *icm_address_rx,
+ u64 *icm_address_tx)
+{
+ u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
+ int err;
+
+ MLX5_SET(query_esw_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+ MLX5_SET(query_esw_vport_context_in, in, other_vport, other_vport);
+ MLX5_SET(query_esw_vport_context_in, in, vport_number, vport_number);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ *icm_address_rx =
+ MLX5_GET64(query_esw_vport_context_out, out,
+ esw_vport_context.sw_steering_vport_icm_address_rx);
+ *icm_address_tx =
+ MLX5_GET64(query_esw_vport_context_out, out,
+ esw_vport_context.sw_steering_vport_icm_address_tx);
+ return 0;
+}
+
+int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport,
+ u16 vport_number, u16 *gvmi)
+{
+ u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
+ int out_size;
+ void *out;
+ int err;
+
+ out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ out = kzalloc(out_size, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, other_function, other_vport);
+ MLX5_SET(query_hca_cap_in, in, function_id, vport_number);
+ MLX5_SET(query_hca_cap_in, in, op_mod,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
+ HCA_CAP_OPMOD_GET_CUR);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_size);
+ if (err) {
+ kfree(out);
+ return err;
+ }
+
+ *gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id);
+
+ kfree(out);
+ return 0;
+}
+
+int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev,
+ struct mlx5dr_esw_caps *caps)
+{
+ caps->drop_icm_address_rx =
+ MLX5_CAP64_ESW_FLOWTABLE(mdev,
+ sw_steering_fdb_action_drop_icm_address_rx);
+ caps->drop_icm_address_tx =
+ MLX5_CAP64_ESW_FLOWTABLE(mdev,
+ sw_steering_fdb_action_drop_icm_address_tx);
+ caps->uplink_icm_address_rx =
+ MLX5_CAP64_ESW_FLOWTABLE(mdev,
+ sw_steering_uplink_icm_address_rx);
+ caps->uplink_icm_address_tx =
+ MLX5_CAP64_ESW_FLOWTABLE(mdev,
+ sw_steering_uplink_icm_address_tx);
+ caps->sw_owner =
+ MLX5_CAP_ESW_FLOWTABLE_FDB(mdev,
+ sw_owner);
+
+ return 0;
+}
+
+int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
+ struct mlx5dr_cmd_caps *caps)
+{
+ caps->prio_tag_required = MLX5_CAP_GEN(mdev, prio_tag_required);
+ caps->eswitch_manager = MLX5_CAP_GEN(mdev, eswitch_manager);
+ caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id);
+ caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols);
+
+ if (mlx5dr_matcher_supp_flex_parser_icmp_v4(caps)) {
+ caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0);
+ caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1);
+ }
+
+ if (mlx5dr_matcher_supp_flex_parser_icmp_v6(caps)) {
+ caps->flex_parser_id_icmpv6_dw0 =
+ MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw0);
+ caps->flex_parser_id_icmpv6_dw1 =
+ MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw1);
+ }
+
+ caps->nic_rx_drop_address =
+ MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_rx_action_drop_icm_address);
+ caps->nic_tx_drop_address =
+ MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_drop_icm_address);
+ caps->nic_tx_allow_address =
+ MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_allow_icm_address);
+
+ caps->rx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner);
+ caps->max_ft_level = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_ft_level);
+
+ caps->tx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner);
+
+ caps->log_icm_size = MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size);
+ caps->hdr_modify_icm_addr =
+ MLX5_CAP64_DEV_MEM(mdev, header_modify_sw_icm_start_address);
+
+ caps->roce_min_src_udp = MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port);
+
+ return 0;
+}
+
+int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev,
+ enum fs_flow_table_type type,
+ u32 table_id,
+ struct mlx5dr_cmd_query_flow_table_details *output)
+{
+ u32 out[MLX5_ST_SZ_DW(query_flow_table_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_flow_table_in)] = {};
+ int err;
+
+ MLX5_SET(query_flow_table_in, in, opcode,
+ MLX5_CMD_OP_QUERY_FLOW_TABLE);
+
+ MLX5_SET(query_flow_table_in, in, table_type, type);
+ MLX5_SET(query_flow_table_in, in, table_id, table_id);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ output->status = MLX5_GET(query_flow_table_out, out, status);
+ output->level = MLX5_GET(query_flow_table_out, out, flow_table_context.level);
+
+ output->sw_owner_icm_root_1 = MLX5_GET64(query_flow_table_out, out,
+ flow_table_context.sw_owner_icm_root_1);
+ output->sw_owner_icm_root_0 = MLX5_GET64(query_flow_table_out, out,
+ flow_table_context.sw_owner_icm_root_0);
+
+ return 0;
+}
+
+int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev)
+{
+ u32 out[MLX5_ST_SZ_DW(sync_steering_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(sync_steering_in)] = {};
+
+ MLX5_SET(sync_steering_in, in, opcode, MLX5_CMD_OP_SYNC_STEERING);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id,
+ u32 group_id,
+ u32 modify_header_id,
+ u32 vport_id)
+{
+ u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {};
+ void *in_flow_context;
+ unsigned int inlen;
+ void *in_dests;
+ u32 *in;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
+ 1 * MLX5_ST_SZ_BYTES(dest_format_struct); /* One destination only */
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY);
+ MLX5_SET(set_fte_in, in, table_type, table_type);
+ MLX5_SET(set_fte_in, in, table_id, table_id);
+
+ in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
+ MLX5_SET(flow_context, in_flow_context, group_id, group_id);
+ MLX5_SET(flow_context, in_flow_context, modify_header_id, modify_header_id);
+ MLX5_SET(flow_context, in_flow_context, destination_list_size, 1);
+ MLX5_SET(flow_context, in_flow_context, action,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR);
+
+ in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
+ MLX5_SET(dest_format_struct, in_dests, destination_type,
+ MLX5_FLOW_DESTINATION_TYPE_VPORT);
+ MLX5_SET(dest_format_struct, in_dests, destination_id, vport_id);
+
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ kvfree(in);
+
+ return err;
+}
+
+int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id)
+{
+ u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {};
+
+ MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
+ MLX5_SET(delete_fte_in, in, table_type, table_type);
+ MLX5_SET(delete_fte_in, in, table_id, table_id);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u8 num_of_actions,
+ u64 *actions,
+ u32 *modify_header_id)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)] = {};
+ void *p_actions;
+ u32 inlen;
+ u32 *in;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) +
+ num_of_actions * sizeof(u64);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(alloc_modify_header_context_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type);
+ MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_of_actions);
+ p_actions = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions);
+ memcpy(p_actions, actions, num_of_actions * sizeof(u64));
+
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ if (err)
+ goto out;
+
+ *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out,
+ modify_header_id);
+out:
+ kvfree(in);
+ return err;
+}
+
+int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev,
+ u32 modify_header_id)
+{
+ u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {};
+
+ MLX5_SET(dealloc_modify_header_context_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id,
+ modify_header_id);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id,
+ u32 *group_id)
+{
+ u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {};
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ u32 *in;
+ int err;
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP);
+ MLX5_SET(create_flow_group_in, in, table_type, table_type);
+ MLX5_SET(create_flow_group_in, in, table_id, table_id);
+
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ if (err)
+ goto out;
+
+ *group_id = MLX5_GET(create_flow_group_out, out, group_id);
+
+out:
+ kfree(in);
+ return err;
+}
+
+int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id,
+ u32 group_id)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {};
+
+ MLX5_SET(destroy_flow_group_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+ MLX5_SET(destroy_flow_group_in, in, table_type, table_type);
+ MLX5_SET(destroy_flow_group_in, in, table_id, table_id);
+ MLX5_SET(destroy_flow_group_in, in, group_id, group_id);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u64 icm_addr_rx,
+ u64 icm_addr_tx,
+ u8 level,
+ bool sw_owner,
+ bool term_tbl,
+ u64 *fdb_rx_icm_addr,
+ u32 *table_id)
+{
+ u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {};
+ void *ft_mdev;
+ int err;
+
+ MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE);
+ MLX5_SET(create_flow_table_in, in, table_type, table_type);
+
+ ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context);
+ MLX5_SET(flow_table_context, ft_mdev, termination_table, term_tbl);
+ MLX5_SET(flow_table_context, ft_mdev, sw_owner, sw_owner);
+ MLX5_SET(flow_table_context, ft_mdev, level, level);
+
+ if (sw_owner) {
+ /* icm_addr_0 used for FDB RX / NIC TX / NIC_RX
+ * icm_addr_1 used for FDB TX
+ */
+ if (table_type == MLX5_FLOW_TABLE_TYPE_NIC_RX) {
+ MLX5_SET64(flow_table_context, ft_mdev,
+ sw_owner_icm_root_0, icm_addr_rx);
+ } else if (table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) {
+ MLX5_SET64(flow_table_context, ft_mdev,
+ sw_owner_icm_root_0, icm_addr_tx);
+ } else if (table_type == MLX5_FLOW_TABLE_TYPE_FDB) {
+ MLX5_SET64(flow_table_context, ft_mdev,
+ sw_owner_icm_root_0, icm_addr_rx);
+ MLX5_SET64(flow_table_context, ft_mdev,
+ sw_owner_icm_root_1, icm_addr_tx);
+ }
+ }
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ *table_id = MLX5_GET(create_flow_table_out, out, table_id);
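+ /* For FW-owned FDB tables, assemble the 64-bit RX ICM address from its
+ * three bit-field parts
+ */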
+ if (!sw_owner && table_type == MLX5_FLOW_TABLE_TYPE_FDB)
+ *fdb_rx_icm_addr =
+ (u64)MLX5_GET(create_flow_table_out, out, icm_address_31_0) |
+ (u64)MLX5_GET(create_flow_table_out, out, icm_address_39_32) << 32 |
+ (u64)MLX5_GET(create_flow_table_out, out, icm_address_63_40) << 40;
+
+ return 0;
+}
+
+int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev,
+ u32 table_id,
+ u32 table_type)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {};
+
+ MLX5_SET(destroy_flow_table_in, in, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_TABLE);
+ MLX5_SET(destroy_flow_table_in, in, table_type, table_type);
+ MLX5_SET(destroy_flow_table_in, in, table_id, table_id);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
+ enum mlx5_reformat_ctx_type rt,
+ size_t reformat_size,
+ void *reformat_data,
+ u32 *reformat_id)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {};
+ size_t inlen, cmd_data_sz, cmd_total_sz;
+ void *prctx;
+ void *pdata;
+ void *in;
+ int err;
+
+ cmd_total_sz = MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in);
+ cmd_data_sz = MLX5_FLD_SZ_BYTES(alloc_packet_reformat_context_in,
+ packet_reformat_context.reformat_data);
+ inlen = ALIGN(cmd_total_sz + reformat_size - cmd_data_sz, 4);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(alloc_packet_reformat_context_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT);
+
+ prctx = MLX5_ADDR_OF(alloc_packet_reformat_context_in, in, packet_reformat_context);
+ pdata = MLX5_ADDR_OF(packet_reformat_context_in, prctx, reformat_data);
+
+ MLX5_SET(packet_reformat_context_in, prctx, reformat_type, rt);
+ MLX5_SET(packet_reformat_context_in, prctx, reformat_data_size, reformat_size);
+ memcpy(pdata, reformat_data, reformat_size);
+
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ if (err)
+ goto out;
+
+ *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id);
+out:
+ kvfree(in);
+
+ return err;
+}
+
+void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev,
+ u32 reformat_id)
+{
+ u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {};
+
+ MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
+ reformat_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
+ u16 index, struct mlx5dr_cmd_gid_attr *attr)
+{
+ u32 out[MLX5_ST_SZ_DW(query_roce_address_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_roce_address_in)] = {};
+ int err;
+
+ MLX5_SET(query_roce_address_in, in, opcode,
+ MLX5_CMD_OP_QUERY_ROCE_ADDRESS);
+
+ MLX5_SET(query_roce_address_in, in, roce_address_index, index);
+ MLX5_SET(query_roce_address_in, in, vhca_port_num, vhca_port_num);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ memcpy(&attr->gid,
+ MLX5_ADDR_OF(query_roce_address_out,
+ out, roce_address.source_l3_address),
+ sizeof(attr->gid));
+ memcpy(attr->mac,
+ MLX5_ADDR_OF(query_roce_address_out, out,
+ roce_address.source_mac_47_32),
+ sizeof(attr->mac));
+
+ if (MLX5_GET(query_roce_address_out, out,
+ roce_address.roce_version) == MLX5_ROCE_VERSION_2)
+ attr->roce_ver = MLX5_ROCE_VERSION_2;
+ else
+ attr->roce_ver = MLX5_ROCE_VERSION_1;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c
new file mode 100644
index 000000000000..9e2eccbb1eb8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+/* Copyright (c) 2011-2015 Stephan Brumme. All rights reserved.
+ * Slicing-by-16 contributed by Bulat Ziganshin
+ *
+ * This software is provided 'as-is', without any express or implied warranty.
+ * In no event will the author be held liable for any damages arising from the
+ * use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software.
+ * 2. If you use this software in a product, an acknowledgment in the product
+ * documentation would be appreciated but is not required.
+ * 3. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ *
+ * Taken from http://create.stephan-brumme.com/crc32/ and adapted.
+ */
+
+#include "dr_types.h"
+
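+/* Reversed (reflected) representation of the standard CRC-32 polynomial 0x04C11DB7 */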
+#define DR_STE_CRC_POLY 0xEDB88320L
+
+static u32 dr_ste_crc_tab32[8][256];
+
+static void dr_crc32_calc_lookup_entry(u32 (*tbl)[256], u8 i, u8 j)
+{
+ tbl[i][j] = (tbl[i - 1][j] >> 8) ^ tbl[0][tbl[i - 1][j] & 0xff];
+}
+
+void mlx5dr_crc32_init_table(void)
+{
+ u32 crc, i, j;
+
+ for (i = 0; i < 256; i++) {
+ crc = i;
+ for (j = 0; j < 8; j++) {
+ if (crc & 0x00000001L)
+ crc = (crc >> 1) ^ DR_STE_CRC_POLY;
+ else
+ crc = crc >> 1;
+ }
+ dr_ste_crc_tab32[0][i] = crc;
+ }
+
+ /* Init CRC lookup tables according to crc_slice_8 algorithm */
+ for (i = 0; i < 256; i++) {
+ dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 1, i);
+ dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 2, i);
+ dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 3, i);
+ dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 4, i);
+ dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 5, i);
+ dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 6, i);
+ dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 7, i);
+ }
+}
+
+/* Compute CRC32 (Slicing-by-8 algorithm) */
+u32 mlx5dr_crc32_slice8_calc(const void *input_data, size_t length)
+{
+ const u32 *curr = (const u32 *)input_data;
+ const u8 *curr_char;
+ u32 crc = 0, one, two;
+
+ if (!input_data)
+ return 0;
+
+ /* Process eight bytes at once (Slicing-by-8) */
+ while (length >= 8) {
+ one = *curr++ ^ crc;
+ two = *curr++;
+
+ crc = dr_ste_crc_tab32[0][(two >> 24) & 0xff]
+ ^ dr_ste_crc_tab32[1][(two >> 16) & 0xff]
+ ^ dr_ste_crc_tab32[2][(two >> 8) & 0xff]
+ ^ dr_ste_crc_tab32[3][two & 0xff]
+ ^ dr_ste_crc_tab32[4][(one >> 24) & 0xff]
+ ^ dr_ste_crc_tab32[5][(one >> 16) & 0xff]
+ ^ dr_ste_crc_tab32[6][(one >> 8) & 0xff]
+ ^ dr_ste_crc_tab32[7][one & 0xff];
+
+ length -= 8;
+ }
+
+ curr_char = (const u8 *)curr;
+ /* Remaining 1 to 7 bytes (standard algorithm) */
+ while (length-- != 0)
+ crc = (crc >> 8) ^ dr_ste_crc_tab32[0][(crc & 0xff)
+ ^ *curr_char++];
+
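+ /* Byte-swap the 32-bit result before returning */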
+ return ((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) |
+ ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
new file mode 100644
index 000000000000..3b9cf0bccf4d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/mlx5/eswitch.h>
+#include "dr_types.h"
+
+static int dr_domain_init_cache(struct mlx5dr_domain *dmn)
+{
+ /* Per-vport cached FW flow table for checksum recalculation; this
+ * recalculation is needed due to a HW bug.
+ */
+ dmn->cache.recalc_cs_ft = kcalloc(dmn->info.caps.num_vports,
+ sizeof(dmn->cache.recalc_cs_ft[0]),
+ GFP_KERNEL);
+ if (!dmn->cache.recalc_cs_ft)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void dr_domain_uninit_cache(struct mlx5dr_domain *dmn)
+{
+ int i;
+
+ for (i = 0; i < dmn->info.caps.num_vports; i++) {
+ if (!dmn->cache.recalc_cs_ft[i])
+ continue;
+
+ mlx5dr_fw_destroy_recalc_cs_ft(dmn, dmn->cache.recalc_cs_ft[i]);
+ }
+
+ kfree(dmn->cache.recalc_cs_ft);
+}
+
+int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn,
+ u32 vport_num,
+ u64 *rx_icm_addr)
+{
+ struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft;
+
+ recalc_cs_ft = dmn->cache.recalc_cs_ft[vport_num];
+ if (!recalc_cs_ft) {
+ /* Table not in cache, need to allocate a new one */
+ recalc_cs_ft = mlx5dr_fw_create_recalc_cs_ft(dmn, vport_num);
+ if (!recalc_cs_ft)
+ return -EINVAL;
+
+ dmn->cache.recalc_cs_ft[vport_num] = recalc_cs_ft;
+ }
+
+ *rx_icm_addr = recalc_cs_ft->rx_icm_addr;
+
+ return 0;
+}
+
+static int dr_domain_init_resources(struct mlx5dr_domain *dmn)
+{
+ int ret;
+
+ ret = mlx5_core_alloc_pd(dmn->mdev, &dmn->pdn);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Couldn't allocate PD\n");
+ return ret;
+ }
+
+ dmn->uar = mlx5_get_uars_page(dmn->mdev);
+ if (!dmn->uar) {
+ mlx5dr_err(dmn, "Couldn't allocate UAR\n");
+ goto clean_pd;
+ }
+
+ dmn->ste_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_STE);
+ if (!dmn->ste_icm_pool) {
+ mlx5dr_err(dmn, "Couldn't get icm memory for %s\n",
+ dev_name(dmn->mdev->device));
+ goto clean_uar;
+ }
+
+ dmn->action_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_MODIFY_ACTION);
+ if (!dmn->action_icm_pool) {
+ mlx5dr_err(dmn, "Couldn't get action icm memory for %s\n",
+ dev_name(dmn->mdev->device));
+ goto free_ste_icm_pool;
+ }
+
+ ret = mlx5dr_send_ring_alloc(dmn);
+ if (ret) {
+ mlx5dr_err(dmn, "Couldn't create send-ring for %s\n",
+ dev_name(dmn->mdev->device));
+ goto free_action_icm_pool;
+ }
+
+ return 0;
+
+free_action_icm_pool:
+ mlx5dr_icm_pool_destroy(dmn->action_icm_pool);
+free_ste_icm_pool:
+ mlx5dr_icm_pool_destroy(dmn->ste_icm_pool);
+clean_uar:
+ mlx5_put_uars_page(dmn->mdev, dmn->uar);
+clean_pd:
+ mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn);
+
+ return ret;
+}
+
+static void dr_domain_uninit_resources(struct mlx5dr_domain *dmn)
+{
+ mlx5dr_send_ring_free(dmn, dmn->send_ring);
+ mlx5dr_icm_pool_destroy(dmn->action_icm_pool);
+ mlx5dr_icm_pool_destroy(dmn->ste_icm_pool);
+ mlx5_put_uars_page(dmn->mdev, dmn->uar);
+ mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn);
+}
+
+static int dr_domain_query_vport(struct mlx5dr_domain *dmn,
+ bool other_vport,
+ u16 vport_number)
+{
+ struct mlx5dr_cmd_vport_cap *vport_caps;
+ int ret;
+
+ vport_caps = &dmn->info.caps.vports_caps[vport_number];
+
+ ret = mlx5dr_cmd_query_esw_vport_context(dmn->mdev,
+ other_vport,
+ vport_number,
+ &vport_caps->icm_address_rx,
+ &vport_caps->icm_address_tx);
+ if (ret)
+ return ret;
+
+ ret = mlx5dr_cmd_query_gvmi(dmn->mdev,
+ other_vport,
+ vport_number,
+ &vport_caps->vport_gvmi);
+ if (ret)
+ return ret;
+
+ vport_caps->num = vport_number;
+ vport_caps->vhca_gvmi = dmn->info.caps.gvmi;
+
+ return 0;
+}
+
+static int dr_domain_query_vports(struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_esw_caps *esw_caps = &dmn->info.caps.esw_caps;
+ struct mlx5dr_cmd_vport_cap *wire_vport;
+ int vport;
+ int ret;
+
+ /* Query vports (except wire vport) */
+ for (vport = 0; vport < dmn->info.caps.num_esw_ports - 1; vport++) {
+ ret = dr_domain_query_vport(dmn, !!vport, vport);
+ if (ret)
+ return ret;
+ }
+
+ /* Last vport is the wire port */
+ wire_vport = &dmn->info.caps.vports_caps[vport];
+ wire_vport->num = WIRE_PORT;
+ wire_vport->icm_address_rx = esw_caps->uplink_icm_address_rx;
+ wire_vport->icm_address_tx = esw_caps->uplink_icm_address_tx;
+ wire_vport->vport_gvmi = 0;
+ wire_vport->vhca_gvmi = dmn->info.caps.gvmi;
+
+ return 0;
+}
+
+static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev,
+ struct mlx5dr_domain *dmn)
+{
+ int ret;
+
+ if (!dmn->info.caps.eswitch_manager)
+ return -EOPNOTSUPP;
+
+ ret = mlx5dr_cmd_query_esw_caps(mdev, &dmn->info.caps.esw_caps);
+ if (ret)
+ return ret;
+
+ dmn->info.caps.fdb_sw_owner = dmn->info.caps.esw_caps.sw_owner;
+ dmn->info.caps.esw_rx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_rx;
+ dmn->info.caps.esw_tx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_tx;
+
+ dmn->info.caps.vports_caps = kcalloc(dmn->info.caps.num_esw_ports,
+ sizeof(dmn->info.caps.vports_caps[0]),
+ GFP_KERNEL);
+ if (!dmn->info.caps.vports_caps)
+ return -ENOMEM;
+
+ ret = dr_domain_query_vports(dmn);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Failed to query vports caps\n");
+ goto free_vports_caps;
+ }
+
+ dmn->info.caps.num_vports = dmn->info.caps.num_esw_ports - 1;
+
+ return 0;
+
+free_vports_caps:
+ kfree(dmn->info.caps.vports_caps);
+ dmn->info.caps.vports_caps = NULL;
+ return ret;
+}
+
+static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
+ struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_cmd_vport_cap *vport_cap;
+ int ret;
+
+ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) {
+ mlx5dr_dbg(dmn, "Failed to allocate domain, bad link type\n");
+ return -EOPNOTSUPP;
+ }
+
+ dmn->info.caps.num_esw_ports = mlx5_eswitch_get_total_vports(mdev);
+
+ ret = mlx5dr_cmd_query_device(mdev, &dmn->info.caps);
+ if (ret)
+ return ret;
+
+ ret = dr_domain_query_fdb_caps(mdev, dmn);
+ if (ret)
+ return ret;
+
+ switch (dmn->type) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ if (!dmn->info.caps.rx_sw_owner)
+ return -ENOTSUPP;
+
+ dmn->info.supp_sw_steering = true;
+ dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX;
+ dmn->info.rx.default_icm_addr = dmn->info.caps.nic_rx_drop_address;
+ dmn->info.rx.drop_icm_addr = dmn->info.caps.nic_rx_drop_address;
+ break;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ if (!dmn->info.caps.tx_sw_owner)
+ return -ENOTSUPP;
+
+ dmn->info.supp_sw_steering = true;
+ dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX;
+ dmn->info.tx.default_icm_addr = dmn->info.caps.nic_tx_allow_address;
+ dmn->info.tx.drop_icm_addr = dmn->info.caps.nic_tx_drop_address;
+ break;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ if (!dmn->info.caps.eswitch_manager)
+ return -ENOTSUPP;
+
+ if (!dmn->info.caps.fdb_sw_owner)
+ return -ENOTSUPP;
+
+ dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX;
+ dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX;
+ vport_cap = mlx5dr_get_vport_cap(&dmn->info.caps, 0);
+ if (!vport_cap) {
+ mlx5dr_dbg(dmn, "Failed to get esw manager vport\n");
+ return -ENOENT;
+ }
+
+ dmn->info.supp_sw_steering = true;
+ dmn->info.tx.default_icm_addr = vport_cap->icm_address_tx;
+ dmn->info.rx.default_icm_addr = vport_cap->icm_address_rx;
+ dmn->info.rx.drop_icm_addr = dmn->info.caps.esw_rx_drop_address;
+ dmn->info.tx.drop_icm_addr = dmn->info.caps.esw_tx_drop_address;
+ break;
+ default:
+ mlx5dr_dbg(dmn, "Invalid domain\n");
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static void dr_domain_caps_uninit(struct mlx5dr_domain *dmn)
+{
+ kfree(dmn->info.caps.vports_caps);
+}
+
+struct mlx5dr_domain *
+mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
+{
+ struct mlx5dr_domain *dmn;
+ int ret;
+
+ if (type > MLX5DR_DOMAIN_TYPE_FDB)
+ return NULL;
+
+ dmn = kzalloc(sizeof(*dmn), GFP_KERNEL);
+ if (!dmn)
+ return NULL;
+
+ dmn->mdev = mdev;
+ dmn->type = type;
+ refcount_set(&dmn->refcount, 1);
+ mutex_init(&dmn->mutex);
+
+ if (dr_domain_caps_init(mdev, dmn)) {
+ mlx5dr_dbg(dmn, "Failed init domain, no caps\n");
+ goto free_domain;
+ }
+
+ dmn->info.max_log_action_icm_sz = DR_CHUNK_SIZE_4K;
+ dmn->info.max_log_sw_icm_sz = min_t(u32, DR_CHUNK_SIZE_1024K,
+ dmn->info.caps.log_icm_size);
+
+ if (!dmn->info.supp_sw_steering) {
+ mlx5dr_err(dmn, "SW steering not supported for %s\n",
+ dev_name(mdev->device));
+ goto uninit_caps;
+ }
+
+ /* Allocate resources */
+ ret = dr_domain_init_resources(dmn);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed init domain resources for %s\n",
+ dev_name(mdev->device));
+ goto uninit_caps;
+ }
+
+ ret = dr_domain_init_cache(dmn);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed initialize domain cache\n");
+ goto uninit_resources;
+ }
+
+ /* Init CRC table for htbl CRC calculation */
+ mlx5dr_crc32_init_table();
+
+ return dmn;
+
+uninit_resources:
+ dr_domain_uninit_resources(dmn);
+uninit_caps:
+ dr_domain_caps_uninit(dmn);
+free_domain:
+ kfree(dmn);
+ return NULL;
+}
+
+/* Assure synchronization of the device steering tables with updates made by SW
+ * insertion.
+ */
+int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags)
+{
+ int ret = 0;
+
+ if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) {
+ mutex_lock(&dmn->mutex);
+ ret = mlx5dr_send_ring_force_drain(dmn);
+ mutex_unlock(&dmn->mutex);
+ if (ret)
+ return ret;
+ }
+
+ if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW)
+ ret = mlx5dr_cmd_sync_steering(dmn->mdev);
+
+ return ret;
+}
+
+int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
+{
+ if (refcount_read(&dmn->refcount) > 1)
+ return -EBUSY;
+
+ /* make sure resources are not used by the hardware */
+ mlx5dr_cmd_sync_steering(dmn->mdev);
+ dr_domain_uninit_cache(dmn);
+ dr_domain_uninit_resources(dmn);
+ dr_domain_caps_uninit(dmn);
+ mutex_destroy(&dmn->mutex);
+ kfree(dmn);
+ return 0;
+}
+
+void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
+ struct mlx5dr_domain *peer_dmn)
+{
+ mutex_lock(&dmn->mutex);
+
+ if (dmn->peer_dmn)
+ refcount_dec(&dmn->peer_dmn->refcount);
+
+ dmn->peer_dmn = peer_dmn;
+
+ if (dmn->peer_dmn)
+ refcount_inc(&dmn->peer_dmn->refcount);
+
+ mutex_unlock(&dmn->mutex);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
new file mode 100644
index 000000000000..60ef6e6171e3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/types.h>
+#include "dr_types.h"
+
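+/* Checksum-recalculation workaround: build a FW-owned flow table with a single
+ * rule that adds zero to the IP TTL and forwards to the given vport. Rules can
+ * point at this table (via the RX ICM address returned here) so that traffic
+ * passing through it gets its checksum recalculated; see dr_domain_init_cache()
+ * for the per-vport cache of these tables.
+ */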
+struct mlx5dr_fw_recalc_cs_ft *
+mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num)
+{
+ struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft;
+ u32 table_id, group_id, modify_hdr_id;
+ u64 rx_icm_addr, modify_ttl_action;
+ int ret;
+
+ recalc_cs_ft = kzalloc(sizeof(*recalc_cs_ft), GFP_KERNEL);
+ if (!recalc_cs_ft)
+ return NULL;
+
+ ret = mlx5dr_cmd_create_flow_table(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB,
+ 0, 0, dmn->info.caps.max_ft_level - 1,
+ false, true, &rx_icm_addr, &table_id);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed creating TTL W/A FW flow table %d\n", ret);
+ goto free_ttl_tbl;
+ }
+
+ ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev,
+ MLX5_FLOW_TABLE_TYPE_FDB,
+ table_id, &group_id);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed creating TTL W/A FW flow group %d\n", ret);
+ goto destroy_flow_table;
+ }
+
+ /* Modify TTL action by adding zero to trigger CS recalculation */
+ modify_ttl_action = 0;
+ MLX5_SET(set_action_in, &modify_ttl_action, action_type, MLX5_ACTION_TYPE_ADD);
+ MLX5_SET(set_action_in, &modify_ttl_action, field, MLX5_ACTION_IN_FIELD_OUT_IP_TTL);
+
+ ret = mlx5dr_cmd_alloc_modify_header(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, 1,
+ &modify_ttl_action,
+ &modify_hdr_id);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed modify header TTL %d\n", ret);
+ goto destroy_flow_group;
+ }
+
+ ret = mlx5dr_cmd_set_fte_modify_and_vport(dmn->mdev,
+ MLX5_FLOW_TABLE_TYPE_FDB,
+ table_id, group_id, modify_hdr_id,
+ vport_num);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed setting TTL W/A flow table entry %d\n", ret);
+ goto dealloc_modify_header;
+ }
+
+ recalc_cs_ft->modify_hdr_id = modify_hdr_id;
+ recalc_cs_ft->rx_icm_addr = rx_icm_addr;
+ recalc_cs_ft->table_id = table_id;
+ recalc_cs_ft->group_id = group_id;
+
+ return recalc_cs_ft;
+
+dealloc_modify_header:
+ mlx5dr_cmd_dealloc_modify_header(dmn->mdev, modify_hdr_id);
+destroy_flow_group:
+ mlx5dr_cmd_destroy_flow_group(dmn->mdev,
+ MLX5_FLOW_TABLE_TYPE_FDB,
+ table_id, group_id);
+destroy_flow_table:
+ mlx5dr_cmd_destroy_flow_table(dmn->mdev, table_id, MLX5_FLOW_TABLE_TYPE_FDB);
+free_ttl_tbl:
+ kfree(recalc_cs_ft);
+ return NULL;
+}
+
+void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn,
+ struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft)
+{
+ mlx5dr_cmd_del_flow_table_entry(dmn->mdev,
+ MLX5_FLOW_TABLE_TYPE_FDB,
+ recalc_cs_ft->table_id);
+ mlx5dr_cmd_dealloc_modify_header(dmn->mdev, recalc_cs_ft->modify_hdr_id);
+ mlx5dr_cmd_destroy_flow_group(dmn->mdev,
+ MLX5_FLOW_TABLE_TYPE_FDB,
+ recalc_cs_ft->table_id,
+ recalc_cs_ft->group_id);
+ mlx5dr_cmd_destroy_flow_table(dmn->mdev,
+ recalc_cs_ft->table_id,
+ MLX5_FLOW_TABLE_TYPE_FDB);
+
+ kfree(recalc_cs_ft);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
new file mode 100644
index 000000000000..e76f61e7555e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
@@ -0,0 +1,570 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64
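+/* Once this much "hot" memory (freed by SW but possibly still referenced by HW)
+ * accumulates, a steering sync is issued so the chunks can be reused.
+ */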
+#define DR_ICM_SYNC_THRESHOLD (64 * 1024 * 1024)
+
+struct mlx5dr_icm_pool;
+
+struct mlx5dr_icm_bucket {
+ struct mlx5dr_icm_pool *pool;
+
+ /* Chunks that aren't visible to HW, neither directly nor via the cache */
+ struct list_head free_list;
+ unsigned int free_list_count;
+
+ /* Used chunks, HW may be accessing this memory */
+ struct list_head used_list;
+ unsigned int used_list_count;
+
+ /* HW may still be accessing this memory, but at some future,
+ * undetermined time it might cease to do so. Before sync_ste is
+ * called, this list is moved to the sync_list.
+ */
+ struct list_head hot_list;
+ unsigned int hot_list_count;
+
+ /* Pending sync list, entries from the hot list are moved to this list.
+ * sync_ste is executed and then sync_list is concatenated to the free list
+ */
+ struct list_head sync_list;
+ unsigned int sync_list_count;
+
+ u32 total_chunks;
+ u32 num_of_entries;
+ u32 entry_size;
+ /* protect the ICM bucket */
+ struct mutex mutex;
+};
+
+struct mlx5dr_icm_pool {
+ struct mlx5dr_icm_bucket *buckets;
+ enum mlx5dr_icm_type icm_type;
+ enum mlx5dr_icm_chunk_size max_log_chunk_sz;
+ enum mlx5dr_icm_chunk_size num_of_buckets;
+ struct list_head icm_mr_list;
+ /* protect the ICM MR list */
+ struct mutex mr_mutex;
+ struct mlx5dr_domain *dmn;
+};
+
+struct mlx5dr_icm_dm {
+ u32 obj_id;
+ enum mlx5_sw_icm_type type;
+ u64 addr;
+ size_t length;
+};
+
+struct mlx5dr_icm_mr {
+ struct mlx5dr_icm_pool *pool;
+ struct mlx5_core_mkey mkey;
+ struct mlx5dr_icm_dm dm;
+ size_t used_length;
+ size_t length;
+ u64 icm_start_addr;
+ struct list_head mr_list;
+};
+
+static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev,
+ u32 pd, u64 length, u64 start_addr, int mode,
+ struct mlx5_core_mkey *mkey)
+{
+ u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
+ void *mkc;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, access_mode_1_0, mode);
+ MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ if (mode == MLX5_MKC_ACCESS_MODE_SW_ICM) {
+ MLX5_SET(mkc, mkc, rw, 1);
+ MLX5_SET(mkc, mkc, rr, 1);
+ }
+
+ MLX5_SET64(mkc, mkc, len, length);
+ MLX5_SET(mkc, mkc, pd, pd);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET64(mkc, mkc, start_addr, start_addr);
+
+ return mlx5_core_create_mkey(mdev, mkey, in, inlen);
+}
+
+static struct mlx5dr_icm_mr *
+dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool,
+ enum mlx5_sw_icm_type type,
+ size_t align_base)
+{
+ struct mlx5_core_dev *mdev = pool->dmn->mdev;
+ struct mlx5dr_icm_mr *icm_mr;
+ size_t align_diff;
+ int err;
+
+ icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL);
+ if (!icm_mr)
+ return NULL;
+
+ icm_mr->pool = pool;
+ INIT_LIST_HEAD(&icm_mr->mr_list);
+
+ icm_mr->dm.type = type;
+
+ /* 2^log_biggest_table * entry-size * double-for-alignment */
+ icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
+ pool->icm_type) * 2;
+
+ err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0,
+ &icm_mr->dm.addr, &icm_mr->dm.obj_id);
+ if (err) {
+ mlx5dr_err(pool->dmn, "Failed to allocate SW ICM memory, err (%d)\n", err);
+ goto free_icm_mr;
+ }
+
+ /* Register device memory */
+ err = dr_icm_create_dm_mkey(mdev, pool->dmn->pdn,
+ icm_mr->dm.length,
+ icm_mr->dm.addr,
+ MLX5_MKC_ACCESS_MODE_SW_ICM,
+ &icm_mr->mkey);
+ if (err) {
+ mlx5dr_err(pool->dmn, "Failed to create SW ICM MKEY, err (%d)\n", err);
+ goto free_dm;
+ }
+
+ icm_mr->icm_start_addr = icm_mr->dm.addr;
+
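+ /* Skip the bytes up to the next align_base boundary so the first chunk
+ * handed out from this MR starts aligned
+ */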
+ align_diff = icm_mr->icm_start_addr % align_base;
+ if (align_diff)
+ icm_mr->used_length = align_base - align_diff;
+
+ list_add_tail(&icm_mr->mr_list, &pool->icm_mr_list);
+
+ return icm_mr;
+
+free_dm:
+ mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0,
+ icm_mr->dm.addr, icm_mr->dm.obj_id);
+free_icm_mr:
+ kvfree(icm_mr);
+ return NULL;
+}
+
+static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr)
+{
+ struct mlx5_core_dev *mdev = icm_mr->pool->dmn->mdev;
+ struct mlx5dr_icm_dm *dm = &icm_mr->dm;
+
+ list_del(&icm_mr->mr_list);
+ mlx5_core_destroy_mkey(mdev, &icm_mr->mkey);
+ mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0,
+ dm->addr, dm->obj_id);
+ kvfree(icm_mr);
+}
+
+static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk)
+{
+ struct mlx5dr_icm_bucket *bucket = chunk->bucket;
+
+ chunk->ste_arr = kvzalloc(bucket->num_of_entries *
+ sizeof(chunk->ste_arr[0]), GFP_KERNEL);
+ if (!chunk->ste_arr)
+ return -ENOMEM;
+
+ chunk->hw_ste_arr = kvzalloc(bucket->num_of_entries *
+ DR_STE_SIZE_REDUCED, GFP_KERNEL);
+ if (!chunk->hw_ste_arr)
+ goto out_free_ste_arr;
+
+ chunk->miss_list = kvmalloc(bucket->num_of_entries *
+ sizeof(chunk->miss_list[0]), GFP_KERNEL);
+ if (!chunk->miss_list)
+ goto out_free_hw_ste_arr;
+
+ return 0;
+
+out_free_hw_ste_arr:
+ kvfree(chunk->hw_ste_arr);
+out_free_ste_arr:
+ kvfree(chunk->ste_arr);
+ return -ENOMEM;
+}
+
+static int dr_icm_chunks_create(struct mlx5dr_icm_bucket *bucket)
+{
+ size_t mr_free_size, mr_req_size, mr_row_size;
+ struct mlx5dr_icm_pool *pool = bucket->pool;
+ struct mlx5dr_icm_mr *icm_mr = NULL;
+ struct mlx5dr_icm_chunk *chunk;
+ enum mlx5_sw_icm_type dm_type;
+ size_t align_base;
+ int i, err = 0;
+
+ mr_req_size = bucket->num_of_entries * bucket->entry_size;
+ mr_row_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
+ pool->icm_type);
+
+ if (pool->icm_type == DR_ICM_TYPE_STE) {
+ dm_type = MLX5_SW_ICM_TYPE_STEERING;
+ /* Align base is the biggest chunk size, i.e. the row size */
+ align_base = mr_row_size;
+ } else {
+ dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY;
+ /* Align base is 64B */
+ align_base = DR_ICM_MODIFY_HDR_ALIGN_BASE;
+ }
+
+ mutex_lock(&pool->mr_mutex);
+ if (!list_empty(&pool->icm_mr_list)) {
+ icm_mr = list_last_entry(&pool->icm_mr_list,
+ struct mlx5dr_icm_mr, mr_list);
+
+ if (icm_mr)
+ mr_free_size = icm_mr->dm.length - icm_mr->used_length;
+ }
+
+ if (!icm_mr || mr_free_size < mr_row_size) {
+ icm_mr = dr_icm_pool_mr_create(pool, dm_type, align_base);
+ if (!icm_mr) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+ }
+
+ /* Create memory aligned chunks */
+ for (i = 0; i < mr_row_size / mr_req_size; i++) {
+ chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL);
+ if (!chunk) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ chunk->bucket = bucket;
+ chunk->rkey = icm_mr->mkey.key;
+ /* mr start addr is zero based */
+ chunk->mr_addr = icm_mr->used_length;
+ chunk->icm_addr = (uintptr_t)icm_mr->icm_start_addr + icm_mr->used_length;
+ icm_mr->used_length += mr_req_size;
+ chunk->num_of_entries = bucket->num_of_entries;
+ chunk->byte_size = chunk->num_of_entries * bucket->entry_size;
+
+ if (pool->icm_type == DR_ICM_TYPE_STE) {
+ err = dr_icm_chunk_ste_init(chunk);
+ if (err)
+ goto out_free_chunk;
+ }
+
+ INIT_LIST_HEAD(&chunk->chunk_list);
+ list_add(&chunk->chunk_list, &bucket->free_list);
+ bucket->free_list_count++;
+ bucket->total_chunks++;
+ }
+ mutex_unlock(&pool->mr_mutex);
+ return 0;
+
+out_free_chunk:
+ kvfree(chunk);
+out_err:
+ mutex_unlock(&pool->mr_mutex);
+ return err;
+}
+
+static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk)
+{
+ kvfree(chunk->miss_list);
+ kvfree(chunk->hw_ste_arr);
+ kvfree(chunk->ste_arr);
+}
+
+static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk)
+{
+ struct mlx5dr_icm_bucket *bucket = chunk->bucket;
+
+ list_del(&chunk->chunk_list);
+ bucket->total_chunks--;
+
+ if (bucket->pool->icm_type == DR_ICM_TYPE_STE)
+ dr_icm_chunk_ste_cleanup(chunk);
+
+ kvfree(chunk);
+}
+
+static void dr_icm_bucket_init(struct mlx5dr_icm_pool *pool,
+ struct mlx5dr_icm_bucket *bucket,
+ enum mlx5dr_icm_chunk_size chunk_size)
+{
+ if (pool->icm_type == DR_ICM_TYPE_STE)
+ bucket->entry_size = DR_STE_SIZE;
+ else
+ bucket->entry_size = DR_MODIFY_ACTION_SIZE;
+
+ bucket->num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size);
+ bucket->pool = pool;
+ mutex_init(&bucket->mutex);
+ INIT_LIST_HEAD(&bucket->free_list);
+ INIT_LIST_HEAD(&bucket->used_list);
+ INIT_LIST_HEAD(&bucket->hot_list);
+ INIT_LIST_HEAD(&bucket->sync_list);
+}
+
+static void dr_icm_bucket_cleanup(struct mlx5dr_icm_bucket *bucket)
+{
+ struct mlx5dr_icm_chunk *chunk, *next;
+
+ mutex_destroy(&bucket->mutex);
+ list_splice_tail_init(&bucket->sync_list, &bucket->free_list);
+ list_splice_tail_init(&bucket->hot_list, &bucket->free_list);
+
+ list_for_each_entry_safe(chunk, next, &bucket->free_list, chunk_list)
+ dr_icm_chunk_destroy(chunk);
+
+ WARN_ON(bucket->total_chunks != 0);
+
+ /* Cleanup of unreturned chunks */
+ list_for_each_entry_safe(chunk, next, &bucket->used_list, chunk_list)
+ dr_icm_chunk_destroy(chunk);
+}
+
+static u64 dr_icm_hot_mem_size(struct mlx5dr_icm_pool *pool)
+{
+ u64 hot_size = 0;
+ int chunk_order;
+
+ for (chunk_order = 0; chunk_order < pool->num_of_buckets; chunk_order++)
+ hot_size += pool->buckets[chunk_order].hot_list_count *
+ mlx5dr_icm_pool_chunk_size_to_byte(chunk_order, pool->icm_type);
+
+ return hot_size;
+}
+
+static bool dr_icm_reuse_hot_entries(struct mlx5dr_icm_pool *pool,
+ struct mlx5dr_icm_bucket *bucket)
+{
+ u64 bytes_for_sync;
+
+ bytes_for_sync = dr_icm_hot_mem_size(pool);
+ if (bytes_for_sync < DR_ICM_SYNC_THRESHOLD || !bucket->hot_list_count)
+ return false;
+
+ return true;
+}
+
+static void dr_icm_chill_bucket_start(struct mlx5dr_icm_bucket *bucket)
+{
+ list_splice_tail_init(&bucket->hot_list, &bucket->sync_list);
+ bucket->sync_list_count += bucket->hot_list_count;
+ bucket->hot_list_count = 0;
+}
+
+static void dr_icm_chill_bucket_end(struct mlx5dr_icm_bucket *bucket)
+{
+ list_splice_tail_init(&bucket->sync_list, &bucket->free_list);
+ bucket->free_list_count += bucket->sync_list_count;
+ bucket->sync_list_count = 0;
+}
+
+static void dr_icm_chill_bucket_abort(struct mlx5dr_icm_bucket *bucket)
+{
+ list_splice_tail_init(&bucket->sync_list, &bucket->hot_list);
+ bucket->hot_list_count += bucket->sync_list_count;
+ bucket->sync_list_count = 0;
+}
+
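+/* "Chilling" the buckets: move hot chunks to the sync lists of every bucket
+ * whose mutex could be taken (plus the current bucket). After a successful
+ * steering sync the sync lists are released to the free lists (end); on
+ * failure they are returned to the hot lists (abort).
+ */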
+static void dr_icm_chill_buckets_start(struct mlx5dr_icm_pool *pool,
+ struct mlx5dr_icm_bucket *cb,
+ bool buckets[DR_CHUNK_SIZE_MAX])
+{
+ struct mlx5dr_icm_bucket *bucket;
+ int i;
+
+ for (i = 0; i < pool->num_of_buckets; i++) {
+ bucket = &pool->buckets[i];
+ if (bucket == cb) {
+ dr_icm_chill_bucket_start(bucket);
+ continue;
+ }
+
+ /* The mutex is released at the end of the process, after
+ * sync_ste has been executed, in dr_icm_chill_buckets_end().
+ */
+ if (mutex_trylock(&bucket->mutex)) {
+ dr_icm_chill_bucket_start(bucket);
+ buckets[i] = true;
+ }
+ }
+}
+
+static void dr_icm_chill_buckets_end(struct mlx5dr_icm_pool *pool,
+ struct mlx5dr_icm_bucket *cb,
+ bool buckets[DR_CHUNK_SIZE_MAX])
+{
+ struct mlx5dr_icm_bucket *bucket;
+ int i;
+
+ for (i = 0; i < pool->num_of_buckets; i++) {
+ bucket = &pool->buckets[i];
+ if (bucket == cb) {
+ dr_icm_chill_bucket_end(bucket);
+ continue;
+ }
+
+ if (!buckets[i])
+ continue;
+
+ dr_icm_chill_bucket_end(bucket);
+ mutex_unlock(&bucket->mutex);
+ }
+}
+
+static void dr_icm_chill_buckets_abort(struct mlx5dr_icm_pool *pool,
+ struct mlx5dr_icm_bucket *cb,
+ bool buckets[DR_CHUNK_SIZE_MAX])
+{
+ struct mlx5dr_icm_bucket *bucket;
+ int i;
+
+ for (i = 0; i < pool->num_of_buckets; i++) {
+ bucket = &pool->buckets[i];
+ if (bucket == cb) {
+ dr_icm_chill_bucket_abort(bucket);
+ continue;
+ }
+
+ if (!buckets[i])
+ continue;
+
+ dr_icm_chill_bucket_abort(bucket);
+ mutex_unlock(&bucket->mutex);
+ }
+}
+
+/* Allocate an ICM chunk, each chunk holds a piece of ICM memory and
+ * also memory used for HW STE management for optimizations.
+ */
+struct mlx5dr_icm_chunk *
+mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool,
+ enum mlx5dr_icm_chunk_size chunk_size)
+{
+ struct mlx5dr_icm_chunk *chunk = NULL; /* Fix compilation warning */
+ bool buckets[DR_CHUNK_SIZE_MAX] = {};
+ struct mlx5dr_icm_bucket *bucket;
+ int err;
+
+ if (chunk_size > pool->max_log_chunk_sz)
+ return NULL;
+
+ bucket = &pool->buckets[chunk_size];
+
+ mutex_lock(&bucket->mutex);
+
+ /* Take chunk from pool if available, otherwise allocate new chunks */
+ if (list_empty(&bucket->free_list)) {
+ if (dr_icm_reuse_hot_entries(pool, bucket)) {
+ dr_icm_chill_buckets_start(pool, bucket, buckets);
+ err = mlx5dr_cmd_sync_steering(pool->dmn->mdev);
+ if (err) {
+ dr_icm_chill_buckets_abort(pool, bucket, buckets);
+ mlx5dr_dbg(pool->dmn, "Sync_steering failed\n");
+ chunk = NULL;
+ goto out;
+ }
+ dr_icm_chill_buckets_end(pool, bucket, buckets);
+ } else {
+ dr_icm_chunks_create(bucket);
+ }
+ }
+
+ if (!list_empty(&bucket->free_list)) {
+ chunk = list_last_entry(&bucket->free_list,
+ struct mlx5dr_icm_chunk,
+ chunk_list);
+ if (chunk) {
+ list_del_init(&chunk->chunk_list);
+ list_add_tail(&chunk->chunk_list, &bucket->used_list);
+ bucket->free_list_count--;
+ bucket->used_list_count++;
+ }
+ }
+out:
+ mutex_unlock(&bucket->mutex);
+ return chunk;
+}
+
+void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk)
+{
+ struct mlx5dr_icm_bucket *bucket = chunk->bucket;
+
+ if (bucket->pool->icm_type == DR_ICM_TYPE_STE) {
+ memset(chunk->ste_arr, 0,
+ bucket->num_of_entries * sizeof(chunk->ste_arr[0]));
+ memset(chunk->hw_ste_arr, 0,
+ bucket->num_of_entries * DR_STE_SIZE_REDUCED);
+ }
+
+ mutex_lock(&bucket->mutex);
+ list_del_init(&chunk->chunk_list);
+ list_add_tail(&chunk->chunk_list, &bucket->hot_list);
+ bucket->hot_list_count++;
+ bucket->used_list_count--;
+ mutex_unlock(&bucket->mutex);
+}
+
+struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
+ enum mlx5dr_icm_type icm_type)
+{
+ enum mlx5dr_icm_chunk_size max_log_chunk_sz;
+ struct mlx5dr_icm_pool *pool;
+ int i;
+
+ if (icm_type == DR_ICM_TYPE_STE)
+ max_log_chunk_sz = dmn->info.max_log_sw_icm_sz;
+ else
+ max_log_chunk_sz = dmn->info.max_log_action_icm_sz;
+
+ pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return NULL;
+
+ pool->buckets = kcalloc(max_log_chunk_sz + 1,
+ sizeof(pool->buckets[0]),
+ GFP_KERNEL);
+ if (!pool->buckets)
+ goto free_pool;
+
+ pool->dmn = dmn;
+ pool->icm_type = icm_type;
+ pool->max_log_chunk_sz = max_log_chunk_sz;
+ pool->num_of_buckets = max_log_chunk_sz + 1;
+ INIT_LIST_HEAD(&pool->icm_mr_list);
+
+ for (i = 0; i < pool->num_of_buckets; i++)
+ dr_icm_bucket_init(pool, &pool->buckets[i], i);
+
+ mutex_init(&pool->mr_mutex);
+
+ return pool;
+
+free_pool:
+ kvfree(pool);
+ return NULL;
+}
+
+void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool)
+{
+ struct mlx5dr_icm_mr *icm_mr, *next;
+ int i;
+
+ mutex_destroy(&pool->mr_mutex);
+
+ list_for_each_entry_safe(icm_mr, next, &pool->icm_mr_list, mr_list)
+ dr_icm_pool_mr_destroy(icm_mr);
+
+ for (i = 0; i < pool->num_of_buckets; i++)
+ dr_icm_bucket_cleanup(&pool->buckets[i]);
+
+ kfree(pool->buckets);
+ kvfree(pool);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
new file mode 100644
index 000000000000..01008cd66f75
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -0,0 +1,770 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+static bool dr_mask_is_smac_set(struct mlx5dr_match_spec *spec)
+{
+ return (spec->smac_47_16 || spec->smac_15_0);
+}
+
+static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec)
+{
+ return (spec->dmac_47_16 || spec->dmac_15_0);
+}
+
+static bool dr_mask_is_src_addr_set(struct mlx5dr_match_spec *spec)
+{
+ return (spec->src_ip_127_96 || spec->src_ip_95_64 ||
+ spec->src_ip_63_32 || spec->src_ip_31_0);
+}
+
+static bool dr_mask_is_dst_addr_set(struct mlx5dr_match_spec *spec)
+{
+ return (spec->dst_ip_127_96 || spec->dst_ip_95_64 ||
+ spec->dst_ip_63_32 || spec->dst_ip_31_0);
+}
+
+static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec)
+{
+ return (spec->ip_protocol || spec->frag || spec->tcp_flags ||
+ spec->ip_ecn || spec->ip_dscp);
+}
+
+static bool dr_mask_is_tcp_udp_base_set(struct mlx5dr_match_spec *spec)
+{
+ return (spec->tcp_sport || spec->tcp_dport ||
+ spec->udp_sport || spec->udp_dport);
+}
+
+static bool dr_mask_is_ipv4_set(struct mlx5dr_match_spec *spec)
+{
+ return (spec->dst_ip_31_0 || spec->src_ip_31_0);
+}
+
+static bool dr_mask_is_ipv4_5_tuple_set(struct mlx5dr_match_spec *spec)
+{
+ return (dr_mask_is_l3_base_set(spec) ||
+ dr_mask_is_tcp_udp_base_set(spec) ||
+ dr_mask_is_ipv4_set(spec));
+}
+
+static bool dr_mask_is_eth_l2_tnl_set(struct mlx5dr_match_misc *misc)
+{
+ return misc->vxlan_vni;
+}
+
+static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec)
+{
+ return spec->ttl_hoplimit;
+}
+
+#define DR_MASK_IS_L2_DST(_spec, _misc, _inner_outer) ((_spec).first_vid || \
+ (_spec).first_cfi || (_spec).first_prio || (_spec).cvlan_tag || \
+ (_spec).svlan_tag || (_spec).dmac_47_16 || (_spec).dmac_15_0 || \
+ (_spec).ethertype || (_spec).ip_version || \
+ (_misc)._inner_outer##_second_vid || \
+ (_misc)._inner_outer##_second_cfi || \
+ (_misc)._inner_outer##_second_prio || \
+ (_misc)._inner_outer##_second_cvlan_tag || \
+ (_misc)._inner_outer##_second_svlan_tag)
+
+#define DR_MASK_IS_ETH_L4_SET(_spec, _misc, _inner_outer) ( \
+ dr_mask_is_l3_base_set(&(_spec)) || \
+ dr_mask_is_tcp_udp_base_set(&(_spec)) || \
+ dr_mask_is_ttl_set(&(_spec)) || \
+ (_misc)._inner_outer##_ipv6_flow_label)
+
+#define DR_MASK_IS_ETH_L4_MISC_SET(_misc3, _inner_outer) ( \
+ (_misc3)._inner_outer##_tcp_seq_num || \
+ (_misc3)._inner_outer##_tcp_ack_num)
+
+#define DR_MASK_IS_FIRST_MPLS_SET(_misc2, _inner_outer) ( \
+ (_misc2)._inner_outer##_first_mpls_label || \
+ (_misc2)._inner_outer##_first_mpls_exp || \
+ (_misc2)._inner_outer##_first_mpls_s_bos || \
+ (_misc2)._inner_outer##_first_mpls_ttl)
+
+static bool dr_mask_is_gre_set(struct mlx5dr_match_misc *misc)
+{
+ return (misc->gre_key_h || misc->gre_key_l ||
+ misc->gre_protocol || misc->gre_c_present ||
+ misc->gre_k_present || misc->gre_s_present);
+}
+
+#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET(_misc2, gre_udp) ( \
+ (_misc2).outer_first_mpls_over_##gre_udp##_label || \
+ (_misc2).outer_first_mpls_over_##gre_udp##_exp || \
+ (_misc2).outer_first_mpls_over_##gre_udp##_s_bos || \
+ (_misc2).outer_first_mpls_over_##gre_udp##_ttl)
+
+#define DR_MASK_IS_FLEX_PARSER_0_SET(_misc2) ( \
+ DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), gre) || \
+ DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), udp))
+
+static bool dr_mask_is_flex_parser_tnl_set(struct mlx5dr_match_misc3 *misc3)
+{
+ return (misc3->outer_vxlan_gpe_vni ||
+ misc3->outer_vxlan_gpe_next_protocol ||
+ misc3->outer_vxlan_gpe_flags);
+}
+
+static bool dr_mask_is_flex_parser_icmpv6_set(struct mlx5dr_match_misc3 *misc3)
+{
+ return (misc3->icmpv6_type || misc3->icmpv6_code ||
+ misc3->icmpv6_header_data);
+}
+
+static bool dr_mask_is_wqe_metadata_set(struct mlx5dr_match_misc2 *misc2)
+{
+ return misc2->metadata_reg_a;
+}
+
+static bool dr_mask_is_reg_c_0_3_set(struct mlx5dr_match_misc2 *misc2)
+{
+ return (misc2->metadata_reg_c_0 || misc2->metadata_reg_c_1 ||
+ misc2->metadata_reg_c_2 || misc2->metadata_reg_c_3);
+}
+
+static bool dr_mask_is_reg_c_4_7_set(struct mlx5dr_match_misc2 *misc2)
+{
+ return (misc2->metadata_reg_c_4 || misc2->metadata_reg_c_5 ||
+ misc2->metadata_reg_c_6 || misc2->metadata_reg_c_7);
+}
+
+static bool dr_mask_is_gvmi_or_qpn_set(struct mlx5dr_match_misc *misc)
+{
+ return (misc->source_sqn || misc->source_port);
+}
+
+static bool
+dr_matcher_supp_flex_parser_vxlan_gpe(struct mlx5dr_domain *dmn)
+{
+ return dmn->info.caps.flex_protocols &
+ MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED;
+}
+
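+/* Each nic_matcher holds two pre-computed STE builder arrays, one for IPv4 and
+ * one for IPv6 addresses; select the set that matches the rule being inserted.
+ */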
+int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ bool ipv6)
+{
+ if (ipv6) {
+ nic_matcher->ste_builder = nic_matcher->ste_builder6;
+ nic_matcher->num_of_builders = nic_matcher->num_of_builders6;
+ } else {
+ nic_matcher->ste_builder = nic_matcher->ste_builder4;
+ nic_matcher->num_of_builders = nic_matcher->num_of_builders4;
+ }
+
+ if (!nic_matcher->num_of_builders) {
+ mlx5dr_dbg(matcher->tbl->dmn,
+ "Rule not supported on this matcher due to IP related fields\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ bool ipv6)
+{
+ struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_match_param mask = {};
+ struct mlx5dr_match_misc3 *misc3;
+ struct mlx5dr_ste_build *sb;
+ u8 *num_of_builders;
+ bool inner, rx;
+ int idx = 0;
+ int ret, i;
+
+ if (ipv6) {
+ sb = nic_matcher->ste_builder6;
+ num_of_builders = &nic_matcher->num_of_builders6;
+ } else {
+ sb = nic_matcher->ste_builder4;
+ num_of_builders = &nic_matcher->num_of_builders4;
+ }
+
+ rx = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX;
+
+ /* Create a temporary mask to track and clear used mask fields */
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_OUTER)
+ mask.outer = matcher->mask.outer;
+
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC)
+ mask.misc = matcher->mask.misc;
+
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_INNER)
+ mask.inner = matcher->mask.inner;
+
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC2)
+ mask.misc2 = matcher->mask.misc2;
+
+ if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC3)
+ mask.misc3 = matcher->mask.misc3;
+
+ ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria,
+ &matcher->mask, NULL);
+ if (ret)
+ return ret;
+
+ /* Outer */
+ if (matcher->match_criteria & (DR_MATCHER_CRITERIA_OUTER |
+ DR_MATCHER_CRITERIA_MISC |
+ DR_MATCHER_CRITERIA_MISC2 |
+ DR_MATCHER_CRITERIA_MISC3)) {
+ inner = false;
+
+ if (dr_mask_is_wqe_metadata_set(&mask.misc2))
+ mlx5dr_ste_build_general_purpose(&sb[idx++], &mask, inner, rx);
+
+ if (dr_mask_is_reg_c_0_3_set(&mask.misc2))
+ mlx5dr_ste_build_register_0(&sb[idx++], &mask, inner, rx);
+
+ if (dr_mask_is_reg_c_4_7_set(&mask.misc2))
+ mlx5dr_ste_build_register_1(&sb[idx++], &mask, inner, rx);
+
+ if (dr_mask_is_gvmi_or_qpn_set(&mask.misc) &&
+ (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
+ dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX)) {
+ ret = mlx5dr_ste_build_src_gvmi_qpn(&sb[idx++], &mask,
+ &dmn->info.caps,
+ inner, rx);
+ if (ret)
+ return ret;
+ }
+
+ if (dr_mask_is_smac_set(&mask.outer) &&
+ dr_mask_is_dmac_set(&mask.outer)) {
+ ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], &mask,
+ inner, rx);
+ if (ret)
+ return ret;
+ }
+
+ if (dr_mask_is_smac_set(&mask.outer))
+ mlx5dr_ste_build_eth_l2_src(&sb[idx++], &mask, inner, rx);
+
+ if (DR_MASK_IS_L2_DST(mask.outer, mask.misc, outer))
+ mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx);
+
+ if (ipv6) {
+ if (dr_mask_is_dst_addr_set(&mask.outer))
+ mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask,
+ inner, rx);
+
+ if (dr_mask_is_src_addr_set(&mask.outer))
+ mlx5dr_ste_build_eth_l3_ipv6_src(&sb[idx++], &mask,
+ inner, rx);
+
+ if (DR_MASK_IS_ETH_L4_SET(mask.outer, mask.misc, outer))
+ mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask,
+ inner, rx);
+ } else {
+ if (dr_mask_is_ipv4_5_tuple_set(&mask.outer))
+ mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask,
+ inner, rx);
+
+ if (dr_mask_is_ttl_set(&mask.outer))
+ mlx5dr_ste_build_eth_l3_ipv4_misc(&sb[idx++], &mask,
+ inner, rx);
+ }
+
+ if (dr_mask_is_flex_parser_tnl_set(&mask.misc3) &&
+ dr_matcher_supp_flex_parser_vxlan_gpe(dmn))
+ mlx5dr_ste_build_flex_parser_tnl(&sb[idx++], &mask,
+ inner, rx);
+
+ if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer))
+ mlx5dr_ste_build_eth_l4_misc(&sb[idx++], &mask, inner, rx);
+
+ if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, outer))
+ mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx);
+
+ if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2))
+ mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask,
+ inner, rx);
+
+ misc3 = &mask.misc3;
+ if ((DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc3) &&
+ mlx5dr_matcher_supp_flex_parser_icmp_v4(&dmn->info.caps)) ||
+ (dr_mask_is_flex_parser_icmpv6_set(&mask.misc3) &&
+ mlx5dr_matcher_supp_flex_parser_icmp_v6(&dmn->info.caps))) {
+ ret = mlx5dr_ste_build_flex_parser_1(&sb[idx++],
+ &mask, &dmn->info.caps,
+ inner, rx);
+ if (ret)
+ return ret;
+ }
+ if (dr_mask_is_gre_set(&mask.misc))
+ mlx5dr_ste_build_gre(&sb[idx++], &mask, inner, rx);
+ }
+
+ /* Inner */
+ if (matcher->match_criteria & (DR_MATCHER_CRITERIA_INNER |
+ DR_MATCHER_CRITERIA_MISC |
+ DR_MATCHER_CRITERIA_MISC2 |
+ DR_MATCHER_CRITERIA_MISC3)) {
+ inner = true;
+
+ if (dr_mask_is_eth_l2_tnl_set(&mask.misc))
+ mlx5dr_ste_build_eth_l2_tnl(&sb[idx++], &mask, inner, rx);
+
+ if (dr_mask_is_smac_set(&mask.inner) &&
+ dr_mask_is_dmac_set(&mask.inner)) {
+ ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++],
+ &mask, inner, rx);
+ if (ret)
+ return ret;
+ }
+
+ if (dr_mask_is_smac_set(&mask.inner))
+ mlx5dr_ste_build_eth_l2_src(&sb[idx++], &mask, inner, rx);
+
+ if (DR_MASK_IS_L2_DST(mask.inner, mask.misc, inner))
+ mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx);
+
+ if (ipv6) {
+ if (dr_mask_is_dst_addr_set(&mask.inner))
+ mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask,
+ inner, rx);
+
+ if (dr_mask_is_src_addr_set(&mask.inner))
+ mlx5dr_ste_build_eth_l3_ipv6_src(&sb[idx++], &mask,
+ inner, rx);
+
+ if (DR_MASK_IS_ETH_L4_SET(mask.inner, mask.misc, inner))
+ mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask,
+ inner, rx);
+ } else {
+ if (dr_mask_is_ipv4_5_tuple_set(&mask.inner))
+ mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask,
+ inner, rx);
+
+ if (dr_mask_is_ttl_set(&mask.inner))
+ mlx5dr_ste_build_eth_l3_ipv4_misc(&sb[idx++], &mask,
+ inner, rx);
+ }
+
+ if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, inner))
+ mlx5dr_ste_build_eth_l4_misc(&sb[idx++], &mask, inner, rx);
+
+ if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, inner))
+ mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx);
+
+ if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2))
+ mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, inner, rx);
+ }
+ /* Empty matcher, takes all */
+ if (matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY)
+ mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx);
+
+ if (idx == 0) {
+ mlx5dr_dbg(dmn, "Cannot generate any valid rules from mask\n");
+ return -EINVAL;
+ }
+
+ /* Check that all mask fields were consumed */
+ for (i = 0; i < sizeof(struct mlx5dr_match_param); i++) {
+ if (((u8 *)&mask)[i] != 0) {
+ mlx5dr_info(dmn, "Mask contains unsupported parameters\n");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ *num_of_builders = idx;
+
+ return 0;
+}
+
+static int dr_matcher_connect(struct mlx5dr_domain *dmn,
+ struct mlx5dr_matcher_rx_tx *curr_nic_matcher,
+ struct mlx5dr_matcher_rx_tx *next_nic_matcher,
+ struct mlx5dr_matcher_rx_tx *prev_nic_matcher)
+{
+ struct mlx5dr_table_rx_tx *nic_tbl = curr_nic_matcher->nic_tbl;
+ struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn;
+ struct mlx5dr_htbl_connect_info info;
+ struct mlx5dr_ste_htbl *prev_htbl;
+ int ret;
+
+ /* Connect end anchor hash table to next_htbl or to the default address */
+ if (next_nic_matcher) {
+ info.type = CONNECT_HIT;
+ info.hit_next_htbl = next_nic_matcher->s_htbl;
+ } else {
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = nic_tbl->default_icm_addr;
+ }
+ ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn,
+ curr_nic_matcher->e_anchor,
+ &info, info.type == CONNECT_HIT);
+ if (ret)
+ return ret;
+
+ /* Connect start hash table to end anchor */
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = curr_nic_matcher->e_anchor->chunk->icm_addr;
+ ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn,
+ curr_nic_matcher->s_htbl,
+ &info, false);
+ if (ret)
+ return ret;
+
+ /* Connect previous hash table to matcher start hash table */
+ if (prev_nic_matcher)
+ prev_htbl = prev_nic_matcher->e_anchor;
+ else
+ prev_htbl = nic_tbl->s_anchor;
+
+ info.type = CONNECT_HIT;
+ info.hit_next_htbl = curr_nic_matcher->s_htbl;
+ ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_htbl,
+ &info, true);
+ if (ret)
+ return ret;
+
+ /* Update the pointing ste and next hash table */
+ curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->ste_arr;
+ prev_htbl->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl;
+
+ if (next_nic_matcher) {
+ next_nic_matcher->s_htbl->pointing_ste = curr_nic_matcher->e_anchor->ste_arr;
+ curr_nic_matcher->e_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl;
+ }
+
+ return 0;
+}
+
+static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher)
+{
+ struct mlx5dr_matcher *next_matcher, *prev_matcher, *tmp_matcher;
+ struct mlx5dr_table *tbl = matcher->tbl;
+ struct mlx5dr_domain *dmn = tbl->dmn;
+ bool first = true;
+ int ret;
+
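+	/* Matchers are kept sorted by priority; find the first existing
+	 * matcher with priority >= the new one - the new matcher will be
+	 * inserted right before it.
+	 */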
+ next_matcher = NULL;
+ if (!list_empty(&tbl->matcher_list))
+ list_for_each_entry(tmp_matcher, &tbl->matcher_list, matcher_list) {
+ if (tmp_matcher->prio >= matcher->prio) {
+ next_matcher = tmp_matcher;
+ break;
+ }
+ first = false;
+ }
+
+ prev_matcher = NULL;
+ if (next_matcher && !first)
+ prev_matcher = list_entry(next_matcher->matcher_list.prev,
+ struct mlx5dr_matcher,
+ matcher_list);
+ else if (!first)
+ prev_matcher = list_entry(tbl->matcher_list.prev,
+ struct mlx5dr_matcher,
+ matcher_list);
+
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
+ dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
+ ret = dr_matcher_connect(dmn, &matcher->rx,
+ next_matcher ? &next_matcher->rx : NULL,
+ prev_matcher ? &prev_matcher->rx : NULL);
+ if (ret)
+ return ret;
+ }
+
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
+ dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) {
+ ret = dr_matcher_connect(dmn, &matcher->tx,
+ next_matcher ? &next_matcher->tx : NULL,
+ prev_matcher ? &prev_matcher->tx : NULL);
+ if (ret)
+ return ret;
+ }
+
+ if (prev_matcher)
+ list_add(&matcher->matcher_list, &prev_matcher->matcher_list);
+ else if (next_matcher)
+ list_add_tail(&matcher->matcher_list,
+ &next_matcher->matcher_list);
+ else
+ list_add(&matcher->matcher_list, &tbl->matcher_list);
+
+ return 0;
+}
+
+static void dr_matcher_uninit_nic(struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+ mlx5dr_htbl_put(nic_matcher->s_htbl);
+ mlx5dr_htbl_put(nic_matcher->e_anchor);
+}
+
+static void dr_matcher_uninit_fdb(struct mlx5dr_matcher *matcher)
+{
+ dr_matcher_uninit_nic(&matcher->rx);
+ dr_matcher_uninit_nic(&matcher->tx);
+}
+
+static void dr_matcher_uninit(struct mlx5dr_matcher *matcher)
+{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+
+ switch (dmn->type) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ dr_matcher_uninit_nic(&matcher->rx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ dr_matcher_uninit_nic(&matcher->tx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ dr_matcher_uninit_fdb(matcher);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+}
+
+static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ int ret, ret_v4, ret_v6;
+
+ ret_v4 = dr_matcher_set_ste_builders(matcher, nic_matcher, false);
+ ret_v6 = dr_matcher_set_ste_builders(matcher, nic_matcher, true);
+
+ if (ret_v4 && ret_v6) {
+ mlx5dr_dbg(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n");
+ return -EINVAL;
+ }
+
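+	/* Default to whichever IP version produced a valid builder set;
+	 * the per-rule choice is made in mlx5dr_matcher_select_builders().
+	 */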
+ if (!ret_v4)
+ nic_matcher->ste_builder = nic_matcher->ste_builder4;
+ else
+ nic_matcher->ste_builder = nic_matcher->ste_builder6;
+
+ nic_matcher->e_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+ DR_CHUNK_SIZE_1,
+ MLX5DR_STE_LU_TYPE_DONT_CARE,
+ 0);
+ if (!nic_matcher->e_anchor)
+ return -ENOMEM;
+
+ nic_matcher->s_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+ DR_CHUNK_SIZE_1,
+ nic_matcher->ste_builder[0].lu_type,
+ nic_matcher->ste_builder[0].byte_mask);
+ if (!nic_matcher->s_htbl) {
+ ret = -ENOMEM;
+ goto free_e_htbl;
+ }
+
+	/* Make sure the tables exist while they are empty */
+ mlx5dr_htbl_get(nic_matcher->s_htbl);
+ mlx5dr_htbl_get(nic_matcher->e_anchor);
+
+ return 0;
+
+free_e_htbl:
+ mlx5dr_ste_htbl_free(nic_matcher->e_anchor);
+ return ret;
+}
+
+static int dr_matcher_init_fdb(struct mlx5dr_matcher *matcher)
+{
+ int ret;
+
+ ret = dr_matcher_init_nic(matcher, &matcher->rx);
+ if (ret)
+ return ret;
+
+ ret = dr_matcher_init_nic(matcher, &matcher->tx);
+ if (ret)
+ goto uninit_nic_rx;
+
+ return 0;
+
+uninit_nic_rx:
+ dr_matcher_uninit_nic(&matcher->rx);
+ return ret;
+}
+
+static int dr_matcher_init(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *mask)
+{
+ struct mlx5dr_table *tbl = matcher->tbl;
+ struct mlx5dr_domain *dmn = tbl->dmn;
+ int ret;
+
+ if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) {
+ mlx5dr_info(dmn, "Invalid match criteria attribute\n");
+ return -EINVAL;
+ }
+
+ if (mask) {
+ if (mask->match_sz > sizeof(struct mlx5dr_match_param)) {
+ mlx5dr_info(dmn, "Invalid match size attribute\n");
+ return -EINVAL;
+ }
+ mlx5dr_ste_copy_param(matcher->match_criteria,
+ &matcher->mask, mask);
+ }
+
+ switch (dmn->type) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ matcher->rx.nic_tbl = &tbl->rx;
+ ret = dr_matcher_init_nic(matcher, &matcher->rx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ matcher->tx.nic_tbl = &tbl->tx;
+ ret = dr_matcher_init_nic(matcher, &matcher->tx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ matcher->rx.nic_tbl = &tbl->rx;
+ matcher->tx.nic_tbl = &tbl->tx;
+ ret = dr_matcher_init_fdb(matcher);
+ break;
+ default:
+ WARN_ON(true);
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+struct mlx5dr_matcher *
+mlx5dr_matcher_create(struct mlx5dr_table *tbl,
+ u16 priority,
+ u8 match_criteria_enable,
+ struct mlx5dr_match_parameters *mask)
+{
+ struct mlx5dr_matcher *matcher;
+ int ret;
+
+ refcount_inc(&tbl->refcount);
+
+ matcher = kzalloc(sizeof(*matcher), GFP_KERNEL);
+ if (!matcher)
+ goto dec_ref;
+
+ matcher->tbl = tbl;
+ matcher->prio = priority;
+ matcher->match_criteria = match_criteria_enable;
+ refcount_set(&matcher->refcount, 1);
+ INIT_LIST_HEAD(&matcher->matcher_list);
+
+ mutex_lock(&tbl->dmn->mutex);
+
+ ret = dr_matcher_init(matcher, mask);
+ if (ret)
+ goto free_matcher;
+
+ ret = dr_matcher_add_to_tbl(matcher);
+ if (ret)
+ goto matcher_uninit;
+
+ mutex_unlock(&tbl->dmn->mutex);
+
+ return matcher;
+
+matcher_uninit:
+ dr_matcher_uninit(matcher);
+free_matcher:
+ mutex_unlock(&tbl->dmn->mutex);
+ kfree(matcher);
+dec_ref:
+ refcount_dec(&tbl->refcount);
+ return NULL;
+}
+
+static int dr_matcher_disconnect(struct mlx5dr_domain *dmn,
+ struct mlx5dr_table_rx_tx *nic_tbl,
+ struct mlx5dr_matcher_rx_tx *next_nic_matcher,
+ struct mlx5dr_matcher_rx_tx *prev_nic_matcher)
+{
+ struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn;
+ struct mlx5dr_htbl_connect_info info;
+ struct mlx5dr_ste_htbl *prev_anchor;
+
+ if (prev_nic_matcher)
+ prev_anchor = prev_nic_matcher->e_anchor;
+ else
+ prev_anchor = nic_tbl->s_anchor;
+
+ /* Connect previous anchor hash table to next matcher or to the default address */
+ if (next_nic_matcher) {
+ info.type = CONNECT_HIT;
+ info.hit_next_htbl = next_nic_matcher->s_htbl;
+ next_nic_matcher->s_htbl->pointing_ste = prev_anchor->ste_arr;
+ prev_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl;
+ } else {
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = nic_tbl->default_icm_addr;
+ prev_anchor->ste_arr[0].next_htbl = NULL;
+ }
+
+ return mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_anchor,
+ &info, true);
+}
+
+static int dr_matcher_remove_from_tbl(struct mlx5dr_matcher *matcher)
+{
+ struct mlx5dr_matcher *prev_matcher, *next_matcher;
+ struct mlx5dr_table *tbl = matcher->tbl;
+ struct mlx5dr_domain *dmn = tbl->dmn;
+ int ret = 0;
+
+ if (list_is_last(&matcher->matcher_list, &tbl->matcher_list))
+ next_matcher = NULL;
+ else
+ next_matcher = list_next_entry(matcher, matcher_list);
+
+ if (matcher->matcher_list.prev == &tbl->matcher_list)
+ prev_matcher = NULL;
+ else
+ prev_matcher = list_prev_entry(matcher, matcher_list);
+
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
+ dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) {
+ ret = dr_matcher_disconnect(dmn, &tbl->rx,
+ next_matcher ? &next_matcher->rx : NULL,
+ prev_matcher ? &prev_matcher->rx : NULL);
+ if (ret)
+ return ret;
+ }
+
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB ||
+ dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) {
+ ret = dr_matcher_disconnect(dmn, &tbl->tx,
+ next_matcher ? &next_matcher->tx : NULL,
+ prev_matcher ? &prev_matcher->tx : NULL);
+ if (ret)
+ return ret;
+ }
+
+ list_del(&matcher->matcher_list);
+
+ return 0;
+}
+
+int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher)
+{
+ struct mlx5dr_table *tbl = matcher->tbl;
+
+ if (refcount_read(&matcher->refcount) > 1)
+ return -EBUSY;
+
+ mutex_lock(&tbl->dmn->mutex);
+
+ dr_matcher_remove_from_tbl(matcher);
+ dr_matcher_uninit(matcher);
+ refcount_dec(&matcher->tbl->refcount);
+
+ mutex_unlock(&tbl->dmn->mutex);
+ kfree(matcher);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
new file mode 100644
index 000000000000..3bc3f66b8fa8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -0,0 +1,1243 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+#define DR_RULE_MAX_STE_CHAIN (DR_RULE_MAX_STES + DR_ACTION_MAX_STES)
+
+struct mlx5dr_rule_action_member {
+ struct mlx5dr_action *action;
+ struct list_head list;
+};
+
+static int dr_rule_append_to_miss_list(struct mlx5dr_ste *new_last_ste,
+ struct list_head *miss_list,
+ struct list_head *send_list)
+{
+ struct mlx5dr_ste_send_info *ste_info_last;
+ struct mlx5dr_ste *last_ste;
+
+ /* The new entry will be inserted after the last */
+ last_ste = list_entry(miss_list->prev, struct mlx5dr_ste, miss_list_node);
+ WARN_ON(!last_ste);
+
+ ste_info_last = kzalloc(sizeof(*ste_info_last), GFP_KERNEL);
+ if (!ste_info_last)
+ return -ENOMEM;
+
+ mlx5dr_ste_set_miss_addr(last_ste->hw_ste,
+ mlx5dr_ste_get_icm_addr(new_last_ste));
+ list_add_tail(&new_last_ste->miss_list_node, miss_list);
+
+ mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_REDUCED,
+ 0, last_ste->hw_ste,
+ ste_info_last, send_list, true);
+
+ return 0;
+}
+
+static struct mlx5dr_ste *
+dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ u8 *hw_ste)
+{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_htbl *new_htbl;
+ struct mlx5dr_ste *ste;
+
+ /* Create new table for miss entry */
+ new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+ DR_CHUNK_SIZE_1,
+ MLX5DR_STE_LU_TYPE_DONT_CARE,
+ 0);
+ if (!new_htbl) {
+ mlx5dr_dbg(dmn, "Failed allocating collision table\n");
+ return NULL;
+ }
+
+ /* One and only entry, never grows */
+ ste = new_htbl->ste_arr;
+ mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr);
+ mlx5dr_htbl_get(new_htbl);
+
+ return ste;
+}
+
+static struct mlx5dr_ste *
+dr_rule_create_collision_entry(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ u8 *hw_ste,
+ struct mlx5dr_ste *orig_ste)
+{
+ struct mlx5dr_ste *ste;
+
+ ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste);
+ if (!ste) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Failed creating collision entry\n");
+ return NULL;
+ }
+
+ ste->ste_chain_location = orig_ste->ste_chain_location;
+
+ /* In collision entry, all members share the same miss_list_head */
+ ste->htbl->miss_list = mlx5dr_ste_get_miss_list(orig_ste);
+
+ /* Next table */
+ if (mlx5dr_ste_create_next_htbl(matcher, nic_matcher, ste, hw_ste,
+ DR_CHUNK_SIZE_1)) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Failed allocating table\n");
+ goto free_tbl;
+ }
+
+ return ste;
+
+free_tbl:
+ mlx5dr_ste_free(ste, matcher, nic_matcher);
+ return NULL;
+}
+
+static int
+dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info,
+ struct mlx5dr_domain *dmn)
+{
+ int ret;
+
+ list_del(&ste_info->send_list);
+ ret = mlx5dr_send_postsend_ste(dmn, ste_info->ste, ste_info->data,
+ ste_info->size, ste_info->offset);
+ if (ret)
+ goto out;
+	/* Copy the data to the STE, reduced size only; the last 16B
+	 * (the mask) have already been written to the HW.
+	 */
+ memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_REDUCED);
+
+out:
+ kfree(ste_info);
+ return ret;
+}
+
+static int dr_rule_send_update_list(struct list_head *send_ste_list,
+ struct mlx5dr_domain *dmn,
+ bool is_reverse)
+{
+ struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info;
+ int ret;
+
+ if (is_reverse) {
+ list_for_each_entry_safe_reverse(ste_info, tmp_ste_info,
+ send_ste_list, send_list) {
+ ret = dr_rule_handle_one_ste_in_update_list(ste_info,
+ dmn);
+ if (ret)
+ return ret;
+ }
+ } else {
+ list_for_each_entry_safe(ste_info, tmp_ste_info,
+ send_ste_list, send_list) {
+ ret = dr_rule_handle_one_ste_in_update_list(ste_info,
+ dmn);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static struct mlx5dr_ste *
+dr_rule_find_ste_in_miss_list(struct list_head *miss_list, u8 *hw_ste)
+{
+ struct mlx5dr_ste *ste;
+
+ if (list_empty(miss_list))
+ return NULL;
+
+ /* Check if hw_ste is present in the list */
+ list_for_each_entry(ste, miss_list, miss_list_node) {
+ if (mlx5dr_ste_equal_tag(ste->hw_ste, hw_ste))
+ return ste;
+ }
+
+ return NULL;
+}
+
+static struct mlx5dr_ste *
+dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct list_head *update_list,
+ struct mlx5dr_ste *col_ste,
+ u8 *hw_ste)
+{
+ struct mlx5dr_ste *new_ste;
+ int ret;
+
+ new_ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste);
+ if (!new_ste)
+ return NULL;
+
+ /* In collision entry, all members share the same miss_list_head */
+ new_ste->htbl->miss_list = mlx5dr_ste_get_miss_list(col_ste);
+
+	/* Append to the miss list and update the previous last entry */
+ ret = dr_rule_append_to_miss_list(new_ste,
+ mlx5dr_ste_get_miss_list(col_ste),
+ update_list);
+ if (ret) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Failed update dup entry\n");
+ goto err_exit;
+ }
+
+ return new_ste;
+
+err_exit:
+ mlx5dr_ste_free(new_ste, matcher, nic_matcher);
+ return NULL;
+}
+
+static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste *cur_ste,
+ struct mlx5dr_ste *new_ste)
+{
+ new_ste->next_htbl = cur_ste->next_htbl;
+ new_ste->ste_chain_location = cur_ste->ste_chain_location;
+
+ if (!mlx5dr_ste_is_last_in_rule(nic_matcher, new_ste->ste_chain_location))
+ new_ste->next_htbl->pointing_ste = new_ste;
+
+ /* We need to copy the refcount since this ste
+ * may have been traversed several times
+ */
+ refcount_set(&new_ste->refcount, refcount_read(&cur_ste->refcount));
+
+ /* Link old STEs rule_mem list to the new ste */
+ mlx5dr_rule_update_rule_member(cur_ste, new_ste);
+ INIT_LIST_HEAD(&new_ste->rule_list);
+ list_splice_tail_init(&cur_ste->rule_list, &new_ste->rule_list);
+}
+
+static struct mlx5dr_ste *
+dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste *cur_ste,
+ struct mlx5dr_ste_htbl *new_htbl,
+ struct list_head *update_list)
+{
+ struct mlx5dr_ste_send_info *ste_info;
+ bool use_update_list = false;
+ u8 hw_ste[DR_STE_SIZE] = {};
+ struct mlx5dr_ste *new_ste;
+ int new_idx;
+ u8 sb_idx;
+
+ /* Copy STE mask from the matcher */
+ sb_idx = cur_ste->ste_chain_location - 1;
+ mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask);
+
+ /* Copy STE control and tag */
+ memcpy(hw_ste, cur_ste->hw_ste, DR_STE_SIZE_REDUCED);
+ mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr);
+
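+	/* Re-hash the copied STE into the new (larger) table; on collision,
+	 * chain it through a dedicated collision entry.
+	 */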
+ new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl);
+ new_ste = &new_htbl->ste_arr[new_idx];
+
+ if (mlx5dr_ste_not_used_ste(new_ste)) {
+ mlx5dr_htbl_get(new_htbl);
+ list_add_tail(&new_ste->miss_list_node,
+ mlx5dr_ste_get_miss_list(new_ste));
+ } else {
+ new_ste = dr_rule_rehash_handle_collision(matcher,
+ nic_matcher,
+ update_list,
+ new_ste,
+ hw_ste);
+ if (!new_ste) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Failed adding collision entry, index: %d\n",
+ new_idx);
+ return NULL;
+ }
+ new_htbl->ctrl.num_of_collisions++;
+ use_update_list = true;
+ }
+
+ memcpy(new_ste->hw_ste, hw_ste, DR_STE_SIZE_REDUCED);
+
+ new_htbl->ctrl.num_of_valid_entries++;
+
+ if (use_update_list) {
+ ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL);
+ if (!ste_info)
+ goto err_exit;
+
+ mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0,
+ hw_ste, ste_info,
+ update_list, true);
+ }
+
+ dr_rule_rehash_copy_ste_ctrl(matcher, nic_matcher, cur_ste, new_ste);
+
+ return new_ste;
+
+err_exit:
+ mlx5dr_ste_free(new_ste, matcher, nic_matcher);
+ return NULL;
+}
+
+static int dr_rule_rehash_copy_miss_list(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct list_head *cur_miss_list,
+ struct mlx5dr_ste_htbl *new_htbl,
+ struct list_head *update_list)
+{
+ struct mlx5dr_ste *tmp_ste, *cur_ste, *new_ste;
+
+ if (list_empty(cur_miss_list))
+ return 0;
+
+ list_for_each_entry_safe(cur_ste, tmp_ste, cur_miss_list, miss_list_node) {
+ new_ste = dr_rule_rehash_copy_ste(matcher,
+ nic_matcher,
+ cur_ste,
+ new_htbl,
+ update_list);
+ if (!new_ste)
+ goto err_insert;
+
+ list_del(&cur_ste->miss_list_node);
+ mlx5dr_htbl_put(cur_ste->htbl);
+ }
+ return 0;
+
+err_insert:
+ mlx5dr_err(matcher->tbl->dmn, "Fatal error during resize\n");
+ WARN_ON(true);
+ return -EINVAL;
+}
+
+static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste_htbl *cur_htbl,
+ struct mlx5dr_ste_htbl *new_htbl,
+ struct list_head *update_list)
+{
+ struct mlx5dr_ste *cur_ste;
+ int cur_entries;
+ int err = 0;
+ int i;
+
+ cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk_size);
+
+ if (cur_entries < 1) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Invalid number of entries\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < cur_entries; i++) {
+ cur_ste = &cur_htbl->ste_arr[i];
+ if (mlx5dr_ste_not_used_ste(cur_ste)) /* Empty, nothing to copy */
+ continue;
+
+ err = dr_rule_rehash_copy_miss_list(matcher,
+ nic_matcher,
+ mlx5dr_ste_get_miss_list(cur_ste),
+ new_htbl,
+ update_list);
+ if (err)
+ goto clean_copy;
+ }
+
+clean_copy:
+ return err;
+}
+
+static struct mlx5dr_ste_htbl *
+dr_rule_rehash_htbl(struct mlx5dr_rule *rule,
+ struct mlx5dr_rule_rx_tx *nic_rule,
+ struct mlx5dr_ste_htbl *cur_htbl,
+ u8 ste_location,
+ struct list_head *update_list,
+ enum mlx5dr_icm_chunk_size new_size)
+{
+ struct mlx5dr_ste_send_info *del_ste_info, *tmp_ste_info;
+ struct mlx5dr_matcher *matcher = rule->matcher;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_matcher_rx_tx *nic_matcher;
+ struct mlx5dr_ste_send_info *ste_info;
+ struct mlx5dr_htbl_connect_info info;
+ struct mlx5dr_domain_rx_tx *nic_dmn;
+ u8 formatted_ste[DR_STE_SIZE] = {};
+ LIST_HEAD(rehash_table_send_list);
+ struct mlx5dr_ste *ste_to_update;
+ struct mlx5dr_ste_htbl *new_htbl;
+ int err;
+
+ nic_matcher = nic_rule->nic_matcher;
+ nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+
+ ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL);
+ if (!ste_info)
+ return NULL;
+
+ new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+ new_size,
+ cur_htbl->lu_type,
+ cur_htbl->byte_mask);
+ if (!new_htbl) {
+ mlx5dr_err(dmn, "Failed to allocate new hash table\n");
+ goto free_ste_info;
+ }
+
+ /* Write new table to HW */
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr;
+ mlx5dr_ste_set_formatted_ste(dmn->info.caps.gvmi,
+ nic_dmn,
+ new_htbl,
+ formatted_ste,
+ &info);
+
+ new_htbl->pointing_ste = cur_htbl->pointing_ste;
+ new_htbl->pointing_ste->next_htbl = new_htbl;
+ err = dr_rule_rehash_copy_htbl(matcher,
+ nic_matcher,
+ cur_htbl,
+ new_htbl,
+ &rehash_table_send_list);
+ if (err)
+ goto free_new_htbl;
+
+ if (mlx5dr_send_postsend_htbl(dmn, new_htbl, formatted_ste,
+ nic_matcher->ste_builder[ste_location - 1].bit_mask)) {
+ mlx5dr_err(dmn, "Failed writing table to HW\n");
+ goto free_new_htbl;
+ }
+
+	/* Writing to the HW is done in the regular order of rehash_table_send_list,
+	 * so that the original data is written before the miss address of
+	 * collision entries, if any exist.
+	 */
+ if (dr_rule_send_update_list(&rehash_table_send_list, dmn, false)) {
+ mlx5dr_err(dmn, "Failed updating table to HW\n");
+ goto free_ste_list;
+ }
+
+ /* Connect previous hash table to current */
+ if (ste_location == 1) {
+		/* The previous table is an anchor; an anchor's size is always one STE */
+ struct mlx5dr_ste_htbl *prev_htbl = cur_htbl->pointing_ste->htbl;
+
+ /* On matcher s_anchor we keep an extra refcount */
+ mlx5dr_htbl_get(new_htbl);
+ mlx5dr_htbl_put(cur_htbl);
+
+ nic_matcher->s_htbl = new_htbl;
+
+		/* It is safe to call dr_ste_set_hit_addr on the hw_ste here
+		 * (48B length), since it only modifies the first 32B
+		 */
+ mlx5dr_ste_set_hit_addr(prev_htbl->ste_arr[0].hw_ste,
+ new_htbl->chunk->icm_addr,
+ new_htbl->chunk->num_of_entries);
+
+ ste_to_update = &prev_htbl->ste_arr[0];
+ } else {
+ mlx5dr_ste_set_hit_addr_by_next_htbl(cur_htbl->pointing_ste->hw_ste,
+ new_htbl);
+ ste_to_update = cur_htbl->pointing_ste;
+ }
+
+ mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_REDUCED,
+ 0, ste_to_update->hw_ste, ste_info,
+ update_list, false);
+
+ return new_htbl;
+
+free_ste_list:
+ /* Clean all ste_info's from the new table */
+ list_for_each_entry_safe(del_ste_info, tmp_ste_info,
+ &rehash_table_send_list, send_list) {
+ list_del(&del_ste_info->send_list);
+ kfree(del_ste_info);
+ }
+
+free_new_htbl:
+ mlx5dr_ste_htbl_free(new_htbl);
+free_ste_info:
+ kfree(ste_info);
+ mlx5dr_info(dmn, "Failed creating rehash table\n");
+ return NULL;
+}
+
+static struct mlx5dr_ste_htbl *dr_rule_rehash(struct mlx5dr_rule *rule,
+ struct mlx5dr_rule_rx_tx *nic_rule,
+ struct mlx5dr_ste_htbl *cur_htbl,
+ u8 ste_location,
+ struct list_head *update_list)
+{
+ struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
+ enum mlx5dr_icm_chunk_size new_size;
+
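+	/* Grow to the next chunk size, capped by the maximal SW ICM size */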
+ new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk_size);
+ new_size = min_t(u32, new_size, dmn->info.max_log_sw_icm_sz);
+
+ if (new_size == cur_htbl->chunk_size)
+		return NULL; /* Skip rehash, we are already at the max size */
+
+ return dr_rule_rehash_htbl(rule, nic_rule, cur_htbl, ste_location,
+ update_list, new_size);
+}
+
+static struct mlx5dr_ste *
+dr_rule_handle_collision(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste *ste,
+ u8 *hw_ste,
+ struct list_head *miss_list,
+ struct list_head *send_list)
+{
+ struct mlx5dr_ste_send_info *ste_info;
+ struct mlx5dr_ste *new_ste;
+
+ ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL);
+ if (!ste_info)
+ return NULL;
+
+ new_ste = dr_rule_create_collision_entry(matcher, nic_matcher, hw_ste, ste);
+ if (!new_ste)
+ goto free_send_info;
+
+ if (dr_rule_append_to_miss_list(new_ste, miss_list, send_list)) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Failed to update prev miss_list\n");
+ goto err_exit;
+ }
+
+ mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0, hw_ste,
+ ste_info, send_list, false);
+
+ ste->htbl->ctrl.num_of_collisions++;
+ ste->htbl->ctrl.num_of_valid_entries++;
+
+ return new_ste;
+
+err_exit:
+ mlx5dr_ste_free(new_ste, matcher, nic_matcher);
+free_send_info:
+ kfree(ste_info);
+ return NULL;
+}
+
+static void dr_rule_remove_action_members(struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_rule_action_member *action_mem;
+ struct mlx5dr_rule_action_member *tmp;
+
+ list_for_each_entry_safe(action_mem, tmp, &rule->rule_actions_list, list) {
+ list_del(&action_mem->list);
+ refcount_dec(&action_mem->action->refcount);
+ kvfree(action_mem);
+ }
+}
+
+static int dr_rule_add_action_members(struct mlx5dr_rule *rule,
+ size_t num_actions,
+ struct mlx5dr_action *actions[])
+{
+ struct mlx5dr_rule_action_member *action_mem;
+ int i;
+
+ for (i = 0; i < num_actions; i++) {
+ action_mem = kvzalloc(sizeof(*action_mem), GFP_KERNEL);
+ if (!action_mem)
+ goto free_action_members;
+
+ action_mem->action = actions[i];
+ INIT_LIST_HEAD(&action_mem->list);
+ list_add_tail(&action_mem->list, &rule->rule_actions_list);
+ refcount_inc(&action_mem->action->refcount);
+ }
+
+ return 0;
+
+free_action_members:
+ dr_rule_remove_action_members(rule);
+ return -ENOMEM;
+}
+
+/* When an STE pointer is no longer valid, e.g. when the STE is moved to be
+ * the first in the miss_list and placed back in the origin table,
+ * all rule members attached to this STE must update their ste member
+ * to the new pointer.
+ */
+void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *ste,
+ struct mlx5dr_ste *new_ste)
+{
+ struct mlx5dr_rule_member *rule_mem;
+
+ if (!list_empty(&ste->rule_list))
+ list_for_each_entry(rule_mem, &ste->rule_list, use_ste_list)
+ rule_mem->ste = new_ste;
+}
+
+static void dr_rule_clean_rule_members(struct mlx5dr_rule *rule,
+ struct mlx5dr_rule_rx_tx *nic_rule)
+{
+ struct mlx5dr_rule_member *rule_mem;
+ struct mlx5dr_rule_member *tmp_mem;
+
+ if (list_empty(&nic_rule->rule_members_list))
+ return;
+ list_for_each_entry_safe(rule_mem, tmp_mem, &nic_rule->rule_members_list, list) {
+ list_del(&rule_mem->list);
+ list_del(&rule_mem->use_ste_list);
+ mlx5dr_ste_put(rule_mem->ste, rule->matcher, nic_rule->nic_matcher);
+ kvfree(rule_mem);
+ }
+}
+
+static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl,
+ struct mlx5dr_domain *dmn,
+ struct mlx5dr_domain_rx_tx *nic_dmn)
+{
+ struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl;
+
+ if (dmn->info.max_log_sw_icm_sz <= htbl->chunk_size)
+ return false;
+
+ if (!ctrl->may_grow)
+ return false;
+
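+	/* Grow only when both the collision count and the count of
+	 * non-colliding valid entries have crossed the threshold.
+	 */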
+ if (ctrl->num_of_collisions >= ctrl->increase_threshold &&
+ (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= ctrl->increase_threshold)
+ return true;
+
+ return false;
+}
+
+static int dr_rule_add_member(struct mlx5dr_rule_rx_tx *nic_rule,
+ struct mlx5dr_ste *ste)
+{
+ struct mlx5dr_rule_member *rule_mem;
+
+ rule_mem = kvzalloc(sizeof(*rule_mem), GFP_KERNEL);
+ if (!rule_mem)
+ return -ENOMEM;
+
+ rule_mem->ste = ste;
+ list_add_tail(&rule_mem->list, &nic_rule->rule_members_list);
+
+ list_add_tail(&rule_mem->use_ste_list, &ste->rule_list);
+
+ return 0;
+}
+
+static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
+ struct mlx5dr_rule_rx_tx *nic_rule,
+ struct list_head *send_ste_list,
+ struct mlx5dr_ste *last_ste,
+ u8 *hw_ste_arr,
+ u32 new_hw_ste_arr_sz)
+{
+ struct mlx5dr_matcher_rx_tx *nic_matcher = nic_rule->nic_matcher;
+ struct mlx5dr_ste_send_info *ste_info_arr[DR_ACTION_MAX_STES];
+ u8 num_of_builders = nic_matcher->num_of_builders;
+ struct mlx5dr_matcher *matcher = rule->matcher;
+ u8 *curr_hw_ste, *prev_hw_ste;
+ struct mlx5dr_ste *action_ste;
+ int i, k, ret;
+
+	/* Two cases:
+	 * 1. num_of_builders equals new_hw_ste_arr_sz: the actions fit in the
+	 *    existing STEs.
+	 * 2. num_of_builders is less than new_hw_ste_arr_sz: new STEs were
+	 *    added to support the actions.
+	 */
+ if (num_of_builders == new_hw_ste_arr_sz)
+ return 0;
+
+ for (i = num_of_builders, k = 0; i < new_hw_ste_arr_sz; i++, k++) {
+ curr_hw_ste = hw_ste_arr + i * DR_STE_SIZE;
+ prev_hw_ste = (i == 0) ? curr_hw_ste : hw_ste_arr + ((i - 1) * DR_STE_SIZE);
+ action_ste = dr_rule_create_collision_htbl(matcher,
+ nic_matcher,
+ curr_hw_ste);
+ if (!action_ste)
+ return -ENOMEM;
+
+ mlx5dr_ste_get(action_ste);
+
+		/* When freeing an STE we walk its miss list, so add this STE to the list */
+ list_add_tail(&action_ste->miss_list_node,
+ mlx5dr_ste_get_miss_list(action_ste));
+
+ ste_info_arr[k] = kzalloc(sizeof(*ste_info_arr[k]),
+ GFP_KERNEL);
+ if (!ste_info_arr[k])
+ goto err_exit;
+
+ /* Point current ste to the new action */
+ mlx5dr_ste_set_hit_addr_by_next_htbl(prev_hw_ste, action_ste->htbl);
+ ret = dr_rule_add_member(nic_rule, action_ste);
+ if (ret) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Failed adding rule member\n");
+ goto free_ste_info;
+ }
+ mlx5dr_send_fill_and_append_ste_send_info(action_ste, DR_STE_SIZE, 0,
+ curr_hw_ste,
+ ste_info_arr[k],
+ send_ste_list, false);
+ }
+
+ return 0;
+
+free_ste_info:
+ kfree(ste_info_arr[k]);
+err_exit:
+ mlx5dr_ste_put(action_ste, matcher, nic_matcher);
+ return -ENOMEM;
+}
+
+static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste_htbl *cur_htbl,
+ struct mlx5dr_ste *ste,
+ u8 ste_location,
+ u8 *hw_ste,
+ struct list_head *miss_list,
+ struct list_head *send_list)
+{
+ struct mlx5dr_ste_send_info *ste_info;
+
+ /* Take ref on table, only on first time this ste is used */
+ mlx5dr_htbl_get(cur_htbl);
+
+ /* new entry -> new branch */
+ list_add_tail(&ste->miss_list_node, miss_list);
+
+ mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr);
+
+ ste->ste_chain_location = ste_location;
+
+ ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL);
+ if (!ste_info)
+ goto clean_ste_setting;
+
+ if (mlx5dr_ste_create_next_htbl(matcher,
+ nic_matcher,
+ ste,
+ hw_ste,
+ DR_CHUNK_SIZE_1)) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Failed allocating table\n");
+ goto clean_ste_info;
+ }
+
+ cur_htbl->ctrl.num_of_valid_entries++;
+
+ mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, 0, hw_ste,
+ ste_info, send_list, false);
+
+ return 0;
+
+clean_ste_info:
+ kfree(ste_info);
+clean_ste_setting:
+ list_del_init(&ste->miss_list_node);
+ mlx5dr_htbl_put(cur_htbl);
+
+ return -ENOMEM;
+}
+
+static struct mlx5dr_ste *
+dr_rule_handle_ste_branch(struct mlx5dr_rule *rule,
+ struct mlx5dr_rule_rx_tx *nic_rule,
+ struct list_head *send_ste_list,
+ struct mlx5dr_ste_htbl *cur_htbl,
+ u8 *hw_ste,
+ u8 ste_location,
+ struct mlx5dr_ste_htbl **put_htbl)
+{
+ struct mlx5dr_matcher *matcher = rule->matcher;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_matcher_rx_tx *nic_matcher;
+ struct mlx5dr_domain_rx_tx *nic_dmn;
+ struct mlx5dr_ste_htbl *new_htbl;
+ struct mlx5dr_ste *matched_ste;
+ struct list_head *miss_list;
+ bool skip_rehash = false;
+ struct mlx5dr_ste *ste;
+ int index;
+
+ nic_matcher = nic_rule->nic_matcher;
+ nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+
+again:
+ index = mlx5dr_ste_calc_hash_index(hw_ste, cur_htbl);
+ miss_list = &cur_htbl->chunk->miss_list[index];
+ ste = &cur_htbl->ste_arr[index];
+
+ if (mlx5dr_ste_not_used_ste(ste)) {
+ if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl,
+ ste, ste_location,
+ hw_ste, miss_list,
+ send_ste_list))
+ return NULL;
+ } else {
+ /* Hash table index in use, check if this ste is in the miss list */
+ matched_ste = dr_rule_find_ste_in_miss_list(miss_list, hw_ste);
+ if (matched_ste) {
+			/* If this is the last STE in the chain and it has the
+			 * same tag, then all the previous STEs are identical,
+			 * which means the rule is a duplicate.
+			 */
+ if (mlx5dr_ste_is_last_in_rule(nic_matcher,
+ matched_ste->ste_chain_location)) {
+ mlx5dr_info(dmn, "Duplicate rule inserted, aborting!!\n");
+ return NULL;
+ }
+ return matched_ste;
+ }
+
+ if (!skip_rehash && dr_rule_need_enlarge_hash(cur_htbl, dmn, nic_dmn)) {
+			/* Hash table index in use, try to resize the hash table */
+ skip_rehash = true;
+
+ /* Hold the table till we update.
+ * Release in dr_rule_create_rule()
+ */
+ *put_htbl = cur_htbl;
+ mlx5dr_htbl_get(cur_htbl);
+
+ new_htbl = dr_rule_rehash(rule, nic_rule, cur_htbl,
+ ste_location, send_ste_list);
+ if (!new_htbl) {
+ mlx5dr_htbl_put(cur_htbl);
+ mlx5dr_info(dmn, "failed creating rehash table, htbl-log_size: %d\n",
+ cur_htbl->chunk_size);
+ } else {
+ cur_htbl = new_htbl;
+ }
+ goto again;
+ } else {
+ /* Hash table index in use, add another collision (miss) */
+ ste = dr_rule_handle_collision(matcher,
+ nic_matcher,
+ ste,
+ hw_ste,
+ miss_list,
+ send_ste_list);
+ if (!ste) {
+ mlx5dr_dbg(dmn, "failed adding collision entry, index: %d\n",
+ index);
+ return NULL;
+ }
+ }
+ }
+ return ste;
+}
+
+static bool dr_rule_cmp_value_to_mask(u8 *mask, u8 *value,
+ u32 s_idx, u32 e_idx)
+{
+ u32 i;
+
+ for (i = s_idx; i < e_idx; i++) {
+ if (value[i] & ~mask[i]) {
+ pr_info("Rule parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool dr_rule_verify(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *value,
+ struct mlx5dr_match_param *param)
+{
+ u8 match_criteria = matcher->match_criteria;
+ size_t value_size = value->match_sz;
+ u8 *mask_p = (u8 *)&matcher->mask;
+ u8 *param_p = (u8 *)param;
+ u32 s_idx, e_idx;
+
+ if (!value_size ||
+ (value_size > sizeof(struct mlx5dr_match_param) ||
+ (value_size % sizeof(u32)))) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Rule parameters length is incorrect\n");
+ return false;
+ }
+
+ mlx5dr_ste_copy_param(matcher->match_criteria, param, value);
+
+ if (match_criteria & DR_MATCHER_CRITERIA_OUTER) {
+ s_idx = offsetof(struct mlx5dr_match_param, outer);
+ e_idx = min(s_idx + sizeof(param->outer), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Rule outer parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC) {
+ s_idx = offsetof(struct mlx5dr_match_param, misc);
+ e_idx = min(s_idx + sizeof(param->misc), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Rule misc parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+
+ if (match_criteria & DR_MATCHER_CRITERIA_INNER) {
+ s_idx = offsetof(struct mlx5dr_match_param, inner);
+ e_idx = min(s_idx + sizeof(param->inner), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Rule inner parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC2) {
+ s_idx = offsetof(struct mlx5dr_match_param, misc2);
+ e_idx = min(s_idx + sizeof(param->misc2), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Rule misc2 parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC3) {
+ s_idx = offsetof(struct mlx5dr_match_param, misc3);
+ e_idx = min(s_idx + sizeof(param->misc3), value_size);
+
+ if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+ mlx5dr_dbg(matcher->tbl->dmn, "Rule misc3 parameters contains a value not specified by mask\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule,
+ struct mlx5dr_rule_rx_tx *nic_rule)
+{
+ dr_rule_clean_rule_members(rule, nic_rule);
+ return 0;
+}
+
+static int dr_rule_destroy_rule_fdb(struct mlx5dr_rule *rule)
+{
+ dr_rule_destroy_rule_nic(rule, &rule->rx);
+ dr_rule_destroy_rule_nic(rule, &rule->tx);
+ return 0;
+}
+
+static int dr_rule_destroy_rule(struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn;
+
+ switch (dmn->type) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ dr_rule_destroy_rule_nic(rule, &rule->rx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ dr_rule_destroy_rule_nic(rule, &rule->tx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ dr_rule_destroy_rule_fdb(rule);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ dr_rule_remove_action_members(rule);
+ kfree(rule);
+ return 0;
+}
+
+static bool dr_rule_is_ipv6(struct mlx5dr_match_param *param)
+{
+ return (param->outer.ip_version == 6 ||
+ param->inner.ip_version == 6 ||
+ param->outer.ethertype == ETH_P_IPV6 ||
+ param->inner.ethertype == ETH_P_IPV6);
+}
+
+static bool dr_rule_skip(enum mlx5dr_domain_type domain,
+ enum mlx5dr_ste_entry_type ste_type,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_match_param *value)
+{
+ if (domain != MLX5DR_DOMAIN_TYPE_FDB)
+ return false;
+
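+	/* In FDB, the RX side only sees traffic whose source is the wire
+	 * port, and the TX side only traffic sourced from a vport, so skip
+	 * the side on which the rule can never match.
+	 */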
+ if (mask->misc.source_port) {
+ if (ste_type == MLX5DR_STE_TYPE_RX)
+ if (value->misc.source_port != WIRE_PORT)
+ return true;
+
+ if (ste_type == MLX5DR_STE_TYPE_TX)
+ if (value->misc.source_port == WIRE_PORT)
+ return true;
+ }
+
+ /* Metadata C can be used to describe the source vport */
+ if (mask->misc2.metadata_reg_c_0) {
+ if (ste_type == MLX5DR_STE_TYPE_RX)
+ if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) != WIRE_PORT)
+ return true;
+
+ if (ste_type == MLX5DR_STE_TYPE_TX)
+ if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) == WIRE_PORT)
+ return true;
+ }
+ return false;
+}
+
+static int
+dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
+ struct mlx5dr_rule_rx_tx *nic_rule,
+ struct mlx5dr_match_param *param,
+ size_t num_actions,
+ struct mlx5dr_action *actions[])
+{
+ struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info;
+ struct mlx5dr_matcher *matcher = rule->matcher;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_matcher_rx_tx *nic_matcher;
+ struct mlx5dr_domain_rx_tx *nic_dmn;
+ struct mlx5dr_ste_htbl *htbl = NULL;
+ struct mlx5dr_ste_htbl *cur_htbl;
+ struct mlx5dr_ste *ste = NULL;
+ LIST_HEAD(send_ste_list);
+ u8 *hw_ste_arr = NULL;
+ u32 new_hw_ste_arr_sz;
+ int ret, i;
+
+ nic_matcher = nic_rule->nic_matcher;
+ nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+
+ INIT_LIST_HEAD(&nic_rule->rule_members_list);
+
+ if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param))
+ return 0;
+
+ ret = mlx5dr_matcher_select_builders(matcher,
+ nic_matcher,
+ dr_rule_is_ipv6(param));
+ if (ret)
+ goto out_err;
+
+ hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL);
+ if (!hw_ste_arr) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ /* Set the tag values inside the ste array */
+ ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr);
+ if (ret)
+ goto free_hw_ste;
+
+ /* Set the actions values/addresses inside the ste array */
+ ret = mlx5dr_actions_build_ste_arr(matcher, nic_matcher, actions,
+ num_actions, hw_ste_arr,
+ &new_hw_ste_arr_sz);
+ if (ret)
+ goto free_hw_ste;
+
+ cur_htbl = nic_matcher->s_htbl;
+
+	/* Go over the array of STEs and build a dr_ste for each entry.
+	 * The loop covers only the builders, which may be fewer than the
+	 * number of STEs when actions live in additional STEs.
+	 */
+ for (i = 0; i < nic_matcher->num_of_builders; i++) {
+ /* Calculate CRC and keep new ste entry */
+ u8 *cur_hw_ste_ent = hw_ste_arr + (i * DR_STE_SIZE);
+
+ ste = dr_rule_handle_ste_branch(rule,
+ nic_rule,
+ &send_ste_list,
+ cur_htbl,
+ cur_hw_ste_ent,
+ i + 1,
+ &htbl);
+ if (!ste) {
+ mlx5dr_err(dmn, "Failed creating next branch\n");
+ ret = -ENOENT;
+ goto free_rule;
+ }
+
+ cur_htbl = ste->next_htbl;
+
+ /* Keep all STEs in the rule struct */
+ ret = dr_rule_add_member(nic_rule, ste);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Failed adding rule member index %d\n", i);
+ goto free_ste;
+ }
+
+ mlx5dr_ste_get(ste);
+ }
+
+ /* Connect actions */
+ ret = dr_rule_handle_action_stes(rule, nic_rule, &send_ste_list,
+ ste, hw_ste_arr, new_hw_ste_arr_sz);
+ if (ret) {
+ mlx5dr_dbg(dmn, "Failed apply actions\n");
+ goto free_rule;
+ }
+ ret = dr_rule_send_update_list(&send_ste_list, dmn, true);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed sending ste!\n");
+ goto free_rule;
+ }
+
+ if (htbl)
+ mlx5dr_htbl_put(htbl);
+
+ return 0;
+
+free_ste:
+ mlx5dr_ste_put(ste, matcher, nic_matcher);
+free_rule:
+ dr_rule_clean_rule_members(rule, nic_rule);
+ /* Clean all ste_info's */
+ list_for_each_entry_safe(ste_info, tmp_ste_info, &send_ste_list, send_list) {
+ list_del(&ste_info->send_list);
+ kfree(ste_info);
+ }
+free_hw_ste:
+ kfree(hw_ste_arr);
+out_err:
+ return ret;
+}
+
+static int
+dr_rule_create_rule_fdb(struct mlx5dr_rule *rule,
+ struct mlx5dr_match_param *param,
+ size_t num_actions,
+ struct mlx5dr_action *actions[])
+{
+ struct mlx5dr_match_param copy_param = {};
+ int ret;
+
+	/* Copy match_param since it will be consumed during the first
+	 * nic_rule insertion.
+	 */
+ memcpy(&copy_param, param, sizeof(struct mlx5dr_match_param));
+
+ ret = dr_rule_create_rule_nic(rule, &rule->rx, param,
+ num_actions, actions);
+ if (ret)
+ return ret;
+
+ ret = dr_rule_create_rule_nic(rule, &rule->tx, &copy_param,
+ num_actions, actions);
+ if (ret)
+ goto destroy_rule_nic_rx;
+
+ return 0;
+
+destroy_rule_nic_rx:
+ dr_rule_destroy_rule_nic(rule, &rule->rx);
+ return ret;
+}
+
+static struct mlx5dr_rule *
+dr_rule_create_rule(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *value,
+ size_t num_actions,
+ struct mlx5dr_action *actions[])
+{
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_match_param param = {};
+ struct mlx5dr_rule *rule;
+ int ret;
+
+ if (!dr_rule_verify(matcher, value, &param))
+ return NULL;
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule)
+ return NULL;
+
+ rule->matcher = matcher;
+ INIT_LIST_HEAD(&rule->rule_actions_list);
+
+ ret = dr_rule_add_action_members(rule, num_actions, actions);
+ if (ret)
+ goto free_rule;
+
+ switch (dmn->type) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ rule->rx.nic_matcher = &matcher->rx;
+ ret = dr_rule_create_rule_nic(rule, &rule->rx, &param,
+ num_actions, actions);
+ break;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ rule->tx.nic_matcher = &matcher->tx;
+ ret = dr_rule_create_rule_nic(rule, &rule->tx, &param,
+ num_actions, actions);
+ break;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ rule->rx.nic_matcher = &matcher->rx;
+ rule->tx.nic_matcher = &matcher->tx;
+ ret = dr_rule_create_rule_fdb(rule, &param,
+ num_actions, actions);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret)
+ goto remove_action_members;
+
+ return rule;
+
+remove_action_members:
+ dr_rule_remove_action_members(rule);
+free_rule:
+ kfree(rule);
+ mlx5dr_info(dmn, "Failed creating rule\n");
+ return NULL;
+}
+
+struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *value,
+ size_t num_actions,
+ struct mlx5dr_action *actions[])
+{
+ struct mlx5dr_rule *rule;
+
+ mutex_lock(&matcher->tbl->dmn->mutex);
+ refcount_inc(&matcher->refcount);
+
+ rule = dr_rule_create_rule(matcher, value, num_actions, actions);
+ if (!rule)
+ refcount_dec(&matcher->refcount);
+
+ mutex_unlock(&matcher->tbl->dmn->mutex);
+
+ return rule;
+}
+
+int mlx5dr_rule_destroy(struct mlx5dr_rule *rule)
+{
+ struct mlx5dr_matcher *matcher = rule->matcher;
+ struct mlx5dr_table *tbl = rule->matcher->tbl;
+ int ret;
+
+ mutex_lock(&tbl->dmn->mutex);
+
+ ret = dr_rule_destroy_rule(rule);
+
+ mutex_unlock(&tbl->dmn->mutex);
+
+ if (!ret)
+ refcount_dec(&matcher->refcount);
+ return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
new file mode 100644
index 000000000000..ef0dea44f3b3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -0,0 +1,976 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
+#define QUEUE_SIZE 128
+#define SIGNAL_PER_DIV_QUEUE 16
+#define TH_NUMS_TO_DRAIN 2
+
+enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
+
+struct dr_data_seg {
+ u64 addr;
+ u32 length;
+ u32 lkey;
+ unsigned int send_flags;
+};
+
+struct postsend_info {
+ struct dr_data_seg write;
+ struct dr_data_seg read;
+ u64 remote_addr;
+ u32 rkey;
+};
+
+struct dr_qp_rtr_attr {
+ struct mlx5dr_cmd_gid_attr dgid_attr;
+ enum ib_mtu mtu;
+ u32 qp_num;
+ u16 port_num;
+ u8 min_rnr_timer;
+ u8 sgid_index;
+ u16 udp_src_port;
+};
+
+struct dr_qp_rts_attr {
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+};
+
+struct dr_qp_init_attr {
+ u32 cqn;
+ u32 pdn;
+ u32 max_send_wr;
+ struct mlx5_uars_page *uar;
+};
+
+static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
+{
+ unsigned int idx;
+ u8 opcode;
+
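+	/* Advance the SQ consumer index according to the completed WQE;
+	 * requester and responder errors are reported as CQ_POLL_ERR.
+	 */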
+ opcode = get_cqe_opcode(cqe64);
+ if (opcode == MLX5_CQE_REQ_ERR) {
+ idx = be16_to_cpu(cqe64->wqe_counter) &
+ (dr_cq->qp->sq.wqe_cnt - 1);
+ dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
+ } else if (opcode == MLX5_CQE_RESP_ERR) {
+ ++dr_cq->qp->sq.cc;
+ } else {
+ idx = be16_to_cpu(cqe64->wqe_counter) &
+ (dr_cq->qp->sq.wqe_cnt - 1);
+ dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
+
+ return CQ_OK;
+ }
+
+ return CQ_POLL_ERR;
+}
+
+static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
+{
+ struct mlx5_cqe64 *cqe64;
+ int err;
+
+ cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
+ if (!cqe64)
+ return CQ_EMPTY;
+
+ mlx5_cqwq_pop(&dr_cq->wq);
+ err = dr_parse_cqe(dr_cq, cqe64);
+ mlx5_cqwq_update_db_record(&dr_cq->wq);
+
+ return err;
+}
+
+static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
+{
+ int npolled;
+ int err = 0;
+
+ for (npolled = 0; npolled < ne; ++npolled) {
+ err = dr_cq_poll_one(dr_cq);
+ if (err != CQ_OK)
+ break;
+ }
+
+ return err == CQ_POLL_ERR ? err : npolled;
+}
+
+static void dr_qp_event(struct mlx5_core_qp *mqp, int event)
+{
+ pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn);
+}
+
+static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
+ struct dr_qp_init_attr *attr)
+{
+ u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
+ struct mlx5_wq_param wqp;
+ struct mlx5dr_qp *dr_qp;
+ int inlen;
+ void *qpc;
+ void *in;
+ int err;
+
+ dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
+ if (!dr_qp)
+ return NULL;
+
+ wqp.buf_numa_node = mdev->priv.numa_node;
+ wqp.db_numa_node = mdev->priv.numa_node;
+
+ dr_qp->rq.pc = 0;
+ dr_qp->rq.cc = 0;
+ dr_qp->rq.wqe_cnt = 4;
+ dr_qp->sq.pc = 0;
+ dr_qp->sq.cc = 0;
+ dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);
+
+ MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+ MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
+ MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
+ err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
+ &dr_qp->wq_ctrl);
+ if (err) {
+ mlx5_core_info(mdev, "Can't create QP WQ\n");
+ goto err_wq;
+ }
+
+ dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
+ sizeof(dr_qp->sq.wqe_head[0]),
+ GFP_KERNEL);
+
+ if (!dr_qp->sq.wqe_head) {
+ mlx5_core_warn(mdev, "Can't allocate wqe head\n");
+ goto err_wqe_head;
+ }
+
+ inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+ MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
+ dr_qp->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_in;
+ }
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, pd, attr->pdn);
+ MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
+ MLX5_SET(qpc, qpc, log_page_size,
+ dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(qpc, qpc, fre, 1);
+ MLX5_SET(qpc, qpc, rlky, 1);
+ MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
+ MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
+ MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+ MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
+ MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
+ MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
+ MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
+ if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
+ MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
+ mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(create_qp_in,
+ in, pas));
+
+ err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen);
+ kfree(in);
+
+ if (err) {
+ mlx5_core_warn(mdev, " Can't create QP\n");
+ goto err_in;
+ }
+ dr_qp->mqp.event = dr_qp_event;
+ dr_qp->uar = attr->uar;
+
+ return dr_qp;
+
+err_in:
+ kfree(dr_qp->sq.wqe_head);
+err_wqe_head:
+ mlx5_wq_destroy(&dr_qp->wq_ctrl);
+err_wq:
+ kfree(dr_qp);
+ return NULL;
+}
+
+static void dr_destroy_qp(struct mlx5_core_dev *mdev,
+ struct mlx5dr_qp *dr_qp)
+{
+ mlx5_core_destroy_qp(mdev, &dr_qp->mqp);
+ kfree(dr_qp->sq.wqe_head);
+ mlx5_wq_destroy(&dr_qp->wq_ctrl);
+ kfree(dr_qp);
+}
+
+static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
+{
+ dma_wmb();
+ *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);
+
+	/* After the wmb(), the HW is aware of the new work */
+ wmb();
+
+ mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
+}
+
+static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
+ u32 rkey, struct dr_data_seg *data_seg,
+ u32 opcode, int nreq)
+{
+ struct mlx5_wqe_raddr_seg *wq_raddr;
+ struct mlx5_wqe_ctrl_seg *wq_ctrl;
+ struct mlx5_wqe_data_seg *wq_dseg;
+ unsigned int size;
+ unsigned int idx;
+
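+	/* A single WQE here consists of a control segment, a remote address
+	 * segment and one data segment; the size is counted in 16-byte
+	 * units, as encoded into the control segment.
+	 */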
+ size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
+ sizeof(*wq_raddr) / 16;
+
+ idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
+
+ wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
+ wq_ctrl->imm = 0;
+ wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
+ MLX5_WQE_CTRL_CQ_UPDATE : 0;
+ wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
+ opcode);
+ wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8);
+ wq_raddr = (void *)(wq_ctrl + 1);
+ wq_raddr->raddr = cpu_to_be64(remote_addr);
+ wq_raddr->rkey = cpu_to_be32(rkey);
+ wq_raddr->reserved = 0;
+
+ wq_dseg = (void *)(wq_raddr + 1);
+ wq_dseg->byte_count = cpu_to_be32(data_seg->length);
+ wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
+ wq_dseg->addr = cpu_to_be64(data_seg->addr);
+
+ dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;
+
+ if (nreq)
+ dr_cmd_notify_hw(dr_qp, wq_ctrl);
+}
+
+static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
+{
+ dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
+ &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
+ dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
+ &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
+}
+
+/**
+ * mlx5dr_send_fill_and_append_ste_send_info: Fill a ste_send_info and
+ * append it to the send_list.
+ *
+ * @ste: The STE this data is attached to
+ * @size: Size of the data to write
+ * @offset: Offset of the data from the start of the hw_ste entry
+ * @data: The data to write
+ * @ste_info: The ste_send_info to fill and append to send_list
+ * @send_list: The list to append into
+ * @copy_data: If true, the data is copied and kept because it is not
+ *             backed up anywhere else (e.g. during re-hash).
+ *             If false, the data may still be updated after it was
+ *             added to the list.
+ */
+void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
+ u16 offset, u8 *data,
+ struct mlx5dr_ste_send_info *ste_info,
+ struct list_head *send_list,
+ bool copy_data)
+{
+ ste_info->size = size;
+ ste_info->ste = ste;
+ ste_info->offset = offset;
+
+ if (copy_data) {
+ memcpy(ste_info->data_cont, data, size);
+ ste_info->data = ste_info->data_cont;
+ } else {
+ ste_info->data = data;
+ }
+
+ list_add_tail(&ste_info->send_list, send_list);
+}
+
+/* The function normally consumes one WC per call. If the queue is full,
+ * meaning the HW is behind the SW by a full queue length, the function
+ * will drain the CQ until it is empty.
+ */
+static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
+ struct mlx5dr_send_ring *send_ring)
+{
+ bool is_drain = false;
+ int ne;
+
+ if (send_ring->pending_wqe < send_ring->signal_th)
+ return 0;
+
+	/* Queue is full, start draining it */
+ if (send_ring->pending_wqe >=
+ dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
+ is_drain = true;
+
+ do {
+ ne = dr_poll_cq(send_ring->cq, 1);
+ if (ne < 0)
+ return ne;
+ else if (ne == 1)
+ send_ring->pending_wqe -= send_ring->signal_th;
+ } while (is_drain && send_ring->pending_wqe);
+
+ return 0;
+}
+
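+/* Prepare the write and read segments of one postsend. Both WQEs count
+ * toward pending_wqe, and a completion (IB_SEND_SIGNALED) is requested only
+ * when pending_wqe reaches a multiple of signal_th, so at most one CQE is
+ * generated per signal_th WQEs.
+ */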
+static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
+ struct postsend_info *send_info)
+{
+ send_ring->pending_wqe++;
+
+ if (send_ring->pending_wqe % send_ring->signal_th == 0)
+ send_info->write.send_flags |= IB_SEND_SIGNALED;
+
+ send_ring->pending_wqe++;
+ send_info->read.length = send_info->write.length;
+ /* Read into the same write area */
+ send_info->read.addr = (uintptr_t)send_info->write.addr;
+ send_info->read.lkey = send_ring->mr->mkey.key;
+
+ if (send_ring->pending_wqe % send_ring->signal_th == 0)
+ send_info->read.send_flags = IB_SEND_SIGNALED;
+ else
+ send_info->read.send_flags = 0;
+}
+
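+/* Post one ICM write (+read) on the domain send ring. Payloads larger than
+ * max_inline_size are first staged in the ring buffer MR, in the slot
+ * selected by the low bits of tx_head, presumably because the caller's
+ * buffer may be reused before the transfer completes.
+ */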
+static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
+ struct postsend_info *send_info)
+{
+ struct mlx5dr_send_ring *send_ring = dmn->send_ring;
+ u32 buff_offset;
+ int ret;
+
+ ret = dr_handle_pending_wc(dmn, send_ring);
+ if (ret)
+ return ret;
+
+ if (send_info->write.length > dmn->info.max_inline_size) {
+ buff_offset = (send_ring->tx_head &
+ (dmn->send_ring->signal_th - 1)) *
+ send_ring->max_post_send_size;
+ /* Copy to ring mr */
+ memcpy(send_ring->buf + buff_offset,
+ (void *)(uintptr_t)send_info->write.addr,
+ send_info->write.length);
+ send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
+ send_info->write.lkey = send_ring->mr->mkey.key;
+ }
+
+ send_ring->tx_head++;
+ dr_fill_data_segs(send_ring, send_info);
+ dr_post_send(send_ring->qp, send_info);
+
+ return 0;
+}
+
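+/* Work out how to copy a whole hash table: if the chunk exceeds the max
+ * post-send size the copy is split into several iterations, otherwise a
+ * single buffer covering all entries is used. *data is allocated here and
+ * freed by the caller.
+ */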
+static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ u8 **data,
+ u32 *byte_size,
+ int *iterations,
+ int *num_stes)
+{
+ int alloc_size;
+
+ if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
+ *iterations = htbl->chunk->byte_size /
+ dmn->send_ring->max_post_send_size;
+ *byte_size = dmn->send_ring->max_post_send_size;
+ alloc_size = *byte_size;
+ *num_stes = *byte_size / DR_STE_SIZE;
+ } else {
+ *iterations = 1;
+ *num_stes = htbl->chunk->num_of_entries;
+ alloc_size = *num_stes * DR_STE_SIZE;
+ }
+
+ *data = kzalloc(alloc_size, GFP_KERNEL);
+ if (!*data)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * mlx5dr_send_postsend_ste: write size bytes at offset into the hw icm.
+ *
+ * @dmn: Domain
+ * @ste: The ste struct that contains the data (at
+ * least part of it)
+ * @data: The real data to send
+ * @size: Number of bytes to write
+ * @offset: The offset from the icm mapped data at which to
+ * start writing; used to write only part of the
+ * buffer.
+ *
+ * Return: 0 on success.
+ */
+ */
+int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
+ u8 *data, u16 size, u16 offset)
+{
+ struct postsend_info send_info = {};
+
+ send_info.write.addr = (uintptr_t)data;
+ send_info.write.length = size;
+ send_info.write.lkey = 0;
+ send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
+ send_info.rkey = ste->htbl->chunk->rkey;
+
+ return dr_postsend_icm_data(dmn, &send_info);
+}
+
+int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ u8 *formatted_ste, u8 *mask)
+{
+ u32 byte_size = htbl->chunk->byte_size;
+ int num_stes_per_iter;
+ int iterations;
+ u8 *data;
+ int ret;
+ int i;
+ int j;
+
+ ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
+ &iterations, &num_stes_per_iter);
+ if (ret)
+ return ret;
+
+ /* Send the data in 'iterations' chunks */
+ for (i = 0; i < iterations; i++) {
+ u32 ste_index = i * (byte_size / DR_STE_SIZE);
+ struct postsend_info send_info = {};
+
+ /* Copy all STEs into the data buffer;
+ * the bit_mask needs to be added as well
+ */
+ for (j = 0; j < num_stes_per_iter; j++) {
+ u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste;
+ u32 ste_off = j * DR_STE_SIZE;
+
+ if (mlx5dr_ste_is_not_valid_entry(hw_ste)) {
+ memcpy(data + ste_off,
+ formatted_ste, DR_STE_SIZE);
+ } else {
+ /* Copy data */
+ memcpy(data + ste_off,
+ htbl->ste_arr[ste_index + j].hw_ste,
+ DR_STE_SIZE_REDUCED);
+ /* Copy bit_mask */
+ memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
+ mask, DR_STE_SIZE_MASK);
+ }
+ }
+
+ send_info.write.addr = (uintptr_t)data;
+ send_info.write.length = byte_size;
+ send_info.write.lkey = 0;
+ send_info.remote_addr =
+ mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
+ send_info.rkey = htbl->chunk->rkey;
+
+ ret = dr_postsend_icm_data(dmn, &send_info);
+ if (ret)
+ goto out_free;
+ }
+
+out_free:
+ kfree(data);
+ return ret;
+}
+
+/* Initialize htbl with default STEs */
+int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ u8 *ste_init_data,
+ bool update_hw_ste)
+{
+ u32 byte_size = htbl->chunk->byte_size;
+ int iterations;
+ int num_stes;
+ u8 *data;
+ int ret;
+ int i;
+
+ ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
+ &iterations, &num_stes);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < num_stes; i++) {
+ u8 *copy_dst;
+
+ /* Copy the same ste into the data buffer */
+ copy_dst = data + i * DR_STE_SIZE;
+ memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
+
+ if (update_hw_ste) {
+ /* Copy the reduced ste to hash table ste_arr */
+ copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
+ memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
+ }
+ }
+
+ /* Send the data in 'iterations' chunks */
+ for (i = 0; i < iterations; i++) {
+ u8 ste_index = i * (byte_size / DR_STE_SIZE);
+ struct postsend_info send_info = {};
+
+ send_info.write.addr = (uintptr_t)data;
+ send_info.write.length = byte_size;
+ send_info.write.lkey = 0;
+ send_info.remote_addr =
+ mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
+ send_info.rkey = htbl->chunk->rkey;
+
+ ret = dr_postsend_icm_data(dmn, &send_info);
+ if (ret)
+ goto out_free;
+ }
+
+out_free:
+ kfree(data);
+ return ret;
+}
+
+int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action *action)
+{
+ struct postsend_info send_info = {};
+ int ret;
+
+ send_info.write.addr = (uintptr_t)action->rewrite.data;
+ send_info.write.length = action->rewrite.chunk->byte_size;
+ send_info.write.lkey = 0;
+ send_info.remote_addr = action->rewrite.chunk->mr_addr;
+ send_info.rkey = action->rewrite.chunk->rkey;
+
+ mutex_lock(&dmn->mutex);
+ ret = dr_postsend_icm_data(dmn, &send_info);
+ mutex_unlock(&dmn->mutex);
+
+ return ret;
+}
+
+static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
+ struct mlx5dr_qp *dr_qp,
+ int port)
+{
+ u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
+ void *qpc;
+
+ qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
+
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
+ MLX5_SET(qpc, qpc, rre, 1);
+ MLX5_SET(qpc, qpc, rwe, 1);
+
+ return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
+ &dr_qp->mqp);
+}
+
+static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
+ struct mlx5dr_qp *dr_qp,
+ struct dr_qp_rts_attr *attr)
+{
+ u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
+ void *qpc;
+
+ qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
+
+ MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);
+
+ MLX5_SET(qpc, qpc, log_ack_req_freq, 0);
+ MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
+ MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
+
+ return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc,
+ &dr_qp->mqp);
+}
+
+static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
+ struct mlx5dr_qp *dr_qp,
+ struct dr_qp_rtr_attr *attr)
+{
+ u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
+ void *qpc;
+
+ qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
+
+ MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn);
+
+ MLX5_SET(qpc, qpc, mtu, attr->mtu);
+ MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
+ MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
+ memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
+ attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
+ memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
+ attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
+ MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
+ attr->sgid_index);
+
+ if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
+ MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
+ attr->udp_src_port);
+
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
+ MLX5_SET(qpc, qpc, min_rnr_nak, 1);
+
+ return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
+ &dr_qp->mqp);
+}
+
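+/* Move the send ring QP through the RST -> INIT -> RTR -> RTS states. The QP
+ * is connected to itself (rtr_attr.qp_num is its own qpn), i.e. a loopback RC
+ * connection used only to RDMA into the device-owned ICM.
+ */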
+static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
+ struct dr_qp_rts_attr rts_attr = {};
+ struct dr_qp_rtr_attr rtr_attr = {};
+ enum ib_mtu mtu = IB_MTU_1024;
+ u16 gid_index = 0;
+ int port = 1;
+ int ret;
+
+ /* Init */
+ ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
+ if (ret)
+ return ret;
+
+ /* RTR */
+ ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
+ if (ret)
+ return ret;
+
+ rtr_attr.mtu = mtu;
+ rtr_attr.qp_num = dr_qp->mqp.qpn;
+ rtr_attr.min_rnr_timer = 12;
+ rtr_attr.port_num = port;
+ rtr_attr.sgid_index = gid_index;
+ rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp;
+
+ ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
+ if (ret)
+ return ret;
+
+ /* RTS */
+ rts_attr.timeout = 14;
+ rts_attr.retry_cnt = 7;
+ rts_attr.rnr_retry = 7;
+
+ ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void dr_cq_event(struct mlx5_core_cq *mcq,
+ enum mlx5_event event)
+{
+ pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn);
+}
+
+static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
+ struct mlx5_uars_page *uar,
+ size_t ncqe)
+{
+ u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+ struct mlx5_wq_param wqp;
+ struct mlx5_cqe64 *cqe;
+ struct mlx5dr_cq *cq;
+ int inlen, err, eqn;
+ unsigned int irqn;
+ void *cqc, *in;
+ __be64 *pas;
+ u32 i;
+
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq)
+ return NULL;
+
+ ncqe = roundup_pow_of_two(ncqe);
+ MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));
+
+ wqp.buf_numa_node = mdev->priv.numa_node;
+ wqp.db_numa_node = mdev->priv.numa_node;
+
+ err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
+ &cq->wq_ctrl);
+ if (err)
+ goto out;
+
+ for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
+ cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
+ cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
+ }
+
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ sizeof(u64) * cq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ goto err_cqwq;
+
+ err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn);
+ if (err) {
+ kvfree(in);
+ goto err_cqwq;
+ }
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+ MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
+ MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, uar_page, uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
+
+ pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
+ mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
+
+ cq->mcq.event = dr_cq_event;
+
+ err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
+ kvfree(in);
+
+ if (err)
+ goto err_cqwq;
+
+ cq->mcq.cqe_sz = 64;
+ cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
+ cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
+ *cq->mcq.set_ci_db = 0;
+ *cq->mcq.arm_db = 0;
+ cq->mcq.vector = 0;
+ cq->mcq.irqn = irqn;
+ cq->mcq.uar = uar;
+
+ return cq;
+
+err_cqwq:
+ mlx5_wq_destroy(&cq->wq_ctrl);
+out:
+ kfree(cq);
+ return NULL;
+}
+
+static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
+{
+ mlx5_core_destroy_cq(mdev, &cq->mcq);
+ mlx5_wq_destroy(&cq->wq_ctrl);
+ kfree(cq);
+}
+
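+/* Create a physical-address (PA) mode mkey with length64 set, so the single
+ * key covers the whole address space and DMA addresses can be used directly
+ * for the send ring buffers.
+ */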
+static int
+dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
+{
+ u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
+ void *mkc;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, a, 1);
+ MLX5_SET(mkc, mkc, rw, 1);
+ MLX5_SET(mkc, mkc, rr, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
+}
+
+static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
+ u32 pdn, void *buf, size_t size)
+{
+ struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ struct device *dma_device;
+ dma_addr_t dma_addr;
+ int err;
+
+ if (!mr)
+ return NULL;
+
+ dma_device = &mdev->pdev->dev;
+ dma_addr = dma_map_single(dma_device, buf, size,
+ DMA_BIDIRECTIONAL);
+ err = dma_mapping_error(dma_device, dma_addr);
+ if (err) {
+ mlx5_core_warn(mdev, "Can't dma buf\n");
+ kfree(mr);
+ return NULL;
+ }
+
+ err = dr_create_mkey(mdev, pdn, &mr->mkey);
+ if (err) {
+ mlx5_core_warn(mdev, "Can't create mkey\n");
+ dma_unmap_single(dma_device, dma_addr, size,
+ DMA_BIDIRECTIONAL);
+ kfree(mr);
+ return NULL;
+ }
+
+ mr->dma_addr = dma_addr;
+ mr->size = size;
+ mr->addr = buf;
+
+ return mr;
+}
+
+static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
+{
+ mlx5_core_destroy_mkey(mdev, &mr->mkey);
+ dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size,
+ DMA_BIDIRECTIONAL);
+ kfree(mr);
+}
+
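+/* Allocate the per-domain send ring: a CQ, a loopback RC QP, a bounce buffer
+ * MR sized for signal_th maximum-size posts, and a small sync MR used by the
+ * force-drain flow below.
+ */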
+int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
+{
+ struct dr_qp_init_attr init_attr = {};
+ int cq_size;
+ int size;
+ int ret;
+
+ dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
+ if (!dmn->send_ring)
+ return -ENOMEM;
+
+ cq_size = QUEUE_SIZE + 1;
+ dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
+ if (!dmn->send_ring->cq) {
+ ret = -ENOMEM;
+ goto free_send_ring;
+ }
+
+ init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
+ init_attr.pdn = dmn->pdn;
+ init_attr.uar = dmn->uar;
+ init_attr.max_send_wr = QUEUE_SIZE;
+
+ dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
+ if (!dmn->send_ring->qp) {
+ ret = -ENOMEM;
+ goto clean_cq;
+ }
+
+ dmn->send_ring->cq->qp = dmn->send_ring->qp;
+
+ dmn->info.max_send_wr = QUEUE_SIZE;
+ dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
+ DR_STE_SIZE);
+
+ dmn->send_ring->signal_th = dmn->info.max_send_wr /
+ SIGNAL_PER_DIV_QUEUE;
+
+ /* Prepare qp to be used */
+ ret = dr_prepare_qp_to_rts(dmn);
+ if (ret)
+ goto clean_qp;
+
+ dmn->send_ring->max_post_send_size =
+ mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
+ DR_ICM_TYPE_STE);
+
+ /* Allocating the max size as a buffer for writing */
+ size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
+ dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
+ if (!dmn->send_ring->buf) {
+ ret = -ENOMEM;
+ goto clean_qp;
+ }
+
+ memset(dmn->send_ring->buf, 0, size);
+ dmn->send_ring->buf_size = size;
+
+ dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
+ dmn->pdn, dmn->send_ring->buf, size);
+ if (!dmn->send_ring->mr) {
+ ret = -ENOMEM;
+ goto free_mem;
+ }
+
+ dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
+ dmn->pdn, dmn->send_ring->sync_buff,
+ MIN_READ_SYNC);
+ if (!dmn->send_ring->sync_mr) {
+ ret = -ENOMEM;
+ goto clean_mr;
+ }
+
+ return 0;
+
+clean_mr:
+ dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
+free_mem:
+ kfree(dmn->send_ring->buf);
+clean_qp:
+ dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
+clean_cq:
+ dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
+free_send_ring:
+ kfree(dmn->send_ring);
+
+ return ret;
+}
+
+void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
+ struct mlx5dr_send_ring *send_ring)
+{
+ dr_destroy_qp(dmn->mdev, send_ring->qp);
+ dr_destroy_cq(dmn->mdev, send_ring->cq);
+ dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
+ dr_dereg_mr(dmn->mdev, send_ring->mr);
+ kfree(send_ring->buf);
+ kfree(send_ring);
+}
+
+int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
+{
+ struct mlx5dr_send_ring *send_ring = dmn->send_ring;
+ struct postsend_info send_info = {};
+ u8 data[DR_STE_SIZE];
+ int num_of_sends_req;
+ int ret;
+ int i;
+
+ /* Sending this number of requests makes sure the queue will be drained */
+ num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
+
+ /* Send fake requests forcing the last to be signaled */
+ send_info.write.addr = (uintptr_t)data;
+ send_info.write.length = DR_STE_SIZE;
+ send_info.write.lkey = 0;
+ /* Using the sync_mr in order to write/read */
+ send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
+ send_info.rkey = send_ring->sync_mr->mkey.key;
+
+ for (i = 0; i < num_of_sends_req; i++) {
+ ret = dr_postsend_icm_data(dmn, &send_info);
+ if (ret)
+ return ret;
+ }
+
+ ret = dr_handle_pending_wc(dmn, send_ring);
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
new file mode 100644
index 000000000000..6b0af64536d8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -0,0 +1,2308 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/types.h>
+#include "dr_types.h"
+
+#define DR_STE_CRC_POLY 0xEDB88320L
+#define STE_IPV4 0x1
+#define STE_IPV6 0x2
+#define STE_TCP 0x1
+#define STE_UDP 0x2
+#define STE_SPI 0x3
+#define IP_VERSION_IPV4 0x4
+#define IP_VERSION_IPV6 0x6
+#define STE_SVLAN 0x1
+#define STE_CVLAN 0x2
+
+#define DR_STE_ENABLE_FLOW_TAG BIT(31)
+
+/* Set a specific value into an STE field (only if the spec field is set) and mark the spec field as consumed */
+#define DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, value) do { \
+ if ((spec)->s_fname) { \
+ MLX5_SET(ste_##lookup_type, tag, t_fname, value); \
+ (spec)->s_fname = 0; \
+ } \
+} while (0)
+
+/* Copy spec->s_fname into tag->t_fname */
+#define DR_STE_SET_TAG(lookup_type, tag, t_fname, spec, s_fname) \
+ DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, spec->s_fname)
+
+/* Set bit_mask->bm_fname to -1 and mark spec->s_fname as used */
+#define DR_STE_SET_MASK(lookup_type, bit_mask, bm_fname, spec, s_fname) \
+ DR_STE_SET_VAL(lookup_type, bit_mask, bm_fname, spec, s_fname, -1)
+
+/* Copy spec->s_fname into bit_mask->bm_fname and mark spec->s_fname as used */
+#define DR_STE_SET_MASK_V(lookup_type, bit_mask, bm_fname, spec, s_fname) \
+ DR_STE_SET_VAL(lookup_type, bit_mask, bm_fname, spec, s_fname, (spec)->s_fname)
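+
+/* Illustrative example: DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_47_16, spec,
+ * dmac_47_16) copies spec->dmac_47_16 into the dmac_47_16 field of the STE
+ * tag and then zeroes spec->dmac_47_16, marking the field as consumed so
+ * later builders do not match on it again.
+ */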
+
+#define DR_STE_SET_TCP_FLAGS(lookup_type, tag, spec) do { \
+ MLX5_SET(ste_##lookup_type, tag, tcp_ns, !!((spec)->tcp_flags & (1 << 8))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_cwr, !!((spec)->tcp_flags & (1 << 7))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_ece, !!((spec)->tcp_flags & (1 << 6))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_urg, !!((spec)->tcp_flags & (1 << 5))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_ack, !!((spec)->tcp_flags & (1 << 4))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_psh, !!((spec)->tcp_flags & (1 << 3))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_rst, !!((spec)->tcp_flags & (1 << 2))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_syn, !!((spec)->tcp_flags & (1 << 1))); \
+ MLX5_SET(ste_##lookup_type, tag, tcp_fin, !!((spec)->tcp_flags & (1 << 0))); \
+} while (0)
+
+#define DR_STE_SET_MPLS_MASK(lookup_type, mask, in_out, bit_mask) do { \
+ DR_STE_SET_MASK_V(lookup_type, mask, mpls0_label, mask, \
+ in_out##_first_mpls_label);\
+ DR_STE_SET_MASK_V(lookup_type, mask, mpls0_s_bos, mask, \
+ in_out##_first_mpls_s_bos); \
+ DR_STE_SET_MASK_V(lookup_type, mask, mpls0_exp, mask, \
+ in_out##_first_mpls_exp); \
+ DR_STE_SET_MASK_V(lookup_type, mask, mpls0_ttl, mask, \
+ in_out##_first_mpls_ttl); \
+} while (0)
+
+#define DR_STE_SET_MPLS_TAG(lookup_type, mask, in_out, tag) do { \
+ DR_STE_SET_TAG(lookup_type, tag, mpls0_label, mask, \
+ in_out##_first_mpls_label);\
+ DR_STE_SET_TAG(lookup_type, tag, mpls0_s_bos, mask, \
+ in_out##_first_mpls_s_bos); \
+ DR_STE_SET_TAG(lookup_type, tag, mpls0_exp, mask, \
+ in_out##_first_mpls_exp); \
+ DR_STE_SET_TAG(lookup_type, tag, mpls0_ttl, mask, \
+ in_out##_first_mpls_ttl); \
+} while (0)
+
+#define DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\
+ (_misc)->outer_first_mpls_over_gre_label || \
+ (_misc)->outer_first_mpls_over_gre_exp || \
+ (_misc)->outer_first_mpls_over_gre_s_bos || \
+ (_misc)->outer_first_mpls_over_gre_ttl)
+#define DR_STE_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\
+ (_misc)->outer_first_mpls_over_udp_label || \
+ (_misc)->outer_first_mpls_over_udp_exp || \
+ (_misc)->outer_first_mpls_over_udp_s_bos || \
+ (_misc)->outer_first_mpls_over_udp_ttl)
+
+#define DR_STE_CALC_LU_TYPE(lookup_type, rx, inner) \
+ ((inner) ? MLX5DR_STE_LU_TYPE_##lookup_type##_I : \
+ (rx) ? MLX5DR_STE_LU_TYPE_##lookup_type##_D : \
+ MLX5DR_STE_LU_TYPE_##lookup_type##_O)
+
+enum dr_ste_tunl_action {
+ DR_STE_TUNL_ACTION_NONE = 0,
+ DR_STE_TUNL_ACTION_ENABLE = 1,
+ DR_STE_TUNL_ACTION_DECAP = 2,
+ DR_STE_TUNL_ACTION_L3_DECAP = 3,
+ DR_STE_TUNL_ACTION_POP_VLAN = 4,
+};
+
+enum dr_ste_action_type {
+ DR_STE_ACTION_TYPE_PUSH_VLAN = 1,
+ DR_STE_ACTION_TYPE_ENCAP_L3 = 3,
+ DR_STE_ACTION_TYPE_ENCAP = 4,
+};
+
+struct dr_hw_ste_format {
+ u8 ctrl[DR_STE_SIZE_CTRL];
+ u8 tag[DR_STE_SIZE_TAG];
+ u8 mask[DR_STE_SIZE_MASK];
+};
+
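+/* Hash an STE into its table: the tag is masked byte-by-byte according to the
+ * table's byte_mask, a CRC32 is computed over the masked tag, and the index
+ * is the CRC modulo the (power of two) number of entries.
+ */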
+u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ u8 masked[DR_STE_SIZE_TAG] = {};
+ u32 crc32, index;
+ u16 bit;
+ int i;
+
+ /* Don't calculate the CRC if the result is predictable */
+ if (htbl->chunk->num_of_entries == 1 || htbl->byte_mask == 0)
+ return 0;
+
+ /* Mask tag using byte mask, bit per byte */
+ bit = 1 << (DR_STE_SIZE_TAG - 1);
+ for (i = 0; i < DR_STE_SIZE_TAG; i++) {
+ if (htbl->byte_mask & bit)
+ masked[i] = hw_ste->tag[i];
+
+ bit = bit >> 1;
+ }
+
+ crc32 = mlx5dr_crc32_slice8_calc(masked, DR_STE_SIZE_TAG);
+ index = crc32 & (htbl->chunk->num_of_entries - 1);
+
+ return index;
+}
+
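+/* Convert a per-bit mask into a per-byte mask: each fully set byte (0xff) of
+ * bit_mask contributes one set bit, with bit_mask[0] ending up as the most
+ * significant of the produced bits. E.g. (assuming DR_STE_SIZE_MASK == 16) a
+ * bit_mask whose first two bytes are 0xff and the rest zero gives 0xc000.
+ */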
+static u16 dr_ste_conv_bit_to_byte_mask(u8 *bit_mask)
+{
+ u16 byte_mask = 0;
+ int i;
+
+ for (i = 0; i < DR_STE_SIZE_MASK; i++) {
+ byte_mask = byte_mask << 1;
+ if (bit_mask[i] == 0xff)
+ byte_mask |= 1;
+ }
+ return byte_mask;
+}
+
+void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+
+ memcpy(hw_ste->mask, bit_mask, DR_STE_SIZE_MASK);
+}
+
+void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag)
+{
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, qp_list_pointer,
+ DR_STE_ENABLE_FLOW_TAG | flow_tag);
+}
+
+void mlx5dr_ste_set_counter_id(u8 *hw_ste_p, u32 ctr_id)
+{
+ /* This can be used for both rx_steering_mult and for sx_transmit */
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_15_0, ctr_id);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_23_16, ctr_id >> 16);
+}
+
+void mlx5dr_ste_set_go_back_bit(u8 *hw_ste_p)
+{
+ MLX5_SET(ste_sx_transmit, hw_ste_p, go_back, 1);
+}
+
+void mlx5dr_ste_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_hdr,
+ bool go_back)
+{
+ MLX5_SET(ste_sx_transmit, hw_ste_p, action_type,
+ DR_STE_ACTION_TYPE_PUSH_VLAN);
+ MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, vlan_hdr);
+ /* Due to a HW limitation we need to set this bit, otherwise reformat +
+ * push vlan will not work.
+ */
+ if (go_back)
+ mlx5dr_ste_set_go_back_bit(hw_ste_p);
+}
+
+void mlx5dr_ste_set_tx_encap(void *hw_ste_p, u32 reformat_id, int size, bool encap_l3)
+{
+ MLX5_SET(ste_sx_transmit, hw_ste_p, action_type,
+ encap_l3 ? DR_STE_ACTION_TYPE_ENCAP_L3 : DR_STE_ACTION_TYPE_ENCAP);
+ /* The hardware expects the size here in words (2 bytes each) */
+ MLX5_SET(ste_sx_transmit, hw_ste_p, action_description, size / 2);
+ MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, reformat_id);
+}
+
+void mlx5dr_ste_set_rx_decap(u8 *hw_ste_p)
+{
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
+ DR_STE_TUNL_ACTION_DECAP);
+}
+
+void mlx5dr_ste_set_rx_pop_vlan(u8 *hw_ste_p)
+{
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
+ DR_STE_TUNL_ACTION_POP_VLAN);
+}
+
+void mlx5dr_ste_set_rx_decap_l3(u8 *hw_ste_p, bool vlan)
+{
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action,
+ DR_STE_TUNL_ACTION_L3_DECAP);
+ MLX5_SET(ste_modify_packet, hw_ste_p, action_description, vlan ? 1 : 0);
+}
+
+void mlx5dr_ste_set_entry_type(u8 *hw_ste_p, u8 entry_type)
+{
+ MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type);
+}
+
+u8 mlx5dr_ste_get_entry_type(u8 *hw_ste_p)
+{
+ return MLX5_GET(ste_general, hw_ste_p, entry_type);
+}
+
+void mlx5dr_ste_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions,
+ u32 re_write_index)
+{
+ MLX5_SET(ste_modify_packet, hw_ste_p, number_of_re_write_actions,
+ num_of_actions);
+ MLX5_SET(ste_modify_packet, hw_ste_p, header_re_write_actions_pointer,
+ re_write_index);
+}
+
+void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi)
+{
+ MLX5_SET(ste_general, hw_ste_p, next_table_base_63_48, gvmi);
+}
+
+void mlx5dr_ste_init(u8 *hw_ste_p, u8 lu_type, u8 entry_type,
+ u16 gvmi)
+{
+ MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type);
+ MLX5_SET(ste_general, hw_ste_p, entry_sub_type, lu_type);
+ MLX5_SET(ste_general, hw_ste_p, next_lu_type, MLX5DR_STE_LU_TYPE_DONT_CARE);
+
+ /* Set the GVMI once, it is the same for RX/TX.
+ * Bits 63_48 of the next table base / miss address encode the next GVMI.
+ */
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, gvmi, gvmi);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, next_table_base_63_48, gvmi);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_63_48, gvmi);
+}
+
+static void dr_ste_set_always_hit(struct dr_hw_ste_format *hw_ste)
+{
+ memset(&hw_ste->tag, 0, sizeof(hw_ste->tag));
+ memset(&hw_ste->mask, 0, sizeof(hw_ste->mask));
+}
+
+static void dr_ste_set_always_miss(struct dr_hw_ste_format *hw_ste)
+{
+ hw_ste->tag[0] = 0xdc;
+ hw_ste->mask[0] = 0;
+}
+
+u64 mlx5dr_ste_get_miss_addr(u8 *hw_ste)
+{
+ u64 index =
+ (MLX5_GET(ste_rx_steering_mult, hw_ste, miss_address_31_6) |
+ MLX5_GET(ste_rx_steering_mult, hw_ste, miss_address_39_32) << 26);
+
+ return index << 6;
+}
+
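+/* The hit address packs the next table's ICM address (in 32-byte units) ORed
+ * with its size, split across the two next_table_base_*_size fields.
+ */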
+void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size)
+{
+ u64 index = (icm_addr >> 5) | ht_size;
+
+ MLX5_SET(ste_general, hw_ste, next_table_base_39_32_size, index >> 27);
+ MLX5_SET(ste_general, hw_ste, next_table_base_31_5_size, index);
+}
+
+u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste)
+{
+ u32 index = ste - ste->htbl->ste_arr;
+
+ return ste->htbl->chunk->icm_addr + DR_STE_SIZE * index;
+}
+
+u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste)
+{
+ u32 index = ste - ste->htbl->ste_arr;
+
+ return ste->htbl->chunk->mr_addr + DR_STE_SIZE * index;
+}
+
+struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste)
+{
+ u32 index = ste - ste->htbl->ste_arr;
+
+ return &ste->htbl->miss_list[index];
+}
+
+static void dr_ste_always_hit_htbl(struct mlx5dr_ste *ste,
+ struct mlx5dr_ste_htbl *next_htbl)
+{
+ struct mlx5dr_icm_chunk *chunk = next_htbl->chunk;
+ u8 *hw_ste = ste->hw_ste;
+
+ MLX5_SET(ste_general, hw_ste, byte_mask, next_htbl->byte_mask);
+ MLX5_SET(ste_general, hw_ste, next_lu_type, next_htbl->lu_type);
+ mlx5dr_ste_set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries);
+
+ dr_ste_set_always_hit((struct dr_hw_ste_format *)ste->hw_ste);
+}
+
+bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher,
+ u8 ste_location)
+{
+ return ste_location == nic_matcher->num_of_builders;
+}
+
+/* Replace the relevant fields, except for:
+ * htbl - keep the original htbl
+ * miss_list + list - the src was already taken off the list.
+ * icm_addr/mr_addr - these depend on the hosting table.
+ *
+ * Before:
+ * | a | -> | b | -> | c | ->
+ *
+ * After:
+ * | a | -> | c | ->
+ * where the data that was in b has been copied into a.
+ */
+static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src)
+{
+ memcpy(dst->hw_ste, src->hw_ste, DR_STE_SIZE_REDUCED);
+ dst->next_htbl = src->next_htbl;
+ if (dst->next_htbl)
+ dst->next_htbl->pointing_ste = dst;
+
+ refcount_set(&dst->refcount, refcount_read(&src->refcount));
+
+ INIT_LIST_HEAD(&dst->rule_list);
+ list_splice_tail_init(&src->rule_list, &dst->rule_list);
+}
+
+/* Free ste which is the head and the only one in miss_list */
+static void
+dr_ste_remove_head_ste(struct mlx5dr_ste *ste,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste_send_info *ste_info_head,
+ struct list_head *send_ste_list,
+ struct mlx5dr_ste_htbl *stats_tbl)
+{
+ u8 tmp_data_ste[DR_STE_SIZE] = {};
+ struct mlx5dr_ste tmp_ste = {};
+ u64 miss_addr;
+
+ tmp_ste.hw_ste = tmp_data_ste;
+
+ /* Use a temporary ste because dr_ste_always_miss_addr
+ * touches the bit_mask area, which doesn't exist in ste->hw_ste.
+ */
+ memcpy(tmp_ste.hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED);
+ miss_addr = nic_matcher->e_anchor->chunk->icm_addr;
+ mlx5dr_ste_always_miss_addr(&tmp_ste, miss_addr);
+ memcpy(ste->hw_ste, tmp_ste.hw_ste, DR_STE_SIZE_REDUCED);
+
+ list_del_init(&ste->miss_list_node);
+
+ /* Write the full STE size in order to have "always_miss" */
+ mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE,
+ 0, tmp_data_ste,
+ ste_info_head,
+ send_ste_list,
+ true /* Copy data */);
+
+ stats_tbl->ctrl.num_of_valid_entries--;
+}
+
+/* Free ste which is the head but NOT the only one in miss_list:
+ * |_ste_| --> |_next_ste_| -->|__| -->|__| -->/0
+ */
+static void
+dr_ste_replace_head_ste(struct mlx5dr_ste *ste, struct mlx5dr_ste *next_ste,
+ struct mlx5dr_ste_send_info *ste_info_head,
+ struct list_head *send_ste_list,
+ struct mlx5dr_ste_htbl *stats_tbl)
+
+{
+ struct mlx5dr_ste_htbl *next_miss_htbl;
+
+ next_miss_htbl = next_ste->htbl;
+
+ /* Remove from the miss_list the next_ste before copy */
+ list_del_init(&next_ste->miss_list_node);
+
+ /* All rule-members that use next_ste should know about that */
+ mlx5dr_rule_update_rule_member(next_ste, ste);
+
+ /* Move data from next into ste */
+ dr_ste_replace(ste, next_ste);
+
+ /* Drop the reference to the htbl that contains the next_ste.
+ * The original htbl stays with the same number of entries.
+ */
+ mlx5dr_htbl_put(next_miss_htbl);
+
+ mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE_REDUCED,
+ 0, ste->hw_ste,
+ ste_info_head,
+ send_ste_list,
+ true /* Copy data */);
+
+ stats_tbl->ctrl.num_of_collisions--;
+ stats_tbl->ctrl.num_of_valid_entries--;
+}
+
+/* Free ste that is located in the middle of the miss list:
+ * |__| -->|_prev_ste_|->|_ste_|-->|_next_ste_|
+ */
+static void dr_ste_remove_middle_ste(struct mlx5dr_ste *ste,
+ struct mlx5dr_ste_send_info *ste_info,
+ struct list_head *send_ste_list,
+ struct mlx5dr_ste_htbl *stats_tbl)
+{
+ struct mlx5dr_ste *prev_ste;
+ u64 miss_addr;
+
+ prev_ste = list_entry(mlx5dr_ste_get_miss_list(ste)->prev, struct mlx5dr_ste,
+ miss_list_node);
+ if (!prev_ste) {
+ WARN_ON(true);
+ return;
+ }
+
+ miss_addr = mlx5dr_ste_get_miss_addr(ste->hw_ste);
+ mlx5dr_ste_set_miss_addr(prev_ste->hw_ste, miss_addr);
+
+ mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_REDUCED, 0,
+ prev_ste->hw_ste, ste_info,
+ send_ste_list, true /* Copy data*/);
+
+ list_del_init(&ste->miss_list_node);
+
+ stats_tbl->ctrl.num_of_valid_entries--;
+ stats_tbl->ctrl.num_of_collisions--;
+}
+
+void mlx5dr_ste_free(struct mlx5dr_ste *ste,
+ struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+ struct mlx5dr_ste_send_info *cur_ste_info, *tmp_ste_info;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_send_info ste_info_head;
+ struct mlx5dr_ste *next_ste, *first_ste;
+ bool put_on_origin_table = true;
+ struct mlx5dr_ste_htbl *stats_tbl;
+ LIST_HEAD(send_ste_list);
+
+ first_ste = list_entry(mlx5dr_ste_get_miss_list(ste)->next,
+ struct mlx5dr_ste, miss_list_node);
+ stats_tbl = first_ste->htbl;
+
+ /* Two options:
+ * 1. ste is head:
+ * a. head ste is the only ste in the miss list
+ * b. head ste is not the only ste in the miss-list
+ * 2. ste is not head
+ */
+ if (first_ste == ste) { /* Ste is the head */
+ struct mlx5dr_ste *last_ste;
+
+ last_ste = list_last_entry(mlx5dr_ste_get_miss_list(ste),
+ struct mlx5dr_ste, miss_list_node);
+ if (last_ste == first_ste)
+ next_ste = NULL;
+ else
+ next_ste = list_entry(ste->miss_list_node.next,
+ struct mlx5dr_ste, miss_list_node);
+
+ if (!next_ste) {
+ /* One and only entry in the list */
+ dr_ste_remove_head_ste(ste, nic_matcher,
+ &ste_info_head,
+ &send_ste_list,
+ stats_tbl);
+ } else {
+ /* First but not only entry in the list */
+ dr_ste_replace_head_ste(ste, next_ste, &ste_info_head,
+ &send_ste_list, stats_tbl);
+ put_on_origin_table = false;
+ }
+ } else { /* Ste in the middle of the list */
+ dr_ste_remove_middle_ste(ste, &ste_info_head, &send_ste_list, stats_tbl);
+ }
+
+ /* Update HW */
+ list_for_each_entry_safe(cur_ste_info, tmp_ste_info,
+ &send_ste_list, send_list) {
+ list_del(&cur_ste_info->send_list);
+ mlx5dr_send_postsend_ste(dmn, cur_ste_info->ste,
+ cur_ste_info->data, cur_ste_info->size,
+ cur_ste_info->offset);
+ }
+
+ if (put_on_origin_table)
+ mlx5dr_htbl_put(ste->htbl);
+}
+
+bool mlx5dr_ste_equal_tag(void *src, void *dst)
+{
+ struct dr_hw_ste_format *s_hw_ste = (struct dr_hw_ste_format *)src;
+ struct dr_hw_ste_format *d_hw_ste = (struct dr_hw_ste_format *)dst;
+
+ return !memcmp(s_hw_ste->tag, d_hw_ste->tag, DR_STE_SIZE_TAG);
+}
+
+void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste,
+ struct mlx5dr_ste_htbl *next_htbl)
+{
+ struct mlx5dr_icm_chunk *chunk = next_htbl->chunk;
+
+ mlx5dr_ste_set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries);
+}
+
+void mlx5dr_ste_set_miss_addr(u8 *hw_ste_p, u64 miss_addr)
+{
+ u64 index = miss_addr >> 6;
+
+ /* The miss address for TX and RX STEs is located at the same offsets */
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32, index >> 26);
+ MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6, index);
+}
+
+void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr)
+{
+ u8 *hw_ste = ste->hw_ste;
+
+ MLX5_SET(ste_rx_steering_mult, hw_ste, next_lu_type, MLX5DR_STE_LU_TYPE_DONT_CARE);
+ mlx5dr_ste_set_miss_addr(hw_ste, miss_addr);
+ dr_ste_set_always_miss((struct dr_hw_ste_format *)ste->hw_ste);
+}
+
+/* The assumption here is that we don't update ste->hw_ste if the ste is not
+ * used, so it will be all zeros; checking the next_lu_type is enough.
+ */
+bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)p_hw_ste;
+
+ if (MLX5_GET(ste_general, hw_ste, next_lu_type) ==
+ MLX5DR_STE_LU_TYPE_NOP)
+ return true;
+
+ return false;
+}
+
+bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste)
+{
+ return !refcount_read(&ste->refcount);
+}
+
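+/* Count the set bits in byte_mask (Kernighan's method: each iteration clears
+ * the lowest set bit).
+ */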
+static u16 get_bits_per_mask(u16 byte_mask)
+{
+ u16 bits = 0;
+
+ while (byte_mask) {
+ byte_mask = byte_mask & (byte_mask - 1);
+ bits++;
+ }
+
+ return bits;
+}
+
+/* Init one ste as a pattern for ste data array */
+void mlx5dr_ste_set_formatted_ste(u16 gvmi,
+ struct mlx5dr_domain_rx_tx *nic_dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ u8 *formatted_ste,
+ struct mlx5dr_htbl_connect_info *connect_info)
+{
+ struct mlx5dr_ste ste = {};
+
+ mlx5dr_ste_init(formatted_ste, htbl->lu_type, nic_dmn->ste_type, gvmi);
+ ste.hw_ste = formatted_ste;
+
+ if (connect_info->type == CONNECT_HIT)
+ dr_ste_always_hit_htbl(&ste, connect_info->hit_next_htbl);
+ else
+ mlx5dr_ste_always_miss_addr(&ste, connect_info->miss_icm_addr);
+}
+
+int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
+ struct mlx5dr_domain_rx_tx *nic_dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ struct mlx5dr_htbl_connect_info *connect_info,
+ bool update_hw_ste)
+{
+ u8 formatted_ste[DR_STE_SIZE] = {};
+
+ mlx5dr_ste_set_formatted_ste(dmn->info.caps.gvmi,
+ nic_dmn,
+ htbl,
+ formatted_ste,
+ connect_info);
+
+ return mlx5dr_send_postsend_formatted_htbl(dmn, htbl, formatted_ste, update_hw_ste);
+}
+
+int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste *ste,
+ u8 *cur_hw_ste,
+ enum mlx5dr_icm_chunk_size log_table_size)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)cur_hw_ste;
+ struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_htbl_connect_info info;
+ struct mlx5dr_ste_htbl *next_htbl;
+
+ if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste->ste_chain_location)) {
+ u32 bits_in_mask;
+ u8 next_lu_type;
+ u16 byte_mask;
+
+ next_lu_type = MLX5_GET(ste_general, hw_ste, next_lu_type);
+ byte_mask = MLX5_GET(ste_general, hw_ste, byte_mask);
+
+ /* Don't allocate a table larger than required:
+ * the table size is bounded by the byte_mask, so there is no need
+ * to allocate more than that.
+ */
+ bits_in_mask = get_bits_per_mask(byte_mask) * BITS_PER_BYTE;
+ log_table_size = min(log_table_size, bits_in_mask);
+
+ next_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+ log_table_size,
+ next_lu_type,
+ byte_mask);
+ if (!next_htbl) {
+ mlx5dr_dbg(dmn, "Failed allocating table\n");
+ return -ENOMEM;
+ }
+
+ /* Write new table to HW */
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr;
+ if (mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, next_htbl,
+ &info, false)) {
+ mlx5dr_info(dmn, "Failed writing table to HW\n");
+ goto free_table;
+ }
+
+ mlx5dr_ste_set_hit_addr_by_next_htbl(cur_hw_ste, next_htbl);
+ ste->next_htbl = next_htbl;
+ next_htbl->pointing_ste = ste;
+ }
+
+ return 0;
+
+free_table:
+ mlx5dr_ste_htbl_free(next_htbl);
+ return -ENOENT;
+}
+
+static void dr_ste_set_ctrl(struct mlx5dr_ste_htbl *htbl)
+{
+ struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl;
+ int num_of_entries;
+
+ htbl->ctrl.may_grow = true;
+
+ if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1)
+ htbl->ctrl.may_grow = false;
+
+ /* Threshold is 50%; the +1 ensures a table of size 1 gets a threshold of 1 */
+ num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk_size);
+ ctrl->increase_threshold = (num_of_entries + 1) / 2;
+}
+
+struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
+ enum mlx5dr_icm_chunk_size chunk_size,
+ u8 lu_type, u16 byte_mask)
+{
+ struct mlx5dr_icm_chunk *chunk;
+ struct mlx5dr_ste_htbl *htbl;
+ int i;
+
+ htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
+ if (!htbl)
+ return NULL;
+
+ chunk = mlx5dr_icm_alloc_chunk(pool, chunk_size);
+ if (!chunk)
+ goto out_free_htbl;
+
+ htbl->chunk = chunk;
+ htbl->lu_type = lu_type;
+ htbl->byte_mask = byte_mask;
+ htbl->ste_arr = chunk->ste_arr;
+ htbl->hw_ste_arr = chunk->hw_ste_arr;
+ htbl->miss_list = chunk->miss_list;
+ refcount_set(&htbl->refcount, 0);
+
+ for (i = 0; i < chunk->num_of_entries; i++) {
+ struct mlx5dr_ste *ste = &htbl->ste_arr[i];
+
+ ste->hw_ste = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
+ ste->htbl = htbl;
+ refcount_set(&ste->refcount, 0);
+ INIT_LIST_HEAD(&ste->miss_list_node);
+ INIT_LIST_HEAD(&htbl->miss_list[i]);
+ INIT_LIST_HEAD(&ste->rule_list);
+ }
+
+ htbl->chunk_size = chunk_size;
+ dr_ste_set_ctrl(htbl);
+ return htbl;
+
+out_free_htbl:
+ kfree(htbl);
+ return NULL;
+}
+
+int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl)
+{
+ if (refcount_read(&htbl->refcount))
+ return -EBUSY;
+
+ mlx5dr_icm_free_chunk(htbl->chunk);
+ kfree(htbl);
+ return 0;
+}
+
+int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn,
+ u8 match_criteria,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_match_param *value)
+{
+ if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) {
+ if (mask->misc.source_port && mask->misc.source_port != 0xffff) {
+ mlx5dr_dbg(dmn, "Partial mask source_port is not supported\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
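+/* Build the array of STEs for one rule: each builder initializes an STE with
+ * its lookup type, writes the bit mask and the tag taken from the match
+ * value, and (except for the last STE) points the STE at the next builder's
+ * lookup type and byte mask.
+ */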
+int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_match_param *value,
+ u8 *ste_arr)
+{
+ struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+ struct mlx5dr_domain *dmn = matcher->tbl->dmn;
+ struct mlx5dr_ste_build *sb;
+ int ret, i;
+
+ ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria,
+ &matcher->mask, value);
+ if (ret)
+ return ret;
+
+ sb = nic_matcher->ste_builder;
+ for (i = 0; i < nic_matcher->num_of_builders; i++) {
+ mlx5dr_ste_init(ste_arr,
+ sb->lu_type,
+ nic_dmn->ste_type,
+ dmn->info.caps.gvmi);
+
+ mlx5dr_ste_set_bit_mask(ste_arr, sb->bit_mask);
+
+ ret = sb->ste_build_tag_func(value, sb, ste_arr);
+ if (ret)
+ return ret;
+
+ /* Connect the STEs */
+ if (i < (nic_matcher->num_of_builders - 1)) {
+ /* Need the next builder for these fields,
+ * not relevant for the last ste in the chain.
+ */
+ sb++;
+ MLX5_SET(ste_general, ste_arr, next_lu_type, sb->lu_type);
+ MLX5_SET(ste_general, ste_arr, byte_mask, sb->byte_mask);
+ }
+ ste_arr += DR_STE_SIZE;
+ }
+ return 0;
+}
+
+static int dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, dmac_15_0, mask, dmac_15_0);
+
+ if (mask->smac_47_16 || mask->smac_15_0) {
+ MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_47_32,
+ mask->smac_47_16 >> 16);
+ MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_31_0,
+ mask->smac_47_16 << 16 | mask->smac_15_0);
+ mask->smac_47_16 = 0;
+ mask->smac_15_0 = 0;
+ }
+
+ DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_MASK(eth_l2_src_dst, bit_mask, l3_type, mask, ip_version);
+
+ if (mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ } else if (mask->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1);
+ mask->svlan_tag = 0;
+ }
+
+ if (mask->cvlan_tag || mask->svlan_tag) {
+ pr_info("Invalid c/svlan mask configuration\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec)
+{
+ spec->gre_c_present = MLX5_GET(fte_match_set_misc, mask, gre_c_present);
+ spec->gre_k_present = MLX5_GET(fte_match_set_misc, mask, gre_k_present);
+ spec->gre_s_present = MLX5_GET(fte_match_set_misc, mask, gre_s_present);
+ spec->source_vhca_port = MLX5_GET(fte_match_set_misc, mask, source_vhca_port);
+ spec->source_sqn = MLX5_GET(fte_match_set_misc, mask, source_sqn);
+
+ spec->source_port = MLX5_GET(fte_match_set_misc, mask, source_port);
+
+ spec->outer_second_prio = MLX5_GET(fte_match_set_misc, mask, outer_second_prio);
+ spec->outer_second_cfi = MLX5_GET(fte_match_set_misc, mask, outer_second_cfi);
+ spec->outer_second_vid = MLX5_GET(fte_match_set_misc, mask, outer_second_vid);
+ spec->inner_second_prio = MLX5_GET(fte_match_set_misc, mask, inner_second_prio);
+ spec->inner_second_cfi = MLX5_GET(fte_match_set_misc, mask, inner_second_cfi);
+ spec->inner_second_vid = MLX5_GET(fte_match_set_misc, mask, inner_second_vid);
+
+ spec->outer_second_cvlan_tag =
+ MLX5_GET(fte_match_set_misc, mask, outer_second_cvlan_tag);
+ spec->inner_second_cvlan_tag =
+ MLX5_GET(fte_match_set_misc, mask, inner_second_cvlan_tag);
+ spec->outer_second_svlan_tag =
+ MLX5_GET(fte_match_set_misc, mask, outer_second_svlan_tag);
+ spec->inner_second_svlan_tag =
+ MLX5_GET(fte_match_set_misc, mask, inner_second_svlan_tag);
+
+ spec->gre_protocol = MLX5_GET(fte_match_set_misc, mask, gre_protocol);
+
+ spec->gre_key_h = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi);
+ spec->gre_key_l = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo);
+
+ spec->vxlan_vni = MLX5_GET(fte_match_set_misc, mask, vxlan_vni);
+
+ spec->geneve_vni = MLX5_GET(fte_match_set_misc, mask, geneve_vni);
+ spec->geneve_oam = MLX5_GET(fte_match_set_misc, mask, geneve_oam);
+
+ spec->outer_ipv6_flow_label =
+ MLX5_GET(fte_match_set_misc, mask, outer_ipv6_flow_label);
+
+ spec->inner_ipv6_flow_label =
+ MLX5_GET(fte_match_set_misc, mask, inner_ipv6_flow_label);
+
+ spec->geneve_opt_len = MLX5_GET(fte_match_set_misc, mask, geneve_opt_len);
+ spec->geneve_protocol_type =
+ MLX5_GET(fte_match_set_misc, mask, geneve_protocol_type);
+
+ spec->bth_dst_qp = MLX5_GET(fte_match_set_misc, mask, bth_dst_qp);
+}
+
+static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec)
+{
+ u32 raw_ip[4];
+
+ spec->smac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16);
+
+ spec->smac_15_0 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0);
+ spec->ethertype = MLX5_GET(fte_match_set_lyr_2_4, mask, ethertype);
+
+ spec->dmac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16);
+
+ spec->dmac_15_0 = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0);
+ spec->first_prio = MLX5_GET(fte_match_set_lyr_2_4, mask, first_prio);
+ spec->first_cfi = MLX5_GET(fte_match_set_lyr_2_4, mask, first_cfi);
+ spec->first_vid = MLX5_GET(fte_match_set_lyr_2_4, mask, first_vid);
+
+ spec->ip_protocol = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_protocol);
+ spec->ip_dscp = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_dscp);
+ spec->ip_ecn = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_ecn);
+ spec->cvlan_tag = MLX5_GET(fte_match_set_lyr_2_4, mask, cvlan_tag);
+ spec->svlan_tag = MLX5_GET(fte_match_set_lyr_2_4, mask, svlan_tag);
+ spec->frag = MLX5_GET(fte_match_set_lyr_2_4, mask, frag);
+ spec->ip_version = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_version);
+ spec->tcp_flags = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_flags);
+ spec->tcp_sport = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_sport);
+ spec->tcp_dport = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_dport);
+
+ spec->ttl_hoplimit = MLX5_GET(fte_match_set_lyr_2_4, mask, ttl_hoplimit);
+
+ spec->udp_sport = MLX5_GET(fte_match_set_lyr_2_4, mask, udp_sport);
+ spec->udp_dport = MLX5_GET(fte_match_set_lyr_2_4, mask, udp_dport);
+
+ memcpy(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(raw_ip));
+
+ spec->src_ip_127_96 = be32_to_cpu(raw_ip[0]);
+ spec->src_ip_95_64 = be32_to_cpu(raw_ip[1]);
+ spec->src_ip_63_32 = be32_to_cpu(raw_ip[2]);
+ spec->src_ip_31_0 = be32_to_cpu(raw_ip[3]);
+
+ memcpy(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(raw_ip));
+
+ spec->dst_ip_127_96 = be32_to_cpu(raw_ip[0]);
+ spec->dst_ip_95_64 = be32_to_cpu(raw_ip[1]);
+ spec->dst_ip_63_32 = be32_to_cpu(raw_ip[2]);
+ spec->dst_ip_31_0 = be32_to_cpu(raw_ip[3]);
+}
+
+static void dr_ste_copy_mask_misc2(char *mask, struct mlx5dr_match_misc2 *spec)
+{
+ spec->outer_first_mpls_label =
+ MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_label);
+ spec->outer_first_mpls_exp =
+ MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_exp);
+ spec->outer_first_mpls_s_bos =
+ MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_s_bos);
+ spec->outer_first_mpls_ttl =
+ MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_ttl);
+ spec->inner_first_mpls_label =
+ MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_label);
+ spec->inner_first_mpls_exp =
+ MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_exp);
+ spec->inner_first_mpls_s_bos =
+ MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_s_bos);
+ spec->inner_first_mpls_ttl =
+ MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_ttl);
+ spec->outer_first_mpls_over_gre_label =
+ MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_label);
+ spec->outer_first_mpls_over_gre_exp =
+ MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_exp);
+ spec->outer_first_mpls_over_gre_s_bos =
+ MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_s_bos);
+ spec->outer_first_mpls_over_gre_ttl =
+ MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_ttl);
+ spec->outer_first_mpls_over_udp_label =
+ MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_label);
+ spec->outer_first_mpls_over_udp_exp =
+ MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_exp);
+ spec->outer_first_mpls_over_udp_s_bos =
+ MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_s_bos);
+ spec->outer_first_mpls_over_udp_ttl =
+ MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_ttl);
+ spec->metadata_reg_c_7 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_7);
+ spec->metadata_reg_c_6 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_6);
+ spec->metadata_reg_c_5 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_5);
+ spec->metadata_reg_c_4 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_4);
+ spec->metadata_reg_c_3 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_3);
+ spec->metadata_reg_c_2 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_2);
+ spec->metadata_reg_c_1 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_1);
+ spec->metadata_reg_c_0 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_0);
+ spec->metadata_reg_a = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_a);
+ spec->metadata_reg_b = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_b);
+}
+
+static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec)
+{
+ spec->inner_tcp_seq_num = MLX5_GET(fte_match_set_misc3, mask, inner_tcp_seq_num);
+ spec->outer_tcp_seq_num = MLX5_GET(fte_match_set_misc3, mask, outer_tcp_seq_num);
+ spec->inner_tcp_ack_num = MLX5_GET(fte_match_set_misc3, mask, inner_tcp_ack_num);
+ spec->outer_tcp_ack_num = MLX5_GET(fte_match_set_misc3, mask, outer_tcp_ack_num);
+ spec->outer_vxlan_gpe_vni =
+ MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_vni);
+ spec->outer_vxlan_gpe_next_protocol =
+ MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_next_protocol);
+ spec->outer_vxlan_gpe_flags =
+ MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_flags);
+ spec->icmpv4_header_data = MLX5_GET(fte_match_set_misc3, mask, icmp_header_data);
+ spec->icmpv6_header_data =
+ MLX5_GET(fte_match_set_misc3, mask, icmpv6_header_data);
+ spec->icmpv4_type = MLX5_GET(fte_match_set_misc3, mask, icmp_type);
+ spec->icmpv4_code = MLX5_GET(fte_match_set_misc3, mask, icmp_code);
+ spec->icmpv6_type = MLX5_GET(fte_match_set_misc3, mask, icmpv6_type);
+ spec->icmpv6_code = MLX5_GET(fte_match_set_misc3, mask, icmpv6_code);
+}
+
+void mlx5dr_ste_copy_param(u8 match_criteria,
+ struct mlx5dr_match_param *set_param,
+ struct mlx5dr_match_parameters *mask)
+{
+ u8 tail_param[MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)] = {};
+ u8 *data = (u8 *)mask->match_buf;
+ size_t param_location;
+ void *buff;
+
+ if (match_criteria & DR_MATCHER_CRITERIA_OUTER) {
+ if (mask->match_sz < sizeof(struct mlx5dr_match_spec)) {
+ memcpy(tail_param, data, mask->match_sz);
+ buff = tail_param;
+ } else {
+ buff = mask->match_buf;
+ }
+ dr_ste_copy_mask_spec(buff, &set_param->outer);
+ }
+ param_location = sizeof(struct mlx5dr_match_spec);
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC) {
+ if (mask->match_sz < param_location +
+ sizeof(struct mlx5dr_match_misc)) {
+ memcpy(tail_param, data + param_location,
+ mask->match_sz - param_location);
+ buff = tail_param;
+ } else {
+ buff = data + param_location;
+ }
+ dr_ste_copy_mask_misc(buff, &set_param->misc);
+ }
+ param_location += sizeof(struct mlx5dr_match_misc);
+
+ if (match_criteria & DR_MATCHER_CRITERIA_INNER) {
+ if (mask->match_sz < param_location +
+ sizeof(struct mlx5dr_match_spec)) {
+ memcpy(tail_param, data + param_location,
+ mask->match_sz - param_location);
+ buff = tail_param;
+ } else {
+ buff = data + param_location;
+ }
+ dr_ste_copy_mask_spec(buff, &set_param->inner);
+ }
+ param_location += sizeof(struct mlx5dr_match_spec);
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC2) {
+ if (mask->match_sz < param_location +
+ sizeof(struct mlx5dr_match_misc2)) {
+ memcpy(tail_param, data + param_location,
+ mask->match_sz - param_location);
+ buff = tail_param;
+ } else {
+ buff = data + param_location;
+ }
+ dr_ste_copy_mask_misc2(buff, &set_param->misc2);
+ }
+
+ param_location += sizeof(struct mlx5dr_match_misc2);
+
+ if (match_criteria & DR_MATCHER_CRITERIA_MISC3) {
+ if (mask->match_sz < param_location +
+ sizeof(struct mlx5dr_match_misc3)) {
+ memcpy(tail_param, data + param_location,
+ mask->match_sz - param_location);
+ buff = tail_param;
+ } else {
+ buff = data + param_location;
+ }
+ dr_ste_copy_mask_misc3(buff, &set_param->misc3);
+ }
+}
+
+static int dr_ste_build_eth_l2_src_des_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_15_0, spec, dmac_15_0);
+
+ if (spec->smac_47_16 || spec->smac_15_0) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, smac_47_32,
+ spec->smac_47_16 >> 16);
+ MLX5_SET(ste_eth_l2_src_dst, tag, smac_31_0,
+ spec->smac_47_16 << 16 | spec->smac_15_0);
+ spec->smac_47_16 = 0;
+ spec->smac_15_0 = 0;
+ }
+
+ if (spec->ip_version) {
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else {
+ pr_info("Unsupported ip_version value\n");
+ return -EINVAL;
+ }
+ }
+
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src_dst, tag, first_priority, spec, first_prio);
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+ return 0;
+}
+
+int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ int ret;
+
+ ret = dr_ste_build_eth_l2_src_des_bit_mask(mask, inner, sb->bit_mask);
+ if (ret)
+ return ret;
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC_DST, rx, inner);
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_eth_l2_src_des_tag;
+
+ return 0;
+}
+
+static void dr_ste_build_eth_l3_ipv6_dst_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_127_96, mask, dst_ip_127_96);
+ DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_95_64, mask, dst_ip_95_64);
+ DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_63_32, mask, dst_ip_63_32);
+ DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_31_0, mask, dst_ip_31_0);
+}
+
+static int dr_ste_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32);
+ DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0);
+
+ return 0;
+}
+
+void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ dr_ste_build_eth_l3_ipv6_dst_bit_mask(mask, inner, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_DST, rx, inner);
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv6_dst_tag;
+}
+
+static void dr_ste_build_eth_l3_ipv6_src_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_127_96, mask, src_ip_127_96);
+ DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_95_64, mask, src_ip_95_64);
+ DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_63_32, mask, src_ip_63_32);
+ DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_31_0, mask, src_ip_31_0);
+}
+
+static int dr_ste_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32);
+ DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0);
+
+ return 0;
+}
+
+void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ dr_ste_build_eth_l3_ipv6_src_bit_mask(mask, inner, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_SRC, rx, inner);
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv6_src_tag;
+}
+
+static void dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(struct mlx5dr_match_param *value,
+ bool inner,
+ u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+ destination_address, mask, dst_ip_31_0);
+ DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+ source_address, mask, src_ip_31_0);
+ DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+ destination_port, mask, tcp_dport);
+ DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+ destination_port, mask, udp_dport);
+ DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+ source_port, mask, tcp_sport);
+ DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+ source_port, mask, udp_sport);
+ DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+ protocol, mask, ip_protocol);
+ DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+ fragmented, mask, frag);
+ DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+ dscp, mask, ip_dscp);
+ DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask,
+ ecn, mask, ip_ecn);
+
+ if (mask->tcp_flags) {
+ DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, bit_mask, mask);
+ mask->tcp_flags = 0;
+ }
+}
+
+static int dr_ste_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_address, spec, dst_ip_31_0);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_address, spec, src_ip_31_0);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, tcp_dport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, udp_dport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, tcp_sport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, udp_sport);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, protocol, spec, ip_protocol);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, dscp, spec, ip_dscp);
+ DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, ecn, spec, ip_ecn);
+
+ if (spec->tcp_flags) {
+ DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, tag, spec);
+ spec->tcp_flags = 0;
+ }
+
+ return 0;
+}
+
+void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(mask, inner, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_5_TUPLE, rx, inner);
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv4_5_tuple_tag;
+}
+
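+/* Fields common to the eth_l2_src and eth_l2_dst builders below: first/second
+ * VLAN qualifiers, fragmentation, ethertype and IP version.
+ */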
+static void
+dr_ste_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc_mask = &value->misc;
+
+ DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_MASK_V(eth_l2_src, bit_mask, ip_fragmented, mask, frag);
+ DR_STE_SET_MASK_V(eth_l2_src, bit_mask, l3_ethertype, mask, ethertype);
+ DR_STE_SET_MASK(eth_l2_src, bit_mask, l3_type, mask, ip_version);
+
+ if (mask->svlan_tag || mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ mask->svlan_tag = 0;
+ }
+
+ if (inner) {
+ if (misc_mask->inner_second_cvlan_tag ||
+ misc_mask->inner_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1);
+ misc_mask->inner_second_cvlan_tag = 0;
+ misc_mask->inner_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
+ second_vlan_id, misc_mask, inner_second_vid);
+ DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
+ second_cfi, misc_mask, inner_second_cfi);
+ DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
+ second_priority, misc_mask, inner_second_prio);
+ } else {
+ if (misc_mask->outer_second_cvlan_tag ||
+ misc_mask->outer_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1);
+ misc_mask->outer_second_cvlan_tag = 0;
+ misc_mask->outer_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
+ second_vlan_id, misc_mask, outer_second_vid);
+ DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
+ second_cfi, misc_mask, outer_second_cfi);
+ DR_STE_SET_MASK_V(eth_l2_src, bit_mask,
+ second_priority, misc_mask, outer_second_prio);
+ }
+}
+
+static int dr_ste_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value,
+ bool inner, u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_spec *spec = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc_spec = &value->misc;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(eth_l2_src, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_src, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_src, tag, first_priority, spec, first_prio);
+ DR_STE_SET_TAG(eth_l2_src, tag, ip_fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l2_src, tag, l3_ethertype, spec, ethertype);
+
+ if (spec->ip_version) {
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else {
+ pr_info("Unsupported ip_version value\n");
+ return -EINVAL;
+ }
+ }
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+
+ if (inner) {
+ if (misc_spec->inner_second_cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN);
+ misc_spec->inner_second_cvlan_tag = 0;
+ } else if (misc_spec->inner_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN);
+ misc_spec->inner_second_svlan_tag = 0;
+ }
+
+ DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, inner_second_vid);
+ DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, inner_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, inner_second_prio);
+ } else {
+ if (misc_spec->outer_second_cvlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN);
+ misc_spec->outer_second_cvlan_tag = 0;
+ } else if (misc_spec->outer_second_svlan_tag) {
+ MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN);
+ misc_spec->outer_second_svlan_tag = 0;
+ }
+ DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, outer_second_vid);
+ DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, outer_second_cfi);
+ DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, outer_second_prio);
+ }
+
+ return 0;
+}
+
+static void dr_ste_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_MASK_V(eth_l2_src, bit_mask, smac_47_16, mask, smac_47_16);
+ DR_STE_SET_MASK_V(eth_l2_src, bit_mask, smac_15_0, mask, smac_15_0);
+
+ dr_ste_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask);
+}
+
+static int dr_ste_build_eth_l2_src_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(eth_l2_src, tag, smac_47_16, spec, smac_47_16);
+ DR_STE_SET_TAG(eth_l2_src, tag, smac_15_0, spec, smac_15_0);
+
+ return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p);
+}
+
+void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ dr_ste_build_eth_l2_src_bit_mask(mask, inner, sb->bit_mask);
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC, rx, inner);
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_eth_l2_src_tag;
+}
+
+static void dr_ste_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_MASK_V(eth_l2_dst, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_MASK_V(eth_l2_dst, bit_mask, dmac_15_0, mask, dmac_15_0);
+
+ dr_ste_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask);
+}
+
+static int dr_ste_build_eth_l2_dst_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(eth_l2_dst, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_dst, tag, dmac_15_0, spec, dmac_15_0);
+
+ return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p);
+}
+
+void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ dr_ste_build_eth_l2_dst_bit_mask(mask, inner, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_DST, rx, inner);
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_eth_l2_dst_tag;
+}
+
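+/* The VXLAN VNI occupies the upper 24 bits of l2_tunneling_network_id, hence
+ * the << 8 shift when copying it from the match parameters.
+ */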
+static void dr_ste_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+ struct mlx5dr_match_misc *misc = &value->misc;
+
+ DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, dmac_47_16, mask, dmac_47_16);
+ DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, dmac_15_0, mask, dmac_15_0);
+ DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_vlan_id, mask, first_vid);
+ DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_cfi, mask, first_cfi);
+ DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_priority, mask, first_prio);
+ DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, ip_fragmented, mask, frag);
+ DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, l3_ethertype, mask, ethertype);
+ DR_STE_SET_MASK(eth_l2_tnl, bit_mask, l3_type, mask, ip_version);
+
+ if (misc->vxlan_vni) {
+ MLX5_SET(ste_eth_l2_tnl, bit_mask,
+ l2_tunneling_network_id, (misc->vxlan_vni << 8));
+ misc->vxlan_vni = 0;
+ }
+
+ if (mask->svlan_tag || mask->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl, bit_mask, first_vlan_qualifier, -1);
+ mask->cvlan_tag = 0;
+ mask->svlan_tag = 0;
+ }
+}
+
+static int dr_ste_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_misc *misc = &value->misc;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_47_16, spec, dmac_47_16);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_15_0, spec, dmac_15_0);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, first_vlan_id, spec, first_vid);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, first_cfi, spec, first_cfi);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, ip_fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, first_priority, spec, first_prio);
+ DR_STE_SET_TAG(eth_l2_tnl, tag, l3_ethertype, spec, ethertype);
+
+ if (misc->vxlan_vni) {
+ MLX5_SET(ste_eth_l2_tnl, tag, l2_tunneling_network_id,
+ (misc->vxlan_vni << 8));
+ misc->vxlan_vni = 0;
+ }
+
+ if (spec->cvlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_CVLAN);
+ spec->cvlan_tag = 0;
+ } else if (spec->svlan_tag) {
+ MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_SVLAN);
+ spec->svlan_tag = 0;
+ }
+
+ if (spec->ip_version) {
+ if (spec->ip_version == IP_VERSION_IPV4) {
+ MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV4);
+ spec->ip_version = 0;
+ } else if (spec->ip_version == IP_VERSION_IPV6) {
+ MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV6);
+ spec->ip_version = 0;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask, bool inner, bool rx)
+{
+ dr_ste_build_eth_l2_tnl_bit_mask(mask, inner, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = MLX5DR_STE_LU_TYPE_ETHL2_TUNNELING_I;
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_eth_l2_tnl_tag;
+}
+
+static void dr_ste_build_eth_l3_ipv4_misc_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_MASK_V(eth_l3_ipv4_misc, bit_mask, time_to_live, mask, ttl_hoplimit);
+}
+
+static int dr_ste_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit);
+
+ return 0;
+}
+
+void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ dr_ste_build_eth_l3_ipv4_misc_bit_mask(mask, inner, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_MISC, rx, inner);
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv4_misc_tag;
+}
+
+static void dr_ste_build_ipv6_l3_l4_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer;
+
+ DR_STE_SET_MASK_V(eth_l4, bit_mask, dst_port, mask, tcp_dport);
+ DR_STE_SET_MASK_V(eth_l4, bit_mask, src_port, mask, tcp_sport);
+ DR_STE_SET_MASK_V(eth_l4, bit_mask, dst_port, mask, udp_dport);
+ DR_STE_SET_MASK_V(eth_l4, bit_mask, src_port, mask, udp_sport);
+ DR_STE_SET_MASK_V(eth_l4, bit_mask, protocol, mask, ip_protocol);
+ DR_STE_SET_MASK_V(eth_l4, bit_mask, fragmented, mask, frag);
+ DR_STE_SET_MASK_V(eth_l4, bit_mask, dscp, mask, ip_dscp);
+ DR_STE_SET_MASK_V(eth_l4, bit_mask, ecn, mask, ip_ecn);
+ DR_STE_SET_MASK_V(eth_l4, bit_mask, ipv6_hop_limit, mask, ttl_hoplimit);
+
+ if (mask->tcp_flags) {
+ DR_STE_SET_TCP_FLAGS(eth_l4, bit_mask, mask);
+ mask->tcp_flags = 0;
+ }
+}
+
+static int dr_ste_build_ipv6_l3_l4_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport);
+ DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport);
+ DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, udp_dport);
+ DR_STE_SET_TAG(eth_l4, tag, src_port, spec, udp_sport);
+ DR_STE_SET_TAG(eth_l4, tag, protocol, spec, ip_protocol);
+ DR_STE_SET_TAG(eth_l4, tag, fragmented, spec, frag);
+ DR_STE_SET_TAG(eth_l4, tag, dscp, spec, ip_dscp);
+ DR_STE_SET_TAG(eth_l4, tag, ecn, spec, ip_ecn);
+ DR_STE_SET_TAG(eth_l4, tag, ipv6_hop_limit, spec, ttl_hoplimit);
+
+ if (spec->tcp_flags) {
+ DR_STE_SET_TCP_FLAGS(eth_l4, tag, spec);
+ spec->tcp_flags = 0;
+ }
+
+ return 0;
+}
+
+void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ dr_ste_build_ipv6_l3_l4_bit_mask(mask, inner, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4, rx, inner);
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_ipv6_l3_l4_tag;
+}
+
+static int dr_ste_build_empty_always_hit_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ return 0;
+}
+
+void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx)
+{
+ sb->rx = rx;
+ sb->lu_type = MLX5DR_STE_LU_TYPE_DONT_CARE;
+ sb->byte_mask = 0;
+ sb->ste_build_tag_func = &dr_ste_build_empty_always_hit_tag;
+}
+
+static void dr_ste_build_mpls_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_misc2 *misc2_mask = &value->misc2;
+
+ if (inner)
+ DR_STE_SET_MPLS_MASK(mpls, misc2_mask, inner, bit_mask);
+ else
+ DR_STE_SET_MPLS_MASK(mpls, misc2_mask, outer, bit_mask);
+}
+
+static int dr_ste_build_mpls_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_misc2 *misc2_mask = &value->misc2;
+ u8 *tag = hw_ste->tag;
+
+ if (sb->inner)
+ DR_STE_SET_MPLS_TAG(mpls, misc2_mask, inner, tag);
+ else
+ DR_STE_SET_MPLS_TAG(mpls, misc2_mask, outer, tag);
+
+ return 0;
+}
+
+void mlx5dr_ste_build_mpls(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ dr_ste_build_mpls_bit_mask(mask, inner, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = DR_STE_CALC_LU_TYPE(MPLS_FIRST, rx, inner);
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_mpls_tag;
+}
+
+static void dr_ste_build_gre_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_misc *misc_mask = &value->misc;
+
+ DR_STE_SET_MASK_V(gre, bit_mask, gre_protocol, misc_mask, gre_protocol);
+ DR_STE_SET_MASK_V(gre, bit_mask, gre_k_present, misc_mask, gre_k_present);
+ DR_STE_SET_MASK_V(gre, bit_mask, gre_key_h, misc_mask, gre_key_h);
+ DR_STE_SET_MASK_V(gre, bit_mask, gre_key_l, misc_mask, gre_key_l);
+
+ DR_STE_SET_MASK_V(gre, bit_mask, gre_c_present, misc_mask, gre_c_present);
+ DR_STE_SET_MASK_V(gre, bit_mask, gre_s_present, misc_mask, gre_s_present);
+}
+
+static int dr_ste_build_gre_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_misc *misc = &value->misc;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(gre, tag, gre_protocol, misc, gre_protocol);
+
+ DR_STE_SET_TAG(gre, tag, gre_k_present, misc, gre_k_present);
+ DR_STE_SET_TAG(gre, tag, gre_key_h, misc, gre_key_h);
+ DR_STE_SET_TAG(gre, tag, gre_key_l, misc, gre_key_l);
+
+ DR_STE_SET_TAG(gre, tag, gre_c_present, misc, gre_c_present);
+
+ DR_STE_SET_TAG(gre, tag, gre_s_present, misc, gre_s_present);
+
+ return 0;
+}
+
+void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask, bool inner, bool rx)
+{
+ dr_ste_build_gre_bit_mask(mask, inner, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = MLX5DR_STE_LU_TYPE_GRE;
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_gre_tag;
+}
+
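+/* Flex parser 0 carries the outer first MPLS label: the same parser_3 fields
+ * are used for MPLS-over-GRE and MPLS-over-UDP, selected by which mask is set.
+ */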
+static void dr_ste_build_flex_parser_0_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
+
+ if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) {
+ DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_label,
+ misc_2_mask, outer_first_mpls_over_gre_label);
+
+ DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_exp,
+ misc_2_mask, outer_first_mpls_over_gre_exp);
+
+ DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_s_bos,
+ misc_2_mask, outer_first_mpls_over_gre_s_bos);
+
+ DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_ttl,
+ misc_2_mask, outer_first_mpls_over_gre_ttl);
+ } else {
+ DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_label,
+ misc_2_mask, outer_first_mpls_over_udp_label);
+
+ DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_exp,
+ misc_2_mask, outer_first_mpls_over_udp_exp);
+
+ DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_s_bos,
+ misc_2_mask, outer_first_mpls_over_udp_s_bos);
+
+ DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_ttl,
+ misc_2_mask, outer_first_mpls_over_udp_ttl);
+ }
+}
+
+static int dr_ste_build_flex_parser_0_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
+ u8 *tag = hw_ste->tag;
+
+ if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) {
+ DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label,
+ misc_2_mask, outer_first_mpls_over_gre_label);
+
+ DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp,
+ misc_2_mask, outer_first_mpls_over_gre_exp);
+
+ DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos,
+ misc_2_mask, outer_first_mpls_over_gre_s_bos);
+
+ DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl,
+ misc_2_mask, outer_first_mpls_over_gre_ttl);
+ } else {
+ DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label,
+ misc_2_mask, outer_first_mpls_over_udp_label);
+
+ DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp,
+ misc_2_mask, outer_first_mpls_over_udp_exp);
+
+ DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos,
+ misc_2_mask, outer_first_mpls_over_udp_s_bos);
+
+ DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl,
+ misc_2_mask, outer_first_mpls_over_udp_ttl);
+ }
+ return 0;
+}
+
+void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ dr_ste_build_flex_parser_0_bit_mask(mask, inner, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_0;
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_flex_parser_0_tag;
+}
+
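+/* ICMP is matched through the flex parsers: type and code are packed into the
+ * first parser dword at the offsets below, the header data goes into the
+ * second dword. Only parser locations 4 and 5 are supported here.
+ */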
+#define ICMP_TYPE_OFFSET_FIRST_DW 24
+#define ICMP_CODE_OFFSET_FIRST_DW 16
+#define ICMP_HEADER_DATA_OFFSET_SECOND_DW 0
+
+static int dr_ste_build_flex_parser_1_bit_mask(struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ u8 *bit_mask)
+{
+ struct mlx5dr_match_misc3 *misc_3_mask = &mask->misc3;
+ bool is_ipv4_mask = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3_mask);
+ u32 icmp_header_data_mask;
+ u32 icmp_type_mask;
+ u32 icmp_code_mask;
+ int dw0_location;
+ int dw1_location;
+
+ if (is_ipv4_mask) {
+ icmp_header_data_mask = misc_3_mask->icmpv4_header_data;
+ icmp_type_mask = misc_3_mask->icmpv4_type;
+ icmp_code_mask = misc_3_mask->icmpv4_code;
+ dw0_location = caps->flex_parser_id_icmp_dw0;
+ dw1_location = caps->flex_parser_id_icmp_dw1;
+ } else {
+ icmp_header_data_mask = misc_3_mask->icmpv6_header_data;
+ icmp_type_mask = misc_3_mask->icmpv6_type;
+ icmp_code_mask = misc_3_mask->icmpv6_code;
+ dw0_location = caps->flex_parser_id_icmpv6_dw0;
+ dw1_location = caps->flex_parser_id_icmpv6_dw1;
+ }
+
+ switch (dw0_location) {
+ case 4:
+ if (icmp_type_mask) {
+ MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_4,
+ (icmp_type_mask << ICMP_TYPE_OFFSET_FIRST_DW));
+ if (is_ipv4_mask)
+ misc_3_mask->icmpv4_type = 0;
+ else
+ misc_3_mask->icmpv6_type = 0;
+ }
+ if (icmp_code_mask) {
+ u32 cur_val = MLX5_GET(ste_flex_parser_1, bit_mask,
+ flex_parser_4);
+ MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_4,
+ cur_val | (icmp_code_mask << ICMP_CODE_OFFSET_FIRST_DW));
+ if (is_ipv4_mask)
+ misc_3_mask->icmpv4_code = 0;
+ else
+ misc_3_mask->icmpv6_code = 0;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (dw1_location) {
+ case 5:
+ if (icmp_header_data_mask) {
+ MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_5,
+ (icmp_header_data_mask << ICMP_HEADER_DATA_OFFSET_SECOND_DW));
+ if (is_ipv4_mask)
+ misc_3_mask->icmpv4_header_data = 0;
+ else
+ misc_3_mask->icmpv6_header_data = 0;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int dr_ste_build_flex_parser_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_misc3 *misc_3 = &value->misc3;
+ u8 *tag = hw_ste->tag;
+ u32 icmp_header_data;
+ int dw0_location;
+ int dw1_location;
+ u32 icmp_type;
+ u32 icmp_code;
+ bool is_ipv4;
+
+ is_ipv4 = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3);
+ if (is_ipv4) {
+ icmp_header_data = misc_3->icmpv4_header_data;
+ icmp_type = misc_3->icmpv4_type;
+ icmp_code = misc_3->icmpv4_code;
+ dw0_location = sb->caps->flex_parser_id_icmp_dw0;
+ dw1_location = sb->caps->flex_parser_id_icmp_dw1;
+ } else {
+ icmp_header_data = misc_3->icmpv6_header_data;
+ icmp_type = misc_3->icmpv6_type;
+ icmp_code = misc_3->icmpv6_code;
+ dw0_location = sb->caps->flex_parser_id_icmpv6_dw0;
+ dw1_location = sb->caps->flex_parser_id_icmpv6_dw1;
+ }
+
+ switch (dw0_location) {
+ case 4:
+ if (icmp_type) {
+ MLX5_SET(ste_flex_parser_1, tag, flex_parser_4,
+ (icmp_type << ICMP_TYPE_OFFSET_FIRST_DW));
+ if (is_ipv4)
+ misc_3->icmpv4_type = 0;
+ else
+ misc_3->icmpv6_type = 0;
+ }
+
+ if (icmp_code) {
+ u32 cur_val = MLX5_GET(ste_flex_parser_1, tag,
+ flex_parser_4);
+ MLX5_SET(ste_flex_parser_1, tag, flex_parser_4,
+ cur_val | (icmp_code << ICMP_CODE_OFFSET_FIRST_DW));
+ if (is_ipv4)
+ misc_3->icmpv4_code = 0;
+ else
+ misc_3->icmpv6_code = 0;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (dw1_location) {
+ case 5:
+ if (icmp_header_data) {
+ MLX5_SET(ste_flex_parser_1, tag, flex_parser_5,
+ (icmp_header_data << ICMP_HEADER_DATA_OFFSET_SECOND_DW));
+ if (is_ipv4)
+ misc_3->icmpv4_header_data = 0;
+ else
+ misc_3->icmpv6_header_data = 0;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
+{
+ int ret;
+
+ ret = dr_ste_build_flex_parser_1_bit_mask(mask, caps, sb->bit_mask);
+ if (ret)
+ return ret;
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->caps = caps;
+ sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_1;
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_flex_parser_1_tag;
+
+ return 0;
+}
+
+static void dr_ste_build_general_purpose_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
+
+ DR_STE_SET_MASK_V(general_purpose, bit_mask,
+ general_purpose_lookup_field, misc_2_mask,
+ metadata_reg_a);
+}
+
+static int dr_ste_build_general_purpose_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field,
+ misc_2_mask, metadata_reg_a);
+
+ return 0;
+}
+
+void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ dr_ste_build_general_purpose_bit_mask(mask, inner, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = MLX5DR_STE_LU_TYPE_GENERAL_PURPOSE;
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_general_purpose_tag;
+}
+
+static void dr_ste_build_eth_l4_misc_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_misc3 *misc_3_mask = &value->misc3;
+
+ if (inner) {
+ DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, seq_num, misc_3_mask,
+ inner_tcp_seq_num);
+ DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, ack_num, misc_3_mask,
+ inner_tcp_ack_num);
+ } else {
+ DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, seq_num, misc_3_mask,
+ outer_tcp_seq_num);
+ DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, ack_num, misc_3_mask,
+ outer_tcp_ack_num);
+ }
+}
+
+static int dr_ste_build_eth_l4_misc_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+ u8 *tag = hw_ste->tag;
+
+ if (sb->inner) {
+ DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, inner_tcp_seq_num);
+ DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, inner_tcp_ack_num);
+ } else {
+ DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, outer_tcp_seq_num);
+ DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, outer_tcp_ack_num);
+ }
+
+ return 0;
+}
+
+void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ dr_ste_build_eth_l4_misc_bit_mask(mask, inner, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4_MISC, rx, inner);
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_eth_l4_misc_tag;
+}
+
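+/* VXLAN-GPE matching: flags and next-protocol are packed into the upper
+ * tunneling header dword, the VNI (shifted by 8) into the lower dword.
+ */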
+static void dr_ste_build_flex_parser_tnl_bit_mask(struct mlx5dr_match_param *value,
+ bool inner, u8 *bit_mask)
+{
+ struct mlx5dr_match_misc3 *misc_3_mask = &value->misc3;
+
+ if (misc_3_mask->outer_vxlan_gpe_flags ||
+ misc_3_mask->outer_vxlan_gpe_next_protocol) {
+ MLX5_SET(ste_flex_parser_tnl, bit_mask,
+ flex_parser_tunneling_header_63_32,
+ (misc_3_mask->outer_vxlan_gpe_flags << 24) |
+ (misc_3_mask->outer_vxlan_gpe_next_protocol));
+ misc_3_mask->outer_vxlan_gpe_flags = 0;
+ misc_3_mask->outer_vxlan_gpe_next_protocol = 0;
+ }
+
+ if (misc_3_mask->outer_vxlan_gpe_vni) {
+ MLX5_SET(ste_flex_parser_tnl, bit_mask,
+ flex_parser_tunneling_header_31_0,
+ misc_3_mask->outer_vxlan_gpe_vni << 8);
+ misc_3_mask->outer_vxlan_gpe_vni = 0;
+ }
+}
+
+static int dr_ste_build_flex_parser_tnl_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+ u8 *tag = hw_ste->tag;
+
+ if (misc3->outer_vxlan_gpe_flags ||
+ misc3->outer_vxlan_gpe_next_protocol) {
+ MLX5_SET(ste_flex_parser_tnl, tag,
+ flex_parser_tunneling_header_63_32,
+ (misc3->outer_vxlan_gpe_flags << 24) |
+ (misc3->outer_vxlan_gpe_next_protocol));
+ misc3->outer_vxlan_gpe_flags = 0;
+ misc3->outer_vxlan_gpe_next_protocol = 0;
+ }
+
+ if (misc3->outer_vxlan_gpe_vni) {
+ MLX5_SET(ste_flex_parser_tnl, tag,
+ flex_parser_tunneling_header_31_0,
+ misc3->outer_vxlan_gpe_vni << 8);
+ misc3->outer_vxlan_gpe_vni = 0;
+ }
+
+ return 0;
+}
+
+void mlx5dr_ste_build_flex_parser_tnl(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ dr_ste_build_flex_parser_tnl_bit_mask(mask, inner, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_flex_parser_tnl_tag;
+}
+
+static void dr_ste_build_register_0_bit_mask(struct mlx5dr_match_param *value,
+ u8 *bit_mask)
+{
+ struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
+
+ DR_STE_SET_MASK_V(register_0, bit_mask, register_0_h,
+ misc_2_mask, metadata_reg_c_0);
+ DR_STE_SET_MASK_V(register_0, bit_mask, register_0_l,
+ misc_2_mask, metadata_reg_c_1);
+ DR_STE_SET_MASK_V(register_0, bit_mask, register_1_h,
+ misc_2_mask, metadata_reg_c_2);
+ DR_STE_SET_MASK_V(register_0, bit_mask, register_1_l,
+ misc_2_mask, metadata_reg_c_3);
+}
+
+static int dr_ste_build_register_0_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0);
+ DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1);
+ DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2);
+ DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3);
+
+ return 0;
+}
+
+void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ dr_ste_build_register_0_bit_mask(mask, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_0;
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_register_0_tag;
+}
+
+static void dr_ste_build_register_1_bit_mask(struct mlx5dr_match_param *value,
+ u8 *bit_mask)
+{
+ struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2;
+
+ DR_STE_SET_MASK_V(register_1, bit_mask, register_2_h,
+ misc_2_mask, metadata_reg_c_4);
+ DR_STE_SET_MASK_V(register_1, bit_mask, register_2_l,
+ misc_2_mask, metadata_reg_c_5);
+ DR_STE_SET_MASK_V(register_1, bit_mask, register_3_h,
+ misc_2_mask, metadata_reg_c_6);
+ DR_STE_SET_MASK_V(register_1, bit_mask, register_3_l,
+ misc_2_mask, metadata_reg_c_7);
+}
+
+static int dr_ste_build_register_1_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4);
+ DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5);
+ DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6);
+ DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7);
+
+ return 0;
+}
+
+void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx)
+{
+ dr_ste_build_register_1_bit_mask(mask, sb->bit_mask);
+
+ sb->rx = rx;
+ sb->inner = inner;
+ sb->lu_type = MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_1;
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_register_1_tag;
+}
+
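+/* The source port mask must be fully set (0xffff), since at tag build time it
+ * is translated into the vport GVMI rather than matched directly.
+ */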
+static int dr_ste_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value,
+ u8 *bit_mask)
+{
+ struct mlx5dr_match_misc *misc_mask = &value->misc;
+
+ if (misc_mask->source_port != 0xffff)
+ return -EINVAL;
+
+ DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port);
+ DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn);
+
+ return 0;
+}
+
+static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p)
+{
+ struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p;
+ struct mlx5dr_match_misc *misc = &value->misc;
+ struct mlx5dr_cmd_vport_cap *vport_cap;
+ u8 *tag = hw_ste->tag;
+
+ DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn);
+
+ vport_cap = mlx5dr_get_vport_cap(sb->caps, misc->source_port);
+ if (!vport_cap)
+ return -EINVAL;
+
+ if (vport_cap->vport_gvmi)
+ MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi);
+
+ misc->source_port = 0;
+
+ return 0;
+}
+
+int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx)
+{
+ int ret;
+
+ ret = dr_ste_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask);
+ if (ret)
+ return ret;
+
+ sb->rx = rx;
+ sb->caps = caps;
+ sb->inner = inner;
+ sb->lu_type = MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP;
+ sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+ sb->ste_build_tag_func = &dr_ste_build_src_gvmi_qpn_tag;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
new file mode 100644
index 000000000000..e178d8d3dbc9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "dr_types.h"
+
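+/* Point the miss path of the table's last hash table (the last matcher's end
+ * anchor, or the table start anchor when the table has no matchers) at the
+ * given forward-to-table action, or back to the domain default when action
+ * is NULL.
+ */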
+int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
+ struct mlx5dr_action *action)
+{
+ struct mlx5dr_matcher *last_matcher = NULL;
+ struct mlx5dr_htbl_connect_info info;
+ struct mlx5dr_ste_htbl *last_htbl;
+ int ret;
+
+ if (action && action->action_type != DR_ACTION_TYP_FT)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&tbl->dmn->mutex);
+
+ if (!list_empty(&tbl->matcher_list))
+ last_matcher = list_last_entry(&tbl->matcher_list,
+ struct mlx5dr_matcher,
+ matcher_list);
+
+ if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX ||
+ tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) {
+ if (last_matcher)
+ last_htbl = last_matcher->rx.e_anchor;
+ else
+ last_htbl = tbl->rx.s_anchor;
+
+ tbl->rx.default_icm_addr = action ?
+ action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr :
+ tbl->rx.nic_dmn->default_icm_addr;
+
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = tbl->rx.default_icm_addr;
+
+ ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn,
+ tbl->rx.nic_dmn,
+ last_htbl,
+ &info, true);
+ if (ret) {
+ mlx5dr_dbg(tbl->dmn, "Failed to set RX miss action, ret %d\n", ret);
+ goto out;
+ }
+ }
+
+ if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX ||
+ tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) {
+ if (last_matcher)
+ last_htbl = last_matcher->tx.e_anchor;
+ else
+ last_htbl = tbl->tx.s_anchor;
+
+ tbl->tx.default_icm_addr = action ?
+ action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr :
+ tbl->tx.nic_dmn->default_icm_addr;
+
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = tbl->tx.default_icm_addr;
+
+ ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn,
+ tbl->tx.nic_dmn,
+ last_htbl, &info, true);
+ if (ret) {
+ mlx5dr_dbg(tbl->dmn, "Failed to set TX miss action, ret %d\n", ret);
+ goto out;
+ }
+ }
+
+ /* Release old action */
+ if (tbl->miss_action)
+ refcount_dec(&tbl->miss_action->refcount);
+
+ /* Set new miss action */
+ tbl->miss_action = action;
+ if (tbl->miss_action)
+ refcount_inc(&action->refcount);
+
+out:
+ mutex_unlock(&tbl->dmn->mutex);
+ return ret;
+}
+
+static void dr_table_uninit_nic(struct mlx5dr_table_rx_tx *nic_tbl)
+{
+ mlx5dr_htbl_put(nic_tbl->s_anchor);
+}
+
+static void dr_table_uninit_fdb(struct mlx5dr_table *tbl)
+{
+ dr_table_uninit_nic(&tbl->rx);
+ dr_table_uninit_nic(&tbl->tx);
+}
+
+static void dr_table_uninit(struct mlx5dr_table *tbl)
+{
+ mutex_lock(&tbl->dmn->mutex);
+
+ switch (tbl->dmn->type) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ dr_table_uninit_nic(&tbl->rx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ dr_table_uninit_nic(&tbl->tx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ dr_table_uninit_fdb(tbl);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+
+ mutex_unlock(&tbl->dmn->mutex);
+}
+
+static int dr_table_init_nic(struct mlx5dr_domain *dmn,
+ struct mlx5dr_table_rx_tx *nic_tbl)
+{
+ struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn;
+ struct mlx5dr_htbl_connect_info info;
+ int ret;
+
+ nic_tbl->default_icm_addr = nic_dmn->default_icm_addr;
+
+ nic_tbl->s_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
+ DR_CHUNK_SIZE_1,
+ MLX5DR_STE_LU_TYPE_DONT_CARE,
+ 0);
+ if (!nic_tbl->s_anchor)
+ return -ENOMEM;
+
+ info.type = CONNECT_MISS;
+ info.miss_icm_addr = nic_dmn->default_icm_addr;
+ ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn,
+ nic_tbl->s_anchor,
+ &info, true);
+ if (ret)
+ goto free_s_anchor;
+
+ mlx5dr_htbl_get(nic_tbl->s_anchor);
+
+ return 0;
+
+free_s_anchor:
+ mlx5dr_ste_htbl_free(nic_tbl->s_anchor);
+ return ret;
+}
+
+static int dr_table_init_fdb(struct mlx5dr_table *tbl)
+{
+ int ret;
+
+ ret = dr_table_init_nic(tbl->dmn, &tbl->rx);
+ if (ret)
+ return ret;
+
+ ret = dr_table_init_nic(tbl->dmn, &tbl->tx);
+ if (ret)
+ goto destroy_rx;
+
+ return 0;
+
+destroy_rx:
+ dr_table_uninit_nic(&tbl->rx);
+ return ret;
+}
+
+static int dr_table_init(struct mlx5dr_table *tbl)
+{
+ int ret = 0;
+
+ INIT_LIST_HEAD(&tbl->matcher_list);
+
+ mutex_lock(&tbl->dmn->mutex);
+
+ switch (tbl->dmn->type) {
+ case MLX5DR_DOMAIN_TYPE_NIC_RX:
+ tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_RX;
+ tbl->rx.nic_dmn = &tbl->dmn->info.rx;
+ ret = dr_table_init_nic(tbl->dmn, &tbl->rx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_NIC_TX:
+ tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_TX;
+ tbl->tx.nic_dmn = &tbl->dmn->info.tx;
+ ret = dr_table_init_nic(tbl->dmn, &tbl->tx);
+ break;
+ case MLX5DR_DOMAIN_TYPE_FDB:
+ tbl->table_type = MLX5_FLOW_TABLE_TYPE_FDB;
+ tbl->rx.nic_dmn = &tbl->dmn->info.rx;
+ tbl->tx.nic_dmn = &tbl->dmn->info.tx;
+ ret = dr_table_init_fdb(tbl);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+
+ mutex_unlock(&tbl->dmn->mutex);
+
+ return ret;
+}
+
+static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl)
+{
+ return mlx5dr_cmd_destroy_flow_table(tbl->dmn->mdev,
+ tbl->table_id,
+ tbl->table_type);
+}
+
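+/* Create the FW flow table object backing this SW-steering table, with its
+ * RX/TX start addresses pointing at the start anchors allocated in
+ * dr_table_init_nic().
+ */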
+static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
+{
+ u64 icm_addr_rx = 0;
+ u64 icm_addr_tx = 0;
+ int ret;
+
+ if (tbl->rx.s_anchor)
+ icm_addr_rx = tbl->rx.s_anchor->chunk->icm_addr;
+
+ if (tbl->tx.s_anchor)
+ icm_addr_tx = tbl->tx.s_anchor->chunk->icm_addr;
+
+ ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev,
+ tbl->table_type,
+ icm_addr_rx,
+ icm_addr_tx,
+ tbl->dmn->info.caps.max_ft_level - 1,
+ true, false, NULL,
+ &tbl->table_id);
+
+ return ret;
+}
+
+struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level)
+{
+ struct mlx5dr_table *tbl;
+ int ret;
+
+ refcount_inc(&dmn->refcount);
+
+ tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
+ if (!tbl)
+ goto dec_ref;
+
+ tbl->dmn = dmn;
+ tbl->level = level;
+ refcount_set(&tbl->refcount, 1);
+
+ ret = dr_table_init(tbl);
+ if (ret)
+ goto free_tbl;
+
+ ret = dr_table_create_sw_owned_tbl(tbl);
+ if (ret)
+ goto uninit_tbl;
+
+ return tbl;
+
+uninit_tbl:
+ dr_table_uninit(tbl);
+free_tbl:
+ kfree(tbl);
+dec_ref:
+ refcount_dec(&dmn->refcount);
+ return NULL;
+}
+
+int mlx5dr_table_destroy(struct mlx5dr_table *tbl)
+{
+ int ret;
+
+ if (refcount_read(&tbl->refcount) > 1)
+ return -EBUSY;
+
+ ret = dr_table_destroy_sw_owned_tbl(tbl);
+ if (ret)
+ return ret;
+
+ dr_table_uninit(tbl);
+
+ if (tbl->miss_action)
+ refcount_dec(&tbl->miss_action->refcount);
+
+ refcount_dec(&tbl->dmn->refcount);
+ kfree(tbl);
+
+ return ret;
+}
+
+u32 mlx5dr_table_get_id(struct mlx5dr_table *tbl)
+{
+ return tbl->table_id;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
new file mode 100644
index 000000000000..a37ee6359be2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -0,0 +1,1060 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019, Mellanox Technologies */
+
+#ifndef _DR_TYPES_
+#define _DR_TYPES_
+
+#include <linux/mlx5/driver.h>
+#include <linux/refcount.h>
+#include "fs_core.h"
+#include "wq.h"
+#include "lib/mlx5.h"
+#include "mlx5_ifc_dr.h"
+#include "mlx5dr.h"
+
+#define DR_RULE_MAX_STES 17
+#define DR_ACTION_MAX_STES 5
+#define WIRE_PORT 0xFFFF
+#define DR_STE_SVLAN 0x1
+#define DR_STE_CVLAN 0x2
+
+#define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg)
+#define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg)
+#define mlx5dr_dbg(dmn, arg...) mlx5_core_dbg((dmn)->mdev, ##arg)
+
+enum mlx5dr_icm_chunk_size {
+ DR_CHUNK_SIZE_1,
+ DR_CHUNK_SIZE_MIN = DR_CHUNK_SIZE_1, /* keep updated when changing */
+ DR_CHUNK_SIZE_2,
+ DR_CHUNK_SIZE_4,
+ DR_CHUNK_SIZE_8,
+ DR_CHUNK_SIZE_16,
+ DR_CHUNK_SIZE_32,
+ DR_CHUNK_SIZE_64,
+ DR_CHUNK_SIZE_128,
+ DR_CHUNK_SIZE_256,
+ DR_CHUNK_SIZE_512,
+ DR_CHUNK_SIZE_1K,
+ DR_CHUNK_SIZE_2K,
+ DR_CHUNK_SIZE_4K,
+ DR_CHUNK_SIZE_8K,
+ DR_CHUNK_SIZE_16K,
+ DR_CHUNK_SIZE_32K,
+ DR_CHUNK_SIZE_64K,
+ DR_CHUNK_SIZE_128K,
+ DR_CHUNK_SIZE_256K,
+ DR_CHUNK_SIZE_512K,
+ DR_CHUNK_SIZE_1024K,
+ DR_CHUNK_SIZE_2048K,
+ DR_CHUNK_SIZE_MAX,
+};
+
+enum mlx5dr_icm_type {
+ DR_ICM_TYPE_STE,
+ DR_ICM_TYPE_MODIFY_ACTION,
+};
+
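+/* Each chunk size holds twice as many entries as the previous one, so
+ * advancing two steps returns a chunk four times larger, capped at
+ * DR_CHUNK_SIZE_MAX.
+ */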
+static inline enum mlx5dr_icm_chunk_size
+mlx5dr_icm_next_higher_chunk(enum mlx5dr_icm_chunk_size chunk)
+{
+ chunk += 2;
+ if (chunk < DR_CHUNK_SIZE_MAX)
+ return chunk;
+
+ return DR_CHUNK_SIZE_MAX;
+}
+
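+/* An STE is 64 bytes: 32 bytes of control, 16 bytes of tag and 16 bytes of
+ * bit mask. DR_STE_SIZE_REDUCED below is the STE without the bit mask.
+ */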
+enum {
+ DR_STE_SIZE = 64,
+ DR_STE_SIZE_CTRL = 32,
+ DR_STE_SIZE_TAG = 16,
+ DR_STE_SIZE_MASK = 16,
+};
+
+enum {
+ DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK,
+};
+
+enum {
+ DR_MODIFY_ACTION_SIZE = 8,
+};
+
+enum mlx5dr_matcher_criteria {
+ DR_MATCHER_CRITERIA_EMPTY = 0,
+ DR_MATCHER_CRITERIA_OUTER = 1 << 0,
+ DR_MATCHER_CRITERIA_MISC = 1 << 1,
+ DR_MATCHER_CRITERIA_INNER = 1 << 2,
+ DR_MATCHER_CRITERIA_MISC2 = 1 << 3,
+ DR_MATCHER_CRITERIA_MISC3 = 1 << 4,
+ DR_MATCHER_CRITERIA_MAX = 1 << 5,
+};
+
+enum mlx5dr_action_type {
+ DR_ACTION_TYP_TNL_L2_TO_L2,
+ DR_ACTION_TYP_L2_TO_TNL_L2,
+ DR_ACTION_TYP_TNL_L3_TO_L2,
+ DR_ACTION_TYP_L2_TO_TNL_L3,
+ DR_ACTION_TYP_DROP,
+ DR_ACTION_TYP_QP,
+ DR_ACTION_TYP_FT,
+ DR_ACTION_TYP_CTR,
+ DR_ACTION_TYP_TAG,
+ DR_ACTION_TYP_MODIFY_HDR,
+ DR_ACTION_TYP_VPORT,
+ DR_ACTION_TYP_POP_VLAN,
+ DR_ACTION_TYP_PUSH_VLAN,
+ DR_ACTION_TYP_MAX,
+};
+
+struct mlx5dr_icm_pool;
+struct mlx5dr_icm_chunk;
+struct mlx5dr_icm_bucket;
+struct mlx5dr_ste_htbl;
+struct mlx5dr_match_param;
+struct mlx5dr_cmd_caps;
+struct mlx5dr_matcher_rx_tx;
+
+struct mlx5dr_ste {
+ u8 *hw_ste;
+ /* refcount: indicates the number of rules that are using this ste */
+ refcount_t refcount;
+
+ /* attached to the miss_list head at each htbl entry */
+ struct list_head miss_list_node;
+
+ /* each rule member that uses this ste is attached here */
+ struct list_head rule_list;
+
+ /* this ste is a member of htbl */
+ struct mlx5dr_ste_htbl *htbl;
+
+ struct mlx5dr_ste_htbl *next_htbl;
+
+ /* this ste is part of a rule, located in ste's chain */
+ u8 ste_chain_location;
+};
+
+struct mlx5dr_ste_htbl_ctrl {
+ /* total number of valid entries belonging to this hash table. This
+ * includes the non-collision and collision entries
+ */
+ unsigned int num_of_valid_entries;
+
+ /* total number of collision entries attached to this table */
+ unsigned int num_of_collisions;
+ unsigned int increase_threshold;
+ u8 may_grow:1;
+};
+
+struct mlx5dr_ste_htbl {
+ u8 lu_type;
+ u16 byte_mask;
+ refcount_t refcount;
+ struct mlx5dr_icm_chunk *chunk;
+ struct mlx5dr_ste *ste_arr;
+ u8 *hw_ste_arr;
+
+ struct list_head *miss_list;
+
+ enum mlx5dr_icm_chunk_size chunk_size;
+ struct mlx5dr_ste *pointing_ste;
+
+ struct mlx5dr_ste_htbl_ctrl ctrl;
+};
+
+struct mlx5dr_ste_send_info {
+ struct mlx5dr_ste *ste;
+ struct list_head send_list;
+ u16 size;
+ u16 offset;
+ u8 data_cont[DR_STE_SIZE];
+ u8 *data;
+};
+
+void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
+ u16 offset, u8 *data,
+ struct mlx5dr_ste_send_info *ste_info,
+ struct list_head *send_list,
+ bool copy_data);
+
+struct mlx5dr_ste_build {
+ u8 inner:1;
+ u8 rx:1;
+ struct mlx5dr_cmd_caps *caps;
+ u8 lu_type;
+ u16 byte_mask;
+ u8 bit_mask[DR_STE_SIZE_MASK];
+ int (*ste_build_tag_func)(struct mlx5dr_match_param *spec,
+ struct mlx5dr_ste_build *sb,
+ u8 *hw_ste_p);
+};
+
+struct mlx5dr_ste_htbl *
+mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
+ enum mlx5dr_icm_chunk_size chunk_size,
+ u8 lu_type, u16 byte_mask);
+
+int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl);
+
+static inline void mlx5dr_htbl_put(struct mlx5dr_ste_htbl *htbl)
+{
+ if (refcount_dec_and_test(&htbl->refcount))
+ mlx5dr_ste_htbl_free(htbl);
+}
+
+static inline void mlx5dr_htbl_get(struct mlx5dr_ste_htbl *htbl)
+{
+ refcount_inc(&htbl->refcount);
+}
+
+/* STE utils */
+u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl);
+void mlx5dr_ste_init(u8 *hw_ste_p, u8 lu_type, u8 entry_type, u16 gvmi);
+void mlx5dr_ste_always_hit_htbl(struct mlx5dr_ste *ste,
+ struct mlx5dr_ste_htbl *next_htbl);
+void mlx5dr_ste_set_miss_addr(u8 *hw_ste, u64 miss_addr);
+u64 mlx5dr_ste_get_miss_addr(u8 *hw_ste);
+void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi);
+void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size);
+void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr);
+void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask);
+bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste);
+bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher,
+ u8 ste_location);
+void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag);
+void mlx5dr_ste_set_counter_id(u8 *hw_ste_p, u32 ctr_id);
+void mlx5dr_ste_set_tx_encap(void *hw_ste_p, u32 reformat_id,
+ int size, bool encap_l3);
+void mlx5dr_ste_set_rx_decap(u8 *hw_ste_p);
+void mlx5dr_ste_set_rx_decap_l3(u8 *hw_ste_p, bool vlan);
+void mlx5dr_ste_set_rx_pop_vlan(u8 *hw_ste_p);
+void mlx5dr_ste_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_tpid_pcp_dei_vid,
+ bool go_back);
+void mlx5dr_ste_set_entry_type(u8 *hw_ste_p, u8 entry_type);
+u8 mlx5dr_ste_get_entry_type(u8 *hw_ste_p);
+void mlx5dr_ste_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions,
+ u32 re_write_index);
+void mlx5dr_ste_set_go_back_bit(u8 *hw_ste_p);
+u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste);
+u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste);
+struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste);
+
+void mlx5dr_ste_free(struct mlx5dr_ste *ste,
+ struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher);
+static inline void mlx5dr_ste_put(struct mlx5dr_ste *ste,
+ struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher)
+{
+ if (refcount_dec_and_test(&ste->refcount))
+ mlx5dr_ste_free(ste, matcher, nic_matcher);
+}
+
+/* initialized to 0, increased only when the ste appears in a new rule */
+static inline void mlx5dr_ste_get(struct mlx5dr_ste *ste)
+{
+ refcount_inc(&ste->refcount);
+}
+
+void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste,
+ struct mlx5dr_ste_htbl *next_htbl);
+bool mlx5dr_ste_equal_tag(void *src, void *dst);
+int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_ste *ste,
+ u8 *cur_hw_ste,
+ enum mlx5dr_icm_chunk_size log_table_size);
+
+/* STE build functions */
+int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn,
+ u8 match_criteria,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_match_param *value);
+int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_match_param *value,
+ u8 *ste_arr);
+int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *builder,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_mpls(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_flex_parser_tnl(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ bool inner, bool rx);
+int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb,
+ struct mlx5dr_match_param *mask,
+ struct mlx5dr_cmd_caps *caps,
+ bool inner, bool rx);
+void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx);
+
+/* Actions utils */
+int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ struct mlx5dr_action *actions[],
+ u32 num_actions,
+ u8 *ste_arr,
+ u32 *new_hw_ste_arr_sz);
+
+struct mlx5dr_match_spec {
+ u32 smac_47_16; /* Source MAC address of incoming packet */
+ /* Incoming packet Ethertype - this is the Ethertype
+ * following the last VLAN tag of the packet
+ */
+ u32 ethertype:16;
+ u32 smac_15_0:16; /* Source MAC address of incoming packet */
+ u32 dmac_47_16; /* Destination MAC address of incoming packet */
+ /* VLAN ID of first VLAN tag in the incoming packet.
+ * Valid only when cvlan_tag==1 or svlan_tag==1
+ */
+ u32 first_vid:12;
+ /* CFI bit of first VLAN tag in the incoming packet.
+ * Valid only when cvlan_tag==1 or svlan_tag==1
+ */
+ u32 first_cfi:1;
+ /* Priority of first VLAN tag in the incoming packet.
+ * Valid only when cvlan_tag==1 or svlan_tag==1
+ */
+ u32 first_prio:3;
+ u32 dmac_15_0:16; /* Destination MAC address of incoming packet */
+ /* TCP flags. Bit 0: FIN; Bit 1: SYN; Bit 2: RST; Bit 3: PSH; Bit 4: ACK;
+ * Bit 5: URG; Bit 6: ECE; Bit 7: CWR; Bit 8: NS
+ */
+ u32 tcp_flags:9;
+ u32 ip_version:4; /* IP version */
+ u32 frag:1; /* Packet is an IP fragment */
+ /* The first vlan in the packet is s-vlan (0x88a8).
+ * cvlan_tag and svlan_tag cannot be set together
+ */
+ u32 svlan_tag:1;
+ /* The first vlan in the packet is c-vlan (0x8100).
+ * cvlan_tag and svlan_tag cannot be set together
+ */
+ u32 cvlan_tag:1;
+ /* Explicit Congestion Notification derived from
+ * Traffic Class/TOS field of IPv6/v4
+ */
+ u32 ip_ecn:2;
+ /* Differentiated Services Code Point derived from
+ * Traffic Class/TOS field of IPv6/v4
+ */
+ u32 ip_dscp:6;
+ u32 ip_protocol:8; /* IP protocol */
+ /* TCP destination port.
+ * tcp and udp sport/dport are mutually exclusive
+ */
+ u32 tcp_dport:16;
+ /* TCP source port. tcp and udp sport/dport are mutually exclusive */
+ u32 tcp_sport:16;
+ u32 ttl_hoplimit:8;
+ u32 reserved:24;
+ /* UDP destination port. tcp and udp sport/dport are mutually exclusive */
+ u32 udp_dport:16;
+ /* UDP source port. tcp and udp sport/dport are mutually exclusive */
+ u32 udp_sport:16;
+ /* IPv6 source address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 src_ip_127_96;
+ /* IPv6 source address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 src_ip_95_64;
+ /* IPv6 source address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 src_ip_63_32;
+ /* IPv6 source address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 src_ip_31_0;
+ /* IPv6 destination address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 dst_ip_127_96;
+ /* IPv6 destination address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 dst_ip_95_64;
+ /* IPv6 destination address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 dst_ip_63_32;
+ /* IPv6 destination address of incoming packets
+ * For IPv4 address use bits 31:0 (rest of the bits are reserved)
+ * This field should be qualified by an appropriate ethertype
+ */
+ u32 dst_ip_31_0;
+};
+
+struct mlx5dr_match_misc {
+ u32 source_sqn:24; /* Source SQN */
+ u32 source_vhca_port:4;
+ /* used with GRE, sequence number exists when gre_s_present == 1 */
+ u32 gre_s_present:1;
+ /* used with GRE, key exists when gre_k_present == 1 */
+ u32 gre_k_present:1;
+ u32 reserved_auto1:1;
+ /* used with GRE, checksum exists when gre_c_present == 1 */
+ u32 gre_c_present:1;
+ /* Source port. 0xffff determines the wire port */
+ u32 source_port:16;
+ u32 reserved_auto2:16;
+ /* VLAN ID of the second VLAN tag in the inner header of the incoming packet.
+ * Valid only when inner_second_cvlan_tag == 1 or inner_second_svlan_tag == 1
+ */
+ u32 inner_second_vid:12;
+ /* CFI bit of the second VLAN tag in the inner header of the incoming packet.
+ * Valid only when inner_second_cvlan_tag == 1 or inner_second_svlan_tag == 1
+ */
+ u32 inner_second_cfi:1;
+ /* Priority of second VLAN tag in the inner header of the incoming packet.
+ * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1
+ */
+ u32 inner_second_prio:3;
+ /* VLAN ID of the second VLAN tag in the outer header of the incoming packet.
+ * Valid only when outer_second_cvlan_tag == 1 or outer_second_svlan_tag == 1
+ */
+ u32 outer_second_vid:12;
+ /* CFI bit of the second VLAN tag in the outer header of the incoming packet.
+ * Valid only when outer_second_cvlan_tag == 1 or outer_second_svlan_tag == 1
+ */
+ u32 outer_second_cfi:1;
+ /* Priority of second VLAN tag in the outer header of the incoming packet.
+ * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1
+ */
+ u32 outer_second_prio:3;
+ u32 gre_protocol:16; /* GRE Protocol (outer) */
+ u32 reserved_auto3:12;
+ /* The second vlan in the inner header of the packet is s-vlan (0x88a8).
+ * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together
+ */
+ u32 inner_second_svlan_tag:1;
+ /* The second vlan in the outer header of the packet is s-vlan (0x88a8).
+ * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together
+ */
+ u32 outer_second_svlan_tag:1;
+ /* The second vlan in the inner header of the packet is c-vlan (0x8100).
+ * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together
+ */
+ u32 inner_second_cvlan_tag:1;
+ /* The second vlan in the outer header of the packet is c-vlan (0x8100).
+ * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together
+ */
+ u32 outer_second_cvlan_tag:1;
+ u32 gre_key_l:8; /* GRE Key [7:0] (outer) */
+ u32 gre_key_h:24; /* GRE Key[31:8] (outer) */
+ u32 reserved_auto4:8;
+ u32 vxlan_vni:24; /* VXLAN VNI (outer) */
+ u32 geneve_oam:1; /* GENEVE OAM field (outer) */
+ u32 reserved_auto5:7;
+ u32 geneve_vni:24; /* GENEVE VNI field (outer) */
+ u32 outer_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (outer) */
+ u32 reserved_auto6:12;
+ u32 inner_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (inner) */
+ u32 reserved_auto7:12;
+ u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */
+ u32 geneve_opt_len:6; /* GENEVE OptLen (outer) */
+ u32 reserved_auto8:10;
+ u32 bth_dst_qp:24; /* Destination QP in BTH header */
+ u32 reserved_auto9:8;
+ u8 reserved_auto10[20];
+};
+
+struct mlx5dr_match_misc2 {
+ u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */
+ u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */
+ u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */
+ u32 outer_first_mpls_label:20; /* First MPLS LABEL (outer) */
+ u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */
+ u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */
+ u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */
+ u32 inner_first_mpls_label:20; /* First MPLS LABEL (inner) */
+ u32 outer_first_mpls_over_gre_ttl:8; /* First MPLS-over-GRE TTL (outer) */
+ u32 outer_first_mpls_over_gre_s_bos:1; /* First MPLS-over-GRE S_BOS (outer) */
+ u32 outer_first_mpls_over_gre_exp:3; /* First MPLS-over-GRE EXP (outer) */
+ u32 outer_first_mpls_over_gre_label:20; /* First MPLS-over-GRE LABEL (outer) */
+ u32 outer_first_mpls_over_udp_ttl:8; /* First MPLS-over-UDP TTL (outer) */
+ u32 outer_first_mpls_over_udp_s_bos:1; /* First MPLS-over-UDP S_BOS (outer) */
+ u32 outer_first_mpls_over_udp_exp:3; /* First MPLS-over-UDP EXP (outer) */
+ u32 outer_first_mpls_over_udp_label:20; /* First MPLS-over-UDP LABEL (outer) */
+ u32 metadata_reg_c_7; /* metadata_reg_c_7 */
+ u32 metadata_reg_c_6; /* metadata_reg_c_6 */
+ u32 metadata_reg_c_5; /* metadata_reg_c_5 */
+ u32 metadata_reg_c_4; /* metadata_reg_c_4 */
+ u32 metadata_reg_c_3; /* metadata_reg_c_3 */
+ u32 metadata_reg_c_2; /* metadata_reg_c_2 */
+ u32 metadata_reg_c_1; /* metadata_reg_c_1 */
+ u32 metadata_reg_c_0; /* metadata_reg_c_0 */
+ u32 metadata_reg_a; /* metadata_reg_a */
+ u32 metadata_reg_b; /* metadata_reg_b */
+ u8 reserved_auto2[8];
+};
+
+struct mlx5dr_match_misc3 {
+ u32 inner_tcp_seq_num;
+ u32 outer_tcp_seq_num;
+ u32 inner_tcp_ack_num;
+ u32 outer_tcp_ack_num;
+ u32 outer_vxlan_gpe_vni:24;
+ u32 reserved_auto1:8;
+ u32 reserved_auto2:16;
+ u32 outer_vxlan_gpe_flags:8;
+ u32 outer_vxlan_gpe_next_protocol:8;
+ u32 icmpv4_header_data;
+ u32 icmpv6_header_data;
+ u32 icmpv6_code:8;
+ u32 icmpv6_type:8;
+ u32 icmpv4_code:8;
+ u32 icmpv4_type:8;
+ u8 reserved_auto3[0x1c];
+};
+
+struct mlx5dr_match_param {
+ struct mlx5dr_match_spec outer;
+ struct mlx5dr_match_misc misc;
+ struct mlx5dr_match_spec inner;
+ struct mlx5dr_match_misc2 misc2;
+ struct mlx5dr_match_misc3 misc3;
+};
+
+#define DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \
+ (_misc3)->icmpv4_code || \
+ (_misc3)->icmpv4_header_data)
+
+struct mlx5dr_esw_caps {
+ u64 drop_icm_address_rx;
+ u64 drop_icm_address_tx;
+ u64 uplink_icm_address_rx;
+ u64 uplink_icm_address_tx;
+ bool sw_owner;
+};
+
+struct mlx5dr_cmd_vport_cap {
+ u16 vport_gvmi;
+ u16 vhca_gvmi;
+ u64 icm_address_rx;
+ u64 icm_address_tx;
+ u32 num;
+};
+
+struct mlx5dr_cmd_caps {
+ u16 gvmi;
+ u64 nic_rx_drop_address;
+ u64 nic_tx_drop_address;
+ u64 nic_tx_allow_address;
+ u64 esw_rx_drop_address;
+ u64 esw_tx_drop_address;
+ u32 log_icm_size;
+ u64 hdr_modify_icm_addr;
+ u32 flex_protocols;
+ u8 flex_parser_id_icmp_dw0;
+ u8 flex_parser_id_icmp_dw1;
+ u8 flex_parser_id_icmpv6_dw0;
+ u8 flex_parser_id_icmpv6_dw1;
+ u8 max_ft_level;
+ u16 roce_min_src_udp;
+ u8 num_esw_ports;
+ bool eswitch_manager;
+ bool rx_sw_owner;
+ bool tx_sw_owner;
+ bool fdb_sw_owner;
+ u32 num_vports;
+ struct mlx5dr_esw_caps esw_caps;
+ struct mlx5dr_cmd_vport_cap *vports_caps;
+ bool prio_tag_required;
+};
+
+struct mlx5dr_domain_rx_tx {
+ u64 drop_icm_addr;
+ u64 default_icm_addr;
+ enum mlx5dr_ste_entry_type ste_type;
+};
+
+struct mlx5dr_domain_info {
+ bool supp_sw_steering;
+ u32 max_inline_size;
+ u32 max_send_wr;
+ u32 max_log_sw_icm_sz;
+ u32 max_log_action_icm_sz;
+ struct mlx5dr_domain_rx_tx rx;
+ struct mlx5dr_domain_rx_tx tx;
+ struct mlx5dr_cmd_caps caps;
+};
+
+struct mlx5dr_domain_cache {
+ struct mlx5dr_fw_recalc_cs_ft **recalc_cs_ft;
+};
+
+struct mlx5dr_domain {
+ struct mlx5dr_domain *peer_dmn;
+ struct mlx5_core_dev *mdev;
+ u32 pdn;
+ struct mlx5_uars_page *uar;
+ enum mlx5dr_domain_type type;
+ refcount_t refcount;
+ struct mutex mutex; /* protect domain */
+ struct mlx5dr_icm_pool *ste_icm_pool;
+ struct mlx5dr_icm_pool *action_icm_pool;
+ struct mlx5dr_send_ring *send_ring;
+ struct mlx5dr_domain_info info;
+ struct mlx5dr_domain_cache cache;
+};
+
+struct mlx5dr_table_rx_tx {
+ struct mlx5dr_ste_htbl *s_anchor;
+ struct mlx5dr_domain_rx_tx *nic_dmn;
+ u64 default_icm_addr;
+};
+
+struct mlx5dr_table {
+ struct mlx5dr_domain *dmn;
+ struct mlx5dr_table_rx_tx rx;
+ struct mlx5dr_table_rx_tx tx;
+ u32 level;
+ u32 table_type;
+ u32 table_id;
+ struct list_head matcher_list;
+ struct mlx5dr_action *miss_action;
+ refcount_t refcount;
+};
+
+struct mlx5dr_matcher_rx_tx {
+ struct mlx5dr_ste_htbl *s_htbl;
+ struct mlx5dr_ste_htbl *e_anchor;
+ struct mlx5dr_ste_build *ste_builder;
+ struct mlx5dr_ste_build ste_builder4[DR_RULE_MAX_STES];
+ struct mlx5dr_ste_build ste_builder6[DR_RULE_MAX_STES];
+ u8 num_of_builders;
+ u8 num_of_builders4;
+ u8 num_of_builders6;
+ u64 default_icm_addr;
+ struct mlx5dr_table_rx_tx *nic_tbl;
+};
+
+struct mlx5dr_matcher {
+ struct mlx5dr_table *tbl;
+ struct mlx5dr_matcher_rx_tx rx;
+ struct mlx5dr_matcher_rx_tx tx;
+ struct list_head matcher_list;
+ u16 prio;
+ struct mlx5dr_match_param mask;
+ u8 match_criteria;
+ refcount_t refcount;
+ struct mlx5dv_flow_matcher *dv_matcher;
+};
+
+struct mlx5dr_rule_member {
+ struct mlx5dr_ste *ste;
+ /* attached to mlx5dr_rule via this */
+ struct list_head list;
+ /* attached to mlx5dr_ste via this */
+ struct list_head use_ste_list;
+};
+
+struct mlx5dr_action {
+ enum mlx5dr_action_type action_type;
+ refcount_t refcount;
+ union {
+ struct {
+ struct mlx5dr_domain *dmn;
+ struct mlx5dr_icm_chunk *chunk;
+ u8 *data;
+ u32 data_size;
+ u16 num_of_actions;
+ u32 index;
+ u8 allow_rx:1;
+ u8 allow_tx:1;
+ u8 modify_ttl:1;
+ } rewrite;
+ struct {
+ struct mlx5dr_domain *dmn;
+ u32 reformat_id;
+ u32 reformat_size;
+ } reformat;
+ struct {
+ u8 is_fw_tbl:1;
+ union {
+ struct mlx5dr_table *tbl;
+ struct {
+ struct mlx5_flow_table *ft;
+ u64 rx_icm_addr;
+ u64 tx_icm_addr;
+ struct mlx5_core_dev *mdev;
+ } fw_tbl;
+ };
+ } dest_tbl;
+ struct {
+ u32 ctr_id;
+ u32 offeset;
+ } ctr;
+ struct {
+ struct mlx5dr_domain *dmn;
+ struct mlx5dr_cmd_vport_cap *caps;
+ u32 num;
+ } vport;
+ struct {
+ u32 vlan_hdr; /* tpid_pcp_dei_vid */
+ } push_vlan;
+ u32 flow_tag;
+ };
+};
+
+enum mlx5dr_connect_type {
+ CONNECT_HIT = 1,
+ CONNECT_MISS = 2,
+};
+
+struct mlx5dr_htbl_connect_info {
+ enum mlx5dr_connect_type type;
+ union {
+ struct mlx5dr_ste_htbl *hit_next_htbl;
+ u64 miss_icm_addr;
+ };
+};
+
+struct mlx5dr_rule_rx_tx {
+ struct list_head rule_members_list;
+ struct mlx5dr_matcher_rx_tx *nic_matcher;
+};
+
+struct mlx5dr_rule {
+ struct mlx5dr_matcher *matcher;
+ struct mlx5dr_rule_rx_tx rx;
+ struct mlx5dr_rule_rx_tx tx;
+ struct list_head rule_actions_list;
+};
+
+void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *new_ste,
+ struct mlx5dr_ste *ste);
+
+struct mlx5dr_icm_chunk {
+ struct mlx5dr_icm_bucket *bucket;
+ struct list_head chunk_list;
+ u32 rkey;
+ u32 num_of_entries;
+ u32 byte_size;
+ u64 icm_addr;
+ u64 mr_addr;
+
+ /* Memory optimisation */
+ struct mlx5dr_ste *ste_arr;
+ u8 *hw_ste_arr;
+ struct list_head *miss_list;
+};
+
+static inline int
+mlx5dr_matcher_supp_flex_parser_icmp_v4(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED;
+}
+
+static inline int
+mlx5dr_matcher_supp_flex_parser_icmp_v6(struct mlx5dr_cmd_caps *caps)
+{
+ return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED;
+}
+
+int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_matcher_rx_tx *nic_matcher,
+ bool ipv6);
+
+static inline u32
+mlx5dr_icm_pool_chunk_size_to_entries(enum mlx5dr_icm_chunk_size chunk_size)
+{
+ return 1 << chunk_size;
+}
+
+static inline int
+mlx5dr_icm_pool_chunk_size_to_byte(enum mlx5dr_icm_chunk_size chunk_size,
+ enum mlx5dr_icm_type icm_type)
+{
+ int num_of_entries;
+ int entry_size;
+
+ if (icm_type == DR_ICM_TYPE_STE)
+ entry_size = DR_STE_SIZE;
+ else
+ entry_size = DR_MODIFY_ACTION_SIZE;
+
+ num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size);
+
+ return entry_size * num_of_entries;
+}
+
+static inline struct mlx5dr_cmd_vport_cap *
+mlx5dr_get_vport_cap(struct mlx5dr_cmd_caps *caps, u32 vport)
+{
+ if (!caps->vports_caps ||
+ (vport >= caps->num_vports && vport != WIRE_PORT))
+ return NULL;
+
+ if (vport == WIRE_PORT)
+ vport = caps->num_vports;
+
+ return &caps->vports_caps[vport];
+}
+
+struct mlx5dr_cmd_query_flow_table_details {
+ u8 status;
+ u8 level;
+ u64 sw_owner_icm_root_1;
+ u64 sw_owner_icm_root_0;
+};
+
+/* internal API functions */
+int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
+ struct mlx5dr_cmd_caps *caps);
+int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev,
+ bool other_vport, u16 vport_number,
+ u64 *icm_address_rx,
+ u64 *icm_address_tx);
+int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev,
+ bool other_vport, u16 vport_number, u16 *gvmi);
+int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev,
+ struct mlx5dr_esw_caps *caps);
+int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev);
+int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id,
+ u32 group_id,
+ u32 modify_header_id,
+ u32 vport_id);
+int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id);
+int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u8 num_of_actions,
+ u64 *actions,
+ u32 *modify_header_id);
+int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev,
+ u32 modify_header_id);
+int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id,
+ u32 *group_id);
+int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u32 table_id,
+ u32 group_id);
+int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
+ u32 table_type,
+ u64 icm_addr_rx,
+ u64 icm_addr_tx,
+ u8 level,
+ bool sw_owner,
+ bool term_tbl,
+ u64 *fdb_rx_icm_addr,
+ u32 *table_id);
+int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev,
+ u32 table_id,
+ u32 table_type);
+int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev,
+ enum fs_flow_table_type type,
+ u32 table_id,
+ struct mlx5dr_cmd_query_flow_table_details *output);
+int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
+ enum mlx5_reformat_ctx_type rt,
+ size_t reformat_size,
+ void *reformat_data,
+ u32 *reformat_id);
+void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev,
+ u32 reformat_id);
+
+struct mlx5dr_cmd_gid_attr {
+ u8 gid[16];
+ u8 mac[6];
+ u32 roce_ver;
+};
+
+struct mlx5dr_cmd_qp_create_attr {
+ u32 page_id;
+ u32 pdn;
+ u32 cqn;
+ u32 pm_state;
+ u32 service_type;
+ u32 buff_umem_id;
+ u32 db_umem_id;
+ u32 sq_wqe_cnt;
+ u32 rq_wqe_cnt;
+ u32 rq_wqe_shift;
+};
+
+int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
+ u16 index, struct mlx5dr_cmd_gid_attr *attr);
+
+struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
+ enum mlx5dr_icm_type icm_type);
+void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool);
+
+struct mlx5dr_icm_chunk *
+mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool,
+ enum mlx5dr_icm_chunk_size chunk_size);
+void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk);
+bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste);
+int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
+ struct mlx5dr_domain_rx_tx *nic_dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ struct mlx5dr_htbl_connect_info *connect_info,
+ bool update_hw_ste);
+void mlx5dr_ste_set_formatted_ste(u16 gvmi,
+ struct mlx5dr_domain_rx_tx *nic_dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ u8 *formatted_ste,
+ struct mlx5dr_htbl_connect_info *connect_info);
+void mlx5dr_ste_copy_param(u8 match_criteria,
+ struct mlx5dr_match_param *set_param,
+ struct mlx5dr_match_parameters *mask);
+
+void mlx5dr_crc32_init_table(void);
+u32 mlx5dr_crc32_slice8_calc(const void *input_data, size_t length);
+
+struct mlx5dr_qp {
+ struct mlx5_core_dev *mdev;
+ struct mlx5_wq_qp wq;
+ struct mlx5_uars_page *uar;
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5_core_qp mqp;
+ struct {
+ unsigned int pc;
+ unsigned int cc;
+ unsigned int size;
+ unsigned int *wqe_head;
+ unsigned int wqe_cnt;
+ } sq;
+ struct {
+ unsigned int pc;
+ unsigned int cc;
+ unsigned int size;
+ unsigned int wqe_cnt;
+ } rq;
+ int max_inline_data;
+};
+
+struct mlx5dr_cq {
+ struct mlx5_core_dev *mdev;
+ struct mlx5_cqwq wq;
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5_core_cq mcq;
+ struct mlx5dr_qp *qp;
+};
+
+struct mlx5dr_mr {
+ struct mlx5_core_dev *mdev;
+ struct mlx5_core_mkey mkey;
+ dma_addr_t dma_addr;
+ void *addr;
+ size_t size;
+};
+
+#define MAX_SEND_CQE 64
+#define MIN_READ_SYNC 64
+
+struct mlx5dr_send_ring {
+ struct mlx5dr_cq *cq;
+ struct mlx5dr_qp *qp;
+ struct mlx5dr_mr *mr;
+ /* How many WQEs are waiting for completion */
+ u32 pending_wqe;
+ /* Signal request per this threshold value */
+ u16 signal_th;
+ /* Each post_send is limited to max_post_send_size */
+ u32 max_post_send_size;
+ /* manage the send queue */
+ u32 tx_head;
+ void *buf;
+ u32 buf_size;
+ struct ib_wc wc[MAX_SEND_CQE];
+ u8 sync_buff[MIN_READ_SYNC];
+ struct mlx5dr_mr *sync_mr;
+};
+
+int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn);
+void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
+ struct mlx5dr_send_ring *send_ring);
+int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn);
+int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste *ste,
+ u8 *data,
+ u16 size,
+ u16 offset);
+int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ u8 *formatted_ste, u8 *mask);
+int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste_htbl *htbl,
+ u8 *ste_init_data,
+ bool update_hw_ste);
+int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action *action);
+
+struct mlx5dr_fw_recalc_cs_ft {
+ u64 rx_icm_addr;
+ u32 table_id;
+ u32 group_id;
+ u32 modify_hdr_id;
+};
+
+struct mlx5dr_fw_recalc_cs_ft *
+mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num);
+void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn,
+ struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft);
+int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn,
+ u32 vport_num,
+ u64 *rx_icm_addr);
+#endif /* _DR_TYPES_H_ */
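Illustrative sketch (not part of the commit): how a caller might populate the layered match layout declared above for an outer IPv4/TCP match. The helper name is hypothetical; only fields declared in struct mlx5dr_match_spec are touched, and the mask would be filled the same way.

static void example_fill_outer_tcp_match(struct mlx5dr_match_param *match)
{
	/* Outer IPv4/TCP traffic to destination port 443 */
	match->outer.ethertype = 0x0800;	/* IPv4 */
	match->outer.ip_version = 4;
	match->outer.ip_protocol = 6;		/* TCP */
	match->outer.tcp_dport = 443;		/* mutually exclusive with udp_dport */
}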
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
new file mode 100644
index 000000000000..3d587d0bdbbe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -0,0 +1,600 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "fs_cmd.h"
+#include "mlx5dr.h"
+#include "fs_dr.h"
+
+static bool mlx5_dr_is_fw_table(u32 flags)
+{
+ if (flags & MLX5_FLOW_TABLE_TERMINATION)
+ return true;
+
+ return false;
+}
+
+static int mlx5_cmd_dr_update_root_ft(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ u32 underlay_qpn,
+ bool disconnect)
+{
+ return mlx5_fs_cmd_get_fw_cmds()->update_root_ft(ns, ft, underlay_qpn,
+ disconnect);
+}
+
+static int set_miss_action(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ struct mlx5dr_action *old_miss_action;
+ struct mlx5dr_action *action = NULL;
+ struct mlx5dr_table *next_tbl;
+ int err;
+
+ next_tbl = next_ft ? next_ft->fs_dr_table.dr_table : NULL;
+ if (next_tbl) {
+ action = mlx5dr_action_create_dest_table(next_tbl);
+ if (!action)
+ return -EINVAL;
+ }
+ old_miss_action = ft->fs_dr_table.miss_action;
+ err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action);
+ if (err && action) {
+ err = mlx5dr_action_destroy(action);
+ if (err) {
+ action = NULL;
+ mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n",
+ err);
+ }
+ }
+ ft->fs_dr_table.miss_action = action;
+ if (old_miss_action) {
+ err = mlx5dr_action_destroy(old_miss_action);
+ if (err)
+ mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n",
+ err);
+ }
+
+ return err;
+}
+
+static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ unsigned int log_size,
+ struct mlx5_flow_table *next_ft)
+{
+ struct mlx5dr_table *tbl;
+ int err;
+
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft,
+ log_size,
+ next_ft);
+
+ tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain,
+ ft->level);
+ if (!tbl) {
+ mlx5_core_err(ns->dev, "Failed creating dr flow_table\n");
+ return -EINVAL;
+ }
+
+ ft->fs_dr_table.dr_table = tbl;
+ ft->id = mlx5dr_table_get_id(tbl);
+
+ if (next_ft) {
+ err = set_miss_action(ns, ft, next_ft);
+ if (err) {
+ mlx5dr_table_destroy(tbl);
+ ft->fs_dr_table.dr_table = NULL;
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int mlx5_cmd_dr_destroy_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft)
+{
+ struct mlx5dr_action *action = ft->fs_dr_table.miss_action;
+ int err;
+
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft);
+
+ err = mlx5dr_table_destroy(ft->fs_dr_table.dr_table);
+ if (err) {
+ mlx5_core_err(ns->dev, "Failed to destroy flow_table (%d)\n",
+ err);
+ return err;
+ }
+ if (action) {
+ err = mlx5dr_action_destroy(action);
+ if (err) {
+ mlx5_core_err(ns->dev, "Failed to destroy action(%d)\n",
+ err);
+ return err;
+ }
+ }
+
+ return err;
+}
+
+static int mlx5_cmd_dr_modify_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ return set_miss_action(ns, ft, next_ft);
+}
+
+static int mlx5_cmd_dr_create_flow_group(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ struct mlx5_flow_group *fg)
+{
+ struct mlx5dr_matcher *matcher;
+ u16 priority = MLX5_GET(create_flow_group_in, in,
+ start_flow_index);
+ u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
+ in,
+ match_criteria_enable);
+ struct mlx5dr_match_parameters mask;
+
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->create_flow_group(ns, ft, in,
+ fg);
+
+ mask.match_buf = MLX5_ADDR_OF(create_flow_group_in,
+ in, match_criteria);
+ mask.match_sz = sizeof(fg->mask.match_criteria);
+
+ matcher = mlx5dr_matcher_create(ft->fs_dr_table.dr_table,
+ priority,
+ match_criteria_enable,
+ &mask);
+ if (!matcher) {
+ mlx5_core_err(ns->dev, "Failed creating matcher\n");
+ return -EINVAL;
+ }
+
+ fg->fs_dr_matcher.dr_matcher = matcher;
+ return 0;
+}
+
+static int mlx5_cmd_dr_destroy_flow_group(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg)
+{
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_group(ns, ft, fg);
+
+ return mlx5dr_matcher_destroy(fg->fs_dr_matcher.dr_matcher);
+}
+
+static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain,
+ struct mlx5_flow_rule *dst)
+{
+ struct mlx5_flow_destination *dest_attr = &dst->dest_attr;
+
+ return mlx5dr_action_create_dest_vport(domain, dest_attr->vport.num,
+ dest_attr->vport.flags &
+ MLX5_FLOW_DEST_VPORT_VHCA_ID,
+ dest_attr->vport.vhca_id);
+}
+
+static struct mlx5dr_action *create_ft_action(struct mlx5_core_dev *dev,
+ struct mlx5_flow_rule *dst)
+{
+ struct mlx5_flow_table *dest_ft = dst->dest_attr.ft;
+
+ if (mlx5_dr_is_fw_table(dest_ft->flags))
+ return mlx5dr_create_action_dest_flow_fw_table(dest_ft, dev);
+ return mlx5dr_action_create_dest_table(dest_ft->fs_dr_table.dr_table);
+}
+
+static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domain,
+ struct mlx5_fs_vlan *vlan)
+{
+ u16 n_ethtype = vlan->ethtype;
+ u8 prio = vlan->prio;
+ u16 vid = vlan->vid;
+ u32 vlan_hdr;
+
+ vlan_hdr = (u32)n_ethtype << 16 | (u32)(prio) << 12 | (u32)vid;
+ return mlx5dr_action_create_push_vlan(domain, htonl(vlan_hdr));
+}
+
+#define MLX5_FLOW_CONTEXT_ACTION_MAX 20
+static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ struct fs_fte *fte)
+{
+ struct mlx5dr_domain *domain = ns->fs_dr_domain.dr_domain;
+ struct mlx5dr_action *term_action = NULL;
+ struct mlx5dr_match_parameters params;
+ struct mlx5_core_dev *dev = ns->dev;
+ struct mlx5dr_action **fs_dr_actions;
+ struct mlx5dr_action *tmp_action;
+ struct mlx5dr_action **actions;
+ bool delay_encap_set = false;
+ struct mlx5dr_rule *rule;
+ struct mlx5_flow_rule *dst;
+ int fs_dr_num_actions = 0;
+ int num_actions = 0;
+ size_t match_sz;
+ int err = 0;
+ int i;
+
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte);
+
+ actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*actions),
+ GFP_KERNEL);
+ if (!actions)
+ return -ENOMEM;
+
+ fs_dr_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX,
+ sizeof(*fs_dr_actions), GFP_KERNEL);
+ if (!fs_dr_actions) {
+ kfree(actions);
+ return -ENOMEM;
+ }
+
+ match_sz = sizeof(fte->val);
+
+ /* The order of the actions must be kept; only the following
+ * order is supported by SW steering:
+ * TX: push vlan -> modify header -> encap
+ * RX: decap -> pop vlan -> modify header
+ */
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
+ tmp_action = create_action_push_vlan(domain, &fte->action.vlan[0]);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
+ tmp_action = create_action_push_vlan(domain, &fte->action.vlan[1]);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
+ enum mlx5dr_action_reformat_type decap_type =
+ DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2;
+
+ tmp_action = mlx5dr_action_create_packet_reformat(domain,
+ decap_type, 0,
+ NULL);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
+ bool is_decap = fte->action.pkt_reformat->reformat_type ==
+ MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+
+ if (is_decap)
+ actions[num_actions++] =
+ fte->action.pkt_reformat->action.dr_action;
+ else
+ delay_encap_set = true;
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
+ tmp_action =
+ mlx5dr_action_create_pop_vlan();
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2) {
+ tmp_action =
+ mlx5dr_action_create_pop_vlan();
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ actions[num_actions++] =
+ fte->action.modify_hdr->action.dr_action;
+
+ if (delay_encap_set)
+ actions[num_actions++] =
+ fte->action.pkt_reformat->action.dr_action;
+
+ /* The order of the actions below is not important */
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ tmp_action = mlx5dr_action_create_drop();
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ term_action = tmp_action;
+ }
+
+ if (fte->flow_context.flow_tag) {
+ tmp_action =
+ mlx5dr_action_create_tag(fte->flow_context.flow_tag);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ enum mlx5_flow_destination_type type = dst->dest_attr.type;
+ u32 id;
+
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -ENOSPC;
+ goto free_actions;
+ }
+
+ switch (type) {
+ case MLX5_FLOW_DESTINATION_TYPE_COUNTER:
+ id = dst->dest_attr.counter_id;
+
+ tmp_action =
+ mlx5dr_action_create_flow_counter(id);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ tmp_action = create_ft_action(dev, dst);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ term_action = tmp_action;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+ tmp_action = create_vport_action(domain, dst);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ term_action = tmp_action;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+ }
+ }
+
+ params.match_sz = match_sz;
+ params.match_buf = (u64 *)fte->val;
+
+ if (term_action)
+ actions[num_actions++] = term_action;
+
+ rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher,
+ &params,
+ num_actions,
+ actions);
+ if (!rule) {
+ err = -EINVAL;
+ goto free_actions;
+ }
+
+ kfree(actions);
+ fte->fs_dr_rule.dr_rule = rule;
+ fte->fs_dr_rule.num_actions = fs_dr_num_actions;
+ fte->fs_dr_rule.dr_actions = fs_dr_actions;
+
+ return 0;
+
+free_actions:
+ for (i = 0; i < fs_dr_num_actions; i++)
+ if (!IS_ERR_OR_NULL(fs_dr_actions[i]))
+ mlx5dr_action_destroy(fs_dr_actions[i]);
+
+ mlx5_core_err(dev, "Failed to create dr rule err(%d)\n", err);
+ kfree(actions);
+ kfree(fs_dr_actions);
+ return err;
+}
+
+static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
+ int reformat_type,
+ size_t size,
+ void *reformat_data,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+ struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain;
+ struct mlx5dr_action *action;
+ int dr_reformat;
+
+ switch (reformat_type) {
+ case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
+ case MLX5_REFORMAT_TYPE_L2_TO_NVGRE:
+ case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2;
+ break;
+ case MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ dr_reformat = DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2;
+ break;
+ case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3;
+ break;
+ default:
+ mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n",
+ reformat_type);
+ return -EOPNOTSUPP;
+ }
+
+ action = mlx5dr_action_create_packet_reformat(dr_domain,
+ dr_reformat,
+ size,
+ reformat_data);
+ if (!action) {
+ mlx5_core_err(ns->dev, "Failed allocating packet-reformat action\n");
+ return -EINVAL;
+ }
+
+ pkt_reformat->action.dr_action = action;
+
+ return 0;
+}
+
+static void mlx5_cmd_dr_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+ mlx5dr_action_destroy(pkt_reformat->action.dr_action);
+}
+
+static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
+ u8 namespace, u8 num_actions,
+ void *modify_actions,
+ struct mlx5_modify_hdr *modify_hdr)
+{
+ struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain;
+ struct mlx5dr_action *action;
+ size_t actions_sz;
+
+ actions_sz = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) *
+ num_actions;
+ action = mlx5dr_action_create_modify_header(dr_domain, 0,
+ actions_sz,
+ modify_actions);
+ if (!action) {
+ mlx5_core_err(ns->dev, "Failed allocating modify-header action\n");
+ return -EINVAL;
+ }
+
+ modify_hdr->action.dr_action = action;
+
+ return 0;
+}
+
+static void mlx5_cmd_dr_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_modify_hdr *modify_hdr)
+{
+ mlx5dr_action_destroy(modify_hdr->action.dr_action);
+}
+
+static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ return -EOPNOTSUPP;
+}
+
+static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ struct mlx5_fs_dr_rule *rule = &fte->fs_dr_rule;
+ int err;
+ int i;
+
+ if (mlx5_dr_is_fw_table(ft->flags))
+ return mlx5_fs_cmd_get_fw_cmds()->delete_fte(ns, ft, fte);
+
+ err = mlx5dr_rule_destroy(rule->dr_rule);
+ if (err)
+ return err;
+
+ for (i = 0; i < rule->num_actions; i++)
+ if (!IS_ERR_OR_NULL(rule->dr_actions[i]))
+ mlx5dr_action_destroy(rule->dr_actions[i]);
+
+ kfree(rule->dr_actions);
+ return 0;
+}
+
+static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_root_namespace *peer_ns)
+{
+ struct mlx5dr_domain *peer_domain = NULL;
+
+ if (peer_ns)
+ peer_domain = peer_ns->fs_dr_domain.dr_domain;
+ mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain,
+ peer_domain);
+ return 0;
+}
+
+static int mlx5_cmd_dr_create_ns(struct mlx5_flow_root_namespace *ns)
+{
+ ns->fs_dr_domain.dr_domain =
+ mlx5dr_domain_create(ns->dev,
+ MLX5DR_DOMAIN_TYPE_FDB);
+ if (!ns->fs_dr_domain.dr_domain) {
+ mlx5_core_err(ns->dev, "Failed to create dr flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns)
+{
+ return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain);
+}
+
+bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev)
+{
+ return mlx5dr_is_supported(dev);
+}
+
+static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = {
+ .create_flow_table = mlx5_cmd_dr_create_flow_table,
+ .destroy_flow_table = mlx5_cmd_dr_destroy_flow_table,
+ .modify_flow_table = mlx5_cmd_dr_modify_flow_table,
+ .create_flow_group = mlx5_cmd_dr_create_flow_group,
+ .destroy_flow_group = mlx5_cmd_dr_destroy_flow_group,
+ .create_fte = mlx5_cmd_dr_create_fte,
+ .update_fte = mlx5_cmd_dr_update_fte,
+ .delete_fte = mlx5_cmd_dr_delete_fte,
+ .update_root_ft = mlx5_cmd_dr_update_root_ft,
+ .packet_reformat_alloc = mlx5_cmd_dr_packet_reformat_alloc,
+ .packet_reformat_dealloc = mlx5_cmd_dr_packet_reformat_dealloc,
+ .modify_header_alloc = mlx5_cmd_dr_modify_header_alloc,
+ .modify_header_dealloc = mlx5_cmd_dr_modify_header_dealloc,
+ .set_peer = mlx5_cmd_dr_set_peer,
+ .create_ns = mlx5_cmd_dr_create_ns,
+ .destroy_ns = mlx5_cmd_dr_destroy_ns,
+};
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void)
+{
+ return &mlx5_flow_cmds_dr;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
new file mode 100644
index 000000000000..1fb185d6ac7f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+ * Copyright (c) 2019 Mellanox Technologies
+ */
+
+#ifndef _MLX5_FS_DR_
+#define _MLX5_FS_DR_
+
+#include "mlx5dr.h"
+
+struct mlx5_flow_root_namespace;
+struct fs_fte;
+
+struct mlx5_fs_dr_action {
+ struct mlx5dr_action *dr_action;
+};
+
+struct mlx5_fs_dr_ns {
+ struct mlx5_dr_ns *dr_ns;
+};
+
+struct mlx5_fs_dr_rule {
+ struct mlx5dr_rule *dr_rule;
+ /* Only actions created by fs_dr */
+ struct mlx5dr_action **dr_actions;
+ int num_actions;
+};
+
+struct mlx5_fs_dr_domain {
+ struct mlx5dr_domain *dr_domain;
+};
+
+struct mlx5_fs_dr_matcher {
+ struct mlx5dr_matcher *dr_matcher;
+};
+
+struct mlx5_fs_dr_table {
+ struct mlx5dr_table *dr_table;
+ struct mlx5dr_action *miss_action;
+};
+
+#ifdef CONFIG_MLX5_SW_STEERING
+
+bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev);
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void);
+
+#else
+
+static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void)
+{
+ return NULL;
+}
+
+static inline bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev)
+{
+ return false;
+}
+
+#endif /* CONFIG_MLX5_SW_STEERING */
+#endif
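Illustrative sketch (not part of the commit): how a flow-steering core could choose between the FW command set and the SW-steering command set exposed by this file. The helper name is hypothetical; mlx5_fs_cmd_get_fw_cmds() is the same getter used by fs_dr.c above.

static const struct mlx5_flow_cmds *
example_pick_cmds(struct mlx5_core_dev *dev)
{
	/* Prefer SW steering when the device advertises sw_owner support */
	if (mlx5_fs_dr_is_supported(dev))
		return mlx5_fs_cmd_get_dr_cmds();
	return mlx5_fs_cmd_get_fw_cmds();
}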
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
new file mode 100644
index 000000000000..596c927220d9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
@@ -0,0 +1,604 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019, Mellanox Technologies */
+
+#ifndef MLX5_IFC_DR_H
+#define MLX5_IFC_DR_H
+
+enum {
+ MLX5DR_ACTION_MDFY_HW_FLD_L2_0 = 0,
+ MLX5DR_ACTION_MDFY_HW_FLD_L2_1 = 1,
+ MLX5DR_ACTION_MDFY_HW_FLD_L2_2 = 2,
+ MLX5DR_ACTION_MDFY_HW_FLD_L3_0 = 3,
+ MLX5DR_ACTION_MDFY_HW_FLD_L3_1 = 4,
+ MLX5DR_ACTION_MDFY_HW_FLD_L3_2 = 5,
+ MLX5DR_ACTION_MDFY_HW_FLD_L3_3 = 6,
+ MLX5DR_ACTION_MDFY_HW_FLD_L3_4 = 7,
+ MLX5DR_ACTION_MDFY_HW_FLD_L4_0 = 8,
+ MLX5DR_ACTION_MDFY_HW_FLD_L4_1 = 9,
+ MLX5DR_ACTION_MDFY_HW_FLD_MPLS = 10,
+ MLX5DR_ACTION_MDFY_HW_FLD_L2_TNL_0 = 11,
+ MLX5DR_ACTION_MDFY_HW_FLD_REG_0 = 12,
+ MLX5DR_ACTION_MDFY_HW_FLD_REG_1 = 13,
+ MLX5DR_ACTION_MDFY_HW_FLD_REG_2 = 14,
+ MLX5DR_ACTION_MDFY_HW_FLD_REG_3 = 15,
+ MLX5DR_ACTION_MDFY_HW_FLD_L4_2 = 16,
+ MLX5DR_ACTION_MDFY_HW_FLD_FLEX_0 = 17,
+ MLX5DR_ACTION_MDFY_HW_FLD_FLEX_1 = 18,
+ MLX5DR_ACTION_MDFY_HW_FLD_FLEX_2 = 19,
+ MLX5DR_ACTION_MDFY_HW_FLD_FLEX_3 = 20,
+ MLX5DR_ACTION_MDFY_HW_FLD_L2_TNL_1 = 21,
+ MLX5DR_ACTION_MDFY_HW_FLD_METADATA = 22,
+ MLX5DR_ACTION_MDFY_HW_FLD_RESERVED = 23,
+};
+
+enum {
+ MLX5DR_ACTION_MDFY_HW_OP_SET = 0x2,
+ MLX5DR_ACTION_MDFY_HW_OP_ADD = 0x3,
+};
+
+enum {
+ MLX5DR_ACTION_MDFY_HW_HDR_L3_NONE = 0x0,
+ MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4 = 0x1,
+ MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6 = 0x2,
+};
+
+enum {
+ MLX5DR_ACTION_MDFY_HW_HDR_L4_NONE = 0x0,
+ MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP = 0x1,
+ MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP = 0x2,
+};
+
+enum {
+ MLX5DR_STE_LU_TYPE_NOP = 0x00,
+ MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP = 0x05,
+ MLX5DR_STE_LU_TYPE_ETHL2_TUNNELING_I = 0x0a,
+ MLX5DR_STE_LU_TYPE_ETHL2_DST_O = 0x06,
+ MLX5DR_STE_LU_TYPE_ETHL2_DST_I = 0x07,
+ MLX5DR_STE_LU_TYPE_ETHL2_DST_D = 0x1b,
+ MLX5DR_STE_LU_TYPE_ETHL2_SRC_O = 0x08,
+ MLX5DR_STE_LU_TYPE_ETHL2_SRC_I = 0x09,
+ MLX5DR_STE_LU_TYPE_ETHL2_SRC_D = 0x1c,
+ MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_O = 0x36,
+ MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_I = 0x37,
+ MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_D = 0x38,
+ MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_O = 0x0d,
+ MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_I = 0x0e,
+ MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_D = 0x1e,
+ MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_O = 0x0f,
+ MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_I = 0x10,
+ MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_D = 0x1f,
+ MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x11,
+ MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x12,
+ MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_D = 0x20,
+ MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_O = 0x29,
+ MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_I = 0x2a,
+ MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_D = 0x2b,
+ MLX5DR_STE_LU_TYPE_ETHL4_O = 0x13,
+ MLX5DR_STE_LU_TYPE_ETHL4_I = 0x14,
+ MLX5DR_STE_LU_TYPE_ETHL4_D = 0x21,
+ MLX5DR_STE_LU_TYPE_ETHL4_MISC_O = 0x2c,
+ MLX5DR_STE_LU_TYPE_ETHL4_MISC_I = 0x2d,
+ MLX5DR_STE_LU_TYPE_ETHL4_MISC_D = 0x2e,
+ MLX5DR_STE_LU_TYPE_MPLS_FIRST_O = 0x15,
+ MLX5DR_STE_LU_TYPE_MPLS_FIRST_I = 0x24,
+ MLX5DR_STE_LU_TYPE_MPLS_FIRST_D = 0x25,
+ MLX5DR_STE_LU_TYPE_GRE = 0x16,
+ MLX5DR_STE_LU_TYPE_FLEX_PARSER_0 = 0x22,
+ MLX5DR_STE_LU_TYPE_FLEX_PARSER_1 = 0x23,
+ MLX5DR_STE_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x19,
+ MLX5DR_STE_LU_TYPE_GENERAL_PURPOSE = 0x18,
+ MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_0 = 0x2f,
+ MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_1 = 0x30,
+ MLX5DR_STE_LU_TYPE_DONT_CARE = 0x0f,
+};
+
+enum mlx5dr_ste_entry_type {
+ MLX5DR_STE_TYPE_TX = 1,
+ MLX5DR_STE_TYPE_RX = 2,
+ MLX5DR_STE_TYPE_MODIFY_PKT = 6,
+};
+
+struct mlx5_ifc_ste_general_bits {
+ u8 entry_type[0x4];
+ u8 reserved_at_4[0x4];
+ u8 entry_sub_type[0x8];
+ u8 byte_mask[0x10];
+
+ u8 next_table_base_63_48[0x10];
+ u8 next_lu_type[0x8];
+ u8 next_table_base_39_32_size[0x8];
+
+ u8 next_table_base_31_5_size[0x1b];
+ u8 linear_hash_enable[0x1];
+ u8 reserved_at_5c[0x2];
+ u8 next_table_rank[0x2];
+
+ u8 reserved_at_60[0xa0];
+ u8 tag_value[0x60];
+ u8 bit_mask[0x60];
+};
+
+struct mlx5_ifc_ste_sx_transmit_bits {
+ u8 entry_type[0x4];
+ u8 reserved_at_4[0x4];
+ u8 entry_sub_type[0x8];
+ u8 byte_mask[0x10];
+
+ u8 next_table_base_63_48[0x10];
+ u8 next_lu_type[0x8];
+ u8 next_table_base_39_32_size[0x8];
+
+ u8 next_table_base_31_5_size[0x1b];
+ u8 linear_hash_enable[0x1];
+ u8 reserved_at_5c[0x2];
+ u8 next_table_rank[0x2];
+
+ u8 sx_wire[0x1];
+ u8 sx_func_lb[0x1];
+ u8 sx_sniffer[0x1];
+ u8 sx_wire_enable[0x1];
+ u8 sx_func_lb_enable[0x1];
+ u8 sx_sniffer_enable[0x1];
+ u8 action_type[0x3];
+ u8 reserved_at_69[0x1];
+ u8 action_description[0x6];
+ u8 gvmi[0x10];
+
+ u8 encap_pointer_vlan_data[0x20];
+
+ u8 loopback_syndome_en[0x8];
+ u8 loopback_syndome[0x8];
+ u8 counter_trigger[0x10];
+
+ u8 miss_address_63_48[0x10];
+ u8 counter_trigger_23_16[0x8];
+ u8 miss_address_39_32[0x8];
+
+ u8 miss_address_31_6[0x1a];
+ u8 learning_point[0x1];
+ u8 go_back[0x1];
+ u8 match_polarity[0x1];
+ u8 mask_mode[0x1];
+ u8 miss_rank[0x2];
+};
+
+struct mlx5_ifc_ste_rx_steering_mult_bits {
+ u8 entry_type[0x4];
+ u8 reserved_at_4[0x4];
+ u8 entry_sub_type[0x8];
+ u8 byte_mask[0x10];
+
+ u8 next_table_base_63_48[0x10];
+ u8 next_lu_type[0x8];
+ u8 next_table_base_39_32_size[0x8];
+
+ u8 next_table_base_31_5_size[0x1b];
+ u8 linear_hash_enable[0x1];
+ u8 reserved_at_[0x2];
+ u8 next_table_rank[0x2];
+
+ u8 member_count[0x10];
+ u8 gvmi[0x10];
+
+ u8 qp_list_pointer[0x20];
+
+ u8 reserved_at_a0[0x1];
+ u8 tunneling_action[0x3];
+ u8 action_description[0x4];
+ u8 reserved_at_a8[0x8];
+ u8 counter_trigger_15_0[0x10];
+
+ u8 miss_address_63_48[0x10];
+ u8 counter_trigger_23_16[0x08];
+ u8 miss_address_39_32[0x8];
+
+ u8 miss_address_31_6[0x1a];
+ u8 learning_point[0x1];
+ u8 fail_on_error[0x1];
+ u8 match_polarity[0x1];
+ u8 mask_mode[0x1];
+ u8 miss_rank[0x2];
+};
+
+struct mlx5_ifc_ste_modify_packet_bits {
+ u8 entry_type[0x4];
+ u8 reserved_at_4[0x4];
+ u8 entry_sub_type[0x8];
+ u8 byte_mask[0x10];
+
+ u8 next_table_base_63_48[0x10];
+ u8 next_lu_type[0x8];
+ u8 next_table_base_39_32_size[0x8];
+
+ u8 next_table_base_31_5_size[0x1b];
+ u8 linear_hash_enable[0x1];
+ u8 reserved_at_[0x2];
+ u8 next_table_rank[0x2];
+
+ u8 number_of_re_write_actions[0x10];
+ u8 gvmi[0x10];
+
+ u8 header_re_write_actions_pointer[0x20];
+
+ u8 reserved_at_a0[0x1];
+ u8 tunneling_action[0x3];
+ u8 action_description[0x4];
+ u8 reserved_at_a8[0x8];
+ u8 counter_trigger_15_0[0x10];
+
+ u8 miss_address_63_48[0x10];
+ u8 counter_trigger_23_16[0x08];
+ u8 miss_address_39_32[0x8];
+
+ u8 miss_address_31_6[0x1a];
+ u8 learning_point[0x1];
+ u8 fail_on_error[0x1];
+ u8 match_polarity[0x1];
+ u8 mask_mode[0x1];
+ u8 miss_rank[0x2];
+};
+
+struct mlx5_ifc_ste_eth_l2_src_bits {
+ u8 smac_47_16[0x20];
+
+ u8 smac_15_0[0x10];
+ u8 l3_ethertype[0x10];
+
+ u8 qp_type[0x2];
+ u8 ethertype_filter[0x1];
+ u8 reserved_at_43[0x1];
+ u8 sx_sniffer[0x1];
+ u8 force_lb[0x1];
+ u8 functional_lb[0x1];
+ u8 port[0x1];
+ u8 reserved_at_48[0x4];
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vlan_qualifier[0x2];
+ u8 reserved_at_52[0x2];
+ u8 first_vlan_id[0xc];
+
+ u8 ip_fragmented[0x1];
+ u8 tcp_syn[0x1];
+ u8 encp_type[0x2];
+ u8 l3_type[0x2];
+ u8 l4_type[0x2];
+ u8 reserved_at_68[0x4];
+ u8 second_priority[0x3];
+ u8 second_cfi[0x1];
+ u8 second_vlan_qualifier[0x2];
+ u8 reserved_at_72[0x2];
+ u8 second_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l2_dst_bits {
+ u8 dmac_47_16[0x20];
+
+ u8 dmac_15_0[0x10];
+ u8 l3_ethertype[0x10];
+
+ u8 qp_type[0x2];
+ u8 ethertype_filter[0x1];
+ u8 reserved_at_43[0x1];
+ u8 sx_sniffer[0x1];
+ u8 force_lb[0x1];
+ u8 functional_lb[0x1];
+ u8 port[0x1];
+ u8 reserved_at_48[0x4];
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vlan_qualifier[0x2];
+ u8 reserved_at_52[0x2];
+ u8 first_vlan_id[0xc];
+
+ u8 ip_fragmented[0x1];
+ u8 tcp_syn[0x1];
+ u8 encp_type[0x2];
+ u8 l3_type[0x2];
+ u8 l4_type[0x2];
+ u8 reserved_at_68[0x4];
+ u8 second_priority[0x3];
+ u8 second_cfi[0x1];
+ u8 second_vlan_qualifier[0x2];
+ u8 reserved_at_72[0x2];
+ u8 second_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l2_src_dst_bits {
+ u8 dmac_47_16[0x20];
+
+ u8 dmac_15_0[0x10];
+ u8 smac_47_32[0x10];
+
+ u8 smac_31_0[0x20];
+
+ u8 sx_sniffer[0x1];
+ u8 force_lb[0x1];
+ u8 functional_lb[0x1];
+ u8 port[0x1];
+ u8 l3_type[0x2];
+ u8 reserved_at_66[0x6];
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vlan_qualifier[0x2];
+ u8 reserved_at_72[0x2];
+ u8 first_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv4_5_tuple_bits {
+ u8 destination_address[0x20];
+
+ u8 source_address[0x20];
+
+ u8 source_port[0x10];
+ u8 destination_port[0x10];
+
+ u8 fragmented[0x1];
+ u8 first_fragment[0x1];
+ u8 reserved_at_62[0x2];
+ u8 reserved_at_64[0x1];
+ u8 ecn[0x2];
+ u8 tcp_ns[0x1];
+ u8 tcp_cwr[0x1];
+ u8 tcp_ece[0x1];
+ u8 tcp_urg[0x1];
+ u8 tcp_ack[0x1];
+ u8 tcp_psh[0x1];
+ u8 tcp_rst[0x1];
+ u8 tcp_syn[0x1];
+ u8 tcp_fin[0x1];
+ u8 dscp[0x6];
+ u8 reserved_at_76[0x2];
+ u8 protocol[0x8];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv6_dst_bits {
+ u8 dst_ip_127_96[0x20];
+
+ u8 dst_ip_95_64[0x20];
+
+ u8 dst_ip_63_32[0x20];
+
+ u8 dst_ip_31_0[0x20];
+};
+
+struct mlx5_ifc_ste_eth_l2_tnl_bits {
+ u8 dmac_47_16[0x20];
+
+ u8 dmac_15_0[0x10];
+ u8 l3_ethertype[0x10];
+
+ u8 l2_tunneling_network_id[0x20];
+
+ u8 ip_fragmented[0x1];
+ u8 tcp_syn[0x1];
+ u8 encp_type[0x2];
+ u8 l3_type[0x2];
+ u8 l4_type[0x2];
+ u8 first_priority[0x3];
+ u8 first_cfi[0x1];
+ u8 reserved_at_6c[0x3];
+ u8 gre_key_flag[0x1];
+ u8 first_vlan_qualifier[0x2];
+ u8 reserved_at_72[0x2];
+ u8 first_vlan_id[0xc];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv6_src_bits {
+ u8 src_ip_127_96[0x20];
+
+ u8 src_ip_95_64[0x20];
+
+ u8 src_ip_63_32[0x20];
+
+ u8 src_ip_31_0[0x20];
+};
+
+struct mlx5_ifc_ste_eth_l3_ipv4_misc_bits {
+ u8 version[0x4];
+ u8 ihl[0x4];
+ u8 reserved_at_8[0x8];
+ u8 total_length[0x10];
+
+ u8 identification[0x10];
+ u8 flags[0x3];
+ u8 fragment_offset[0xd];
+
+ u8 time_to_live[0x8];
+ u8 reserved_at_48[0x8];
+ u8 checksum[0x10];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_ste_eth_l4_bits {
+ u8 fragmented[0x1];
+ u8 first_fragment[0x1];
+ u8 reserved_at_2[0x6];
+ u8 protocol[0x8];
+ u8 dst_port[0x10];
+
+ u8 ipv6_version[0x4];
+ u8 reserved_at_24[0x1];
+ u8 ecn[0x2];
+ u8 tcp_ns[0x1];
+ u8 tcp_cwr[0x1];
+ u8 tcp_ece[0x1];
+ u8 tcp_urg[0x1];
+ u8 tcp_ack[0x1];
+ u8 tcp_psh[0x1];
+ u8 tcp_rst[0x1];
+ u8 tcp_syn[0x1];
+ u8 tcp_fin[0x1];
+ u8 src_port[0x10];
+
+ u8 ipv6_payload_length[0x10];
+ u8 ipv6_hop_limit[0x8];
+ u8 dscp[0x6];
+ u8 reserved_at_5e[0x2];
+
+ u8 tcp_data_offset[0x4];
+ u8 reserved_at_64[0x8];
+ u8 flow_label[0x14];
+};
+
+struct mlx5_ifc_ste_eth_l4_misc_bits {
+ u8 checksum[0x10];
+ u8 length[0x10];
+
+ u8 seq_num[0x20];
+
+ u8 ack_num[0x20];
+
+ u8 urgent_pointer[0x10];
+ u8 window_size[0x10];
+};
+
+struct mlx5_ifc_ste_mpls_bits {
+ u8 mpls0_label[0x14];
+ u8 mpls0_exp[0x3];
+ u8 mpls0_s_bos[0x1];
+ u8 mpls0_ttl[0x8];
+
+ u8 mpls1_label[0x20];
+
+ u8 mpls2_label[0x20];
+
+ u8 reserved_at_60[0x16];
+ u8 mpls4_s_bit[0x1];
+ u8 mpls4_qualifier[0x1];
+ u8 mpls3_s_bit[0x1];
+ u8 mpls3_qualifier[0x1];
+ u8 mpls2_s_bit[0x1];
+ u8 mpls2_qualifier[0x1];
+ u8 mpls1_s_bit[0x1];
+ u8 mpls1_qualifier[0x1];
+ u8 mpls0_s_bit[0x1];
+ u8 mpls0_qualifier[0x1];
+};
+
+struct mlx5_ifc_ste_register_0_bits {
+ u8 register_0_h[0x20];
+
+ u8 register_0_l[0x20];
+
+ u8 register_1_h[0x20];
+
+ u8 register_1_l[0x20];
+};
+
+struct mlx5_ifc_ste_register_1_bits {
+ u8 register_2_h[0x20];
+
+ u8 register_2_l[0x20];
+
+ u8 register_3_h[0x20];
+
+ u8 register_3_l[0x20];
+};
+
+struct mlx5_ifc_ste_gre_bits {
+ u8 gre_c_present[0x1];
+ u8 reserved_at_30[0x1];
+ u8 gre_k_present[0x1];
+ u8 gre_s_present[0x1];
+ u8 strict_src_route[0x1];
+ u8 recur[0x3];
+ u8 flags[0x5];
+ u8 version[0x3];
+ u8 gre_protocol[0x10];
+
+ u8 checksum[0x10];
+ u8 offset[0x10];
+
+ u8 gre_key_h[0x18];
+ u8 gre_key_l[0x8];
+
+ u8 seq_num[0x20];
+};
+
+struct mlx5_ifc_ste_flex_parser_0_bits {
+ u8 parser_3_label[0x14];
+ u8 parser_3_exp[0x3];
+ u8 parser_3_s_bos[0x1];
+ u8 parser_3_ttl[0x8];
+
+ u8 flex_parser_2[0x20];
+
+ u8 flex_parser_1[0x20];
+
+ u8 flex_parser_0[0x20];
+};
+
+struct mlx5_ifc_ste_flex_parser_1_bits {
+ u8 flex_parser_7[0x20];
+
+ u8 flex_parser_6[0x20];
+
+ u8 flex_parser_5[0x20];
+
+ u8 flex_parser_4[0x20];
+};
+
+struct mlx5_ifc_ste_flex_parser_tnl_bits {
+ u8 flex_parser_tunneling_header_63_32[0x20];
+
+ u8 flex_parser_tunneling_header_31_0[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_ste_general_purpose_bits {
+ u8 general_purpose_lookup_field[0x20];
+
+ u8 reserved_at_20[0x20];
+
+ u8 reserved_at_40[0x20];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_ste_src_gvmi_qp_bits {
+ u8 loopback_syndrome[0x8];
+ u8 reserved_at_8[0x8];
+ u8 source_gvmi[0x10];
+
+ u8 reserved_at_20[0x5];
+ u8 force_lb[0x1];
+ u8 functional_lb[0x1];
+ u8 source_is_requestor[0x1];
+ u8 source_qp[0x18];
+
+ u8 reserved_at_40[0x20];
+
+ u8 reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_l2_hdr_bits {
+ u8 dmac_47_16[0x20];
+
+ u8 dmac_15_0[0x10];
+ u8 smac_47_32[0x10];
+
+ u8 smac_31_0[0x20];
+
+ u8 ethertype[0x10];
+ u8 vlan_type[0x10];
+
+ u8 vlan[0x10];
+ u8 reserved_at_90[0x10];
+};
+
+/* Both HW set and HW add share the same HW format with different opcodes */
+struct mlx5_ifc_dr_action_hw_set_bits {
+ u8 opcode[0x8];
+ u8 destination_field_code[0x8];
+ u8 reserved_at_10[0x2];
+ u8 destination_left_shifter[0x6];
+ u8 reserved_at_18[0x3];
+ u8 destination_length[0x5];
+
+ u8 inline_data[0x20];
+};
+
+#endif /* MLX5_IFC_DR_H */
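Illustrative sketch (not part of the commit): filling the HW SET action layout above via the driver's MLX5_SET() accessor, assuming the usual mlx5_ifc convention of addressing a layout by its struct name without the _bits suffix. The helper, the register choice, and the length encoding are assumptions for illustration only.

static void example_build_hw_set_metadata(u8 *hw_action, u32 value)
{
	/* SET metadata register C0; destination_length 0 is assumed to
	 * encode a full 32-bit write.
	 */
	MLX5_SET(dr_action_hw_set, hw_action, opcode,
		 MLX5DR_ACTION_MDFY_HW_OP_SET);
	MLX5_SET(dr_action_hw_set, hw_action, destination_field_code,
		 MLX5DR_ACTION_MDFY_HW_FLD_METADATA);
	MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, 0);
	MLX5_SET(dr_action_hw_set, hw_action, destination_length, 0);
	MLX5_SET(dr_action_hw_set, hw_action, inline_data, value);
}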
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
new file mode 100644
index 000000000000..adda9cbfba45
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019, Mellanox Technologies */
+
+#ifndef _MLX5DR_H_
+#define _MLX5DR_H_
+
+struct mlx5dr_domain;
+struct mlx5dr_table;
+struct mlx5dr_matcher;
+struct mlx5dr_rule;
+struct mlx5dr_action;
+
+enum mlx5dr_domain_type {
+ MLX5DR_DOMAIN_TYPE_NIC_RX,
+ MLX5DR_DOMAIN_TYPE_NIC_TX,
+ MLX5DR_DOMAIN_TYPE_FDB,
+};
+
+enum mlx5dr_domain_sync_flags {
+ MLX5DR_DOMAIN_SYNC_FLAGS_SW = 1 << 0,
+ MLX5DR_DOMAIN_SYNC_FLAGS_HW = 1 << 1,
+};
+
+enum mlx5dr_action_reformat_type {
+ DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2,
+ DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2,
+ DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2,
+ DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3,
+};
+
+struct mlx5dr_match_parameters {
+ size_t match_sz;
+ u64 *match_buf; /* Device spec format */
+};
+
+#ifdef CONFIG_MLX5_SW_STEERING
+
+struct mlx5dr_domain *
+mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type);
+
+int mlx5dr_domain_destroy(struct mlx5dr_domain *domain);
+
+int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags);
+
+void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
+ struct mlx5dr_domain *peer_dmn);
+
+struct mlx5dr_table *
+mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level);
+
+int mlx5dr_table_destroy(struct mlx5dr_table *table);
+
+u32 mlx5dr_table_get_id(struct mlx5dr_table *table);
+
+struct mlx5dr_matcher *
+mlx5dr_matcher_create(struct mlx5dr_table *table,
+ u16 priority,
+ u8 match_criteria_enable,
+ struct mlx5dr_match_parameters *mask);
+
+int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher);
+
+struct mlx5dr_rule *
+mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *value,
+ size_t num_actions,
+ struct mlx5dr_action *actions[]);
+
+int mlx5dr_rule_destroy(struct mlx5dr_rule *rule);
+
+int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
+ struct mlx5dr_action *action);
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_table(struct mlx5dr_table *table);
+
+struct mlx5dr_action *
+mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft,
+ struct mlx5_core_dev *mdev);
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain,
+ u32 vport, u8 vhca_id_valid,
+ u16 vhca_id);
+
+struct mlx5dr_action *mlx5dr_action_create_drop(void);
+
+struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value);
+
+struct mlx5dr_action *
+mlx5dr_action_create_flow_counter(u32 counter_id);
+
+struct mlx5dr_action *
+mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
+ enum mlx5dr_action_reformat_type reformat_type,
+ size_t data_sz,
+ void *data);
+
+struct mlx5dr_action *
+mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain,
+ u32 flags,
+ size_t actions_sz,
+ __be64 actions[]);
+
+struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void);
+
+struct mlx5dr_action *
+mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, __be32 vlan_hdr);
+
+int mlx5dr_action_destroy(struct mlx5dr_action *action);
+
+static inline bool
+mlx5dr_is_supported(struct mlx5_core_dev *dev)
+{
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner);
+}
+
+#else /* CONFIG_MLX5_SW_STEERING */
+
+static inline struct mlx5dr_domain *
+mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) { return NULL; }
+
+static inline int
+mlx5dr_domain_destroy(struct mlx5dr_domain *domain) { return 0; }
+
+static inline int
+mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags) { return 0; }
+
+static inline void
+mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
+ struct mlx5dr_domain *peer_dmn) { }
+
+static inline struct mlx5dr_table *
+mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level) { return NULL; }
+
+static inline int
+mlx5dr_table_destroy(struct mlx5dr_table *table) { return 0; }
+
+static inline u32
+mlx5dr_table_get_id(struct mlx5dr_table *table) { return 0; }
+
+static inline struct mlx5dr_matcher *
+mlx5dr_matcher_create(struct mlx5dr_table *table,
+ u16 priority,
+ u8 match_criteria_enable,
+ struct mlx5dr_match_parameters *mask) { return NULL; }
+
+static inline int
+mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) { return 0; }
+
+static inline struct mlx5dr_rule *
+mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
+ struct mlx5dr_match_parameters *value,
+ size_t num_actions,
+ struct mlx5dr_action *actions[]) { return NULL; }
+
+static inline int
+mlx5dr_rule_destroy(struct mlx5dr_rule *rule) { return 0; }
+
+static inline int
+mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
+ struct mlx5dr_action *action) { return 0; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_dest_table(struct mlx5dr_table *table) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft,
+ struct mlx5_core_dev *mdev) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain,
+ u32 vport, u8 vhca_id_valid,
+ u16 vhca_id) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_drop(void) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_tag(u32 tag_value) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_flow_counter(u32 counter_id) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
+ enum mlx5dr_action_reformat_type reformat_type,
+ size_t data_sz,
+ void *data) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain,
+ u32 flags,
+ size_t actions_sz,
+ __be64 actions[]) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_pop_vlan(void) { return NULL; }
+
+static inline struct mlx5dr_action *
+mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain,
+ __be32 vlan_hdr) { return NULL; }
+
+static inline int
+mlx5dr_action_destroy(struct mlx5dr_action *action) { return 0; }
+
+static inline bool
+mlx5dr_is_supported(struct mlx5_core_dev *dev) { return false; }
+
+#endif /* CONFIG_MLX5_SW_STEERING */
+
+#endif /* _MLX5DR_H_ */
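Illustrative sketch (not part of the commit): an end-to-end consumer of the API declared above, mirroring the call sequence used by fs_dr.c. The helper name is hypothetical, intermediate error unwinding and teardown are abbreviated, the criteria bit assumes outer-headers matching, and the mask/value buffers are caller-supplied in device specification format.

static int example_install_drop_rule(struct mlx5_core_dev *mdev,
				     struct mlx5dr_match_parameters *mask,
				     struct mlx5dr_match_parameters *value)
{
	struct mlx5dr_action *actions[1];
	struct mlx5dr_matcher *matcher;
	struct mlx5dr_domain *dmn;
	struct mlx5dr_table *tbl;
	struct mlx5dr_rule *rule;

	dmn = mlx5dr_domain_create(mdev, MLX5DR_DOMAIN_TYPE_FDB);
	if (!dmn)
		return -EOPNOTSUPP;

	tbl = mlx5dr_table_create(dmn, 0 /* level */);
	matcher = mlx5dr_matcher_create(tbl, 0 /* priority */,
					1 << 0 /* outer headers */, mask);
	actions[0] = mlx5dr_action_create_drop();
	rule = mlx5dr_rule_create(matcher, value, 1, actions);
	if (!rule)
		return -EINVAL;

	/* Teardown happens in reverse: rule, action, matcher, table, domain */
	return 0;
}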
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 15a8be6bad27..a43140f7b5eb 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -2172,9 +2172,8 @@ static int lan743x_rx_napi_poll(struct napi_struct *napi, int weight)
}
count = 0;
while (count < weight) {
- int rx_process_result = -1;
+ int rx_process_result = lan743x_rx_process_packet(rx);
- rx_process_result = lan743x_rx_process_packet(rx);
if (rx_process_result == RX_PROCESS_RESULT_PACKET_RECEIVED) {
count++;
} else if (rx_process_result ==
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
index bc9850e4ec5e..0e2db6ea79e9 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
@@ -6,6 +6,7 @@
#include <linux/bug.h>
#include <linux/jiffies.h>
#include <linux/skbuff.h>
+#include <linux/timekeeping.h>
#include "../ccm.h"
#include "../nfp_app.h"
@@ -175,29 +176,151 @@ nfp_bpf_ctrl_reply_val(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply,
return &reply->data[bpf->cmsg_key_sz * (n + 1) + bpf->cmsg_val_sz * n];
}
+static bool nfp_bpf_ctrl_op_cache_invalidate(enum nfp_ccm_type op)
+{
+ return op == NFP_CCM_TYPE_BPF_MAP_UPDATE ||
+ op == NFP_CCM_TYPE_BPF_MAP_DELETE;
+}
+
+static bool nfp_bpf_ctrl_op_cache_capable(enum nfp_ccm_type op)
+{
+ return op == NFP_CCM_TYPE_BPF_MAP_LOOKUP ||
+ op == NFP_CCM_TYPE_BPF_MAP_GETNEXT;
+}
+
+static bool nfp_bpf_ctrl_op_cache_fill(enum nfp_ccm_type op)
+{
+ return op == NFP_CCM_TYPE_BPF_MAP_GETFIRST ||
+ op == NFP_CCM_TYPE_BPF_MAP_GETNEXT;
+}
+
+static unsigned int
+nfp_bpf_ctrl_op_cache_get(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op,
+ const u8 *key, u8 *out_key, u8 *out_value,
+ u32 *cache_gen)
+{
+ struct bpf_map *map = &nfp_map->offmap->map;
+ struct nfp_app_bpf *bpf = nfp_map->bpf;
+ unsigned int i, count, n_entries;
+ struct cmsg_reply_map_op *reply;
+
+ n_entries = nfp_bpf_ctrl_op_cache_fill(op) ? bpf->cmsg_cache_cnt : 1;
+
+ spin_lock(&nfp_map->cache_lock);
+ *cache_gen = nfp_map->cache_gen;
+ if (nfp_map->cache_blockers)
+ n_entries = 1;
+
+ if (nfp_bpf_ctrl_op_cache_invalidate(op))
+ goto exit_block;
+ if (!nfp_bpf_ctrl_op_cache_capable(op))
+ goto exit_unlock;
+
+ if (!nfp_map->cache)
+ goto exit_unlock;
+ if (nfp_map->cache_to < ktime_get_ns())
+ goto exit_invalidate;
+
+ reply = (void *)nfp_map->cache->data;
+ count = be32_to_cpu(reply->count);
+
+ for (i = 0; i < count; i++) {
+ void *cached_key;
+
+ cached_key = nfp_bpf_ctrl_reply_key(bpf, reply, i);
+ if (memcmp(cached_key, key, map->key_size))
+ continue;
+
+ if (op == NFP_CCM_TYPE_BPF_MAP_LOOKUP)
+ memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, i),
+ map->value_size);
+ if (op == NFP_CCM_TYPE_BPF_MAP_GETNEXT) {
+ if (i + 1 == count)
+ break;
+
+ memcpy(out_key,
+ nfp_bpf_ctrl_reply_key(bpf, reply, i + 1),
+ map->key_size);
+ }
+
+ n_entries = 0;
+ goto exit_unlock;
+ }
+ goto exit_unlock;
+
+exit_block:
+ nfp_map->cache_blockers++;
+exit_invalidate:
+ dev_consume_skb_any(nfp_map->cache);
+ nfp_map->cache = NULL;
+exit_unlock:
+ spin_unlock(&nfp_map->cache_lock);
+ return n_entries;
+}
+
+static void
+nfp_bpf_ctrl_op_cache_put(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op,
+ struct sk_buff *skb, u32 cache_gen)
+{
+ bool blocker, filler;
+
+ blocker = nfp_bpf_ctrl_op_cache_invalidate(op);
+ filler = nfp_bpf_ctrl_op_cache_fill(op);
+ if (blocker || filler) {
+ u64 to = 0;
+
+ if (filler)
+ to = ktime_get_ns() + NFP_BPF_MAP_CACHE_TIME_NS;
+
+ spin_lock(&nfp_map->cache_lock);
+ if (blocker) {
+ nfp_map->cache_blockers--;
+ nfp_map->cache_gen++;
+ }
+ if (filler && !nfp_map->cache_blockers &&
+ nfp_map->cache_gen == cache_gen) {
+ nfp_map->cache_to = to;
+ swap(nfp_map->cache, skb);
+ }
+ spin_unlock(&nfp_map->cache_lock);
+ }
+
+ dev_consume_skb_any(skb);
+}
+
static int
nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
u8 *key, u8 *value, u64 flags, u8 *out_key, u8 *out_value)
{
struct nfp_bpf_map *nfp_map = offmap->dev_priv;
+ unsigned int n_entries, reply_entries, count;
struct nfp_app_bpf *bpf = nfp_map->bpf;
struct bpf_map *map = &offmap->map;
struct cmsg_reply_map_op *reply;
struct cmsg_req_map_op *req;
struct sk_buff *skb;
+ u32 cache_gen;
int err;
/* FW messages have no space for more than 32 bits of flags */
if (flags >> 32)
return -EOPNOTSUPP;
+ /* Handle op cache */
+ n_entries = nfp_bpf_ctrl_op_cache_get(nfp_map, op, key, out_key,
+ out_value, &cache_gen);
+ if (!n_entries)
+ return 0;
+
skb = nfp_bpf_cmsg_map_req_alloc(bpf, 1);
- if (!skb)
- return -ENOMEM;
+ if (!skb) {
+ err = -ENOMEM;
+ goto err_cache_put;
+ }
req = (void *)skb->data;
req->tid = cpu_to_be32(nfp_map->tid);
- req->count = cpu_to_be32(1);
+ req->count = cpu_to_be32(n_entries);
req->flags = cpu_to_be32(flags);
/* Copy inputs */
@@ -207,16 +330,38 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
memcpy(nfp_bpf_ctrl_req_val(bpf, req, 0), value,
map->value_size);
- skb = nfp_ccm_communicate(&bpf->ccm, skb, op,
- nfp_bpf_cmsg_map_reply_size(bpf, 1));
- if (IS_ERR(skb))
- return PTR_ERR(skb);
+ skb = nfp_ccm_communicate(&bpf->ccm, skb, op, 0);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ goto err_cache_put;
+ }
+
+ if (skb->len < sizeof(*reply)) {
+ cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d!\n",
+ op, skb->len);
+ err = -EIO;
+ goto err_free;
+ }
reply = (void *)skb->data;
+ count = be32_to_cpu(reply->count);
err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr);
+ /* FW responds with a message sized to hold the good entries,
+ * plus one extra entry if there was an error.
+ */
+ reply_entries = count + !!err;
+ if (n_entries > 1 && count)
+ err = 0;
if (err)
goto err_free;
+ if (skb->len != nfp_bpf_cmsg_map_reply_size(bpf, reply_entries)) {
+ cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d for %d entries!\n",
+ op, skb->len, reply_entries);
+ err = -EIO;
+ goto err_free;
+ }
+
/* Copy outputs */
if (out_key)
memcpy(out_key, nfp_bpf_ctrl_reply_key(bpf, reply, 0),
@@ -225,11 +370,13 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, 0),
map->value_size);
- dev_consume_skb_any(skb);
+ nfp_bpf_ctrl_op_cache_put(nfp_map, op, skb, cache_gen);
return 0;
err_free:
dev_kfree_skb_any(skb);
+err_cache_put:
+ nfp_bpf_ctrl_op_cache_put(nfp_map, op, NULL, cache_gen);
return err;
}
@@ -267,11 +414,29 @@ int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
key, NULL, 0, next_key, NULL);
}
+unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf)
+{
+ return max(nfp_bpf_cmsg_map_req_size(bpf, 1),
+ nfp_bpf_cmsg_map_reply_size(bpf, 1));
+}
+
unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf)
{
- return max3((unsigned int)NFP_NET_DEFAULT_MTU,
- nfp_bpf_cmsg_map_req_size(bpf, 1),
- nfp_bpf_cmsg_map_reply_size(bpf, 1));
+ return max3(NFP_NET_DEFAULT_MTU,
+ nfp_bpf_cmsg_map_req_size(bpf, NFP_BPF_MAP_CACHE_CNT),
+ nfp_bpf_cmsg_map_reply_size(bpf, NFP_BPF_MAP_CACHE_CNT));
+}
+
+unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf)
+{
+ unsigned int mtu, req_max, reply_max, entry_sz;
+
+ mtu = bpf->app->ctrl->dp.mtu;
+ entry_sz = bpf->cmsg_key_sz + bpf->cmsg_val_sz;
+ req_max = (mtu - sizeof(struct cmsg_req_map_op)) / entry_sz;
+ reply_max = (mtu - sizeof(struct cmsg_reply_map_op)) / entry_sz;
+
+ return min3(req_max, reply_max, NFP_BPF_MAP_CACHE_CNT);
}
void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
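
The helpers above bracket every map operation: nfp_bpf_ctrl_op_cache_get() may answer a LOOKUP/GETNEXT straight from the cached multi-entry reply (returning 0), or tell the caller how many entries to request from firmware, while nfp_bpf_ctrl_op_cache_put() either installs the fresh reply as the new cache (fill ops) or drops its blocker and bumps the generation (invalidating ops). A condensed sketch of that flow, mirroring nfp_bpf_ctrl_entry_op() above (illustration only, not additional driver code):

static int example_map_op(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op,
			  u8 *key, u8 *out_key, u8 *out_value)
{
	unsigned int n_entries;
	struct sk_buff *skb = NULL;
	u32 cache_gen;

	n_entries = nfp_bpf_ctrl_op_cache_get(nfp_map, op, key, out_key,
					      out_value, &cache_gen);
	if (!n_entries)
		return 0;	/* answered from the cached reply */

	/* ... build a request for n_entries and exchange it with FW,
	 * leaving the reply in skb (or NULL on failure) ...
	 */

	/* Either installs skb as the new cache or just releases the
	 * blocker taken in _get(); a NULL skb is simply dropped.
	 */
	nfp_bpf_ctrl_op_cache_put(nfp_map, op, skb, cache_gen);
	return 0;
}

As a rough sizing example (key/value sizes are reported by firmware; the numbers here are assumed): with 64-byte keys and 64-byte values on the default 1500-byte control-channel MTU, both req_max and reply_max in nfp_bpf_ctrl_cmsg_cache_cnt() come out above NFP_BPF_MAP_CACHE_CNT, so cached reads are capped at 4 entries per request.
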
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
index 06c4286bd79e..a83a0ad5e27d 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
@@ -24,6 +24,7 @@ enum bpf_cap_tlv_type {
NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5,
NFP_BPF_CAP_TYPE_ADJUST_TAIL = 6,
NFP_BPF_CAP_TYPE_ABI_VERSION = 7,
+ NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT = 8,
};
struct nfp_bpf_cap_tlv_func {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 1c9fb11470df..8f732771d3fa 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -300,6 +300,14 @@ nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value,
}
static int
+nfp_bpf_parse_cap_cmsg_multi_ent(struct nfp_app_bpf *bpf, void __iomem *value,
+ u32 length)
+{
+ bpf->cmsg_multi_ent = true;
+ return 0;
+}
+
+static int
nfp_bpf_parse_cap_abi_version(struct nfp_app_bpf *bpf, void __iomem *value,
u32 length)
{
@@ -375,6 +383,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
length))
goto err_release_free;
break;
+ case NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT:
+ if (nfp_bpf_parse_cap_cmsg_multi_ent(app->priv, value,
+ length))
+ goto err_release_free;
+ break;
default:
nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
break;
@@ -415,6 +428,25 @@ static void nfp_bpf_ndo_uninit(struct nfp_app *app, struct net_device *netdev)
bpf_offload_dev_netdev_unregister(bpf->bpf_dev, netdev);
}
+static int nfp_bpf_start(struct nfp_app *app)
+{
+ struct nfp_app_bpf *bpf = app->priv;
+
+ if (app->ctrl->dp.mtu < nfp_bpf_ctrl_cmsg_min_mtu(bpf)) {
+ nfp_err(bpf->app->cpp,
+ "ctrl channel MTU below min required %u < %u\n",
+ app->ctrl->dp.mtu, nfp_bpf_ctrl_cmsg_min_mtu(bpf));
+ return -EINVAL;
+ }
+
+ if (bpf->cmsg_multi_ent)
+ bpf->cmsg_cache_cnt = nfp_bpf_ctrl_cmsg_cache_cnt(bpf);
+ else
+ bpf->cmsg_cache_cnt = 1;
+
+ return 0;
+}
+
static int nfp_bpf_init(struct nfp_app *app)
{
struct nfp_app_bpf *bpf;
@@ -488,6 +520,7 @@ const struct nfp_app_type app_bpf = {
.init = nfp_bpf_init,
.clean = nfp_bpf_clean,
+ .start = nfp_bpf_start,
.check_mtu = nfp_bpf_check_mtu,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 57d6ff51e980..fac9c6f9e197 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -99,6 +99,7 @@ enum pkt_vec {
* @maps_neutral: hash table of offload-neutral maps (on pointer)
*
* @abi_version: global BPF ABI version
+ * @cmsg_cache_cnt: number of entries to read for caching
*
* @adjust_head: adjust head capability
* @adjust_head.flags: extra flags for adjust head
@@ -124,6 +125,7 @@ enum pkt_vec {
* @pseudo_random: FW initialized the pseudo-random machinery (CSRs)
* @queue_select: BPF can set the RX queue ID in packet vector
* @adjust_tail: BPF can simply trunc packet size for adjust tail
+ * @cmsg_multi_ent: FW can pack multiple map entries in a single cmsg
*/
struct nfp_app_bpf {
struct nfp_app *app;
@@ -134,6 +136,8 @@ struct nfp_app_bpf {
unsigned int cmsg_key_sz;
unsigned int cmsg_val_sz;
+ unsigned int cmsg_cache_cnt;
+
struct list_head map_list;
unsigned int maps_in_use;
unsigned int map_elems_in_use;
@@ -169,6 +173,7 @@ struct nfp_app_bpf {
bool pseudo_random;
bool queue_select;
bool adjust_tail;
+ bool cmsg_multi_ent;
};
enum nfp_bpf_map_use {
@@ -183,11 +188,21 @@ struct nfp_bpf_map_word {
unsigned char non_zero_update :1;
};
+#define NFP_BPF_MAP_CACHE_CNT 4U
+#define NFP_BPF_MAP_CACHE_TIME_NS (250 * 1000)
+
/**
* struct nfp_bpf_map - private per-map data attached to BPF maps for offload
* @offmap: pointer to the offloaded BPF map
* @bpf: back pointer to bpf app private structure
* @tid: table id identifying map on datapath
+ *
+ * @cache_lock: protects @cache_blockers, @cache_to, @cache
+ * @cache_blockers: number of ops in flight which block caching
+ * @cache_gen: counter incremented by every blocker on exit
+ * @cache_to: time when cache will no longer be valid (ns)
+ * @cache: skb with cached response
+ *
* @l: link on the nfp_app_bpf->map_list list
* @use_map: map of how the value is used (in 4B chunks)
*/
@@ -195,6 +210,13 @@ struct nfp_bpf_map {
struct bpf_offloaded_map *offmap;
struct nfp_app_bpf *bpf;
u32 tid;
+
+ spinlock_t cache_lock;
+ u32 cache_blockers;
+ u32 cache_gen;
+ u64 cache_to;
+ struct sk_buff *cache;
+
struct list_head l;
struct nfp_bpf_map_word use_map[];
};
@@ -564,7 +586,9 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv);
+unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf);
unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf);
+unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf);
long long int
nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map);
void
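
The @cache_blockers/@cache_gen pair documented above keeps a multi-entry LOOKUP/GETNEXT reply from being installed as the cache after a concurrent UPDATE/DELETE has already made it stale. A sketch of the interleaving this guards against (illustration only):

/*
 *   CPU A: GETNEXT (filler)           CPU B: UPDATE (blocker)
 *   ------------------------          -----------------------
 *   cache_get(): snapshot gen
 *   send multi-entry request
 *                                     cache_get(): cache_blockers++,
 *                                                  old cache dropped
 *                                     FW applies the update
 *                                     cache_put(): cache_blockers--,
 *                                                  cache_gen++
 *   cache_put(): gen changed, so the
 *                (now stale) reply is
 *                freed, not cached
 */
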
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 39c9fec222b4..88fab6a82acf 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -385,6 +385,7 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
offmap->dev_priv = nfp_map;
nfp_map->offmap = offmap;
nfp_map->bpf = bpf;
+ spin_lock_init(&nfp_map->cache_lock);
res = nfp_bpf_ctrl_alloc_map(bpf, &offmap->map);
if (res < 0) {
@@ -407,6 +408,8 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
struct nfp_bpf_map *nfp_map = offmap->dev_priv;
nfp_bpf_ctrl_free_map(bpf, nfp_map);
+ dev_consume_skb_any(nfp_map->cache);
+ WARN_ON_ONCE(nfp_map->cache_blockers);
list_del_init(&nfp_map->l);
bpf->map_elems_in_use -= offmap->map.max_entries;
bpf->maps_in_use--;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 5d6c3738b494..250f510b1d21 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -66,7 +66,7 @@
#define NFP_NET_MAX_DMA_BITS 40
/* Default size for MTU and freelist buffer sizes */
-#define NFP_NET_DEFAULT_MTU 1500
+#define NFP_NET_DEFAULT_MTU 1500U
/* Maximum number of bytes prepended to a packet */
#define NFP_NET_MAX_PREPEND 64
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 6f97b554f7da..61aabffc8888 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -4116,14 +4116,7 @@ int nfp_net_init(struct nfp_net *nn)
/* Set default MTU and Freelist buffer size */
if (!nfp_net_is_data_vnic(nn) && nn->app->ctrl_mtu) {
- if (nn->app->ctrl_mtu <= nn->max_mtu) {
- nn->dp.mtu = nn->app->ctrl_mtu;
- } else {
- if (nn->app->ctrl_mtu != NFP_APP_CTRL_MTU_MAX)
- nn_warn(nn, "app requested MTU above max supported %u > %u\n",
- nn->app->ctrl_mtu, nn->max_mtu);
- nn->dp.mtu = nn->max_mtu;
- }
+ nn->dp.mtu = min(nn->app->ctrl_mtu, nn->max_mtu);
} else if (nn->max_mtu < NFP_NET_DEFAULT_MTU) {
nn->dp.mtu = nn->max_mtu;
} else {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 986464d4a206..68db47d8e8b6 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -205,10 +205,8 @@ nfp_net_pf_alloc_vnics(struct nfp_pf *pf, void __iomem *ctrl_bar,
ctrl_bar += NFP_PF_CSR_SLICE_SIZE;
/* Kill the vNIC if app init marked it as invalid */
- if (nn->port && nn->port->type == NFP_PORT_INVALID) {
+ if (nn->port && nn->port->type == NFP_PORT_INVALID)
nfp_net_pf_free_vnic(pf, nn);
- continue;
- }
}
if (list_empty(&pf->vnics))
diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig
new file mode 100644
index 000000000000..5ea570be8379
--- /dev/null
+++ b/drivers/net/ethernet/pensando/Kconfig
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2019 Pensando Systems, Inc
+#
+# Pensando device configuration
+#
+
+config NET_VENDOR_PENSANDO
+ bool "Pensando devices"
+ default y
+ help
+ If you have a network (Ethernet) card belonging to this class, say Y.
+
+ Note that the answer to this question doesn't directly affect the
+ kernel: saying N will just cause the configurator to skip all
+ the questions about Pensando cards. If you say Y, you will be asked
+ for your specific card in the following questions.
+
+if NET_VENDOR_PENSANDO
+
+config IONIC
+ tristate "Pensando Ethernet IONIC Support"
+ depends on 64BIT && PCI
+ help
+ This enables support for the Pensando family of Ethernet
+ adapters. More specific information on this driver can be
+ found in
+ <file:Documentation/networking/device_drivers/pensando/ionic.rst>.
+
+ To compile this driver as a module, choose M here. The module
+ will be called ionic.
+
+endif # NET_VENDOR_PENSANDO
diff --git a/drivers/net/ethernet/pensando/Makefile b/drivers/net/ethernet/pensando/Makefile
new file mode 100644
index 000000000000..21ce7499c122
--- /dev/null
+++ b/drivers/net/ethernet/pensando/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Pensando network device drivers.
+#
+
+obj-$(CONFIG_IONIC) += ionic/
diff --git a/drivers/net/ethernet/pensando/ionic/Makefile b/drivers/net/ethernet/pensando/ionic/Makefile
new file mode 100644
index 000000000000..29f304d75261
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2017 - 2019 Pensando Systems, Inc
+
+obj-$(CONFIG_IONIC) := ionic.o
+
+ionic-y := ionic_main.o ionic_bus_pci.o ionic_devlink.o ionic_dev.o \
+ ionic_debugfs.o ionic_lif.o ionic_rx_filter.o ionic_ethtool.o \
+ ionic_txrx.o ionic_stats.o
diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h
new file mode 100644
index 000000000000..7a7060677f15
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_H_
+#define _IONIC_H_
+
+struct ionic_lif;
+
+#include "ionic_if.h"
+#include "ionic_dev.h"
+#include "ionic_devlink.h"
+
+#define IONIC_DRV_NAME "ionic"
+#define IONIC_DRV_DESCRIPTION "Pensando Ethernet NIC Driver"
+#define IONIC_DRV_VERSION "0.15.0-k"
+
+#define PCI_VENDOR_ID_PENSANDO 0x1dd8
+
+#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF 0x1002
+#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF 0x1003
+
+#define IONIC_SUBDEV_ID_NAPLES_25 0x4000
+#define IONIC_SUBDEV_ID_NAPLES_100_4 0x4001
+#define IONIC_SUBDEV_ID_NAPLES_100_8 0x4002
+
+#define DEVCMD_TIMEOUT 10
+
+struct ionic {
+ struct pci_dev *pdev;
+ struct device *dev;
+ struct devlink_port dl_port;
+ struct ionic_dev idev;
+ struct mutex dev_cmd_lock; /* lock for dev_cmd operations */
+ struct dentry *dentry;
+ struct ionic_dev_bar bars[IONIC_BARS_MAX];
+ unsigned int num_bars;
+ struct ionic_identity ident;
+ struct list_head lifs;
+ struct ionic_lif *master_lif;
+ unsigned int nnqs_per_lif;
+ unsigned int neqs_per_lif;
+ unsigned int ntxqs_per_lif;
+ unsigned int nrxqs_per_lif;
+ DECLARE_BITMAP(lifbits, IONIC_LIFS_MAX);
+ unsigned int nintrs;
+ DECLARE_BITMAP(intrs, IONIC_INTR_CTRL_REGS_MAX);
+ struct work_struct nb_work;
+ struct notifier_block nb;
+};
+
+struct ionic_admin_ctx {
+ struct completion work;
+ union ionic_adminq_cmd cmd;
+ union ionic_adminq_comp comp;
+};
+
+int ionic_napi(struct napi_struct *napi, int budget, ionic_cq_cb cb,
+ ionic_cq_done_cb done_cb, void *done_arg);
+
+int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx);
+int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait);
+int ionic_set_dma_mask(struct ionic *ionic);
+int ionic_setup(struct ionic *ionic);
+
+int ionic_identify(struct ionic *ionic);
+int ionic_init(struct ionic *ionic);
+int ionic_reset(struct ionic *ionic);
+
+int ionic_port_identify(struct ionic *ionic);
+int ionic_port_init(struct ionic *ionic);
+int ionic_port_reset(struct ionic *ionic);
+
+#endif /* _IONIC_H_ */
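
struct ionic_admin_ctx above pairs an adminq request with an on-stack completion: the caller fills @cmd, posts it with ionic_adminq_post_wait(), and on return finds the firmware's answer in @comp. A minimal sketch using the NOP opcode as a stand-in for a real command (illustration only; assumes the nop member of union ionic_adminq_cmd from ionic_if.h):

static int example_adminq_nop(struct ionic_lif *lif)
{
	struct ionic_admin_ctx ctx = {
		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
		.cmd.nop = {
			.opcode = IONIC_CMD_NOP,
		},
	};

	/* Sleeps until the completion for this command is posted */
	return ionic_adminq_post_wait(lif, &ctx);
}
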
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus.h b/drivers/net/ethernet/pensando/ionic/ionic_bus.h
new file mode 100644
index 000000000000..2f4d08c64910
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_BUS_H_
+#define _IONIC_BUS_H_
+
+int ionic_bus_get_irq(struct ionic *ionic, unsigned int num);
+const char *ionic_bus_info(struct ionic *ionic);
+int ionic_bus_alloc_irq_vectors(struct ionic *ionic, unsigned int nintrs);
+void ionic_bus_free_irq_vectors(struct ionic *ionic);
+int ionic_bus_register_driver(void);
+void ionic_bus_unregister_driver(void);
+void __iomem *ionic_bus_map_dbpage(struct ionic *ionic, int page_num);
+void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page);
+
+#endif /* _IONIC_BUS_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
new file mode 100644
index 000000000000..9a9ab8cb2cb3
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/pci.h>
+
+#include "ionic.h"
+#include "ionic_bus.h"
+#include "ionic_lif.h"
+#include "ionic_debugfs.h"
+
+/* Supported devices */
+static const struct pci_device_id ionic_id_table[] = {
+ { PCI_VDEVICE(PENSANDO, PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF) },
+ { PCI_VDEVICE(PENSANDO, PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF) },
+ { 0, } /* end of table */
+};
+MODULE_DEVICE_TABLE(pci, ionic_id_table);
+
+int ionic_bus_get_irq(struct ionic *ionic, unsigned int num)
+{
+ return pci_irq_vector(ionic->pdev, num);
+}
+
+const char *ionic_bus_info(struct ionic *ionic)
+{
+ return pci_name(ionic->pdev);
+}
+
+int ionic_bus_alloc_irq_vectors(struct ionic *ionic, unsigned int nintrs)
+{
+ return pci_alloc_irq_vectors(ionic->pdev, nintrs, nintrs,
+ PCI_IRQ_MSIX);
+}
+
+void ionic_bus_free_irq_vectors(struct ionic *ionic)
+{
+ pci_free_irq_vectors(ionic->pdev);
+}
+
+static int ionic_map_bars(struct ionic *ionic)
+{
+ struct pci_dev *pdev = ionic->pdev;
+ struct device *dev = ionic->dev;
+ struct ionic_dev_bar *bars;
+ unsigned int i, j;
+
+ bars = ionic->bars;
+ ionic->num_bars = 0;
+
+ for (i = 0, j = 0; i < IONIC_BARS_MAX; i++) {
+ if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+ continue;
+ bars[j].len = pci_resource_len(pdev, i);
+
+ /* only map the whole bar 0 */
+ if (j > 0) {
+ bars[j].vaddr = NULL;
+ } else {
+ bars[j].vaddr = pci_iomap(pdev, i, bars[j].len);
+ if (!bars[j].vaddr) {
+ dev_err(dev,
+ "Cannot memory-map BAR %d, aborting\n",
+ i);
+ return -ENODEV;
+ }
+ }
+
+ bars[j].bus_addr = pci_resource_start(pdev, i);
+ bars[j].res_index = i;
+ ionic->num_bars++;
+ j++;
+ }
+
+ return 0;
+}
+
+static void ionic_unmap_bars(struct ionic *ionic)
+{
+ struct ionic_dev_bar *bars = ionic->bars;
+ unsigned int i;
+
+ for (i = 0; i < IONIC_BARS_MAX; i++) {
+ if (bars[i].vaddr) {
+ iounmap(bars[i].vaddr);
+ bars[i].bus_addr = 0;
+ bars[i].vaddr = NULL;
+ bars[i].len = 0;
+ }
+ }
+}
+
+void __iomem *ionic_bus_map_dbpage(struct ionic *ionic, int page_num)
+{
+ return pci_iomap_range(ionic->pdev,
+ ionic->bars[IONIC_PCI_BAR_DBELL].res_index,
+ (u64)page_num << PAGE_SHIFT, PAGE_SIZE);
+}
+
+void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page)
+{
+ iounmap(page);
+}
+
+static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct device *dev = &pdev->dev;
+ struct ionic *ionic;
+ int err;
+
+ ionic = ionic_devlink_alloc(dev);
+ if (!ionic)
+ return -ENOMEM;
+
+ ionic->pdev = pdev;
+ ionic->dev = dev;
+ pci_set_drvdata(pdev, ionic);
+ mutex_init(&ionic->dev_cmd_lock);
+
+ /* Query system for DMA addressing limitation for the device. */
+ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(IONIC_ADDR_LEN));
+ if (err) {
+ dev_err(dev, "Unable to obtain 64-bit DMA for consistent allocations, aborting. err=%d\n",
+ err);
+ goto err_out_clear_drvdata;
+ }
+
+ ionic_debugfs_add_dev(ionic);
+
+ /* Setup PCI device */
+ err = pci_enable_device_mem(pdev);
+ if (err) {
+ dev_err(dev, "Cannot enable PCI device: %d, aborting\n", err);
+ goto err_out_debugfs_del_dev;
+ }
+
+ err = pci_request_regions(pdev, IONIC_DRV_NAME);
+ if (err) {
+ dev_err(dev, "Cannot request PCI regions: %d, aborting\n", err);
+ goto err_out_pci_disable_device;
+ }
+
+ pci_set_master(pdev);
+
+ err = ionic_map_bars(ionic);
+ if (err)
+ goto err_out_pci_clear_master;
+
+ /* Configure the device */
+ err = ionic_setup(ionic);
+ if (err) {
+ dev_err(dev, "Cannot setup device: %d, aborting\n", err);
+ goto err_out_unmap_bars;
+ }
+
+ err = ionic_identify(ionic);
+ if (err) {
+ dev_err(dev, "Cannot identify device: %d, aborting\n", err);
+ goto err_out_teardown;
+ }
+
+ err = ionic_init(ionic);
+ if (err) {
+ dev_err(dev, "Cannot init device: %d, aborting\n", err);
+ goto err_out_teardown;
+ }
+
+ /* Configure the ports */
+ err = ionic_port_identify(ionic);
+ if (err) {
+ dev_err(dev, "Cannot identify port: %d, aborting\n", err);
+ goto err_out_reset;
+ }
+
+ err = ionic_port_init(ionic);
+ if (err) {
+ dev_err(dev, "Cannot init port: %d, aborting\n", err);
+ goto err_out_reset;
+ }
+
+ /* Configure LIFs */
+ err = ionic_lif_identify(ionic, IONIC_LIF_TYPE_CLASSIC,
+ &ionic->ident.lif);
+ if (err) {
+ dev_err(dev, "Cannot identify LIFs: %d, aborting\n", err);
+ goto err_out_port_reset;
+ }
+
+ err = ionic_lifs_size(ionic);
+ if (err) {
+ dev_err(dev, "Cannot size LIFs: %d, aborting\n", err);
+ goto err_out_port_reset;
+ }
+
+ err = ionic_lifs_alloc(ionic);
+ if (err) {
+ dev_err(dev, "Cannot allocate LIFs: %d, aborting\n", err);
+ goto err_out_free_irqs;
+ }
+
+ err = ionic_lifs_init(ionic);
+ if (err) {
+ dev_err(dev, "Cannot init LIFs: %d, aborting\n", err);
+ goto err_out_free_lifs;
+ }
+
+ err = ionic_lifs_register(ionic);
+ if (err) {
+ dev_err(dev, "Cannot register LIFs: %d, aborting\n", err);
+ goto err_out_deinit_lifs;
+ }
+
+ err = ionic_devlink_register(ionic);
+ if (err) {
+ dev_err(dev, "Cannot register devlink: %d\n", err);
+ goto err_out_deregister_lifs;
+ }
+
+ return 0;
+
+err_out_deregister_lifs:
+ ionic_lifs_unregister(ionic);
+err_out_deinit_lifs:
+ ionic_lifs_deinit(ionic);
+err_out_free_lifs:
+ ionic_lifs_free(ionic);
+err_out_free_irqs:
+ ionic_bus_free_irq_vectors(ionic);
+err_out_port_reset:
+ ionic_port_reset(ionic);
+err_out_reset:
+ ionic_reset(ionic);
+err_out_teardown:
+ ionic_dev_teardown(ionic);
+err_out_unmap_bars:
+ ionic_unmap_bars(ionic);
+ pci_release_regions(pdev);
+err_out_pci_clear_master:
+ pci_clear_master(pdev);
+err_out_pci_disable_device:
+ pci_disable_device(pdev);
+err_out_debugfs_del_dev:
+ ionic_debugfs_del_dev(ionic);
+err_out_clear_drvdata:
+ mutex_destroy(&ionic->dev_cmd_lock);
+ ionic_devlink_free(ionic);
+
+ return err;
+}
+
+static void ionic_remove(struct pci_dev *pdev)
+{
+ struct ionic *ionic = pci_get_drvdata(pdev);
+
+ if (!ionic)
+ return;
+
+ ionic_devlink_unregister(ionic);
+ ionic_lifs_unregister(ionic);
+ ionic_lifs_deinit(ionic);
+ ionic_lifs_free(ionic);
+ ionic_bus_free_irq_vectors(ionic);
+ ionic_port_reset(ionic);
+ ionic_reset(ionic);
+ ionic_dev_teardown(ionic);
+ ionic_unmap_bars(ionic);
+ pci_release_regions(pdev);
+ pci_clear_master(pdev);
+ pci_disable_device(pdev);
+ ionic_debugfs_del_dev(ionic);
+ mutex_destroy(&ionic->dev_cmd_lock);
+ ionic_devlink_free(ionic);
+}
+
+static struct pci_driver ionic_driver = {
+ .name = IONIC_DRV_NAME,
+ .id_table = ionic_id_table,
+ .probe = ionic_probe,
+ .remove = ionic_remove,
+};
+
+int ionic_bus_register_driver(void)
+{
+ return pci_register_driver(&ionic_driver);
+}
+
+void ionic_bus_unregister_driver(void)
+{
+ pci_unregister_driver(&ionic_driver);
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
new file mode 100644
index 000000000000..7afc4a365b75
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+
+#include "ionic.h"
+#include "ionic_bus.h"
+#include "ionic_lif.h"
+#include "ionic_debugfs.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+static struct dentry *ionic_dir;
+
+void ionic_debugfs_create(void)
+{
+ ionic_dir = debugfs_create_dir(IONIC_DRV_NAME, NULL);
+}
+
+void ionic_debugfs_destroy(void)
+{
+ debugfs_remove_recursive(ionic_dir);
+}
+
+void ionic_debugfs_add_dev(struct ionic *ionic)
+{
+ ionic->dentry = debugfs_create_dir(ionic_bus_info(ionic), ionic_dir);
+}
+
+void ionic_debugfs_del_dev(struct ionic *ionic)
+{
+ debugfs_remove_recursive(ionic->dentry);
+ ionic->dentry = NULL;
+}
+
+static int identity_show(struct seq_file *seq, void *v)
+{
+ struct ionic *ionic = seq->private;
+ struct ionic_identity *ident;
+
+ ident = &ionic->ident;
+
+ seq_printf(seq, "nlifs: %d\n", ident->dev.nlifs);
+ seq_printf(seq, "nintrs: %d\n", ident->dev.nintrs);
+ seq_printf(seq, "ndbpgs_per_lif: %d\n", ident->dev.ndbpgs_per_lif);
+ seq_printf(seq, "intr_coal_mult: %d\n", ident->dev.intr_coal_mult);
+ seq_printf(seq, "intr_coal_div: %d\n", ident->dev.intr_coal_div);
+
+ seq_printf(seq, "max_ucast_filters: %d\n", ident->lif.eth.max_ucast_filters);
+ seq_printf(seq, "max_mcast_filters: %d\n", ident->lif.eth.max_mcast_filters);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(identity);
+
+void ionic_debugfs_add_ident(struct ionic *ionic)
+{
+ debugfs_create_file("identity", 0400, ionic->dentry,
+ ionic, &identity_fops) ? 0 : -EOPNOTSUPP;
+}
+
+void ionic_debugfs_add_sizes(struct ionic *ionic)
+{
+ debugfs_create_u32("nlifs", 0400, ionic->dentry,
+ (u32 *)&ionic->ident.dev.nlifs);
+ debugfs_create_u32("nintrs", 0400, ionic->dentry, &ionic->nintrs);
+
+ debugfs_create_u32("ntxqs_per_lif", 0400, ionic->dentry,
+ (u32 *)&ionic->ident.lif.eth.config.queue_count[IONIC_QTYPE_TXQ]);
+ debugfs_create_u32("nrxqs_per_lif", 0400, ionic->dentry,
+ (u32 *)&ionic->ident.lif.eth.config.queue_count[IONIC_QTYPE_RXQ]);
+}
+
+static int q_tail_show(struct seq_file *seq, void *v)
+{
+ struct ionic_queue *q = seq->private;
+
+ seq_printf(seq, "%d\n", q->tail->index);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(q_tail);
+
+static int q_head_show(struct seq_file *seq, void *v)
+{
+ struct ionic_queue *q = seq->private;
+
+ seq_printf(seq, "%d\n", q->head->index);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(q_head);
+
+static int cq_tail_show(struct seq_file *seq, void *v)
+{
+ struct ionic_cq *cq = seq->private;
+
+ seq_printf(seq, "%d\n", cq->tail->index);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(cq_tail);
+
+static const struct debugfs_reg32 intr_ctrl_regs[] = {
+ { .name = "coal_init", .offset = 0, },
+ { .name = "mask", .offset = 4, },
+ { .name = "credits", .offset = 8, },
+ { .name = "mask_on_assert", .offset = 12, },
+ { .name = "coal_timer", .offset = 16, },
+};
+
+void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq)
+{
+ struct dentry *q_dentry, *cq_dentry, *intr_dentry, *stats_dentry;
+ struct ionic_dev *idev = &lif->ionic->idev;
+ struct debugfs_regset32 *intr_ctrl_regset;
+ struct ionic_intr_info *intr = &qcq->intr;
+ struct debugfs_blob_wrapper *desc_blob;
+ struct device *dev = lif->ionic->dev;
+ struct ionic_queue *q = &qcq->q;
+ struct ionic_cq *cq = &qcq->cq;
+
+ qcq->dentry = debugfs_create_dir(q->name, lif->dentry);
+
+ debugfs_create_x32("total_size", 0400, qcq->dentry, &qcq->total_size);
+ debugfs_create_x64("base_pa", 0400, qcq->dentry, &qcq->base_pa);
+
+ q_dentry = debugfs_create_dir("q", qcq->dentry);
+
+ debugfs_create_u32("index", 0400, q_dentry, &q->index);
+ debugfs_create_x64("base_pa", 0400, q_dentry, &q->base_pa);
+ if (qcq->flags & IONIC_QCQ_F_SG) {
+ debugfs_create_x64("sg_base_pa", 0400, q_dentry,
+ &q->sg_base_pa);
+ debugfs_create_u32("sg_desc_size", 0400, q_dentry,
+ &q->sg_desc_size);
+ }
+ debugfs_create_u32("num_descs", 0400, q_dentry, &q->num_descs);
+ debugfs_create_u32("desc_size", 0400, q_dentry, &q->desc_size);
+ debugfs_create_u32("pid", 0400, q_dentry, &q->pid);
+ debugfs_create_u32("qid", 0400, q_dentry, &q->hw_index);
+ debugfs_create_u32("qtype", 0400, q_dentry, &q->hw_type);
+ debugfs_create_u64("drop", 0400, q_dentry, &q->drop);
+ debugfs_create_u64("stop", 0400, q_dentry, &q->stop);
+ debugfs_create_u64("wake", 0400, q_dentry, &q->wake);
+
+ debugfs_create_file("tail", 0400, q_dentry, q, &q_tail_fops);
+ debugfs_create_file("head", 0400, q_dentry, q, &q_head_fops);
+
+ desc_blob = devm_kzalloc(dev, sizeof(*desc_blob), GFP_KERNEL);
+ if (!desc_blob)
+ return;
+ desc_blob->data = q->base;
+ desc_blob->size = (unsigned long)q->num_descs * q->desc_size;
+ debugfs_create_blob("desc_blob", 0400, q_dentry, desc_blob);
+
+ if (qcq->flags & IONIC_QCQ_F_SG) {
+ desc_blob = devm_kzalloc(dev, sizeof(*desc_blob), GFP_KERNEL);
+ if (!desc_blob)
+ return;
+ desc_blob->data = q->sg_base;
+ desc_blob->size = (unsigned long)q->num_descs * q->sg_desc_size;
+ debugfs_create_blob("sg_desc_blob", 0400, q_dentry,
+ desc_blob);
+ }
+
+ cq_dentry = debugfs_create_dir("cq", qcq->dentry);
+
+ debugfs_create_x64("base_pa", 0400, cq_dentry, &cq->base_pa);
+ debugfs_create_u32("num_descs", 0400, cq_dentry, &cq->num_descs);
+ debugfs_create_u32("desc_size", 0400, cq_dentry, &cq->desc_size);
+ debugfs_create_u8("done_color", 0400, cq_dentry,
+ (u8 *)&cq->done_color);
+
+ debugfs_create_file("tail", 0400, cq_dentry, cq, &cq_tail_fops);
+
+ desc_blob = devm_kzalloc(dev, sizeof(*desc_blob), GFP_KERNEL);
+ if (!desc_blob)
+ return;
+ desc_blob->data = cq->base;
+ desc_blob->size = (unsigned long)cq->num_descs * cq->desc_size;
+ debugfs_create_blob("desc_blob", 0400, cq_dentry, desc_blob);
+
+ if (qcq->flags & IONIC_QCQ_F_INTR) {
+ intr_dentry = debugfs_create_dir("intr", qcq->dentry);
+
+ debugfs_create_u32("index", 0400, intr_dentry,
+ &intr->index);
+ debugfs_create_u32("vector", 0400, intr_dentry,
+ &intr->vector);
+
+ intr_ctrl_regset = devm_kzalloc(dev, sizeof(*intr_ctrl_regset),
+ GFP_KERNEL);
+ if (!intr_ctrl_regset)
+ return;
+ intr_ctrl_regset->regs = intr_ctrl_regs;
+ intr_ctrl_regset->nregs = ARRAY_SIZE(intr_ctrl_regs);
+ intr_ctrl_regset->base = &idev->intr_ctrl[intr->index];
+
+ debugfs_create_regset32("intr_ctrl", 0400, intr_dentry,
+ intr_ctrl_regset);
+ }
+
+ if (qcq->flags & IONIC_QCQ_F_NOTIFYQ) {
+ stats_dentry = debugfs_create_dir("notifyblock", qcq->dentry);
+
+ debugfs_create_u64("eid", 0400, stats_dentry,
+ (u64 *)&lif->info->status.eid);
+ debugfs_create_u16("link_status", 0400, stats_dentry,
+ (u16 *)&lif->info->status.link_status);
+ debugfs_create_u32("link_speed", 0400, stats_dentry,
+ (u32 *)&lif->info->status.link_speed);
+ debugfs_create_u16("link_down_count", 0400, stats_dentry,
+ (u16 *)&lif->info->status.link_down_count);
+ }
+}
+
+static int netdev_show(struct seq_file *seq, void *v)
+{
+ struct net_device *netdev = seq->private;
+
+ seq_printf(seq, "%s\n", netdev->name);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(netdev);
+
+void ionic_debugfs_add_lif(struct ionic_lif *lif)
+{
+ lif->dentry = debugfs_create_dir(lif->name, lif->ionic->dentry);
+ debugfs_create_file("netdev", 0400, lif->dentry,
+ lif->netdev, &netdev_fops);
+}
+
+void ionic_debugfs_del_lif(struct ionic_lif *lif)
+{
+ debugfs_remove_recursive(lif->dentry);
+ lif->dentry = NULL;
+}
+
+void ionic_debugfs_del_qcq(struct ionic_qcq *qcq)
+{
+ debugfs_remove_recursive(qcq->dentry);
+ qcq->dentry = NULL;
+}
+
+#endif
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.h b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.h
new file mode 100644
index 000000000000..c44ebde170b6
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_DEBUGFS_H_
+#define _IONIC_DEBUGFS_H_
+
+#include <linux/debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+void ionic_debugfs_create(void);
+void ionic_debugfs_destroy(void);
+void ionic_debugfs_add_dev(struct ionic *ionic);
+void ionic_debugfs_del_dev(struct ionic *ionic);
+void ionic_debugfs_add_ident(struct ionic *ionic);
+void ionic_debugfs_add_sizes(struct ionic *ionic);
+void ionic_debugfs_add_lif(struct ionic_lif *lif);
+void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq);
+void ionic_debugfs_del_lif(struct ionic_lif *lif);
+void ionic_debugfs_del_qcq(struct ionic_qcq *qcq);
+#else
+static inline void ionic_debugfs_create(void) { }
+static inline void ionic_debugfs_destroy(void) { }
+static inline void ionic_debugfs_add_dev(struct ionic *ionic) { }
+static inline void ionic_debugfs_del_dev(struct ionic *ionic) { }
+static inline void ionic_debugfs_add_ident(struct ionic *ionic) { }
+static inline void ionic_debugfs_add_sizes(struct ionic *ionic) { }
+static inline void ionic_debugfs_add_lif(struct ionic_lif *lif) { }
+static inline void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq) { }
+static inline void ionic_debugfs_del_lif(struct ionic_lif *lif) { }
+static inline void ionic_debugfs_del_qcq(struct ionic_qcq *qcq) { }
+#endif
+
+#endif /* _IONIC_DEBUGFS_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
new file mode 100644
index 000000000000..d168a6435322
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/etherdevice.h>
+#include "ionic.h"
+#include "ionic_dev.h"
+#include "ionic_lif.h"
+
+void ionic_init_devinfo(struct ionic *ionic)
+{
+ struct ionic_dev *idev = &ionic->idev;
+
+ idev->dev_info.asic_type = ioread8(&idev->dev_info_regs->asic_type);
+ idev->dev_info.asic_rev = ioread8(&idev->dev_info_regs->asic_rev);
+
+ memcpy_fromio(idev->dev_info.fw_version,
+ idev->dev_info_regs->fw_version,
+ IONIC_DEVINFO_FWVERS_BUFLEN);
+
+ memcpy_fromio(idev->dev_info.serial_num,
+ idev->dev_info_regs->serial_num,
+ IONIC_DEVINFO_SERIAL_BUFLEN);
+
+ idev->dev_info.fw_version[IONIC_DEVINFO_FWVERS_BUFLEN] = 0;
+ idev->dev_info.serial_num[IONIC_DEVINFO_SERIAL_BUFLEN] = 0;
+
+ dev_dbg(ionic->dev, "fw_version %s\n", idev->dev_info.fw_version);
+}
+
+int ionic_dev_setup(struct ionic *ionic)
+{
+ struct ionic_dev_bar *bar = ionic->bars;
+ unsigned int num_bars = ionic->num_bars;
+ struct ionic_dev *idev = &ionic->idev;
+ struct device *dev = ionic->dev;
+ u32 sig;
+
+ /* BAR0: dev_cmd and interrupts */
+ if (num_bars < 1) {
+ dev_err(dev, "No bars found, aborting\n");
+ return -EFAULT;
+ }
+
+ if (bar->len < IONIC_BAR0_SIZE) {
+ dev_err(dev, "Resource bar size %lu too small, aborting\n",
+ bar->len);
+ return -EFAULT;
+ }
+
+ idev->dev_info_regs = bar->vaddr + IONIC_BAR0_DEV_INFO_REGS_OFFSET;
+ idev->dev_cmd_regs = bar->vaddr + IONIC_BAR0_DEV_CMD_REGS_OFFSET;
+ idev->intr_status = bar->vaddr + IONIC_BAR0_INTR_STATUS_OFFSET;
+ idev->intr_ctrl = bar->vaddr + IONIC_BAR0_INTR_CTRL_OFFSET;
+
+ sig = ioread32(&idev->dev_info_regs->signature);
+ if (sig != IONIC_DEV_INFO_SIGNATURE) {
+ dev_err(dev, "Incompatible firmware signature %x", sig);
+ return -EFAULT;
+ }
+
+ ionic_init_devinfo(ionic);
+
+ /* BAR1: doorbells */
+ bar++;
+ if (num_bars < 2) {
+ dev_err(dev, "Doorbell bar missing, aborting\n");
+ return -EFAULT;
+ }
+
+ idev->db_pages = bar->vaddr;
+ idev->phy_db_pages = bar->bus_addr;
+
+ return 0;
+}
+
+void ionic_dev_teardown(struct ionic *ionic)
+{
+ /* place holder */
+}
+
+/* Devcmd Interface */
+u8 ionic_dev_cmd_status(struct ionic_dev *idev)
+{
+ return ioread8(&idev->dev_cmd_regs->comp.comp.status);
+}
+
+bool ionic_dev_cmd_done(struct ionic_dev *idev)
+{
+ return ioread32(&idev->dev_cmd_regs->done) & IONIC_DEV_CMD_DONE;
+}
+
+void ionic_dev_cmd_comp(struct ionic_dev *idev, union ionic_dev_cmd_comp *comp)
+{
+ memcpy_fromio(comp, &idev->dev_cmd_regs->comp, sizeof(*comp));
+}
+
+void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd)
+{
+ memcpy_toio(&idev->dev_cmd_regs->cmd, cmd, sizeof(*cmd));
+ iowrite32(0, &idev->dev_cmd_regs->done);
+ iowrite32(1, &idev->dev_cmd_regs->doorbell);
+}
+
+/* Device commands */
+void ionic_dev_cmd_identify(struct ionic_dev *idev, u8 ver)
+{
+ union ionic_dev_cmd cmd = {
+ .identify.opcode = IONIC_CMD_IDENTIFY,
+ .identify.ver = ver,
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_init(struct ionic_dev *idev)
+{
+ union ionic_dev_cmd cmd = {
+ .init.opcode = IONIC_CMD_INIT,
+ .init.type = 0,
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_reset(struct ionic_dev *idev)
+{
+ union ionic_dev_cmd cmd = {
+ .reset.opcode = IONIC_CMD_RESET,
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+/* Port commands */
+void ionic_dev_cmd_port_identify(struct ionic_dev *idev)
+{
+ union ionic_dev_cmd cmd = {
+ .port_init.opcode = IONIC_CMD_PORT_IDENTIFY,
+ .port_init.index = 0,
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_port_init(struct ionic_dev *idev)
+{
+ union ionic_dev_cmd cmd = {
+ .port_init.opcode = IONIC_CMD_PORT_INIT,
+ .port_init.index = 0,
+ .port_init.info_pa = cpu_to_le64(idev->port_info_pa),
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_port_reset(struct ionic_dev *idev)
+{
+ union ionic_dev_cmd cmd = {
+ .port_reset.opcode = IONIC_CMD_PORT_RESET,
+ .port_reset.index = 0,
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_port_state(struct ionic_dev *idev, u8 state)
+{
+ union ionic_dev_cmd cmd = {
+ .port_setattr.opcode = IONIC_CMD_PORT_SETATTR,
+ .port_setattr.index = 0,
+ .port_setattr.attr = IONIC_PORT_ATTR_STATE,
+ .port_setattr.state = state,
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_port_speed(struct ionic_dev *idev, u32 speed)
+{
+ union ionic_dev_cmd cmd = {
+ .port_setattr.opcode = IONIC_CMD_PORT_SETATTR,
+ .port_setattr.index = 0,
+ .port_setattr.attr = IONIC_PORT_ATTR_SPEED,
+ .port_setattr.speed = cpu_to_le32(speed),
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable)
+{
+ union ionic_dev_cmd cmd = {
+ .port_setattr.opcode = IONIC_CMD_PORT_SETATTR,
+ .port_setattr.index = 0,
+ .port_setattr.attr = IONIC_PORT_ATTR_AUTONEG,
+ .port_setattr.an_enable = an_enable,
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type)
+{
+ union ionic_dev_cmd cmd = {
+ .port_setattr.opcode = IONIC_CMD_PORT_SETATTR,
+ .port_setattr.index = 0,
+ .port_setattr.attr = IONIC_PORT_ATTR_FEC,
+ .port_setattr.fec_type = fec_type,
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type)
+{
+ union ionic_dev_cmd cmd = {
+ .port_setattr.opcode = IONIC_CMD_PORT_SETATTR,
+ .port_setattr.index = 0,
+ .port_setattr.attr = IONIC_PORT_ATTR_PAUSE,
+ .port_setattr.pause_type = pause_type,
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+/* LIF commands */
+void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver)
+{
+ union ionic_dev_cmd cmd = {
+ .lif_identify.opcode = IONIC_CMD_LIF_IDENTIFY,
+ .lif_identify.type = type,
+ .lif_identify.ver = ver,
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_lif_init(struct ionic_dev *idev, u16 lif_index,
+ dma_addr_t info_pa)
+{
+ union ionic_dev_cmd cmd = {
+ .lif_init.opcode = IONIC_CMD_LIF_INIT,
+ .lif_init.index = cpu_to_le16(lif_index),
+ .lif_init.info_pa = cpu_to_le64(info_pa),
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_lif_reset(struct ionic_dev *idev, u16 lif_index)
+{
+ union ionic_dev_cmd cmd = {
+ .lif_init.opcode = IONIC_CMD_LIF_RESET,
+ .lif_init.index = cpu_to_le16(lif_index),
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+void ionic_dev_cmd_adminq_init(struct ionic_dev *idev, struct ionic_qcq *qcq,
+ u16 lif_index, u16 intr_index)
+{
+ struct ionic_queue *q = &qcq->q;
+ struct ionic_cq *cq = &qcq->cq;
+
+ union ionic_dev_cmd cmd = {
+ .q_init.opcode = IONIC_CMD_Q_INIT,
+ .q_init.lif_index = cpu_to_le16(lif_index),
+ .q_init.type = q->type,
+ .q_init.index = cpu_to_le32(q->index),
+ .q_init.flags = cpu_to_le16(IONIC_QINIT_F_IRQ |
+ IONIC_QINIT_F_ENA),
+ .q_init.pid = cpu_to_le16(q->pid),
+ .q_init.intr_index = cpu_to_le16(intr_index),
+ .q_init.ring_size = ilog2(q->num_descs),
+ .q_init.ring_base = cpu_to_le64(q->base_pa),
+ .q_init.cq_ring_base = cpu_to_le64(cq->base_pa),
+ };
+
+ ionic_dev_cmd_go(idev, &cmd);
+}
+
+int ionic_db_page_num(struct ionic_lif *lif, int pid)
+{
+ return (lif->hw_index * lif->dbid_count) + pid;
+}
+
+int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq,
+ struct ionic_intr_info *intr,
+ unsigned int num_descs, size_t desc_size)
+{
+ struct ionic_cq_info *cur;
+ unsigned int ring_size;
+ unsigned int i;
+
+ if (desc_size == 0 || !is_power_of_2(num_descs))
+ return -EINVAL;
+
+ ring_size = ilog2(num_descs);
+ if (ring_size < 2 || ring_size > 16)
+ return -EINVAL;
+
+ cq->lif = lif;
+ cq->bound_intr = intr;
+ cq->num_descs = num_descs;
+ cq->desc_size = desc_size;
+ cq->tail = cq->info;
+ cq->done_color = 1;
+
+ cur = cq->info;
+
+ for (i = 0; i < num_descs; i++) {
+ if (i + 1 == num_descs) {
+ cur->next = cq->info;
+ cur->last = true;
+ } else {
+ cur->next = cur + 1;
+ }
+ cur->index = i;
+ cur++;
+ }
+
+ return 0;
+}
+
+void ionic_cq_map(struct ionic_cq *cq, void *base, dma_addr_t base_pa)
+{
+ struct ionic_cq_info *cur;
+ unsigned int i;
+
+ cq->base = base;
+ cq->base_pa = base_pa;
+
+ for (i = 0, cur = cq->info; i < cq->num_descs; i++, cur++)
+ cur->cq_desc = base + (i * cq->desc_size);
+}
+
+void ionic_cq_bind(struct ionic_cq *cq, struct ionic_queue *q)
+{
+ cq->bound_q = q;
+}
+
+unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do,
+ ionic_cq_cb cb, ionic_cq_done_cb done_cb,
+ void *done_arg)
+{
+ unsigned int work_done = 0;
+
+ if (work_to_do == 0)
+ return 0;
+
+ while (cb(cq, cq->tail)) {
+ if (cq->tail->last)
+ cq->done_color = !cq->done_color;
+ cq->tail = cq->tail->next;
+ DEBUG_STATS_CQE_CNT(cq);
+
+ if (++work_done >= work_to_do)
+ break;
+ }
+
+ if (work_done && done_cb)
+ done_cb(done_arg);
+
+ return work_done;
+}
+
+int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev,
+ struct ionic_queue *q, unsigned int index, const char *name,
+ unsigned int num_descs, size_t desc_size,
+ size_t sg_desc_size, unsigned int pid)
+{
+ struct ionic_desc_info *cur;
+ unsigned int ring_size;
+ unsigned int i;
+
+ if (desc_size == 0 || !is_power_of_2(num_descs))
+ return -EINVAL;
+
+ ring_size = ilog2(num_descs);
+ if (ring_size < 2 || ring_size > 16)
+ return -EINVAL;
+
+ q->lif = lif;
+ q->idev = idev;
+ q->index = index;
+ q->num_descs = num_descs;
+ q->desc_size = desc_size;
+ q->sg_desc_size = sg_desc_size;
+ q->tail = q->info;
+ q->head = q->tail;
+ q->pid = pid;
+
+ snprintf(q->name, sizeof(q->name), "L%d-%s%u", lif->index, name, index);
+
+ cur = q->info;
+
+ for (i = 0; i < num_descs; i++) {
+ if (i + 1 == num_descs)
+ cur->next = q->info;
+ else
+ cur->next = cur + 1;
+ cur->index = i;
+ cur->left = num_descs - i;
+ cur++;
+ }
+
+ return 0;
+}
+
+void ionic_q_map(struct ionic_queue *q, void *base, dma_addr_t base_pa)
+{
+ struct ionic_desc_info *cur;
+ unsigned int i;
+
+ q->base = base;
+ q->base_pa = base_pa;
+
+ for (i = 0, cur = q->info; i < q->num_descs; i++, cur++)
+ cur->desc = base + (i * q->desc_size);
+}
+
+void ionic_q_sg_map(struct ionic_queue *q, void *base, dma_addr_t base_pa)
+{
+ struct ionic_desc_info *cur;
+ unsigned int i;
+
+ q->sg_base = base;
+ q->sg_base_pa = base_pa;
+
+ for (i = 0, cur = q->info; i < q->num_descs; i++, cur++)
+ cur->sg_desc = base + (i * q->sg_desc_size);
+}
+
+void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb,
+ void *cb_arg)
+{
+ struct device *dev = q->lif->ionic->dev;
+ struct ionic_lif *lif = q->lif;
+
+ q->head->cb = cb;
+ q->head->cb_arg = cb_arg;
+ q->head = q->head->next;
+
+ dev_dbg(dev, "lif=%d qname=%s qid=%d qtype=%d p_index=%d ringdb=%d\n",
+ q->lif->index, q->name, q->hw_index, q->hw_type,
+ q->head->index, ring_doorbell);
+
+ if (ring_doorbell)
+ ionic_dbell_ring(lif->kern_dbpage, q->hw_type,
+ q->dbval | q->head->index);
+}
+
+static bool ionic_q_is_posted(struct ionic_queue *q, unsigned int pos)
+{
+ unsigned int mask, tail, head;
+
+ mask = q->num_descs - 1;
+ tail = q->tail->index;
+ head = q->head->index;
+
+ return ((pos - tail) & mask) < ((head - tail) & mask);
+}
+
+void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
+ unsigned int stop_index)
+{
+ struct ionic_desc_info *desc_info;
+ ionic_desc_cb cb;
+ void *cb_arg;
+
+ /* check for empty queue */
+ if (q->tail->index == q->head->index)
+ return;
+
+ /* stop index must be for a descriptor that is not yet completed */
+ if (unlikely(!ionic_q_is_posted(q, stop_index)))
+ dev_err(q->lif->ionic->dev,
+ "ionic stop is not posted %s stop %u tail %u head %u\n",
+ q->name, stop_index, q->tail->index, q->head->index);
+
+ do {
+ desc_info = q->tail;
+ q->tail = desc_info->next;
+
+ cb = desc_info->cb;
+ cb_arg = desc_info->cb_arg;
+
+ desc_info->cb = NULL;
+ desc_info->cb_arg = NULL;
+
+ if (cb)
+ cb(q, desc_info, cq_info, cb_arg);
+ } while (desc_info->index != stop_index);
+}
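
The dev_cmd helpers above implement a simple doorbell-and-poll protocol: ionic_dev_cmd_go() copies the command into BAR0, clears the done register and rings the doorbell, after which the caller polls ionic_dev_cmd_done() and finally reads the status or full completion. A simplified sketch of such a wait loop (the real one, bounded by DEVCMD_TIMEOUT, is ionic_dev_cmd_wait() in ionic_main.c and is not part of this excerpt; the helper name and delay below are illustrative):

static int example_dev_cmd_sync(struct ionic *ionic, union ionic_dev_cmd *cmd,
				unsigned long max_wait_jiffies)
{
	struct ionic_dev *idev = &ionic->idev;
	unsigned long expire = jiffies + max_wait_jiffies;

	ionic_dev_cmd_go(idev, cmd);

	while (!ionic_dev_cmd_done(idev)) {
		if (time_after(jiffies, expire))
			return -ETIMEDOUT;
		usleep_range(100, 200);
	}

	/* Non-zero status means the device rejected the command */
	return ionic_dev_cmd_status(idev) ? -EIO : 0;
}
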
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
new file mode 100644
index 000000000000..9610aeb7d5f4
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_DEV_H_
+#define _IONIC_DEV_H_
+
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+
+#include "ionic_if.h"
+#include "ionic_regs.h"
+
+#define IONIC_MIN_MTU ETH_MIN_MTU
+#define IONIC_MAX_MTU 9194
+#define IONIC_MAX_TXRX_DESC 16384
+#define IONIC_MIN_TXRX_DESC 16
+#define IONIC_DEF_TXRX_DESC 4096
+#define IONIC_LIFS_MAX 1024
+#define IONIC_ITR_COAL_USEC_DEFAULT 64
+
+#define IONIC_DEV_CMD_REG_VERSION 1
+#define IONIC_DEV_INFO_REG_COUNT 32
+#define IONIC_DEV_CMD_REG_COUNT 32
+
+struct ionic_dev_bar {
+ void __iomem *vaddr;
+ phys_addr_t bus_addr;
+ unsigned long len;
+ int res_index;
+};
+
+/* Registers */
+static_assert(sizeof(struct ionic_intr) == 32);
+
+static_assert(sizeof(struct ionic_doorbell) == 8);
+static_assert(sizeof(struct ionic_intr_status) == 8);
+static_assert(sizeof(union ionic_dev_regs) == 4096);
+static_assert(sizeof(union ionic_dev_info_regs) == 2048);
+static_assert(sizeof(union ionic_dev_cmd_regs) == 2048);
+static_assert(sizeof(struct ionic_lif_stats) == 1024);
+
+static_assert(sizeof(struct ionic_admin_cmd) == 64);
+static_assert(sizeof(struct ionic_admin_comp) == 16);
+static_assert(sizeof(struct ionic_nop_cmd) == 64);
+static_assert(sizeof(struct ionic_nop_comp) == 16);
+
+/* Device commands */
+static_assert(sizeof(struct ionic_dev_identify_cmd) == 64);
+static_assert(sizeof(struct ionic_dev_identify_comp) == 16);
+static_assert(sizeof(struct ionic_dev_init_cmd) == 64);
+static_assert(sizeof(struct ionic_dev_init_comp) == 16);
+static_assert(sizeof(struct ionic_dev_reset_cmd) == 64);
+static_assert(sizeof(struct ionic_dev_reset_comp) == 16);
+static_assert(sizeof(struct ionic_dev_getattr_cmd) == 64);
+static_assert(sizeof(struct ionic_dev_getattr_comp) == 16);
+static_assert(sizeof(struct ionic_dev_setattr_cmd) == 64);
+static_assert(sizeof(struct ionic_dev_setattr_comp) == 16);
+
+/* Port commands */
+static_assert(sizeof(struct ionic_port_identify_cmd) == 64);
+static_assert(sizeof(struct ionic_port_identify_comp) == 16);
+static_assert(sizeof(struct ionic_port_init_cmd) == 64);
+static_assert(sizeof(struct ionic_port_init_comp) == 16);
+static_assert(sizeof(struct ionic_port_reset_cmd) == 64);
+static_assert(sizeof(struct ionic_port_reset_comp) == 16);
+static_assert(sizeof(struct ionic_port_getattr_cmd) == 64);
+static_assert(sizeof(struct ionic_port_getattr_comp) == 16);
+static_assert(sizeof(struct ionic_port_setattr_cmd) == 64);
+static_assert(sizeof(struct ionic_port_setattr_comp) == 16);
+
+/* LIF commands */
+static_assert(sizeof(struct ionic_lif_init_cmd) == 64);
+static_assert(sizeof(struct ionic_lif_init_comp) == 16);
+static_assert(sizeof(struct ionic_lif_reset_cmd) == 64);
+static_assert(sizeof(ionic_lif_reset_comp) == 16);
+static_assert(sizeof(struct ionic_lif_getattr_cmd) == 64);
+static_assert(sizeof(struct ionic_lif_getattr_comp) == 16);
+static_assert(sizeof(struct ionic_lif_setattr_cmd) == 64);
+static_assert(sizeof(struct ionic_lif_setattr_comp) == 16);
+
+static_assert(sizeof(struct ionic_q_init_cmd) == 64);
+static_assert(sizeof(struct ionic_q_init_comp) == 16);
+static_assert(sizeof(struct ionic_q_control_cmd) == 64);
+static_assert(sizeof(ionic_q_control_comp) == 16);
+
+static_assert(sizeof(struct ionic_rx_mode_set_cmd) == 64);
+static_assert(sizeof(ionic_rx_mode_set_comp) == 16);
+static_assert(sizeof(struct ionic_rx_filter_add_cmd) == 64);
+static_assert(sizeof(struct ionic_rx_filter_add_comp) == 16);
+static_assert(sizeof(struct ionic_rx_filter_del_cmd) == 64);
+static_assert(sizeof(ionic_rx_filter_del_comp) == 16);
+
+/* RDMA commands */
+static_assert(sizeof(struct ionic_rdma_reset_cmd) == 64);
+static_assert(sizeof(struct ionic_rdma_queue_cmd) == 64);
+
+/* Events */
+static_assert(sizeof(struct ionic_notifyq_cmd) == 4);
+static_assert(sizeof(union ionic_notifyq_comp) == 64);
+static_assert(sizeof(struct ionic_notifyq_event) == 64);
+static_assert(sizeof(struct ionic_link_change_event) == 64);
+static_assert(sizeof(struct ionic_reset_event) == 64);
+static_assert(sizeof(struct ionic_heartbeat_event) == 64);
+static_assert(sizeof(struct ionic_log_event) == 64);
+
+/* I/O */
+static_assert(sizeof(struct ionic_txq_desc) == 16);
+static_assert(sizeof(struct ionic_txq_sg_desc) == 128);
+static_assert(sizeof(struct ionic_txq_comp) == 16);
+
+static_assert(sizeof(struct ionic_rxq_desc) == 16);
+static_assert(sizeof(struct ionic_rxq_sg_desc) == 128);
+static_assert(sizeof(struct ionic_rxq_comp) == 16);
+
+struct ionic_devinfo {
+ u8 asic_type;
+ u8 asic_rev;
+ char fw_version[IONIC_DEVINFO_FWVERS_BUFLEN + 1];
+ char serial_num[IONIC_DEVINFO_SERIAL_BUFLEN + 1];
+};
+
+struct ionic_dev {
+ union ionic_dev_info_regs __iomem *dev_info_regs;
+ union ionic_dev_cmd_regs __iomem *dev_cmd_regs;
+
+ u64 __iomem *db_pages;
+ dma_addr_t phy_db_pages;
+
+ struct ionic_intr __iomem *intr_ctrl;
+ u64 __iomem *intr_status;
+
+ u32 port_info_sz;
+ struct ionic_port_info *port_info;
+ dma_addr_t port_info_pa;
+
+ struct ionic_devinfo dev_info;
+};
+
+struct ionic_cq_info {
+ void *cq_desc;
+ struct ionic_cq_info *next;
+ unsigned int index;
+ bool last;
+};
+
+struct ionic_queue;
+struct ionic_qcq;
+struct ionic_desc_info;
+
+typedef void (*ionic_desc_cb)(struct ionic_queue *q,
+ struct ionic_desc_info *desc_info,
+ struct ionic_cq_info *cq_info, void *cb_arg);
+
+struct ionic_desc_info {
+ void *desc;
+ void *sg_desc;
+ struct ionic_desc_info *next;
+ unsigned int index;
+ unsigned int left;
+ ionic_desc_cb cb;
+ void *cb_arg;
+};
+
+#define QUEUE_NAME_MAX_SZ 32
+
+struct ionic_queue {
+ u64 dbell_count;
+ u64 drop;
+ u64 stop;
+ u64 wake;
+ struct ionic_lif *lif;
+ struct ionic_desc_info *info;
+ struct ionic_desc_info *tail;
+ struct ionic_desc_info *head;
+ struct ionic_dev *idev;
+ unsigned int index;
+ unsigned int type;
+ unsigned int hw_index;
+ unsigned int hw_type;
+ u64 dbval;
+ void *base;
+ void *sg_base;
+ dma_addr_t base_pa;
+ dma_addr_t sg_base_pa;
+ unsigned int num_descs;
+ unsigned int desc_size;
+ unsigned int sg_desc_size;
+ unsigned int pid;
+ char name[QUEUE_NAME_MAX_SZ];
+};
+
+#define INTR_INDEX_NOT_ASSIGNED -1
+#define INTR_NAME_MAX_SZ 32
+
+struct ionic_intr_info {
+ char name[INTR_NAME_MAX_SZ];
+ unsigned int index;
+ unsigned int vector;
+ u64 rearm_count;
+ unsigned int cpu;
+ cpumask_t affinity_mask;
+};
+
+struct ionic_cq {
+ void *base;
+ dma_addr_t base_pa;
+ struct ionic_lif *lif;
+ struct ionic_cq_info *info;
+ struct ionic_cq_info *tail;
+ struct ionic_queue *bound_q;
+ struct ionic_intr_info *bound_intr;
+ bool done_color;
+ unsigned int num_descs;
+ u64 compl_count;
+ unsigned int desc_size;
+};
+
+struct ionic;
+
+static inline void ionic_intr_init(struct ionic_dev *idev,
+ struct ionic_intr_info *intr,
+ unsigned long index)
+{
+ ionic_intr_clean(idev->intr_ctrl, index);
+ intr->index = index;
+}
+
+static inline unsigned int ionic_q_space_avail(struct ionic_queue *q)
+{
+ unsigned int avail = q->tail->index;
+
+ if (q->head->index >= avail)
+ avail += q->head->left - 1;
+ else
+ avail -= q->head->index + 1;
+
+ return avail;
+}
+
+static inline bool ionic_q_has_space(struct ionic_queue *q, unsigned int want)
+{
+ return ionic_q_space_avail(q) >= want;
+}
+
+void ionic_init_devinfo(struct ionic *ionic);
+int ionic_dev_setup(struct ionic *ionic);
+void ionic_dev_teardown(struct ionic *ionic);
+
+void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd);
+u8 ionic_dev_cmd_status(struct ionic_dev *idev);
+bool ionic_dev_cmd_done(struct ionic_dev *idev);
+void ionic_dev_cmd_comp(struct ionic_dev *idev, union ionic_dev_cmd_comp *comp);
+
+void ionic_dev_cmd_identify(struct ionic_dev *idev, u8 ver);
+void ionic_dev_cmd_init(struct ionic_dev *idev);
+void ionic_dev_cmd_reset(struct ionic_dev *idev);
+
+void ionic_dev_cmd_port_identify(struct ionic_dev *idev);
+void ionic_dev_cmd_port_init(struct ionic_dev *idev);
+void ionic_dev_cmd_port_reset(struct ionic_dev *idev);
+void ionic_dev_cmd_port_state(struct ionic_dev *idev, u8 state);
+void ionic_dev_cmd_port_speed(struct ionic_dev *idev, u32 speed);
+void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable);
+void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type);
+void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type);
+
+void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver);
+void ionic_dev_cmd_lif_init(struct ionic_dev *idev, u16 lif_index,
+ dma_addr_t addr);
+void ionic_dev_cmd_lif_reset(struct ionic_dev *idev, u16 lif_index);
+void ionic_dev_cmd_adminq_init(struct ionic_dev *idev, struct ionic_qcq *qcq,
+ u16 lif_index, u16 intr_index);
+
+int ionic_db_page_num(struct ionic_lif *lif, int pid);
+
+int ionic_cq_init(struct ionic_lif *lif, struct ionic_cq *cq,
+ struct ionic_intr_info *intr,
+ unsigned int num_descs, size_t desc_size);
+void ionic_cq_map(struct ionic_cq *cq, void *base, dma_addr_t base_pa);
+void ionic_cq_bind(struct ionic_cq *cq, struct ionic_queue *q);
+typedef bool (*ionic_cq_cb)(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
+typedef void (*ionic_cq_done_cb)(void *done_arg);
+unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do,
+ ionic_cq_cb cb, ionic_cq_done_cb done_cb,
+ void *done_arg);
+
+int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev,
+ struct ionic_queue *q, unsigned int index, const char *name,
+ unsigned int num_descs, size_t desc_size,
+ size_t sg_desc_size, unsigned int pid);
+void ionic_q_map(struct ionic_queue *q, void *base, dma_addr_t base_pa);
+void ionic_q_sg_map(struct ionic_queue *q, void *base, dma_addr_t base_pa);
+void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb,
+ void *cb_arg);
+void ionic_q_rewind(struct ionic_queue *q, struct ionic_desc_info *start);
+void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
+ unsigned int stop_index);
+
+#endif /* _IONIC_DEV_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
new file mode 100644
index 000000000000..af1647afa4e8
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+
+#include "ionic.h"
+#include "ionic_bus.h"
+#include "ionic_lif.h"
+#include "ionic_devlink.h"
+
+static int ionic_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+ struct ionic *ionic = devlink_priv(dl);
+ struct ionic_dev *idev = &ionic->idev;
+ char buf[16];
+ int err = 0;
+
+ err = devlink_info_driver_name_put(req, IONIC_DRV_NAME);
+ if (err)
+ goto info_out;
+
+ err = devlink_info_version_running_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW,
+ idev->dev_info.fw_version);
+ if (err)
+ goto info_out;
+
+ snprintf(buf, sizeof(buf), "0x%x", idev->dev_info.asic_type);
+ err = devlink_info_version_fixed_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_ASIC_ID,
+ buf);
+ if (err)
+ goto info_out;
+
+ snprintf(buf, sizeof(buf), "0x%x", idev->dev_info.asic_rev);
+ err = devlink_info_version_fixed_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_ASIC_REV,
+ buf);
+ if (err)
+ goto info_out;
+
+ err = devlink_info_serial_number_put(req, idev->dev_info.serial_num);
+
+info_out:
+ return err;
+}
+
+static const struct devlink_ops ionic_dl_ops = {
+ .info_get = ionic_dl_info_get,
+};
+
+struct ionic *ionic_devlink_alloc(struct device *dev)
+{
+ struct devlink *dl;
+
+ dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic));
+ if (!dl)
+ return NULL;
+
+ return devlink_priv(dl);
+}
+
+void ionic_devlink_free(struct ionic *ionic)
+{
+ struct devlink *dl = priv_to_devlink(ionic);
+
+ devlink_free(dl);
+}
+
+int ionic_devlink_register(struct ionic *ionic)
+{
+ struct devlink *dl = priv_to_devlink(ionic);
+ int err;
+
+ err = devlink_register(dl, ionic->dev);
+ if (err) {
+ dev_warn(ionic->dev, "devlink_register failed: %d\n", err);
+ return err;
+ }
+
+ devlink_port_attrs_set(&ionic->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL,
+ 0, false, 0, NULL, 0);
+ err = devlink_port_register(dl, &ionic->dl_port, 0);
+ if (err)
+ dev_err(ionic->dev, "devlink_port_register failed: %d\n", err);
+ else
+ devlink_port_type_eth_set(&ionic->dl_port,
+ ionic->master_lif->netdev);
+
+ return err;
+}
+
+void ionic_devlink_unregister(struct ionic *ionic)
+{
+ struct devlink *dl = priv_to_devlink(ionic);
+
+ devlink_port_unregister(&ionic->dl_port);
+ devlink_unregister(dl);
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.h b/drivers/net/ethernet/pensando/ionic/ionic_devlink.h
new file mode 100644
index 000000000000..0690172fc57a
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_DEVLINK_H_
+#define _IONIC_DEVLINK_H_
+
+#include <net/devlink.h>
+
+struct ionic *ionic_devlink_alloc(struct device *dev);
+void ionic_devlink_free(struct ionic *ionic);
+int ionic_devlink_register(struct ionic *ionic);
+void ionic_devlink_unregister(struct ionic *ionic);
+
+#endif /* _IONIC_DEVLINK_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
new file mode 100644
index 000000000000..7d10265f782a
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -0,0 +1,779 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+
+#include "ionic.h"
+#include "ionic_bus.h"
+#include "ionic_lif.h"
+#include "ionic_ethtool.h"
+#include "ionic_stats.h"
+
+static const char ionic_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define PRIV_F_SW_DBG_STATS BIT(0)
+ "sw-dbg-stats",
+};
+#define PRIV_FLAGS_COUNT ARRAY_SIZE(ionic_priv_flags_strings)
+
+static void ionic_get_stats_strings(struct ionic_lif *lif, u8 *buf)
+{
+ u32 i;
+
+ for (i = 0; i < ionic_num_stats_grps; i++)
+ ionic_stats_groups[i].get_strings(lif, &buf);
+}
+
+static void ionic_get_stats(struct net_device *netdev,
+ struct ethtool_stats *stats, u64 *buf)
+{
+ struct ionic_lif *lif;
+ u32 i;
+
+ lif = netdev_priv(netdev);
+
+ memset(buf, 0, stats->n_stats * sizeof(*buf));
+ for (i = 0; i < ionic_num_stats_grps; i++)
+ ionic_stats_groups[i].get_values(lif, &buf);
+}
+
+static int ionic_get_stats_count(struct ionic_lif *lif)
+{
+ int i, num_stats = 0;
+
+ for (i = 0; i < ionic_num_stats_grps; i++)
+ num_stats += ionic_stats_groups[i].get_count(lif);
+
+ return num_stats;
+}
+
+static int ionic_get_sset_count(struct net_device *netdev, int sset)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ int count = 0;
+
+ switch (sset) {
+ case ETH_SS_STATS:
+ count = ionic_get_stats_count(lif);
+ break;
+ case ETH_SS_PRIV_FLAGS:
+ count = PRIV_FLAGS_COUNT;
+ break;
+ }
+ return count;
+}
+
+static void ionic_get_strings(struct net_device *netdev,
+ u32 sset, u8 *buf)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+
+ switch (sset) {
+ case ETH_SS_STATS:
+ ionic_get_stats_strings(lif, buf);
+ break;
+ case ETH_SS_PRIV_FLAGS:
+ memcpy(buf, ionic_priv_flags_strings,
+ PRIV_FLAGS_COUNT * ETH_GSTRING_LEN);
+ break;
+ }
+}
+
+static void ionic_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic *ionic = lif->ionic;
+
+ strlcpy(drvinfo->driver, IONIC_DRV_NAME, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, IONIC_DRV_VERSION, sizeof(drvinfo->version));
+ strlcpy(drvinfo->fw_version, ionic->idev.dev_info.fw_version,
+ sizeof(drvinfo->fw_version));
+ strlcpy(drvinfo->bus_info, ionic_bus_info(ionic),
+ sizeof(drvinfo->bus_info));
+}
+
+static int ionic_get_regs_len(struct net_device *netdev)
+{
+ return (IONIC_DEV_INFO_REG_COUNT + IONIC_DEV_CMD_REG_COUNT) * sizeof(u32);
+}
+
+static void ionic_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
+ void *p)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ unsigned int size;
+
+ regs->version = IONIC_DEV_CMD_REG_VERSION;
+
+ size = IONIC_DEV_INFO_REG_COUNT * sizeof(u32);
+ memcpy_fromio(p, lif->ionic->idev.dev_info_regs->words, size);
+
+ /* the dev_cmd registers follow the dev_info registers in the dump */
+ p += size;
+ size = IONIC_DEV_CMD_REG_COUNT * sizeof(u32);
+ memcpy_fromio(p, lif->ionic->idev.dev_cmd_regs->words, size);
+}
+
+static int ionic_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *ks)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic_dev *idev = &lif->ionic->idev;
+ int copper_seen = 0;
+
+ ethtool_link_ksettings_zero_link_mode(ks, supported);
+
+ /* The port_info data is found in a DMA space that the NIC keeps
+ * up-to-date, so there's no need to request the data from the
+ * NIC, we already have it in our memory space.
+ */
+
+ switch (le16_to_cpu(idev->port_info->status.xcvr.pid)) {
+ /* Copper */
+ case IONIC_XCVR_PID_QSFP_100G_CR4:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100000baseCR4_Full);
+ copper_seen++;
+ break;
+ case IONIC_XCVR_PID_QSFP_40GBASE_CR4:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseCR4_Full);
+ copper_seen++;
+ break;
+ case IONIC_XCVR_PID_SFP_25GBASE_CR_S:
+ case IONIC_XCVR_PID_SFP_25GBASE_CR_L:
+ case IONIC_XCVR_PID_SFP_25GBASE_CR_N:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseCR_Full);
+ copper_seen++;
+ break;
+ case IONIC_XCVR_PID_SFP_10GBASE_AOC:
+ case IONIC_XCVR_PID_SFP_10GBASE_CU:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseCR_Full);
+ copper_seen++;
+ break;
+
+ /* Fibre */
+ case IONIC_XCVR_PID_QSFP_100G_SR4:
+ case IONIC_XCVR_PID_QSFP_100G_AOC:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100000baseSR4_Full);
+ break;
+ case IONIC_XCVR_PID_QSFP_100G_LR4:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100000baseLR4_ER4_Full);
+ break;
+ case IONIC_XCVR_PID_QSFP_100G_ER4:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100000baseLR4_ER4_Full);
+ break;
+ case IONIC_XCVR_PID_QSFP_40GBASE_SR4:
+ case IONIC_XCVR_PID_QSFP_40GBASE_AOC:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseSR4_Full);
+ break;
+ case IONIC_XCVR_PID_QSFP_40GBASE_LR4:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseLR4_Full);
+ break;
+ case IONIC_XCVR_PID_SFP_25GBASE_SR:
+ case IONIC_XCVR_PID_SFP_25GBASE_AOC:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseSR_Full);
+ break;
+ case IONIC_XCVR_PID_SFP_10GBASE_SR:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseSR_Full);
+ break;
+ case IONIC_XCVR_PID_SFP_10GBASE_LR:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseLR_Full);
+ break;
+ case IONIC_XCVR_PID_SFP_10GBASE_LRM:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseLRM_Full);
+ break;
+ case IONIC_XCVR_PID_SFP_10GBASE_ER:
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseER_Full);
+ break;
+ case IONIC_XCVR_PID_UNKNOWN:
+ /* This means there's no module plugged in */
+ break;
+ default:
+ dev_info(lif->ionic->dev, "unknown xcvr type pid=%d / 0x%x\n",
+ idev->port_info->status.xcvr.pid,
+ idev->port_info->status.xcvr.pid);
+ break;
+ }
+
+ bitmap_copy(ks->link_modes.advertising, ks->link_modes.supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
+ ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
+ if (idev->port_info->config.fec_type == IONIC_PORT_FEC_TYPE_FC)
+ ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_BASER);
+ else if (idev->port_info->config.fec_type == IONIC_PORT_FEC_TYPE_RS)
+ ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
+
+ ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+ ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
+
+ if (idev->port_info->status.xcvr.phy == IONIC_PHY_TYPE_COPPER ||
+ copper_seen)
+ ks->base.port = PORT_DA;
+ else if (idev->port_info->status.xcvr.phy == IONIC_PHY_TYPE_FIBER)
+ ks->base.port = PORT_FIBRE;
+ else
+ ks->base.port = PORT_NONE;
+
+ if (ks->base.port != PORT_NONE) {
+ ks->base.speed = le32_to_cpu(lif->info->status.link_speed);
+
+ if (le16_to_cpu(lif->info->status.link_status))
+ ks->base.duplex = DUPLEX_FULL;
+ else
+ ks->base.duplex = DUPLEX_UNKNOWN;
+
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+
+ if (idev->port_info->config.an_enable) {
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ Autoneg);
+ ks->base.autoneg = AUTONEG_ENABLE;
+ }
+ }
+
+ return 0;
+}
+
+static int ionic_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *ks)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic *ionic = lif->ionic;
+ struct ionic_dev *idev;
+ u32 req_rs, req_fc;
+ u8 fec_type;
+ int err = 0;
+
+ idev = &lif->ionic->idev;
+ fec_type = IONIC_PORT_FEC_TYPE_NONE;
+
+ /* set autoneg */
+ if (ks->base.autoneg != idev->port_info->config.an_enable) {
+ mutex_lock(&ionic->dev_cmd_lock);
+ ionic_dev_cmd_port_autoneg(idev, ks->base.autoneg);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ mutex_unlock(&ionic->dev_cmd_lock);
+ if (err)
+ return err;
+ }
+
+ /* set speed */
+ if (ks->base.speed != le32_to_cpu(idev->port_info->config.speed)) {
+ mutex_lock(&ionic->dev_cmd_lock);
+ ionic_dev_cmd_port_speed(idev, ks->base.speed);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ mutex_unlock(&ionic->dev_cmd_lock);
+ if (err)
+ return err;
+ }
+
+ /* set FEC */
+ req_rs = ethtool_link_ksettings_test_link_mode(ks, advertising, FEC_RS);
+ req_fc = ethtool_link_ksettings_test_link_mode(ks, advertising, FEC_BASER);
+ if (req_rs && req_fc) {
+ netdev_info(netdev, "Only select one FEC mode at a time\n");
+ return -EINVAL;
+ } else if (req_fc) {
+ fec_type = IONIC_PORT_FEC_TYPE_FC;
+ } else if (req_rs) {
+ fec_type = IONIC_PORT_FEC_TYPE_RS;
+ } else if (!(req_rs | req_fc)) {
+ fec_type = IONIC_PORT_FEC_TYPE_NONE;
+ }
+
+ if (fec_type != idev->port_info->config.fec_type) {
+ mutex_lock(&ionic->dev_cmd_lock);
+ ionic_dev_cmd_port_fec(idev, fec_type);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ mutex_unlock(&ionic->dev_cmd_lock);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void ionic_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pause)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ u8 pause_type;
+
+ pause->autoneg = 0;
+
+ pause_type = lif->ionic->idev.port_info->config.pause_type;
+ if (pause_type) {
+ pause->rx_pause = pause_type & IONIC_PAUSE_F_RX ? 1 : 0;
+ pause->tx_pause = pause_type & IONIC_PAUSE_F_TX ? 1 : 0;
+ }
+}
+
+static int ionic_set_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pause)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic *ionic = lif->ionic;
+ u32 requested_pause;
+ int err;
+
+ if (pause->autoneg)
+ return -EOPNOTSUPP;
+
+ /* change both at the same time */
+ requested_pause = IONIC_PORT_PAUSE_TYPE_LINK;
+ if (pause->rx_pause)
+ requested_pause |= IONIC_PAUSE_F_RX;
+ if (pause->tx_pause)
+ requested_pause |= IONIC_PAUSE_F_TX;
+
+ if (requested_pause == lif->ionic->idev.port_info->config.pause_type)
+ return 0;
+
+ mutex_lock(&ionic->dev_cmd_lock);
+ ionic_dev_cmd_port_pause(&lif->ionic->idev, requested_pause);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ mutex_unlock(&ionic->dev_cmd_lock);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int ionic_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coalesce)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+
+ /* Tx uses Rx interrupt */
+ coalesce->tx_coalesce_usecs = lif->rx_coalesce_usecs;
+ coalesce->rx_coalesce_usecs = lif->rx_coalesce_usecs;
+
+ return 0;
+}
+
+static int ionic_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coalesce)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic_identity *ident;
+ struct ionic_qcq *qcq;
+ unsigned int i;
+ u32 usecs;
+ u32 coal;
+
+ if (coalesce->rx_max_coalesced_frames ||
+ coalesce->rx_coalesce_usecs_irq ||
+ coalesce->rx_max_coalesced_frames_irq ||
+ coalesce->tx_max_coalesced_frames ||
+ coalesce->tx_coalesce_usecs_irq ||
+ coalesce->tx_max_coalesced_frames_irq ||
+ coalesce->stats_block_coalesce_usecs ||
+ coalesce->use_adaptive_rx_coalesce ||
+ coalesce->use_adaptive_tx_coalesce ||
+ coalesce->pkt_rate_low ||
+ coalesce->rx_coalesce_usecs_low ||
+ coalesce->rx_max_coalesced_frames_low ||
+ coalesce->tx_coalesce_usecs_low ||
+ coalesce->tx_max_coalesced_frames_low ||
+ coalesce->pkt_rate_high ||
+ coalesce->rx_coalesce_usecs_high ||
+ coalesce->rx_max_coalesced_frames_high ||
+ coalesce->tx_coalesce_usecs_high ||
+ coalesce->tx_max_coalesced_frames_high ||
+ coalesce->rate_sample_interval)
+ return -EINVAL;
+
+ ident = &lif->ionic->ident;
+ if (ident->dev.intr_coal_div == 0) {
+ netdev_warn(netdev, "bad HW value in dev.intr_coal_div = %d\n",
+ ident->dev.intr_coal_div);
+ return -EIO;
+ }
+
+ /* Tx uses Rx interrupt, so only change Rx */
+ if (coalesce->tx_coalesce_usecs != lif->rx_coalesce_usecs) {
+ netdev_warn(netdev, "only the rx-usecs can be changed\n");
+ return -EINVAL;
+ }
+
+ coal = ionic_coal_usec_to_hw(lif->ionic, coalesce->rx_coalesce_usecs);
+
+ if (coal > IONIC_INTR_CTRL_COAL_MAX)
+ return -ERANGE;
+
+ /* If they asked for non-zero and it resolved to zero, bump it up */
+ if (!coal && coalesce->rx_coalesce_usecs)
+ coal = 1;
+
+ /* Convert it back to get device resolution */
+ usecs = ionic_coal_hw_to_usec(lif->ionic, coal);
+
+ if (usecs != lif->rx_coalesce_usecs) {
+ lif->rx_coalesce_usecs = usecs;
+
+ if (test_bit(IONIC_LIF_UP, lif->state)) {
+ for (i = 0; i < lif->nxqs; i++) {
+ qcq = lif->rxqcqs[i].qcq;
+ ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
+ qcq->intr.index, coal);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static void ionic_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+
+ ring->tx_max_pending = IONIC_MAX_TXRX_DESC;
+ ring->tx_pending = lif->ntxq_descs;
+ ring->rx_max_pending = IONIC_MAX_TXRX_DESC;
+ ring->rx_pending = lif->nrxq_descs;
+}
+
+static int ionic_set_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ bool running;
+
+ if (ring->rx_mini_pending || ring->rx_jumbo_pending) {
+ netdev_info(netdev, "Changing jumbo or mini descriptors not supported\n");
+ return -EINVAL;
+ }
+
+ if (!is_power_of_2(ring->tx_pending) ||
+ !is_power_of_2(ring->rx_pending)) {
+ netdev_info(netdev, "Descriptor count must be a power of 2\n");
+ return -EINVAL;
+ }
+
+ /* if nothing to do return success */
+ if (ring->tx_pending == lif->ntxq_descs &&
+ ring->rx_pending == lif->nrxq_descs)
+ return 0;
+
+ if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET))
+ return -EBUSY;
+
+ running = test_bit(IONIC_LIF_UP, lif->state);
+ if (running)
+ ionic_stop(netdev);
+
+ lif->ntxq_descs = ring->tx_pending;
+ lif->nrxq_descs = ring->rx_pending;
+
+ if (running)
+ ionic_open(netdev);
+ clear_bit(IONIC_LIF_QUEUE_RESET, lif->state);
+
+ return 0;
+}
+
+static void ionic_get_channels(struct net_device *netdev,
+ struct ethtool_channels *ch)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+
+ /* report maximum channels */
+ ch->max_combined = lif->ionic->ntxqs_per_lif;
+
+ /* report current channels */
+ ch->combined_count = lif->nxqs;
+}
+
+static int ionic_set_channels(struct net_device *netdev,
+ struct ethtool_channels *ch)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ bool running;
+
+ if (!ch->combined_count || ch->other_count ||
+ ch->rx_count || ch->tx_count)
+ return -EINVAL;
+
+ if (ch->combined_count == lif->nxqs)
+ return 0;
+
+ if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET))
+ return -EBUSY;
+
+ running = test_bit(IONIC_LIF_UP, lif->state);
+ if (running)
+ ionic_stop(netdev);
+
+ lif->nxqs = ch->combined_count;
+
+ if (running)
+ ionic_open(netdev);
+ clear_bit(IONIC_LIF_QUEUE_RESET, lif->state);
+
+ return 0;
+}
+
+static u32 ionic_get_priv_flags(struct net_device *netdev)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ u32 priv_flags = 0;
+
+ if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state))
+ priv_flags |= PRIV_F_SW_DBG_STATS;
+
+ return priv_flags;
+}
+
+static int ionic_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ u32 flags = lif->flags;
+
+ clear_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state);
+ if (priv_flags & PRIV_F_SW_DBG_STATS)
+ set_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state);
+
+ if (flags != lif->flags)
+ lif->flags = flags;
+
+ return 0;
+}
+
+static int ionic_get_rxnfc(struct net_device *netdev,
+ struct ethtool_rxnfc *info, u32 *rules)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ int err = 0;
+
+ switch (info->cmd) {
+ case ETHTOOL_GRXRINGS:
+ info->data = lif->nxqs;
+ break;
+ default:
+ netdev_err(netdev, "Command parameter %d is not supported\n",
+ info->cmd);
+ err = -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
+static u32 ionic_get_rxfh_indir_size(struct net_device *netdev)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+
+ return le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz);
+}
+
+static u32 ionic_get_rxfh_key_size(struct net_device *netdev)
+{
+ return IONIC_RSS_HASH_KEY_SIZE;
+}
+
+static int ionic_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+ u8 *hfunc)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ unsigned int i, tbl_sz;
+
+ if (indir) {
+ tbl_sz = le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz);
+ for (i = 0; i < tbl_sz; i++)
+ indir[i] = lif->rss_ind_tbl[i];
+ }
+
+ if (key)
+ memcpy(key, lif->rss_hash_key, IONIC_RSS_HASH_KEY_SIZE);
+
+ if (hfunc)
+ *hfunc = ETH_RSS_HASH_TOP;
+
+ return 0;
+}
+
+static int ionic_set_rxfh(struct net_device *netdev, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ int err;
+
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+ return -EOPNOTSUPP;
+
+ err = ionic_lif_rss_config(lif, lif->rss_types, key, indir);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int ionic_set_tunable(struct net_device *dev,
+ const struct ethtool_tunable *tuna,
+ const void *data)
+{
+ struct ionic_lif *lif = netdev_priv(dev);
+
+ switch (tuna->id) {
+ case ETHTOOL_RX_COPYBREAK:
+ lif->rx_copybreak = *(u32 *)data;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int ionic_get_tunable(struct net_device *netdev,
+ const struct ethtool_tunable *tuna, void *data)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+
+ switch (tuna->id) {
+ case ETHTOOL_RX_COPYBREAK:
+ *(u32 *)data = lif->rx_copybreak;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int ionic_get_module_info(struct net_device *netdev,
+ struct ethtool_modinfo *modinfo)
+
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic_dev *idev = &lif->ionic->idev;
+ struct ionic_xcvr_status *xcvr;
+
+ xcvr = &idev->port_info->status.xcvr;
+
+ /* report the module data type and length */
+ switch (xcvr->sprom[0]) {
+ case 0x03: /* SFP */
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ break;
+ case 0x0D: /* QSFP */
+ case 0x11: /* QSFP28 */
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+ break;
+ default:
+ netdev_info(netdev, "unknown xcvr type 0x%02x\n",
+ xcvr->sprom[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int ionic_get_module_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *ee,
+ u8 *data)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic_dev *idev = &lif->ionic->idev;
+ struct ionic_xcvr_status *xcvr;
+ char tbuf[sizeof(xcvr->sprom)];
+ int count = 10;
+ u32 len;
+
+ /* The NIC keeps the module prom up-to-date in the DMA space
+ * so we can simply copy the module bytes into the data buffer.
+ */
+ xcvr = &idev->port_info->status.xcvr;
+ len = min_t(u32, sizeof(xcvr->sprom), ee->len);
+
+ do {
+ memcpy(data, xcvr->sprom, len);
+ memcpy(tbuf, xcvr->sprom, len);
+
+ /* Let's make sure we got a consistent copy */
+ if (!memcmp(data, tbuf, len))
+ break;
+
+ } while (--count);
+
+ if (!count)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int ionic_nway_reset(struct net_device *netdev)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic *ionic = lif->ionic;
+ int err = 0;
+
+ /* flap the link to force auto-negotiation */
+
+ mutex_lock(&ionic->dev_cmd_lock);
+
+ ionic_dev_cmd_port_state(&ionic->idev, IONIC_PORT_ADMIN_STATE_DOWN);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+
+ if (!err) {
+ ionic_dev_cmd_port_state(&ionic->idev, IONIC_PORT_ADMIN_STATE_UP);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ }
+
+ mutex_unlock(&ionic->dev_cmd_lock);
+
+ return err;
+}
+
+static const struct ethtool_ops ionic_ethtool_ops = {
+ .get_drvinfo = ionic_get_drvinfo,
+ .get_regs_len = ionic_get_regs_len,
+ .get_regs = ionic_get_regs,
+ .get_link = ethtool_op_get_link,
+ .get_link_ksettings = ionic_get_link_ksettings,
+ .get_coalesce = ionic_get_coalesce,
+ .set_coalesce = ionic_set_coalesce,
+ .get_ringparam = ionic_get_ringparam,
+ .set_ringparam = ionic_set_ringparam,
+ .get_channels = ionic_get_channels,
+ .set_channels = ionic_set_channels,
+ .get_strings = ionic_get_strings,
+ .get_ethtool_stats = ionic_get_stats,
+ .get_sset_count = ionic_get_sset_count,
+ .get_priv_flags = ionic_get_priv_flags,
+ .set_priv_flags = ionic_set_priv_flags,
+ .get_rxnfc = ionic_get_rxnfc,
+ .get_rxfh_indir_size = ionic_get_rxfh_indir_size,
+ .get_rxfh_key_size = ionic_get_rxfh_key_size,
+ .get_rxfh = ionic_get_rxfh,
+ .set_rxfh = ionic_set_rxfh,
+ .get_tunable = ionic_get_tunable,
+ .set_tunable = ionic_set_tunable,
+ .get_module_info = ionic_get_module_info,
+ .get_module_eeprom = ionic_get_module_eeprom,
+ .get_pauseparam = ionic_get_pauseparam,
+ .set_pauseparam = ionic_set_pauseparam,
+ .set_link_ksettings = ionic_set_link_ksettings,
+ .nway_reset = ionic_nway_reset,
+};
+
+void ionic_ethtool_set_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &ionic_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.h b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.h
new file mode 100644
index 000000000000..38b91b1d70ae
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_ETHTOOL_H_
+#define _IONIC_ETHTOOL_H_
+
+void ionic_ethtool_set_ops(struct net_device *netdev);
+
+#endif /* _IONIC_ETHTOOL_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
new file mode 100644
index 000000000000..5bfdda19f64d
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -0,0 +1,2482 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB OR BSD-2-Clause */
+/* Copyright (c) 2017-2019 Pensando Systems, Inc. All rights reserved. */
+
+#ifndef _IONIC_IF_H_
+#define _IONIC_IF_H_
+
+#pragma pack(push, 1)
+
+#define IONIC_DEV_INFO_SIGNATURE 0x44455649 /* 'DEVI' */
+#define IONIC_DEV_INFO_VERSION 1
+#define IONIC_IFNAMSIZ 16
+
+/**
+ * Commands
+ */
+enum ionic_cmd_opcode {
+ IONIC_CMD_NOP = 0,
+
+ /* Device commands */
+ IONIC_CMD_IDENTIFY = 1,
+ IONIC_CMD_INIT = 2,
+ IONIC_CMD_RESET = 3,
+ IONIC_CMD_GETATTR = 4,
+ IONIC_CMD_SETATTR = 5,
+
+ /* Port commands */
+ IONIC_CMD_PORT_IDENTIFY = 10,
+ IONIC_CMD_PORT_INIT = 11,
+ IONIC_CMD_PORT_RESET = 12,
+ IONIC_CMD_PORT_GETATTR = 13,
+ IONIC_CMD_PORT_SETATTR = 14,
+
+ /* LIF commands */
+ IONIC_CMD_LIF_IDENTIFY = 20,
+ IONIC_CMD_LIF_INIT = 21,
+ IONIC_CMD_LIF_RESET = 22,
+ IONIC_CMD_LIF_GETATTR = 23,
+ IONIC_CMD_LIF_SETATTR = 24,
+
+ IONIC_CMD_RX_MODE_SET = 30,
+ IONIC_CMD_RX_FILTER_ADD = 31,
+ IONIC_CMD_RX_FILTER_DEL = 32,
+
+ /* Queue commands */
+ IONIC_CMD_Q_INIT = 40,
+ IONIC_CMD_Q_CONTROL = 41,
+
+ /* RDMA commands */
+ IONIC_CMD_RDMA_RESET_LIF = 50,
+ IONIC_CMD_RDMA_CREATE_EQ = 51,
+ IONIC_CMD_RDMA_CREATE_CQ = 52,
+ IONIC_CMD_RDMA_CREATE_ADMINQ = 53,
+
+ /* QoS commands */
+ IONIC_CMD_QOS_CLASS_IDENTIFY = 240,
+ IONIC_CMD_QOS_CLASS_INIT = 241,
+ IONIC_CMD_QOS_CLASS_RESET = 242,
+
+ /* Firmware commands */
+ IONIC_CMD_FW_DOWNLOAD = 254,
+ IONIC_CMD_FW_CONTROL = 255,
+};
+
+/**
+ * Command Return codes
+ */
+enum ionic_status_code {
+ IONIC_RC_SUCCESS = 0, /* Success */
+ IONIC_RC_EVERSION = 1, /* Incorrect version for request */
+ IONIC_RC_EOPCODE = 2, /* Invalid cmd opcode */
+ IONIC_RC_EIO = 3, /* I/O error */
+ IONIC_RC_EPERM = 4, /* Permission denied */
+ IONIC_RC_EQID = 5, /* Bad qid */
+ IONIC_RC_EQTYPE = 6, /* Bad qtype */
+ IONIC_RC_ENOENT = 7, /* No such element */
+ IONIC_RC_EINTR = 8, /* operation interrupted */
+ IONIC_RC_EAGAIN = 9, /* Try again */
+ IONIC_RC_ENOMEM = 10, /* Out of memory */
+ IONIC_RC_EFAULT = 11, /* Bad address */
+ IONIC_RC_EBUSY = 12, /* Device or resource busy */
+ IONIC_RC_EEXIST = 13, /* object already exists */
+ IONIC_RC_EINVAL = 14, /* Invalid argument */
+ IONIC_RC_ENOSPC = 15, /* No space left or alloc failure */
+ IONIC_RC_ERANGE = 16, /* Parameter out of range */
+ IONIC_RC_BAD_ADDR = 17, /* Descriptor contains a bad ptr */
+ IONIC_RC_DEV_CMD = 18, /* Device cmd attempted on AdminQ */
+ IONIC_RC_ENOSUPP = 19, /* Operation not supported */
+ IONIC_RC_ERROR = 29, /* Generic error */
+
+ IONIC_RC_ERDMA = 30, /* Generic RDMA error */
+};
+
+enum ionic_notifyq_opcode {
+ IONIC_EVENT_LINK_CHANGE = 1,
+ IONIC_EVENT_RESET = 2,
+ IONIC_EVENT_HEARTBEAT = 3,
+ IONIC_EVENT_LOG = 4,
+};
+
+/**
+ * struct cmd - General admin command format
+ * @opcode: Opcode for the command
+ * @lif_index: LIF index
+ * @cmd_data: Opcode-specific command bytes
+ */
+struct ionic_admin_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 lif_index;
+ u8 cmd_data[60];
+};
+
+/**
+ * struct admin_comp - General admin command completion format
+ * @status: The status of the command (enum status_code)
+ * @comp_index: The index in the descriptor ring for which this
+ * is the completion.
+ * @cmd_data: Command-specific bytes.
+ * @color: Color bit. (Always 0 for commands issued to the
+ * Device Cmd Registers.)
+ */
+struct ionic_admin_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ u8 cmd_data[11];
+ u8 color;
+#define IONIC_COMP_COLOR_MASK 0x80
+};
+
+static inline u8 color_match(u8 color, u8 done_color)
+{
+ return (!!(color & IONIC_COMP_COLOR_MASK)) == done_color;
+}
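
To make the color convention concrete, here is a hedged polling sketch built on color_match(); it is illustrative only and is not the driver's ionic_cq_service() loop. Completions are consumed while their color bit matches the expected done_color, and the expected color flips each time the ring wraps.

	/* Illustrative only: walk an admin completion ring using the color
	 * protocol.  'tail' and 'done_color' are the caller's ring state;
	 * the expected color flips on every wrap.
	 */
	static unsigned int example_poll_admin_ring(struct ionic_admin_comp *ring,
						    unsigned int num_descs,
						    unsigned int *tail,
						    u8 *done_color)
	{
		unsigned int handled = 0;

		while (color_match(ring[*tail].color, *done_color)) {
			handled++;
			if (++(*tail) == num_descs) {
				*tail = 0;
				*done_color = !*done_color;
			}
		}

		return handled;
	}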
+
+/**
+ * struct nop_cmd - NOP command
+ * @opcode: opcode
+ */
+struct ionic_nop_cmd {
+ u8 opcode;
+ u8 rsvd[63];
+};
+
+/**
+ * struct nop_comp - NOP command completion
+ * @status: The status of the command (enum status_code)
+ */
+struct ionic_nop_comp {
+ u8 status;
+ u8 rsvd[15];
+};
+
+/**
+ * struct dev_init_cmd - Device init command
+ * @opcode: opcode
+ * @type: device type
+ */
+struct ionic_dev_init_cmd {
+ u8 opcode;
+ u8 type;
+ u8 rsvd[62];
+};
+
+/**
+ * struct init_comp - Device init command completion
+ * @status: The status of the command (enum status_code)
+ */
+struct ionic_dev_init_comp {
+ u8 status;
+ u8 rsvd[15];
+};
+
+/**
+ * struct dev_reset_cmd - Device reset command
+ * @opcode: opcode
+ */
+struct ionic_dev_reset_cmd {
+ u8 opcode;
+ u8 rsvd[63];
+};
+
+/**
+ * struct reset_comp - Reset command completion
+ * @status: The status of the command (enum status_code)
+ */
+struct ionic_dev_reset_comp {
+ u8 status;
+ u8 rsvd[15];
+};
+
+#define IONIC_IDENTITY_VERSION_1 1
+
+/**
+ * struct dev_identify_cmd - Driver/device identify command
+ * @opcode: opcode
+ * @ver: Highest version of identify supported by driver
+ */
+struct ionic_dev_identify_cmd {
+ u8 opcode;
+ u8 ver;
+ u8 rsvd[62];
+};
+
+/**
+ * struct dev_identify_comp - Driver/device identify command completion
+ * @status: The status of the command (enum status_code)
+ * @ver: Version of identify returned by device
+ */
+struct ionic_dev_identify_comp {
+ u8 status;
+ u8 ver;
+ u8 rsvd[14];
+};
+
+enum ionic_os_type {
+ IONIC_OS_TYPE_LINUX = 1,
+ IONIC_OS_TYPE_WIN = 2,
+ IONIC_OS_TYPE_DPDK = 3,
+ IONIC_OS_TYPE_FREEBSD = 4,
+ IONIC_OS_TYPE_IPXE = 5,
+ IONIC_OS_TYPE_ESXI = 6,
+};
+
+/**
+ * union drv_identity - driver identity information
+ * @os_type: OS type (see enum os_type)
+ * @os_dist: OS distribution, numeric format
+ * @os_dist_str: OS distribution, string format
+ * @kernel_ver: Kernel version, numeric format
+ * @kernel_ver_str: Kernel version, string format
+ * @driver_ver_str: Driver version, string format
+ */
+union ionic_drv_identity {
+ struct {
+ __le32 os_type;
+ __le32 os_dist;
+ char os_dist_str[128];
+ __le32 kernel_ver;
+ char kernel_ver_str[32];
+ char driver_ver_str[32];
+ };
+ __le32 words[512];
+};
+
+/**
+ * union dev_identity - device identity information
+ * @version: Version of device identify
+ * @type: Identify type (0 for now)
+ * @nports: Number of ports provisioned
+ * @nlifs: Number of LIFs provisioned
+ * @nintrs: Number of interrupts provisioned
+ * @ndbpgs_per_lif: Number of doorbell pages per LIF
+ * @intr_coal_mult: Interrupt coalescing multiplication factor.
+ * Scale user-supplied interrupt coalescing
+ * value in usecs to device units using:
+ * device units = usecs * mult / div
+ * @intr_coal_div: Interrupt coalescing division factor.
+ * Scale user-supplied interrupt coalescing
+ * value in usecs to device units using:
+ * device units = usecs * mult / div
+ *
+ */
+union ionic_dev_identity {
+ struct {
+ u8 version;
+ u8 type;
+ u8 rsvd[2];
+ u8 nports;
+ u8 rsvd2[3];
+ __le32 nlifs;
+ __le32 nintrs;
+ __le32 ndbpgs_per_lif;
+ __le32 intr_coal_mult;
+ __le32 intr_coal_div;
+ };
+ __le32 words[512];
+};
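
As a worked example of the intr_coal_mult/intr_coal_div scaling documented above (the driver's real conversion is ionic_coal_usec_to_hw(), used by the ethtool coalescing path), a sketch assuming the identity words have already been byte-swapped to CPU order:

	/* Sketch: scale a user-requested coalescing interval in usecs into
	 * device units as device_units = usecs * mult / div, guarding the
	 * divide-by-zero case the way the ethtool path does.
	 */
	static u32 example_coal_usec_to_hw(u32 mult, u32 div, u32 usecs)
	{
		if (!div)
			return 0;

		return (usecs * mult) / div;
	}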
+
+enum ionic_lif_type {
+ IONIC_LIF_TYPE_CLASSIC = 0,
+ IONIC_LIF_TYPE_MACVLAN = 1,
+ IONIC_LIF_TYPE_NETQUEUE = 2,
+};
+
+/**
+ * struct lif_identify_cmd - lif identify command
+ * @opcode: opcode
+ * @type: lif type (enum lif_type)
+ * @ver: version of identify returned by device
+ */
+struct ionic_lif_identify_cmd {
+ u8 opcode;
+ u8 type;
+ u8 ver;
+ u8 rsvd[61];
+};
+
+/**
+ * struct lif_identify_comp - lif identify command completion
+ * @status: status of the command (enum status_code)
+ * @ver: version of identify returned by device
+ */
+struct ionic_lif_identify_comp {
+ u8 status;
+ u8 ver;
+ u8 rsvd2[14];
+};
+
+enum ionic_lif_capability {
+ IONIC_LIF_CAP_ETH = BIT(0),
+ IONIC_LIF_CAP_RDMA = BIT(1),
+};
+
+/**
+ * Logical Queue Types
+ */
+enum ionic_logical_qtype {
+ IONIC_QTYPE_ADMINQ = 0,
+ IONIC_QTYPE_NOTIFYQ = 1,
+ IONIC_QTYPE_RXQ = 2,
+ IONIC_QTYPE_TXQ = 3,
+ IONIC_QTYPE_EQ = 4,
+ IONIC_QTYPE_MAX = 16,
+};
+
+/**
+ * struct lif_logical_qtype - Descriptor of logical to hardware queue type.
+ * @qtype: Hardware Queue Type.
+ * @qid_count: Number of Queue IDs of the logical type.
+ * @qid_base: Minimum Queue ID of the logical type.
+ */
+struct ionic_lif_logical_qtype {
+ u8 qtype;
+ u8 rsvd[3];
+ __le32 qid_count;
+ __le32 qid_base;
+};
+
+enum ionic_lif_state {
+ IONIC_LIF_DISABLE = 0,
+ IONIC_LIF_ENABLE = 1,
+ IONIC_LIF_HANG_RESET = 2,
+};
+
+/**
+ * LIF configuration
+ * @state: lif state (enum lif_state)
+ * @name: lif name
+ * @mtu: mtu
+ * @mac: station mac address
+ * @features: features (enum eth_hw_features)
+ * @queue_count: queue counts per queue-type
+ */
+union ionic_lif_config {
+ struct {
+ u8 state;
+ u8 rsvd[3];
+ char name[IONIC_IFNAMSIZ];
+ __le32 mtu;
+ u8 mac[6];
+ u8 rsvd2[2];
+ __le64 features;
+ __le32 queue_count[IONIC_QTYPE_MAX];
+ };
+ __le32 words[64];
+};
+
+/**
+ * struct lif_identity - lif identity information (type-specific)
+ *
+ * @capabilities: LIF capabilities
+ *
+ * Ethernet:
+ * @version: Ethernet identify structure version.
+ * @features: Ethernet features supported on this lif type.
+ * @max_ucast_filters: Number of perfect unicast addresses supported.
+ * @max_mcast_filters: Number of perfect multicast addresses supported.
+ * @min_frame_size: Minimum size of frames to be sent
+ * @max_frame_size: Maximum size of frames to be sent
+ * @config: LIF config struct with features, mtu, mac, q counts
+ *
+ * RDMA:
+ * @version: RDMA version of opcodes and queue descriptors.
+ * @qp_opcodes: Number of rdma queue pair opcodes supported.
+ * @admin_opcodes: Number of rdma admin opcodes supported.
+ * @npts_per_lif: Page table size per lif
+ * @nmrs_per_lif: Number of memory regions per lif
+ * @nahs_per_lif: Number of address handles per lif
+ * @max_stride: Max work request stride.
+ * @cl_stride: Cache line stride.
+ * @pte_stride: Page table entry stride.
+ * @rrq_stride: Remote RQ work request stride.
+ * @rsq_stride: Remote SQ work request stride.
+ * @dcqcn_profiles: Number of DCQCN profiles
+ * @aq_qtype: RDMA Admin Qtype.
+ * @sq_qtype: RDMA Send Qtype.
+ * @rq_qtype: RDMA Receive Qtype.
+ * @cq_qtype: RDMA Completion Qtype.
+ * @eq_qtype: RDMA Event Qtype.
+ */
+union ionic_lif_identity {
+ struct {
+ __le64 capabilities;
+
+ struct {
+ u8 version;
+ u8 rsvd[3];
+ __le32 max_ucast_filters;
+ __le32 max_mcast_filters;
+ __le16 rss_ind_tbl_sz;
+ __le32 min_frame_size;
+ __le32 max_frame_size;
+ u8 rsvd2[106];
+ union ionic_lif_config config;
+ } eth;
+
+ struct {
+ u8 version;
+ u8 qp_opcodes;
+ u8 admin_opcodes;
+ u8 rsvd;
+ __le32 npts_per_lif;
+ __le32 nmrs_per_lif;
+ __le32 nahs_per_lif;
+ u8 max_stride;
+ u8 cl_stride;
+ u8 pte_stride;
+ u8 rrq_stride;
+ u8 rsq_stride;
+ u8 dcqcn_profiles;
+ u8 rsvd_dimensions[10];
+ struct ionic_lif_logical_qtype aq_qtype;
+ struct ionic_lif_logical_qtype sq_qtype;
+ struct ionic_lif_logical_qtype rq_qtype;
+ struct ionic_lif_logical_qtype cq_qtype;
+ struct ionic_lif_logical_qtype eq_qtype;
+ } rdma;
+ };
+ __le32 words[512];
+};
+
+/**
+ * struct lif_init_cmd - LIF init command
+ * @opcode: opcode
+ * @type: LIF type (enum lif_type)
+ * @index: LIF index
+ * @info_pa: destination address for lif info (struct lif_info)
+ */
+struct ionic_lif_init_cmd {
+ u8 opcode;
+ u8 type;
+ __le16 index;
+ __le32 rsvd;
+ __le64 info_pa;
+ u8 rsvd2[48];
+};
+
+/**
+ * struct lif_init_comp - LIF init command completion
+ * @status: The status of the command (enum status_code)
+ */
+struct ionic_lif_init_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 hw_index;
+ u8 rsvd2[12];
+};
+
+/**
+ * struct q_init_cmd - Queue init command
+ * @opcode: opcode
+ * @type: Logical queue type
+ * @ver: Queue version (defines opcode/descriptor scope)
+ * @lif_index: LIF index
+ * @index: (lif, qtype) relative admin queue index
+ * @intr_index: Interrupt control register index
+ * @pid: Process ID
+ * @flags:
+ * IRQ: Interrupt requested on completion
+ * ENA: Enable the queue. If ENA=0 the queue is initialized
+ * but remains disabled, to be later enabled with the
+ * Queue Enable command. If ENA=1, then queue is
+ * initialized and then enabled.
+ * SG: Enable Scatter-Gather on the queue.
+ * EQ: Enable the Event Queue
+ * @cos: Class of service for this queue.
+ * @ring_size: Queue ring size, encoded as a log2(size),
+ * in number of descs. The actual ring size is
+ * (1 << ring_size). For example, to
+ * select a ring size of 64 descriptors write
+ * ring_size = 6. The minimum ring_size value is 2
+ * for a ring size of 4 descriptors. The maximum
+ * ring_size value is 16 for a ring size of 64k
+ * descriptors. Values of ring_size <2 and >16 are
+ * reserved.
+ * @ring_base: Queue ring base address
+ * @cq_ring_base: Completion queue ring base address
+ * @sg_ring_base: Scatter/Gather ring base address
+ * @eq_index: Event queue index
+ */
+struct ionic_q_init_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 lif_index;
+ u8 type;
+ u8 ver;
+ u8 rsvd1[2];
+ __le32 index;
+ __le16 pid;
+ __le16 intr_index;
+ __le16 flags;
+#define IONIC_QINIT_F_IRQ 0x01 /* Request interrupt on completion */
+#define IONIC_QINIT_F_ENA 0x02 /* Enable the queue */
+#define IONIC_QINIT_F_SG 0x04 /* Enable scatter/gather on the queue */
+#define IONIC_QINIT_F_EQ 0x08 /* Enable event queue */
+#define IONIC_QINIT_F_DEBUG 0x80 /* Enable queue debugging */
+ u8 cos;
+ u8 ring_size;
+ __le64 ring_base;
+ __le64 cq_ring_base;
+ __le64 sg_ring_base;
+ __le32 eq_index;
+ u8 rsvd2[16];
+};
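
A small hedged helper, hypothetical and not part of the patch, showing the @ring_size encoding described above: the field carries log2 of the descriptor count, so a 64-entry ring is requested with ring_size = 6.

	#include <linux/log2.h>

	/* Hypothetical: encode a power-of-two descriptor count into the
	 * log2-style ring_size field.  Callers keep num_descs in the
	 * documented 4..64k range (ring_size 2..16).
	 */
	static inline u8 example_encode_ring_size(unsigned int num_descs)
	{
		return (u8)ilog2(num_descs);
	}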
+
+/**
+ * struct q_init_comp - Queue init command completion
+ * @status: The status of the command (enum status_code)
+ * @ver: Queue version (defines opcode/descriptor scope)
+ * @comp_index: The index in the descriptor ring for which this
+ * is the completion.
+ * @hw_index: Hardware Queue ID
+ * @hw_type: Hardware Queue type
+ * @color: Color
+ */
+struct ionic_q_init_comp {
+ u8 status;
+ u8 ver;
+ __le16 comp_index;
+ __le32 hw_index;
+ u8 hw_type;
+ u8 rsvd2[6];
+ u8 color;
+};
+
+/* the device's internal addressing uses up to 52 bits */
+#define IONIC_ADDR_LEN 52
+#define IONIC_ADDR_MASK (BIT_ULL(IONIC_ADDR_LEN) - 1)
+
+enum ionic_txq_desc_opcode {
+ IONIC_TXQ_DESC_OPCODE_CSUM_NONE = 0,
+ IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL = 1,
+ IONIC_TXQ_DESC_OPCODE_CSUM_HW = 2,
+ IONIC_TXQ_DESC_OPCODE_TSO = 3,
+};
+
+/**
+ * struct txq_desc - Ethernet Tx queue descriptor format
+ * @opcode: Tx operation, see TXQ_DESC_OPCODE_*:
+ *
+ * IONIC_TXQ_DESC_OPCODE_CSUM_NONE:
+ *
+ * Non-offload send. No segmentation,
+ * fragmentation or checksum calc/insertion is
+ * performed by device; packet is prepared
+ * to send by software stack and requires
+ * no further manipulation from device.
+ *
+ * IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL:
+ *
+ * Offload 16-bit L4 checksum
+ * calculation/insertion. The device will
+ * calculate the L4 checksum value and
+ * insert the result in the packet's L4
+ * header checksum field. The L4 checksum
+ * is calculated starting at @csum_start bytes
+ * into the packet to the end of the packet.
+ * The checksum insertion position is given
+ * in @csum_offset. This feature is only
+ * applicable to protocols such as TCP, UDP
+ * and ICMP where a standard (i.e. the
+ * 'IP-style' checksum) one's complement
+ * 16-bit checksum is used, using an IP
+ * pseudo-header to seed the calculation.
+ * Software will preload the L4 checksum
+ * field with the IP pseudo-header checksum.
+ *
+ * For tunnel encapsulation, @csum_start and
+ * @csum_offset refer to the inner L4
+ * header. Supported tunnel encapsulations
+ * are: IPIP, GRE, and UDP. If the @encap
+ * is clear, no further processing by the
+ * device is required; software will
+ * calculate the outer header checksums. If
+ * the @encap is set, the device will
+ * offload the outer header checksums using
+ * LCO (local checksum offload) (see
+ * Documentation/networking/checksum-
+ * offloads.txt for more info).
+ *
+ * IONIC_TXQ_DESC_OPCODE_CSUM_HW:
+ *
+ * Offload 16-bit checksum computation to hardware.
+ * If @csum_l3 is set then the packet's L3 checksum is
+ * updated. Similarly, if @csum_l4 is set then the L4
+ * checksum is updated. If @encap is set then encap header
+ * checksums are also updated.
+ *
+ * IONIC_TXQ_DESC_OPCODE_TSO:
+ *
+ * Device performs TCP segmentation offload
+ * (TSO). @hdr_len is the number of bytes
+ * to the end of TCP header (the offset to
+ * the TCP payload). @mss is the desired
+ * MSS, the TCP payload length for each
+ * segment. The device will calculate/
+ * insert IP (IPv4 only) and TCP checksums
+ * for each segment. In the first data
+ * buffer containing the header template,
+ * the driver will set IPv4 checksum to 0
+ * and preload TCP checksum with the IP
+ * pseudo header calculated with IP length = 0.
+ *
+ * Supported tunnel encapsulations are IPIP,
+ * layer-3 GRE, and UDP. @hdr_len includes
+ * both outer and inner headers. The driver
+ * will set IPv4 checksum to zero and
+ * preload TCP checksum with IP pseudo
+ * header on the inner header.
+ *
+ * TCP ECN offload is supported. The device
+ * will set CWR flag in the first segment if
+ * CWR is set in the template header, and
+ * clear CWR in remaining segments.
+ * @flags:
+ * vlan:
+ * Insert an L2 VLAN header using @vlan_tci.
+ * encap:
+ * Calculate encap header checksum.
+ * csum_l3:
+ * Compute L3 header checksum.
+ * csum_l4:
+ * Compute L4 header checksum.
+ * tso_sot:
+ * TSO start
+ * tso_eot:
+ * TSO end
+ * @num_sg_elems: Number of scatter-gather elements in SG
+ * descriptor
+ * @addr: First data buffer's DMA address.
+ * (Subsequent data buffers are on txq_sg_desc).
+ * @len: First data buffer's length, in bytes
+ * @vlan_tci: VLAN tag to insert in the packet (if requested
+ * by @V-bit). Includes .1p and .1q tags
+ * @hdr_len: Length of packet headers, including
+ * encapsulating outer header, if applicable.
+ * Valid for opcodes TXQ_DESC_OPCODE_CALC_CSUM and
+ * TXQ_DESC_OPCODE_TSO. Should be set to zero for
+ * all other modes. For
+ * TXQ_DESC_OPCODE_CALC_CSUM, @hdr_len is length
+ * of headers up to inner-most L4 header. For
+ * TXQ_DESC_OPCODE_TSO, @hdr_len is up to
+ * inner-most L4 payload, so inclusive of
+ * inner-most L4 header.
+ * @mss: Desired MSS value for TSO. Only applicable for
+ * TXQ_DESC_OPCODE_TSO.
+ * @csum_start: Offset into inner-most L3 header of checksum
+ * @csum_offset: Offset into inner-most L4 header of checksum
+ */
+
+#define IONIC_TXQ_DESC_OPCODE_MASK 0xf
+#define IONIC_TXQ_DESC_OPCODE_SHIFT 4
+#define IONIC_TXQ_DESC_FLAGS_MASK 0xf
+#define IONIC_TXQ_DESC_FLAGS_SHIFT 0
+#define IONIC_TXQ_DESC_NSGE_MASK 0xf
+#define IONIC_TXQ_DESC_NSGE_SHIFT 8
+#define IONIC_TXQ_DESC_ADDR_MASK (BIT_ULL(IONIC_ADDR_LEN) - 1)
+#define IONIC_TXQ_DESC_ADDR_SHIFT 12
+
+/* common flags */
+#define IONIC_TXQ_DESC_FLAG_VLAN 0x1
+#define IONIC_TXQ_DESC_FLAG_ENCAP 0x2
+
+/* flags for csum_hw opcode */
+#define IONIC_TXQ_DESC_FLAG_CSUM_L3 0x4
+#define IONIC_TXQ_DESC_FLAG_CSUM_L4 0x8
+
+/* flags for tso opcode */
+#define IONIC_TXQ_DESC_FLAG_TSO_SOT 0x4
+#define IONIC_TXQ_DESC_FLAG_TSO_EOT 0x8
+
+struct ionic_txq_desc {
+ __le64 cmd;
+ __le16 len;
+ union {
+ __le16 vlan_tci;
+ __le16 hword0;
+ };
+ union {
+ __le16 csum_start;
+ __le16 hdr_len;
+ __le16 hword1;
+ };
+ union {
+ __le16 csum_offset;
+ __le16 mss;
+ __le16 hword2;
+ };
+};
+
+static inline u64 encode_txq_desc_cmd(u8 opcode, u8 flags,
+ u8 nsge, u64 addr)
+{
+ u64 cmd;
+
+ cmd = (opcode & IONIC_TXQ_DESC_OPCODE_MASK) << IONIC_TXQ_DESC_OPCODE_SHIFT;
+ cmd |= (flags & IONIC_TXQ_DESC_FLAGS_MASK) << IONIC_TXQ_DESC_FLAGS_SHIFT;
+ cmd |= (nsge & IONIC_TXQ_DESC_NSGE_MASK) << IONIC_TXQ_DESC_NSGE_SHIFT;
+ cmd |= (addr & IONIC_TXQ_DESC_ADDR_MASK) << IONIC_TXQ_DESC_ADDR_SHIFT;
+
+ return cmd;
+};
+
+static inline void decode_txq_desc_cmd(u64 cmd, u8 *opcode, u8 *flags,
+ u8 *nsge, u64 *addr)
+{
+ *opcode = (cmd >> IONIC_TXQ_DESC_OPCODE_SHIFT) & IONIC_TXQ_DESC_OPCODE_MASK;
+ *flags = (cmd >> IONIC_TXQ_DESC_FLAGS_SHIFT) & IONIC_TXQ_DESC_FLAGS_MASK;
+ *nsge = (cmd >> IONIC_TXQ_DESC_NSGE_SHIFT) & IONIC_TXQ_DESC_NSGE_MASK;
+ *addr = (cmd >> IONIC_TXQ_DESC_ADDR_SHIFT) & IONIC_TXQ_DESC_ADDR_MASK;
+};
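
A hedged usage sketch for encode_txq_desc_cmd(), illustrative only and not the driver's Tx path: fill a single-buffer, no-offload descriptor and leave the optional header words cleared.

	/* Illustrative only: build a CSUM_NONE descriptor for one DMA buffer
	 * with no SG elements.
	 */
	static void example_fill_plain_desc(struct ionic_txq_desc *desc,
					    u64 addr, u16 len)
	{
		desc->cmd = cpu_to_le64(encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_NONE,
							    0, 0, addr));
		desc->len = cpu_to_le16(len);
		desc->hword0 = 0;
		desc->hword1 = 0;
		desc->hword2 = 0;
	}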
+
+#define IONIC_TX_MAX_SG_ELEMS 8
+#define IONIC_RX_MAX_SG_ELEMS 8
+
+/**
+ * struct txq_sg_desc - Transmit scatter-gather (SG) list
+ * @addr: DMA address of SG element data buffer
+ * @len: Length of SG element data buffer, in bytes
+ */
+struct ionic_txq_sg_desc {
+ struct ionic_txq_sg_elem {
+ __le64 addr;
+ __le16 len;
+ __le16 rsvd[3];
+ } elems[IONIC_TX_MAX_SG_ELEMS];
+};
+
+/**
+ * struct txq_comp - Ethernet transmit queue completion descriptor
+ * @status: The status of the command (enum status_code)
+ * @comp_index: The index in the descriptor ring for which this
+ * is the completion.
+ * @color: Color bit.
+ */
+struct ionic_txq_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ u8 rsvd2[11];
+ u8 color;
+};
+
+enum ionic_rxq_desc_opcode {
+ IONIC_RXQ_DESC_OPCODE_SIMPLE = 0,
+ IONIC_RXQ_DESC_OPCODE_SG = 1,
+};
+
+/**
+ * struct rxq_desc - Ethernet Rx queue descriptor format
+ * @opcode: Rx operation, see RXQ_DESC_OPCODE_*:
+ *
+ * RXQ_DESC_OPCODE_SIMPLE:
+ *
+ * Receive full packet into data buffer
+ * starting at @addr. Results of
+ * receive, including actual bytes received,
+ * are recorded in Rx completion descriptor.
+ *
+ * @len: Data buffer's length, in bytes.
+ * @addr: Data buffer's DMA address
+ */
+struct ionic_rxq_desc {
+ u8 opcode;
+ u8 rsvd[5];
+ __le16 len;
+ __le64 addr;
+};
+
+/**
+ * struct rxq_sg_desc - Receive scatter-gather (SG) list
+ * @addr: DMA address of SG element data buffer
+ * @len: Length of SG element data buffer, in bytes
+ */
+struct ionic_rxq_sg_desc {
+ struct ionic_rxq_sg_elem {
+ __le64 addr;
+ __le16 len;
+ __le16 rsvd[3];
+ } elems[IONIC_RX_MAX_SG_ELEMS];
+};
+
+/**
+ * struct rxq_comp - Ethernet receive queue completion descriptor
+ * @status: The status of the command (enum status_code)
+ * @num_sg_elems: Number of SG elements used by this descriptor
+ * @comp_index: The index in the descriptor ring for which this
+ * is the completion.
+ * @rss_hash: 32-bit RSS hash
+ * @csum: 16-bit sum of the packet's L2 payload.
+ * If the packet's L2 payload is odd length, an extra
+ * zero-value byte is included in the @csum calculation but
+ * not included in @len.
+ * @vlan_tci: VLAN tag stripped from the packet. Valid if @VLAN is
+ * set. Includes .1p and .1q tags.
+ * @len: Received packet length, in bytes. Excludes FCS.
+ * @csum_calc: Set if the L2 payload checksum was computed
+ * @csum_tcp_ok: The TCP checksum calculated by the device
+ * matched the checksum in the receive packet's
+ * TCP header
+ * @csum_tcp_bad: The TCP checksum calculated by the device did
+ * not match the checksum in the receive packet's
+ * TCP header.
+ * @csum_udp_ok: The UDP checksum calculated by the device
+ * matched the checksum in the receive packet's
+ * UDP header
+ * @csum_udp_bad: The UDP checksum calculated by the device did
+ * not match the checksum in the receive packet's
+ * UDP header.
+ * @csum_ip_ok: The IPv4 checksum calculated by the device
+ * matched the checksum in the receive packet's
+ * first IPv4 header. If the receive packet
+ * contains both a tunnel IPv4 header and a
+ * transport IPv4 header, the device validates the
+ * checksum for the both IPv4 headers.
+ * @csum_ip_bad: The IPv4 checksum calculated by the device did
+ * not match the checksum in the receive packet's
+ * first IPv4 header. If the receive packet
+ * contains both a tunnel IPv4 header and a
+ * transport IPv4 header, the device validates the
+ * checksum for both IP headers.
+ * @VLAN: VLAN header was stripped and placed in @vlan_tci.
+ * @pkt_type: Packet type
+ * @color: Color bit.
+ */
+struct ionic_rxq_comp {
+ u8 status;
+ u8 num_sg_elems;
+ __le16 comp_index;
+ __le32 rss_hash;
+ __le16 csum;
+ __le16 vlan_tci;
+ __le16 len;
+ u8 csum_flags;
+#define IONIC_RXQ_COMP_CSUM_F_TCP_OK 0x01
+#define IONIC_RXQ_COMP_CSUM_F_TCP_BAD 0x02
+#define IONIC_RXQ_COMP_CSUM_F_UDP_OK 0x04
+#define IONIC_RXQ_COMP_CSUM_F_UDP_BAD 0x08
+#define IONIC_RXQ_COMP_CSUM_F_IP_OK 0x10
+#define IONIC_RXQ_COMP_CSUM_F_IP_BAD 0x20
+#define IONIC_RXQ_COMP_CSUM_F_VLAN 0x40
+#define IONIC_RXQ_COMP_CSUM_F_CALC 0x80
+ u8 pkt_type_color;
+#define IONIC_RXQ_COMP_PKT_TYPE_MASK 0x0f
+};
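
As a hedged sketch of how a receive path could consume the csum_flags above (an assumption for illustration, not necessarily what the driver's Rx code does): the CALC bit makes the 16-bit @csum usable as CHECKSUM_COMPLETE, while any *_BAD bit suggests falling back to CHECKSUM_NONE.

	#include <linux/skbuff.h>

	/* Assumption-labeled sketch: translate completion checksum flags
	 * into skb checksum hints.
	 */
	static void example_rx_csum(struct sk_buff *skb,
				    const struct ionic_rxq_comp *comp)
	{
		if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) {
			skb->ip_summed = CHECKSUM_COMPLETE;
			skb->csum = (__force __wsum)le16_to_cpu(comp->csum);
		} else if (comp->csum_flags & (IONIC_RXQ_COMP_CSUM_F_TCP_BAD |
					       IONIC_RXQ_COMP_CSUM_F_UDP_BAD |
					       IONIC_RXQ_COMP_CSUM_F_IP_BAD)) {
			skb->ip_summed = CHECKSUM_NONE;
		}
	}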
+
+enum ionic_pkt_type {
+ IONIC_PKT_TYPE_NON_IP = 0x000,
+ IONIC_PKT_TYPE_IPV4 = 0x001,
+ IONIC_PKT_TYPE_IPV4_TCP = 0x003,
+ IONIC_PKT_TYPE_IPV4_UDP = 0x005,
+ IONIC_PKT_TYPE_IPV6 = 0x008,
+ IONIC_PKT_TYPE_IPV6_TCP = 0x018,
+ IONIC_PKT_TYPE_IPV6_UDP = 0x028,
+};
+
+enum ionic_eth_hw_features {
+ IONIC_ETH_HW_VLAN_TX_TAG = BIT(0),
+ IONIC_ETH_HW_VLAN_RX_STRIP = BIT(1),
+ IONIC_ETH_HW_VLAN_RX_FILTER = BIT(2),
+ IONIC_ETH_HW_RX_HASH = BIT(3),
+ IONIC_ETH_HW_RX_CSUM = BIT(4),
+ IONIC_ETH_HW_TX_SG = BIT(5),
+ IONIC_ETH_HW_RX_SG = BIT(6),
+ IONIC_ETH_HW_TX_CSUM = BIT(7),
+ IONIC_ETH_HW_TSO = BIT(8),
+ IONIC_ETH_HW_TSO_IPV6 = BIT(9),
+ IONIC_ETH_HW_TSO_ECN = BIT(10),
+ IONIC_ETH_HW_TSO_GRE = BIT(11),
+ IONIC_ETH_HW_TSO_GRE_CSUM = BIT(12),
+ IONIC_ETH_HW_TSO_IPXIP4 = BIT(13),
+ IONIC_ETH_HW_TSO_IPXIP6 = BIT(14),
+ IONIC_ETH_HW_TSO_UDP = BIT(15),
+ IONIC_ETH_HW_TSO_UDP_CSUM = BIT(16),
+};
+
+/**
+ * struct q_control_cmd - Queue control command
+ * @opcode: opcode
+ * @type: Queue type
+ * @lif_index: LIF index
+ * @index: Queue index
+ * @oper: Operation (enum q_control_oper)
+ */
+struct ionic_q_control_cmd {
+ u8 opcode;
+ u8 type;
+ __le16 lif_index;
+ __le32 index;
+ u8 oper;
+ u8 rsvd[55];
+};
+
+typedef struct ionic_admin_comp ionic_q_control_comp;
+
+enum q_control_oper {
+ IONIC_Q_DISABLE = 0,
+ IONIC_Q_ENABLE = 1,
+ IONIC_Q_HANG_RESET = 2,
+};
+
+/**
+ * Physical connection type
+ */
+enum ionic_phy_type {
+ IONIC_PHY_TYPE_NONE = 0,
+ IONIC_PHY_TYPE_COPPER = 1,
+ IONIC_PHY_TYPE_FIBER = 2,
+};
+
+/**
+ * Transceiver status
+ */
+enum ionic_xcvr_state {
+ IONIC_XCVR_STATE_REMOVED = 0,
+ IONIC_XCVR_STATE_INSERTED = 1,
+ IONIC_XCVR_STATE_PENDING = 2,
+ IONIC_XCVR_STATE_SPROM_READ = 3,
+ IONIC_XCVR_STATE_SPROM_READ_ERR = 4,
+};
+
+/**
+ * Supported link modes
+ */
+enum ionic_xcvr_pid {
+ IONIC_XCVR_PID_UNKNOWN = 0,
+
+ /* CU */
+ IONIC_XCVR_PID_QSFP_100G_CR4 = 1,
+ IONIC_XCVR_PID_QSFP_40GBASE_CR4 = 2,
+ IONIC_XCVR_PID_SFP_25GBASE_CR_S = 3,
+ IONIC_XCVR_PID_SFP_25GBASE_CR_L = 4,
+ IONIC_XCVR_PID_SFP_25GBASE_CR_N = 5,
+
+ /* Fiber */
+ IONIC_XCVR_PID_QSFP_100G_AOC = 50,
+ IONIC_XCVR_PID_QSFP_100G_ACC = 51,
+ IONIC_XCVR_PID_QSFP_100G_SR4 = 52,
+ IONIC_XCVR_PID_QSFP_100G_LR4 = 53,
+ IONIC_XCVR_PID_QSFP_100G_ER4 = 54,
+ IONIC_XCVR_PID_QSFP_40GBASE_ER4 = 55,
+ IONIC_XCVR_PID_QSFP_40GBASE_SR4 = 56,
+ IONIC_XCVR_PID_QSFP_40GBASE_LR4 = 57,
+ IONIC_XCVR_PID_QSFP_40GBASE_AOC = 58,
+ IONIC_XCVR_PID_SFP_25GBASE_SR = 59,
+ IONIC_XCVR_PID_SFP_25GBASE_LR = 60,
+ IONIC_XCVR_PID_SFP_25GBASE_ER = 61,
+ IONIC_XCVR_PID_SFP_25GBASE_AOC = 62,
+ IONIC_XCVR_PID_SFP_10GBASE_SR = 63,
+ IONIC_XCVR_PID_SFP_10GBASE_LR = 64,
+ IONIC_XCVR_PID_SFP_10GBASE_LRM = 65,
+ IONIC_XCVR_PID_SFP_10GBASE_ER = 66,
+ IONIC_XCVR_PID_SFP_10GBASE_AOC = 67,
+ IONIC_XCVR_PID_SFP_10GBASE_CU = 68,
+ IONIC_XCVR_PID_QSFP_100G_CWDM4 = 69,
+ IONIC_XCVR_PID_QSFP_100G_PSM4 = 70,
+};
+
+/**
+ * Port types
+ */
+enum ionic_port_type {
+ IONIC_PORT_TYPE_NONE = 0, /* port type not configured */
+ IONIC_PORT_TYPE_ETH = 1, /* port carries ethernet traffic (inband) */
+ IONIC_PORT_TYPE_MGMT = 2, /* port carries mgmt traffic (out-of-band) */
+};
+
+/**
+ * Port config state
+ */
+enum ionic_port_admin_state {
+ IONIC_PORT_ADMIN_STATE_NONE = 0, /* port admin state not configured */
+ IONIC_PORT_ADMIN_STATE_DOWN = 1, /* port is admin disabled */
+ IONIC_PORT_ADMIN_STATE_UP = 2, /* port is admin enabled */
+};
+
+/**
+ * Port operational status
+ */
+enum ionic_port_oper_status {
+ IONIC_PORT_OPER_STATUS_NONE = 0, /* port is disabled */
+ IONIC_PORT_OPER_STATUS_UP = 1, /* port is linked up */
+ IONIC_PORT_OPER_STATUS_DOWN = 2, /* port link status is down */
+};
+
+/**
+ * Ethernet Forward error correction (fec) modes
+ */
+enum ionic_port_fec_type {
+ IONIC_PORT_FEC_TYPE_NONE = 0, /* Disabled */
+ IONIC_PORT_FEC_TYPE_FC = 1, /* FireCode */
+ IONIC_PORT_FEC_TYPE_RS = 2, /* ReedSolomon */
+};
+
+/**
+ * Ethernet pause (flow control) modes
+ */
+enum ionic_port_pause_type {
+ IONIC_PORT_PAUSE_TYPE_NONE = 0, /* Disable Pause */
+ IONIC_PORT_PAUSE_TYPE_LINK = 1, /* Link level pause */
+ IONIC_PORT_PAUSE_TYPE_PFC = 2, /* Priority-Flow control */
+};
+
+/**
+ * Loopback modes
+ */
+enum ionic_port_loopback_mode {
+ IONIC_PORT_LOOPBACK_MODE_NONE = 0, /* Disable loopback */
+ IONIC_PORT_LOOPBACK_MODE_MAC = 1, /* MAC loopback */
+ IONIC_PORT_LOOPBACK_MODE_PHY = 2, /* PHY/Serdes loopback */
+};
+
+/**
+ * Transceiver Status information
+ * @state: Transceiver status (enum xcvr_state)
+ * @phy: Physical connection type (enum phy_type)
+ * @pid: Transceiver link mode (enum pid)
+ * @sprom: Transceiver sprom contents
+ */
+struct ionic_xcvr_status {
+ u8 state;
+ u8 phy;
+ __le16 pid;
+ u8 sprom[256];
+};
+
+/**
+ * Port configuration
+ * @speed: port speed (in Mbps)
+ * @mtu: mtu
+ * @state: port admin state (enum port_admin_state)
+ * @an_enable: autoneg enable
+ * @fec_type: fec type (enum port_fec_type)
+ * @pause_type: pause type (enum port_pause_type)
+ * @loopback_mode: loopback mode (enum port_loopback_mode)
+ */
+union ionic_port_config {
+ struct {
+#define IONIC_SPEED_100G 100000 /* 100G in Mbps */
+#define IONIC_SPEED_50G 50000 /* 50G in Mbps */
+#define IONIC_SPEED_40G 40000 /* 40G in Mbps */
+#define IONIC_SPEED_25G 25000 /* 25G in Mbps */
+#define IONIC_SPEED_10G 10000 /* 10G in Mbps */
+#define IONIC_SPEED_1G 1000 /* 1G in Mbps */
+ __le32 speed;
+ __le32 mtu;
+ u8 state;
+ u8 an_enable;
+ u8 fec_type;
+#define IONIC_PAUSE_TYPE_MASK 0x0f
+#define IONIC_PAUSE_FLAGS_MASK 0xf0
+#define IONIC_PAUSE_F_TX 0x10
+#define IONIC_PAUSE_F_RX 0x20
+ u8 pause_type;
+ u8 loopback_mode;
+ };
+ __le32 words[64];
+};
+
+/**
+ * Port Status information
+ * @status: link status (enum port_oper_status)
+ * @id: port id
+ * @speed: link speed (in Mbps)
+ * @xcvr: transceiver status
+ */
+struct ionic_port_status {
+ __le32 id;
+ __le32 speed;
+ u8 status;
+ u8 rsvd[51];
+ struct ionic_xcvr_status xcvr;
+};
+
+/**
+ * struct port_identify_cmd - Port identify command
+ * @opcode: opcode
+ * @index: port index
+ * @ver: Highest version of identify supported by driver
+ */
+struct ionic_port_identify_cmd {
+ u8 opcode;
+ u8 index;
+ u8 ver;
+ u8 rsvd[61];
+};
+
+/**
+ * struct port_identify_comp - Port identify command completion
+ * @status: The status of the command (enum status_code)
+ * @ver: Version of identify returned by device
+ */
+struct ionic_port_identify_comp {
+ u8 status;
+ u8 ver;
+ u8 rsvd[14];
+};
+
+/**
+ * struct port_init_cmd - Port initialization command
+ * @opcode: opcode
+ * @index: port index
+ * @info_pa: destination address for port info (struct port_info)
+ */
+struct ionic_port_init_cmd {
+ u8 opcode;
+ u8 index;
+ u8 rsvd[6];
+ __le64 info_pa;
+ u8 rsvd2[48];
+};
+
+/**
+ * struct port_init_comp - Port initialization command completion
+ * @status: The status of the command (enum status_code)
+ */
+struct ionic_port_init_comp {
+ u8 status;
+ u8 rsvd[15];
+};
+
+/**
+ * struct port_reset_cmd - Port reset command
+ * @opcode: opcode
+ * @index: port index
+ */
+struct ionic_port_reset_cmd {
+ u8 opcode;
+ u8 index;
+ u8 rsvd[62];
+};
+
+/**
+ * struct port_reset_comp - Port reset command completion
+ * @status: The status of the command (enum status_code)
+ */
+struct ionic_port_reset_comp {
+ u8 status;
+ u8 rsvd[15];
+};
+
+/**
+ * enum stats_ctl_cmd - List of commands for stats control
+ */
+enum ionic_stats_ctl_cmd {
+ IONIC_STATS_CTL_RESET = 0,
+};
+
+/**
+ * enum ionic_port_attr - List of port attributes
+ */
+enum ionic_port_attr {
+ IONIC_PORT_ATTR_STATE = 0,
+ IONIC_PORT_ATTR_SPEED = 1,
+ IONIC_PORT_ATTR_MTU = 2,
+ IONIC_PORT_ATTR_AUTONEG = 3,
+ IONIC_PORT_ATTR_FEC = 4,
+ IONIC_PORT_ATTR_PAUSE = 5,
+ IONIC_PORT_ATTR_LOOPBACK = 6,
+ IONIC_PORT_ATTR_STATS_CTRL = 7,
+};
+
+/**
+ * struct port_setattr_cmd - Set port attributes on the NIC
+ * @opcode: Opcode
+ * @index: port index
+ * @attr: Attribute type (enum ionic_port_attr)
+ */
+struct ionic_port_setattr_cmd {
+ u8 opcode;
+ u8 index;
+ u8 attr;
+ u8 rsvd;
+ union {
+ u8 state;
+ __le32 speed;
+ __le32 mtu;
+ u8 an_enable;
+ u8 fec_type;
+ u8 pause_type;
+ u8 loopback_mode;
+ u8 stats_ctl;
+ u8 rsvd2[60];
+ };
+};
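As a hedged sketch, setting the port MTU through this command might look like the following; the opcode name is assumed from the command enum and the port index is a placeholder:

	struct ionic_port_setattr_cmd cmd = {
		.opcode = IONIC_CMD_PORT_SETATTR,	/* assumed opcode name */
		.index = 0,				/* placeholder port index */
		.attr = IONIC_PORT_ATTR_MTU,
		.mtu = cpu_to_le32(9000),
	};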
+
+/**
+ * struct port_setattr_comp - Port set attr command completion
+ * @status: The status of the command (enum status_code)
+ * @color: Color bit
+ */
+struct ionic_port_setattr_comp {
+ u8 status;
+ u8 rsvd[14];
+ u8 color;
+};
+
+/**
+ * struct port_getattr_cmd - Get port attributes from the NIC
+ * @opcode: Opcode
+ * @index: port index
+ * @attr: Attribute type (enum ionic_port_attr)
+ */
+struct ionic_port_getattr_cmd {
+ u8 opcode;
+ u8 index;
+ u8 attr;
+ u8 rsvd[61];
+};
+
+/**
+ * struct port_getattr_comp - Port get attr command completion
+ * @status: The status of the command (enum status_code)
+ * @color: Color bit
+ */
+struct ionic_port_getattr_comp {
+ u8 status;
+ u8 rsvd[3];
+ union {
+ u8 state;
+ __le32 speed;
+ __le32 mtu;
+ u8 an_enable;
+ u8 fec_type;
+ u8 pause_type;
+ u8 loopback_mode;
+ u8 rsvd2[11];
+ };
+ u8 color;
+};
+
+/**
+ * struct lif_status - Lif status register
+ * @eid: most recent NotifyQ event id
+ * @port_num: port the lif is connected to
+ * @link_status: port status (enum port_oper_status)
+ * @link_speed: speed of link in Mbps
+ * @link_down_count: number of times the link has gone down
+ */
+struct ionic_lif_status {
+ __le64 eid;
+ u8 port_num;
+ u8 rsvd;
+ __le16 link_status;
+ __le32 link_speed; /* units of 1Mbps: eg 10000 = 10Gbps */
+ __le16 link_down_count;
+ u8 rsvd2[46];
+};
+
+/**
+ * struct lif_reset_cmd - LIF reset command
+ * @opcode: opcode
+ * @index: LIF index
+ */
+struct ionic_lif_reset_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 index;
+ __le32 rsvd2[15];
+};
+
+typedef struct ionic_admin_comp ionic_lif_reset_comp;
+
+enum ionic_dev_state {
+ IONIC_DEV_DISABLE = 0,
+ IONIC_DEV_ENABLE = 1,
+ IONIC_DEV_HANG_RESET = 2,
+};
+
+/**
+ * enum dev_attr - List of device attributes
+ */
+enum ionic_dev_attr {
+ IONIC_DEV_ATTR_STATE = 0,
+ IONIC_DEV_ATTR_NAME = 1,
+ IONIC_DEV_ATTR_FEATURES = 2,
+};
+
+/**
+ * struct dev_setattr_cmd - Set Device attributes on the NIC
+ * @opcode: Opcode
+ * @attr: Attribute type (enum dev_attr)
+ * @state: Device state (enum dev_state)
+ * @name: The bus info, e.g. PCI slot-device-function, 0 terminated
+ * @features: Device features
+ */
+struct ionic_dev_setattr_cmd {
+ u8 opcode;
+ u8 attr;
+ __le16 rsvd;
+ union {
+ u8 state;
+ char name[IONIC_IFNAMSIZ];
+ __le64 features;
+ u8 rsvd2[60];
+ };
+};
+
+/**
+ * struct dev_setattr_comp - Device set attr command completion
+ * @status: The status of the command (enum status_code)
+ * @features: Device features
+ * @color: Color bit
+ */
+struct ionic_dev_setattr_comp {
+ u8 status;
+ u8 rsvd[3];
+ union {
+ __le64 features;
+ u8 rsvd2[11];
+ };
+ u8 color;
+};
+
+/**
+ * struct dev_getattr_cmd - Get Device attributes from the NIC
+ * @opcode: opcode
+ * @attr: Attribute type (enum dev_attr)
+ */
+struct ionic_dev_getattr_cmd {
+ u8 opcode;
+ u8 attr;
+ u8 rsvd[62];
+};
+
+/**
+ * struct dev_getattr_comp - Device get attr command completion
+ * @status: The status of the command (enum status_code)
+ * @features: Device features
+ * @color: Color bit
+ */
+struct ionic_dev_getattr_comp {
+ u8 status;
+ u8 rsvd[3];
+ union {
+ __le64 features;
+ u8 rsvd2[11];
+ };
+ u8 color;
+};
+
+/**
+ * RSS parameters
+ */
+#define IONIC_RSS_HASH_KEY_SIZE 40
+
+enum ionic_rss_hash_types {
+ IONIC_RSS_TYPE_IPV4 = BIT(0),
+ IONIC_RSS_TYPE_IPV4_TCP = BIT(1),
+ IONIC_RSS_TYPE_IPV4_UDP = BIT(2),
+ IONIC_RSS_TYPE_IPV6 = BIT(3),
+ IONIC_RSS_TYPE_IPV6_TCP = BIT(4),
+ IONIC_RSS_TYPE_IPV6_UDP = BIT(5),
+};
+
+/**
+ * enum lif_attr - List of LIF attributes
+ */
+enum ionic_lif_attr {
+ IONIC_LIF_ATTR_STATE = 0,
+ IONIC_LIF_ATTR_NAME = 1,
+ IONIC_LIF_ATTR_MTU = 2,
+ IONIC_LIF_ATTR_MAC = 3,
+ IONIC_LIF_ATTR_FEATURES = 4,
+ IONIC_LIF_ATTR_RSS = 5,
+ IONIC_LIF_ATTR_STATS_CTRL = 6,
+};
+
+/**
+ * struct lif_setattr_cmd - Set LIF attributes on the NIC
+ * @opcode: Opcode
+ * @type: Attribute type (enum lif_attr)
+ * @index: LIF index
+ * @state: lif state (enum lif_state)
+ * @name: The netdev name string, 0 terminated
+ * @mtu: Mtu
+ * @mac: Station mac
+ * @features: Features (enum eth_hw_features)
+ * @rss: RSS properties
+ * @types: The hash types to enable (see rss_hash_types).
+ * @key: The hash secret key.
+ * @addr: Address for the indirection table shared memory.
+ * @stats_ctl: stats control commands (enum stats_ctl_cmd)
+ */
+struct ionic_lif_setattr_cmd {
+ u8 opcode;
+ u8 attr;
+ __le16 index;
+ union {
+ u8 state;
+ char name[IONIC_IFNAMSIZ];
+ __le32 mtu;
+ u8 mac[6];
+ __le64 features;
+ struct {
+ __le16 types;
+ u8 key[IONIC_RSS_HASH_KEY_SIZE];
+ u8 rsvd[6];
+ __le64 addr;
+ } rss;
+ u8 stats_ctl;
+ u8 rsvd[60];
+ };
+};
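A hedged sketch of filling the RSS variant of this command; the opcode name is assumed from the command enum, and the LIF index, the 40-byte key buffer, and the DMA address of the indirection table are placeholders for values the caller already holds:

	struct ionic_lif_setattr_cmd cmd = {
		.opcode = IONIC_CMD_LIF_SETATTR,	/* assumed opcode name */
		.attr = IONIC_LIF_ATTR_RSS,
		.index = cpu_to_le16(lif_index),	/* placeholder LIF index */
	};

	cmd.rss.types = cpu_to_le16(IONIC_RSS_TYPE_IPV4_TCP | IONIC_RSS_TYPE_IPV6_TCP);
	memcpy(cmd.rss.key, rss_key, IONIC_RSS_HASH_KEY_SIZE);	/* caller-provided key */
	cmd.rss.addr = cpu_to_le64(indir_table_pa);	/* indirection table DMA address */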
+
+/**
+ * struct lif_setattr_comp - LIF set attr command completion
+ * @status: The status of the command (enum status_code)
+ * @comp_index: The index in the descriptor ring for which this
+ * is the completion.
+ * @features: features (enum eth_hw_features)
+ * @color: Color bit
+ */
+struct ionic_lif_setattr_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ union {
+ __le64 features;
+ u8 rsvd2[11];
+ };
+ u8 color;
+};
+
+/**
+ * struct lif_getattr_cmd - Get LIF attributes from the NIC
+ * @opcode: Opcode
+ * @attr: Attribute type (enum lif_attr)
+ * @index: LIF index
+ */
+struct ionic_lif_getattr_cmd {
+ u8 opcode;
+ u8 attr;
+ __le16 index;
+ u8 rsvd[60];
+};
+
+/**
+ * struct lif_getattr_comp - LIF get attr command completion
+ * @status: The status of the command (enum status_code)
+ * @comp_index: The index in the descriptor ring for which this
+ * is the completion.
+ * @state: lif state (enum lif_state)
+ * @name: The netdev name string, 0 terminated
+ * @mtu: Mtu
+ * @mac: Station mac
+ * @features: Features (enum eth_hw_features)
+ * @color: Color bit
+ */
+struct ionic_lif_getattr_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ union {
+ u8 state;
+ __le32 mtu;
+ u8 mac[6];
+ __le64 features;
+ u8 rsvd2[11];
+ };
+ u8 color;
+};
+
+enum ionic_rx_mode {
+ IONIC_RX_MODE_F_UNICAST = BIT(0),
+ IONIC_RX_MODE_F_MULTICAST = BIT(1),
+ IONIC_RX_MODE_F_BROADCAST = BIT(2),
+ IONIC_RX_MODE_F_PROMISC = BIT(3),
+ IONIC_RX_MODE_F_ALLMULTI = BIT(4),
+};
+
+/**
+ * struct rx_mode_set_cmd - Set LIF's Rx mode command
+ * @opcode: opcode
+ * @lif_index: LIF index
+ * @rx_mode: Rx mode flags:
+ * IONIC_RX_MODE_F_UNICAST: Accept known unicast packets.
+ * IONIC_RX_MODE_F_MULTICAST: Accept known multicast packets.
+ * IONIC_RX_MODE_F_BROADCAST: Accept broadcast packets.
+ * IONIC_RX_MODE_F_PROMISC: Accept any packets.
+ * IONIC_RX_MODE_F_ALLMULTI: Accept any multicast packets.
+ */
+struct ionic_rx_mode_set_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 lif_index;
+ __le16 rx_mode;
+ __le16 rsvd2[29];
+};
+
+typedef struct ionic_admin_comp ionic_rx_mode_set_comp;
+
+enum ionic_rx_filter_match_type {
+ IONIC_RX_FILTER_MATCH_VLAN = 0,
+ IONIC_RX_FILTER_MATCH_MAC,
+ IONIC_RX_FILTER_MATCH_MAC_VLAN,
+};
+
+/**
+ * struct rx_filter_add_cmd - Add LIF Rx filter command
+ * @opcode: opcode
+ * @qtype: Queue type
+ * @lif_index: LIF index
+ * @qid: Queue ID
+ * @match: Rx filter match type. (See IONIC_RX_FILTER_MATCH_xxx)
+ * @vlan: VLAN ID
+ * @addr: MAC address (network-byte order)
+ */
+struct ionic_rx_filter_add_cmd {
+ u8 opcode;
+ u8 qtype;
+ __le16 lif_index;
+ __le32 qid;
+ __le16 match;
+ union {
+ struct {
+ __le16 vlan;
+ } vlan;
+ struct {
+ u8 addr[6];
+ } mac;
+ struct {
+ __le16 vlan;
+ u8 addr[6];
+ } mac_vlan;
+ u8 rsvd[54];
+ };
+};
+
+/**
+ * struct rx_filter_add_comp - Add LIF Rx filter command completion
+ * @status: The status of the command (enum status_code)
+ * @comp_index: The index in the descriptor ring for which this
+ * is the completion.
+ * @filter_id: Filter ID
+ * @color: Color bit.
+ */
+struct ionic_rx_filter_add_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ __le32 filter_id;
+ u8 rsvd2[7];
+ u8 color;
+};
+
+/**
+ * struct rx_filter_del_cmd - Delete LIF Rx filter command
+ * @opcode: opcode
+ * @lif_index: LIF index
+ * @filter_id: Filter ID
+ */
+struct ionic_rx_filter_del_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 lif_index;
+ __le32 filter_id;
+ u8 rsvd2[56];
+};
+
+typedef struct ionic_admin_comp ionic_rx_filter_del_comp;
+
+/**
+ * struct qos_identify_cmd - QoS identify command
+ * @opcode: opcode
+ * @ver: Highest version of identify supported by driver
+ */
+struct ionic_qos_identify_cmd {
+ u8 opcode;
+ u8 ver;
+ u8 rsvd[62];
+};
+
+/**
+ * struct qos_identify_comp - QoS identify command completion
+ * @status: The status of the command (enum status_code)
+ * @ver: Version of identify returned by device
+ */
+struct ionic_qos_identify_comp {
+ u8 status;
+ u8 ver;
+ u8 rsvd[14];
+};
+
+#define IONIC_QOS_CLASS_MAX 7
+#define IONIC_QOS_CLASS_NAME_SZ 32
+#define IONIC_QOS_DSCP_MAX_VALUES 64
+
+/**
+ * enum qos_class
+ */
+enum ionic_qos_class {
+ IONIC_QOS_CLASS_DEFAULT = 0,
+ IONIC_QOS_CLASS_USER_DEFINED_1 = 1,
+ IONIC_QOS_CLASS_USER_DEFINED_2 = 2,
+ IONIC_QOS_CLASS_USER_DEFINED_3 = 3,
+ IONIC_QOS_CLASS_USER_DEFINED_4 = 4,
+ IONIC_QOS_CLASS_USER_DEFINED_5 = 5,
+ IONIC_QOS_CLASS_USER_DEFINED_6 = 6,
+};
+
+/**
+ * enum qos_class_type - Traffic classification criteria
+ */
+enum ionic_qos_class_type {
+ IONIC_QOS_CLASS_TYPE_NONE = 0,
+ IONIC_QOS_CLASS_TYPE_PCP = 1, /* Dot1Q pcp */
+ IONIC_QOS_CLASS_TYPE_DSCP = 2, /* IP dscp */
+};
+
+/**
+ * enum qos_sched_type - Qos class scheduling type
+ */
+enum ionic_qos_sched_type {
+ IONIC_QOS_SCHED_TYPE_STRICT = 0, /* Strict priority */
+ IONIC_QOS_SCHED_TYPE_DWRR = 1, /* Deficit weighted round-robin */
+};
+
+/**
+ * union qos_config - Qos configuration structure
+ * @flags: Configuration flags
+ * IONIC_QOS_CONFIG_F_ENABLE enable
+ * IONIC_QOS_CONFIG_F_DROP drop/nodrop
+ * IONIC_QOS_CONFIG_F_RW_DOT1Q_PCP enable dot1q pcp rewrite
+ * IONIC_QOS_CONFIG_F_RW_IP_DSCP enable ip dscp rewrite
+ * @sched_type: Qos class scheduling type (enum qos_sched_type)
+ * @class_type: Qos class type (enum qos_class_type)
+ * @pause_type: Qos pause type (enum qos_pause_type)
+ * @name: Qos class name
+ * @mtu: MTU of the class
+ * @pfc_cos: Class of service (PCP value) used for PFC pause frames (valid only for no-drop classes)
+ * @dwrr_weight: Qos class scheduling weight
+ * @strict_rlmt: Rate limit for strict priority scheduling
+ * @rw_dot1q_pcp: Rewrite dot1q pcp to this value (valid iff F_RW_DOT1Q_PCP)
+ * @rw_ip_dscp: Rewrite ip dscp to this value (valid iff F_RW_IP_DSCP)
+ * @dot1q_pcp: Dot1q pcp value
+ * @ndscp: Number of valid dscp values in the ip_dscp field
+ * @ip_dscp: IP dscp values
+ */
+union ionic_qos_config {
+ struct {
+#define IONIC_QOS_CONFIG_F_ENABLE BIT(0)
+#define IONIC_QOS_CONFIG_F_DROP BIT(1)
+#define IONIC_QOS_CONFIG_F_RW_DOT1Q_PCP BIT(2)
+#define IONIC_QOS_CONFIG_F_RW_IP_DSCP BIT(3)
+ u8 flags;
+ u8 sched_type;
+ u8 class_type;
+ u8 pause_type;
+ char name[IONIC_QOS_CLASS_NAME_SZ];
+ __le32 mtu;
+ /* flow control */
+ u8 pfc_cos;
+ /* scheduler */
+ union {
+ u8 dwrr_weight;
+ __le64 strict_rlmt;
+ };
+ /* marking */
+ union {
+ u8 rw_dot1q_pcp;
+ u8 rw_ip_dscp;
+ };
+ /* classification */
+ union {
+ u8 dot1q_pcp;
+ struct {
+ u8 ndscp;
+ u8 ip_dscp[IONIC_QOS_DSCP_MAX_VALUES];
+ };
+ };
+ };
+ __le32 words[64];
+};
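As a sketch under the field descriptions above, a DSCP-classified no-drop class could be described like this (all values are illustrative):

	union ionic_qos_config qos = { 0 };

	qos.flags = IONIC_QOS_CONFIG_F_ENABLE;		/* F_DROP left clear: no-drop class */
	qos.class_type = IONIC_QOS_CLASS_TYPE_DSCP;
	qos.sched_type = IONIC_QOS_SCHED_TYPE_DWRR;
	qos.dwrr_weight = 50;
	qos.mtu = cpu_to_le32(9216);
	qos.ndscp = 2;					/* two DSCP values map to this class */
	qos.ip_dscp[0] = 26;
	qos.ip_dscp[1] = 46;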
+
+/**
+ * union qos_identity - QoS identity structure
+ * @version: Version of the identify structure
+ * @type: QoS system type
+ * @nclasses: Number of usable QoS classes
+ * @config: Current configuration of classes
+ */
+union ionic_qos_identity {
+ struct {
+ u8 version;
+ u8 type;
+ u8 rsvd[62];
+ union ionic_qos_config config[IONIC_QOS_CLASS_MAX];
+ };
+ __le32 words[512];
+};
+
+/**
+ * struct qos_init_cmd - QoS config init command
+ * @opcode: Opcode
+ * @group: Qos class id
+ * @info_pa: destination address for qos info
+ */
+struct ionic_qos_init_cmd {
+ u8 opcode;
+ u8 group;
+ u8 rsvd[6];
+ __le64 info_pa;
+ u8 rsvd1[48];
+};
+
+typedef struct ionic_admin_comp ionic_qos_init_comp;
+
+/**
+ * struct qos_reset_cmd - Qos config reset command
+ * @opcode: Opcode
+ */
+struct ionic_qos_reset_cmd {
+ u8 opcode;
+ u8 group;
+ u8 rsvd[62];
+};
+
+typedef struct ionic_admin_comp ionic_qos_reset_comp;
+
+/**
+ * struct fw_download_cmd - Firmware download command
+ * @opcode: opcode
+ * @addr: dma address of the firmware buffer
+ * @offset: offset of the firmware buffer within the full image
+ * @length: number of valid bytes in the firmware buffer
+ */
+struct ionic_fw_download_cmd {
+ u8 opcode;
+ u8 rsvd[3];
+ __le32 offset;
+ __le64 addr;
+ __le32 length;
+};
+
+typedef struct ionic_admin_comp ionic_fw_download_comp;
+
+enum ionic_fw_control_oper {
+ IONIC_FW_RESET = 0, /* Reset firmware */
+ IONIC_FW_INSTALL = 1, /* Install firmware */
+ IONIC_FW_ACTIVATE = 2, /* Activate firmware */
+};
+
+/**
+ * struct fw_control_cmd - Firmware control command
+ * @opcode: opcode
+ * @oper: firmware control operation (enum fw_control_oper)
+ * @slot: slot to activate
+ */
+struct ionic_fw_control_cmd {
+ u8 opcode;
+ u8 rsvd[3];
+ u8 oper;
+ u8 slot;
+ u8 rsvd1[58];
+};
+
+/**
+ * struct fw_control_comp - Firmware control command completion
+ * @status: The status of the command (enum status_code)
+ * @comp_index: The index in the descriptor ring for which this is the completion
+ * @slot: slot where the firmware was installed
+ * @color: Color bit
+ */
+struct ionic_fw_control_comp {
+ u8 status;
+ u8 rsvd;
+ __le16 comp_index;
+ u8 slot;
+ u8 rsvd1[10];
+ u8 color;
+};
+
+/******************************************************************
+ ******************* RDMA Commands ********************************
+ ******************************************************************/
+
+/**
+ * struct rdma_reset_cmd - Reset RDMA LIF cmd
+ * @opcode: opcode
+ * @lif_index: lif index
+ *
+ * There is no rdma specific dev command completion struct. Completion uses
+ * the common struct admin_comp. Only the status is indicated. Nonzero status
+ * means the LIF does not support rdma.
+ **/
+struct ionic_rdma_reset_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 lif_index;
+ u8 rsvd2[60];
+};
+
+/**
+ * struct rdma_queue_cmd - Create RDMA Queue command
+ * @opcode: opcode, 52, 53
+ * @lif_index: lif index
+ * @qid_ver: (qid | (rdma version << 24))
+ * @cid: intr, eq_id, or cq_id
+ * @dbid: doorbell page id
+ * @depth_log2: log base two of queue depth
+ * @stride_log2: log base two of queue stride
+ * @dma_addr: address of the queue memory
+ * @xxx_table_index: temporary, but should not need pgtbl for contig. queues.
+ *
+ * The same command struct is used to create an rdma event queue, completion
+ * queue, or rdma admin queue. The cid is an interrupt number for an event
+ * queue, an event queue id for a completion queue, or a completion queue id
+ * for an rdma admin queue.
+ *
+ * The queue created via a dev command must be contiguous in dma space.
+ *
+ * The dev commands are intended only to be used during driver initialization,
+ * to create queues supporting the rdma admin queue. Other queues, and other
+ * types of rdma resources like memory regions, will be created and registered
+ * via the rdma admin queue, and will support a more complete interface
+ * providing scatter gather lists for larger, scattered queue buffers and
+ * memory registration.
+ *
+ * There is no rdma specific dev command completion struct. Completion uses
+ * the common struct admin_comp. Only the status is indicated.
+ **/
+struct ionic_rdma_queue_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 lif_index;
+ __le32 qid_ver;
+ __le32 cid;
+ __le16 dbid;
+ u8 depth_log2;
+ u8 stride_log2;
+ __le64 dma_addr;
+ u8 rsvd2[36];
+ __le32 xxx_table_index;
+};
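The qid_ver field packs the queue id into the low 24 bits and the rdma interface version into the top byte, as noted above. A small illustrative sketch of the encoding (the qid and version values are placeholders):

	struct ionic_rdma_queue_cmd cmd = { .opcode = 52 };	/* 52 or 53 per the comment above */
	u32 qid = 5;			/* placeholder queue id, must fit in 24 bits */
	u8 rdma_version = 1;		/* placeholder interface version */

	cmd.qid_ver = cpu_to_le32(qid | ((u32)rdma_version << 24));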
+
+/******************************************************************
+ ******************* Notify Events ********************************
+ ******************************************************************/
+
+/**
+ * struct notifyq_event
+ * @eid: event number
+ * @ecode: event code
+ * @data: unspecified data about the event
+ *
+ * This is the generic event report struct from which the other
+ * actual events will be formed.
+ */
+struct ionic_notifyq_event {
+ __le64 eid;
+ __le16 ecode;
+ u8 data[54];
+};
+
+/**
+ * struct link_change_event
+ * @eid: event number
+ * @ecode: event code = EVENT_OPCODE_LINK_CHANGE
+ * @link_status: link up or down, with error bits (enum port_oper_status)
+ * @link_speed: speed of the network link
+ *
+ * Sent when the network link state changes between UP and DOWN
+ */
+struct ionic_link_change_event {
+ __le64 eid;
+ __le16 ecode;
+ __le16 link_status;
+ __le32 link_speed; /* units of 1Mbps: e.g. 10000 = 10Gbps */
+ u8 rsvd[48];
+};
+
+/**
+ * struct reset_event
+ * @eid: event number
+ * @ecode: event code = EVENT_OPCODE_RESET
+ * @reset_code: reset type
+ * @state: 0=pending, 1=complete, 2=error
+ *
+ * Sent when the NIC or some subsystem is going to be or
+ * has been reset.
+ */
+struct ionic_reset_event {
+ __le64 eid;
+ __le16 ecode;
+ u8 reset_code;
+ u8 state;
+ u8 rsvd[52];
+};
+
+/**
+ * struct heartbeat_event
+ * @eid: event number
+ * @ecode: event code = EVENT_OPCODE_HEARTBEAT
+ *
+ * Sent periodically by the NIC to indicate continued health
+ */
+struct ionic_heartbeat_event {
+ __le64 eid;
+ __le16 ecode;
+ u8 rsvd[54];
+};
+
+/**
+ * struct log_event
+ * @eid: event number
+ * @ecode: event code = EVENT_OPCODE_LOG
+ * @data: log data
+ *
+ * Sent to notify the driver of an internal error.
+ */
+struct ionic_log_event {
+ __le64 eid;
+ __le16 ecode;
+ u8 data[54];
+};
+
+/**
+ * struct port_stats
+ */
+struct ionic_port_stats {
+ __le64 frames_rx_ok;
+ __le64 frames_rx_all;
+ __le64 frames_rx_bad_fcs;
+ __le64 frames_rx_bad_all;
+ __le64 octets_rx_ok;
+ __le64 octets_rx_all;
+ __le64 frames_rx_unicast;
+ __le64 frames_rx_multicast;
+ __le64 frames_rx_broadcast;
+ __le64 frames_rx_pause;
+ __le64 frames_rx_bad_length;
+ __le64 frames_rx_undersized;
+ __le64 frames_rx_oversized;
+ __le64 frames_rx_fragments;
+ __le64 frames_rx_jabber;
+ __le64 frames_rx_pripause;
+ __le64 frames_rx_stomped_crc;
+ __le64 frames_rx_too_long;
+ __le64 frames_rx_vlan_good;
+ __le64 frames_rx_dropped;
+ __le64 frames_rx_less_than_64b;
+ __le64 frames_rx_64b;
+ __le64 frames_rx_65b_127b;
+ __le64 frames_rx_128b_255b;
+ __le64 frames_rx_256b_511b;
+ __le64 frames_rx_512b_1023b;
+ __le64 frames_rx_1024b_1518b;
+ __le64 frames_rx_1519b_2047b;
+ __le64 frames_rx_2048b_4095b;
+ __le64 frames_rx_4096b_8191b;
+ __le64 frames_rx_8192b_9215b;
+ __le64 frames_rx_other;
+ __le64 frames_tx_ok;
+ __le64 frames_tx_all;
+ __le64 frames_tx_bad;
+ __le64 octets_tx_ok;
+ __le64 octets_tx_total;
+ __le64 frames_tx_unicast;
+ __le64 frames_tx_multicast;
+ __le64 frames_tx_broadcast;
+ __le64 frames_tx_pause;
+ __le64 frames_tx_pripause;
+ __le64 frames_tx_vlan;
+ __le64 frames_tx_less_than_64b;
+ __le64 frames_tx_64b;
+ __le64 frames_tx_65b_127b;
+ __le64 frames_tx_128b_255b;
+ __le64 frames_tx_256b_511b;
+ __le64 frames_tx_512b_1023b;
+ __le64 frames_tx_1024b_1518b;
+ __le64 frames_tx_1519b_2047b;
+ __le64 frames_tx_2048b_4095b;
+ __le64 frames_tx_4096b_8191b;
+ __le64 frames_tx_8192b_9215b;
+ __le64 frames_tx_other;
+ __le64 frames_tx_pri_0;
+ __le64 frames_tx_pri_1;
+ __le64 frames_tx_pri_2;
+ __le64 frames_tx_pri_3;
+ __le64 frames_tx_pri_4;
+ __le64 frames_tx_pri_5;
+ __le64 frames_tx_pri_6;
+ __le64 frames_tx_pri_7;
+ __le64 frames_rx_pri_0;
+ __le64 frames_rx_pri_1;
+ __le64 frames_rx_pri_2;
+ __le64 frames_rx_pri_3;
+ __le64 frames_rx_pri_4;
+ __le64 frames_rx_pri_5;
+ __le64 frames_rx_pri_6;
+ __le64 frames_rx_pri_7;
+ __le64 tx_pripause_0_1us_count;
+ __le64 tx_pripause_1_1us_count;
+ __le64 tx_pripause_2_1us_count;
+ __le64 tx_pripause_3_1us_count;
+ __le64 tx_pripause_4_1us_count;
+ __le64 tx_pripause_5_1us_count;
+ __le64 tx_pripause_6_1us_count;
+ __le64 tx_pripause_7_1us_count;
+ __le64 rx_pripause_0_1us_count;
+ __le64 rx_pripause_1_1us_count;
+ __le64 rx_pripause_2_1us_count;
+ __le64 rx_pripause_3_1us_count;
+ __le64 rx_pripause_4_1us_count;
+ __le64 rx_pripause_5_1us_count;
+ __le64 rx_pripause_6_1us_count;
+ __le64 rx_pripause_7_1us_count;
+ __le64 rx_pause_1us_count;
+ __le64 frames_tx_truncated;
+};
+
+struct ionic_mgmt_port_stats {
+ __le64 frames_rx_ok;
+ __le64 frames_rx_all;
+ __le64 frames_rx_bad_fcs;
+ __le64 frames_rx_bad_all;
+ __le64 octets_rx_ok;
+ __le64 octets_rx_all;
+ __le64 frames_rx_unicast;
+ __le64 frames_rx_multicast;
+ __le64 frames_rx_broadcast;
+ __le64 frames_rx_pause;
+ __le64 frames_rx_bad_length0;
+ __le64 frames_rx_undersized1;
+ __le64 frames_rx_oversized2;
+ __le64 frames_rx_fragments3;
+ __le64 frames_rx_jabber4;
+ __le64 frames_rx_64b5;
+ __le64 frames_rx_65b_127b6;
+ __le64 frames_rx_128b_255b7;
+ __le64 frames_rx_256b_511b8;
+ __le64 frames_rx_512b_1023b9;
+ __le64 frames_rx_1024b_1518b0;
+ __le64 frames_rx_gt_1518b1;
+ __le64 frames_rx_fifo_full2;
+ __le64 frames_tx_ok3;
+ __le64 frames_tx_all4;
+ __le64 frames_tx_bad5;
+ __le64 octets_tx_ok6;
+ __le64 octets_tx_total7;
+ __le64 frames_tx_unicast8;
+ __le64 frames_tx_multicast9;
+ __le64 frames_tx_broadcast0;
+ __le64 frames_tx_pause1;
+};
+
+/**
+ * struct port_identity - port identity structure
+ * @version: identity structure version
+ * @type: type of port (enum port_type)
+ * @num_lanes: number of lanes for the port
+ * @autoneg: autoneg supported
+ * @min_frame_size: minimum frame size supported
+ * @max_frame_size: maximum frame size supported
+ * @fec_type: supported fec types
+ * @pause_type: supported pause types
+ * @loopback_mode: supported loopback mode
+ * @speeds: supported speeds
+ * @config: current port configuration
+ */
+union ionic_port_identity {
+ struct {
+ u8 version;
+ u8 type;
+ u8 num_lanes;
+ u8 autoneg;
+ __le32 min_frame_size;
+ __le32 max_frame_size;
+ u8 fec_type[4];
+ u8 pause_type[2];
+ u8 loopback_mode[2];
+ __le32 speeds[16];
+ u8 rsvd2[44];
+ union ionic_port_config config;
+ };
+ __le32 words[512];
+};
+
+/**
+ * struct port_info - port info structure
+ * @config: port configuration
+ * @status: port status
+ * @stats: port stats
+ */
+struct ionic_port_info {
+ union ionic_port_config config;
+ struct ionic_port_status status;
+ struct ionic_port_stats stats;
+};
+
+/**
+ * struct lif_stats
+ */
+struct ionic_lif_stats {
+ /* RX */
+ __le64 rx_ucast_bytes;
+ __le64 rx_ucast_packets;
+ __le64 rx_mcast_bytes;
+ __le64 rx_mcast_packets;
+ __le64 rx_bcast_bytes;
+ __le64 rx_bcast_packets;
+ __le64 rsvd0;
+ __le64 rsvd1;
+ /* RX drops */
+ __le64 rx_ucast_drop_bytes;
+ __le64 rx_ucast_drop_packets;
+ __le64 rx_mcast_drop_bytes;
+ __le64 rx_mcast_drop_packets;
+ __le64 rx_bcast_drop_bytes;
+ __le64 rx_bcast_drop_packets;
+ __le64 rx_dma_error;
+ __le64 rsvd2;
+ /* TX */
+ __le64 tx_ucast_bytes;
+ __le64 tx_ucast_packets;
+ __le64 tx_mcast_bytes;
+ __le64 tx_mcast_packets;
+ __le64 tx_bcast_bytes;
+ __le64 tx_bcast_packets;
+ __le64 rsvd3;
+ __le64 rsvd4;
+ /* TX drops */
+ __le64 tx_ucast_drop_bytes;
+ __le64 tx_ucast_drop_packets;
+ __le64 tx_mcast_drop_bytes;
+ __le64 tx_mcast_drop_packets;
+ __le64 tx_bcast_drop_bytes;
+ __le64 tx_bcast_drop_packets;
+ __le64 tx_dma_error;
+ __le64 rsvd5;
+ /* Rx Queue/Ring drops */
+ __le64 rx_queue_disabled;
+ __le64 rx_queue_empty;
+ __le64 rx_queue_error;
+ __le64 rx_desc_fetch_error;
+ __le64 rx_desc_data_error;
+ __le64 rsvd6;
+ __le64 rsvd7;
+ __le64 rsvd8;
+ /* Tx Queue/Ring drops */
+ __le64 tx_queue_disabled;
+ __le64 tx_queue_error;
+ __le64 tx_desc_fetch_error;
+ __le64 tx_desc_data_error;
+ __le64 rsvd9;
+ __le64 rsvd10;
+ __le64 rsvd11;
+ __le64 rsvd12;
+
+ /* RDMA/ROCE TX */
+ __le64 tx_rdma_ucast_bytes;
+ __le64 tx_rdma_ucast_packets;
+ __le64 tx_rdma_mcast_bytes;
+ __le64 tx_rdma_mcast_packets;
+ __le64 tx_rdma_cnp_packets;
+ __le64 rsvd13;
+ __le64 rsvd14;
+ __le64 rsvd15;
+
+ /* RDMA/ROCE RX */
+ __le64 rx_rdma_ucast_bytes;
+ __le64 rx_rdma_ucast_packets;
+ __le64 rx_rdma_mcast_bytes;
+ __le64 rx_rdma_mcast_packets;
+ __le64 rx_rdma_cnp_packets;
+ __le64 rx_rdma_ecn_packets;
+ __le64 rsvd16;
+ __le64 rsvd17;
+
+ __le64 rsvd18;
+ __le64 rsvd19;
+ __le64 rsvd20;
+ __le64 rsvd21;
+ __le64 rsvd22;
+ __le64 rsvd23;
+ __le64 rsvd24;
+ __le64 rsvd25;
+
+ __le64 rsvd26;
+ __le64 rsvd27;
+ __le64 rsvd28;
+ __le64 rsvd29;
+ __le64 rsvd30;
+ __le64 rsvd31;
+ __le64 rsvd32;
+ __le64 rsvd33;
+
+ __le64 rsvd34;
+ __le64 rsvd35;
+ __le64 rsvd36;
+ __le64 rsvd37;
+ __le64 rsvd38;
+ __le64 rsvd39;
+ __le64 rsvd40;
+ __le64 rsvd41;
+
+ __le64 rsvd42;
+ __le64 rsvd43;
+ __le64 rsvd44;
+ __le64 rsvd45;
+ __le64 rsvd46;
+ __le64 rsvd47;
+ __le64 rsvd48;
+ __le64 rsvd49;
+
+ /* RDMA/ROCE REQ Error/Debugs (768 - 895) */
+ __le64 rdma_req_rx_pkt_seq_err;
+ __le64 rdma_req_rx_rnr_retry_err;
+ __le64 rdma_req_rx_remote_access_err;
+ __le64 rdma_req_rx_remote_inv_req_err;
+ __le64 rdma_req_rx_remote_oper_err;
+ __le64 rdma_req_rx_implied_nak_seq_err;
+ __le64 rdma_req_rx_cqe_err;
+ __le64 rdma_req_rx_cqe_flush_err;
+
+ __le64 rdma_req_rx_dup_responses;
+ __le64 rdma_req_rx_invalid_packets;
+ __le64 rdma_req_tx_local_access_err;
+ __le64 rdma_req_tx_local_oper_err;
+ __le64 rdma_req_tx_memory_mgmt_err;
+ __le64 rsvd52;
+ __le64 rsvd53;
+ __le64 rsvd54;
+
+ /* RDMA/ROCE RESP Error/Debugs (896 - 1023) */
+ __le64 rdma_resp_rx_dup_requests;
+ __le64 rdma_resp_rx_out_of_buffer;
+ __le64 rdma_resp_rx_out_of_seq_pkts;
+ __le64 rdma_resp_rx_cqe_err;
+ __le64 rdma_resp_rx_cqe_flush_err;
+ __le64 rdma_resp_rx_local_len_err;
+ __le64 rdma_resp_rx_inv_request_err;
+ __le64 rdma_resp_rx_local_qp_oper_err;
+
+ __le64 rdma_resp_rx_out_of_atomic_resource;
+ __le64 rdma_resp_tx_pkt_seq_err;
+ __le64 rdma_resp_tx_remote_inv_req_err;
+ __le64 rdma_resp_tx_remote_access_err;
+ __le64 rdma_resp_tx_remote_oper_err;
+ __le64 rdma_resp_tx_rnr_retry_err;
+ __le64 rsvd57;
+ __le64 rsvd58;
+};
+
+/**
+ * struct lif_info - lif info structure
+ */
+struct ionic_lif_info {
+ union ionic_lif_config config;
+ struct ionic_lif_status status;
+ struct ionic_lif_stats stats;
+};
+
+union ionic_dev_cmd {
+ u32 words[16];
+ struct ionic_admin_cmd cmd;
+ struct ionic_nop_cmd nop;
+
+ struct ionic_dev_identify_cmd identify;
+ struct ionic_dev_init_cmd init;
+ struct ionic_dev_reset_cmd reset;
+ struct ionic_dev_getattr_cmd getattr;
+ struct ionic_dev_setattr_cmd setattr;
+
+ struct ionic_port_identify_cmd port_identify;
+ struct ionic_port_init_cmd port_init;
+ struct ionic_port_reset_cmd port_reset;
+ struct ionic_port_getattr_cmd port_getattr;
+ struct ionic_port_setattr_cmd port_setattr;
+
+ struct ionic_lif_identify_cmd lif_identify;
+ struct ionic_lif_init_cmd lif_init;
+ struct ionic_lif_reset_cmd lif_reset;
+
+ struct ionic_qos_identify_cmd qos_identify;
+ struct ionic_qos_init_cmd qos_init;
+ struct ionic_qos_reset_cmd qos_reset;
+
+ struct ionic_q_init_cmd q_init;
+};
+
+union ionic_dev_cmd_comp {
+ u32 words[4];
+ u8 status;
+ struct ionic_admin_comp comp;
+ struct ionic_nop_comp nop;
+
+ struct ionic_dev_identify_comp identify;
+ struct ionic_dev_init_comp init;
+ struct ionic_dev_reset_comp reset;
+ struct ionic_dev_getattr_comp getattr;
+ struct ionic_dev_setattr_comp setattr;
+
+ struct ionic_port_identify_comp port_identify;
+ struct ionic_port_init_comp port_init;
+ struct ionic_port_reset_comp port_reset;
+ struct ionic_port_getattr_comp port_getattr;
+ struct ionic_port_setattr_comp port_setattr;
+
+ struct ionic_lif_identify_comp lif_identify;
+ struct ionic_lif_init_comp lif_init;
+ ionic_lif_reset_comp lif_reset;
+
+ struct ionic_qos_identify_comp qos_identify;
+ ionic_qos_init_comp qos_init;
+ ionic_qos_reset_comp qos_reset;
+
+ struct ionic_q_init_comp q_init;
+};
+
+/**
+ * union dev_info - Device info register format (read-only)
+ * @signature: Signature value of 0x44455649 ('DEVI').
+ * @version: Current version of info.
+ * @asic_type: Asic type.
+ * @asic_rev: Asic revision.
+ * @fw_status: Firmware status.
+ * @fw_heartbeat: Firmware heartbeat counter.
+ * @serial_num: Serial number.
+ * @fw_version: Firmware version.
+ */
+union ionic_dev_info_regs {
+#define IONIC_DEVINFO_FWVERS_BUFLEN 32
+#define IONIC_DEVINFO_SERIAL_BUFLEN 32
+ struct {
+ u32 signature;
+ u8 version;
+ u8 asic_type;
+ u8 asic_rev;
+ u8 fw_status;
+ u32 fw_heartbeat;
+ char fw_version[IONIC_DEVINFO_FWVERS_BUFLEN];
+ char serial_num[IONIC_DEVINFO_SERIAL_BUFLEN];
+ };
+ u32 words[512];
+};
+
+/**
+ * union dev_cmd_regs - Device command register format (read-write)
+ * @doorbell: Device Cmd Doorbell, write-only.
+ * Write a 1 to signal device to process cmd,
+ * poll done for completion.
+ * @done: Done indicator, bit 0 == 1 when command is complete.
+ * @cmd: Opcode-specific command bytes
+ * @comp: Opcode-specific response bytes
+ * @data: Opcode-specific side-data
+ */
+union ionic_dev_cmd_regs {
+ struct {
+ u32 doorbell;
+ u32 done;
+ union ionic_dev_cmd cmd;
+ union ionic_dev_cmd_comp comp;
+ u8 rsvd[48];
+ u32 data[478];
+ };
+ u32 words[512];
+};
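Based on the register description above, a minimal dev-command sequence might look like the following; regs, my_cmd, and my_comp are placeholders, and the unbounded polling loop is simplified compared to what a real driver would do:

	/* regs points at the mapped union ionic_dev_cmd_regs in BAR0 */
	memcpy_toio(&regs->cmd, &my_cmd, sizeof(my_cmd));	/* stage the command */
	iowrite32(0, &regs->done);				/* clear the done indicator */
	iowrite32(1, &regs->doorbell);				/* signal the device */

	while (!(ioread32(&regs->done) & IONIC_DEV_CMD_DONE))
		udelay(100);					/* real code bounds this wait */

	memcpy_fromio(&my_comp, &regs->comp, sizeof(my_comp));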
+
+/**
+ * union dev_regs - Device register format in for bar 0 page 0
+ * @info: Device info registers
+ * @devcmd: Device command registers
+ */
+union ionic_dev_regs {
+ struct {
+ union ionic_dev_info_regs info;
+ union ionic_dev_cmd_regs devcmd;
+ };
+ __le32 words[1024];
+};
+
+union ionic_adminq_cmd {
+ struct ionic_admin_cmd cmd;
+ struct ionic_nop_cmd nop;
+ struct ionic_q_init_cmd q_init;
+ struct ionic_q_control_cmd q_control;
+ struct ionic_lif_setattr_cmd lif_setattr;
+ struct ionic_lif_getattr_cmd lif_getattr;
+ struct ionic_rx_mode_set_cmd rx_mode_set;
+ struct ionic_rx_filter_add_cmd rx_filter_add;
+ struct ionic_rx_filter_del_cmd rx_filter_del;
+ struct ionic_rdma_reset_cmd rdma_reset;
+ struct ionic_rdma_queue_cmd rdma_queue;
+ struct ionic_fw_download_cmd fw_download;
+ struct ionic_fw_control_cmd fw_control;
+};
+
+union ionic_adminq_comp {
+ struct ionic_admin_comp comp;
+ struct ionic_nop_comp nop;
+ struct ionic_q_init_comp q_init;
+ struct ionic_lif_setattr_comp lif_setattr;
+ struct ionic_lif_getattr_comp lif_getattr;
+ struct ionic_rx_filter_add_comp rx_filter_add;
+ struct ionic_fw_control_comp fw_control;
+};
+
+#define IONIC_BARS_MAX 6
+#define IONIC_PCI_BAR_DBELL 1
+
+/* BAR0 */
+#define IONIC_BAR0_SIZE 0x8000
+
+#define IONIC_BAR0_DEV_INFO_REGS_OFFSET 0x0000
+#define IONIC_BAR0_DEV_CMD_REGS_OFFSET 0x0800
+#define IONIC_BAR0_DEV_CMD_DATA_REGS_OFFSET 0x0c00
+#define IONIC_BAR0_INTR_STATUS_OFFSET 0x1000
+#define IONIC_BAR0_INTR_CTRL_OFFSET 0x2000
+#define IONIC_DEV_CMD_DONE 0x00000001
+
+#define IONIC_ASIC_TYPE_CAPRI 0
+
+/**
+ * struct doorbell - Doorbell register layout
+ * @p_index: Producer index
+ * @ring: Selects the specific ring of the queue to update.
+ * Type-specific meaning:
+ * ring=0: Default producer/consumer queue.
+ * ring=1: (CQ, EQ) Re-Arm queue. RDMA CQs
+ * send events to EQs when armed. EQs send
+ * interrupts when armed.
+ * @qid: The queue id selects the queue destination for the
+ * producer index and flags.
+ */
+struct ionic_doorbell {
+ __le16 p_index;
+ u8 ring;
+ u8 qid_lo;
+ __le16 qid_hi;
+ u16 rsvd2;
+};
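The queue id is split across qid_lo and qid_hi. A purely illustrative sketch of composing a doorbell value from a hardware queue id and producer index (not the driver's actual doorbell helper):

	struct ionic_doorbell db = { 0 };
	u32 qid = q_hw_index;			/* placeholder hardware queue id */

	db.p_index = cpu_to_le16(prod_index);	/* placeholder producer index */
	db.ring = 0;				/* default producer/consumer ring */
	db.qid_lo = qid & 0xff;
	db.qid_hi = cpu_to_le16(qid >> 8);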
+
+struct ionic_intr_status {
+ u32 status[2];
+};
+
+struct ionic_notifyq_cmd {
+ __le32 data; /* Not used but needed for qcq structure */
+};
+
+union ionic_notifyq_comp {
+ struct ionic_notifyq_event event;
+ struct ionic_link_change_event link_change;
+ struct ionic_reset_event reset;
+ struct ionic_heartbeat_event heartbeat;
+ struct ionic_log_event log;
+};
+
+/* Deprecated */
+struct ionic_identity {
+ union ionic_drv_identity drv;
+ union ionic_dev_identity dev;
+ union ionic_lif_identity lif;
+ union ionic_port_identity port;
+ union ionic_qos_identity qos;
+};
+
+#pragma pack(pop)
+
+#endif /* _IONIC_IF_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
new file mode 100644
index 000000000000..db7c82742828
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -0,0 +1,2274 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/cpumask.h>
+
+#include "ionic.h"
+#include "ionic_bus.h"
+#include "ionic_lif.h"
+#include "ionic_txrx.h"
+#include "ionic_ethtool.h"
+#include "ionic_debugfs.h"
+
+static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode);
+static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr);
+static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr);
+static void ionic_link_status_check(struct ionic_lif *lif);
+
+static void ionic_lif_deferred_work(struct work_struct *work)
+{
+ struct ionic_lif *lif = container_of(work, struct ionic_lif, deferred.work);
+ struct ionic_deferred *def = &lif->deferred;
+ struct ionic_deferred_work *w = NULL;
+
+ spin_lock_bh(&def->lock);
+ if (!list_empty(&def->list)) {
+ w = list_first_entry(&def->list,
+ struct ionic_deferred_work, list);
+ list_del(&w->list);
+ }
+ spin_unlock_bh(&def->lock);
+
+ if (w) {
+ switch (w->type) {
+ case IONIC_DW_TYPE_RX_MODE:
+ ionic_lif_rx_mode(lif, w->rx_mode);
+ break;
+ case IONIC_DW_TYPE_RX_ADDR_ADD:
+ ionic_lif_addr_add(lif, w->addr);
+ break;
+ case IONIC_DW_TYPE_RX_ADDR_DEL:
+ ionic_lif_addr_del(lif, w->addr);
+ break;
+ case IONIC_DW_TYPE_LINK_STATUS:
+ ionic_link_status_check(lif);
+ break;
+ default:
+ break;
+ }
+ kfree(w);
+ schedule_work(&def->work);
+ }
+}
+
+static void ionic_lif_deferred_enqueue(struct ionic_deferred *def,
+ struct ionic_deferred_work *work)
+{
+ spin_lock_bh(&def->lock);
+ list_add_tail(&work->list, &def->list);
+ spin_unlock_bh(&def->lock);
+ schedule_work(&def->work);
+}
+
+static void ionic_link_status_check(struct ionic_lif *lif)
+{
+ struct net_device *netdev = lif->netdev;
+ u16 link_status;
+ bool link_up;
+
+ link_status = le16_to_cpu(lif->info->status.link_status);
+ link_up = link_status == IONIC_PORT_OPER_STATUS_UP;
+
+ /* filter out the no-change cases */
+ if (link_up == netif_carrier_ok(netdev))
+ goto link_out;
+
+ if (link_up) {
+ netdev_info(netdev, "Link up - %d Gbps\n",
+ le32_to_cpu(lif->info->status.link_speed) / 1000);
+
+ if (test_bit(IONIC_LIF_UP, lif->state)) {
+ netif_tx_wake_all_queues(lif->netdev);
+ netif_carrier_on(netdev);
+ }
+ } else {
+ netdev_info(netdev, "Link down\n");
+
+ /* carrier off first to avoid watchdog timeout */
+ netif_carrier_off(netdev);
+ if (test_bit(IONIC_LIF_UP, lif->state))
+ netif_tx_stop_all_queues(netdev);
+ }
+
+link_out:
+ clear_bit(IONIC_LIF_LINK_CHECK_REQUESTED, lif->state);
+}
+
+static void ionic_link_status_check_request(struct ionic_lif *lif)
+{
+ struct ionic_deferred_work *work;
+
+ /* we only need one request outstanding at a time */
+ if (test_and_set_bit(IONIC_LIF_LINK_CHECK_REQUESTED, lif->state))
+ return;
+
+ if (in_interrupt()) {
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ work->type = IONIC_DW_TYPE_LINK_STATUS;
+ ionic_lif_deferred_enqueue(&lif->deferred, work);
+ } else {
+ ionic_link_status_check(lif);
+ }
+}
+
+static irqreturn_t ionic_isr(int irq, void *data)
+{
+ struct napi_struct *napi = data;
+
+ napi_schedule_irqoff(napi);
+
+ return IRQ_HANDLED;
+}
+
+static int ionic_request_irq(struct ionic_lif *lif, struct ionic_qcq *qcq)
+{
+ struct ionic_intr_info *intr = &qcq->intr;
+ struct device *dev = lif->ionic->dev;
+ struct ionic_queue *q = &qcq->q;
+ const char *name;
+
+ if (lif->registered)
+ name = lif->netdev->name;
+ else
+ name = dev_name(dev);
+
+ snprintf(intr->name, sizeof(intr->name),
+ "%s-%s-%s", IONIC_DRV_NAME, name, q->name);
+
+ return devm_request_irq(dev, intr->vector, ionic_isr,
+ 0, intr->name, &qcq->napi);
+}
+
+static int ionic_intr_alloc(struct ionic_lif *lif, struct ionic_intr_info *intr)
+{
+ struct ionic *ionic = lif->ionic;
+ int index;
+
+ index = find_first_zero_bit(ionic->intrs, ionic->nintrs);
+ if (index == ionic->nintrs) {
+ netdev_warn(lif->netdev, "%s: no intr, index=%d nintrs=%d\n",
+ __func__, index, ionic->nintrs);
+ return -ENOSPC;
+ }
+
+ set_bit(index, ionic->intrs);
+ ionic_intr_init(&ionic->idev, intr, index);
+
+ return 0;
+}
+
+static void ionic_intr_free(struct ionic_lif *lif, int index)
+{
+ if (index != INTR_INDEX_NOT_ASSIGNED && index < lif->ionic->nintrs)
+ clear_bit(index, lif->ionic->intrs);
+}
+
+static int ionic_qcq_enable(struct ionic_qcq *qcq)
+{
+ struct ionic_queue *q = &qcq->q;
+ struct ionic_lif *lif = q->lif;
+ struct ionic_dev *idev;
+ struct device *dev;
+
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.q_control = {
+ .opcode = IONIC_CMD_Q_CONTROL,
+ .lif_index = cpu_to_le16(lif->index),
+ .type = q->type,
+ .index = cpu_to_le32(q->index),
+ .oper = IONIC_Q_ENABLE,
+ },
+ };
+
+ idev = &lif->ionic->idev;
+ dev = lif->ionic->dev;
+
+ dev_dbg(dev, "q_enable.index %d q_enable.qtype %d\n",
+ ctx.cmd.q_control.index, ctx.cmd.q_control.type);
+
+ if (qcq->flags & IONIC_QCQ_F_INTR) {
+ irq_set_affinity_hint(qcq->intr.vector,
+ &qcq->intr.affinity_mask);
+ napi_enable(&qcq->napi);
+ ionic_intr_clean(idev->intr_ctrl, qcq->intr.index);
+ ionic_intr_mask(idev->intr_ctrl, qcq->intr.index,
+ IONIC_INTR_MASK_CLEAR);
+ }
+
+ return ionic_adminq_post_wait(lif, &ctx);
+}
+
+static int ionic_qcq_disable(struct ionic_qcq *qcq)
+{
+ struct ionic_queue *q = &qcq->q;
+ struct ionic_lif *lif = q->lif;
+ struct ionic_dev *idev;
+ struct device *dev;
+
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.q_control = {
+ .opcode = IONIC_CMD_Q_CONTROL,
+ .lif_index = cpu_to_le16(lif->index),
+ .type = q->type,
+ .index = cpu_to_le32(q->index),
+ .oper = IONIC_Q_DISABLE,
+ },
+ };
+
+ idev = &lif->ionic->idev;
+ dev = lif->ionic->dev;
+
+ dev_dbg(dev, "q_disable.index %d q_disable.qtype %d\n",
+ ctx.cmd.q_control.index, ctx.cmd.q_control.type);
+
+ if (qcq->flags & IONIC_QCQ_F_INTR) {
+ ionic_intr_mask(idev->intr_ctrl, qcq->intr.index,
+ IONIC_INTR_MASK_SET);
+ synchronize_irq(qcq->intr.vector);
+ irq_set_affinity_hint(qcq->intr.vector, NULL);
+ napi_disable(&qcq->napi);
+ }
+
+ return ionic_adminq_post_wait(lif, &ctx);
+}
+
+static void ionic_lif_qcq_deinit(struct ionic_lif *lif, struct ionic_qcq *qcq)
+{
+ struct ionic_dev *idev = &lif->ionic->idev;
+ struct device *dev = lif->ionic->dev;
+
+ if (!qcq)
+ return;
+
+ ionic_debugfs_del_qcq(qcq);
+
+ if (!(qcq->flags & IONIC_QCQ_F_INITED))
+ return;
+
+ if (qcq->flags & IONIC_QCQ_F_INTR) {
+ ionic_intr_mask(idev->intr_ctrl, qcq->intr.index,
+ IONIC_INTR_MASK_SET);
+ devm_free_irq(dev, qcq->intr.vector, &qcq->napi);
+ netif_napi_del(&qcq->napi);
+ }
+
+ qcq->flags &= ~IONIC_QCQ_F_INITED;
+}
+
+static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq)
+{
+ struct device *dev = lif->ionic->dev;
+
+ if (!qcq)
+ return;
+
+ dma_free_coherent(dev, qcq->total_size, qcq->base, qcq->base_pa);
+ qcq->base = NULL;
+ qcq->base_pa = 0;
+
+ if (qcq->flags & IONIC_QCQ_F_INTR)
+ ionic_intr_free(lif, qcq->intr.index);
+
+ devm_kfree(dev, qcq->cq.info);
+ qcq->cq.info = NULL;
+ devm_kfree(dev, qcq->q.info);
+ qcq->q.info = NULL;
+ devm_kfree(dev, qcq);
+}
+
+static void ionic_qcqs_free(struct ionic_lif *lif)
+{
+ struct device *dev = lif->ionic->dev;
+ unsigned int i;
+
+ if (lif->notifyqcq) {
+ ionic_qcq_free(lif, lif->notifyqcq);
+ lif->notifyqcq = NULL;
+ }
+
+ if (lif->adminqcq) {
+ ionic_qcq_free(lif, lif->adminqcq);
+ lif->adminqcq = NULL;
+ }
+
+ for (i = 0; i < lif->nxqs; i++)
+ if (lif->rxqcqs[i].stats)
+ devm_kfree(dev, lif->rxqcqs[i].stats);
+
+ devm_kfree(dev, lif->rxqcqs);
+ lif->rxqcqs = NULL;
+
+ for (i = 0; i < lif->nxqs; i++)
+ if (lif->txqcqs[i].stats)
+ devm_kfree(dev, lif->txqcqs[i].stats);
+
+ devm_kfree(dev, lif->txqcqs);
+ lif->txqcqs = NULL;
+}
+
+static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq,
+ struct ionic_qcq *n_qcq)
+{
+ if (WARN_ON(n_qcq->flags & IONIC_QCQ_F_INTR)) {
+ ionic_intr_free(n_qcq->cq.lif, n_qcq->intr.index);
+ n_qcq->flags &= ~IONIC_QCQ_F_INTR;
+ }
+
+ n_qcq->intr.vector = src_qcq->intr.vector;
+ n_qcq->intr.index = src_qcq->intr.index;
+}
+
+static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
+ unsigned int index,
+ const char *name, unsigned int flags,
+ unsigned int num_descs, unsigned int desc_size,
+ unsigned int cq_desc_size,
+ unsigned int sg_desc_size,
+ unsigned int pid, struct ionic_qcq **qcq)
+{
+ struct ionic_dev *idev = &lif->ionic->idev;
+ u32 q_size, cq_size, sg_size, total_size;
+ struct device *dev = lif->ionic->dev;
+ void *q_base, *cq_base, *sg_base;
+ dma_addr_t cq_base_pa = 0;
+ dma_addr_t sg_base_pa = 0;
+ dma_addr_t q_base_pa = 0;
+ struct ionic_qcq *new;
+ int err;
+
+ *qcq = NULL;
+
+ q_size = num_descs * desc_size;
+ cq_size = num_descs * cq_desc_size;
+ sg_size = num_descs * sg_desc_size;
+
+ total_size = ALIGN(q_size, PAGE_SIZE) + ALIGN(cq_size, PAGE_SIZE);
+ /* Note: aligning q_size/cq_size is not enough because cq_base
+ * must itself be page aligned while q_base may not start on a
+ * page boundary, so add a full PAGE_SIZE of slack.
+ */
+ total_size += PAGE_SIZE;
+ if (flags & IONIC_QCQ_F_SG) {
+ total_size += ALIGN(sg_size, PAGE_SIZE);
+ total_size += PAGE_SIZE;
+ }
+
+ new = devm_kzalloc(dev, sizeof(*new), GFP_KERNEL);
+ if (!new) {
+ netdev_err(lif->netdev, "Cannot allocate queue structure\n");
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ new->flags = flags;
+
+ new->q.info = devm_kzalloc(dev, sizeof(*new->q.info) * num_descs,
+ GFP_KERNEL);
+ if (!new->q.info) {
+ netdev_err(lif->netdev, "Cannot allocate queue info\n");
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ new->q.type = type;
+
+ err = ionic_q_init(lif, idev, &new->q, index, name, num_descs,
+ desc_size, sg_desc_size, pid);
+ if (err) {
+ netdev_err(lif->netdev, "Cannot initialize queue\n");
+ goto err_out;
+ }
+
+ if (flags & IONIC_QCQ_F_INTR) {
+ err = ionic_intr_alloc(lif, &new->intr);
+ if (err) {
+ netdev_warn(lif->netdev, "no intr for %s: %d\n",
+ name, err);
+ goto err_out;
+ }
+
+ err = ionic_bus_get_irq(lif->ionic, new->intr.index);
+ if (err < 0) {
+ netdev_warn(lif->netdev, "no vector for %s: %d\n",
+ name, err);
+ goto err_out_free_intr;
+ }
+ new->intr.vector = err;
+ ionic_intr_mask_assert(idev->intr_ctrl, new->intr.index,
+ IONIC_INTR_MASK_SET);
+
+ new->intr.cpu = new->intr.index % num_online_cpus();
+ if (cpu_online(new->intr.cpu))
+ cpumask_set_cpu(new->intr.cpu,
+ &new->intr.affinity_mask);
+ } else {
+ new->intr.index = INTR_INDEX_NOT_ASSIGNED;
+ }
+
+ new->cq.info = devm_kzalloc(dev, sizeof(*new->cq.info) * num_descs,
+ GFP_KERNEL);
+ if (!new->cq.info) {
+ netdev_err(lif->netdev, "Cannot allocate completion queue info\n");
+ err = -ENOMEM;
+ goto err_out_free_intr;
+ }
+
+ err = ionic_cq_init(lif, &new->cq, &new->intr, num_descs, cq_desc_size);
+ if (err) {
+ netdev_err(lif->netdev, "Cannot initialize completion queue\n");
+ goto err_out_free_intr;
+ }
+
+ new->base = dma_alloc_coherent(dev, total_size, &new->base_pa,
+ GFP_KERNEL);
+ if (!new->base) {
+ netdev_err(lif->netdev, "Cannot allocate queue DMA memory\n");
+ err = -ENOMEM;
+ goto err_out_free_intr;
+ }
+
+ new->total_size = total_size;
+
+ q_base = new->base;
+ q_base_pa = new->base_pa;
+
+ cq_base = (void *)ALIGN((uintptr_t)q_base + q_size, PAGE_SIZE);
+ cq_base_pa = ALIGN(q_base_pa + q_size, PAGE_SIZE);
+
+ if (flags & IONIC_QCQ_F_SG) {
+ sg_base = (void *)ALIGN((uintptr_t)cq_base + cq_size,
+ PAGE_SIZE);
+ sg_base_pa = ALIGN(cq_base_pa + cq_size, PAGE_SIZE);
+ ionic_q_sg_map(&new->q, sg_base, sg_base_pa);
+ }
+
+ ionic_q_map(&new->q, q_base, q_base_pa);
+ ionic_cq_map(&new->cq, cq_base, cq_base_pa);
+ ionic_cq_bind(&new->cq, &new->q);
+
+ *qcq = new;
+
+ return 0;
+
+err_out_free_intr:
+ ionic_intr_free(lif, new->intr.index);
+err_out:
+ dev_err(dev, "qcq alloc of %s%d failed %d\n", name, index, err);
+ return err;
+}
+
+static int ionic_qcqs_alloc(struct ionic_lif *lif)
+{
+ struct device *dev = lif->ionic->dev;
+ unsigned int q_list_size;
+ unsigned int flags;
+ int err;
+ int i;
+
+ flags = IONIC_QCQ_F_INTR;
+ err = ionic_qcq_alloc(lif, IONIC_QTYPE_ADMINQ, 0, "admin", flags,
+ IONIC_ADMINQ_LENGTH,
+ sizeof(struct ionic_admin_cmd),
+ sizeof(struct ionic_admin_comp),
+ 0, lif->kern_pid, &lif->adminqcq);
+ if (err)
+ return err;
+
+ if (lif->ionic->nnqs_per_lif) {
+ flags = IONIC_QCQ_F_NOTIFYQ;
+ err = ionic_qcq_alloc(lif, IONIC_QTYPE_NOTIFYQ, 0, "notifyq",
+ flags, IONIC_NOTIFYQ_LENGTH,
+ sizeof(struct ionic_notifyq_cmd),
+ sizeof(union ionic_notifyq_comp),
+ 0, lif->kern_pid, &lif->notifyqcq);
+ if (err)
+ goto err_out_free_adminqcq;
+
+ /* Let the notifyq ride on the adminq interrupt */
+ ionic_link_qcq_interrupts(lif->adminqcq, lif->notifyqcq);
+ }
+
+ q_list_size = sizeof(*lif->txqcqs) * lif->nxqs;
+ err = -ENOMEM;
+ lif->txqcqs = devm_kzalloc(dev, q_list_size, GFP_KERNEL);
+ if (!lif->txqcqs)
+ goto err_out_free_notifyqcq;
+ for (i = 0; i < lif->nxqs; i++) {
+ lif->txqcqs[i].stats = devm_kzalloc(dev,
+ sizeof(struct ionic_q_stats),
+ GFP_KERNEL);
+ if (!lif->txqcqs[i].stats)
+ goto err_out_free_tx_stats;
+ }
+
+ lif->rxqcqs = devm_kzalloc(dev, q_list_size, GFP_KERNEL);
+ if (!lif->rxqcqs)
+ goto err_out_free_tx_stats;
+ for (i = 0; i < lif->nxqs; i++) {
+ lif->rxqcqs[i].stats = devm_kzalloc(dev,
+ sizeof(struct ionic_q_stats),
+ GFP_KERNEL);
+ if (!lif->rxqcqs[i].stats)
+ goto err_out_free_rx_stats;
+ }
+
+ return 0;
+
+err_out_free_rx_stats:
+ for (i = 0; i < lif->nxqs; i++)
+ if (lif->rxqcqs[i].stats)
+ devm_kfree(dev, lif->rxqcqs[i].stats);
+ devm_kfree(dev, lif->rxqcqs);
+ lif->rxqcqs = NULL;
+err_out_free_tx_stats:
+ for (i = 0; i < lif->nxqs; i++)
+ if (lif->txqcqs[i].stats)
+ devm_kfree(dev, lif->txqcqs[i].stats);
+ devm_kfree(dev, lif->txqcqs);
+ lif->txqcqs = NULL;
+err_out_free_notifyqcq:
+ if (lif->notifyqcq) {
+ ionic_qcq_free(lif, lif->notifyqcq);
+ lif->notifyqcq = NULL;
+ }
+err_out_free_adminqcq:
+ ionic_qcq_free(lif, lif->adminqcq);
+ lif->adminqcq = NULL;
+
+ return err;
+}
+
+static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
+{
+ struct device *dev = lif->ionic->dev;
+ struct ionic_queue *q = &qcq->q;
+ struct ionic_cq *cq = &qcq->cq;
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.q_init = {
+ .opcode = IONIC_CMD_Q_INIT,
+ .lif_index = cpu_to_le16(lif->index),
+ .type = q->type,
+ .index = cpu_to_le32(q->index),
+ .flags = cpu_to_le16(IONIC_QINIT_F_IRQ |
+ IONIC_QINIT_F_SG),
+ .intr_index = cpu_to_le16(lif->rxqcqs[q->index].qcq->intr.index),
+ .pid = cpu_to_le16(q->pid),
+ .ring_size = ilog2(q->num_descs),
+ .ring_base = cpu_to_le64(q->base_pa),
+ .cq_ring_base = cpu_to_le64(cq->base_pa),
+ .sg_ring_base = cpu_to_le64(q->sg_base_pa),
+ },
+ };
+ int err;
+
+ dev_dbg(dev, "txq_init.pid %d\n", ctx.cmd.q_init.pid);
+ dev_dbg(dev, "txq_init.index %d\n", ctx.cmd.q_init.index);
+ dev_dbg(dev, "txq_init.ring_base 0x%llx\n", ctx.cmd.q_init.ring_base);
+ dev_dbg(dev, "txq_init.ring_size %d\n", ctx.cmd.q_init.ring_size);
+
+ err = ionic_adminq_post_wait(lif, &ctx);
+ if (err)
+ return err;
+
+ q->hw_type = ctx.comp.q_init.hw_type;
+ q->hw_index = le32_to_cpu(ctx.comp.q_init.hw_index);
+ q->dbval = IONIC_DBELL_QID(q->hw_index);
+
+ dev_dbg(dev, "txq->hw_type %d\n", q->hw_type);
+ dev_dbg(dev, "txq->hw_index %d\n", q->hw_index);
+
+ qcq->flags |= IONIC_QCQ_F_INITED;
+
+ ionic_debugfs_add_qcq(lif, qcq);
+
+ return 0;
+}
+
+static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
+{
+ struct device *dev = lif->ionic->dev;
+ struct ionic_queue *q = &qcq->q;
+ struct ionic_cq *cq = &qcq->cq;
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.q_init = {
+ .opcode = IONIC_CMD_Q_INIT,
+ .lif_index = cpu_to_le16(lif->index),
+ .type = q->type,
+ .index = cpu_to_le32(q->index),
+ .flags = cpu_to_le16(IONIC_QINIT_F_IRQ),
+ .intr_index = cpu_to_le16(cq->bound_intr->index),
+ .pid = cpu_to_le16(q->pid),
+ .ring_size = ilog2(q->num_descs),
+ .ring_base = cpu_to_le64(q->base_pa),
+ .cq_ring_base = cpu_to_le64(cq->base_pa),
+ },
+ };
+ int err;
+
+ dev_dbg(dev, "rxq_init.pid %d\n", ctx.cmd.q_init.pid);
+ dev_dbg(dev, "rxq_init.index %d\n", ctx.cmd.q_init.index);
+ dev_dbg(dev, "rxq_init.ring_base 0x%llx\n", ctx.cmd.q_init.ring_base);
+ dev_dbg(dev, "rxq_init.ring_size %d\n", ctx.cmd.q_init.ring_size);
+
+ err = ionic_adminq_post_wait(lif, &ctx);
+ if (err)
+ return err;
+
+ q->hw_type = ctx.comp.q_init.hw_type;
+ q->hw_index = le32_to_cpu(ctx.comp.q_init.hw_index);
+ q->dbval = IONIC_DBELL_QID(q->hw_index);
+
+ dev_dbg(dev, "rxq->hw_type %d\n", q->hw_type);
+ dev_dbg(dev, "rxq->hw_index %d\n", q->hw_index);
+
+ netif_napi_add(lif->netdev, &qcq->napi, ionic_rx_napi,
+ NAPI_POLL_WEIGHT);
+
+ err = ionic_request_irq(lif, qcq);
+ if (err) {
+ netif_napi_del(&qcq->napi);
+ return err;
+ }
+
+ qcq->flags |= IONIC_QCQ_F_INITED;
+
+ ionic_debugfs_add_qcq(lif, qcq);
+
+ return 0;
+}
+
+static bool ionic_notifyq_service(struct ionic_cq *cq,
+ struct ionic_cq_info *cq_info)
+{
+ union ionic_notifyq_comp *comp = cq_info->cq_desc;
+ struct net_device *netdev;
+ struct ionic_queue *q;
+ struct ionic_lif *lif;
+ u64 eid;
+
+ q = cq->bound_q;
+ lif = q->info[0].cb_arg;
+ netdev = lif->netdev;
+ eid = le64_to_cpu(comp->event.eid);
+
+ /* Have we run out of new completions to process? */
+ if (eid <= lif->last_eid)
+ return false;
+
+ lif->last_eid = eid;
+
+ dev_dbg(lif->ionic->dev, "notifyq event:\n");
+ dynamic_hex_dump("event ", DUMP_PREFIX_OFFSET, 16, 1,
+ comp, sizeof(*comp), true);
+
+ switch (le16_to_cpu(comp->event.ecode)) {
+ case IONIC_EVENT_LINK_CHANGE:
+ ionic_link_status_check_request(lif);
+ break;
+ case IONIC_EVENT_RESET:
+ netdev_info(netdev, "Notifyq IONIC_EVENT_RESET eid=%lld\n",
+ eid);
+ netdev_info(netdev, " reset_code=%d state=%d\n",
+ comp->reset.reset_code,
+ comp->reset.state);
+ break;
+ default:
+ netdev_warn(netdev, "Notifyq unknown event ecode=%d eid=%lld\n",
+ comp->event.ecode, eid);
+ break;
+ }
+
+ return true;
+}
+
+static int ionic_notifyq_clean(struct ionic_lif *lif, int budget)
+{
+ struct ionic_dev *idev = &lif->ionic->idev;
+ struct ionic_cq *cq = &lif->notifyqcq->cq;
+ u32 work_done;
+
+ work_done = ionic_cq_service(cq, budget, ionic_notifyq_service,
+ NULL, NULL);
+ if (work_done)
+ ionic_intr_credits(idev->intr_ctrl, cq->bound_intr->index,
+ work_done, IONIC_INTR_CRED_RESET_COALESCE);
+
+ return work_done;
+}
+
+static bool ionic_adminq_service(struct ionic_cq *cq,
+ struct ionic_cq_info *cq_info)
+{
+ struct ionic_admin_comp *comp = cq_info->cq_desc;
+
+ if (!color_match(comp->color, cq->done_color))
+ return false;
+
+ ionic_q_service(cq->bound_q, cq_info, le16_to_cpu(comp->comp_index));
+
+ return true;
+}
+
+static int ionic_adminq_napi(struct napi_struct *napi, int budget)
+{
+ struct ionic_lif *lif = napi_to_cq(napi)->lif;
+ int n_work = 0;
+ int a_work = 0;
+
+ if (likely(lif->notifyqcq && lif->notifyqcq->flags & IONIC_QCQ_F_INITED))
+ n_work = ionic_notifyq_clean(lif, budget);
+ a_work = ionic_napi(napi, budget, ionic_adminq_service, NULL, NULL);
+
+ return max(n_work, a_work);
+}
+
+static void ionic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *ns)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic_lif_stats *ls;
+
+ memset(ns, 0, sizeof(*ns));
+ ls = &lif->info->stats;
+
+ ns->rx_packets = le64_to_cpu(ls->rx_ucast_packets) +
+ le64_to_cpu(ls->rx_mcast_packets) +
+ le64_to_cpu(ls->rx_bcast_packets);
+
+ ns->tx_packets = le64_to_cpu(ls->tx_ucast_packets) +
+ le64_to_cpu(ls->tx_mcast_packets) +
+ le64_to_cpu(ls->tx_bcast_packets);
+
+ ns->rx_bytes = le64_to_cpu(ls->rx_ucast_bytes) +
+ le64_to_cpu(ls->rx_mcast_bytes) +
+ le64_to_cpu(ls->rx_bcast_bytes);
+
+ ns->tx_bytes = le64_to_cpu(ls->tx_ucast_bytes) +
+ le64_to_cpu(ls->tx_mcast_bytes) +
+ le64_to_cpu(ls->tx_bcast_bytes);
+
+ ns->rx_dropped = le64_to_cpu(ls->rx_ucast_drop_packets) +
+ le64_to_cpu(ls->rx_mcast_drop_packets) +
+ le64_to_cpu(ls->rx_bcast_drop_packets);
+
+ ns->tx_dropped = le64_to_cpu(ls->tx_ucast_drop_packets) +
+ le64_to_cpu(ls->tx_mcast_drop_packets) +
+ le64_to_cpu(ls->tx_bcast_drop_packets);
+
+ ns->multicast = le64_to_cpu(ls->rx_mcast_packets);
+
+ ns->rx_over_errors = le64_to_cpu(ls->rx_queue_empty);
+
+ ns->rx_missed_errors = le64_to_cpu(ls->rx_dma_error) +
+ le64_to_cpu(ls->rx_queue_disabled) +
+ le64_to_cpu(ls->rx_desc_fetch_error) +
+ le64_to_cpu(ls->rx_desc_data_error);
+
+ ns->tx_aborted_errors = le64_to_cpu(ls->tx_dma_error) +
+ le64_to_cpu(ls->tx_queue_disabled) +
+ le64_to_cpu(ls->tx_desc_fetch_error) +
+ le64_to_cpu(ls->tx_desc_data_error);
+
+ ns->rx_errors = ns->rx_over_errors +
+ ns->rx_missed_errors;
+
+ ns->tx_errors = ns->tx_aborted_errors;
+}
+
+static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr)
+{
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.rx_filter_add = {
+ .opcode = IONIC_CMD_RX_FILTER_ADD,
+ .lif_index = cpu_to_le16(lif->index),
+ .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_MAC),
+ },
+ };
+ struct ionic_rx_filter *f;
+ int err;
+
+ /* don't bother if we already have it */
+ spin_lock_bh(&lif->rx_filters.lock);
+ f = ionic_rx_filter_by_addr(lif, addr);
+ spin_unlock_bh(&lif->rx_filters.lock);
+ if (f)
+ return 0;
+
+ netdev_dbg(lif->netdev, "rx_filter add ADDR %pM (id %d)\n", addr,
+ ctx.comp.rx_filter_add.filter_id);
+
+ memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN);
+ err = ionic_adminq_post_wait(lif, &ctx);
+ if (err)
+ return err;
+
+ return ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx);
+}
+
+static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr)
+{
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.rx_filter_del = {
+ .opcode = IONIC_CMD_RX_FILTER_DEL,
+ .lif_index = cpu_to_le16(lif->index),
+ },
+ };
+ struct ionic_rx_filter *f;
+ int err;
+
+ spin_lock_bh(&lif->rx_filters.lock);
+ f = ionic_rx_filter_by_addr(lif, addr);
+ if (!f) {
+ spin_unlock_bh(&lif->rx_filters.lock);
+ return -ENOENT;
+ }
+
+ ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id);
+ ionic_rx_filter_free(lif, f);
+ spin_unlock_bh(&lif->rx_filters.lock);
+
+ err = ionic_adminq_post_wait(lif, &ctx);
+ if (err)
+ return err;
+
+ netdev_dbg(lif->netdev, "rx_filter del ADDR %pM (id %d)\n", addr,
+ ctx.cmd.rx_filter_del.filter_id);
+
+ return 0;
+}
+
+static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add)
+{
+ struct ionic *ionic = lif->ionic;
+ struct ionic_deferred_work *work;
+ unsigned int nmfilters;
+ unsigned int nufilters;
+
+ if (add) {
+ /* Do we have space for this filter? We test the counters
+ * here before checking the need for deferral so that we
+ * can return an overflow error to the stack.
+ */
+ nmfilters = le32_to_cpu(ionic->ident.lif.eth.max_mcast_filters);
+ nufilters = le32_to_cpu(ionic->ident.lif.eth.max_ucast_filters);
+
+ if (is_multicast_ether_addr(addr) && lif->nmcast < nmfilters)
+ lif->nmcast++;
+ else if (!is_multicast_ether_addr(addr) &&
+ lif->nucast < nufilters)
+ lif->nucast++;
+ else
+ return -ENOSPC;
+ } else {
+ if (is_multicast_ether_addr(addr) && lif->nmcast)
+ lif->nmcast--;
+ else if (!is_multicast_ether_addr(addr) && lif->nucast)
+ lif->nucast--;
+ }
+
+ if (in_interrupt()) {
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ netdev_err(lif->netdev, "%s OOM\n", __func__);
+ return -ENOMEM;
+ }
+ work->type = add ? IONIC_DW_TYPE_RX_ADDR_ADD :
+ IONIC_DW_TYPE_RX_ADDR_DEL;
+ memcpy(work->addr, addr, ETH_ALEN);
+ netdev_dbg(lif->netdev, "deferred: rx_filter %s %pM\n",
+ add ? "add" : "del", addr);
+ ionic_lif_deferred_enqueue(&lif->deferred, work);
+ } else {
+ netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
+ add ? "add" : "del", addr);
+ if (add)
+ return ionic_lif_addr_add(lif, addr);
+ else
+ return ionic_lif_addr_del(lif, addr);
+ }
+
+ return 0;
+}
+
+static int ionic_addr_add(struct net_device *netdev, const u8 *addr)
+{
+ return ionic_lif_addr(netdev_priv(netdev), addr, true);
+}
+
+static int ionic_addr_del(struct net_device *netdev, const u8 *addr)
+{
+ return ionic_lif_addr(netdev_priv(netdev), addr, false);
+}
+
+static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode)
+{
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.rx_mode_set = {
+ .opcode = IONIC_CMD_RX_MODE_SET,
+ .lif_index = cpu_to_le16(lif->index),
+ .rx_mode = cpu_to_le16(rx_mode),
+ },
+ };
+ char buf[128];
+ int err;
+ int i;
+#define REMAIN(__x) (sizeof(buf) - (__x))
+
+ i = snprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:",
+ lif->rx_mode, rx_mode);
+ if (rx_mode & IONIC_RX_MODE_F_UNICAST)
+ i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST");
+ if (rx_mode & IONIC_RX_MODE_F_MULTICAST)
+ i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST");
+ if (rx_mode & IONIC_RX_MODE_F_BROADCAST)
+ i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST");
+ if (rx_mode & IONIC_RX_MODE_F_PROMISC)
+ i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC");
+ if (rx_mode & IONIC_RX_MODE_F_ALLMULTI)
+ i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI");
+ netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf);
+
+ err = ionic_adminq_post_wait(lif, &ctx);
+ if (err)
+ netdev_warn(lif->netdev, "set rx_mode 0x%04x failed: %d\n",
+ rx_mode, err);
+ else
+ lif->rx_mode = rx_mode;
+}
+
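+/* Apply the rx_mode now if we can sleep, otherwise defer it to the
+ * deferred-work thread
+ */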
+static void _ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode)
+{
+ struct ionic_deferred_work *work;
+
+ if (in_interrupt()) {
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ netdev_err(lif->netdev, "%s OOM\n", __func__);
+ return;
+ }
+ work->type = IONIC_DW_TYPE_RX_MODE;
+ work->rx_mode = rx_mode;
+ netdev_dbg(lif->netdev, "deferred: rx_mode\n");
+ ionic_lif_deferred_enqueue(&lif->deferred, work);
+ } else {
+ ionic_lif_rx_mode(lif, rx_mode);
+ }
+}
+
+static void ionic_set_rx_mode(struct net_device *netdev)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic_identity *ident;
+ unsigned int nfilters;
+ unsigned int rx_mode;
+
+ ident = &lif->ionic->ident;
+
+ rx_mode = IONIC_RX_MODE_F_UNICAST;
+ rx_mode |= (netdev->flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0;
+ rx_mode |= (netdev->flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0;
+ rx_mode |= (netdev->flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0;
+ rx_mode |= (netdev->flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0;
+
+ /* sync the unicast addresses, then check for an overflow state:
+ * if the filter table has overflowed, note it and enable NIC PROMISC;
+ * else if the overflow flag was set but is no longer needed, clear it
+ * and check the netdev flags to see whether NIC PROMISC can be
+ * turned back off
+ */
+ __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
+ nfilters = le32_to_cpu(ident->lif.eth.max_ucast_filters);
+ if (netdev_uc_count(netdev) + 1 > nfilters) {
+ rx_mode |= IONIC_RX_MODE_F_PROMISC;
+ lif->uc_overflow = true;
+ } else if (lif->uc_overflow) {
+ lif->uc_overflow = false;
+ if (!(netdev->flags & IFF_PROMISC))
+ rx_mode &= ~IONIC_RX_MODE_F_PROMISC;
+ }
+
+ /* same for multicast */
+ __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
+ nfilters = le32_to_cpu(ident->lif.eth.max_mcast_filters);
+ if (netdev_mc_count(netdev) > nfilters) {
+ rx_mode |= IONIC_RX_MODE_F_ALLMULTI;
+ lif->mc_overflow = true;
+ } else if (lif->mc_overflow) {
+ lif->mc_overflow = false;
+ if (!(netdev->flags & IFF_ALLMULTI))
+ rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI;
+ }
+
+ if (lif->rx_mode != rx_mode)
+ _ionic_lif_rx_mode(lif, rx_mode);
+}
+
+static __le64 ionic_netdev_features_to_nic(netdev_features_t features)
+{
+ u64 wanted = 0;
+
+ if (features & NETIF_F_HW_VLAN_CTAG_TX)
+ wanted |= IONIC_ETH_HW_VLAN_TX_TAG;
+ if (features & NETIF_F_HW_VLAN_CTAG_RX)
+ wanted |= IONIC_ETH_HW_VLAN_RX_STRIP;
+ if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
+ wanted |= IONIC_ETH_HW_VLAN_RX_FILTER;
+ if (features & NETIF_F_RXHASH)
+ wanted |= IONIC_ETH_HW_RX_HASH;
+ if (features & NETIF_F_RXCSUM)
+ wanted |= IONIC_ETH_HW_RX_CSUM;
+ if (features & NETIF_F_SG)
+ wanted |= IONIC_ETH_HW_TX_SG;
+ if (features & NETIF_F_HW_CSUM)
+ wanted |= IONIC_ETH_HW_TX_CSUM;
+ if (features & NETIF_F_TSO)
+ wanted |= IONIC_ETH_HW_TSO;
+ if (features & NETIF_F_TSO6)
+ wanted |= IONIC_ETH_HW_TSO_IPV6;
+ if (features & NETIF_F_TSO_ECN)
+ wanted |= IONIC_ETH_HW_TSO_ECN;
+ if (features & NETIF_F_GSO_GRE)
+ wanted |= IONIC_ETH_HW_TSO_GRE;
+ if (features & NETIF_F_GSO_GRE_CSUM)
+ wanted |= IONIC_ETH_HW_TSO_GRE_CSUM;
+ if (features & NETIF_F_GSO_IPXIP4)
+ wanted |= IONIC_ETH_HW_TSO_IPXIP4;
+ if (features & NETIF_F_GSO_IPXIP6)
+ wanted |= IONIC_ETH_HW_TSO_IPXIP6;
+ if (features & NETIF_F_GSO_UDP_TUNNEL)
+ wanted |= IONIC_ETH_HW_TSO_UDP;
+ if (features & NETIF_F_GSO_UDP_TUNNEL_CSUM)
+ wanted |= IONIC_ETH_HW_TSO_UDP_CSUM;
+
+ return cpu_to_le64(wanted);
+}
+
+static int ionic_set_nic_features(struct ionic_lif *lif,
+ netdev_features_t features)
+{
+ struct device *dev = lif->ionic->dev;
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.lif_setattr = {
+ .opcode = IONIC_CMD_LIF_SETATTR,
+ .index = cpu_to_le16(lif->index),
+ .attr = IONIC_LIF_ATTR_FEATURES,
+ },
+ };
+ u64 vlan_flags = IONIC_ETH_HW_VLAN_TX_TAG |
+ IONIC_ETH_HW_VLAN_RX_STRIP |
+ IONIC_ETH_HW_VLAN_RX_FILTER;
+ int err;
+
+ ctx.cmd.lif_setattr.features = ionic_netdev_features_to_nic(features);
+ err = ionic_adminq_post_wait(lif, &ctx);
+ if (err)
+ return err;
+
+ lif->hw_features = le64_to_cpu(ctx.cmd.lif_setattr.features &
+ ctx.comp.lif_setattr.features);
+
+ if ((vlan_flags & features) &&
+ !(vlan_flags & le64_to_cpu(ctx.comp.lif_setattr.features)))
+ dev_info_once(lif->ionic->dev, "NIC does not support vlan offload, likely in SmartNIC mode\n");
+
+ if (lif->hw_features & IONIC_ETH_HW_VLAN_TX_TAG)
+ dev_dbg(dev, "feature ETH_HW_VLAN_TX_TAG\n");
+ if (lif->hw_features & IONIC_ETH_HW_VLAN_RX_STRIP)
+ dev_dbg(dev, "feature ETH_HW_VLAN_RX_STRIP\n");
+ if (lif->hw_features & IONIC_ETH_HW_VLAN_RX_FILTER)
+ dev_dbg(dev, "feature ETH_HW_VLAN_RX_FILTER\n");
+ if (lif->hw_features & IONIC_ETH_HW_RX_HASH)
+ dev_dbg(dev, "feature ETH_HW_RX_HASH\n");
+ if (lif->hw_features & IONIC_ETH_HW_TX_SG)
+ dev_dbg(dev, "feature ETH_HW_TX_SG\n");
+ if (lif->hw_features & IONIC_ETH_HW_TX_CSUM)
+ dev_dbg(dev, "feature ETH_HW_TX_CSUM\n");
+ if (lif->hw_features & IONIC_ETH_HW_RX_CSUM)
+ dev_dbg(dev, "feature ETH_HW_RX_CSUM\n");
+ if (lif->hw_features & IONIC_ETH_HW_TSO)
+ dev_dbg(dev, "feature ETH_HW_TSO\n");
+ if (lif->hw_features & IONIC_ETH_HW_TSO_IPV6)
+ dev_dbg(dev, "feature ETH_HW_TSO_IPV6\n");
+ if (lif->hw_features & IONIC_ETH_HW_TSO_ECN)
+ dev_dbg(dev, "feature ETH_HW_TSO_ECN\n");
+ if (lif->hw_features & IONIC_ETH_HW_TSO_GRE)
+ dev_dbg(dev, "feature ETH_HW_TSO_GRE\n");
+ if (lif->hw_features & IONIC_ETH_HW_TSO_GRE_CSUM)
+ dev_dbg(dev, "feature ETH_HW_TSO_GRE_CSUM\n");
+ if (lif->hw_features & IONIC_ETH_HW_TSO_IPXIP4)
+ dev_dbg(dev, "feature ETH_HW_TSO_IPXIP4\n");
+ if (lif->hw_features & IONIC_ETH_HW_TSO_IPXIP6)
+ dev_dbg(dev, "feature ETH_HW_TSO_IPXIP6\n");
+ if (lif->hw_features & IONIC_ETH_HW_TSO_UDP)
+ dev_dbg(dev, "feature ETH_HW_TSO_UDP\n");
+ if (lif->hw_features & IONIC_ETH_HW_TSO_UDP_CSUM)
+ dev_dbg(dev, "feature ETH_HW_TSO_UDP_CSUM\n");
+
+ return 0;
+}
+
+static int ionic_init_nic_features(struct ionic_lif *lif)
+{
+ struct net_device *netdev = lif->netdev;
+ netdev_features_t features;
+ int err;
+
+ /* set up what we expect to support by default */
+ features = NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_HW_VLAN_CTAG_FILTER |
+ NETIF_F_RXHASH |
+ NETIF_F_SG |
+ NETIF_F_HW_CSUM |
+ NETIF_F_RXCSUM |
+ NETIF_F_TSO |
+ NETIF_F_TSO6 |
+ NETIF_F_TSO_ECN;
+
+ err = ionic_set_nic_features(lif, features);
+ if (err)
+ return err;
+
+ /* tell the netdev what we actually can support */
+ netdev->features |= NETIF_F_HIGHDMA;
+
+ if (lif->hw_features & IONIC_ETH_HW_VLAN_TX_TAG)
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
+ if (lif->hw_features & IONIC_ETH_HW_VLAN_RX_STRIP)
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+ if (lif->hw_features & IONIC_ETH_HW_VLAN_RX_FILTER)
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ if (lif->hw_features & IONIC_ETH_HW_RX_HASH)
+ netdev->hw_features |= NETIF_F_RXHASH;
+ if (lif->hw_features & IONIC_ETH_HW_TX_SG)
+ netdev->hw_features |= NETIF_F_SG;
+
+ if (lif->hw_features & IONIC_ETH_HW_TX_CSUM)
+ netdev->hw_enc_features |= NETIF_F_HW_CSUM;
+ if (lif->hw_features & IONIC_ETH_HW_RX_CSUM)
+ netdev->hw_enc_features |= NETIF_F_RXCSUM;
+ if (lif->hw_features & IONIC_ETH_HW_TSO)
+ netdev->hw_enc_features |= NETIF_F_TSO;
+ if (lif->hw_features & IONIC_ETH_HW_TSO_IPV6)
+ netdev->hw_enc_features |= NETIF_F_TSO6;
+ if (lif->hw_features & IONIC_ETH_HW_TSO_ECN)
+ netdev->hw_enc_features |= NETIF_F_TSO_ECN;
+ if (lif->hw_features & IONIC_ETH_HW_TSO_GRE)
+ netdev->hw_enc_features |= NETIF_F_GSO_GRE;
+ if (lif->hw_features & IONIC_ETH_HW_TSO_GRE_CSUM)
+ netdev->hw_enc_features |= NETIF_F_GSO_GRE_CSUM;
+ if (lif->hw_features & IONIC_ETH_HW_TSO_IPXIP4)
+ netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4;
+ if (lif->hw_features & IONIC_ETH_HW_TSO_IPXIP6)
+ netdev->hw_enc_features |= NETIF_F_GSO_IPXIP6;
+ if (lif->hw_features & IONIC_ETH_HW_TSO_UDP)
+ netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
+ if (lif->hw_features & IONIC_ETH_HW_TSO_UDP_CSUM)
+ netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+ netdev->hw_features |= netdev->hw_enc_features;
+ netdev->features |= netdev->hw_features;
+
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+
+ return 0;
+}
+
+static int ionic_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ int err;
+
+ netdev_dbg(netdev, "%s: netdev->features=0x%08llx new_features=0x%08llx\n",
+ __func__, (u64)lif->netdev->features, (u64)features);
+
+ err = ionic_set_nic_features(lif, features);
+
+ return err;
+}
+
+static int ionic_set_mac_address(struct net_device *netdev, void *sa)
+{
+ struct sockaddr *addr = sa;
+ u8 *mac;
+ int err;
+
+ mac = (u8 *)addr->sa_data;
+ if (ether_addr_equal(netdev->dev_addr, mac))
+ return 0;
+
+ err = eth_prepare_mac_addr_change(netdev, addr);
+ if (err)
+ return err;
+
+ if (!is_zero_ether_addr(netdev->dev_addr)) {
+ netdev_info(netdev, "deleting mac addr %pM\n",
+ netdev->dev_addr);
+ ionic_addr_del(netdev, netdev->dev_addr);
+ }
+
+ eth_commit_mac_addr_change(netdev, addr);
+ netdev_info(netdev, "updating mac addr %pM\n", mac);
+
+ return ionic_addr_add(netdev, mac);
+}
+
+static int ionic_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.lif_setattr = {
+ .opcode = IONIC_CMD_LIF_SETATTR,
+ .index = cpu_to_le16(lif->index),
+ .attr = IONIC_LIF_ATTR_MTU,
+ .mtu = cpu_to_le32(new_mtu),
+ },
+ };
+ int err;
+
+ err = ionic_adminq_post_wait(lif, &ctx);
+ if (err)
+ return err;
+
+ netdev->mtu = new_mtu;
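+ /* rebuild the queues so the Rx buffers can be sized for the new MTU */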
+ err = ionic_reset_queues(lif);
+
+ return err;
+}
+
+static void ionic_tx_timeout_work(struct work_struct *ws)
+{
+ struct ionic_lif *lif = container_of(ws, struct ionic_lif, tx_timeout_work);
+
+ netdev_info(lif->netdev, "Tx Timeout recovery\n");
+
+ rtnl_lock();
+ ionic_reset_queues(lif);
+ rtnl_unlock();
+}
+
+static void ionic_tx_timeout(struct net_device *netdev)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+
+ schedule_work(&lif->tx_timeout_work);
+}
+
+static int ionic_vlan_rx_add_vid(struct net_device *netdev, __be16 proto,
+ u16 vid)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.rx_filter_add = {
+ .opcode = IONIC_CMD_RX_FILTER_ADD,
+ .lif_index = cpu_to_le16(lif->index),
+ .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_VLAN),
+ .vlan.vlan = cpu_to_le16(vid),
+ },
+ };
+ int err;
+
+ err = ionic_adminq_post_wait(lif, &ctx);
+ if (err)
+ return err;
+
+ netdev_dbg(netdev, "rx_filter add VLAN %d (id %d)\n", vid,
+ le32_to_cpu(ctx.comp.rx_filter_add.filter_id));
+
+ return ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx);
+}
+
+static int ionic_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto,
+ u16 vid)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.rx_filter_del = {
+ .opcode = IONIC_CMD_RX_FILTER_DEL,
+ .lif_index = cpu_to_le16(lif->index),
+ },
+ };
+ struct ionic_rx_filter *f;
+
+ spin_lock_bh(&lif->rx_filters.lock);
+
+ f = ionic_rx_filter_by_vlan(lif, vid);
+ if (!f) {
+ spin_unlock_bh(&lif->rx_filters.lock);
+ return -ENOENT;
+ }
+
+ ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id);
+
+ netdev_dbg(netdev, "rx_filter del VLAN %d (id %d)\n", vid,
+ le32_to_cpu(ctx.cmd.rx_filter_del.filter_id));
+
+ ionic_rx_filter_free(lif, f);
+ spin_unlock_bh(&lif->rx_filters.lock);
+
+ return ionic_adminq_post_wait(lif, &ctx);
+}
+
+int ionic_lif_rss_config(struct ionic_lif *lif, const u16 types,
+ const u8 *key, const u32 *indir)
+{
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.lif_setattr = {
+ .opcode = IONIC_CMD_LIF_SETATTR,
+ .attr = IONIC_LIF_ATTR_RSS,
+ .rss.types = cpu_to_le16(types),
+ .rss.addr = cpu_to_le64(lif->rss_ind_tbl_pa),
+ },
+ };
+ unsigned int i, tbl_sz;
+
+ lif->rss_types = types;
+
+ if (key)
+ memcpy(lif->rss_hash_key, key, IONIC_RSS_HASH_KEY_SIZE);
+
+ if (indir) {
+ tbl_sz = le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz);
+ for (i = 0; i < tbl_sz; i++)
+ lif->rss_ind_tbl[i] = indir[i];
+ }
+
+ memcpy(ctx.cmd.lif_setattr.rss.key, lif->rss_hash_key,
+ IONIC_RSS_HASH_KEY_SIZE);
+
+ return ionic_adminq_post_wait(lif, &ctx);
+}
+
+static int ionic_lif_rss_init(struct ionic_lif *lif)
+{
+ u8 rss_key[IONIC_RSS_HASH_KEY_SIZE];
+ unsigned int tbl_sz;
+ unsigned int i;
+
+ netdev_rss_key_fill(rss_key, IONIC_RSS_HASH_KEY_SIZE);
+
+ lif->rss_types = IONIC_RSS_TYPE_IPV4 |
+ IONIC_RSS_TYPE_IPV4_TCP |
+ IONIC_RSS_TYPE_IPV4_UDP |
+ IONIC_RSS_TYPE_IPV6 |
+ IONIC_RSS_TYPE_IPV6_TCP |
+ IONIC_RSS_TYPE_IPV6_UDP;
+
+ /* Fill indirection table with 'default' values */
+ tbl_sz = le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz);
+ for (i = 0; i < tbl_sz; i++)
+ lif->rss_ind_tbl[i] = ethtool_rxfh_indir_default(i, lif->nxqs);
+
+ return ionic_lif_rss_config(lif, lif->rss_types, rss_key, NULL);
+}
+
+static int ionic_lif_rss_deinit(struct ionic_lif *lif)
+{
+ return ionic_lif_rss_config(lif, 0x0, NULL, NULL);
+}
+
+static void ionic_txrx_disable(struct ionic_lif *lif)
+{
+ unsigned int i;
+
+ for (i = 0; i < lif->nxqs; i++) {
+ ionic_qcq_disable(lif->txqcqs[i].qcq);
+ ionic_qcq_disable(lif->rxqcqs[i].qcq);
+ }
+}
+
+static void ionic_txrx_deinit(struct ionic_lif *lif)
+{
+ unsigned int i;
+
+ for (i = 0; i < lif->nxqs; i++) {
+ ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq);
+ ionic_tx_flush(&lif->txqcqs[i].qcq->cq);
+
+ ionic_lif_qcq_deinit(lif, lif->rxqcqs[i].qcq);
+ ionic_rx_flush(&lif->rxqcqs[i].qcq->cq);
+ ionic_rx_empty(&lif->rxqcqs[i].qcq->q);
+ }
+}
+
+static void ionic_txrx_free(struct ionic_lif *lif)
+{
+ unsigned int i;
+
+ for (i = 0; i < lif->nxqs; i++) {
+ ionic_qcq_free(lif, lif->txqcqs[i].qcq);
+ lif->txqcqs[i].qcq = NULL;
+
+ ionic_qcq_free(lif, lif->rxqcqs[i].qcq);
+ lif->rxqcqs[i].qcq = NULL;
+ }
+}
+
+static int ionic_txrx_alloc(struct ionic_lif *lif)
+{
+ unsigned int flags;
+ unsigned int i;
+ int err = 0;
+ u32 coal;
+
+ flags = IONIC_QCQ_F_TX_STATS | IONIC_QCQ_F_SG;
+ for (i = 0; i < lif->nxqs; i++) {
+ err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags,
+ lif->ntxq_descs,
+ sizeof(struct ionic_txq_desc),
+ sizeof(struct ionic_txq_comp),
+ sizeof(struct ionic_txq_sg_desc),
+ lif->kern_pid, &lif->txqcqs[i].qcq);
+ if (err)
+ goto err_out;
+
+ lif->txqcqs[i].qcq->stats = lif->txqcqs[i].stats;
+ }
+
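+ /* only the Rx queues get their own interrupt; each Tx queue is
+ * linked to its partner Rx queue's interrupt below
+ */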
+ flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_INTR;
+ coal = ionic_coal_usec_to_hw(lif->ionic, lif->rx_coalesce_usecs);
+ for (i = 0; i < lif->nxqs; i++) {
+ err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags,
+ lif->nrxq_descs,
+ sizeof(struct ionic_rxq_desc),
+ sizeof(struct ionic_rxq_comp),
+ 0, lif->kern_pid, &lif->rxqcqs[i].qcq);
+ if (err)
+ goto err_out;
+
+ lif->rxqcqs[i].qcq->stats = lif->rxqcqs[i].stats;
+
+ ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
+ lif->rxqcqs[i].qcq->intr.index, coal);
+ ionic_link_qcq_interrupts(lif->rxqcqs[i].qcq,
+ lif->txqcqs[i].qcq);
+ }
+
+ return 0;
+
+err_out:
+ ionic_txrx_free(lif);
+
+ return err;
+}
+
+static int ionic_txrx_init(struct ionic_lif *lif)
+{
+ unsigned int i;
+ int err;
+
+ for (i = 0; i < lif->nxqs; i++) {
+ err = ionic_lif_txq_init(lif, lif->txqcqs[i].qcq);
+ if (err)
+ goto err_out;
+
+ err = ionic_lif_rxq_init(lif, lif->rxqcqs[i].qcq);
+ if (err) {
+ ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq);
+ goto err_out;
+ }
+ }
+
+ if (lif->netdev->features & NETIF_F_RXHASH)
+ ionic_lif_rss_init(lif);
+
+ ionic_set_rx_mode(lif->netdev);
+
+ return 0;
+
+err_out:
+ while (i--) {
+ ionic_lif_qcq_deinit(lif, lif->txqcqs[i].qcq);
+ ionic_lif_qcq_deinit(lif, lif->rxqcqs[i].qcq);
+ }
+
+ return err;
+}
+
+static int ionic_txrx_enable(struct ionic_lif *lif)
+{
+ int i, err;
+
+ for (i = 0; i < lif->nxqs; i++) {
+ err = ionic_qcq_enable(lif->txqcqs[i].qcq);
+ if (err)
+ goto err_out;
+
+ ionic_rx_fill(&lif->rxqcqs[i].qcq->q);
+ err = ionic_qcq_enable(lif->rxqcqs[i].qcq);
+ if (err) {
+ ionic_qcq_disable(lif->txqcqs[i].qcq);
+ goto err_out;
+ }
+ }
+
+ return 0;
+
+err_out:
+ while (i--) {
+ ionic_qcq_disable(lif->rxqcqs[i].qcq);
+ ionic_qcq_disable(lif->txqcqs[i].qcq);
+ }
+
+ return err;
+}
+
+int ionic_open(struct net_device *netdev)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ int err;
+
+ netif_carrier_off(netdev);
+
+ err = ionic_txrx_alloc(lif);
+ if (err)
+ return err;
+
+ err = ionic_txrx_init(lif);
+ if (err)
+ goto err_txrx_free;
+
+ err = ionic_txrx_enable(lif);
+ if (err)
+ goto err_txrx_deinit;
+
+ netif_set_real_num_tx_queues(netdev, lif->nxqs);
+ netif_set_real_num_rx_queues(netdev, lif->nxqs);
+
+ set_bit(IONIC_LIF_UP, lif->state);
+
+ ionic_link_status_check_request(lif);
+ if (netif_carrier_ok(netdev))
+ netif_tx_wake_all_queues(netdev);
+
+ return 0;
+
+err_txrx_deinit:
+ ionic_txrx_deinit(lif);
+err_txrx_free:
+ ionic_txrx_free(lif);
+ return err;
+}
+
+int ionic_stop(struct net_device *netdev)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ int err = 0;
+
+ if (!test_bit(IONIC_LIF_UP, lif->state)) {
+ dev_dbg(lif->ionic->dev, "%s: %s state=DOWN\n",
+ __func__, lif->name);
+ return 0;
+ }
+ dev_dbg(lif->ionic->dev, "%s: %s state=UP\n", __func__, lif->name);
+ clear_bit(IONIC_LIF_UP, lif->state);
+
+ /* carrier off before disabling queues to avoid watchdog timeout */
+ netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(netdev);
+ netif_tx_disable(netdev);
+
+ ionic_txrx_disable(lif);
+ ionic_txrx_deinit(lif);
+ ionic_txrx_free(lif);
+
+ return err;
+}
+
+static const struct net_device_ops ionic_netdev_ops = {
+ .ndo_open = ionic_open,
+ .ndo_stop = ionic_stop,
+ .ndo_start_xmit = ionic_start_xmit,
+ .ndo_get_stats64 = ionic_get_stats64,
+ .ndo_set_rx_mode = ionic_set_rx_mode,
+ .ndo_set_features = ionic_set_features,
+ .ndo_set_mac_address = ionic_set_mac_address,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_tx_timeout = ionic_tx_timeout,
+ .ndo_change_mtu = ionic_change_mtu,
+ .ndo_vlan_rx_add_vid = ionic_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = ionic_vlan_rx_kill_vid,
+};
+
+int ionic_reset_queues(struct ionic_lif *lif)
+{
+ bool running;
+ int err = 0;
+
+ /* Put off the next watchdog timeout */
+ netif_trans_update(lif->netdev);
+
+ if (!ionic_wait_for_bit(lif, IONIC_LIF_QUEUE_RESET))
+ return -EBUSY;
+
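+ /* if the interface is running, bounce it so the queues are torn
+ * down and rebuilt with the current configuration
+ */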
+ running = netif_running(lif->netdev);
+ if (running)
+ err = ionic_stop(lif->netdev);
+ if (!err && running)
+ ionic_open(lif->netdev);
+
+ clear_bit(IONIC_LIF_QUEUE_RESET, lif->state);
+
+ return err;
+}
+
+static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index)
+{
+ struct device *dev = ionic->dev;
+ struct net_device *netdev;
+ struct ionic_lif *lif;
+ int tbl_sz;
+ u32 coal;
+ int err;
+
+ netdev = alloc_etherdev_mqs(sizeof(*lif),
+ ionic->ntxqs_per_lif, ionic->ntxqs_per_lif);
+ if (!netdev) {
+ dev_err(dev, "Cannot allocate netdev, aborting\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ SET_NETDEV_DEV(netdev, dev);
+
+ lif = netdev_priv(netdev);
+ lif->netdev = netdev;
+ ionic->master_lif = lif;
+ netdev->netdev_ops = &ionic_netdev_ops;
+ ionic_ethtool_set_ops(netdev);
+
+ netdev->watchdog_timeo = 2 * HZ;
+ netdev->min_mtu = IONIC_MIN_MTU;
+ netdev->max_mtu = IONIC_MAX_MTU;
+
+ lif->neqs = ionic->neqs_per_lif;
+ lif->nxqs = ionic->ntxqs_per_lif;
+
+ lif->ionic = ionic;
+ lif->index = index;
+ lif->ntxq_descs = IONIC_DEF_TXRX_DESC;
+ lif->nrxq_descs = IONIC_DEF_TXRX_DESC;
+
+ /* Convert the default coalesce value to actual hw resolution */
+ coal = ionic_coal_usec_to_hw(lif->ionic, IONIC_ITR_COAL_USEC_DEFAULT);
+ lif->rx_coalesce_usecs = ionic_coal_hw_to_usec(lif->ionic, coal);
+
+ snprintf(lif->name, sizeof(lif->name), "lif%u", index);
+
+ spin_lock_init(&lif->adminq_lock);
+
+ spin_lock_init(&lif->deferred.lock);
+ INIT_LIST_HEAD(&lif->deferred.list);
+ INIT_WORK(&lif->deferred.work, ionic_lif_deferred_work);
+
+ /* allocate lif info */
+ lif->info_sz = ALIGN(sizeof(*lif->info), PAGE_SIZE);
+ lif->info = dma_alloc_coherent(dev, lif->info_sz,
+ &lif->info_pa, GFP_KERNEL);
+ if (!lif->info) {
+ dev_err(dev, "Failed to allocate lif info, aborting\n");
+ err = -ENOMEM;
+ goto err_out_free_netdev;
+ }
+
+ /* allocate queues */
+ err = ionic_qcqs_alloc(lif);
+ if (err)
+ goto err_out_free_lif_info;
+
+ /* allocate rss indirection table */
+ tbl_sz = le16_to_cpu(lif->ionic->ident.lif.eth.rss_ind_tbl_sz);
+ lif->rss_ind_tbl_sz = sizeof(*lif->rss_ind_tbl) * tbl_sz;
+ lif->rss_ind_tbl = dma_alloc_coherent(dev, lif->rss_ind_tbl_sz,
+ &lif->rss_ind_tbl_pa,
+ GFP_KERNEL);
+
+ if (!lif->rss_ind_tbl) {
+ dev_err(dev, "Failed to allocate rss indirection table, aborting\n");
+ err = -ENOMEM;
+ goto err_out_free_qcqs;
+ }
+
+ list_add_tail(&lif->list, &ionic->lifs);
+
+ return lif;
+
+err_out_free_qcqs:
+ ionic_qcqs_free(lif);
+err_out_free_lif_info:
+ dma_free_coherent(dev, lif->info_sz, lif->info, lif->info_pa);
+ lif->info = NULL;
+ lif->info_pa = 0;
+err_out_free_netdev:
+ free_netdev(lif->netdev);
+ lif = NULL;
+
+ return ERR_PTR(err);
+}
+
+int ionic_lifs_alloc(struct ionic *ionic)
+{
+ struct ionic_lif *lif;
+
+ INIT_LIST_HEAD(&ionic->lifs);
+
+ /* only build the first lif, others are for later features */
+ set_bit(0, ionic->lifbits);
+ lif = ionic_lif_alloc(ionic, 0);
+
+ return PTR_ERR_OR_ZERO(lif);
+}
+
+static void ionic_lif_reset(struct ionic_lif *lif)
+{
+ struct ionic_dev *idev = &lif->ionic->idev;
+
+ mutex_lock(&lif->ionic->dev_cmd_lock);
+ ionic_dev_cmd_lif_reset(idev, lif->index);
+ ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT);
+ mutex_unlock(&lif->ionic->dev_cmd_lock);
+}
+
+static void ionic_lif_free(struct ionic_lif *lif)
+{
+ struct device *dev = lif->ionic->dev;
+
+ /* free rss indirection table */
+ dma_free_coherent(dev, lif->rss_ind_tbl_sz, lif->rss_ind_tbl,
+ lif->rss_ind_tbl_pa);
+ lif->rss_ind_tbl = NULL;
+ lif->rss_ind_tbl_pa = 0;
+
+ /* free queues */
+ ionic_qcqs_free(lif);
+ ionic_lif_reset(lif);
+
+ /* free lif info */
+ dma_free_coherent(dev, lif->info_sz, lif->info, lif->info_pa);
+ lif->info = NULL;
+ lif->info_pa = 0;
+
+ /* unmap doorbell page */
+ ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage);
+ lif->kern_dbpage = NULL;
+ kfree(lif->dbid_inuse);
+ lif->dbid_inuse = NULL;
+
+ /* free netdev & lif */
+ ionic_debugfs_del_lif(lif);
+ list_del(&lif->list);
+ free_netdev(lif->netdev);
+}
+
+void ionic_lifs_free(struct ionic *ionic)
+{
+ struct list_head *cur, *tmp;
+ struct ionic_lif *lif;
+
+ list_for_each_safe(cur, tmp, &ionic->lifs) {
+ lif = list_entry(cur, struct ionic_lif, list);
+
+ ionic_lif_free(lif);
+ }
+}
+
+static void ionic_lif_deinit(struct ionic_lif *lif)
+{
+ if (!test_bit(IONIC_LIF_INITED, lif->state))
+ return;
+
+ clear_bit(IONIC_LIF_INITED, lif->state);
+
+ ionic_rx_filters_deinit(lif);
+ ionic_lif_rss_deinit(lif);
+
+ napi_disable(&lif->adminqcq->napi);
+ ionic_lif_qcq_deinit(lif, lif->notifyqcq);
+ ionic_lif_qcq_deinit(lif, lif->adminqcq);
+
+ ionic_lif_reset(lif);
+}
+
+void ionic_lifs_deinit(struct ionic *ionic)
+{
+ struct list_head *cur, *tmp;
+ struct ionic_lif *lif;
+
+ list_for_each_safe(cur, tmp, &ionic->lifs) {
+ lif = list_entry(cur, struct ionic_lif, list);
+ ionic_lif_deinit(lif);
+ }
+}
+
+static int ionic_lif_adminq_init(struct ionic_lif *lif)
+{
+ struct device *dev = lif->ionic->dev;
+ struct ionic_q_init_comp comp;
+ struct ionic_dev *idev;
+ struct ionic_qcq *qcq;
+ struct ionic_queue *q;
+ int err;
+
+ idev = &lif->ionic->idev;
+ qcq = lif->adminqcq;
+ q = &qcq->q;
+
+ mutex_lock(&lif->ionic->dev_cmd_lock);
+ ionic_dev_cmd_adminq_init(idev, qcq, lif->index, qcq->intr.index);
+ err = ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT);
+ ionic_dev_cmd_comp(idev, (union ionic_dev_cmd_comp *)&comp);
+ mutex_unlock(&lif->ionic->dev_cmd_lock);
+ if (err) {
+ netdev_err(lif->netdev, "adminq init failed %d\n", err);
+ return err;
+ }
+
+ q->hw_type = comp.hw_type;
+ q->hw_index = le32_to_cpu(comp.hw_index);
+ q->dbval = IONIC_DBELL_QID(q->hw_index);
+
+ dev_dbg(dev, "adminq->hw_type %d\n", q->hw_type);
+ dev_dbg(dev, "adminq->hw_index %d\n", q->hw_index);
+
+ netif_napi_add(lif->netdev, &qcq->napi, ionic_adminq_napi,
+ NAPI_POLL_WEIGHT);
+
+ err = ionic_request_irq(lif, qcq);
+ if (err) {
+ netdev_warn(lif->netdev, "adminq irq request failed %d\n", err);
+ netif_napi_del(&qcq->napi);
+ return err;
+ }
+
+ napi_enable(&qcq->napi);
+
+ if (qcq->flags & IONIC_QCQ_F_INTR)
+ ionic_intr_mask(idev->intr_ctrl, qcq->intr.index,
+ IONIC_INTR_MASK_CLEAR);
+
+ qcq->flags |= IONIC_QCQ_F_INITED;
+
+ ionic_debugfs_add_qcq(lif, qcq);
+
+ return 0;
+}
+
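+/* Bring up the NotifyQ; it shares the AdminQ's interrupt (see the
+ * intr_index setting below)
+ */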
+static int ionic_lif_notifyq_init(struct ionic_lif *lif)
+{
+ struct ionic_qcq *qcq = lif->notifyqcq;
+ struct device *dev = lif->ionic->dev;
+ struct ionic_queue *q = &qcq->q;
+ int err;
+
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.q_init = {
+ .opcode = IONIC_CMD_Q_INIT,
+ .lif_index = cpu_to_le16(lif->index),
+ .type = q->type,
+ .index = cpu_to_le32(q->index),
+ .flags = cpu_to_le16(IONIC_QINIT_F_IRQ |
+ IONIC_QINIT_F_ENA),
+ .intr_index = cpu_to_le16(lif->adminqcq->intr.index),
+ .pid = cpu_to_le16(q->pid),
+ .ring_size = ilog2(q->num_descs),
+ .ring_base = cpu_to_le64(q->base_pa),
+ }
+ };
+
+ dev_dbg(dev, "notifyq_init.pid %d\n", ctx.cmd.q_init.pid);
+ dev_dbg(dev, "notifyq_init.index %d\n", ctx.cmd.q_init.index);
+ dev_dbg(dev, "notifyq_init.ring_base 0x%llx\n", ctx.cmd.q_init.ring_base);
+ dev_dbg(dev, "notifyq_init.ring_size %d\n", ctx.cmd.q_init.ring_size);
+
+ err = ionic_adminq_post_wait(lif, &ctx);
+ if (err)
+ return err;
+
+ q->hw_type = ctx.comp.q_init.hw_type;
+ q->hw_index = le32_to_cpu(ctx.comp.q_init.hw_index);
+ q->dbval = IONIC_DBELL_QID(q->hw_index);
+
+ dev_dbg(dev, "notifyq->hw_type %d\n", q->hw_type);
+ dev_dbg(dev, "notifyq->hw_index %d\n", q->hw_index);
+
+ /* preset the callback info */
+ q->info[0].cb_arg = lif;
+
+ qcq->flags |= IONIC_QCQ_F_INITED;
+
+ ionic_debugfs_add_qcq(lif, qcq);
+
+ return 0;
+}
+
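+/* Fetch the station MAC address from the device and install it as the
+ * netdev address and as an Rx filter
+ */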
+static int ionic_station_set(struct ionic_lif *lif)
+{
+ struct net_device *netdev = lif->netdev;
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.lif_getattr = {
+ .opcode = IONIC_CMD_LIF_GETATTR,
+ .index = cpu_to_le16(lif->index),
+ .attr = IONIC_LIF_ATTR_MAC,
+ },
+ };
+ struct sockaddr addr;
+ int err;
+
+ err = ionic_adminq_post_wait(lif, &ctx);
+ if (err)
+ return err;
+
+ memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len);
+ addr.sa_family = AF_INET;
+ err = eth_prepare_mac_addr_change(netdev, &addr);
+ if (err)
+ return err;
+
+ if (!is_zero_ether_addr(netdev->dev_addr)) {
+ netdev_dbg(lif->netdev, "deleting station MAC addr %pM\n",
+ netdev->dev_addr);
+ ionic_lif_addr(lif, netdev->dev_addr, false);
+ }
+
+ eth_commit_mac_addr_change(netdev, &addr);
+ netdev_dbg(lif->netdev, "adding station MAC addr %pM\n",
+ netdev->dev_addr);
+ ionic_lif_addr(lif, netdev->dev_addr, true);
+
+ return 0;
+}
+
+static int ionic_lif_init(struct ionic_lif *lif)
+{
+ struct ionic_dev *idev = &lif->ionic->idev;
+ struct device *dev = lif->ionic->dev;
+ struct ionic_lif_init_comp comp;
+ int dbpage_num;
+ int err;
+
+ ionic_debugfs_add_lif(lif);
+
+ mutex_lock(&lif->ionic->dev_cmd_lock);
+ ionic_dev_cmd_lif_init(idev, lif->index, lif->info_pa);
+ err = ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT);
+ ionic_dev_cmd_comp(idev, (union ionic_dev_cmd_comp *)&comp);
+ mutex_unlock(&lif->ionic->dev_cmd_lock);
+ if (err)
+ return err;
+
+ lif->hw_index = le16_to_cpu(comp.hw_index);
+
+ /* now that we have the hw_index we can figure out our doorbell page */
+ lif->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif);
+ if (!lif->dbid_count) {
+ dev_err(dev, "No doorbell pages, aborting\n");
+ return -EINVAL;
+ }
+
+ lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL);
+ if (!lif->dbid_inuse) {
+ dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n");
+ return -ENOMEM;
+ }
+
+ /* first doorbell id reserved for kernel (dbid aka pid == zero) */
+ set_bit(0, lif->dbid_inuse);
+ lif->kern_pid = 0;
+
+ dbpage_num = ionic_db_page_num(lif, lif->kern_pid);
+ lif->kern_dbpage = ionic_bus_map_dbpage(lif->ionic, dbpage_num);
+ if (!lif->kern_dbpage) {
+ dev_err(dev, "Cannot map dbpage, aborting\n");
+ err = -ENOMEM;
+ goto err_out_free_dbid;
+ }
+
+ err = ionic_lif_adminq_init(lif);
+ if (err)
+ goto err_out_adminq_deinit;
+
+ if (lif->ionic->nnqs_per_lif) {
+ err = ionic_lif_notifyq_init(lif);
+ if (err)
+ goto err_out_notifyq_deinit;
+ }
+
+ err = ionic_init_nic_features(lif);
+ if (err)
+ goto err_out_notifyq_deinit;
+
+ err = ionic_rx_filters_init(lif);
+ if (err)
+ goto err_out_notifyq_deinit;
+
+ err = ionic_station_set(lif);
+ if (err)
+ goto err_out_notifyq_deinit;
+
+ lif->rx_copybreak = IONIC_RX_COPYBREAK_DEFAULT;
+
+ set_bit(IONIC_LIF_INITED, lif->state);
+
+ INIT_WORK(&lif->tx_timeout_work, ionic_tx_timeout_work);
+
+ return 0;
+
+err_out_notifyq_deinit:
+ ionic_lif_qcq_deinit(lif, lif->notifyqcq);
+err_out_adminq_deinit:
+ ionic_lif_qcq_deinit(lif, lif->adminqcq);
+ ionic_lif_reset(lif);
+ ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage);
+ lif->kern_dbpage = NULL;
+err_out_free_dbid:
+ kfree(lif->dbid_inuse);
+ lif->dbid_inuse = NULL;
+
+ return err;
+}
+
+int ionic_lifs_init(struct ionic *ionic)
+{
+ struct list_head *cur, *tmp;
+ struct ionic_lif *lif;
+ int err;
+
+ list_for_each_safe(cur, tmp, &ionic->lifs) {
+ lif = list_entry(cur, struct ionic_lif, list);
+ err = ionic_lif_init(lif);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void ionic_lif_notify_work(struct work_struct *ws)
+{
+}
+
+static void ionic_lif_set_netdev_info(struct ionic_lif *lif)
+{
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.lif_setattr = {
+ .opcode = IONIC_CMD_LIF_SETATTR,
+ .index = cpu_to_le16(lif->index),
+ .attr = IONIC_LIF_ATTR_NAME,
+ },
+ };
+
+ strlcpy(ctx.cmd.lif_setattr.name, lif->netdev->name,
+ sizeof(ctx.cmd.lif_setattr.name));
+
+ ionic_adminq_post_wait(lif, &ctx);
+}
+
+static struct ionic_lif *ionic_netdev_lif(struct net_device *netdev)
+{
+ if (!netdev || netdev->netdev_ops->ndo_start_xmit != ionic_start_xmit)
+ return NULL;
+
+ return netdev_priv(netdev);
+}
+
+static int ionic_lif_notify(struct notifier_block *nb,
+ unsigned long event, void *info)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(info);
+ struct ionic *ionic = container_of(nb, struct ionic, nb);
+ struct ionic_lif *lif = ionic_netdev_lif(ndev);
+
+ if (!lif || lif->ionic != ionic)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGENAME:
+ ionic_lif_set_netdev_info(lif);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+int ionic_lifs_register(struct ionic *ionic)
+{
+ int err;
+
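+ /* the netdevice notifier is used to catch events such as a rename
+ * so the new name can be passed along to the device
+ * (see ionic_lif_notify)
+ */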
+ INIT_WORK(&ionic->nb_work, ionic_lif_notify_work);
+
+ ionic->nb.notifier_call = ionic_lif_notify;
+
+ err = register_netdevice_notifier(&ionic->nb);
+ if (err)
+ ionic->nb.notifier_call = NULL;
+
+ /* only register LIF0 for now */
+ err = register_netdev(ionic->master_lif->netdev);
+ if (err) {
+ dev_err(ionic->dev, "Cannot register net device, aborting\n");
+ return err;
+ }
+
+ ionic_link_status_check_request(ionic->master_lif);
+ ionic->master_lif->registered = true;
+
+ return 0;
+}
+
+void ionic_lifs_unregister(struct ionic *ionic)
+{
+ if (ionic->nb.notifier_call) {
+ unregister_netdevice_notifier(&ionic->nb);
+ cancel_work_sync(&ionic->nb_work);
+ ionic->nb.notifier_call = NULL;
+ }
+
+ /* There is only one lif ever registered in the
+ * current model, so don't bother searching the
+ * ionic->lifs list for candidates to unregister
+ */
+ cancel_work_sync(&ionic->master_lif->deferred.work);
+ cancel_work_sync(&ionic->master_lif->tx_timeout_work);
+ if (ionic->master_lif->netdev->reg_state == NETREG_REGISTERED)
+ unregister_netdev(ionic->master_lif->netdev);
+}
+
+int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
+ union ionic_lif_identity *lid)
+{
+ struct ionic_dev *idev = &ionic->idev;
+ size_t sz;
+ int err;
+
+ sz = min(sizeof(*lid), sizeof(idev->dev_cmd_regs->data));
+
+ mutex_lock(&ionic->dev_cmd_lock);
+ ionic_dev_cmd_lif_identify(idev, lif_type, IONIC_IDENTITY_VERSION_1);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ memcpy_fromio(lid, &idev->dev_cmd_regs->data, sz);
+ mutex_unlock(&ionic->dev_cmd_lock);
+ if (err)
+ return err;
+
+ dev_dbg(ionic->dev, "capabilities 0x%llx\n",
+ le64_to_cpu(lid->capabilities));
+
+ dev_dbg(ionic->dev, "eth.max_ucast_filters %d\n",
+ le32_to_cpu(lid->eth.max_ucast_filters));
+ dev_dbg(ionic->dev, "eth.max_mcast_filters %d\n",
+ le32_to_cpu(lid->eth.max_mcast_filters));
+ dev_dbg(ionic->dev, "eth.features 0x%llx\n",
+ le64_to_cpu(lid->eth.config.features));
+ dev_dbg(ionic->dev, "eth.queue_count[IONIC_QTYPE_ADMINQ] %d\n",
+ le32_to_cpu(lid->eth.config.queue_count[IONIC_QTYPE_ADMINQ]));
+ dev_dbg(ionic->dev, "eth.queue_count[IONIC_QTYPE_NOTIFYQ] %d\n",
+ le32_to_cpu(lid->eth.config.queue_count[IONIC_QTYPE_NOTIFYQ]));
+ dev_dbg(ionic->dev, "eth.queue_count[IONIC_QTYPE_RXQ] %d\n",
+ le32_to_cpu(lid->eth.config.queue_count[IONIC_QTYPE_RXQ]));
+ dev_dbg(ionic->dev, "eth.queue_count[IONIC_QTYPE_TXQ] %d\n",
+ le32_to_cpu(lid->eth.config.queue_count[IONIC_QTYPE_TXQ]));
+ dev_dbg(ionic->dev, "eth.config.name %s\n", lid->eth.config.name);
+ dev_dbg(ionic->dev, "eth.config.mac %pM\n", lid->eth.config.mac);
+ dev_dbg(ionic->dev, "eth.config.mtu %d\n",
+ le32_to_cpu(lid->eth.config.mtu));
+
+ return 0;
+}
+
+int ionic_lifs_size(struct ionic *ionic)
+{
+ struct ionic_identity *ident = &ionic->ident;
+ unsigned int nintrs, dev_nintrs;
+ union ionic_lif_config *lc;
+ unsigned int ntxqs_per_lif;
+ unsigned int nrxqs_per_lif;
+ unsigned int neqs_per_lif;
+ unsigned int nnqs_per_lif;
+ unsigned int nxqs, neqs;
+ unsigned int min_intrs;
+ int err;
+
+ lc = &ident->lif.eth.config;
+ dev_nintrs = le32_to_cpu(ident->dev.nintrs);
+ neqs_per_lif = le32_to_cpu(ident->lif.rdma.eq_qtype.qid_count);
+ nnqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_NOTIFYQ]);
+ ntxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_TXQ]);
+ nrxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_RXQ]);
+
+ nxqs = min(ntxqs_per_lif, nrxqs_per_lif);
+ nxqs = min(nxqs, num_online_cpus());
+ neqs = min(neqs_per_lif, num_online_cpus());
+
+try_again:
+ /* interrupt usage:
+ * 1 for master lif adminq/notifyq
+ * 1 for each CPU for master lif TxRx queue pairs
+ * whatever's left is for RDMA queues
+ */
+ nintrs = 1 + nxqs + neqs;
+ min_intrs = 2; /* adminq + 1 TxRx queue pair */
+
+ if (nintrs > dev_nintrs)
+ goto try_fewer;
+
+ err = ionic_bus_alloc_irq_vectors(ionic, nintrs);
+ if (err < 0 && err != -ENOSPC) {
+ dev_err(ionic->dev, "Can't get intrs from OS: %d\n", err);
+ return err;
+ }
+ if (err == -ENOSPC)
+ goto try_fewer;
+
+ if (err != nintrs) {
+ ionic_bus_free_irq_vectors(ionic);
+ goto try_fewer;
+ }
+
+ ionic->nnqs_per_lif = nnqs_per_lif;
+ ionic->neqs_per_lif = neqs;
+ ionic->ntxqs_per_lif = nxqs;
+ ionic->nrxqs_per_lif = nxqs;
+ ionic->nintrs = nintrs;
+
+ ionic_debugfs_add_sizes(ionic);
+
+ return 0;
+
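+ /* not enough interrupts: halve the NotifyQ count, then the EQ count,
+ * then the queue pair count, and try again
+ */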
+try_fewer:
+ if (nnqs_per_lif > 1) {
+ nnqs_per_lif >>= 1;
+ goto try_again;
+ }
+ if (neqs > 1) {
+ neqs >>= 1;
+ goto try_again;
+ }
+ if (nxqs > 1) {
+ nxqs >>= 1;
+ goto try_again;
+ }
+ dev_err(ionic->dev, "Can't get minimum %d intrs from OS\n", min_intrs);
+ return -ENOSPC;
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
new file mode 100644
index 000000000000..812190e729c2
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -0,0 +1,277 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_LIF_H_
+#define _IONIC_LIF_H_
+
+#include <linux/pci.h>
+#include "ionic_rx_filter.h"
+
+#define IONIC_ADMINQ_LENGTH 16 /* must be a power of two */
+#define IONIC_NOTIFYQ_LENGTH 64 /* must be a power of two */
+
+#define IONIC_MAX_NUM_NAPI_CNTR (NAPI_POLL_WEIGHT + 1)
+#define IONIC_MAX_NUM_SG_CNTR (IONIC_TX_MAX_SG_ELEMS + 1)
+#define IONIC_RX_COPYBREAK_DEFAULT 256
+
+struct ionic_tx_stats {
+ u64 dma_map_err;
+ u64 pkts;
+ u64 bytes;
+ u64 clean;
+ u64 linearize;
+ u64 no_csum;
+ u64 csum;
+ u64 crc32_csum;
+ u64 tso;
+ u64 frags;
+ u64 sg_cntr[IONIC_MAX_NUM_SG_CNTR];
+};
+
+struct ionic_rx_stats {
+ u64 dma_map_err;
+ u64 alloc_err;
+ u64 pkts;
+ u64 bytes;
+ u64 csum_none;
+ u64 csum_complete;
+ u64 csum_error;
+ u64 buffers_posted;
+};
+
+#define IONIC_QCQ_F_INITED BIT(0)
+#define IONIC_QCQ_F_SG BIT(1)
+#define IONIC_QCQ_F_INTR BIT(2)
+#define IONIC_QCQ_F_TX_STATS BIT(3)
+#define IONIC_QCQ_F_RX_STATS BIT(4)
+#define IONIC_QCQ_F_NOTIFYQ BIT(5)
+
+struct ionic_napi_stats {
+ u64 poll_count;
+ u64 work_done_cntr[IONIC_MAX_NUM_NAPI_CNTR];
+};
+
+struct ionic_q_stats {
+ union {
+ struct ionic_tx_stats tx;
+ struct ionic_rx_stats rx;
+ };
+};
+
+struct ionic_qcq {
+ void *base;
+ dma_addr_t base_pa;
+ unsigned int total_size;
+ struct ionic_queue q;
+ struct ionic_cq cq;
+ struct ionic_intr_info intr;
+ struct napi_struct napi;
+ struct ionic_napi_stats napi_stats;
+ struct ionic_q_stats *stats;
+ unsigned int flags;
+ struct dentry *dentry;
+};
+
+struct ionic_qcqst {
+ struct ionic_qcq *qcq;
+ struct ionic_q_stats *stats;
+};
+
+#define q_to_qcq(q) container_of(q, struct ionic_qcq, q)
+#define q_to_tx_stats(q) (&q_to_qcq(q)->stats->tx)
+#define q_to_rx_stats(q) (&q_to_qcq(q)->stats->rx)
+#define napi_to_qcq(napi) container_of(napi, struct ionic_qcq, napi)
+#define napi_to_cq(napi) (&napi_to_qcq(napi)->cq)
+
+enum ionic_deferred_work_type {
+ IONIC_DW_TYPE_RX_MODE,
+ IONIC_DW_TYPE_RX_ADDR_ADD,
+ IONIC_DW_TYPE_RX_ADDR_DEL,
+ IONIC_DW_TYPE_LINK_STATUS,
+ IONIC_DW_TYPE_LIF_RESET,
+};
+
+struct ionic_deferred_work {
+ struct list_head list;
+ enum ionic_deferred_work_type type;
+ union {
+ unsigned int rx_mode;
+ u8 addr[ETH_ALEN];
+ };
+};
+
+struct ionic_deferred {
+ spinlock_t lock; /* lock for deferred work list */
+ struct list_head list;
+ struct work_struct work;
+};
+
+struct ionic_lif_sw_stats {
+ u64 tx_packets;
+ u64 tx_bytes;
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_tso;
+ u64 tx_no_csum;
+ u64 tx_csum;
+ u64 rx_csum_none;
+ u64 rx_csum_complete;
+ u64 rx_csum_error;
+};
+
+enum ionic_lif_state_flags {
+ IONIC_LIF_INITED,
+ IONIC_LIF_SW_DEBUG_STATS,
+ IONIC_LIF_UP,
+ IONIC_LIF_LINK_CHECK_REQUESTED,
+ IONIC_LIF_QUEUE_RESET,
+
+ /* leave this as last */
+ IONIC_LIF_STATE_SIZE
+};
+
+#define IONIC_LIF_NAME_MAX_SZ 32
+struct ionic_lif {
+ char name[IONIC_LIF_NAME_MAX_SZ];
+ struct list_head list;
+ struct net_device *netdev;
+ DECLARE_BITMAP(state, IONIC_LIF_STATE_SIZE);
+ struct ionic *ionic;
+ bool registered;
+ unsigned int index;
+ unsigned int hw_index;
+ unsigned int kern_pid;
+ u64 __iomem *kern_dbpage;
+ spinlock_t adminq_lock; /* lock for AdminQ operations */
+ struct ionic_qcq *adminqcq;
+ struct ionic_qcq *notifyqcq;
+ struct ionic_qcqst *txqcqs;
+ struct ionic_qcqst *rxqcqs;
+ u64 last_eid;
+ unsigned int neqs;
+ unsigned int nxqs;
+ unsigned int ntxq_descs;
+ unsigned int nrxq_descs;
+ u32 rx_copybreak;
+ unsigned int rx_mode;
+ u64 hw_features;
+ bool mc_overflow;
+ unsigned int nmcast;
+ bool uc_overflow;
+ unsigned int nucast;
+
+ struct ionic_lif_info *info;
+ dma_addr_t info_pa;
+ u32 info_sz;
+
+ u16 rss_types;
+ u8 rss_hash_key[IONIC_RSS_HASH_KEY_SIZE];
+ u8 *rss_ind_tbl;
+ dma_addr_t rss_ind_tbl_pa;
+ u32 rss_ind_tbl_sz;
+
+ struct ionic_rx_filters rx_filters;
+ struct ionic_deferred deferred;
+ unsigned long *dbid_inuse;
+ unsigned int dbid_count;
+ struct dentry *dentry;
+ u32 rx_coalesce_usecs;
+ u32 flags;
+ struct work_struct tx_timeout_work;
+};
+
+#define lif_to_txqcq(lif, i) ((lif)->txqcqs[i].qcq)
+#define lif_to_rxqcq(lif, i) ((lif)->rxqcqs[i].qcq)
+#define lif_to_txq(lif, i) (&lif_to_txqcq((lif), i)->q)
+#define lif_to_rxq(lif, i) (&lif_to_rxqcq((lif), i)->q)
+
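+/* Try for up to a second to claim the named lif state bit */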
+static inline int ionic_wait_for_bit(struct ionic_lif *lif, int bitname)
+{
+ unsigned long tlimit = jiffies + HZ;
+
+ while (test_and_set_bit(bitname, lif->state) &&
+ time_before(jiffies, tlimit))
+ usleep_range(100, 200);
+
+ return test_bit(bitname, lif->state);
+}
+
+static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs)
+{
+ u32 mult = le32_to_cpu(ionic->ident.dev.intr_coal_mult);
+ u32 div = le32_to_cpu(ionic->ident.dev.intr_coal_div);
+
+ /* Div-by-zero should never be an issue, but check anyway */
+ if (!div || !mult)
+ return 0;
+
+ /* Round up in case usecs is close to the next hw unit */
+ usecs += (div / mult) >> 1;
+
+ /* Convert from usecs to device units */
+ return (usecs * mult) / div;
+}
+
+static inline u32 ionic_coal_hw_to_usec(struct ionic *ionic, u32 units)
+{
+ u32 mult = le32_to_cpu(ionic->ident.dev.intr_coal_mult);
+ u32 div = le32_to_cpu(ionic->ident.dev.intr_coal_div);
+
+ /* Div-by-zero should never be an issue, but check anyway */
+ if (!div || !mult)
+ return 0;
+
+ /* Convert from device units to usec */
+ return (units * div) / mult;
+}
+
+int ionic_lifs_alloc(struct ionic *ionic);
+void ionic_lifs_free(struct ionic *ionic);
+void ionic_lifs_deinit(struct ionic *ionic);
+int ionic_lifs_init(struct ionic *ionic);
+int ionic_lifs_register(struct ionic *ionic);
+void ionic_lifs_unregister(struct ionic *ionic);
+int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
+ union ionic_lif_identity *lif_ident);
+int ionic_lifs_size(struct ionic *ionic);
+int ionic_lif_rss_config(struct ionic_lif *lif, u16 types,
+ const u8 *key, const u32 *indir);
+
+int ionic_open(struct net_device *netdev);
+int ionic_stop(struct net_device *netdev);
+int ionic_reset_queues(struct ionic_lif *lif);
+
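+/* Debug stats: count doorbell rings and bucket Tx descriptors by the
+ * number of SG elements used
+ */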
+static inline void debug_stats_txq_post(struct ionic_qcq *qcq,
+ struct ionic_txq_desc *desc, bool dbell)
+{
+ u8 num_sg_elems = ((le64_to_cpu(desc->cmd) >> IONIC_TXQ_DESC_NSGE_SHIFT)
+ & IONIC_TXQ_DESC_NSGE_MASK);
+
+ qcq->q.dbell_count += dbell;
+
+ if (num_sg_elems > (IONIC_MAX_NUM_SG_CNTR - 1))
+ num_sg_elems = IONIC_MAX_NUM_SG_CNTR - 1;
+
+ qcq->stats->tx.sg_cntr[num_sg_elems]++;
+}
+
+static inline void debug_stats_napi_poll(struct ionic_qcq *qcq,
+ unsigned int work_done)
+{
+ qcq->napi_stats.poll_count++;
+
+ if (work_done > (IONIC_MAX_NUM_NAPI_CNTR - 1))
+ work_done = IONIC_MAX_NUM_NAPI_CNTR - 1;
+
+ qcq->napi_stats.work_done_cntr[work_done]++;
+}
+
+#define DEBUG_STATS_CQE_CNT(cq) ((cq)->compl_count++)
+#define DEBUG_STATS_RX_BUFF_CNT(qcq) ((qcq)->stats->rx.buffers_posted++)
+#define DEBUG_STATS_INTR_REARM(intr) ((intr)->rearm_count++)
+#define DEBUG_STATS_TXQ_POST(qcq, txdesc, dbell) \
+ debug_stats_txq_post(qcq, txdesc, dbell)
+#define DEBUG_STATS_NAPI_POLL(qcq, work_done) \
+ debug_stats_napi_poll(qcq, work_done)
+
+#endif /* _IONIC_LIF_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
new file mode 100644
index 000000000000..5ec67f3f1853
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -0,0 +1,549 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/netdevice.h>
+#include <linux/utsname.h>
+
+#include "ionic.h"
+#include "ionic_bus.h"
+#include "ionic_lif.h"
+#include "ionic_debugfs.h"
+
+MODULE_DESCRIPTION(IONIC_DRV_DESCRIPTION);
+MODULE_AUTHOR("Pensando Systems, Inc");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(IONIC_DRV_VERSION);
+
+static const char *ionic_error_to_str(enum ionic_status_code code)
+{
+ switch (code) {
+ case IONIC_RC_SUCCESS:
+ return "IONIC_RC_SUCCESS";
+ case IONIC_RC_EVERSION:
+ return "IONIC_RC_EVERSION";
+ case IONIC_RC_EOPCODE:
+ return "IONIC_RC_EOPCODE";
+ case IONIC_RC_EIO:
+ return "IONIC_RC_EIO";
+ case IONIC_RC_EPERM:
+ return "IONIC_RC_EPERM";
+ case IONIC_RC_EQID:
+ return "IONIC_RC_EQID";
+ case IONIC_RC_EQTYPE:
+ return "IONIC_RC_EQTYPE";
+ case IONIC_RC_ENOENT:
+ return "IONIC_RC_ENOENT";
+ case IONIC_RC_EINTR:
+ return "IONIC_RC_EINTR";
+ case IONIC_RC_EAGAIN:
+ return "IONIC_RC_EAGAIN";
+ case IONIC_RC_ENOMEM:
+ return "IONIC_RC_ENOMEM";
+ case IONIC_RC_EFAULT:
+ return "IONIC_RC_EFAULT";
+ case IONIC_RC_EBUSY:
+ return "IONIC_RC_EBUSY";
+ case IONIC_RC_EEXIST:
+ return "IONIC_RC_EEXIST";
+ case IONIC_RC_EINVAL:
+ return "IONIC_RC_EINVAL";
+ case IONIC_RC_ENOSPC:
+ return "IONIC_RC_ENOSPC";
+ case IONIC_RC_ERANGE:
+ return "IONIC_RC_ERANGE";
+ case IONIC_RC_BAD_ADDR:
+ return "IONIC_RC_BAD_ADDR";
+ case IONIC_RC_DEV_CMD:
+ return "IONIC_RC_DEV_CMD";
+ case IONIC_RC_ERROR:
+ return "IONIC_RC_ERROR";
+ case IONIC_RC_ERDMA:
+ return "IONIC_RC_ERDMA";
+ default:
+ return "IONIC_RC_UNKNOWN";
+ }
+}
+
+static int ionic_error_to_errno(enum ionic_status_code code)
+{
+ switch (code) {
+ case IONIC_RC_SUCCESS:
+ return 0;
+ case IONIC_RC_EVERSION:
+ case IONIC_RC_EQTYPE:
+ case IONIC_RC_EQID:
+ case IONIC_RC_EINVAL:
+ return -EINVAL;
+ case IONIC_RC_EPERM:
+ return -EPERM;
+ case IONIC_RC_ENOENT:
+ return -ENOENT;
+ case IONIC_RC_EAGAIN:
+ return -EAGAIN;
+ case IONIC_RC_ENOMEM:
+ return -ENOMEM;
+ case IONIC_RC_EFAULT:
+ return -EFAULT;
+ case IONIC_RC_EBUSY:
+ return -EBUSY;
+ case IONIC_RC_EEXIST:
+ return -EEXIST;
+ case IONIC_RC_ENOSPC:
+ return -ENOSPC;
+ case IONIC_RC_ERANGE:
+ return -ERANGE;
+ case IONIC_RC_BAD_ADDR:
+ return -EFAULT;
+ case IONIC_RC_EOPCODE:
+ case IONIC_RC_EINTR:
+ case IONIC_RC_DEV_CMD:
+ case IONIC_RC_ERROR:
+ case IONIC_RC_ERDMA:
+ case IONIC_RC_EIO:
+ default:
+ return -EIO;
+ }
+}
+
+static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
+{
+ switch (opcode) {
+ case IONIC_CMD_NOP:
+ return "IONIC_CMD_NOP";
+ case IONIC_CMD_INIT:
+ return "IONIC_CMD_INIT";
+ case IONIC_CMD_RESET:
+ return "IONIC_CMD_RESET";
+ case IONIC_CMD_IDENTIFY:
+ return "IONIC_CMD_IDENTIFY";
+ case IONIC_CMD_GETATTR:
+ return "IONIC_CMD_GETATTR";
+ case IONIC_CMD_SETATTR:
+ return "IONIC_CMD_SETATTR";
+ case IONIC_CMD_PORT_IDENTIFY:
+ return "IONIC_CMD_PORT_IDENTIFY";
+ case IONIC_CMD_PORT_INIT:
+ return "IONIC_CMD_PORT_INIT";
+ case IONIC_CMD_PORT_RESET:
+ return "IONIC_CMD_PORT_RESET";
+ case IONIC_CMD_PORT_GETATTR:
+ return "IONIC_CMD_PORT_GETATTR";
+ case IONIC_CMD_PORT_SETATTR:
+ return "IONIC_CMD_PORT_SETATTR";
+ case IONIC_CMD_LIF_INIT:
+ return "IONIC_CMD_LIF_INIT";
+ case IONIC_CMD_LIF_RESET:
+ return "IONIC_CMD_LIF_RESET";
+ case IONIC_CMD_LIF_IDENTIFY:
+ return "IONIC_CMD_LIF_IDENTIFY";
+ case IONIC_CMD_LIF_SETATTR:
+ return "IONIC_CMD_LIF_SETATTR";
+ case IONIC_CMD_LIF_GETATTR:
+ return "IONIC_CMD_LIF_GETATTR";
+ case IONIC_CMD_RX_MODE_SET:
+ return "IONIC_CMD_RX_MODE_SET";
+ case IONIC_CMD_RX_FILTER_ADD:
+ return "IONIC_CMD_RX_FILTER_ADD";
+ case IONIC_CMD_RX_FILTER_DEL:
+ return "IONIC_CMD_RX_FILTER_DEL";
+ case IONIC_CMD_Q_INIT:
+ return "IONIC_CMD_Q_INIT";
+ case IONIC_CMD_Q_CONTROL:
+ return "IONIC_CMD_Q_CONTROL";
+ case IONIC_CMD_RDMA_RESET_LIF:
+ return "IONIC_CMD_RDMA_RESET_LIF";
+ case IONIC_CMD_RDMA_CREATE_EQ:
+ return "IONIC_CMD_RDMA_CREATE_EQ";
+ case IONIC_CMD_RDMA_CREATE_CQ:
+ return "IONIC_CMD_RDMA_CREATE_CQ";
+ case IONIC_CMD_RDMA_CREATE_ADMINQ:
+ return "IONIC_CMD_RDMA_CREATE_ADMINQ";
+ case IONIC_CMD_FW_DOWNLOAD:
+ return "IONIC_CMD_FW_DOWNLOAD";
+ case IONIC_CMD_FW_CONTROL:
+ return "IONIC_CMD_FW_CONTROL";
+ default:
+ return "DEVCMD_UNKNOWN";
+ }
+}
+
+static void ionic_adminq_flush(struct ionic_lif *lif)
+{
+ struct ionic_queue *adminq = &lif->adminqcq->q;
+
+ spin_lock(&lif->adminq_lock);
+
+ while (adminq->tail != adminq->head) {
+ memset(adminq->tail->desc, 0, sizeof(union ionic_adminq_cmd));
+ adminq->tail->cb = NULL;
+ adminq->tail->cb_arg = NULL;
+ adminq->tail = adminq->tail->next;
+ }
+ spin_unlock(&lif->adminq_lock);
+}
+
+static int ionic_adminq_check_err(struct ionic_lif *lif,
+ struct ionic_admin_ctx *ctx,
+ bool timeout)
+{
+ struct net_device *netdev = lif->netdev;
+ const char *opcode_str;
+ const char *status_str;
+ int err = 0;
+
+ if (ctx->comp.comp.status || timeout) {
+ opcode_str = ionic_opcode_to_str(ctx->cmd.cmd.opcode);
+ status_str = ionic_error_to_str(ctx->comp.comp.status);
+ err = timeout ? -ETIMEDOUT :
+ ionic_error_to_errno(ctx->comp.comp.status);
+
+ netdev_err(netdev, "%s (%d) failed: %s (%d)\n",
+ opcode_str, ctx->cmd.cmd.opcode,
+ timeout ? "TIMEOUT" : status_str, err);
+
+ if (timeout)
+ ionic_adminq_flush(lif);
+ }
+
+ return err;
+}
+
+static void ionic_adminq_cb(struct ionic_queue *q,
+ struct ionic_desc_info *desc_info,
+ struct ionic_cq_info *cq_info, void *cb_arg)
+{
+ struct ionic_admin_ctx *ctx = cb_arg;
+ struct ionic_admin_comp *comp;
+ struct device *dev;
+
+ if (!ctx)
+ return;
+
+ comp = cq_info->cq_desc;
+ dev = &q->lif->netdev->dev;
+
+ memcpy(&ctx->comp, comp, sizeof(*comp));
+
+ dev_dbg(dev, "comp admin queue command:\n");
+ dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1,
+ &ctx->comp, sizeof(ctx->comp), true);
+
+ complete_all(&ctx->work);
+}
+
+static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
+{
+ struct ionic_queue *adminq = &lif->adminqcq->q;
+ int err = 0;
+
+ WARN_ON(in_interrupt());
+
+ spin_lock(&lif->adminq_lock);
+ if (!ionic_q_has_space(adminq, 1)) {
+ err = -ENOSPC;
+ goto err_out;
+ }
+
+ memcpy(adminq->head->desc, &ctx->cmd, sizeof(ctx->cmd));
+
+ dev_dbg(&lif->netdev->dev, "post admin queue command:\n");
+ dynamic_hex_dump("cmd ", DUMP_PREFIX_OFFSET, 16, 1,
+ &ctx->cmd, sizeof(ctx->cmd), true);
+
+ ionic_q_post(adminq, true, ionic_adminq_cb, ctx);
+
+err_out:
+ spin_unlock(&lif->adminq_lock);
+
+ return err;
+}
+
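+/* Post a command to the AdminQ and sleep until the completion arrives
+ * or the timeout expires
+ */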
+int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
+{
+ struct net_device *netdev = lif->netdev;
+ unsigned long remaining;
+ const char *name;
+ int err;
+
+ err = ionic_adminq_post(lif, ctx);
+ if (err) {
+ name = ionic_opcode_to_str(ctx->cmd.cmd.opcode);
+ netdev_err(netdev, "Posting of %s (%d) failed: %d\n",
+ name, ctx->cmd.cmd.opcode, err);
+ return err;
+ }
+
+ remaining = wait_for_completion_timeout(&ctx->work,
+ HZ * (ulong)DEVCMD_TIMEOUT);
+ return ionic_adminq_check_err(lif, ctx, (remaining == 0));
+}
+
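+/* Common NAPI service routine: process up to budget completions, then
+ * return the credits to the device and unmask the interrupt when the
+ * poll is complete
+ */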
+int ionic_napi(struct napi_struct *napi, int budget, ionic_cq_cb cb,
+ ionic_cq_done_cb done_cb, void *done_arg)
+{
+ struct ionic_qcq *qcq = napi_to_qcq(napi);
+ struct ionic_cq *cq = &qcq->cq;
+ u32 work_done, flags = 0;
+
+ work_done = ionic_cq_service(cq, budget, cb, done_cb, done_arg);
+
+ if (work_done < budget && napi_complete_done(napi, work_done)) {
+ flags |= IONIC_INTR_CRED_UNMASK;
+ DEBUG_STATS_INTR_REARM(cq->bound_intr);
+ }
+
+ if (work_done || flags) {
+ flags |= IONIC_INTR_CRED_RESET_COALESCE;
+ ionic_intr_credits(cq->lif->ionic->idev.intr_ctrl,
+ cq->bound_intr->index,
+ work_done, flags);
+ }
+
+ DEBUG_STATS_NAPI_POLL(qcq, work_done);
+
+ return work_done;
+}
+
+int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
+{
+ struct ionic_dev *idev = &ionic->idev;
+ unsigned long start_time;
+ unsigned long max_wait;
+ unsigned long duration;
+ int opcode;
+ int done;
+ int err;
+
+ WARN_ON(in_interrupt());
+
+ /* Wait for dev cmd to complete, retrying if we get EAGAIN,
+ * but don't wait any longer than max_seconds.
+ */
+ max_wait = jiffies + (max_seconds * HZ);
+try_again:
+ start_time = jiffies;
+ do {
+ done = ionic_dev_cmd_done(idev);
+ if (done)
+ break;
+ msleep(20);
+ } while (!done && time_before(jiffies, max_wait));
+ duration = jiffies - start_time;
+
+ opcode = idev->dev_cmd_regs->cmd.cmd.opcode;
+ dev_dbg(ionic->dev, "DEVCMD %s (%d) done=%d took %ld secs (%ld jiffies)\n",
+ ionic_opcode_to_str(opcode), opcode,
+ done, duration / HZ, duration);
+
+ if (!done && !time_before(jiffies, max_wait)) {
+ dev_warn(ionic->dev, "DEVCMD %s (%d) timeout after %ld secs\n",
+ ionic_opcode_to_str(opcode), opcode, max_seconds);
+ return -ETIMEDOUT;
+ }
+
+ err = ionic_dev_cmd_status(&ionic->idev);
+ if (err) {
+ if (err == IONIC_RC_EAGAIN && !time_after(jiffies, max_wait)) {
+ dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) retrying...\n",
+ ionic_opcode_to_str(opcode), opcode,
+ ionic_error_to_str(err), err);
+
+ msleep(1000);
+ iowrite32(0, &idev->dev_cmd_regs->done);
+ iowrite32(1, &idev->dev_cmd_regs->doorbell);
+ goto try_again;
+ }
+
+ dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n",
+ ionic_opcode_to_str(opcode), opcode,
+ ionic_error_to_str(err), err);
+
+ return ionic_error_to_errno(err);
+ }
+
+ return 0;
+}
+
+int ionic_setup(struct ionic *ionic)
+{
+ int err;
+
+ err = ionic_dev_setup(ionic);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int ionic_identify(struct ionic *ionic)
+{
+ struct ionic_identity *ident = &ionic->ident;
+ struct ionic_dev *idev = &ionic->idev;
+ size_t sz;
+ int err;
+
+ memset(ident, 0, sizeof(*ident));
+
+ ident->drv.os_type = cpu_to_le32(IONIC_OS_TYPE_LINUX);
+ strncpy(ident->drv.driver_ver_str, IONIC_DRV_VERSION,
+ sizeof(ident->drv.driver_ver_str) - 1);
+
+ mutex_lock(&ionic->dev_cmd_lock);
+
+ sz = min(sizeof(ident->drv), sizeof(idev->dev_cmd_regs->data));
+ memcpy_toio(&idev->dev_cmd_regs->data, &ident->drv, sz);
+
+ ionic_dev_cmd_identify(idev, IONIC_IDENTITY_VERSION_1);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ if (!err) {
+ sz = min(sizeof(ident->dev), sizeof(idev->dev_cmd_regs->data));
+ memcpy_fromio(&ident->dev, &idev->dev_cmd_regs->data, sz);
+ }
+
+ mutex_unlock(&ionic->dev_cmd_lock);
+
+ if (err)
+ goto err_out_unmap;
+
+ ionic_debugfs_add_ident(ionic);
+
+ return 0;
+
+err_out_unmap:
+ return err;
+}
+
+int ionic_init(struct ionic *ionic)
+{
+ struct ionic_dev *idev = &ionic->idev;
+ int err;
+
+ mutex_lock(&ionic->dev_cmd_lock);
+ ionic_dev_cmd_init(idev);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ mutex_unlock(&ionic->dev_cmd_lock);
+
+ return err;
+}
+
+int ionic_reset(struct ionic *ionic)
+{
+ struct ionic_dev *idev = &ionic->idev;
+ int err;
+
+ mutex_lock(&ionic->dev_cmd_lock);
+ ionic_dev_cmd_reset(idev);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ mutex_unlock(&ionic->dev_cmd_lock);
+
+ return err;
+}
+
+int ionic_port_identify(struct ionic *ionic)
+{
+ struct ionic_identity *ident = &ionic->ident;
+ struct ionic_dev *idev = &ionic->idev;
+ size_t sz;
+ int err;
+
+ mutex_lock(&ionic->dev_cmd_lock);
+
+ ionic_dev_cmd_port_identify(idev);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ if (!err) {
+ sz = min(sizeof(ident->port), sizeof(idev->dev_cmd_regs->data));
+ memcpy_fromio(&ident->port, &idev->dev_cmd_regs->data, sz);
+ }
+
+ mutex_unlock(&ionic->dev_cmd_lock);
+
+ return err;
+}
+
+int ionic_port_init(struct ionic *ionic)
+{
+ struct ionic_identity *ident = &ionic->ident;
+ struct ionic_dev *idev = &ionic->idev;
+ size_t sz;
+ int err;
+
+ if (idev->port_info)
+ return 0;
+
+ idev->port_info_sz = ALIGN(sizeof(*idev->port_info), PAGE_SIZE);
+ idev->port_info = dma_alloc_coherent(ionic->dev, idev->port_info_sz,
+ &idev->port_info_pa,
+ GFP_KERNEL);
+ if (!idev->port_info) {
+ dev_err(ionic->dev, "Failed to allocate port info, aborting\n");
+ return -ENOMEM;
+ }
+
+ sz = min(sizeof(ident->port.config), sizeof(idev->dev_cmd_regs->data));
+
+ mutex_lock(&ionic->dev_cmd_lock);
+
+ memcpy_toio(&idev->dev_cmd_regs->data, &ident->port.config, sz);
+ ionic_dev_cmd_port_init(idev);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+
+ ionic_dev_cmd_port_state(&ionic->idev, IONIC_PORT_ADMIN_STATE_UP);
+ (void)ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+
+ mutex_unlock(&ionic->dev_cmd_lock);
+ if (err) {
+ dev_err(ionic->dev, "Failed to init port\n");
+ dma_free_coherent(ionic->dev, idev->port_info_sz,
+ idev->port_info, idev->port_info_pa);
+ idev->port_info = NULL;
+ idev->port_info_pa = 0;
+ }
+
+ return err;
+}
+
+int ionic_port_reset(struct ionic *ionic)
+{
+ struct ionic_dev *idev = &ionic->idev;
+ int err;
+
+ if (!idev->port_info)
+ return 0;
+
+ mutex_lock(&ionic->dev_cmd_lock);
+ ionic_dev_cmd_port_reset(idev);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ mutex_unlock(&ionic->dev_cmd_lock);
+
+ dma_free_coherent(ionic->dev, idev->port_info_sz,
+ idev->port_info, idev->port_info_pa);
+
+ idev->port_info = NULL;
+ idev->port_info_pa = 0;
+
+ if (err)
+ dev_err(ionic->dev, "Failed to reset port\n");
+
+ return err;
+}
+
+static int __init ionic_init_module(void)
+{
+ pr_info("%s %s, ver %s\n",
+ IONIC_DRV_NAME, IONIC_DRV_DESCRIPTION, IONIC_DRV_VERSION);
+ ionic_debugfs_create();
+ return ionic_bus_register_driver();
+}
+
+static void __exit ionic_cleanup_module(void)
+{
+ ionic_bus_unregister_driver();
+ ionic_debugfs_destroy();
+
+ pr_info("%s removed\n", IONIC_DRV_NAME);
+}
+
+module_init(ionic_init_module);
+module_exit(ionic_cleanup_module);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_regs.h b/drivers/net/ethernet/pensando/ionic/ionic_regs.h
new file mode 100644
index 000000000000..03ee5a36472b
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_regs.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB OR BSD-2-Clause */
+/* Copyright (c) 2018-2019 Pensando Systems, Inc. All rights reserved. */
+
+#ifndef IONIC_REGS_H
+#define IONIC_REGS_H
+
+#include <linux/io.h>
+
+/** struct ionic_intr - interrupt control register set.
+ * @coal_init: coalesce timer initial value.
+ * @mask: interrupt mask value.
+ * @credits: interrupt credit count and return.
+ * @mask_assert: interrupt mask value on assert.
+ * @coal: coalesce timer time remaining.
+ */
+struct ionic_intr {
+ u32 coal_init;
+ u32 mask;
+ u32 credits;
+ u32 mask_assert;
+ u32 coal;
+ u32 rsvd[3];
+};
+
+#define IONIC_INTR_CTRL_REGS_MAX 2048
+#define IONIC_INTR_CTRL_COAL_MAX 0x3F
+
+/** enum ionic_intr_mask_vals - valid values for mask and mask_assert.
+ * @IONIC_INTR_MASK_CLEAR: unmask interrupt.
+ * @IONIC_INTR_MASK_SET: mask interrupt.
+ */
+enum ionic_intr_mask_vals {
+ IONIC_INTR_MASK_CLEAR = 0,
+ IONIC_INTR_MASK_SET = 1,
+};
+
+/** enum ionic_intr_credits_bits - bitwise composition of credits values.
+ * @IONIC_INTR_CRED_COUNT: bit mask of credit count, no shift needed.
+ * @IONIC_INTR_CRED_COUNT_SIGNED: bit mask of credit count, including sign bit.
+ * @IONIC_INTR_CRED_UNMASK: unmask the interrupt.
+ * @IONIC_INTR_CRED_RESET_COALESCE: reset the coalesce timer.
+ * @IONIC_INTR_CRED_REARM: unmask the interrupt and reset the coalesce timer.
+ */
+enum ionic_intr_credits_bits {
+ IONIC_INTR_CRED_COUNT = 0x7fffu,
+ IONIC_INTR_CRED_COUNT_SIGNED = 0xffffu,
+ IONIC_INTR_CRED_UNMASK = 0x10000u,
+ IONIC_INTR_CRED_RESET_COALESCE = 0x20000u,
+ IONIC_INTR_CRED_REARM = (IONIC_INTR_CRED_UNMASK |
+ IONIC_INTR_CRED_RESET_COALESCE),
+};
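+
+/* As an example, a poll routine that has processed N completions can return
+ * the credits and re-enable the interrupt in a single register write by
+ * passing IONIC_INTR_CRED_REARM as the flags argument to ionic_intr_credits()
+ * below.
+ */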
+
+static inline void ionic_intr_coal_init(struct ionic_intr __iomem *intr_ctrl,
+ int intr_idx, u32 coal)
+{
+ iowrite32(coal, &intr_ctrl[intr_idx].coal_init);
+}
+
+static inline void ionic_intr_mask(struct ionic_intr __iomem *intr_ctrl,
+ int intr_idx, u32 mask)
+{
+ iowrite32(mask, &intr_ctrl[intr_idx].mask);
+}
+
+static inline void ionic_intr_credits(struct ionic_intr __iomem *intr_ctrl,
+ int intr_idx, u32 cred, u32 flags)
+{
+ if (WARN_ON_ONCE(cred > IONIC_INTR_CRED_COUNT)) {
+ cred = ioread32(&intr_ctrl[intr_idx].credits);
+ cred &= IONIC_INTR_CRED_COUNT_SIGNED;
+ }
+
+ iowrite32(cred | flags, &intr_ctrl[intr_idx].credits);
+}
+
+static inline void ionic_intr_clean(struct ionic_intr __iomem *intr_ctrl,
+ int intr_idx)
+{
+ u32 cred;
+
+ cred = ioread32(&intr_ctrl[intr_idx].credits);
+ cred &= IONIC_INTR_CRED_COUNT_SIGNED;
+ cred |= IONIC_INTR_CRED_RESET_COALESCE;
+ iowrite32(cred, &intr_ctrl[intr_idx].credits);
+}
+
+static inline void ionic_intr_mask_assert(struct ionic_intr __iomem *intr_ctrl,
+ int intr_idx, u32 mask)
+{
+ iowrite32(mask, &intr_ctrl[intr_idx].mask_assert);
+}
+
+/** enum ionic_dbell_bits - bitwise composition of dbell values.
+ *
+ * @IONIC_DBELL_QID_MASK: unshifted mask of valid queue id bits.
+ * @IONIC_DBELL_QID_SHIFT: queue id shift amount in dbell value.
+ * @IONIC_DBELL_QID: macro to build QID component of dbell value.
+ *
+ * @IONIC_DBELL_RING_MASK: unshifted mask of valid ring bits.
+ * @IONIC_DBELL_RING_SHIFT: ring shift amount in dbell value.
+ * @IONIC_DBELL_RING: macro to build ring component of dbell value.
+ *
+ * @IONIC_DBELL_RING_0: ring zero dbell component value.
+ * @IONIC_DBELL_RING_1: ring one dbell component value.
+ * @IONIC_DBELL_RING_2: ring two dbell component value.
+ * @IONIC_DBELL_RING_3: ring three dbell component value.
+ *
+ * @IONIC_DBELL_INDEX_MASK: bit mask of valid index bits, no shift needed.
+ */
+enum ionic_dbell_bits {
+ IONIC_DBELL_QID_MASK = 0xffffff,
+ IONIC_DBELL_QID_SHIFT = 24,
+
+#define IONIC_DBELL_QID(n) \
+ (((u64)(n) & IONIC_DBELL_QID_MASK) << IONIC_DBELL_QID_SHIFT)
+
+ IONIC_DBELL_RING_MASK = 0x7,
+ IONIC_DBELL_RING_SHIFT = 16,
+
+#define IONIC_DBELL_RING(n) \
+ (((u64)(n) & IONIC_DBELL_RING_MASK) << IONIC_DBELL_RING_SHIFT)
+
+ IONIC_DBELL_RING_0 = 0,
+ IONIC_DBELL_RING_1 = IONIC_DBELL_RING(1),
+ IONIC_DBELL_RING_2 = IONIC_DBELL_RING(2),
+ IONIC_DBELL_RING_3 = IONIC_DBELL_RING(3),
+
+ IONIC_DBELL_INDEX_MASK = 0xffff,
+};
+
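+/* An illustrative doorbell value for queue id 5, ring 1, producer index 100
+ * would be composed as
+ *   IONIC_DBELL_QID(5) | IONIC_DBELL_RING_1 | (100 & IONIC_DBELL_INDEX_MASK)
+ * and written to the queue type's doorbell page entry with ionic_dbell_ring().
+ */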
+static inline void ionic_dbell_ring(u64 __iomem *db_page, int qtype, u64 val)
+{
+ writeq(val, &db_page[qtype]);
+}
+
+#endif /* IONIC_REGS_H */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
new file mode 100644
index 000000000000..7a093f148ee5
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+
+#include "ionic.h"
+#include "ionic_lif.h"
+#include "ionic_rx_filter.h"
+
+void ionic_rx_filter_free(struct ionic_lif *lif, struct ionic_rx_filter *f)
+{
+ struct device *dev = lif->ionic->dev;
+
+ hlist_del(&f->by_id);
+ hlist_del(&f->by_hash);
+ devm_kfree(dev, f);
+}
+
+int ionic_rx_filter_del(struct ionic_lif *lif, struct ionic_rx_filter *f)
+{
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.rx_filter_del = {
+ .opcode = IONIC_CMD_RX_FILTER_DEL,
+ .filter_id = cpu_to_le32(f->filter_id),
+ },
+ };
+
+ return ionic_adminq_post_wait(lif, &ctx);
+}
+
+int ionic_rx_filters_init(struct ionic_lif *lif)
+{
+ unsigned int i;
+
+ spin_lock_init(&lif->rx_filters.lock);
+
+ for (i = 0; i < IONIC_RX_FILTER_HLISTS; i++) {
+ INIT_HLIST_HEAD(&lif->rx_filters.by_hash[i]);
+ INIT_HLIST_HEAD(&lif->rx_filters.by_id[i]);
+ }
+
+ return 0;
+}
+
+void ionic_rx_filters_deinit(struct ionic_lif *lif)
+{
+ struct ionic_rx_filter *f;
+ struct hlist_head *head;
+ struct hlist_node *tmp;
+ unsigned int i;
+
+ for (i = 0; i < IONIC_RX_FILTER_HLISTS; i++) {
+ head = &lif->rx_filters.by_id[i];
+ hlist_for_each_entry_safe(f, tmp, head, by_id)
+ ionic_rx_filter_free(lif, f);
+ }
+}
+
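+/* Record a filter that the device has accepted: the entry is linked into the
+ * by_hash table, keyed on a hash of the filter's match value, and into the
+ * by_id table, keyed on the device-assigned filter_id.
+ */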
+int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
+ u32 hash, struct ionic_admin_ctx *ctx)
+{
+ struct device *dev = lif->ionic->dev;
+ struct ionic_rx_filter_add_cmd *ac;
+ struct ionic_rx_filter *f;
+ struct hlist_head *head;
+ unsigned int key;
+
+ ac = &ctx->cmd.rx_filter_add;
+
+ switch (le16_to_cpu(ac->match)) {
+ case IONIC_RX_FILTER_MATCH_VLAN:
+ key = le16_to_cpu(ac->vlan.vlan);
+ break;
+ case IONIC_RX_FILTER_MATCH_MAC:
+ key = *(u32 *)ac->mac.addr;
+ break;
+ case IONIC_RX_FILTER_MATCH_MAC_VLAN:
+ key = le16_to_cpu(ac->mac_vlan.vlan);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ f = devm_kzalloc(dev, sizeof(*f), GFP_KERNEL);
+ if (!f)
+ return -ENOMEM;
+
+ f->flow_id = flow_id;
+ f->filter_id = le32_to_cpu(ctx->comp.rx_filter_add.filter_id);
+ f->rxq_index = rxq_index;
+ memcpy(&f->cmd, ac, sizeof(f->cmd));
+
+ INIT_HLIST_NODE(&f->by_hash);
+ INIT_HLIST_NODE(&f->by_id);
+
+ spin_lock_bh(&lif->rx_filters.lock);
+
+ key = hash_32(key, IONIC_RX_FILTER_HASH_BITS);
+ head = &lif->rx_filters.by_hash[key];
+ hlist_add_head(&f->by_hash, head);
+
+ key = f->filter_id & IONIC_RX_FILTER_HLISTS_MASK;
+ head = &lif->rx_filters.by_id[key];
+ hlist_add_head(&f->by_id, head);
+
+ spin_unlock_bh(&lif->rx_filters.lock);
+
+ return 0;
+}
+
+struct ionic_rx_filter *ionic_rx_filter_by_vlan(struct ionic_lif *lif, u16 vid)
+{
+ struct ionic_rx_filter *f;
+ struct hlist_head *head;
+ unsigned int key;
+
+ key = hash_32(vid, IONIC_RX_FILTER_HASH_BITS);
+ head = &lif->rx_filters.by_hash[key];
+
+ hlist_for_each_entry(f, head, by_hash) {
+ if (le16_to_cpu(f->cmd.match) != IONIC_RX_FILTER_MATCH_VLAN)
+ continue;
+ if (le16_to_cpu(f->cmd.vlan.vlan) == vid)
+ return f;
+ }
+
+ return NULL;
+}
+
+struct ionic_rx_filter *ionic_rx_filter_by_addr(struct ionic_lif *lif,
+ const u8 *addr)
+{
+ struct ionic_rx_filter *f;
+ struct hlist_head *head;
+ unsigned int key;
+
+ key = hash_32(*(u32 *)addr, IONIC_RX_FILTER_HASH_BITS);
+ head = &lif->rx_filters.by_hash[key];
+
+ hlist_for_each_entry(f, head, by_hash) {
+ if (le16_to_cpu(f->cmd.match) != IONIC_RX_FILTER_MATCH_MAC)
+ continue;
+ if (memcmp(addr, f->cmd.mac.addr, ETH_ALEN) == 0)
+ return f;
+ }
+
+ return NULL;
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h
new file mode 100644
index 000000000000..b6aec9c19918
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_RX_FILTER_H_
+#define _IONIC_RX_FILTER_H_
+
+#define IONIC_RXQ_INDEX_ANY (0xFFFF)
+struct ionic_rx_filter {
+ u32 flow_id;
+ u32 filter_id;
+ u16 rxq_index;
+ struct ionic_rx_filter_add_cmd cmd;
+ struct hlist_node by_hash;
+ struct hlist_node by_id;
+};
+
+#define IONIC_RX_FILTER_HASH_BITS 10
+#define IONIC_RX_FILTER_HLISTS BIT(IONIC_RX_FILTER_HASH_BITS)
+#define IONIC_RX_FILTER_HLISTS_MASK (IONIC_RX_FILTER_HLISTS - 1)
+struct ionic_rx_filters {
+ spinlock_t lock; /* filter list lock */
+ struct hlist_head by_hash[IONIC_RX_FILTER_HLISTS]; /* by match-value hash */
+ struct hlist_head by_id[IONIC_RX_FILTER_HLISTS]; /* by filter_id */
+};
+
+void ionic_rx_filter_free(struct ionic_lif *lif, struct ionic_rx_filter *f);
+int ionic_rx_filter_del(struct ionic_lif *lif, struct ionic_rx_filter *f);
+int ionic_rx_filters_init(struct ionic_lif *lif);
+void ionic_rx_filters_deinit(struct ionic_lif *lif);
+int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
+ u32 hash, struct ionic_admin_ctx *ctx);
+struct ionic_rx_filter *ionic_rx_filter_by_vlan(struct ionic_lif *lif, u16 vid);
+struct ionic_rx_filter *ionic_rx_filter_by_addr(struct ionic_lif *lif, const u8 *addr);
+
+#endif /* _IONIC_RX_FILTER_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
new file mode 100644
index 000000000000..e2907884f843
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+
+#include "ionic.h"
+#include "ionic_lif.h"
+#include "ionic_stats.h"
+
+static const struct ionic_stat_desc ionic_lif_stats_desc[] = {
+ IONIC_LIF_STAT_DESC(tx_packets),
+ IONIC_LIF_STAT_DESC(tx_bytes),
+ IONIC_LIF_STAT_DESC(rx_packets),
+ IONIC_LIF_STAT_DESC(rx_bytes),
+ IONIC_LIF_STAT_DESC(tx_tso),
+ IONIC_LIF_STAT_DESC(tx_no_csum),
+ IONIC_LIF_STAT_DESC(tx_csum),
+ IONIC_LIF_STAT_DESC(rx_csum_none),
+ IONIC_LIF_STAT_DESC(rx_csum_complete),
+ IONIC_LIF_STAT_DESC(rx_csum_error),
+};
+
+static const struct ionic_stat_desc ionic_tx_stats_desc[] = {
+ IONIC_TX_STAT_DESC(pkts),
+ IONIC_TX_STAT_DESC(bytes),
+ IONIC_TX_STAT_DESC(clean),
+ IONIC_TX_STAT_DESC(dma_map_err),
+ IONIC_TX_STAT_DESC(linearize),
+ IONIC_TX_STAT_DESC(frags),
+};
+
+static const struct ionic_stat_desc ionic_rx_stats_desc[] = {
+ IONIC_RX_STAT_DESC(pkts),
+ IONIC_RX_STAT_DESC(bytes),
+ IONIC_RX_STAT_DESC(dma_map_err),
+ IONIC_RX_STAT_DESC(alloc_err),
+ IONIC_RX_STAT_DESC(csum_none),
+ IONIC_RX_STAT_DESC(csum_complete),
+ IONIC_RX_STAT_DESC(csum_error),
+};
+
+static const struct ionic_stat_desc ionic_txq_stats_desc[] = {
+ IONIC_TX_Q_STAT_DESC(stop),
+ IONIC_TX_Q_STAT_DESC(wake),
+ IONIC_TX_Q_STAT_DESC(drop),
+ IONIC_TX_Q_STAT_DESC(dbell_count),
+};
+
+static const struct ionic_stat_desc ionic_dbg_cq_stats_desc[] = {
+ IONIC_CQ_STAT_DESC(compl_count),
+};
+
+static const struct ionic_stat_desc ionic_dbg_intr_stats_desc[] = {
+ IONIC_INTR_STAT_DESC(rearm_count),
+};
+
+static const struct ionic_stat_desc ionic_dbg_napi_stats_desc[] = {
+ IONIC_NAPI_STAT_DESC(poll_count),
+};
+
+#define IONIC_NUM_LIF_STATS ARRAY_SIZE(ionic_lif_stats_desc)
+#define IONIC_NUM_TX_STATS ARRAY_SIZE(ionic_tx_stats_desc)
+#define IONIC_NUM_RX_STATS ARRAY_SIZE(ionic_rx_stats_desc)
+#define IONIC_NUM_TX_Q_STATS ARRAY_SIZE(ionic_txq_stats_desc)
+#define IONIC_NUM_DBG_CQ_STATS ARRAY_SIZE(ionic_dbg_cq_stats_desc)
+#define IONIC_NUM_DBG_INTR_STATS ARRAY_SIZE(ionic_dbg_intr_stats_desc)
+#define IONIC_NUM_DBG_NAPI_STATS ARRAY_SIZE(ionic_dbg_napi_stats_desc)
+
+#define MAX_Q(lif) ((lif)->netdev->real_num_tx_queues)
+
+static void ionic_get_lif_stats(struct ionic_lif *lif,
+ struct ionic_lif_sw_stats *stats)
+{
+ struct ionic_tx_stats *tstats;
+ struct ionic_rx_stats *rstats;
+ struct ionic_qcq *txqcq;
+ struct ionic_qcq *rxqcq;
+ int q_num;
+
+ memset(stats, 0, sizeof(*stats));
+
+ for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
+ txqcq = lif_to_txqcq(lif, q_num);
+ if (txqcq && txqcq->stats) {
+ tstats = &txqcq->stats->tx;
+ stats->tx_packets += tstats->pkts;
+ stats->tx_bytes += tstats->bytes;
+ stats->tx_tso += tstats->tso;
+ stats->tx_no_csum += tstats->no_csum;
+ stats->tx_csum += tstats->csum;
+ }
+
+ rxqcq = lif_to_rxqcq(lif, q_num);
+ if (rxqcq && rxqcq->stats) {
+ rstats = &rxqcq->stats->rx;
+ stats->rx_packets += rstats->pkts;
+ stats->rx_bytes += rstats->bytes;
+ stats->rx_csum_none += rstats->csum_none;
+ stats->rx_csum_complete += rstats->csum_complete;
+ stats->rx_csum_error += rstats->csum_error;
+ }
+ }
+}
+
+static u64 ionic_sw_stats_get_count(struct ionic_lif *lif)
+{
+ u64 total = 0;
+
+ /* lif stats */
+ total += IONIC_NUM_LIF_STATS;
+
+ /* tx stats */
+ total += MAX_Q(lif) * IONIC_NUM_TX_STATS;
+
+ /* rx stats */
+ total += MAX_Q(lif) * IONIC_NUM_RX_STATS;
+
+ if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ /* tx debug stats */
+ total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS +
+ IONIC_NUM_TX_Q_STATS +
+ IONIC_NUM_DBG_INTR_STATS +
+ IONIC_MAX_NUM_SG_CNTR);
+
+ /* rx debug stats */
+ total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS +
+ IONIC_NUM_DBG_INTR_STATS +
+ IONIC_NUM_DBG_NAPI_STATS +
+ IONIC_MAX_NUM_NAPI_CNTR);
+ }
+
+ return total;
+}
+
+static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf)
+{
+ int i, q_num;
+
+ for (i = 0; i < IONIC_NUM_LIF_STATS; i++) {
+ snprintf(*buf, ETH_GSTRING_LEN, "%s", ionic_lif_stats_desc[i].name);
+ *buf += ETH_GSTRING_LEN;
+ }
+ for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
+ for (i = 0; i < IONIC_NUM_TX_STATS; i++) {
+ snprintf(*buf, ETH_GSTRING_LEN, "tx_%d_%s",
+ q_num, ionic_tx_stats_desc[i].name);
+ *buf += ETH_GSTRING_LEN;
+ }
+
+ if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
+ snprintf(*buf, ETH_GSTRING_LEN,
+ "txq_%d_%s",
+ q_num,
+ ionic_txq_stats_desc[i].name);
+ *buf += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
+ snprintf(*buf, ETH_GSTRING_LEN,
+ "txq_%d_cq_%s",
+ q_num,
+ ionic_dbg_cq_stats_desc[i].name);
+ *buf += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
+ snprintf(*buf, ETH_GSTRING_LEN,
+ "txq_%d_intr_%s",
+ q_num,
+ ionic_dbg_intr_stats_desc[i].name);
+ *buf += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) {
+ snprintf(*buf, ETH_GSTRING_LEN,
+ "txq_%d_sg_cntr_%d",
+ q_num, i);
+ *buf += ETH_GSTRING_LEN;
+ }
+ }
+ }
+ for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
+ for (i = 0; i < IONIC_NUM_RX_STATS; i++) {
+ snprintf(*buf, ETH_GSTRING_LEN,
+ "rx_%d_%s",
+ q_num, ionic_rx_stats_desc[i].name);
+ *buf += ETH_GSTRING_LEN;
+ }
+
+ if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
+ snprintf(*buf, ETH_GSTRING_LEN,
+ "rxq_%d_cq_%s",
+ q_num,
+ ionic_dbg_cq_stats_desc[i].name);
+ *buf += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
+ snprintf(*buf, ETH_GSTRING_LEN,
+ "rxq_%d_intr_%s",
+ q_num,
+ ionic_dbg_intr_stats_desc[i].name);
+ *buf += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) {
+ snprintf(*buf, ETH_GSTRING_LEN,
+ "rxq_%d_napi_%s",
+ q_num,
+ ionic_dbg_napi_stats_desc[i].name);
+ *buf += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) {
+ snprintf(*buf, ETH_GSTRING_LEN,
+ "rxq_%d_napi_work_done_%d",
+ q_num, i);
+ *buf += ETH_GSTRING_LEN;
+ }
+ }
+ }
+}
+
+static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
+{
+ struct ionic_lif_sw_stats lif_stats;
+ struct ionic_qcq *txqcq, *rxqcq;
+ int i, q_num;
+
+ ionic_get_lif_stats(lif, &lif_stats);
+
+ for (i = 0; i < IONIC_NUM_LIF_STATS; i++) {
+ **buf = IONIC_READ_STAT64(&lif_stats, &ionic_lif_stats_desc[i]);
+ (*buf)++;
+ }
+
+ for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
+ txqcq = lif_to_txqcq(lif, q_num);
+
+ for (i = 0; i < IONIC_NUM_TX_STATS; i++) {
+ **buf = IONIC_READ_STAT64(&txqcq->stats->tx,
+ &ionic_tx_stats_desc[i]);
+ (*buf)++;
+ }
+
+ if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
+ **buf = IONIC_READ_STAT64(&txqcq->q,
+ &ionic_txq_stats_desc[i]);
+ (*buf)++;
+ }
+ for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
+ **buf = IONIC_READ_STAT64(&txqcq->cq,
+ &ionic_dbg_cq_stats_desc[i]);
+ (*buf)++;
+ }
+ for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
+ **buf = IONIC_READ_STAT64(&txqcq->intr,
+ &ionic_dbg_intr_stats_desc[i]);
+ (*buf)++;
+ }
+ for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) {
+ **buf = txqcq->stats->tx.sg_cntr[i];
+ (*buf)++;
+ }
+ }
+ }
+
+ for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
+ rxqcq = lif_to_rxqcq(lif, q_num);
+
+ for (i = 0; i < IONIC_NUM_RX_STATS; i++) {
+ **buf = IONIC_READ_STAT64(&rxqcq->stats->rx,
+ &ionic_rx_stats_desc[i]);
+ (*buf)++;
+ }
+
+ if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
+ **buf = IONIC_READ_STAT64(&rxqcq->cq,
+ &ionic_dbg_cq_stats_desc[i]);
+ (*buf)++;
+ }
+ for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
+ **buf = IONIC_READ_STAT64(&rxqcq->intr,
+ &ionic_dbg_intr_stats_desc[i]);
+ (*buf)++;
+ }
+ for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) {
+ **buf = IONIC_READ_STAT64(&rxqcq->napi_stats,
+ &ionic_dbg_napi_stats_desc[i]);
+ (*buf)++;
+ }
+ for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) {
+ **buf = rxqcq->napi_stats.work_done_cntr[i];
+ (*buf)++;
+ }
+ }
+ }
+}
+
+const struct ionic_stats_group_intf ionic_stats_groups[] = {
+ /* SW Stats group */
+ {
+ .get_strings = ionic_sw_stats_get_strings,
+ .get_values = ionic_sw_stats_get_values,
+ .get_count = ionic_sw_stats_get_count,
+ },
+ /* Add more stat groups here */
+};
+
+const int ionic_num_stats_grps = ARRAY_SIZE(ionic_stats_groups);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.h b/drivers/net/ethernet/pensando/ionic/ionic_stats.h
new file mode 100644
index 000000000000..d2c1122a2c6e
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_STATS_H_
+#define _IONIC_STATS_H_
+
+#define IONIC_STAT_TO_OFFSET(type, stat_name) (offsetof(type, stat_name))
+
+#define IONIC_STAT_DESC(type, stat_name) { \
+ .name = #stat_name, \
+ .offset = IONIC_STAT_TO_OFFSET(type, stat_name) \
+}
+
+#define IONIC_LIF_STAT_DESC(stat_name) \
+ IONIC_STAT_DESC(struct ionic_lif_sw_stats, stat_name)
+
+#define IONIC_TX_STAT_DESC(stat_name) \
+ IONIC_STAT_DESC(struct ionic_tx_stats, stat_name)
+
+#define IONIC_RX_STAT_DESC(stat_name) \
+ IONIC_STAT_DESC(struct ionic_rx_stats, stat_name)
+
+#define IONIC_TX_Q_STAT_DESC(stat_name) \
+ IONIC_STAT_DESC(struct ionic_queue, stat_name)
+
+#define IONIC_CQ_STAT_DESC(stat_name) \
+ IONIC_STAT_DESC(struct ionic_cq, stat_name)
+
+#define IONIC_INTR_STAT_DESC(stat_name) \
+ IONIC_STAT_DESC(struct ionic_intr_info, stat_name)
+
+#define IONIC_NAPI_STAT_DESC(stat_name) \
+ IONIC_STAT_DESC(struct ionic_napi_stats, stat_name)
+
+/* Interface structure for a particular stats group */
+struct ionic_stats_group_intf {
+ void (*get_strings)(struct ionic_lif *lif, u8 **buf);
+ void (*get_values)(struct ionic_lif *lif, u64 **buf);
+ u64 (*get_count)(struct ionic_lif *lif);
+};
+
+extern const struct ionic_stats_group_intf ionic_stats_groups[];
+extern const int ionic_num_stats_grps;
+
+#define IONIC_READ_STAT64(base_ptr, desc_ptr) \
+ (*((u64 *)(((u8 *)(base_ptr)) + (desc_ptr)->offset)))
+
+struct ionic_stat_desc {
+ char name[ETH_GSTRING_LEN];
+ u64 offset;
+};
+
+#endif /* _IONIC_STATS_H_ */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
new file mode 100644
index 000000000000..ab6663d94f42
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -0,0 +1,925 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/if_vlan.h>
+#include <net/ip6_checksum.h>
+
+#include "ionic.h"
+#include "ionic_lif.h"
+#include "ionic_txrx.h"
+
+static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info,
+ struct ionic_cq_info *cq_info, void *cb_arg);
+
+static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell,
+ ionic_desc_cb cb_func, void *cb_arg)
+{
+ DEBUG_STATS_TXQ_POST(q_to_qcq(q), q->head->desc, ring_dbell);
+
+ ionic_q_post(q, ring_dbell, cb_func, cb_arg);
+}
+
+static inline void ionic_rxq_post(struct ionic_queue *q, bool ring_dbell,
+ ionic_desc_cb cb_func, void *cb_arg)
+{
+ ionic_q_post(q, ring_dbell, cb_func, cb_arg);
+
+ DEBUG_STATS_RX_BUFF_CNT(q_to_qcq(q));
+}
+
+static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q)
+{
+ return netdev_get_tx_queue(q->lif->netdev, q->index);
+}
+
+static void ionic_rx_recycle(struct ionic_queue *q, struct ionic_desc_info *desc_info,
+ struct sk_buff *skb)
+{
+ struct ionic_rxq_desc *old = desc_info->desc;
+ struct ionic_rxq_desc *new = q->head->desc;
+
+ new->addr = old->addr;
+ new->len = old->len;
+
+ ionic_rxq_post(q, true, ionic_rx_clean, skb);
+}
+
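+/* If the received frame fits under the lif's rx_copybreak threshold, copy it
+ * into a small freshly allocated skb and recycle the original buffer back
+ * onto the ring; larger frames are unmapped and passed up in place.
+ */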
+static bool ionic_rx_copybreak(struct ionic_queue *q, struct ionic_desc_info *desc_info,
+ struct ionic_cq_info *cq_info, struct sk_buff **skb)
+{
+ struct ionic_rxq_comp *comp = cq_info->cq_desc;
+ struct ionic_rxq_desc *desc = desc_info->desc;
+ struct net_device *netdev = q->lif->netdev;
+ struct device *dev = q->lif->ionic->dev;
+ struct sk_buff *new_skb;
+ u16 clen, dlen;
+
+ clen = le16_to_cpu(comp->len);
+ dlen = le16_to_cpu(desc->len);
+ if (clen > q->lif->rx_copybreak) {
+ dma_unmap_single(dev, (dma_addr_t)le64_to_cpu(desc->addr),
+ dlen, DMA_FROM_DEVICE);
+ return false;
+ }
+
+ new_skb = netdev_alloc_skb_ip_align(netdev, clen);
+ if (!new_skb) {
+ dma_unmap_single(dev, (dma_addr_t)le64_to_cpu(desc->addr),
+ dlen, DMA_FROM_DEVICE);
+ return false;
+ }
+
+ dma_sync_single_for_cpu(dev, (dma_addr_t)le64_to_cpu(desc->addr),
+ clen, DMA_FROM_DEVICE);
+
+ memcpy(new_skb->data, (*skb)->data, clen);
+
+ ionic_rx_recycle(q, desc_info, *skb);
+ *skb = new_skb;
+
+ return true;
+}
+
+static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info,
+ struct ionic_cq_info *cq_info, void *cb_arg)
+{
+ struct ionic_rxq_comp *comp = cq_info->cq_desc;
+ struct ionic_qcq *qcq = q_to_qcq(q);
+ struct sk_buff *skb = cb_arg;
+ struct ionic_rx_stats *stats;
+ struct net_device *netdev;
+
+ stats = q_to_rx_stats(q);
+ netdev = q->lif->netdev;
+
+ if (comp->status) {
+ ionic_rx_recycle(q, desc_info, skb);
+ return;
+ }
+
+ if (unlikely(test_bit(IONIC_LIF_QUEUE_RESET, q->lif->state))) {
+ /* no packet processing while resetting */
+ ionic_rx_recycle(q, desc_info, skb);
+ return;
+ }
+
+ stats->pkts++;
+ stats->bytes += le16_to_cpu(comp->len);
+
+ ionic_rx_copybreak(q, desc_info, cq_info, &skb);
+
+ skb_put(skb, le16_to_cpu(comp->len));
+ skb->protocol = eth_type_trans(skb, netdev);
+
+ skb_record_rx_queue(skb, q->index);
+
+ if (netdev->features & NETIF_F_RXHASH) {
+ switch (comp->pkt_type_color & IONIC_RXQ_COMP_PKT_TYPE_MASK) {
+ case IONIC_PKT_TYPE_IPV4:
+ case IONIC_PKT_TYPE_IPV6:
+ skb_set_hash(skb, le32_to_cpu(comp->rss_hash),
+ PKT_HASH_TYPE_L3);
+ break;
+ case IONIC_PKT_TYPE_IPV4_TCP:
+ case IONIC_PKT_TYPE_IPV6_TCP:
+ case IONIC_PKT_TYPE_IPV4_UDP:
+ case IONIC_PKT_TYPE_IPV6_UDP:
+ skb_set_hash(skb, le32_to_cpu(comp->rss_hash),
+ PKT_HASH_TYPE_L4);
+ break;
+ }
+ }
+
+ if (netdev->features & NETIF_F_RXCSUM) {
+ if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) {
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum = (__wsum)le16_to_cpu(comp->csum);
+ stats->csum_complete++;
+ }
+ } else {
+ stats->csum_none++;
+ }
+
+ if ((comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_TCP_BAD) ||
+ (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_UDP_BAD) ||
+ (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_IP_BAD))
+ stats->csum_error++;
+
+ if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
+ if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_VLAN)
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ le16_to_cpu(comp->vlan_tci));
+ }
+
+ napi_gro_receive(&qcq->napi, skb);
+}
+
+static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
+{
+ struct ionic_rxq_comp *comp = cq_info->cq_desc;
+ struct ionic_queue *q = cq->bound_q;
+ struct ionic_desc_info *desc_info;
+
+ if (!color_match(comp->pkt_type_color, cq->done_color))
+ return false;
+
+ /* check for empty queue */
+ if (q->tail->index == q->head->index)
+ return false;
+
+ desc_info = q->tail;
+ if (desc_info->index != le16_to_cpu(comp->comp_index))
+ return false;
+
+ q->tail = desc_info->next;
+
+ /* clean the related q entry, only one per cq completion */
+ ionic_rx_clean(q, desc_info, cq_info, desc_info->cb_arg);
+
+ desc_info->cb = NULL;
+ desc_info->cb_arg = NULL;
+
+ return true;
+}
+
+static u32 ionic_rx_walk_cq(struct ionic_cq *rxcq, u32 limit)
+{
+ u32 work_done = 0;
+
+ while (ionic_rx_service(rxcq, rxcq->tail)) {
+ if (rxcq->tail->last)
+ rxcq->done_color = !rxcq->done_color;
+ rxcq->tail = rxcq->tail->next;
+ DEBUG_STATS_CQE_CNT(rxcq);
+
+ if (++work_done >= limit)
+ break;
+ }
+
+ return work_done;
+}
+
+void ionic_rx_flush(struct ionic_cq *cq)
+{
+ struct ionic_dev *idev = &cq->lif->ionic->idev;
+ u32 work_done;
+
+ work_done = ionic_rx_walk_cq(cq, cq->num_descs);
+
+ if (work_done)
+ ionic_intr_credits(idev->intr_ctrl, cq->bound_intr->index,
+ work_done, IONIC_INTR_CRED_RESET_COALESCE);
+}
+
+static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q, unsigned int len,
+ dma_addr_t *dma_addr)
+{
+ struct ionic_lif *lif = q->lif;
+ struct ionic_rx_stats *stats;
+ struct net_device *netdev;
+ struct sk_buff *skb;
+ struct device *dev;
+
+ netdev = lif->netdev;
+ dev = lif->ionic->dev;
+ stats = q_to_rx_stats(q);
+ skb = netdev_alloc_skb_ip_align(netdev, len);
+ if (!skb) {
+ net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
+ netdev->name, q->name);
+ stats->alloc_err++;
+ return NULL;
+ }
+
+ *dma_addr = dma_map_single(dev, skb->data, len, DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, *dma_addr)) {
+ dev_kfree_skb(skb);
+ net_warn_ratelimited("%s: DMA single map failed on %s!\n",
+ netdev->name, q->name);
+ stats->dma_map_err++;
+ return NULL;
+ }
+
+ return skb;
+}
+
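+/* Mask used by ionic_rx_fill() so the doorbell is rung once for every four
+ * buffers posted rather than once per buffer.
+ */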
+#define IONIC_RX_RING_DOORBELL_STRIDE ((1 << 2) - 1)
+
+void ionic_rx_fill(struct ionic_queue *q)
+{
+ struct net_device *netdev = q->lif->netdev;
+ struct ionic_rxq_desc *desc;
+ struct sk_buff *skb;
+ dma_addr_t dma_addr;
+ bool ring_doorbell;
+ unsigned int len;
+ unsigned int i;
+
+ len = netdev->mtu + ETH_HLEN;
+
+ for (i = ionic_q_space_avail(q); i; i--) {
+ skb = ionic_rx_skb_alloc(q, len, &dma_addr);
+ if (!skb)
+ return;
+
+ desc = q->head->desc;
+ desc->addr = cpu_to_le64(dma_addr);
+ desc->len = cpu_to_le16(len);
+ desc->opcode = IONIC_RXQ_DESC_OPCODE_SIMPLE;
+
+ ring_doorbell = ((q->head->index + 1) &
+ IONIC_RX_RING_DOORBELL_STRIDE) == 0;
+
+ ionic_rxq_post(q, ring_doorbell, ionic_rx_clean, skb);
+ }
+}
+
+static void ionic_rx_fill_cb(void *arg)
+{
+ ionic_rx_fill(arg);
+}
+
+void ionic_rx_empty(struct ionic_queue *q)
+{
+ struct device *dev = q->lif->ionic->dev;
+ struct ionic_desc_info *cur;
+ struct ionic_rxq_desc *desc;
+
+ for (cur = q->tail; cur != q->head; cur = cur->next) {
+ desc = cur->desc;
+ dma_unmap_single(dev, le64_to_cpu(desc->addr),
+ le16_to_cpu(desc->len), DMA_FROM_DEVICE);
+ dev_kfree_skb(cur->cb_arg);
+ cur->cb_arg = NULL;
+ }
+}
+
+int ionic_rx_napi(struct napi_struct *napi, int budget)
+{
+ struct ionic_qcq *qcq = napi_to_qcq(napi);
+ struct ionic_cq *rxcq = napi_to_cq(napi);
+ unsigned int qi = rxcq->bound_q->index;
+ struct ionic_dev *idev;
+ struct ionic_lif *lif;
+ struct ionic_cq *txcq;
+ u32 work_done = 0;
+ u32 flags = 0;
+
+ lif = rxcq->bound_q->lif;
+ idev = &lif->ionic->idev;
+ txcq = &lif->txqcqs[qi].qcq->cq;
+
+ ionic_tx_flush(txcq);
+
+ work_done = ionic_rx_walk_cq(rxcq, budget);
+
+ if (work_done)
+ ionic_rx_fill_cb(rxcq->bound_q);
+
+ if (work_done < budget && napi_complete_done(napi, work_done)) {
+ flags |= IONIC_INTR_CRED_UNMASK;
+ DEBUG_STATS_INTR_REARM(rxcq->bound_intr);
+ }
+
+ if (work_done || flags) {
+ flags |= IONIC_INTR_CRED_RESET_COALESCE;
+ ionic_intr_credits(idev->intr_ctrl, rxcq->bound_intr->index,
+ work_done, flags);
+ }
+
+ DEBUG_STATS_NAPI_POLL(qcq, work_done);
+
+ return work_done;
+}
+
+static dma_addr_t ionic_tx_map_single(struct ionic_queue *q, void *data, size_t len)
+{
+ struct ionic_tx_stats *stats = q_to_tx_stats(q);
+ struct device *dev = q->lif->ionic->dev;
+ dma_addr_t dma_addr;
+
+ dma_addr = dma_map_single(dev, data, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_addr)) {
+ net_warn_ratelimited("%s: DMA single map failed on %s!\n",
+ q->lif->netdev->name, q->name);
+ stats->dma_map_err++;
+ return 0;
+ }
+ return dma_addr;
+}
+
+static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q, const skb_frag_t *frag,
+ size_t offset, size_t len)
+{
+ struct ionic_tx_stats *stats = q_to_tx_stats(q);
+ struct device *dev = q->lif->ionic->dev;
+ dma_addr_t dma_addr;
+
+ dma_addr = skb_frag_dma_map(dev, frag, offset, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_addr)) {
+ net_warn_ratelimited("%s: DMA frag map failed on %s!\n",
+ q->lif->netdev->name, q->name);
+ stats->dma_map_err++;
+ }
+ return dma_addr;
+}
+
+static void ionic_tx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info,
+ struct ionic_cq_info *cq_info, void *cb_arg)
+{
+ struct ionic_txq_sg_desc *sg_desc = desc_info->sg_desc;
+ struct ionic_txq_sg_elem *elem = sg_desc->elems;
+ struct ionic_tx_stats *stats = q_to_tx_stats(q);
+ struct ionic_txq_desc *desc = desc_info->desc;
+ struct device *dev = q->lif->ionic->dev;
+ u8 opcode, flags, nsge;
+ u16 queue_index;
+ unsigned int i;
+ u64 addr;
+
+ decode_txq_desc_cmd(le64_to_cpu(desc->cmd),
+ &opcode, &flags, &nsge, &addr);
+
+ /* use unmap_single only if either this is not TSO,
+ * or this is the first descriptor of a TSO
+ */
+ if (opcode != IONIC_TXQ_DESC_OPCODE_TSO ||
+ flags & IONIC_TXQ_DESC_FLAG_TSO_SOT)
+ dma_unmap_single(dev, (dma_addr_t)addr,
+ le16_to_cpu(desc->len), DMA_TO_DEVICE);
+ else
+ dma_unmap_page(dev, (dma_addr_t)addr,
+ le16_to_cpu(desc->len), DMA_TO_DEVICE);
+
+ for (i = 0; i < nsge; i++, elem++)
+ dma_unmap_page(dev, (dma_addr_t)le64_to_cpu(elem->addr),
+ le16_to_cpu(elem->len), DMA_TO_DEVICE);
+
+ if (cb_arg) {
+ struct sk_buff *skb = cb_arg;
+ u32 len = skb->len;
+
+ queue_index = skb_get_queue_mapping(skb);
+ if (unlikely(__netif_subqueue_stopped(q->lif->netdev,
+ queue_index))) {
+ netif_wake_subqueue(q->lif->netdev, queue_index);
+ q->wake++;
+ }
+ dev_kfree_skb_any(skb);
+ stats->clean++;
+ netdev_tx_completed_queue(q_to_ndq(q), 1, len);
+ }
+}
+
+void ionic_tx_flush(struct ionic_cq *cq)
+{
+ struct ionic_txq_comp *comp = cq->tail->cq_desc;
+ struct ionic_dev *idev = &cq->lif->ionic->idev;
+ struct ionic_queue *q = cq->bound_q;
+ struct ionic_desc_info *desc_info;
+ unsigned int work_done = 0;
+
+ /* walk the completed cq entries */
+ while (work_done < cq->num_descs &&
+ color_match(comp->color, cq->done_color)) {
+
+ /* clean the related q entries, there could be
+ * several q entries completed for each cq completion
+ */
+ do {
+ desc_info = q->tail;
+ q->tail = desc_info->next;
+ ionic_tx_clean(q, desc_info, cq->tail,
+ desc_info->cb_arg);
+ desc_info->cb = NULL;
+ desc_info->cb_arg = NULL;
+ } while (desc_info->index != le16_to_cpu(comp->comp_index));
+
+ if (cq->tail->last)
+ cq->done_color = !cq->done_color;
+
+ cq->tail = cq->tail->next;
+ comp = cq->tail->cq_desc;
+ DEBUG_STATS_CQE_CNT(cq);
+
+ work_done++;
+ }
+
+ if (work_done)
+ ionic_intr_credits(idev->intr_ctrl, cq->bound_intr->index,
+ work_done, 0);
+}
+
+static int ionic_tx_tcp_inner_pseudo_csum(struct sk_buff *skb)
+{
+ int err;
+
+ err = skb_cow_head(skb, 0);
+ if (err)
+ return err;
+
+ if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
+ inner_ip_hdr(skb)->check = 0;
+ inner_tcp_hdr(skb)->check =
+ ~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr,
+ inner_ip_hdr(skb)->daddr,
+ 0, IPPROTO_TCP, 0);
+ } else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) {
+ inner_tcp_hdr(skb)->check =
+ ~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr,
+ &inner_ipv6_hdr(skb)->daddr,
+ 0, IPPROTO_TCP, 0);
+ }
+
+ return 0;
+}
+
+static int ionic_tx_tcp_pseudo_csum(struct sk_buff *skb)
+{
+ int err;
+
+ err = skb_cow_head(skb, 0);
+ if (err)
+ return err;
+
+ if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
+ ip_hdr(skb)->check = 0;
+ tcp_hdr(skb)->check =
+ ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr,
+ 0, IPPROTO_TCP, 0);
+ } else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) {
+ tcp_hdr(skb)->check =
+ ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ 0, IPPROTO_TCP, 0);
+ }
+
+ return 0;
+}
+
+static void ionic_tx_tso_post(struct ionic_queue *q, struct ionic_txq_desc *desc,
+ struct sk_buff *skb,
+ dma_addr_t addr, u8 nsge, u16 len,
+ unsigned int hdrlen, unsigned int mss,
+ bool outer_csum,
+ u16 vlan_tci, bool has_vlan,
+ bool start, bool done)
+{
+ u8 flags = 0;
+ u64 cmd;
+
+ flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0;
+ flags |= outer_csum ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
+ flags |= start ? IONIC_TXQ_DESC_FLAG_TSO_SOT : 0;
+ flags |= done ? IONIC_TXQ_DESC_FLAG_TSO_EOT : 0;
+
+ cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_TSO, flags, nsge, addr);
+ desc->cmd = cpu_to_le64(cmd);
+ desc->len = cpu_to_le16(len);
+ desc->vlan_tci = cpu_to_le16(vlan_tci);
+ desc->hdr_len = cpu_to_le16(hdrlen);
+ desc->mss = cpu_to_le16(mss);
+
+ if (done) {
+ skb_tx_timestamp(skb);
+ netdev_tx_sent_queue(q_to_ndq(q), skb->len);
+ ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb);
+ } else {
+ ionic_txq_post(q, false, ionic_tx_clean, NULL);
+ }
+}
+
+static struct ionic_txq_desc *ionic_tx_tso_next(struct ionic_queue *q,
+ struct ionic_txq_sg_elem **elem)
+{
+ struct ionic_txq_sg_desc *sg_desc = q->head->sg_desc;
+ struct ionic_txq_desc *desc = q->head->desc;
+
+ *elem = sg_desc->elems;
+ return desc;
+}
+
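+/* Chop the skb into mss-sized segments and post one TSO descriptor per
+ * segment, with SOT set on the first and EOT on the last; hdrlen and mss are
+ * supplied in each descriptor so the device can build the per-segment headers.
+ */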
+static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
+{
+ struct ionic_tx_stats *stats = q_to_tx_stats(q);
+ struct ionic_desc_info *abort = q->head;
+ struct device *dev = q->lif->ionic->dev;
+ struct ionic_desc_info *rewind = abort;
+ struct ionic_txq_sg_elem *elem;
+ struct ionic_txq_desc *desc;
+ unsigned int frag_left = 0;
+ unsigned int offset = 0;
+ unsigned int len_left;
+ dma_addr_t desc_addr;
+ unsigned int hdrlen;
+ unsigned int nfrags;
+ unsigned int seglen;
+ u64 total_bytes = 0;
+ u64 total_pkts = 0;
+ unsigned int left;
+ unsigned int len;
+ unsigned int mss;
+ skb_frag_t *frag;
+ bool start, done;
+ bool outer_csum;
+ bool has_vlan;
+ u16 desc_len;
+ u8 desc_nsge;
+ u16 vlan_tci;
+ bool encap;
+ int err;
+
+ mss = skb_shinfo(skb)->gso_size;
+ nfrags = skb_shinfo(skb)->nr_frags;
+ len_left = skb->len - skb_headlen(skb);
+ outer_csum = (skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM) ||
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
+ has_vlan = !!skb_vlan_tag_present(skb);
+ vlan_tci = skb_vlan_tag_get(skb);
+ encap = skb->encapsulation;
+
+ /* Preload inner-most TCP csum field with IP pseudo hdr
+ * calculated with IP length set to zero. HW will later
+ * add in length to each TCP segment resulting from the TSO.
+ */
+
+ if (encap)
+ err = ionic_tx_tcp_inner_pseudo_csum(skb);
+ else
+ err = ionic_tx_tcp_pseudo_csum(skb);
+ if (err)
+ return err;
+
+ if (encap)
+ hdrlen = skb_inner_transport_header(skb) - skb->data +
+ inner_tcp_hdrlen(skb);
+ else
+ hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+ seglen = hdrlen + mss;
+ left = skb_headlen(skb);
+
+ desc = ionic_tx_tso_next(q, &elem);
+ start = true;
+
+ /* Chop skb->data up into desc segments */
+
+ while (left > 0) {
+ len = min(seglen, left);
+ frag_left = seglen - len;
+ desc_addr = ionic_tx_map_single(q, skb->data + offset, len);
+ if (dma_mapping_error(dev, desc_addr))
+ goto err_out_abort;
+ desc_len = len;
+ desc_nsge = 0;
+ left -= len;
+ offset += len;
+ if (nfrags > 0 && frag_left > 0)
+ continue;
+ done = (nfrags == 0 && left == 0);
+ ionic_tx_tso_post(q, desc, skb,
+ desc_addr, desc_nsge, desc_len,
+ hdrlen, mss,
+ outer_csum,
+ vlan_tci, has_vlan,
+ start, done);
+ total_pkts++;
+ total_bytes += start ? len : len + hdrlen;
+ desc = ionic_tx_tso_next(q, &elem);
+ start = false;
+ seglen = mss;
+ }
+
+ /* Chop skb frags into desc segments */
+
+ for (frag = skb_shinfo(skb)->frags; len_left; frag++) {
+ offset = 0;
+ left = skb_frag_size(frag);
+ len_left -= left;
+ nfrags--;
+ stats->frags++;
+
+ while (left > 0) {
+ if (frag_left > 0) {
+ len = min(frag_left, left);
+ frag_left -= len;
+ elem->addr =
+ cpu_to_le64(ionic_tx_map_frag(q, frag,
+ offset, len));
+ if (dma_mapping_error(dev, elem->addr))
+ goto err_out_abort;
+ elem->len = cpu_to_le16(len);
+ elem++;
+ desc_nsge++;
+ left -= len;
+ offset += len;
+ if (nfrags > 0 && frag_left > 0)
+ continue;
+ done = (nfrags == 0 && left == 0);
+ ionic_tx_tso_post(q, desc, skb, desc_addr,
+ desc_nsge, desc_len,
+ hdrlen, mss, outer_csum,
+ vlan_tci, has_vlan,
+ start, done);
+ total_pkts++;
+ total_bytes += start ? len : len + hdrlen;
+ desc = ionic_tx_tso_next(q, &elem);
+ start = false;
+ } else {
+ len = min(mss, left);
+ frag_left = mss - len;
+ desc_addr = ionic_tx_map_frag(q, frag,
+ offset, len);
+ if (dma_mapping_error(dev, desc_addr))
+ goto err_out_abort;
+ desc_len = len;
+ desc_nsge = 0;
+ left -= len;
+ offset += len;
+ if (nfrags > 0 && frag_left > 0)
+ continue;
+ done = (nfrags == 0 && left == 0);
+ ionic_tx_tso_post(q, desc, skb, desc_addr,
+ desc_nsge, desc_len,
+ hdrlen, mss, outer_csum,
+ vlan_tci, has_vlan,
+ start, done);
+ total_pkts++;
+ total_bytes += start ? len : len + hdrlen;
+ desc = ionic_tx_tso_next(q, &elem);
+ start = false;
+ }
+ }
+ }
+
+ stats->pkts += total_pkts;
+ stats->bytes += total_bytes;
+ stats->tso++;
+
+ return 0;
+
+err_out_abort:
+ while (rewind->desc != q->head->desc) {
+ ionic_tx_clean(q, rewind, NULL, NULL);
+ rewind = rewind->next;
+ }
+ q->head = abort;
+
+ return -ENOMEM;
+}
+
+static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb)
+{
+ struct ionic_tx_stats *stats = q_to_tx_stats(q);
+ struct ionic_txq_desc *desc = q->head->desc;
+ struct device *dev = q->lif->ionic->dev;
+ dma_addr_t dma_addr;
+ bool has_vlan;
+ u8 flags = 0;
+ bool encap;
+ u64 cmd;
+
+ has_vlan = !!skb_vlan_tag_present(skb);
+ encap = skb->encapsulation;
+
+ dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb));
+ if (dma_mapping_error(dev, dma_addr))
+ return -ENOMEM;
+
+ flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0;
+ flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
+
+ cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL,
+ flags, skb_shinfo(skb)->nr_frags, dma_addr);
+ desc->cmd = cpu_to_le64(cmd);
+ desc->len = cpu_to_le16(skb_headlen(skb));
+ desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb));
+ desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb));
+ desc->csum_offset = cpu_to_le16(skb->csum_offset);
+
+ if (skb->csum_not_inet)
+ stats->crc32_csum++;
+ else
+ stats->csum++;
+
+ return 0;
+}
+
+static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb)
+{
+ struct ionic_tx_stats *stats = q_to_tx_stats(q);
+ struct ionic_txq_desc *desc = q->head->desc;
+ struct device *dev = q->lif->ionic->dev;
+ dma_addr_t dma_addr;
+ bool has_vlan;
+ u8 flags = 0;
+ bool encap;
+ u64 cmd;
+
+ has_vlan = !!skb_vlan_tag_present(skb);
+ encap = skb->encapsulation;
+
+ dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb));
+ if (dma_mapping_error(dev, dma_addr))
+ return -ENOMEM;
+
+ flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0;
+ flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
+
+ cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_NONE,
+ flags, skb_shinfo(skb)->nr_frags, dma_addr);
+ desc->cmd = cpu_to_le64(cmd);
+ desc->len = cpu_to_le16(skb_headlen(skb));
+ desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb));
+
+ stats->no_csum++;
+
+ return 0;
+}
+
+static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb)
+{
+ struct ionic_txq_sg_desc *sg_desc = q->head->sg_desc;
+ unsigned int len_left = skb->len - skb_headlen(skb);
+ struct ionic_txq_sg_elem *elem = sg_desc->elems;
+ struct ionic_tx_stats *stats = q_to_tx_stats(q);
+ struct device *dev = q->lif->ionic->dev;
+ dma_addr_t dma_addr;
+ skb_frag_t *frag;
+ u16 len;
+
+ for (frag = skb_shinfo(skb)->frags; len_left; frag++, elem++) {
+ len = skb_frag_size(frag);
+ elem->len = cpu_to_le16(len);
+ dma_addr = ionic_tx_map_frag(q, frag, 0, len);
+ if (dma_mapping_error(dev, dma_addr))
+ return -ENOMEM;
+ elem->addr = cpu_to_le64(dma_addr);
+ len_left -= len;
+ stats->frags++;
+ }
+
+ return 0;
+}
+
+static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
+{
+ struct ionic_tx_stats *stats = q_to_tx_stats(q);
+ int err;
+
+ /* set up the initial descriptor */
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ err = ionic_tx_calc_csum(q, skb);
+ else
+ err = ionic_tx_calc_no_csum(q, skb);
+ if (err)
+ return err;
+
+ /* add frags */
+ err = ionic_tx_skb_frags(q, skb);
+ if (err)
+ return err;
+
+ skb_tx_timestamp(skb);
+ stats->pkts++;
+ stats->bytes += skb->len;
+
+ netdev_tx_sent_queue(q_to_ndq(q), skb->len);
+ ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb);
+
+ return 0;
+}
+
+static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
+{
+ struct ionic_tx_stats *stats = q_to_tx_stats(q);
+ int err;
+
+ /* If TSO, need roundup(skb->len/mss) descs */
+ if (skb_is_gso(skb))
+ return (skb->len / skb_shinfo(skb)->gso_size) + 1;
+
+ /* If non-TSO, just need 1 desc and nr_frags sg elems */
+ if (skb_shinfo(skb)->nr_frags <= IONIC_TX_MAX_SG_ELEMS)
+ return 1;
+
+ /* Too many frags, so linearize */
+ err = skb_linearize(skb);
+ if (err)
+ return err;
+
+ stats->linearize++;
+
+ /* Need 1 desc and zero sg elems */
+ return 1;
+}
+
+static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs)
+{
+ int stopped = 0;
+
+ if (unlikely(!ionic_q_has_space(q, ndescs))) {
+ netif_stop_subqueue(q->lif->netdev, q->index);
+ q->stop++;
+ stopped = 1;
+
+ /* Might race with ionic_tx_clean, check again */
+ smp_rmb();
+ if (ionic_q_has_space(q, ndescs)) {
+ netif_wake_subqueue(q->lif->netdev, q->index);
+ stopped = 0;
+ }
+ }
+
+ return stopped;
+}
+
+netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ u16 queue_index = skb_get_queue_mapping(skb);
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic_queue *q;
+ int ndescs;
+ int err;
+
+ if (unlikely(!test_bit(IONIC_LIF_UP, lif->state))) {
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
+ if (unlikely(!lif_to_txqcq(lif, queue_index)))
+ queue_index = 0;
+ q = lif_to_txq(lif, queue_index);
+
+ ndescs = ionic_tx_descs_needed(q, skb);
+ if (ndescs < 0)
+ goto err_out_drop;
+
+ if (unlikely(ionic_maybe_stop_tx(q, ndescs)))
+ return NETDEV_TX_BUSY;
+
+ if (skb_is_gso(skb))
+ err = ionic_tx_tso(q, skb);
+ else
+ err = ionic_tx(q, skb);
+
+ if (err)
+ goto err_out_drop;
+
+ /* Stop the queue if there aren't descriptors for the next packet.
+ * Since our SG lists per descriptor take care of most of the possible
+ * fragmentation, we don't need to have many descriptors available.
+ */
+ ionic_maybe_stop_tx(q, 4);
+
+ return NETDEV_TX_OK;
+
+err_out_drop:
+ q->stop++;
+ q->drop++;
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h
new file mode 100644
index 000000000000..53775c62c85a
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+
+#ifndef _IONIC_TXRX_H_
+#define _IONIC_TXRX_H_
+
+void ionic_rx_flush(struct ionic_cq *cq);
+void ionic_tx_flush(struct ionic_cq *cq);
+
+void ionic_rx_fill(struct ionic_queue *q);
+void ionic_rx_empty(struct ionic_queue *q);
+int ionic_rx_napi(struct napi_struct *napi, int budget);
+netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev);
+
+#endif /* _IONIC_TXRX_H_ */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index f380fae8799d..65ec16a31658 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -382,7 +382,7 @@ qed_iwarp2roce_state(enum qed_iwarp_qp_state state)
}
}
-const static char *iwarp_state_names[] = {
+static const char * const iwarp_state_names[] = {
"IDLE",
"RTS",
"TERMINATE",
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index ac9195add811..a9c89d5d8898 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -193,16 +193,12 @@ enum ravb_reg {
GECMR = 0x05b0,
MAHR = 0x05c0,
MALR = 0x05c8,
- TROCR = 0x0700, /* Undocumented? */
- CDCR = 0x0708, /* Undocumented? */
- LCCR = 0x0710, /* Undocumented? */
+ TROCR = 0x0700, /* R-Car Gen3 only */
CEFCR = 0x0740,
FRECR = 0x0748,
TSFRCR = 0x0750,
TLFRCR = 0x0758,
RFCR = 0x0760,
- CERCR = 0x0768, /* Undocumented? */
- CEECR = 0x0770, /* Undocumented? */
MAFCR = 0x0778,
};
@@ -220,7 +216,6 @@ enum CCC_BIT {
CCC_CSEL_HPB = 0x00010000,
CCC_CSEL_ETH_TX = 0x00020000,
CCC_CSEL_GMII_REF = 0x00030000,
- CCC_BOC = 0x00100000, /* Undocumented? */
CCC_LBME = 0x01000000,
};
@@ -317,7 +312,7 @@ enum UFCD_BIT {
/* SFO */
enum SFO_BIT {
- SFO_FPB = 0x0000003F,
+ SFO_FBP = 0x0000003F,
};
/* RTC */
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 6cacd5e893ac..de9aa8c47f1c 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -447,12 +447,6 @@ static int ravb_dmac_init(struct net_device *ndev)
ravb_ring_format(ndev, RAVB_BE);
ravb_ring_format(ndev, RAVB_NC);
-#if defined(__LITTLE_ENDIAN)
- ravb_modify(ndev, CCC, CCC_BOC, 0);
-#else
- ravb_modify(ndev, CCC, CCC_BOC, CCC_BOC);
-#endif
-
/* Set AVB RX */
ravb_write(ndev,
RCR_EFFS | RCR_ENCF | RCR_ETS0 | RCR_ESF | 0x18000000, RCR);
@@ -1627,17 +1621,10 @@ static struct net_device_stats *ravb_get_stats(struct net_device *ndev)
stats0 = &priv->stats[RAVB_BE];
stats1 = &priv->stats[RAVB_NC];
- nstats->tx_dropped += ravb_read(ndev, TROCR);
- ravb_write(ndev, 0, TROCR); /* (write clear) */
- nstats->collisions += ravb_read(ndev, CDCR);
- ravb_write(ndev, 0, CDCR); /* (write clear) */
- nstats->tx_carrier_errors += ravb_read(ndev, LCCR);
- ravb_write(ndev, 0, LCCR); /* (write clear) */
-
- nstats->tx_carrier_errors += ravb_read(ndev, CERCR);
- ravb_write(ndev, 0, CERCR); /* (write clear) */
- nstats->tx_carrier_errors += ravb_read(ndev, CEECR);
- ravb_write(ndev, 0, CEECR); /* (write clear) */
+ if (priv->chip_id == RCAR_GEN3) {
+ nstats->tx_dropped += ravb_read(ndev, TROCR);
+ ravb_write(ndev, 0, TROCR); /* (write clear) */
+ }
nstats->rx_packets = stats0->rx_packets + stats1->rx_packets;
nstats->tx_packets = stats0->tx_packets + stats1->tx_packets;
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 2c5d3f5b84dd..786b158bd305 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2189,6 +2189,9 @@ static int rocker_router_fib_event(struct notifier_block *nb,
struct rocker_fib_event_work *fib_work;
struct fib_notifier_info *info = ptr;
+ if (!net_eq(info->net, &init_net))
+ return NOTIFY_DONE;
+
if (info->family != AF_INET)
return NOTIFY_DONE;
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 49aa56ca09cc..912bbb6515b2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -360,6 +360,8 @@ struct dma_features {
unsigned int sphen;
unsigned int vlins;
unsigned int dvlan;
+ unsigned int l3l4fnum;
+ unsigned int arpoffsel;
};
/* GMAC TX FIFO is 8K, Rx FIFO is 16K */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 7357b8bdc128..5923ca62d793 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -44,9 +44,11 @@
#define XGMAC_CONFIG_CST BIT(2)
#define XGMAC_CONFIG_ACS BIT(1)
#define XGMAC_CONFIG_RE BIT(0)
-#define XGMAC_CORE_INIT_RX 0
+#define XGMAC_CORE_INIT_RX (XGMAC_CONFIG_GPSLCE | XGMAC_CONFIG_WD | \
+ (XGMAC_JUMBO_LEN << XGMAC_CONFIG_GPSL_SHIFT))
#define XGMAC_PACKET_FILTER 0x00000008
#define XGMAC_FILTER_RA BIT(31)
+#define XGMAC_FILTER_IPFE BIT(20)
#define XGMAC_FILTER_VTFE BIT(16)
#define XGMAC_FILTER_HPF BIT(10)
#define XGMAC_FILTER_PCF BIT(7)
@@ -119,6 +121,7 @@
#define XGMAC_HWFEAT_VLHASH BIT(4)
#define XGMAC_HWFEAT_GMIISEL BIT(1)
#define XGMAC_HW_FEATURE1 0x00000120
+#define XGMAC_HWFEAT_L3L4FNUM GENMASK(30, 27)
#define XGMAC_HWFEAT_RSSEN BIT(20)
#define XGMAC_HWFEAT_TSOEN BIT(18)
#define XGMAC_HWFEAT_SPHEN BIT(17)
@@ -150,6 +153,34 @@
#define XGMAC_DCS GENMASK(19, 16)
#define XGMAC_DCS_SHIFT 16
#define XGMAC_ADDRx_LOW(x) (0x00000304 + (x) * 0x8)
+#define XGMAC_L3L4_ADDR_CTRL 0x00000c00
+#define XGMAC_IDDR GENMASK(15, 8)
+#define XGMAC_IDDR_SHIFT 8
+#define XGMAC_IDDR_FNUM 4
+#define XGMAC_TT BIT(1)
+#define XGMAC_XB BIT(0)
+#define XGMAC_L3L4_DATA 0x00000c04
+#define XGMAC_L3L4_CTRL 0x0
+#define XGMAC_L4DPIM0 BIT(21)
+#define XGMAC_L4DPM0 BIT(20)
+#define XGMAC_L4SPIM0 BIT(19)
+#define XGMAC_L4SPM0 BIT(18)
+#define XGMAC_L4PEN0 BIT(16)
+#define XGMAC_L3HDBM0 GENMASK(15, 11)
+#define XGMAC_L3HSBM0 GENMASK(10, 6)
+#define XGMAC_L3DAIM0 BIT(5)
+#define XGMAC_L3DAM0 BIT(4)
+#define XGMAC_L3SAIM0 BIT(3)
+#define XGMAC_L3SAM0 BIT(2)
+#define XGMAC_L3PEN0 BIT(0)
+#define XGMAC_L4_ADDR 0x1
+#define XGMAC_L4DP0 GENMASK(31, 16)
+#define XGMAC_L4DP0_SHIFT 16
+#define XGMAC_L4SP0 GENMASK(15, 0)
+#define XGMAC_L3_ADDR0 0x4
+#define XGMAC_L3_ADDR1 0x5
+#define XGMAC_L3_ADDR2 0x6
+#define XGMAC_L3_ADDR3 0x7
#define XGMAC_ARP_ADDR 0x00000c10
#define XGMAC_RSS_CTRL 0x00000c80
#define XGMAC_UDP4TE BIT(3)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index e534a3aaf4a3..78ac659da279 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -15,7 +15,6 @@ static void dwxgmac2_core_init(struct mac_device_info *hw,
struct net_device *dev)
{
void __iomem *ioaddr = hw->pcsr;
- int mtu = dev->mtu;
u32 tx, rx;
tx = readl(ioaddr + XGMAC_TX_CONFIG);
@@ -24,16 +23,6 @@ static void dwxgmac2_core_init(struct mac_device_info *hw,
tx |= XGMAC_CORE_INIT_TX;
rx |= XGMAC_CORE_INIT_RX;
- if (mtu >= 9000) {
- rx |= XGMAC_CONFIG_GPSLCE;
- rx |= XGMAC_JUMBO_LEN << XGMAC_CONFIG_GPSL_SHIFT;
- rx |= XGMAC_CONFIG_WD;
- } else if (mtu > 2000) {
- rx |= XGMAC_CONFIG_JE;
- } else if (mtu > 1500) {
- rx |= XGMAC_CONFIG_S2KP;
- }
-
if (hw->ps) {
tx |= XGMAC_CONFIG_TE;
tx &= ~hw->link.speed_mask;
@@ -1163,6 +1152,197 @@ static void dwxgmac2_enable_vlan(struct mac_device_info *hw, u32 type)
writel(value, ioaddr + XGMAC_VLAN_INCL);
}
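+
+/* The L3/L4 filter registers sit behind an indirect access window: the
+ * filter number and register offset are programmed into the IDDR field of
+ * XGMAC_L3L4_ADDR_CTRL, XB kicks off the transfer (with TT set for reads),
+ * and the payload moves through XGMAC_L3L4_DATA once XB clears.
+ */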
+static int dwxgmac2_filter_wait(struct mac_device_info *hw)
+{
+ void __iomem *ioaddr = hw->pcsr;
+ u32 value;
+
+ if (readl_poll_timeout(ioaddr + XGMAC_L3L4_ADDR_CTRL, value,
+ !(value & XGMAC_XB), 100, 10000))
+ return -EBUSY;
+ return 0;
+}
+
+static int dwxgmac2_filter_read(struct mac_device_info *hw, u32 filter_no,
+ u8 reg, u32 *data)
+{
+ void __iomem *ioaddr = hw->pcsr;
+ u32 value;
+ int ret;
+
+ ret = dwxgmac2_filter_wait(hw);
+ if (ret)
+ return ret;
+
+ value = ((filter_no << XGMAC_IDDR_FNUM) | reg) << XGMAC_IDDR_SHIFT;
+ value |= XGMAC_TT | XGMAC_XB;
+ writel(value, ioaddr + XGMAC_L3L4_ADDR_CTRL);
+
+ ret = dwxgmac2_filter_wait(hw);
+ if (ret)
+ return ret;
+
+ *data = readl(ioaddr + XGMAC_L3L4_DATA);
+ return 0;
+}
+
+static int dwxgmac2_filter_write(struct mac_device_info *hw, u32 filter_no,
+ u8 reg, u32 data)
+{
+ void __iomem *ioaddr = hw->pcsr;
+ u32 value;
+ int ret;
+
+ ret = dwxgmac2_filter_wait(hw);
+ if (ret)
+ return ret;
+
+ writel(data, ioaddr + XGMAC_L3L4_DATA);
+
+ value = ((filter_no << XGMAC_IDDR_FNUM) | reg) << XGMAC_IDDR_SHIFT;
+ value |= XGMAC_XB;
+ writel(value, ioaddr + XGMAC_L3L4_ADDR_CTRL);
+
+ return dwxgmac2_filter_wait(hw);
+}
+
+static int dwxgmac2_config_l3_filter(struct mac_device_info *hw, u32 filter_no,
+ bool en, bool ipv6, bool sa, bool inv,
+ u32 match)
+{
+ void __iomem *ioaddr = hw->pcsr;
+ u32 value;
+ int ret;
+
+ value = readl(ioaddr + XGMAC_PACKET_FILTER);
+ value |= XGMAC_FILTER_IPFE;
+ writel(value, ioaddr + XGMAC_PACKET_FILTER);
+
+ ret = dwxgmac2_filter_read(hw, filter_no, XGMAC_L3L4_CTRL, &value);
+ if (ret)
+ return ret;
+
+ /* For IPv6, the SA and DA filters cannot both be active */
+ if (ipv6) {
+ value |= XGMAC_L3PEN0;
+ value &= ~(XGMAC_L3SAM0 | XGMAC_L3SAIM0);
+ value &= ~(XGMAC_L3DAM0 | XGMAC_L3DAIM0);
+ if (sa) {
+ value |= XGMAC_L3SAM0;
+ if (inv)
+ value |= XGMAC_L3SAIM0;
+ } else {
+ value |= XGMAC_L3DAM0;
+ if (inv)
+ value |= XGMAC_L3DAIM0;
+ }
+ } else {
+ value &= ~XGMAC_L3PEN0;
+ if (sa) {
+ value |= XGMAC_L3SAM0;
+ if (inv)
+ value |= XGMAC_L3SAIM0;
+ } else {
+ value |= XGMAC_L3DAM0;
+ if (inv)
+ value |= XGMAC_L3DAIM0;
+ }
+ }
+
+ ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, value);
+ if (ret)
+ return ret;
+
+ if (sa) {
+ ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3_ADDR0, match);
+ if (ret)
+ return ret;
+ } else {
+ ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3_ADDR1, match);
+ if (ret)
+ return ret;
+ }
+
+ if (!en)
+ return dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, 0);
+
+ return 0;
+}
+
+static int dwxgmac2_config_l4_filter(struct mac_device_info *hw, u32 filter_no,
+ bool en, bool udp, bool sa, bool inv,
+ u32 match)
+{
+ void __iomem *ioaddr = hw->pcsr;
+ u32 value;
+ int ret;
+
+ value = readl(ioaddr + XGMAC_PACKET_FILTER);
+ value |= XGMAC_FILTER_IPFE;
+ writel(value, ioaddr + XGMAC_PACKET_FILTER);
+
+ ret = dwxgmac2_filter_read(hw, filter_no, XGMAC_L3L4_CTRL, &value);
+ if (ret)
+ return ret;
+
+ if (udp) {
+ value |= XGMAC_L4PEN0;
+ } else {
+ value &= ~XGMAC_L4PEN0;
+ }
+
+ value &= ~(XGMAC_L4SPM0 | XGMAC_L4SPIM0);
+ value &= ~(XGMAC_L4DPM0 | XGMAC_L4DPIM0);
+ if (sa) {
+ value |= XGMAC_L4SPM0;
+ if (inv)
+ value |= XGMAC_L4SPIM0;
+ } else {
+ value |= XGMAC_L4DPM0;
+ if (inv)
+ value |= XGMAC_L4DPIM0;
+ }
+
+ ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, value);
+ if (ret)
+ return ret;
+
+ if (sa) {
+ value = match & XGMAC_L4SP0;
+
+ ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L4_ADDR, value);
+ if (ret)
+ return ret;
+ } else {
+ value = (match << XGMAC_L4DP0_SHIFT) & XGMAC_L4DP0;
+
+ ret = dwxgmac2_filter_write(hw, filter_no, XGMAC_L4_ADDR, value);
+ if (ret)
+ return ret;
+ }
+
+ if (!en)
+ return dwxgmac2_filter_write(hw, filter_no, XGMAC_L3L4_CTRL, 0);
+
+ return 0;
+}
+
+static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en,
+ u32 addr)
+{
+ void __iomem *ioaddr = hw->pcsr;
+ u32 value;
+
+ writel(addr, ioaddr + XGMAC_ARP_ADDR);
+
+ value = readl(ioaddr + XGMAC_RX_CONFIG);
+ if (en)
+ value |= XGMAC_CONFIG_ARPEN;
+ else
+ value &= ~XGMAC_CONFIG_ARPEN;
+ writel(value, ioaddr + XGMAC_RX_CONFIG);
+}
+
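The helpers above reach the L3/L4 filter block through an indirect register pair: XGMAC_L3L4_ADDR_CTRL selects the filter number and register, XGMAC_L3L4_DATA carries the value, and XGMAC_XB is polled between accesses. As a rough caller-side sketch (illustrative only, not part of the patch; 'hw' is a struct mac_device_info, and the address is passed in host byte order just as the tc path below does after ntohl()):

	u32 match = 0x0a141e28;	/* 10.20.30.40: filter slot 0, IPv4, destination address */
	int err;

	err = dwxgmac2_config_l3_filter(hw, 0, true, false, false, false, match);
	if (err)
		pr_warn("XGMAC L3 filter setup failed: %d\n", err);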
const struct stmmac_ops dwxgmac210_ops = {
.core_init = dwxgmac2_core_init,
.set_mac = dwxgmac2_set_mac,
@@ -1203,6 +1383,9 @@ const struct stmmac_ops dwxgmac210_ops = {
.flex_pps_config = dwxgmac2_flex_pps_config,
.sarc_configure = dwxgmac2_sarc_configure,
.enable_vlan = dwxgmac2_enable_vlan,
+ .config_l3_filter = dwxgmac2_config_l3_filter,
+ .config_l4_filter = dwxgmac2_config_l4_filter,
+ .set_arp_offload = dwxgmac2_set_arp_offload,
};
int dwxgmac2_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 64956465c030..53c4a40d8386 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -322,6 +322,10 @@ static int dwxgmac2_dma_interrupt(void __iomem *ioaddr,
/* ABNORMAL interrupts */
if (unlikely(intr_status & XGMAC_AIS)) {
+ if (unlikely(intr_status & XGMAC_RBU)) {
+ x->rx_buf_unav_irq++;
+ ret |= handle_rx;
+ }
if (unlikely(intr_status & XGMAC_TPS)) {
x->tx_process_stopped_irq++;
ret |= tx_hard_error;
@@ -365,7 +369,8 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
dma_cap->eee = (hw_cap & XGMAC_HWFEAT_EEESEL) >> 13;
dma_cap->atime_stamp = (hw_cap & XGMAC_HWFEAT_TSSEL) >> 12;
dma_cap->av = (hw_cap & XGMAC_HWFEAT_AVSEL) >> 11;
- dma_cap->av &= (hw_cap & XGMAC_HWFEAT_RAVSEL) >> 10;
+ dma_cap->av &= !(hw_cap & XGMAC_HWFEAT_RAVSEL) >> 10;
+ dma_cap->arpoffsel = (hw_cap & XGMAC_HWFEAT_ARPOFFSEL) >> 9;
dma_cap->rmon = (hw_cap & XGMAC_HWFEAT_MMCSEL) >> 8;
dma_cap->pmt_magic_frame = (hw_cap & XGMAC_HWFEAT_MGKSEL) >> 7;
dma_cap->pmt_remote_wake_up = (hw_cap & XGMAC_HWFEAT_RWKSEL) >> 6;
@@ -374,6 +379,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
/* MAC HW feature 1 */
hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1);
+ dma_cap->l3l4fnum = (hw_cap & XGMAC_HWFEAT_L3L4FNUM) >> 27;
dma_cap->rssen = (hw_cap & XGMAC_HWFEAT_RSSEN) >> 20;
dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18;
dma_cap->sphen = (hw_cap & XGMAC_HWFEAT_SPHEN) >> 17;
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 9435b312495d..ddb851d99618 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -363,6 +363,14 @@ struct stmmac_ops {
int (*get_mac_tx_timestamp)(struct mac_device_info *hw, u64 *ts);
/* Source Address Insertion / Replacement */
void (*sarc_configure)(void __iomem *ioaddr, int val);
+ /* Filtering */
+ int (*config_l3_filter)(struct mac_device_info *hw, u32 filter_no,
+ bool en, bool ipv6, bool sa, bool inv,
+ u32 match);
+ int (*config_l4_filter)(struct mac_device_info *hw, u32 filter_no,
+ bool en, bool udp, bool sa, bool inv,
+ u32 match);
+ void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr);
};
#define stmmac_core_init(__priv, __args...) \
@@ -443,6 +451,12 @@ struct stmmac_ops {
stmmac_do_callback(__priv, mac, get_mac_tx_timestamp, __args)
#define stmmac_sarc_configure(__priv, __args...) \
stmmac_do_void_callback(__priv, mac, sarc_configure, __args)
+#define stmmac_config_l3_filter(__priv, __args...) \
+ stmmac_do_callback(__priv, mac, config_l3_filter, __args)
+#define stmmac_config_l4_filter(__priv, __args...) \
+ stmmac_do_callback(__priv, mac, config_l4_filter, __args)
+#define stmmac_set_arp_offload(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, set_arp_offload, __args)
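The three new wrappers dispatch through the generic stmmac_do_callback()/stmmac_do_void_callback() helpers defined earlier in hwif.h (outside this hunk). Assuming the usual definition of those helpers, the L3 wrapper behaves roughly like the sketch below: call the hook if the MAC ops provide it, otherwise report -EINVAL.

static inline int example_config_l3_filter(struct stmmac_priv *priv,
					   struct mac_device_info *hw,
					   u32 filter_no, bool en, bool ipv6,
					   bool sa, bool inv, u32 match)
{
	if (!priv->hw->mac || !priv->hw->mac->config_l3_filter)
		return -EINVAL;	/* hook not implemented for this core */
	return priv->hw->mac->config_l3_filter(hw, filter_no, en, ipv6, sa,
					       inv, match);
}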
/* PTP and HW Timer helpers */
struct stmmac_hwtimestamp {
@@ -499,6 +513,7 @@ struct stmmac_mode_ops {
struct stmmac_priv;
struct tc_cls_u32_offload;
struct tc_cbs_qopt_offload;
+struct flow_cls_offload;
struct stmmac_tc_ops {
int (*init)(struct stmmac_priv *priv);
@@ -506,6 +521,8 @@ struct stmmac_tc_ops {
struct tc_cls_u32_offload *cls);
int (*setup_cbs)(struct stmmac_priv *priv,
struct tc_cbs_qopt_offload *qopt);
+ int (*setup_cls)(struct stmmac_priv *priv,
+ struct flow_cls_offload *cls);
};
#define stmmac_tc_init(__priv, __args...) \
@@ -514,6 +531,8 @@ struct stmmac_tc_ops {
stmmac_do_callback(__priv, tc, setup_cls_u32, __args)
#define stmmac_tc_setup_cbs(__priv, __args...) \
stmmac_do_callback(__priv, tc, setup_cbs, __args)
+#define stmmac_tc_setup_cls(__priv, __args...) \
+ stmmac_do_callback(__priv, tc, setup_cls, __args)
struct stmmac_counters;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index dcb2e29a5717..d993fc7e82c3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -128,6 +128,16 @@ struct stmmac_rss {
u32 table[STMMAC_RSS_MAX_TABLE_SIZE];
};
+#define STMMAC_FLOW_ACTION_DROP BIT(0)
+struct stmmac_flow_entry {
+ unsigned long cookie;
+ unsigned long action;
+ u8 ip_proto;
+ int in_use;
+ int idx;
+ int is_l4;
+};
+
struct stmmac_priv {
/* Frequently used values are kept adjacent for cache effect */
u32 tx_coal_frames;
@@ -216,6 +226,8 @@ struct stmmac_priv {
unsigned int tc_entries_max;
unsigned int tc_off_max;
struct stmmac_tc_entry *tc_entries;
+ unsigned int flow_entries_max;
+ struct stmmac_flow_entry *flow_entries;
/* Pulse Per Second output */
struct stmmac_pps_cfg pps[STMMAC_PPS_MAX];
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 1c450105e5a6..1a768837ca72 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -746,8 +746,15 @@ static int stmmac_set_coalesce(struct net_device *dev,
(ec->tx_max_coalesced_frames_high) || (ec->rate_sample_interval))
return -EOPNOTSUPP;
- if (ec->rx_coalesce_usecs == 0)
- return -EINVAL;
+ if (priv->use_riwt && (ec->rx_coalesce_usecs > 0)) {
+ rx_riwt = stmmac_usec2riwt(ec->rx_coalesce_usecs, priv);
+
+ if ((rx_riwt > MAX_DMA_RIWT) || (rx_riwt < MIN_DMA_RIWT))
+ return -EINVAL;
+
+ priv->rx_riwt = rx_riwt;
+ stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt);
+ }
if ((ec->tx_coalesce_usecs == 0) &&
(ec->tx_max_coalesced_frames == 0))
@@ -757,20 +764,10 @@ static int stmmac_set_coalesce(struct net_device *dev,
(ec->tx_max_coalesced_frames > STMMAC_TX_MAX_FRAMES))
return -EINVAL;
- rx_riwt = stmmac_usec2riwt(ec->rx_coalesce_usecs, priv);
-
- if ((rx_riwt > MAX_DMA_RIWT) || (rx_riwt < MIN_DMA_RIWT))
- return -EINVAL;
- else if (!priv->use_riwt)
- return -EOPNOTSUPP;
-
/* Only copy relevant parameters, ignore all others. */
priv->tx_coal_frames = ec->tx_max_coalesced_frames;
priv->tx_coal_timer = ec->tx_coalesce_usecs;
priv->rx_coal_frames = ec->rx_max_coalesced_frames;
- priv->rx_riwt = rx_riwt;
- stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt);
-
return 0;
}
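With this reordering, rx_coalesce_usecs is validated and applied only when the MAC actually uses the RX watchdog (priv->use_riwt), and a value of 0 now simply leaves RX coalescing untouched instead of failing the whole request. For orientation, the usec-to-RIWT conversion is assumed here to follow the usual stmmac scaling (CSR clock, 256-cycle granularity); the sketch below is illustrative, not the driver's exact helper:

static u32 example_usec2riwt(u32 usec, unsigned long csr_clk_hz)
{
	/* e.g. 100 us at a 250 MHz CSR clock -> 97, which must fall within
	 * [MIN_DMA_RIWT, MAX_DMA_RIWT] to be accepted above.
	 */
	return (usec * (csr_clk_hz / 1000000)) / 256;
}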
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 06ccd216ae90..c3baca9f587b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3511,9 +3511,10 @@ read_again:
&priv->xstats, rx_q->dma_erx + entry);
if (unlikely(status == discard_frame)) {
page_pool_recycle_direct(rx_q->page_pool, buf->page);
- priv->dev->stats.rx_errors++;
buf->page = NULL;
error = 1;
+ if (!priv->hwts_rx_en)
+ priv->dev->stats.rx_errors++;
}
if (unlikely(error && (status & rx_not_ls)))
@@ -3931,12 +3932,17 @@ static int stmmac_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
struct stmmac_priv *priv = cb_priv;
int ret = -EOPNOTSUPP;
+ if (!tc_cls_can_offload_and_chain0(priv->dev, type_data))
+ return ret;
+
stmmac_disable_all_queues(priv);
switch (type) {
case TC_SETUP_CLSU32:
- if (tc_cls_can_offload_and_chain0(priv->dev, type_data))
- ret = stmmac_tc_setup_cls_u32(priv, priv, type_data);
+ ret = stmmac_tc_setup_cls_u32(priv, priv, type_data);
+ break;
+ case TC_SETUP_CLSFLOWER:
+ ret = stmmac_tc_setup_cls(priv, priv, type_data);
break;
default:
break;
@@ -4536,10 +4542,10 @@ int stmmac_dvr_probe(struct device *device,
/* MTU range: 46 - hw-specific max */
ndev->min_mtu = ETH_ZLEN - ETH_HLEN;
- if ((priv->plat->enh_desc) || (priv->synopsys_id >= DWMAC_CORE_4_00))
- ndev->max_mtu = JUMBO_LEN;
- else if (priv->plat->has_xgmac)
+ if (priv->plat->has_xgmac)
ndev->max_mtu = XGMAC_JUMBO_LEN;
+ else if ((priv->plat->enh_desc) || (priv->synopsys_id >= DWMAC_CORE_4_00))
+ ndev->max_mtu = JUMBO_LEN;
else
ndev->max_mtu = SKB_MAX_HEAD(NET_SKB_PAD + NET_IP_ALIGN);
/* Will not overwrite ndev->max_mtu if plat->maxmtu > ndev->max_mtu
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index eaf8f08f2e91..5de754a9fae9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -23,6 +23,7 @@
/**
* dwmac1000_validate_mcast_bins - validates the number of Multicast filter bins
+ * @dev: struct device of the platform device
* @mcast_bins: Multicast filtering bins
* Description:
* this function validates the number of Multicast filtering bins specified
@@ -33,7 +34,7 @@
* invalid and will cause the filtering algorithm to use Multicast
* promiscuous mode.
*/
-static int dwmac1000_validate_mcast_bins(int mcast_bins)
+static int dwmac1000_validate_mcast_bins(struct device *dev, int mcast_bins)
{
int x = mcast_bins;
@@ -44,8 +45,8 @@ static int dwmac1000_validate_mcast_bins(int mcast_bins)
break;
default:
x = 0;
- pr_info("Hash table entries set to unexpected value %d",
- mcast_bins);
+ dev_info(dev, "Hash table entries set to unexpected value %d\n",
+ mcast_bins);
break;
}
return x;
@@ -53,6 +54,7 @@ static int dwmac1000_validate_mcast_bins(int mcast_bins)
/**
* dwmac1000_validate_ucast_entries - validate the Unicast address entries
+ * @dev: struct device of the platform device
* @ucast_entries: number of Unicast address entries
* Description:
* This function validates the number of Unicast address entries supported
@@ -62,7 +64,8 @@ static int dwmac1000_validate_mcast_bins(int mcast_bins)
* selected, and defaults to 1 Unicast address if an unsupported
* configuration is selected.
*/
-static int dwmac1000_validate_ucast_entries(int ucast_entries)
+static int dwmac1000_validate_ucast_entries(struct device *dev,
+ int ucast_entries)
{
int x = ucast_entries;
@@ -73,8 +76,8 @@ static int dwmac1000_validate_ucast_entries(int ucast_entries)
break;
default:
x = 1;
- pr_info("Unicast table entries set to unexpected value %d\n",
- ucast_entries);
+ dev_info(dev, "Unicast table entries set to unexpected value %d\n",
+ ucast_entries);
break;
}
return x;
@@ -463,9 +466,9 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
of_property_read_u32(np, "snps,perfect-filter-entries",
&plat->unicast_filter_entries);
plat->unicast_filter_entries = dwmac1000_validate_ucast_entries(
- plat->unicast_filter_entries);
+ &pdev->dev, plat->unicast_filter_entries);
plat->multicast_filter_bins = dwmac1000_validate_mcast_bins(
- plat->multicast_filter_bins);
+ &pdev->dev, plat->multicast_filter_bins);
plat->has_gmac = 1;
plat->pmt = 1;
}
@@ -514,7 +517,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
plat->force_thresh_dma_mode = of_property_read_bool(np, "snps,force_thresh_dma_mode");
if (plat->force_thresh_dma_mode) {
plat->force_sf_dma_mode = 0;
- pr_warn("force_sf_dma_mode is ignored if force_thresh_dma_mode is set.");
+ dev_warn(&pdev->dev,
+ "force_sf_dma_mode is ignored if force_thresh_dma_mode is set.\n");
}
of_property_read_u32(np, "snps,ps-speed", &plat->mac_port_sel_speed);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
index ecc8602c6799..305d24935cf4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -43,9 +43,11 @@ struct stmmac_packet_attrs {
int dont_wait;
int timeout;
int size;
+ int max_size;
int remove_sa;
u8 id;
int sarc;
+ u16 queue_mapping;
};
static u8 stmmac_test_next_id;
@@ -73,12 +75,14 @@ static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv,
else
size += sizeof(struct udphdr);
- skb = netdev_alloc_skb(priv->dev, size);
+ if (attr->max_size && (attr->max_size > size))
+ size = attr->max_size;
+
+ skb = netdev_alloc_skb_ip_align(priv->dev, size);
if (!skb)
return NULL;
prefetchw(skb->data);
- skb_reserve(skb, NET_IP_ALIGN);
if (attr->vlan > 1)
ehdr = skb_push(skb, ETH_HLEN + 8);
@@ -147,6 +151,9 @@ static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv,
uhdr->source = htons(attr->sport);
uhdr->dest = htons(attr->dport);
uhdr->len = htons(sizeof(*shdr) + sizeof(*uhdr) + attr->size);
+ if (attr->max_size)
+ uhdr->len = htons(attr->max_size -
+ (sizeof(*ihdr) + sizeof(*ehdr)));
uhdr->check = 0;
}
@@ -162,9 +169,13 @@ static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv,
iplen += sizeof(*thdr);
else
iplen += sizeof(*uhdr);
+
+ if (attr->max_size)
+ iplen = attr->max_size - sizeof(*ehdr);
+
ihdr->tot_len = htons(iplen);
ihdr->frag_off = 0;
- ihdr->saddr = 0;
+ ihdr->saddr = htonl(attr->ip_src);
ihdr->daddr = htonl(attr->ip_dst);
ihdr->tos = 0;
ihdr->id = 0;
@@ -178,6 +189,8 @@ static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv,
if (attr->size)
skb_put(skb, attr->size);
+ if (attr->max_size && (attr->max_size > skb->len))
+ skb_put(skb, attr->max_size - skb->len);
skb->csum = 0;
skb->ip_summed = CHECKSUM_PARTIAL;
@@ -196,6 +209,24 @@ static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv,
return skb;
}
+static struct sk_buff *stmmac_test_get_arp_skb(struct stmmac_priv *priv,
+ struct stmmac_packet_attrs *attr)
+{
+ __be32 ip_src = htonl(attr->ip_src);
+ __be32 ip_dst = htonl(attr->ip_dst);
+ struct sk_buff *skb = NULL;
+
+ skb = arp_create(ARPOP_REQUEST, ETH_P_ARP, ip_dst, priv->dev, ip_src,
+ NULL, attr->src, attr->dst);
+ if (!skb)
+ return NULL;
+
+ skb->pkt_type = PACKET_HOST;
+ skb->dev = priv->dev;
+
+ return skb;
+}
+
struct stmmac_test_priv {
struct stmmac_packet_attrs *packet;
struct packet_type pt;
@@ -306,7 +337,7 @@ static int __stmmac_test_loopback(struct stmmac_priv *priv,
goto cleanup;
}
- skb_set_queue_mapping(skb, 0);
+ skb_set_queue_mapping(skb, attr->queue_mapping);
ret = dev_queue_xmit(skb);
if (ret)
goto cleanup;
@@ -318,7 +349,7 @@ static int __stmmac_test_loopback(struct stmmac_priv *priv,
attr->timeout = STMMAC_LB_TIMEOUT;
wait_for_completion_timeout(&tpriv->comp, attr->timeout);
- ret = !tpriv->ok;
+ ret = tpriv->ok ? 0 : -ETIMEDOUT;
cleanup:
if (!attr->dont_wait)
@@ -480,7 +511,7 @@ static int stmmac_test_hfilt(struct stmmac_priv *priv)
/* Shall NOT receive packet */
ret = __stmmac_test_loopback(priv, &attr);
- ret = !ret;
+ ret = ret ? 0 : -EINVAL;
cleanup:
dev_mc_del(priv->dev, gd_addr);
@@ -512,7 +543,7 @@ static int stmmac_test_pfilt(struct stmmac_priv *priv)
/* Shall NOT receive packet */
ret = __stmmac_test_loopback(priv, &attr);
- ret = !ret;
+ ret = ret ? 0 : -EINVAL;
cleanup:
dev_uc_del(priv->dev, gd_addr);
@@ -562,7 +593,7 @@ static int stmmac_test_mcfilt(struct stmmac_priv *priv)
/* Shall NOT receive packet */
ret = __stmmac_test_loopback(priv, &attr);
- ret = !ret;
+ ret = ret ? 0 : -EINVAL;
cleanup:
dev_uc_del(priv->dev, uc_addr);
@@ -600,7 +631,7 @@ static int stmmac_test_ucfilt(struct stmmac_priv *priv)
/* Shall NOT receive packet */
ret = __stmmac_test_loopback(priv, &attr);
- ret = !ret;
+ ret = ret ? 0 : -EINVAL;
cleanup:
dev_mc_del(priv->dev, mc_addr);
@@ -699,7 +730,7 @@ static int stmmac_test_flowctrl(struct stmmac_priv *priv)
}
wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
- ret = !tpriv->ok;
+ ret = tpriv->ok ? 0 : -ETIMEDOUT;
cleanup:
dev_mc_del(priv->dev, paddr);
@@ -833,11 +864,11 @@ static int stmmac_test_vlanfilt(struct stmmac_priv *priv)
goto vlan_del;
wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
- ret = !tpriv->ok;
+ ret = tpriv->ok ? 0 : -ETIMEDOUT;
if (ret && !i) {
goto vlan_del;
} else if (!ret && i) {
- ret = -1;
+ ret = -EINVAL;
goto vlan_del;
} else {
ret = 0;
@@ -909,11 +940,11 @@ static int stmmac_test_dvlanfilt(struct stmmac_priv *priv)
goto vlan_del;
wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
- ret = !tpriv->ok;
+ ret = tpriv->ok ? 0 : -ETIMEDOUT;
if (ret && !i) {
goto vlan_del;
} else if (!ret && i) {
- ret = -1;
+ ret = -EINVAL;
goto vlan_del;
} else {
ret = 0;
@@ -998,7 +1029,7 @@ static int stmmac_test_rxp(struct stmmac_priv *priv)
attr.src = addr;
ret = __stmmac_test_loopback(priv, &attr);
- ret = !ret; /* Shall NOT receive packet */
+ ret = ret ? 0 : -EINVAL; /* Shall NOT receive packet */
cls_u32.command = TC_CLSU32_DELETE_KNODE;
stmmac_tc_setup_cls_u32(priv, priv, &cls_u32);
@@ -1168,6 +1199,392 @@ static int stmmac_test_svlanoff(struct stmmac_priv *priv)
return stmmac_test_vlanoff_common(priv, true);
}
+#ifdef CONFIG_NET_CLS_ACT
+static int __stmmac_test_l3filt(struct stmmac_priv *priv, u32 dst, u32 src,
+ u32 dst_mask, u32 src_mask)
+{
+ struct flow_dissector_key_ipv4_addrs key, mask;
+ unsigned long dummy_cookie = 0xdeadbeef;
+ struct stmmac_packet_attrs attr = { };
+ struct flow_dissector *dissector;
+ struct flow_cls_offload *cls;
+ struct flow_rule *rule;
+ int ret;
+
+ if (!tc_can_offload(priv->dev))
+ return -EOPNOTSUPP;
+ if (!priv->dma_cap.l3l4fnum)
+ return -EOPNOTSUPP;
+ if (priv->rss.enable) {
+ struct stmmac_rss rss = { .enable = false, };
+
+ stmmac_rss_configure(priv, priv->hw, &rss,
+ priv->plat->rx_queues_to_use);
+ }
+
+ dissector = kzalloc(sizeof(*dissector), GFP_KERNEL);
+ if (!dissector) {
+ ret = -ENOMEM;
+ goto cleanup_rss;
+ }
+
+ dissector->used_keys |= (1 << FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+ dissector->offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] = 0;
+
+ cls = kzalloc(sizeof(*cls), GFP_KERNEL);
+ if (!cls) {
+ ret = -ENOMEM;
+ goto cleanup_dissector;
+ }
+
+ cls->common.chain_index = 0;
+ cls->command = FLOW_CLS_REPLACE;
+ cls->cookie = dummy_cookie;
+
+ rule = kzalloc(struct_size(rule, action.entries, 1), GFP_KERNEL);
+ if (!rule) {
+ ret = -ENOMEM;
+ goto cleanup_cls;
+ }
+
+ rule->match.dissector = dissector;
+ rule->match.key = (void *)&key;
+ rule->match.mask = (void *)&mask;
+
+ key.src = htonl(src);
+ key.dst = htonl(dst);
+ mask.src = src_mask;
+ mask.dst = dst_mask;
+
+ cls->rule = rule;
+
+ rule->action.entries[0].id = FLOW_ACTION_DROP;
+ rule->action.num_entries = 1;
+
+ attr.dst = priv->dev->dev_addr;
+ attr.ip_dst = dst;
+ attr.ip_src = src;
+
+ /* Shall receive packet */
+ ret = __stmmac_test_loopback(priv, &attr);
+ if (ret)
+ goto cleanup_rule;
+
+ ret = stmmac_tc_setup_cls(priv, priv, cls);
+ if (ret)
+ goto cleanup_rule;
+
+ /* Shall NOT receive packet */
+ ret = __stmmac_test_loopback(priv, &attr);
+ ret = ret ? 0 : -EINVAL;
+
+ cls->command = FLOW_CLS_DESTROY;
+ stmmac_tc_setup_cls(priv, priv, cls);
+cleanup_rule:
+ kfree(rule);
+cleanup_cls:
+ kfree(cls);
+cleanup_dissector:
+ kfree(dissector);
+cleanup_rss:
+ if (priv->rss.enable) {
+ stmmac_rss_configure(priv, priv->hw, &priv->rss,
+ priv->plat->rx_queues_to_use);
+ }
+
+ return ret;
+}
+#else
+static int __stmmac_test_l3filt(struct stmmac_priv *priv, u32 dst, u32 src,
+ u32 dst_mask, u32 src_mask)
+{
+ return -EOPNOTSUPP;
+}
+#endif
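The selftest builds the flower objects by hand: key and mask live on the stack, and the dissector offset for FLOW_DISSECTOR_KEY_IPV4_ADDRS is 0, so the driver-side parser added later in this patch (tc_add_ip4_flow() in stmmac_tc.c) can read them back through the standard flow API. A sketch of that consumer side, mirroring the parser:

	struct flow_match_ipv4_addrs match;

	flow_rule_match_ipv4_addrs(rule, &match);
	/* match.key now points at 'key' and match.mask at 'mask' built above */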
+
+static int stmmac_test_l3filt_da(struct stmmac_priv *priv)
+{
+ u32 addr = 0x10203040;
+
+ return __stmmac_test_l3filt(priv, addr, 0, ~0, 0);
+}
+
+static int stmmac_test_l3filt_sa(struct stmmac_priv *priv)
+{
+ u32 addr = 0x10203040;
+
+ return __stmmac_test_l3filt(priv, 0, addr, 0, ~0);
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+static int __stmmac_test_l4filt(struct stmmac_priv *priv, u32 dst, u32 src,
+ u32 dst_mask, u32 src_mask, bool udp)
+{
+ struct {
+ struct flow_dissector_key_basic bkey;
+ struct flow_dissector_key_ports key;
+ } __aligned(BITS_PER_LONG / 8) keys;
+ struct {
+ struct flow_dissector_key_basic bmask;
+ struct flow_dissector_key_ports mask;
+ } __aligned(BITS_PER_LONG / 8) masks;
+ unsigned long dummy_cookie = 0xdeadbeef;
+ struct stmmac_packet_attrs attr = { };
+ struct flow_dissector *dissector;
+ struct flow_cls_offload *cls;
+ struct flow_rule *rule;
+ int ret;
+
+ if (!tc_can_offload(priv->dev))
+ return -EOPNOTSUPP;
+ if (!priv->dma_cap.l3l4fnum)
+ return -EOPNOTSUPP;
+ if (priv->rss.enable) {
+ struct stmmac_rss rss = { .enable = false, };
+
+ stmmac_rss_configure(priv, priv->hw, &rss,
+ priv->plat->rx_queues_to_use);
+ }
+
+ dissector = kzalloc(sizeof(*dissector), GFP_KERNEL);
+ if (!dissector) {
+ ret = -ENOMEM;
+ goto cleanup_rss;
+ }
+
+ dissector->used_keys |= (1 << FLOW_DISSECTOR_KEY_BASIC);
+ dissector->used_keys |= (1 << FLOW_DISSECTOR_KEY_PORTS);
+ dissector->offset[FLOW_DISSECTOR_KEY_BASIC] = 0;
+ dissector->offset[FLOW_DISSECTOR_KEY_PORTS] = offsetof(typeof(keys), key);
+
+ cls = kzalloc(sizeof(*cls), GFP_KERNEL);
+ if (!cls) {
+ ret = -ENOMEM;
+ goto cleanup_dissector;
+ }
+
+ cls->common.chain_index = 0;
+ cls->command = FLOW_CLS_REPLACE;
+ cls->cookie = dummy_cookie;
+
+ rule = kzalloc(struct_size(rule, action.entries, 1), GFP_KERNEL);
+ if (!rule) {
+ ret = -ENOMEM;
+ goto cleanup_cls;
+ }
+
+ rule->match.dissector = dissector;
+ rule->match.key = (void *)&keys;
+ rule->match.mask = (void *)&masks;
+
+ keys.bkey.ip_proto = udp ? IPPROTO_UDP : IPPROTO_TCP;
+ keys.key.src = htons(src);
+ keys.key.dst = htons(dst);
+ masks.mask.src = src_mask;
+ masks.mask.dst = dst_mask;
+
+ cls->rule = rule;
+
+ rule->action.entries[0].id = FLOW_ACTION_DROP;
+ rule->action.num_entries = 1;
+
+ attr.dst = priv->dev->dev_addr;
+ attr.tcp = !udp;
+ attr.sport = src;
+ attr.dport = dst;
+ attr.ip_dst = 0;
+
+ /* Shall receive packet */
+ ret = __stmmac_test_loopback(priv, &attr);
+ if (ret)
+ goto cleanup_rule;
+
+ ret = stmmac_tc_setup_cls(priv, priv, cls);
+ if (ret)
+ goto cleanup_rule;
+
+ /* Shall NOT receive packet */
+ ret = __stmmac_test_loopback(priv, &attr);
+ ret = ret ? 0 : -EINVAL;
+
+ cls->command = FLOW_CLS_DESTROY;
+ stmmac_tc_setup_cls(priv, priv, cls);
+cleanup_rule:
+ kfree(rule);
+cleanup_cls:
+ kfree(cls);
+cleanup_dissector:
+ kfree(dissector);
+cleanup_rss:
+ if (priv->rss.enable) {
+ stmmac_rss_configure(priv, priv->hw, &priv->rss,
+ priv->plat->rx_queues_to_use);
+ }
+
+ return ret;
+}
+#else
+static int __stmmac_test_l4filt(struct stmmac_priv *priv, u32 dst, u32 src,
+ u32 dst_mask, u32 src_mask, bool udp)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+static int stmmac_test_l4filt_da_tcp(struct stmmac_priv *priv)
+{
+ u16 dummy_port = 0x123;
+
+ return __stmmac_test_l4filt(priv, dummy_port, 0, ~0, 0, false);
+}
+
+static int stmmac_test_l4filt_sa_tcp(struct stmmac_priv *priv)
+{
+ u16 dummy_port = 0x123;
+
+ return __stmmac_test_l4filt(priv, 0, dummy_port, 0, ~0, false);
+}
+
+static int stmmac_test_l4filt_da_udp(struct stmmac_priv *priv)
+{
+ u16 dummy_port = 0x123;
+
+ return __stmmac_test_l4filt(priv, dummy_port, 0, ~0, 0, true);
+}
+
+static int stmmac_test_l4filt_sa_udp(struct stmmac_priv *priv)
+{
+ u16 dummy_port = 0x123;
+
+ return __stmmac_test_l4filt(priv, 0, dummy_port, 0, ~0, true);
+}
+
+static int stmmac_test_arp_validate(struct sk_buff *skb,
+ struct net_device *ndev,
+ struct packet_type *pt,
+ struct net_device *orig_ndev)
+{
+ struct stmmac_test_priv *tpriv = pt->af_packet_priv;
+ struct ethhdr *ehdr;
+ struct arphdr *ahdr;
+
+ ehdr = (struct ethhdr *)skb_mac_header(skb);
+ if (!ether_addr_equal(ehdr->h_dest, tpriv->packet->src))
+ goto out;
+
+ ahdr = arp_hdr(skb);
+ if (ahdr->ar_op != htons(ARPOP_REPLY))
+ goto out;
+
+ tpriv->ok = true;
+ complete(&tpriv->comp);
+out:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int stmmac_test_arpoffload(struct stmmac_priv *priv)
+{
+ unsigned char src[ETH_ALEN] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06};
+ unsigned char dst[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ struct stmmac_packet_attrs attr = { };
+ struct stmmac_test_priv *tpriv;
+ struct sk_buff *skb = NULL;
+ u32 ip_addr = 0xdeadcafe;
+ u32 ip_src = 0xdeadbeef;
+ int ret;
+
+ if (!priv->dma_cap.arpoffsel)
+ return -EOPNOTSUPP;
+
+ tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+ if (!tpriv)
+ return -ENOMEM;
+
+ tpriv->ok = false;
+ init_completion(&tpriv->comp);
+
+ tpriv->pt.type = htons(ETH_P_ARP);
+ tpriv->pt.func = stmmac_test_arp_validate;
+ tpriv->pt.dev = priv->dev;
+ tpriv->pt.af_packet_priv = tpriv;
+ tpriv->packet = &attr;
+ dev_add_pack(&tpriv->pt);
+
+ attr.src = src;
+ attr.ip_src = ip_src;
+ attr.dst = dst;
+ attr.ip_dst = ip_addr;
+
+ skb = stmmac_test_get_arp_skb(priv, &attr);
+ if (!skb) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ ret = stmmac_set_arp_offload(priv, priv->hw, true, ip_addr);
+ if (ret)
+ goto cleanup;
+
+ ret = dev_set_promiscuity(priv->dev, 1);
+ if (ret)
+ goto cleanup;
+
+ skb_set_queue_mapping(skb, 0);
+ ret = dev_queue_xmit(skb);
+ if (ret)
+ goto cleanup_promisc;
+
+ wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
+ ret = tpriv->ok ? 0 : -ETIMEDOUT;
+
+cleanup_promisc:
+ dev_set_promiscuity(priv->dev, -1);
+cleanup:
+ stmmac_set_arp_offload(priv, priv->hw, false, 0x0);
+ dev_remove_pack(&tpriv->pt);
+ kfree(tpriv);
+ return ret;
+}
+
+static int __stmmac_test_jumbo(struct stmmac_priv *priv, u16 queue)
+{
+ struct stmmac_packet_attrs attr = { };
+ int size = priv->dma_buf_sz;
+
+ /* Only XGMAC has SW support for multiple RX descs in same packet */
+ if (priv->plat->has_xgmac)
+ size = priv->dev->max_mtu;
+
+ attr.dst = priv->dev->dev_addr;
+ attr.max_size = size - ETH_FCS_LEN;
+ attr.queue_mapping = queue;
+
+ return __stmmac_test_loopback(priv, &attr);
+}
+
+static int stmmac_test_jumbo(struct stmmac_priv *priv)
+{
+ return __stmmac_test_jumbo(priv, 0);
+}
+
+static int stmmac_test_mjumbo(struct stmmac_priv *priv)
+{
+ u32 chan, tx_cnt = priv->plat->tx_queues_to_use;
+ int ret;
+
+ if (tx_cnt <= 1)
+ return -EOPNOTSUPP;
+
+ for (chan = 0; chan < tx_cnt; chan++) {
+ ret = __stmmac_test_jumbo(priv, chan);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
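The jumbo tests size the frame to the largest payload the RX path can take: dma_buf_sz where the whole frame must fit one buffer, or max_mtu on XGMAC, whose RX path can chain several descriptors per packet; ETH_FCS_LEN is subtracted so the frame still fits on the wire. Illustration only (XGMAC_JUMBO_LEN is assumed to be 16368 here):

	attr.max_size = priv->dev->max_mtu - ETH_FCS_LEN;	/* e.g. 16368 - 4 = 16364 bytes */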
#define STMMAC_LOOPBACK_NONE 0
#define STMMAC_LOOPBACK_MAC 1
#define STMMAC_LOOPBACK_PHY 2
@@ -1253,6 +1670,42 @@ static const struct stmmac_test {
.name = "SVLAN TX Insertion ",
.lb = STMMAC_LOOPBACK_PHY,
.fn = stmmac_test_svlanoff,
+ }, {
+ .name = "L3 DA Filtering ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_l3filt_da,
+ }, {
+ .name = "L3 SA Filtering ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_l3filt_sa,
+ }, {
+ .name = "L4 DA TCP Filtering ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_l4filt_da_tcp,
+ }, {
+ .name = "L4 SA TCP Filtering ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_l4filt_sa_tcp,
+ }, {
+ .name = "L4 DA UDP Filtering ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_l4filt_da_udp,
+ }, {
+ .name = "L4 SA UDP Filtering ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_l4filt_sa_udp,
+ }, {
+ .name = "ARP Offload ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_arpoffload,
+ }, {
+ .name = "Jumbo Frame ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_jumbo,
+ }, {
+ .name = "Multichannel Jumbo ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_mjumbo,
},
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 6c305b6ecad0..e231098061b6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -242,9 +242,27 @@ static int tc_init(struct stmmac_priv *priv)
{
struct dma_features *dma_cap = &priv->dma_cap;
unsigned int count;
+ int i;
+
+ if (dma_cap->l3l4fnum) {
+ priv->flow_entries_max = dma_cap->l3l4fnum;
+ priv->flow_entries = devm_kcalloc(priv->device,
+ dma_cap->l3l4fnum,
+ sizeof(*priv->flow_entries),
+ GFP_KERNEL);
+ if (!priv->flow_entries)
+ return -ENOMEM;
+ for (i = 0; i < priv->flow_entries_max; i++)
+ priv->flow_entries[i].idx = i;
+
+ dev_info(priv->device, "Enabled Flow TC (entries=%d)\n",
+ priv->flow_entries_max);
+ }
+
+ /* Fail silently as we can still use remaining features, e.g. CBS */
if (!dma_cap->frpsel)
- return -EINVAL;
+ return 0;
switch (dma_cap->frpbs) {
case 0x0:
@@ -349,8 +367,235 @@ static int tc_setup_cbs(struct stmmac_priv *priv,
return 0;
}
+static int tc_parse_flow_actions(struct stmmac_priv *priv,
+ struct flow_action *action,
+ struct stmmac_flow_entry *entry)
+{
+ struct flow_action_entry *act;
+ int i;
+
+ if (!flow_action_has_entries(action))
+ return -EINVAL;
+
+ flow_action_for_each(i, act, action) {
+ switch (act->id) {
+ case FLOW_ACTION_DROP:
+ entry->action |= STMMAC_FLOW_ACTION_DROP;
+ return 0;
+ default:
+ break;
+ }
+ }
+
+	/* Nothing to do, maybe an inverse filter? */

+ return 0;
+}
+
+static int tc_add_basic_flow(struct stmmac_priv *priv,
+ struct flow_cls_offload *cls,
+ struct stmmac_flow_entry *entry)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+ struct flow_dissector *dissector = rule->match.dissector;
+ struct flow_match_basic match;
+
+ /* Nothing to do here */
+ if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_BASIC))
+ return -EINVAL;
+
+ flow_rule_match_basic(rule, &match);
+ entry->ip_proto = match.key->ip_proto;
+ return 0;
+}
+
+static int tc_add_ip4_flow(struct stmmac_priv *priv,
+ struct flow_cls_offload *cls,
+ struct stmmac_flow_entry *entry)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+ struct flow_dissector *dissector = rule->match.dissector;
+ bool inv = entry->action & STMMAC_FLOW_ACTION_DROP;
+ struct flow_match_ipv4_addrs match;
+ u32 hw_match;
+ int ret;
+
+ /* Nothing to do here */
+ if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS))
+ return -EINVAL;
+
+ flow_rule_match_ipv4_addrs(rule, &match);
+ hw_match = ntohl(match.key->src) & ntohl(match.mask->src);
+ if (hw_match) {
+ ret = stmmac_config_l3_filter(priv, priv->hw, entry->idx, true,
+ false, true, inv, hw_match);
+ if (ret)
+ return ret;
+ }
+
+ hw_match = ntohl(match.key->dst) & ntohl(match.mask->dst);
+ if (hw_match) {
+ ret = stmmac_config_l3_filter(priv, priv->hw, entry->idx, true,
+ false, false, inv, hw_match);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int tc_add_ports_flow(struct stmmac_priv *priv,
+ struct flow_cls_offload *cls,
+ struct stmmac_flow_entry *entry)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+ struct flow_dissector *dissector = rule->match.dissector;
+ bool inv = entry->action & STMMAC_FLOW_ACTION_DROP;
+ struct flow_match_ports match;
+ u32 hw_match;
+ bool is_udp;
+ int ret;
+
+ /* Nothing to do here */
+ if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_PORTS))
+ return -EINVAL;
+
+ switch (entry->ip_proto) {
+ case IPPROTO_TCP:
+ is_udp = false;
+ break;
+ case IPPROTO_UDP:
+ is_udp = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ flow_rule_match_ports(rule, &match);
+
+ hw_match = ntohs(match.key->src) & ntohs(match.mask->src);
+ if (hw_match) {
+ ret = stmmac_config_l4_filter(priv, priv->hw, entry->idx, true,
+ is_udp, true, inv, hw_match);
+ if (ret)
+ return ret;
+ }
+
+ hw_match = ntohs(match.key->dst) & ntohs(match.mask->dst);
+ if (hw_match) {
+ ret = stmmac_config_l4_filter(priv, priv->hw, entry->idx, true,
+ is_udp, false, inv, hw_match);
+ if (ret)
+ return ret;
+ }
+
+ entry->is_l4 = true;
+ return 0;
+}
+
+static struct stmmac_flow_entry *tc_find_flow(struct stmmac_priv *priv,
+ struct flow_cls_offload *cls,
+ bool get_free)
+{
+ int i;
+
+ for (i = 0; i < priv->flow_entries_max; i++) {
+ struct stmmac_flow_entry *entry = &priv->flow_entries[i];
+
+ if (entry->cookie == cls->cookie)
+ return entry;
+ if (get_free && (entry->in_use == false))
+ return entry;
+ }
+
+ return NULL;
+}
+
+struct {
+ int (*fn)(struct stmmac_priv *priv, struct flow_cls_offload *cls,
+ struct stmmac_flow_entry *entry);
+} tc_flow_parsers[] = {
+ { .fn = tc_add_basic_flow },
+ { .fn = tc_add_ip4_flow },
+ { .fn = tc_add_ports_flow },
+};
+
+static int tc_add_flow(struct stmmac_priv *priv,
+ struct flow_cls_offload *cls)
+{
+ struct stmmac_flow_entry *entry = tc_find_flow(priv, cls, false);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+ int i, ret;
+
+ if (!entry) {
+ entry = tc_find_flow(priv, cls, true);
+ if (!entry)
+ return -ENOENT;
+ }
+
+ ret = tc_parse_flow_actions(priv, &rule->action, entry);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < ARRAY_SIZE(tc_flow_parsers); i++) {
+ ret = tc_flow_parsers[i].fn(priv, cls, entry);
+ if (!ret) {
+ entry->in_use = true;
+ continue;
+ }
+ }
+
+ if (!entry->in_use)
+ return -EINVAL;
+
+ entry->cookie = cls->cookie;
+ return 0;
+}
+
+static int tc_del_flow(struct stmmac_priv *priv,
+ struct flow_cls_offload *cls)
+{
+ struct stmmac_flow_entry *entry = tc_find_flow(priv, cls, false);
+ int ret;
+
+ if (!entry || !entry->in_use)
+ return -ENOENT;
+
+ if (entry->is_l4) {
+ ret = stmmac_config_l4_filter(priv, priv->hw, entry->idx, false,
+ false, false, false, 0);
+ } else {
+ ret = stmmac_config_l3_filter(priv, priv->hw, entry->idx, false,
+ false, false, false, 0);
+ }
+
+ entry->in_use = false;
+ entry->cookie = 0;
+ entry->is_l4 = false;
+ return ret;
+}
+
+static int tc_setup_cls(struct stmmac_priv *priv,
+ struct flow_cls_offload *cls)
+{
+ int ret = 0;
+
+ switch (cls->command) {
+ case FLOW_CLS_REPLACE:
+ ret = tc_add_flow(priv, cls);
+ break;
+ case FLOW_CLS_DESTROY:
+ ret = tc_del_flow(priv, cls);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
const struct stmmac_tc_ops dwmac510_tc_ops = {
.init = tc_init,
.setup_cls_u32 = tc_setup_cls_u32,
.setup_cbs = tc_setup_cbs,
+ .setup_cls = tc_setup_cls,
};
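Taken together, tc_setup_cls() is the flower entry point: FLOW_CLS_REPLACE finds (or allocates) a stmmac_flow_entry, parses the actions, then runs every parser in tc_flow_parsers[] so that basic, IPv4 and ports keys each get a chance to program the hardware. For a rule such as "drop TCP packets to destination port 80" (roughly "tc filter add dev ethX ingress protocol ip flower ip_proto tcp dst_port 80 skip_sw action drop" from userspace), the call order is approximately:

	/* Illustrative call order only, using the functions added above */
	entry = tc_find_flow(priv, cls, true);			/* grab a free filter slot */
	tc_parse_flow_actions(priv, &rule->action, entry);	/* FLOW_ACTION_DROP -> STMMAC_FLOW_ACTION_DROP */
	tc_add_basic_flow(priv, cls, entry);			/* entry->ip_proto = IPPROTO_TCP */
	tc_add_ports_flow(priv, cls, entry);			/* stmmac_config_l4_filter(..., udp=false, sa=false, inv=true, 80) */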
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index cb2ea8facd8d..3ab24fdccd3b 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1345,7 +1345,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
info->key.u.ipv4.dst =
nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
- if (IN_MULTICAST(ntohl(info->key.u.ipv4.dst))) {
+ if (ipv4_is_multicast(info->key.u.ipv4.dst)) {
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
"Remote IPv4 address cannot be Multicast");
return -EINVAL;
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 35d29a823af8..7c92afd36bbe 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -525,6 +525,12 @@ static int phy_check_link_status(struct phy_device *phydev)
WARN_ON(!mutex_is_locked(&phydev->lock));
+ /* Keep previous state if loopback is enabled because some PHYs
+ * report that Link is Down when loopback is enabled.
+ */
+ if (phydev->loopback_enabled)
+ return 0;
+
err = phy_read_status(phydev);
if (err)
return err;
diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
index 2d1449345959..151c2a3f0b3a 100644
--- a/drivers/net/phy/xilinx_gmii2rgmii.c
+++ b/drivers/net/phy/xilinx_gmii2rgmii.c
@@ -29,7 +29,7 @@ struct gmii2rgmii {
static int xgmiitorgmii_read_status(struct phy_device *phydev)
{
- struct gmii2rgmii *priv = phydev->priv;
+ struct gmii2rgmii *priv = mdiodev_get_drvdata(&phydev->mdio);
struct mii_bus *bus = priv->mdio->bus;
int addr = priv->mdio->addr;
u16 val = 0;
@@ -90,7 +90,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev)
memcpy(&priv->conv_phy_drv, priv->phy_dev->drv,
sizeof(struct phy_driver));
priv->conv_phy_drv.read_status = xgmiitorgmii_read_status;
- priv->phy_dev->priv = priv;
+ mdiodev_set_drvdata(&priv->phy_dev->mdio, priv);
priv->phy_dev->drv = &priv->conv_phy_drv;
return 0;
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 778d27d1fb15..08726090570e 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -445,18 +445,18 @@
#define UPS_FLAGS_250M_CKDIV BIT(2)
#define UPS_FLAGS_EN_ALDPS BIT(3)
#define UPS_FLAGS_CTAP_SHORT_DIS BIT(4)
-#define UPS_FLAGS_SPEED_MASK (0xf << 16)
#define ups_flags_speed(x) ((x) << 16)
#define UPS_FLAGS_EN_EEE BIT(20)
#define UPS_FLAGS_EN_500M_EEE BIT(21)
#define UPS_FLAGS_EN_EEE_CKDIV BIT(22)
+#define UPS_FLAGS_EEE_PLLOFF_100 BIT(23)
#define UPS_FLAGS_EEE_PLLOFF_GIGA BIT(24)
#define UPS_FLAGS_EEE_CMOD_LV_EN BIT(25)
#define UPS_FLAGS_EN_GREEN BIT(26)
#define UPS_FLAGS_EN_FLOW_CTR BIT(27)
enum spd_duplex {
- NWAY_10M_HALF = 1,
+ NWAY_10M_HALF,
NWAY_10M_FULL,
NWAY_100M_HALF,
NWAY_100M_FULL,
@@ -749,6 +749,23 @@ struct r8152 {
void (*autosuspend_en)(struct r8152 *tp, bool enable);
} rtl_ops;
+ struct ups_info {
+ u32 _10m_ckdiv:1;
+ u32 _250m_ckdiv:1;
+ u32 aldps:1;
+ u32 lite_mode:2;
+ u32 speed_duplex:4;
+ u32 eee:1;
+ u32 eee_lite:1;
+ u32 eee_ckdiv:1;
+ u32 eee_plloff_100:1;
+ u32 eee_plloff_giga:1;
+ u32 eee_cmod_lv:1;
+ u32 green:1;
+ u32 flow_control:1;
+ u32 ctap_short_off:1;
+ } ups_info;
+
atomic_t rx_count;
bool eee_en;
@@ -757,6 +774,7 @@ struct r8152 {
u32 msg_enable;
u32 tx_qlen;
u32 coalesce;
+ u32 advertising;
u32 rx_buf_sz;
u32 rx_copybreak;
u32 rx_pending;
@@ -790,6 +808,13 @@ enum tx_csum_stat {
TX_CSUM_NONE
};
+#define RTL_ADVERTISED_10_HALF BIT(0)
+#define RTL_ADVERTISED_10_FULL BIT(1)
+#define RTL_ADVERTISED_100_HALF BIT(2)
+#define RTL_ADVERTISED_100_FULL BIT(3)
+#define RTL_ADVERTISED_1000_HALF BIT(4)
+#define RTL_ADVERTISED_1000_FULL BIT(5)
+
/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
* The RTL chips use a 64 element hash table based on the Ethernet CRC.
*/
@@ -2857,14 +2882,76 @@ static void r8153_u2p3en(struct r8152 *tp, bool enable)
ocp_write_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL, ocp_data);
}
-static void r8153b_ups_flags_w1w0(struct r8152 *tp, u32 set, u32 clear)
+static void r8153b_ups_flags(struct r8152 *tp)
{
- u32 ocp_data;
+ u32 ups_flags = 0;
+
+ if (tp->ups_info.green)
+ ups_flags |= UPS_FLAGS_EN_GREEN;
+
+ if (tp->ups_info.aldps)
+ ups_flags |= UPS_FLAGS_EN_ALDPS;
+
+ if (tp->ups_info.eee)
+ ups_flags |= UPS_FLAGS_EN_EEE;
+
+ if (tp->ups_info.flow_control)
+ ups_flags |= UPS_FLAGS_EN_FLOW_CTR;
+
+ if (tp->ups_info.eee_ckdiv)
+ ups_flags |= UPS_FLAGS_EN_EEE_CKDIV;
+
+ if (tp->ups_info.eee_cmod_lv)
+ ups_flags |= UPS_FLAGS_EEE_CMOD_LV_EN;
+
+ if (tp->ups_info._10m_ckdiv)
+ ups_flags |= UPS_FLAGS_EN_10M_CKDIV;
+
+ if (tp->ups_info.eee_plloff_100)
+ ups_flags |= UPS_FLAGS_EEE_PLLOFF_100;
+
+ if (tp->ups_info.eee_plloff_giga)
+ ups_flags |= UPS_FLAGS_EEE_PLLOFF_GIGA;
+
+ if (tp->ups_info._250m_ckdiv)
+ ups_flags |= UPS_FLAGS_250M_CKDIV;
+
+ if (tp->ups_info.ctap_short_off)
+ ups_flags |= UPS_FLAGS_CTAP_SHORT_DIS;
+
+ switch (tp->ups_info.speed_duplex) {
+ case NWAY_10M_HALF:
+ ups_flags |= ups_flags_speed(1);
+ break;
+ case NWAY_10M_FULL:
+ ups_flags |= ups_flags_speed(2);
+ break;
+ case NWAY_100M_HALF:
+ ups_flags |= ups_flags_speed(3);
+ break;
+ case NWAY_100M_FULL:
+ ups_flags |= ups_flags_speed(4);
+ break;
+ case NWAY_1000M_FULL:
+ ups_flags |= ups_flags_speed(5);
+ break;
+ case FORCE_10M_HALF:
+ ups_flags |= ups_flags_speed(6);
+ break;
+ case FORCE_10M_FULL:
+ ups_flags |= ups_flags_speed(7);
+ break;
+ case FORCE_100M_HALF:
+ ups_flags |= ups_flags_speed(8);
+ break;
+ case FORCE_100M_FULL:
+ ups_flags |= ups_flags_speed(9);
+ break;
+ default:
+ break;
+ }
- ocp_data = ocp_read_dword(tp, MCU_TYPE_USB, USB_UPS_FLAGS);
- ocp_data &= ~clear;
- ocp_data |= set;
- ocp_write_dword(tp, MCU_TYPE_USB, USB_UPS_FLAGS, ocp_data);
+ ocp_write_dword(tp, MCU_TYPE_USB, USB_UPS_FLAGS, ups_flags);
}
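The speed_duplex switch above packs the forced or advertised mode into bits 16 and up of USB_UPS_FLAGS via ups_flags_speed(x) = (x) << 16. Illustration only, using two of the cases handled above:

	u32 forced_100m_full = ups_flags_speed(9);	/* 9 << 16 == 0x00090000 */
	u32 nway_1000m_full  = ups_flags_speed(5);	/* 5 << 16 == 0x00050000 */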
static void r8153b_green_en(struct r8152 *tp, bool enable)
@@ -2885,7 +2972,7 @@ static void r8153b_green_en(struct r8152 *tp, bool enable)
data |= GREEN_ETH_EN;
sram_write(tp, SRAM_GREEN_CFG, data);
- r8153b_ups_flags_w1w0(tp, UPS_FLAGS_EN_GREEN, 0);
+ tp->ups_info.green = enable;
}
static u16 r8153_phy_status(struct r8152 *tp, u16 desired)
@@ -2915,6 +3002,8 @@ static void r8153b_ups_en(struct r8152 *tp, bool enable)
u32 ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_POWER_CUT);
if (enable) {
+ r8153b_ups_flags(tp);
+
ocp_data |= UPS_EN | USP_PREWAKE | PHASE2_EN;
ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data);
@@ -3223,16 +3312,8 @@ static void r8153_eee_en(struct r8152 *tp, bool enable)
ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
ocp_reg_write(tp, OCP_EEE_CFG, config);
-}
-
-static void r8153b_eee_en(struct r8152 *tp, bool enable)
-{
- r8153_eee_en(tp, enable);
- if (enable)
- r8153b_ups_flags_w1w0(tp, UPS_FLAGS_EN_EEE, 0);
- else
- r8153b_ups_flags_w1w0(tp, 0, UPS_FLAGS_EN_EEE);
+ tp->ups_info.eee = enable;
}
static void rtl_eee_enable(struct r8152 *tp, bool enable)
@@ -3254,21 +3335,13 @@ static void rtl_eee_enable(struct r8152 *tp, bool enable)
case RTL_VER_04:
case RTL_VER_05:
case RTL_VER_06:
- if (enable) {
- r8153_eee_en(tp, true);
- ocp_reg_write(tp, OCP_EEE_ADV, tp->eee_adv);
- } else {
- r8153_eee_en(tp, false);
- ocp_reg_write(tp, OCP_EEE_ADV, 0);
- }
- break;
case RTL_VER_08:
case RTL_VER_09:
if (enable) {
- r8153b_eee_en(tp, true);
+ r8153_eee_en(tp, true);
ocp_reg_write(tp, OCP_EEE_ADV, tp->eee_adv);
} else {
- r8153b_eee_en(tp, false);
+ r8153_eee_en(tp, false);
ocp_reg_write(tp, OCP_EEE_ADV, 0);
}
break;
@@ -3284,6 +3357,8 @@ static void r8152b_enable_fc(struct r8152 *tp)
anar = r8152_mdio_read(tp, MII_ADVERTISE);
anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
r8152_mdio_write(tp, MII_ADVERTISE, anar);
+
+ tp->ups_info.flow_control = true;
}
static void rtl8152_disable(struct r8152 *tp)
@@ -3477,22 +3552,8 @@ static void r8153_aldps_en(struct r8152 *tp, bool enable)
break;
}
}
-}
-
-static void r8153b_aldps_en(struct r8152 *tp, bool enable)
-{
- r8153_aldps_en(tp, enable);
- if (enable)
- r8153b_ups_flags_w1w0(tp, UPS_FLAGS_EN_ALDPS, 0);
- else
- r8153b_ups_flags_w1w0(tp, 0, UPS_FLAGS_EN_ALDPS);
-}
-
-static void r8153b_enable_fc(struct r8152 *tp)
-{
- r8152b_enable_fc(tp);
- r8153b_ups_flags_w1w0(tp, UPS_FLAGS_EN_FLOW_CTR, 0);
+ tp->ups_info.aldps = enable;
}
static void r8153_hw_phy_cfg(struct r8152 *tp)
@@ -3569,11 +3630,11 @@ static u32 r8152_efuse_read(struct r8152 *tp, u8 addr)
static void r8153b_hw_phy_cfg(struct r8152 *tp)
{
- u32 ocp_data, ups_flags = 0;
+ u32 ocp_data;
u16 data;
/* disable ALDPS before updating the PHY parameters */
- r8153b_aldps_en(tp, false);
+ r8153_aldps_en(tp, false);
/* disable EEE before updating the PHY parameters */
rtl_eee_enable(tp, false);
@@ -3621,28 +3682,27 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp)
data = ocp_reg_read(tp, OCP_POWER_CFG);
data |= EEE_CLKDIV_EN;
ocp_reg_write(tp, OCP_POWER_CFG, data);
+ tp->ups_info.eee_ckdiv = true;
data = ocp_reg_read(tp, OCP_DOWN_SPEED);
data |= EN_EEE_CMODE | EN_EEE_1000 | EN_10M_CLKDIV;
ocp_reg_write(tp, OCP_DOWN_SPEED, data);
+ tp->ups_info.eee_cmod_lv = true;
+ tp->ups_info._10m_ckdiv = true;
+ tp->ups_info.eee_plloff_giga = true;
ocp_reg_write(tp, OCP_SYSCLK_CFG, 0);
ocp_reg_write(tp, OCP_SYSCLK_CFG, clk_div_expo(5));
-
- ups_flags |= UPS_FLAGS_EN_10M_CKDIV | UPS_FLAGS_250M_CKDIV |
- UPS_FLAGS_EN_EEE_CKDIV | UPS_FLAGS_EEE_CMOD_LV_EN |
- UPS_FLAGS_EEE_PLLOFF_GIGA;
+ tp->ups_info._250m_ckdiv = true;
r8153_patch_request(tp, false);
}
- r8153b_ups_flags_w1w0(tp, ups_flags, 0);
-
if (tp->eee_en)
rtl_eee_enable(tp, true);
- r8153b_aldps_en(tp, true);
- r8153b_enable_fc(tp);
+ r8153_aldps_en(tp, true);
+ r8152b_enable_fc(tp);
r8153_u2p3en(tp, true);
set_bit(PHY_RESET, &tp->flags);
@@ -3793,111 +3853,118 @@ static void rtl8153_disable(struct r8152 *tp)
r8153_aldps_en(tp, true);
}
-static void rtl8153b_disable(struct r8152 *tp)
+static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex,
+ u32 advertising)
{
- r8153b_aldps_en(tp, false);
- rtl_disable(tp);
- rtl_reset_bmu(tp);
- r8153b_aldps_en(tp, true);
-}
-
-static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u16 speed, u8 duplex)
-{
- u16 bmcr, anar, gbcr;
- enum spd_duplex speed_duplex;
+ u16 bmcr;
int ret = 0;
- anar = r8152_mdio_read(tp, MII_ADVERTISE);
- anar &= ~(ADVERTISE_10HALF | ADVERTISE_10FULL |
- ADVERTISE_100HALF | ADVERTISE_100FULL);
- if (tp->mii.supports_gmii) {
- gbcr = r8152_mdio_read(tp, MII_CTRL1000);
- gbcr &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF);
- } else {
- gbcr = 0;
- }
-
if (autoneg == AUTONEG_DISABLE) {
- if (speed == SPEED_10) {
- bmcr = 0;
- anar |= ADVERTISE_10HALF | ADVERTISE_10FULL;
- speed_duplex = FORCE_10M_HALF;
- } else if (speed == SPEED_100) {
- bmcr = BMCR_SPEED100;
- anar |= ADVERTISE_100HALF | ADVERTISE_100FULL;
- speed_duplex = FORCE_100M_HALF;
- } else if (speed == SPEED_1000 && tp->mii.supports_gmii) {
- bmcr = BMCR_SPEED1000;
- gbcr |= ADVERTISE_1000FULL | ADVERTISE_1000HALF;
- speed_duplex = NWAY_1000M_FULL;
- } else {
- ret = -EINVAL;
- goto out;
- }
+ if (duplex != DUPLEX_HALF && duplex != DUPLEX_FULL)
+ return -EINVAL;
- if (duplex == DUPLEX_FULL) {
- bmcr |= BMCR_FULLDPLX;
- if (speed != SPEED_1000)
- speed_duplex++;
- }
- } else {
- if (speed == SPEED_10) {
+ switch (speed) {
+ case SPEED_10:
+ bmcr = BMCR_SPEED10;
if (duplex == DUPLEX_FULL) {
- anar |= ADVERTISE_10HALF | ADVERTISE_10FULL;
- speed_duplex = NWAY_10M_FULL;
+ bmcr |= BMCR_FULLDPLX;
+ tp->ups_info.speed_duplex = FORCE_10M_FULL;
} else {
- anar |= ADVERTISE_10HALF;
- speed_duplex = NWAY_10M_HALF;
+ tp->ups_info.speed_duplex = FORCE_10M_HALF;
}
- } else if (speed == SPEED_100) {
+ break;
+ case SPEED_100:
+ bmcr = BMCR_SPEED100;
if (duplex == DUPLEX_FULL) {
- anar |= ADVERTISE_10HALF | ADVERTISE_10FULL;
- anar |= ADVERTISE_100HALF | ADVERTISE_100FULL;
- speed_duplex = NWAY_100M_FULL;
+ bmcr |= BMCR_FULLDPLX;
+ tp->ups_info.speed_duplex = FORCE_100M_FULL;
} else {
- anar |= ADVERTISE_10HALF;
- anar |= ADVERTISE_100HALF;
- speed_duplex = NWAY_100M_HALF;
+ tp->ups_info.speed_duplex = FORCE_100M_HALF;
}
- } else if (speed == SPEED_1000 && tp->mii.supports_gmii) {
- if (duplex == DUPLEX_FULL) {
- anar |= ADVERTISE_10HALF | ADVERTISE_10FULL;
- anar |= ADVERTISE_100HALF | ADVERTISE_100FULL;
- gbcr |= ADVERTISE_1000FULL | ADVERTISE_1000HALF;
- } else {
- anar |= ADVERTISE_10HALF;
- anar |= ADVERTISE_100HALF;
- gbcr |= ADVERTISE_1000HALF;
+ break;
+ case SPEED_1000:
+ if (tp->mii.supports_gmii) {
+ bmcr = BMCR_SPEED1000 | BMCR_FULLDPLX;
+ tp->ups_info.speed_duplex = NWAY_1000M_FULL;
+ break;
}
- speed_duplex = NWAY_1000M_FULL;
- } else {
+ /* fall through */
+ default:
ret = -EINVAL;
goto out;
}
+ if (duplex == DUPLEX_FULL)
+ tp->mii.full_duplex = 1;
+ else
+ tp->mii.full_duplex = 0;
+
+ tp->mii.force_media = 1;
+ } else {
+ u16 anar, tmp1;
+ u32 support;
+
+ support = RTL_ADVERTISED_10_HALF | RTL_ADVERTISED_10_FULL |
+ RTL_ADVERTISED_100_HALF | RTL_ADVERTISED_100_FULL;
+
+ if (tp->mii.supports_gmii)
+ support |= RTL_ADVERTISED_1000_FULL;
+
+ if (!(advertising & support))
+ return -EINVAL;
+
+ anar = r8152_mdio_read(tp, MII_ADVERTISE);
+ tmp1 = anar & ~(ADVERTISE_10HALF | ADVERTISE_10FULL |
+ ADVERTISE_100HALF | ADVERTISE_100FULL);
+ if (advertising & RTL_ADVERTISED_10_HALF) {
+ tmp1 |= ADVERTISE_10HALF;
+ tp->ups_info.speed_duplex = NWAY_10M_HALF;
+ }
+ if (advertising & RTL_ADVERTISED_10_FULL) {
+ tmp1 |= ADVERTISE_10FULL;
+ tp->ups_info.speed_duplex = NWAY_10M_FULL;
+ }
+
+ if (advertising & RTL_ADVERTISED_100_HALF) {
+ tmp1 |= ADVERTISE_100HALF;
+ tp->ups_info.speed_duplex = NWAY_100M_HALF;
+ }
+ if (advertising & RTL_ADVERTISED_100_FULL) {
+ tmp1 |= ADVERTISE_100FULL;
+ tp->ups_info.speed_duplex = NWAY_100M_FULL;
+ }
+
+ if (anar != tmp1) {
+ r8152_mdio_write(tp, MII_ADVERTISE, tmp1);
+ tp->mii.advertising = tmp1;
+ }
+
+ if (tp->mii.supports_gmii) {
+ u16 gbcr;
+
+ gbcr = r8152_mdio_read(tp, MII_CTRL1000);
+ tmp1 = gbcr & ~(ADVERTISE_1000FULL |
+ ADVERTISE_1000HALF);
+
+ if (advertising & RTL_ADVERTISED_1000_FULL) {
+ tmp1 |= ADVERTISE_1000FULL;
+ tp->ups_info.speed_duplex = NWAY_1000M_FULL;
+ }
+
+ if (gbcr != tmp1)
+ r8152_mdio_write(tp, MII_CTRL1000, tmp1);
+ }
+
bmcr = BMCR_ANENABLE | BMCR_ANRESTART;
+
+ tp->mii.force_media = 0;
}
if (test_and_clear_bit(PHY_RESET, &tp->flags))
bmcr |= BMCR_RESET;
- if (tp->mii.supports_gmii)
- r8152_mdio_write(tp, MII_CTRL1000, gbcr);
-
- r8152_mdio_write(tp, MII_ADVERTISE, anar);
r8152_mdio_write(tp, MII_BMCR, bmcr);
- switch (tp->version) {
- case RTL_VER_08:
- case RTL_VER_09:
- r8153b_ups_flags_w1w0(tp, ups_flags_speed(speed_duplex),
- UPS_FLAGS_SPEED_MASK);
- break;
-
- default:
- break;
- }
-
if (bmcr & BMCR_RESET) {
int i;
@@ -3982,12 +4049,12 @@ static void rtl8153b_up(struct r8152 *tp)
r8153b_u1u2en(tp, false);
r8153_u2p3en(tp, false);
- r8153b_aldps_en(tp, false);
+ r8153_aldps_en(tp, false);
r8153_first_init(tp);
ocp_write_dword(tp, MCU_TYPE_USB, USB_RX_BUF_TH, RX_THR_B);
- r8153b_aldps_en(tp, true);
+ r8153_aldps_en(tp, true);
r8153_u2p3en(tp, true);
r8153b_u1u2en(tp, true);
}
@@ -4002,9 +4069,9 @@ static void rtl8153b_down(struct r8152 *tp)
r8153b_u1u2en(tp, false);
r8153_u2p3en(tp, false);
r8153b_power_cut_en(tp, false);
- r8153b_aldps_en(tp, false);
+ r8153_aldps_en(tp, false);
r8153_enter_oob(tp);
- r8153b_aldps_en(tp, true);
+ r8153_aldps_en(tp, true);
}
static bool rtl8152_in_nway(struct r8152 *tp)
@@ -4122,7 +4189,8 @@ static void rtl_hw_phy_work_func_t(struct work_struct *work)
tp->rtl_ops.hw_phy_cfg(tp);
- rtl8152_set_speed(tp, tp->autoneg, tp->speed, tp->duplex);
+ rtl8152_set_speed(tp, tp->autoneg, tp->speed, tp->duplex,
+ tp->advertising);
mutex_unlock(&tp->control);
@@ -4840,20 +4908,46 @@ static int rtl8152_set_link_ksettings(struct net_device *dev,
const struct ethtool_link_ksettings *cmd)
{
struct r8152 *tp = netdev_priv(dev);
+ u32 advertising = 0;
int ret;
ret = usb_autopm_get_interface(tp->intf);
if (ret < 0)
goto out;
+ if (test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+ cmd->link_modes.advertising))
+ advertising |= RTL_ADVERTISED_10_HALF;
+
+ if (test_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+ cmd->link_modes.advertising))
+ advertising |= RTL_ADVERTISED_10_FULL;
+
+ if (test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+ cmd->link_modes.advertising))
+ advertising |= RTL_ADVERTISED_100_HALF;
+
+ if (test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+ cmd->link_modes.advertising))
+ advertising |= RTL_ADVERTISED_100_FULL;
+
+ if (test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+ cmd->link_modes.advertising))
+ advertising |= RTL_ADVERTISED_1000_HALF;
+
+ if (test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ cmd->link_modes.advertising))
+ advertising |= RTL_ADVERTISED_1000_FULL;
+
mutex_lock(&tp->control);
ret = rtl8152_set_speed(tp, cmd->base.autoneg, cmd->base.speed,
- cmd->base.duplex);
+ cmd->base.duplex, advertising);
if (!ret) {
tp->autoneg = cmd->base.autoneg;
tp->speed = cmd->base.speed;
tp->duplex = cmd->base.duplex;
+ tp->advertising = advertising;
}
mutex_unlock(&tp->control);
@@ -5383,7 +5477,7 @@ static int rtl_ops_init(struct r8152 *tp)
case RTL_VER_09:
ops->init = r8153b_init;
ops->enable = rtl8153_enable;
- ops->disable = rtl8153b_disable;
+ ops->disable = rtl8153_disable;
ops->up = rtl8153b_up;
ops->down = rtl8153b_down;
ops->unload = rtl8153b_unload;
@@ -5568,7 +5662,13 @@ static int rtl8152_probe(struct usb_interface *intf,
tp->mii.phy_id = R8152_PHY_ID;
tp->autoneg = AUTONEG_ENABLE;
- tp->speed = tp->mii.supports_gmii ? SPEED_1000 : SPEED_100;
+ tp->speed = SPEED_100;
+ tp->advertising = RTL_ADVERTISED_10_HALF | RTL_ADVERTISED_10_FULL |
+ RTL_ADVERTISED_100_HALF | RTL_ADVERTISED_100_FULL;
+ if (tp->mii.supports_gmii) {
+ tp->speed = SPEED_1000;
+ tp->advertising |= RTL_ADVERTISED_1000_FULL;
+ }
tp->duplex = DUPLEX_FULL;
tp->rx_copybreak = RTL8152_RXFG_HEADSZ;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f9a506147c8a..5b9d22338606 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -24,6 +24,9 @@ struct seq_file;
struct btf;
struct btf_type;
+extern struct idr btf_idr;
+extern spinlock_t btf_idr_lock;
+
/* map is generic key/value storage optionally accesible by eBPF programs */
struct bpf_map_ops {
/* funcs callable from userspace (via syscall) */
@@ -647,6 +650,8 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
+struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map,
+ bool uref);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
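bpf_map_inc_not_zero() exists for lookups that race with the final bpf_map_put(): unlike bpf_map_inc(), it refuses to resurrect a map whose refcount already dropped to zero. A hypothetical sketch (map_idr and map_idr_lock are the internal ID table and lock in kernel/bpf/syscall.c, named here only for illustration):

static struct bpf_map *example_map_get_by_id(u32 id)
{
	struct bpf_map *map;

	spin_lock_bh(&map_idr_lock);
	map = idr_find(&map_idr, id);
	if (map)
		map = bpf_map_inc_not_zero(map, true);	/* ERR_PTR if refcnt was 0 */
	else
		map = ERR_PTR(-ENOENT);
	spin_unlock_bh(&map_idr_lock);

	return map;
}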
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 5fe99f322b1c..26a6d58ca78c 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -355,6 +355,7 @@ struct bpf_verifier_env {
struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
int stack_size; /* number of states to be processed */
bool strict_alignment; /* perform strict pointer alignment checks */
+ bool test_state_freq; /* test verifier with different pruning frequency */
struct bpf_verifier_state *cur_state; /* current verifier state */
struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
struct bpf_verifier_state_list *free_list;
diff --git a/include/linux/can/can-ml.h b/include/linux/can/can-ml.h
new file mode 100644
index 000000000000..2f5d731ae251
--- /dev/null
+++ b/include/linux/can/can-ml.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/* Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+ * Copyright (c) 2017 Pengutronix, Marc Kleine-Budde <[email protected]>
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Volkswagen nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * The provided data structures and external interfaces from this code
+ * are not restricted to be used by modules with a GPL compatible license.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ */
+
+#ifndef CAN_ML_H
+#define CAN_ML_H
+
+#include <linux/can.h>
+#include <linux/list.h>
+
+#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS)
+#define CAN_EFF_RCV_HASH_BITS 10
+#define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS)
+
+enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX };
+
+struct can_dev_rcv_lists {
+ struct hlist_head rx[RX_MAX];
+ struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ];
+ struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ];
+ int entries;
+};
+
+struct can_ml_priv {
+ struct can_dev_rcv_lists dev_rcv_lists;
+#ifdef CAN_J1939
+ struct j1939_priv *j1939_priv;
+#endif
+};
+
+#endif /* CAN_ML_H */
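For scale, struct can_dev_rcv_lists keeps one hlist head per standard 11-bit CAN ID plus a 1024-entry hash for extended IDs, roughly 24 KB of list heads per device on 64-bit. A quick illustrative check (CAN_SFF_ID_BITS is 11 in <linux/can.h>):

	static_assert(CAN_SFF_RCV_ARRAY_SZ == 2048);	/* 1 << 11, one head per standard ID */
	static_assert(CAN_EFF_RCV_ARRAY_SZ == 1024);	/* 1 << 10 hash buckets for extended IDs */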
diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index 708c10d3417a..8339071ab08b 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -41,6 +41,14 @@ struct can_proto {
struct proto *prot;
};
+/* required_size
+ * macro to find the minimum size of a struct
+ * that includes a requested member
+ */
+#define CAN_REQUIRED_SIZE(struct_type, member) \
+ (offsetof(typeof(struct_type), member) + \
+ sizeof(((typeof(struct_type) *)(NULL))->member))
+
/* function prototypes for the CAN networklayer core (af_can.c) */
extern int can_proto_register(const struct can_proto *cp);
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index f01623aef2f7..9b3c720a31b1 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -169,7 +169,8 @@ void can_change_state(struct net_device *dev, struct can_frame *cf,
void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
unsigned int idx);
-struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr);
+struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx,
+ u8 *len_ptr);
unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx);
void can_free_echo_skb(struct net_device *dev, unsigned int idx);
diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h
index 9daa1119ea42..01219f2902bf 100644
--- a/include/linux/can/rx-offload.h
+++ b/include/linux/can/rx-offload.h
@@ -15,7 +15,8 @@
struct can_rx_offload {
struct net_device *dev;
- unsigned int (*mailbox_read)(struct can_rx_offload *offload, struct can_frame *cf,
+ unsigned int (*mailbox_read)(struct can_rx_offload *offload,
+ struct can_frame *cf,
u32 *timestamp, unsigned int mb);
struct sk_buff_head skb_queue;
@@ -29,9 +30,13 @@ struct can_rx_offload {
bool inc;
};
-int can_rx_offload_add_timestamp(struct net_device *dev, struct can_rx_offload *offload);
-int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight);
-int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg);
+int can_rx_offload_add_timestamp(struct net_device *dev,
+ struct can_rx_offload *offload);
+int can_rx_offload_add_fifo(struct net_device *dev,
+ struct can_rx_offload *offload,
+ unsigned int weight);
+int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload,
+ u64 reg);
int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload);
int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
struct sk_buff *skb, u32 timestamp);
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index e8242ad88c81..a7604248777b 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -68,6 +68,17 @@ struct mdio_driver {
#define to_mdio_driver(d) \
container_of(to_mdio_common_driver(d), struct mdio_driver, mdiodrv)
+/* device driver data */
+static inline void mdiodev_set_drvdata(struct mdio_device *mdio, void *data)
+{
+ dev_set_drvdata(&mdio->dev, data);
+}
+
+static inline void *mdiodev_get_drvdata(struct mdio_device *mdio)
+{
+ return dev_get_drvdata(&mdio->dev);
+}
+
void mdio_device_free(struct mdio_device *mdiodev);
struct mdio_device *mdio_device_create(struct mii_bus *bus, int addr);
int mdio_device_register(struct mdio_device *mdiodev);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index c2f056b5766d..8dd081051a79 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1162,6 +1162,9 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_FLOWTABLE(mdev, cap) \
MLX5_GET(flow_table_nic_cap, mdev->caps.hca_cur[MLX5_CAP_FLOW_TABLE], cap)
+#define MLX5_CAP64_FLOWTABLE(mdev, cap) \
+ MLX5_GET64(flow_table_nic_cap, (mdev)->caps.hca_cur[MLX5_CAP_FLOW_TABLE], cap)
+
#define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \
MLX5_GET(flow_table_nic_cap, mdev->caps.hca_max[MLX5_CAP_FLOW_TABLE], cap)
@@ -1225,6 +1228,10 @@ enum mlx5_qcam_feature_groups {
MLX5_GET(e_switch_cap, \
mdev->caps.hca_cur[MLX5_CAP_ESWITCH], cap)
+#define MLX5_CAP64_ESW_FLOWTABLE(mdev, cap) \
+ MLX5_GET64(flow_table_eswitch_cap, \
+ (mdev)->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
+
#define MLX5_CAP_ESW_MAX(mdev, cap) \
MLX5_GET(e_switch_cap, \
mdev->caps.hca_max[MLX5_CAP_ESWITCH], cap)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 52a56d741f79..3e80f03a387f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -626,6 +626,11 @@ struct mlx5e_resources {
struct mlx5_sq_bfreg bfreg;
};
+enum mlx5_sw_icm_type {
+ MLX5_SW_ICM_TYPE_STEERING,
+ MLX5_SW_ICM_TYPE_HEADER_MODIFY,
+};
+
#define MLX5_MAX_RESERVED_GIDS 8
struct mlx5_rsvd_gids {
@@ -657,11 +662,15 @@ struct mlx5_clock {
struct mlx5_pps pps_info;
};
+struct mlx5_dm;
struct mlx5_fw_tracer;
struct mlx5_vxlan;
struct mlx5_geneve;
struct mlx5_hv_vhca;
+#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
+#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
+
struct mlx5_core_dev {
struct device *device;
enum mlx5_coredev_type coredev_type;
@@ -695,6 +704,7 @@ struct mlx5_core_dev {
atomic_t num_qps;
u32 issi;
struct mlx5e_resources mlx5e_res;
+ struct mlx5_dm *dm;
struct mlx5_vxlan *vxlan;
struct mlx5_geneve *geneve;
struct {
@@ -1078,6 +1088,10 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
size_t *offsets);
struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
+int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
+ u64 length, u16 uid, phys_addr_t *addr, u32 *obj_id);
+int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
+ u64 length, u16 uid, phys_addr_t addr, u32 obj_id);
#ifdef CONFIG_MLX5_CORE_IPOIB
struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 38a70d16d8d5..98e667b176ef 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -60,7 +60,6 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
u16 vport_num);
void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
-u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
u16 vport_num, u32 sqn);
@@ -74,7 +73,14 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw);
u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
u16 vport_num);
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
#else /* CONFIG_MLX5_ESWITCH */
+
+static inline u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return MLX5_ESWITCH_NONE;
+}
+
static inline enum devlink_eswitch_encap_mode
mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
{
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 97ec6be62ac4..724d276ea133 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -84,6 +84,8 @@ enum {
FDB_SLOW_PATH,
};
+struct mlx5_pkt_reformat;
+struct mlx5_modify_hdr;
struct mlx5_flow_table;
struct mlx5_flow_group;
struct mlx5_flow_namespace;
@@ -121,7 +123,7 @@ struct mlx5_flow_destination {
struct {
u16 num;
u16 vhca_id;
- u32 reformat_id;
+ struct mlx5_pkt_reformat *pkt_reformat;
u8 flags;
} vport;
};
@@ -195,8 +197,8 @@ enum {
struct mlx5_flow_act {
u32 action;
- u32 reformat_id;
- u32 modify_id;
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5_pkt_reformat *pkt_reformat;
uintptr_t esp_id;
u32 flags;
struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH];
@@ -205,8 +207,6 @@ struct mlx5_flow_act {
#define MLX5_DECLARE_FLOW_ACT(name) \
struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\
- .reformat_id = 0, \
- .modify_id = 0, \
.flags = 0, }
/* Single destination per rule.
@@ -236,19 +236,18 @@ u32 mlx5_fc_id(struct mlx5_fc *counter);
int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
-int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
- u8 namespace, u8 num_actions,
- void *modify_actions, u32 *modify_header_id);
+struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+ u8 ns_type, u8 num_actions,
+ void *modify_actions);
void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
- u32 modify_header_id);
-
-int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
- int reformat_type,
- size_t size,
- void *reformat_data,
- enum mlx5_flow_namespace_type namespace,
- u32 *packet_reformat_id);
+ struct mlx5_modify_hdr *modify_hdr);
+
+struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
+ int reformat_type,
+ size_t size,
+ void *reformat_data,
+ enum mlx5_flow_namespace_type ns_type);
void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
- u32 packet_reformat_id);
+ struct mlx5_pkt_reformat *reformat);
#endif
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 66b60afd5e06..7d65c0578ac9 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -282,6 +282,7 @@ enum {
MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940,
MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942,
+ MLX5_CMD_OP_SYNC_STEERING = 0xb00,
MLX5_CMD_OP_FPGA_CREATE_QP = 0x960,
MLX5_CMD_OP_FPGA_MODIFY_QP = 0x961,
MLX5_CMD_OP_FPGA_QUERY_QP = 0x962,
@@ -485,7 +486,11 @@ union mlx5_ifc_gre_key_bits {
};
struct mlx5_ifc_fte_match_set_misc_bits {
- u8 reserved_at_0[0x8];
+ u8 gre_c_present[0x1];
+ u8 reserved_auto1[0x1];
+ u8 gre_k_present[0x1];
+ u8 gre_s_present[0x1];
+ u8 source_vhca_port[0x4];
u8 source_sqn[0x18];
u8 source_eswitch_owner_vhca_id[0x10];
@@ -565,12 +570,38 @@ struct mlx5_ifc_fte_match_set_misc2_bits {
u8 metadata_reg_a[0x20];
- u8 reserved_at_1a0[0x60];
+ u8 metadata_reg_b[0x20];
+
+ u8 reserved_at_1c0[0x40];
};
struct mlx5_ifc_fte_match_set_misc3_bits {
- u8 reserved_at_0[0x120];
+ u8 inner_tcp_seq_num[0x20];
+
+ u8 outer_tcp_seq_num[0x20];
+
+ u8 inner_tcp_ack_num[0x20];
+
+ u8 outer_tcp_ack_num[0x20];
+
+ u8 reserved_at_80[0x8];
+ u8 outer_vxlan_gpe_vni[0x18];
+
+ u8 outer_vxlan_gpe_next_protocol[0x8];
+ u8 outer_vxlan_gpe_flags[0x8];
+ u8 reserved_at_b0[0x10];
+
+ u8 icmp_header_data[0x20];
+
+ u8 icmpv6_header_data[0x20];
+
+ u8 icmp_type[0x8];
+ u8 icmp_code[0x8];
+ u8 icmpv6_type[0x8];
+ u8 icmpv6_code[0x8];
+
u8 geneve_tlv_option_0_data[0x20];
+
u8 reserved_at_140[0xc0];
};
@@ -666,7 +697,15 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer;
- u8 reserved_at_e00[0x7200];
+ u8 reserved_at_e00[0x1200];
+
+ u8 sw_steering_nic_rx_action_drop_icm_address[0x40];
+
+ u8 sw_steering_nic_tx_action_drop_icm_address[0x40];
+
+ u8 sw_steering_nic_tx_action_allow_icm_address[0x40];
+
+ u8 reserved_at_20c0[0x5f40];
};
enum {
@@ -698,7 +737,17 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits {
struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_egress;
- u8 reserved_at_800[0x7800];
+ u8 reserved_at_800[0x1000];
+
+ u8 sw_steering_fdb_action_drop_icm_address_rx[0x40];
+
+ u8 sw_steering_fdb_action_drop_icm_address_tx[0x40];
+
+ u8 sw_steering_uplink_icm_address_rx[0x40];
+
+ u8 sw_steering_uplink_icm_address_tx[0x40];
+
+ u8 reserved_at_1900[0x6700];
};
enum {
@@ -849,6 +898,25 @@ struct mlx5_ifc_roce_cap_bits {
u8 reserved_at_100[0x700];
};
+struct mlx5_ifc_sync_steering_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0xc0];
+};
+
+struct mlx5_ifc_sync_steering_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
struct mlx5_ifc_device_mem_cap_bits {
u8 memic[0x1];
u8 reserved_at_1[0x1f];
@@ -1042,6 +1110,12 @@ enum {
};
enum {
+ MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED = 1 << 7,
+ MLX5_FLEX_PARSER_ICMP_V4_ENABLED = 1 << 8,
+ MLX5_FLEX_PARSER_ICMP_V6_ENABLED = 1 << 9,
+};
+
+enum {
MLX5_UCTX_CAP_RAW_TX = 1UL << 0,
MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1,
};
@@ -1414,7 +1488,14 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_6c0[0x4];
u8 flex_parser_id_geneve_tlv_option_0[0x4];
- u8 reserved_at_6c8[0x28];
+ u8 flex_parser_id_icmp_dw1[0x4];
+ u8 flex_parser_id_icmp_dw0[0x4];
+ u8 flex_parser_id_icmpv6_dw1[0x4];
+ u8 flex_parser_id_icmpv6_dw0[0x4];
+ u8 flex_parser_id_outer_first_mpls_over_gre[0x4];
+ u8 flex_parser_id_outer_first_mpls_over_udp_label[0x4];
+
+ u8 reserved_at_6e0[0x10];
u8 sf_base_id[0x10];
u8 reserved_at_700[0x80];
@@ -2652,6 +2733,7 @@ union mlx5_ifc_hca_cap_union_bits {
struct mlx5_ifc_debug_cap_bits debug_cap;
struct mlx5_ifc_fpga_cap_bits fpga_cap;
struct mlx5_ifc_tls_cap_bits tls_cap;
+ struct mlx5_ifc_device_mem_cap_bits device_mem_cap;
u8 reserved_at_0[0x8000];
};
@@ -3255,7 +3337,11 @@ struct mlx5_ifc_esw_vport_context_bits {
u8 cvlan_pcp[0x3];
u8 cvlan_id[0xc];
- u8 reserved_at_60[0x7a0];
+ u8 reserved_at_60[0x720];
+
+ u8 sw_steering_vport_icm_address_rx[0x40];
+
+ u8 sw_steering_vport_icm_address_tx[0x40];
};
enum {
@@ -4941,23 +5027,98 @@ struct mlx5_ifc_query_hca_cap_in_bits {
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
- u8 reserved_at_40[0x40];
+ u8 other_function[0x1];
+ u8 reserved_at_41[0xf];
+ u8 function_id[0x10];
+
+ u8 reserved_at_60[0x20];
};
-struct mlx5_ifc_query_flow_table_out_bits {
+struct mlx5_ifc_other_hca_cap_bits {
+ u8 roce[0x1];
+ u8 reserved_0[0x27f];
+};
+
+struct mlx5_ifc_query_other_hca_cap_out_bits {
u8 status[0x8];
- u8 reserved_at_8[0x18];
+ u8 reserved_0[0x18];
u8 syndrome[0x20];
- u8 reserved_at_40[0x80];
+ u8 reserved_1[0x40];
- u8 reserved_at_c0[0x8];
+ struct mlx5_ifc_other_hca_cap_bits other_capability;
+};
+
+struct mlx5_ifc_query_other_hca_cap_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_0[0x10];
+
+ u8 reserved_1[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_2[0x10];
+ u8 function_id[0x10];
+
+ u8 reserved_3[0x20];
+};
+
+struct mlx5_ifc_modify_other_hca_cap_out_bits {
+ u8 status[0x8];
+ u8 reserved_0[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_1[0x40];
+};
+
+struct mlx5_ifc_modify_other_hca_cap_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_0[0x10];
+
+ u8 reserved_1[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_2[0x10];
+ u8 function_id[0x10];
+ u8 field_select[0x20];
+
+ struct mlx5_ifc_other_hca_cap_bits other_capability;
+};
+
+struct mlx5_ifc_flow_table_context_bits {
+ u8 reformat_en[0x1];
+ u8 decap_en[0x1];
+ u8 sw_owner[0x1];
+ u8 termination_table[0x1];
+ u8 table_miss_action[0x4];
u8 level[0x8];
- u8 reserved_at_d0[0x8];
+ u8 reserved_at_10[0x8];
u8 log_size[0x8];
- u8 reserved_at_e0[0x120];
+ u8 reserved_at_20[0x8];
+ u8 table_miss_id[0x18];
+
+ u8 reserved_at_40[0x8];
+ u8 lag_master_next_table_id[0x18];
+
+ u8 reserved_at_60[0x60];
+
+ u8 sw_owner_icm_root_1[0x40];
+
+ u8 sw_owner_icm_root_0[0x40];
+
+};
+
+struct mlx5_ifc_query_flow_table_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x80];
+
+ struct mlx5_ifc_flow_table_context_bits flow_table_context;
};
struct mlx5_ifc_query_flow_table_in_bits {
@@ -5227,7 +5388,7 @@ struct mlx5_ifc_alloc_packet_reformat_context_out_bits {
u8 reserved_at_60[0x20];
};
-enum {
+enum mlx5_reformat_ctx_type {
MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0,
MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1,
MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2,
@@ -5323,7 +5484,16 @@ enum {
MLX5_ACTION_IN_FIELD_OUT_DIPV4 = 0x16,
MLX5_ACTION_IN_FIELD_OUT_FIRST_VID = 0x17,
MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT = 0x47,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_A = 0x49,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_B = 0x50,
MLX5_ACTION_IN_FIELD_METADATA_REG_C_0 = 0x51,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_C_1 = 0x52,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_C_2 = 0x53,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_C_3 = 0x54,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_C_4 = 0x55,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_C_5 = 0x56,
+ MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM = 0x59,
+ MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM = 0x5B,
};
struct mlx5_ifc_alloc_modify_header_context_out_bits {
@@ -7371,35 +7541,26 @@ struct mlx5_ifc_create_mkey_in_bits {
u8 klm_pas_mtt[0][0x20];
};
+enum {
+ MLX5_FLOW_TABLE_TYPE_NIC_RX = 0x0,
+ MLX5_FLOW_TABLE_TYPE_NIC_TX = 0x1,
+ MLX5_FLOW_TABLE_TYPE_ESW_EGRESS_ACL = 0x2,
+ MLX5_FLOW_TABLE_TYPE_ESW_INGRESS_ACL = 0x3,
+ MLX5_FLOW_TABLE_TYPE_FDB = 0X4,
+ MLX5_FLOW_TABLE_TYPE_SNIFFER_RX = 0X5,
+ MLX5_FLOW_TABLE_TYPE_SNIFFER_TX = 0X6,
+};
+
struct mlx5_ifc_create_flow_table_out_bits {
u8 status[0x8];
- u8 reserved_at_8[0x18];
+ u8 icm_address_63_40[0x18];
u8 syndrome[0x20];
- u8 reserved_at_40[0x8];
+ u8 icm_address_39_32[0x8];
u8 table_id[0x18];
- u8 reserved_at_60[0x20];
-};
-
-struct mlx5_ifc_flow_table_context_bits {
- u8 reformat_en[0x1];
- u8 decap_en[0x1];
- u8 reserved_at_2[0x1];
- u8 termination_table[0x1];
- u8 table_miss_action[0x4];
- u8 level[0x8];
- u8 reserved_at_10[0x8];
- u8 log_size[0x8];
-
- u8 reserved_at_20[0x8];
- u8 table_miss_id[0x18];
-
- u8 reserved_at_40[0x8];
- u8 lag_master_next_table_id[0x18];
-
- u8 reserved_at_60[0xe0];
+ u8 icm_address_31_0[0x20];
};
struct mlx5_ifc_create_flow_table_in_bits {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b5d28dadf964..d7d5626002e9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -901,6 +901,10 @@ struct netdev_bpf {
};
};
+/* Flags for ndo_xsk_wakeup. */
+#define XDP_WAKEUP_RX (1 << 0)
+#define XDP_WAKEUP_TX (1 << 1)
+
#ifdef CONFIG_XFRM_OFFLOAD
struct xfrmdev_ops {
int (*xdo_dev_state_add) (struct xfrm_state *x);
@@ -1227,6 +1231,12 @@ struct tlsdev_ops;
* that got dropped are freed/returned via xdp_return_frame().
* Returns negative number, means general error invoking ndo, meaning
* no frames were xmit'ed and core-caller will free all frames.
+ * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags);
+ * This function is used to wake up the softirq, ksoftirqd or kthread
+ * responsible for sending and/or receiving packets on a specific
+ * queue id bound to an AF_XDP socket. The flags field specifies if
+ * only RX, only Tx, or both should be woken up using the flags
+ * XDP_WAKEUP_RX and XDP_WAKEUP_TX.
* struct devlink_port *(*ndo_get_devlink_port)(struct net_device *dev);
* Get devlink port instance associated with a given netdev.
* Called with a reference on the netdevice and devlink locks only,
@@ -1426,8 +1436,8 @@ struct net_device_ops {
int (*ndo_xdp_xmit)(struct net_device *dev, int n,
struct xdp_frame **xdp,
u32 flags);
- int (*ndo_xsk_async_xmit)(struct net_device *dev,
- u32 queue_id);
+ int (*ndo_xsk_wakeup)(struct net_device *dev,
+ u32 queue_id, u32 flags);
struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
};
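The ndo_xsk_wakeup() hook documented above replaces ndo_xsk_async_xmit(). As a rough sketch of how a driver might implement it (illustrative only, not part of this patch: the mydrv_* names and private structures are invented; only the prototype and the XDP_WAKEUP_RX/XDP_WAKEUP_TX flags come from this change), the handler typically just kicks the NAPI contexts that serve the requested queue:

/* Hypothetical driver-side implementation of ndo_xsk_wakeup().
 * mydrv_priv and mydrv_ring stand in for a driver's private state.
 */
#include <linux/errno.h>
#include <linux/netdevice.h>

struct mydrv_ring {
	struct napi_struct rx_napi;
	struct napi_struct tx_napi;
};

struct mydrv_priv {
	unsigned int num_queues;
	struct mydrv_ring *rings;
};

static int mydrv_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
	struct mydrv_priv *priv = netdev_priv(dev);
	struct mydrv_ring *ring;

	if (queue_id >= priv->num_queues)
		return -EINVAL;

	ring = &priv->rings[queue_id];

	if (flags & XDP_WAKEUP_RX)
		napi_schedule(&ring->rx_napi);	/* resume Rx/fill ring work */
	if (flags & XDP_WAKEUP_TX)
		napi_schedule(&ring->tx_napi);	/* resume Tx ring work */

	return 0;
}

/* and in the driver's netdev ops:  .ndo_xsk_wakeup = mydrv_xsk_wakeup, */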
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 77c6dc88e95d..028e684fa974 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -279,6 +279,16 @@ struct nf_bridge_info {
};
#endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+/* Chain in tc_skb_ext will be used to share the tc chain with
+ * ovs recirc_id. It will be set to the current chain by tc
+ * and read by ovs to set the recirc_id.
+ */
+struct tc_skb_ext {
+ __u32 chain;
+};
+#endif
+
struct sk_buff_head {
/* These two members must be first. */
struct sk_buff *next;
@@ -4058,6 +4068,9 @@ enum skb_ext_id {
#ifdef CONFIG_XFRM
SKB_EXT_SEC_PATH,
#endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ TC_SKB_EXT,
+#endif
SKB_EXT_NUM, /* must be last */
};
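To illustrate how the new TC_SKB_EXT extension is meant to be used (a sketch only; the example_* helpers are invented, but skb_ext_add() and skb_ext_find() are the existing skb extension API), tc would record the current chain and openvswitch would read it back when deriving its recirc_id:

/* Illustrative producer/consumer of the tc_skb_ext extension. */
#include <linux/skbuff.h>

static void example_record_tc_chain(struct sk_buff *skb, u32 chain)
{
	struct tc_skb_ext *ext;

	ext = skb_ext_add(skb, TC_SKB_EXT);	/* allocate or reuse the extension */
	if (ext)
		ext->chain = chain;
}

static u32 example_read_tc_chain(const struct sk_buff *skb)
{
	struct tc_skb_ext *ext = skb_ext_find(skb, TC_SKB_EXT);

	return ext ? ext->chain : 0;	/* 0: no chain was recorded */
}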
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index c7dc2b5902c0..c17af77f3fae 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -5,6 +5,10 @@
* propagate the unknown bits such that the tnum result represents all the
* possible results for possible values of the operands.
*/
+
+#ifndef _LINUX_TNUM_H
+#define _LINUX_TNUM_H
+
#include <linux/types.h>
struct tnum {
@@ -81,3 +85,5 @@ bool tnum_in(struct tnum a, struct tnum b);
int tnum_strn(char *str, size_t size, struct tnum a);
/* Format a tnum as tristate binary expansion */
int tnum_sbin(char *str, size_t size, struct tnum a);
+
+#endif /* _LINUX_TNUM_H */
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index b9dcb02e756b..8e4f831d2e52 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -10,4 +10,14 @@ void bpf_sk_storage_free(struct sock *sk);
extern const struct bpf_func_proto bpf_sk_storage_get_proto;
extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
+#ifdef CONFIG_BPF_SYSCALL
+int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
+#else
+static inline int bpf_sk_storage_clone(const struct sock *sk,
+ struct sock *newsk)
+{
+ return 0;
+}
+#endif
+
#endif /* _BPF_SK_STORAGE_H */
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 13523b0a0642..460bc629d1a4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -458,6 +458,13 @@ enum devlink_param_generic_id {
/* Maker of the board */
#define DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE "board.manufacture"
+/* Part number, identifier of asic design */
+#define DEVLINK_INFO_VERSION_GENERIC_ASIC_ID "asic.id"
+/* Revision of asic design */
+#define DEVLINK_INFO_VERSION_GENERIC_ASIC_REV "asic.rev"
+
+/* Overall FW version */
+#define DEVLINK_INFO_VERSION_GENERIC_FW "fw"
/* Control processor FW version */
#define DEVLINK_INFO_VERSION_GENERIC_FW_MGMT "fw.mgmt"
/* Data path microcode controlling high-speed packet processing */
diff --git a/include/net/netns/can.h b/include/net/netns/can.h
index ca9bd9fba5b5..b6ab7d1530d7 100644
--- a/include/net/netns/can.h
+++ b/include/net/netns/can.h
@@ -9,8 +9,8 @@
#include <linux/spinlock.h>
struct can_dev_rcv_lists;
-struct s_stats;
-struct s_pstats;
+struct can_pkg_stats;
+struct can_rcv_lists_stats;
struct netns_can {
#if IS_ENABLED(CONFIG_PROC_FS)
@@ -28,11 +28,11 @@ struct netns_can {
#endif
/* receive filters subscribed for 'all' CAN devices */
- struct can_dev_rcv_lists *can_rx_alldev_list;
- spinlock_t can_rcvlists_lock;
- struct timer_list can_stattimer;/* timer for statistics update */
- struct s_stats *can_stats; /* packet statistics */
- struct s_pstats *can_pstats; /* receive list statistics */
+ struct can_dev_rcv_lists *rx_alldev_list;
+ spinlock_t rcvlists_lock;
+ struct timer_list stattimer; /* timer for statistics update */
+ struct can_pkg_stats *pkg_stats;
+ struct can_rcv_lists_stats *rcv_lists_stats;
/* CAN GW per-net gateway jobs */
struct hlist_head cgw_list;
diff --git a/include/net/tls.h b/include/net/tls.h
index ec3c3ed2c6c3..c664e6dba0d1 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -275,16 +275,6 @@ struct tls_context {
struct proto *sk_proto;
void (*sk_destruct)(struct sock *sk);
- void (*sk_proto_close)(struct sock *sk, long timeout);
-
- int (*setsockopt)(struct sock *sk, int level,
- int optname, char __user *optval,
- unsigned int optlen);
- int (*getsockopt)(struct sock *sk, int level,
- int optname, char __user *optval,
- int __user *optlen);
- int (*hash)(struct sock *sk);
- void (*unhash)(struct sock *sk);
union tls_crypto_context crypto_send;
union tls_crypto_context crypto_recv;
@@ -376,13 +366,9 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
-int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tls_device_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
-void tls_device_free_resources_tx(struct sock *sk);
-void tls_device_init(void);
-void tls_device_cleanup(void);
int tls_tx_records(struct sock *sk, int flags);
struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
@@ -659,7 +645,6 @@ int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
unsigned char *record_type);
void tls_register_device(struct tls_device *device);
void tls_unregister_device(struct tls_device *device);
-int tls_device_decrypted(struct sock *sk, struct sk_buff *skb);
int decrypt_skb(struct sock *sk, struct sk_buff *skb,
struct scatterlist *sgout);
struct sk_buff *tls_encrypt_skb(struct sk_buff *skb);
@@ -672,9 +657,40 @@ int tls_sw_fallback_init(struct sock *sk,
struct tls_offload_context_tx *offload_ctx,
struct tls_crypto_info *crypto_info);
+#ifdef CONFIG_TLS_DEVICE
+void tls_device_init(void);
+void tls_device_cleanup(void);
+int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
+void tls_device_free_resources_tx(struct sock *sk);
int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx);
-
void tls_device_offload_cleanup_rx(struct sock *sk);
void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq);
+int tls_device_decrypted(struct sock *sk, struct sk_buff *skb);
+#else
+static inline void tls_device_init(void) {}
+static inline void tls_device_cleanup(void) {}
+static inline int
+tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void tls_device_free_resources_tx(struct sock *sk) {}
+
+static inline int
+tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void tls_device_offload_cleanup_rx(struct sock *sk) {}
+static inline void
+tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) {}
+
+static inline int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
+{
+ return 0;
+}
+#endif
#endif /* _TLS_OFFLOAD_H */
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index dc1583a1fb8a..335283dbe9b3 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -391,7 +391,7 @@ static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
if (ipa->sa.sa_family == AF_INET6)
return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
else
- return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
+ return ipv4_is_multicast(ipa->sin.sin_addr.s_addr);
}
#else /* !IS_ENABLED(CONFIG_IPV6) */
@@ -403,7 +403,7 @@ static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
{
- return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
+ return ipv4_is_multicast(ipa->sin.sin_addr.s_addr);
}
#endif /* IS_ENABLED(CONFIG_IPV6) */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 69796d264f06..c9398ce7960f 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -16,6 +16,13 @@
struct net_device;
struct xsk_queue;
+/* Masks for xdp_umem_page flags.
+ * The low 12-bits of the addr will be 0 since this is the page address, so we
+ * can use them for flags.
+ */
+#define XSK_NEXT_PG_CONTIG_SHIFT 0
+#define XSK_NEXT_PG_CONTIG_MASK (1ULL << XSK_NEXT_PG_CONTIG_SHIFT)
+
struct xdp_umem_page {
void *addr;
dma_addr_t dma;
@@ -27,6 +34,13 @@ struct xdp_umem_fq_reuse {
u64 handles[];
};
+/* Flags for the umem flags field.
+ *
+ * The NEED_WAKEUP flag is (1 << 1) due to the reuse of the flags field for
+ * the public umem flags. See include/uapi/linux/if_xdp.h.
+ */
+#define XDP_UMEM_USES_NEED_WAKEUP (1 << 1)
+
struct xdp_umem {
struct xsk_queue *fq;
struct xsk_queue *cq;
@@ -41,15 +55,27 @@ struct xdp_umem {
struct work_struct work;
struct page **pgs;
u32 npgs;
+ u16 queue_id;
+ u8 need_wakeup;
+ u8 flags;
int id;
struct net_device *dev;
struct xdp_umem_fq_reuse *fq_reuse;
- u16 queue_id;
bool zc;
spinlock_t xsk_list_lock;
struct list_head xsk_list;
};
+/* Nodes are linked in the struct xdp_sock map_list field, and used to
+ * track which maps a certain socket resides in.
+ */
+struct xsk_map;
+struct xsk_map_node {
+ struct list_head node;
+ struct xsk_map *map;
+ struct xdp_sock **map_entry;
+};
+
struct xdp_sock {
/* struct sock must be the first member of struct xdp_sock */
struct sock sk;
@@ -75,6 +101,9 @@ struct xdp_sock {
/* Protects generic receive. */
spinlock_t rx_lock;
u64 rx_dropped;
+ struct list_head map_list;
+ /* Protects map_list */
+ spinlock_t map_list_lock;
};
struct xdp_buff;
@@ -95,15 +124,47 @@ struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
struct xdp_umem_fq_reuse *newq);
void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);
+void xsk_set_rx_need_wakeup(struct xdp_umem *umem);
+void xsk_set_tx_need_wakeup(struct xdp_umem *umem);
+void xsk_clear_rx_need_wakeup(struct xdp_umem *umem);
+void xsk_clear_tx_need_wakeup(struct xdp_umem *umem);
+bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem);
+
+void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
+ struct xdp_sock **map_entry);
+int xsk_map_inc(struct xsk_map *map);
+void xsk_map_put(struct xsk_map *map);
+
+static inline u64 xsk_umem_extract_addr(u64 addr)
+{
+ return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+}
+
+static inline u64 xsk_umem_extract_offset(u64 addr)
+{
+ return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+}
+
+static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
+{
+ return xsk_umem_extract_addr(addr) + xsk_umem_extract_offset(addr);
+}
static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
- return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
+ unsigned long page_addr;
+
+ addr = xsk_umem_add_offset_to_addr(addr);
+ page_addr = (unsigned long)umem->pages[addr >> PAGE_SHIFT].addr;
+
+ return (char *)(page_addr & PAGE_MASK) + (addr & ~PAGE_MASK);
}
static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
{
- return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
+ addr = xsk_umem_add_offset_to_addr(addr);
+
+ return umem->pages[addr >> PAGE_SHIFT].dma + (addr & ~PAGE_MASK);
}
/* Reuse-queue aware version of FILL queue helpers */
@@ -144,6 +205,19 @@ static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
rq->handles[rq->length++] = addr;
}
+
+/* Handle the offset appropriately depending on aligned or unaligned mode.
+ * For unaligned mode, we store the offset in the upper 16-bits of the address.
+ * For aligned mode, we simply add the offset to the address.
+ */
+static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address,
+ u64 offset)
+{
+ if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+ return address + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+ else
+ return address + offset;
+}
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
@@ -213,6 +287,21 @@ static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
return NULL;
}
+static inline u64 xsk_umem_extract_addr(u64 addr)
+{
+ return 0;
+}
+
+static inline u64 xsk_umem_extract_offset(u64 addr)
+{
+ return 0;
+}
+
+static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
+{
+ return 0;
+}
+
static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
return NULL;
@@ -241,6 +330,33 @@ static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
{
}
+static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
+{
+ return false;
+}
+
+static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
+ u64 offset)
+{
+ return 0;
+}
+
#endif /* CONFIG_XDP_SOCKETS */
#endif /* _LINUX_XDP_SOCK_H */
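A small worked example of the address layout the helpers above implement (user-space, illustrative only; the two constants are copied from the include/uapi/linux/if_xdp.h change later in this patch): in unaligned-chunk mode the chunk offset travels in the upper 16 bits of the 64-bit handle and the chunk address in the lower 48 bits.

/* Worked example of the unaligned-chunk address encoding. */
#include <stdint.h>
#include <stdio.h>

#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48
#define XSK_UNALIGNED_BUF_ADDR_MASK \
	((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)

int main(void)
{
	uint64_t base = 0x3000;	/* chunk start within the umem */
	uint64_t offset = 0x10;	/* packet offset inside the chunk */

	/* unaligned mode: offset is stored in the upper 16 bits */
	uint64_t encoded = base | (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);

	uint64_t addr = encoded & XSK_UNALIGNED_BUF_ADDR_MASK;	   /* 0x3000 */
	uint64_t off = encoded >> XSK_UNALIGNED_BUF_OFFSET_SHIFT; /* 0x10 */

	printf("addr=0x%llx off=0x%llx final=0x%llx\n",
	       (unsigned long long)addr, (unsigned long long)off,
	       (unsigned long long)(addr + off));	/* final=0x3010 */
	return 0;
}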
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0e66371bea13..77c6be96d676 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -106,6 +106,7 @@ enum bpf_cmd {
BPF_TASK_FD_QUERY,
BPF_MAP_LOOKUP_AND_DELETE_ELEM,
BPF_MAP_FREEZE,
+ BPF_BTF_GET_NEXT_ID,
};
enum bpf_map_type {
@@ -284,6 +285,9 @@ enum bpf_attach_type {
*/
#define BPF_F_TEST_RND_HI32 (1U << 2)
+/* The verifier internal test flag. Behavior is undefined */
+#define BPF_F_TEST_STATE_FREQ (1U << 3)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* two extensions:
*
@@ -337,6 +341,9 @@ enum bpf_attach_type {
#define BPF_F_RDONLY_PROG (1U << 7)
#define BPF_F_WRONLY_PROG (1U << 8)
+/* Clone map from listener for newly accepted socket */
+#define BPF_F_CLONE (1U << 9)
+
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
@@ -576,6 +583,8 @@ union bpf_attr {
* limited to five).
*
* Each time the helper is called, it appends a line to the trace.
+ * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
+ * open; use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
* The format of the trace is customizable, and the exact output
* one will get depends on the options set in
* *\/sys/kernel/debug/tracing/trace_options* (see also the
@@ -1014,7 +1023,7 @@ union bpf_attr {
* The realm of the route for the packet associated to *skb*, or 0
* if none was found.
*
- * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* Description
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -1076,7 +1085,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
+ * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags)
* Description
* Walk a user or a kernel stack and return its id. To achieve
* this, the helper needs *ctx*, which is a pointer to the context
@@ -1725,7 +1734,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_override_return(struct pt_reg *regs, u64 rc)
+ * int bpf_override_return(struct pt_regs *regs, u64 rc)
* Description
* Used for error injection, this helper uses kprobes to override
* the return value of the probed function, and to set it to *rc*.
diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index 0afb7d8e867f..1e988fdeba34 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -157,7 +157,8 @@ struct canfd_frame {
#define CAN_TP20 4 /* VAG Transport Protocol v2.0 */
#define CAN_MCNET 5 /* Bosch MCNet */
#define CAN_ISOTP 6 /* ISO 15765-2 Transport Protocol */
-#define CAN_NPROTO 7
+#define CAN_J1939 7 /* SAE J1939 */
+#define CAN_NPROTO 8
#define SOL_CAN_BASE 100
@@ -174,6 +175,23 @@ struct sockaddr_can {
/* transport protocol class address information (e.g. ISOTP) */
struct { canid_t rx_id, tx_id; } tp;
+ /* J1939 address information */
+ struct {
+ /* 8 byte name when using dynamic addressing */
+ __u64 name;
+
+ /* pgn:
+ * 8 bit: PS in PDU2 case, else 0
+ * 8 bit: PF
+ * 1 bit: DP
+ * 1 bit: reserved
+ */
+ __u32 pgn;
+
+ /* 1 byte address */
+ __u8 addr;
+ } j1939;
+
/* reserved for future CAN protocols address information */
} can_addr;
};
diff --git a/include/uapi/linux/can/j1939.h b/include/uapi/linux/can/j1939.h
new file mode 100644
index 000000000000..c32325342d30
--- /dev/null
+++ b/include/uapi/linux/can/j1939.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * j1939.h
+ *
+ * Copyright (c) 2010-2011 EIA Electronics
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _UAPI_CAN_J1939_H_
+#define _UAPI_CAN_J1939_H_
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/can.h>
+
+#define J1939_MAX_UNICAST_ADDR 0xfd
+#define J1939_IDLE_ADDR 0xfe
+#define J1939_NO_ADDR 0xff /* == broadcast or no addr */
+#define J1939_NO_NAME 0
+#define J1939_PGN_REQUEST 0x0ea00 /* Request PG */
+#define J1939_PGN_ADDRESS_CLAIMED 0x0ee00 /* Address Claimed */
+#define J1939_PGN_ADDRESS_COMMANDED 0x0fed8 /* Commanded Address */
+#define J1939_PGN_PDU1_MAX 0x3ff00
+#define J1939_PGN_MAX 0x3ffff
+#define J1939_NO_PGN 0x40000
+
+/* J1939 Parameter Group Number
+ *
+ * bit 0-7 : PDU Specific (PS)
+ * bit 8-15 : PDU Format (PF)
+ * bit 16 : Data Page (DP)
+ * bit 17 : Reserved (R)
+ * bit 18-31 : set to zero
+ */
+typedef __u32 pgn_t;
+
+/* J1939 Priority
+ *
+ * bit 0-2 : Priority (P)
+ * bit 3-7 : set to zero
+ */
+typedef __u8 priority_t;
+
+/* J1939 NAME
+ *
+ * bit 0-20 : Identity Number
+ * bit 21-31 : Manufacturer Code
+ * bit 32-34 : ECU Instance
+ * bit 35-39 : Function Instance
+ * bit 40-47 : Function
+ * bit 48 : Reserved
+ * bit 49-55 : Vehicle System
+ * bit 56-59 : Vehicle System Instance
+ * bit 60-62 : Industry Group
+ * bit 63 : Arbitrary Address Capable
+ */
+typedef __u64 name_t;
+
+/* J1939 socket options */
+#define SOL_CAN_J1939 (SOL_CAN_BASE + CAN_J1939)
+enum {
+ SO_J1939_FILTER = 1, /* set filters */
+ SO_J1939_PROMISC = 2, /* set/clr promiscuous mode */
+ SO_J1939_SEND_PRIO = 3,
+ SO_J1939_ERRQUEUE = 4,
+};
+
+enum {
+ SCM_J1939_DEST_ADDR = 1,
+ SCM_J1939_DEST_NAME = 2,
+ SCM_J1939_PRIO = 3,
+ SCM_J1939_ERRQUEUE = 4,
+};
+
+enum {
+ J1939_NLA_PAD,
+ J1939_NLA_BYTES_ACKED,
+};
+
+enum {
+ J1939_EE_INFO_NONE,
+ J1939_EE_INFO_TX_ABORT,
+};
+
+struct j1939_filter {
+ name_t name;
+ name_t name_mask;
+ pgn_t pgn;
+ pgn_t pgn_mask;
+ __u8 addr;
+ __u8 addr_mask;
+};
+
+#define J1939_FILTER_MAX 512 /* maximum number of j1939_filter set via setsockopt() */
+
+#endif /* !_UAPI_CAN_J1939_H_ */
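Putting the new CAN_J1939 protocol number and the j1939 member of sockaddr_can together, a minimal user-space sketch looks like the following (illustrative only; it assumes the headers from this series are installed and that a can0 interface exists, and error handling is kept to the essentials):

/* Open a J1939 socket and bind it to can0 with a static source address. */
#include <stdio.h>
#include <unistd.h>
#include <net/if.h>
#include <sys/socket.h>
#include <linux/can.h>
#include <linux/can/j1939.h>

int main(void)
{
	struct sockaddr_can addr = { 0 };
	int sock;

	sock = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);
	if (sock < 0) {
		perror("socket");
		return 1;
	}

	addr.can_family = AF_CAN;
	addr.can_ifindex = if_nametoindex("can0");	/* assumes can0 is up */
	addr.can_addr.j1939.name = J1939_NO_NAME;	/* static addressing */
	addr.can_addr.j1939.addr = 0x20;		/* own 8-bit source address */
	addr.can_addr.j1939.pgn = J1939_NO_PGN;		/* accept any PGN */

	if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("bind");
		return 1;
	}

	/* datagrams can now be exchanged with sendto()/recvfrom() using
	 * sockaddr_can peers in the same format */
	close(sock);
	return 0;
}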
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index faaa5ca2a117..be328c59389d 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -16,6 +16,18 @@
#define XDP_SHARED_UMEM (1 << 0)
#define XDP_COPY (1 << 1) /* Force copy-mode */
#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */
+/* If this option is set, the driver might go to sleep and in that case
+ * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be
+ * set. If it is set, the application needs to explicitly wake up the
+ * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are
+ * running the driver and the application on the same core, you should
+ * use this option so that the kernel will yield to the user space
+ * application.
+ */
+#define XDP_USE_NEED_WAKEUP (1 << 3)
+
+/* Flags for xsk_umem_config flags */
+#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
struct sockaddr_xdp {
__u16 sxdp_family;
@@ -25,10 +37,14 @@ struct sockaddr_xdp {
__u32 sxdp_shared_umem_fd;
};
+/* XDP_RING flags */
+#define XDP_RING_NEED_WAKEUP (1 << 0)
+
struct xdp_ring_offset {
__u64 producer;
__u64 consumer;
__u64 desc;
+ __u64 flags;
};
struct xdp_mmap_offsets {
@@ -53,6 +69,7 @@ struct xdp_umem_reg {
__u64 len; /* Length of packet data area */
__u32 chunk_size;
__u32 headroom;
+ __u32 flags;
};
struct xdp_statistics {
@@ -74,6 +91,11 @@ struct xdp_options {
#define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL
#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL
+/* Masks for unaligned chunks mode */
+#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48
+#define XSK_UNALIGNED_BUF_ADDR_MASK \
+ ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
+
/* Rx/Tx descriptor */
struct xdp_desc {
__u64 addr;
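For the XDP_USE_NEED_WAKEUP option above, a hedged user-space sketch of the Tx side follows (illustrative only; tx_ring_flags is assumed to point at the flags word mapped via the new xdp_ring_offset.flags field, and xsk_fd is the bound AF_XDP socket):

/* Wake the driver only when it advertised that it went to sleep. */
#include <sys/socket.h>
#include <linux/if_xdp.h>

static void example_kick_tx(int xsk_fd, const volatile __u32 *tx_ring_flags)
{
	if (*tx_ring_flags & XDP_RING_NEED_WAKEUP)
		sendto(xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
}

Receive-side wakeups work the same way: the application checks the fill ring's flags word and, if XDP_RING_NEED_WAKEUP is set, wakes the driver with poll() on the socket.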
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index f271f1ec50ae..1887a451c388 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -123,6 +123,9 @@ struct ovs_vport_stats {
/* Allow datapath to associate multiple Netlink PIDs to each vport */
#define OVS_DP_F_VPORT_PIDS (1 << 1)
+/* Allow tc offload recirc sharing */
+#define OVS_DP_F_TC_RECIRC_SHARING (1 << 2)
+
/* Fixed logical ports. */
#define OVSP_LOCAL ((__u32)0)
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index b057aeeb6338..a6aa466fac9e 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -160,6 +160,8 @@ enum {
TCA_POLICE_RESULT,
TCA_POLICE_TM,
TCA_POLICE_PAD,
+ TCA_POLICE_RATE64,
+ TCA_POLICE_PEAKRATE64,
__TCA_POLICE_MAX
#define TCA_POLICE_RESULT TCA_POLICE_RESULT
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 5fcc7a17eb5a..adb3adcebe3c 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -195,8 +195,8 @@
i < btf_type_vlen(struct_type); \
i++, member++)
-static DEFINE_IDR(btf_idr);
-static DEFINE_SPINLOCK(btf_idr_lock);
+DEFINE_IDR(btf_idr);
+DEFINE_SPINLOCK(btf_idr_lock);
struct btf {
void *data;
@@ -3376,6 +3376,15 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
btf_type_ops(t)->seq_show(btf, t, type_id, obj, 0, m);
}
+#ifdef CONFIG_PROC_FS
+static void bpf_btf_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+ const struct btf *btf = filp->private_data;
+
+ seq_printf(m, "btf_id:\t%u\n", btf->id);
+}
+#endif
+
static int btf_release(struct inode *inode, struct file *filp)
{
btf_put(filp->private_data);
@@ -3383,6 +3392,9 @@ static int btf_release(struct inode *inode, struct file *filp)
}
const struct file_operations btf_fops = {
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = bpf_btf_show_fdinfo,
+#endif
.release = btf_release,
};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 272071e9112f..82eabd4e38ad 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -683,8 +683,8 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
}
/* map_idr_lock should have been held */
-static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
- bool uref)
+static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map,
+ bool uref)
{
int refold;
@@ -704,6 +704,16 @@ static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
return map;
}
+struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
+{
+ spin_lock_bh(&map_idr_lock);
+ map = __bpf_map_inc_not_zero(map, uref);
+ spin_unlock_bh(&map_idr_lock);
+
+ return map;
+}
+EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
+
int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
return -ENOTSUPP;
@@ -1619,6 +1629,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
BPF_F_ANY_ALIGNMENT |
+ BPF_F_TEST_STATE_FREQ |
BPF_F_TEST_RND_HI32))
return -EINVAL;
@@ -2183,7 +2194,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
spin_lock_bh(&map_idr_lock);
map = idr_find(&map_idr, id);
if (map)
- map = bpf_map_inc_not_zero(map, true);
+ map = __bpf_map_inc_not_zero(map, true);
else
map = ERR_PTR(-ENOENT);
spin_unlock_bh(&map_idr_lock);
@@ -2880,6 +2891,10 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
err = bpf_obj_get_next_id(&attr, uattr,
&map_idr, &map_idr_lock);
break;
+ case BPF_BTF_GET_NEXT_ID:
+ err = bpf_obj_get_next_id(&attr, uattr,
+ &btf_idr, &btf_idr_lock);
+ break;
case BPF_PROG_GET_FD_BY_ID:
err = bpf_prog_get_fd_by_id(&attr);
break;
diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
index 4659349fc795..7ae5dddd1fe6 100644
--- a/kernel/bpf/sysfs_btf.c
+++ b/kernel/bpf/sysfs_btf.c
@@ -30,17 +30,12 @@ static struct kobject *btf_kobj;
static int __init btf_vmlinux_init(void)
{
- int err;
-
if (!_binary__btf_vmlinux_bin_start)
return 0;
btf_kobj = kobject_create_and_add("btf", kernel_kobj);
- if (IS_ERR(btf_kobj)) {
- err = PTR_ERR(btf_kobj);
- btf_kobj = NULL;
- return err;
- }
+ if (!btf_kobj)
+ return -ENOMEM;
bin_attr_btf_vmlinux.size = _binary__btf_vmlinux_bin_end -
_binary__btf_vmlinux_bin_start;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 16d66bd7af09..3fb50757e812 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7223,7 +7223,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
struct bpf_verifier_state_list *sl, **pprev;
struct bpf_verifier_state *cur = env->cur_state, *new;
int i, j, err, states_cnt = 0;
- bool add_new_state = false;
+ bool add_new_state = env->test_state_freq ? true : false;
cur->last_insn_idx = env->prev_insn_idx;
if (!env->insn_aux_data[insn_idx].prune_point)
@@ -9263,6 +9263,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
env->allow_ptr_leaks = is_priv;
+ if (is_priv)
+ env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
+
ret = replace_map_fd_with_map_ptr(env);
if (ret < 0)
goto skip_full_check;
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 9bb96ace9fa1..942c662e2eed 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -13,8 +13,71 @@ struct xsk_map {
struct bpf_map map;
struct xdp_sock **xsk_map;
struct list_head __percpu *flush_list;
+ spinlock_t lock; /* Synchronize map updates */
};
+int xsk_map_inc(struct xsk_map *map)
+{
+ struct bpf_map *m = &map->map;
+
+ m = bpf_map_inc(m, false);
+ return PTR_ERR_OR_ZERO(m);
+}
+
+void xsk_map_put(struct xsk_map *map)
+{
+ bpf_map_put(&map->map);
+}
+
+static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
+ struct xdp_sock **map_entry)
+{
+ struct xsk_map_node *node;
+ int err;
+
+ node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN);
+ if (!node)
+ return NULL;
+
+ err = xsk_map_inc(map);
+ if (err) {
+ kfree(node);
+ return ERR_PTR(err);
+ }
+
+ node->map = map;
+ node->map_entry = map_entry;
+ return node;
+}
+
+static void xsk_map_node_free(struct xsk_map_node *node)
+{
+ xsk_map_put(node->map);
+ kfree(node);
+}
+
+static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
+{
+ spin_lock_bh(&xs->map_list_lock);
+ list_add_tail(&node->node, &xs->map_list);
+ spin_unlock_bh(&xs->map_list_lock);
+}
+
+static void xsk_map_sock_delete(struct xdp_sock *xs,
+ struct xdp_sock **map_entry)
+{
+ struct xsk_map_node *n, *tmp;
+
+ spin_lock_bh(&xs->map_list_lock);
+ list_for_each_entry_safe(n, tmp, &xs->map_list, node) {
+ if (map_entry == n->map_entry) {
+ list_del(&n->node);
+ xsk_map_node_free(n);
+ }
+ }
+ spin_unlock_bh(&xs->map_list_lock);
+}
+
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
struct xsk_map *m;
@@ -34,6 +97,7 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&m->map, attr);
+ spin_lock_init(&m->lock);
cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
cost += sizeof(struct list_head) * num_possible_cpus();
@@ -71,21 +135,9 @@ free_m:
static void xsk_map_free(struct bpf_map *map)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
- int i;
bpf_clear_redirect_map(map);
synchronize_net();
-
- for (i = 0; i < map->max_entries; i++) {
- struct xdp_sock *xs;
-
- xs = m->xsk_map[i];
- if (!xs)
- continue;
-
- sock_put((struct sock *)xs);
- }
-
free_percpu(m->flush_list);
bpf_map_area_free(m->xsk_map);
kfree(m);
@@ -164,8 +216,9 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
+ struct xdp_sock *xs, *old_xs, **map_entry;
u32 i = *(u32 *)key, fd = *(u32 *)value;
- struct xdp_sock *xs, *old_xs;
+ struct xsk_map_node *node;
struct socket *sock;
int err;
@@ -173,8 +226,6 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
return -EINVAL;
if (unlikely(i >= m->map.max_entries))
return -E2BIG;
- if (unlikely(map_flags == BPF_NOEXIST))
- return -EEXIST;
sock = sockfd_lookup(fd, &err);
if (!sock)
@@ -192,32 +243,70 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
return -EOPNOTSUPP;
}
- sock_hold(sock->sk);
+ map_entry = &m->xsk_map[i];
+ node = xsk_map_node_alloc(m, map_entry);
+ if (IS_ERR(node)) {
+ sockfd_put(sock);
+ return PTR_ERR(node);
+ }
- old_xs = xchg(&m->xsk_map[i], xs);
+ spin_lock_bh(&m->lock);
+ old_xs = READ_ONCE(*map_entry);
+ if (old_xs == xs) {
+ err = 0;
+ goto out;
+ } else if (old_xs && map_flags == BPF_NOEXIST) {
+ err = -EEXIST;
+ goto out;
+ } else if (!old_xs && map_flags == BPF_EXIST) {
+ err = -ENOENT;
+ goto out;
+ }
+ xsk_map_sock_add(xs, node);
+ WRITE_ONCE(*map_entry, xs);
if (old_xs)
- sock_put((struct sock *)old_xs);
-
+ xsk_map_sock_delete(old_xs, map_entry);
+ spin_unlock_bh(&m->lock);
sockfd_put(sock);
return 0;
+
+out:
+ spin_unlock_bh(&m->lock);
+ sockfd_put(sock);
+ xsk_map_node_free(node);
+ return err;
}
static int xsk_map_delete_elem(struct bpf_map *map, void *key)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct xdp_sock *old_xs;
+ struct xdp_sock *old_xs, **map_entry;
int k = *(u32 *)key;
if (k >= map->max_entries)
return -EINVAL;
- old_xs = xchg(&m->xsk_map[k], NULL);
+ spin_lock_bh(&m->lock);
+ map_entry = &m->xsk_map[k];
+ old_xs = xchg(map_entry, NULL);
if (old_xs)
- sock_put((struct sock *)old_xs);
+ xsk_map_sock_delete(old_xs, map_entry);
+ spin_unlock_bh(&m->lock);
return 0;
}
+void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
+ struct xdp_sock **map_entry)
+{
+ spin_lock_bh(&map->lock);
+ if (READ_ONCE(*map_entry) == xs) {
+ WRITE_ONCE(*map_entry, NULL);
+ xsk_map_sock_delete(xs, map_entry);
+ }
+ spin_unlock_bh(&map->lock);
+}
+
const struct bpf_map_ops xsk_map_ops = {
.map_alloc = xsk_map_alloc,
.map_free = xsk_map_free,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 98da8998c25c..b09d7b1ffffd 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -520,7 +520,8 @@ config BPF_EVENTS
bool
default y
help
- This allows the user to attach BPF programs to kprobe events.
+ This allows the user to attach BPF programs to kprobe, uprobe, and
+ tracepoint events.
config DYNAMIC_EVENTS
def_bool n
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index c41705835cba..5ef3eccee27c 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -867,7 +867,7 @@ static struct bpf_test tests[] = {
},
CLASSIC,
{ },
- { { 4, 10 ^ 300 }, { 20, 10 ^ 300 } },
+ { { 4, 0xA ^ 300 }, { 20, 0xA ^ 300 } },
},
{
"SPILL_FILL",
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index 4bb418313720..3286f9d527d3 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -180,8 +180,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc)
static void in_cache_put(in_cache_entry *entry)
{
if (refcount_dec_and_test(&entry->use)) {
- memset(entry, 0, sizeof(in_cache_entry));
- kfree(entry);
+ kzfree(entry);
}
}
@@ -416,8 +415,7 @@ static eg_cache_entry *eg_cache_get_by_src_ip(__be32 ipaddr,
static void eg_cache_put(eg_cache_entry *entry)
{
if (refcount_dec_and_test(&entry->use)) {
- memset(entry, 0, sizeof(eg_cache_entry));
- kfree(entry);
+ kzfree(entry);
}
}
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index bd3da9af5ef6..45d8e1d5d033 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -216,9 +216,7 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
pvcc->chan.mtu += LLC_LEN;
break;
}
- pr_debug("Couldn't autodetect yet (skb: %02X %02X %02X %02X %02X %02X)\n",
- skb->data[0], skb->data[1], skb->data[2],
- skb->data[3], skb->data[4], skb->data[5]);
+ pr_debug("Couldn't autodetect yet (skb: %6ph)\n", skb->data);
goto error;
case e_vc:
break;
diff --git a/net/can/Kconfig b/net/can/Kconfig
index d4319aa3e1b1..d77042752457 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -53,6 +53,8 @@ config CAN_GW
They can be modified with AND/OR/XOR/SET operations as configured
by the netlink configuration interface known e.g. from iptables.
+source "net/can/j1939/Kconfig"
+
source "drivers/net/can/Kconfig"
endif
diff --git a/net/can/Makefile b/net/can/Makefile
index 1242bbbfe57f..08bd217fc051 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -15,3 +15,5 @@ can-bcm-y := bcm.o
obj-$(CONFIG_CAN_GW) += can-gw.o
can-gw-y := gw.o
+
+obj-$(CONFIG_CAN_J1939) += j1939/
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 9a9a51847c7c..5518a7d9eed9 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -58,6 +58,7 @@
#include <linux/can.h>
#include <linux/can/core.h>
#include <linux/can/skb.h>
+#include <linux/can/can-ml.h>
#include <linux/ratelimit.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -198,7 +199,7 @@ int can_send(struct sk_buff *skb, int loop)
{
struct sk_buff *newskb = NULL;
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- struct s_stats *can_stats = dev_net(skb->dev)->can.can_stats;
+ struct can_pkg_stats *pkg_stats = dev_net(skb->dev)->can.pkg_stats;
int err = -EINVAL;
if (skb->len == CAN_MTU) {
@@ -285,8 +286,8 @@ int can_send(struct sk_buff *skb, int loop)
netif_rx_ni(newskb);
/* update statistics */
- can_stats->tx_frames++;
- can_stats->tx_frames_delta++;
+ pkg_stats->tx_frames++;
+ pkg_stats->tx_frames_delta++;
return 0;
@@ -298,13 +299,15 @@ EXPORT_SYMBOL(can_send);
/* af_can rx path */
-static struct can_dev_rcv_lists *find_dev_rcv_lists(struct net *net,
- struct net_device *dev)
+static struct can_dev_rcv_lists *can_dev_rcv_lists_find(struct net *net,
+ struct net_device *dev)
{
- if (!dev)
- return net->can.can_rx_alldev_list;
- else
- return (struct can_dev_rcv_lists *)dev->ml_priv;
+ if (dev) {
+ struct can_ml_priv *ml_priv = dev->ml_priv;
+ return &ml_priv->dev_rcv_lists;
+ } else {
+ return net->can.rx_alldev_list;
+ }
}
/**
@@ -331,7 +334,7 @@ static unsigned int effhash(canid_t can_id)
}
/**
- * find_rcv_list - determine optimal filterlist inside device filter struct
+ * can_rcv_list_find - determine optimal filterlist inside device filter struct
* @can_id: pointer to CAN identifier of a given can_filter
* @mask: pointer to CAN mask of a given can_filter
* @d: pointer to the device filter struct
@@ -357,8 +360,8 @@ static unsigned int effhash(canid_t can_id)
 * Consistency checked mask.
* Reduced can_id to have a preprocessed filter compare value.
*/
-static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
- struct can_dev_rcv_lists *d)
+static struct hlist_head *can_rcv_list_find(canid_t *can_id, canid_t *mask,
+ struct can_dev_rcv_lists *dev_rcv_lists)
{
canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */
@@ -366,7 +369,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
if (*mask & CAN_ERR_FLAG) {
/* clear CAN_ERR_FLAG in filter entry */
*mask &= CAN_ERR_MASK;
- return &d->rx[RX_ERR];
+ return &dev_rcv_lists->rx[RX_ERR];
}
/* with cleared CAN_ERR_FLAG we have a simple mask/value filterpair */
@@ -382,26 +385,26 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
/* inverse can_id/can_mask filter */
if (inv)
- return &d->rx[RX_INV];
+ return &dev_rcv_lists->rx[RX_INV];
/* mask == 0 => no condition testing at receive time */
if (!(*mask))
- return &d->rx[RX_ALL];
+ return &dev_rcv_lists->rx[RX_ALL];
/* extra filterlists for the subscription of a single non-RTR can_id */
if (((*mask & CAN_EFF_RTR_FLAGS) == CAN_EFF_RTR_FLAGS) &&
!(*can_id & CAN_RTR_FLAG)) {
if (*can_id & CAN_EFF_FLAG) {
if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS))
- return &d->rx_eff[effhash(*can_id)];
+ return &dev_rcv_lists->rx_eff[effhash(*can_id)];
} else {
if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS))
- return &d->rx_sff[*can_id];
+ return &dev_rcv_lists->rx_sff[*can_id];
}
}
/* default: filter via can_id/can_mask */
- return &d->rx[RX_FIL];
+ return &dev_rcv_lists->rx[RX_FIL];
}
/**
@@ -438,10 +441,10 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
canid_t mask, void (*func)(struct sk_buff *, void *),
void *data, char *ident, struct sock *sk)
{
- struct receiver *r;
- struct hlist_head *rl;
- struct can_dev_rcv_lists *d;
- struct s_pstats *can_pstats = net->can.can_pstats;
+ struct receiver *rcv;
+ struct hlist_head *rcv_list;
+ struct can_dev_rcv_lists *dev_rcv_lists;
+ struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
int err = 0;
/* insert new receiver (dev,canid,mask) -> (func,data) */
@@ -452,36 +455,30 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
if (dev && !net_eq(net, dev_net(dev)))
return -ENODEV;
- r = kmem_cache_alloc(rcv_cache, GFP_KERNEL);
- if (!r)
+ rcv = kmem_cache_alloc(rcv_cache, GFP_KERNEL);
+ if (!rcv)
return -ENOMEM;
- spin_lock(&net->can.can_rcvlists_lock);
+ spin_lock_bh(&net->can.rcvlists_lock);
- d = find_dev_rcv_lists(net, dev);
- if (d) {
- rl = find_rcv_list(&can_id, &mask, d);
+ dev_rcv_lists = can_dev_rcv_lists_find(net, dev);
+ rcv_list = can_rcv_list_find(&can_id, &mask, dev_rcv_lists);
- r->can_id = can_id;
- r->mask = mask;
- r->matches = 0;
- r->func = func;
- r->data = data;
- r->ident = ident;
- r->sk = sk;
+ rcv->can_id = can_id;
+ rcv->mask = mask;
+ rcv->matches = 0;
+ rcv->func = func;
+ rcv->data = data;
+ rcv->ident = ident;
+ rcv->sk = sk;
- hlist_add_head_rcu(&r->list, rl);
- d->entries++;
-
- can_pstats->rcv_entries++;
- if (can_pstats->rcv_entries_max < can_pstats->rcv_entries)
- can_pstats->rcv_entries_max = can_pstats->rcv_entries;
- } else {
- kmem_cache_free(rcv_cache, r);
- err = -ENODEV;
- }
+ hlist_add_head_rcu(&rcv->list, rcv_list);
+ dev_rcv_lists->entries++;
- spin_unlock(&net->can.can_rcvlists_lock);
+ rcv_lists_stats->rcv_entries++;
+ rcv_lists_stats->rcv_entries_max = max(rcv_lists_stats->rcv_entries_max,
+ rcv_lists_stats->rcv_entries);
+ spin_unlock_bh(&net->can.rcvlists_lock);
return err;
}
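Since the hunk above reworks the body of can_rx_register(), a short reminder of how a protocol registers a receiver may help when reading the renamed variables (a hypothetical sketch; the example_* names are invented, while the prototype is the existing one from include/linux/can/core.h):

/* Hypothetical receiver registration from a CAN protocol module. */
#include <linux/can.h>
#include <linux/can/core.h>
#include <linux/skbuff.h>

static void example_rcv(struct sk_buff *skb, void *data)
{
	/* called once per matching CAN frame; data is the pointer passed below */
}

static int example_register(struct net *net, struct net_device *dev, void *priv)
{
	/* subscribe to SFF id 0x123, exact id match, no socket attached */
	return can_rx_register(net, dev, 0x123, CAN_SFF_MASK,
			       example_rcv, priv, "example", NULL);
}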
@@ -490,10 +487,10 @@ EXPORT_SYMBOL(can_rx_register);
/* can_rx_delete_receiver - rcu callback for single receiver entry removal */
static void can_rx_delete_receiver(struct rcu_head *rp)
{
- struct receiver *r = container_of(rp, struct receiver, rcu);
- struct sock *sk = r->sk;
+ struct receiver *rcv = container_of(rp, struct receiver, rcu);
+ struct sock *sk = rcv->sk;
- kmem_cache_free(rcv_cache, r);
+ kmem_cache_free(rcv_cache, rcv);
if (sk)
sock_put(sk);
}
@@ -513,10 +510,10 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
canid_t mask, void (*func)(struct sk_buff *, void *),
void *data)
{
- struct receiver *r = NULL;
- struct hlist_head *rl;
- struct s_pstats *can_pstats = net->can.can_pstats;
- struct can_dev_rcv_lists *d;
+ struct receiver *rcv = NULL;
+ struct hlist_head *rcv_list;
+ struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
+ struct can_dev_rcv_lists *dev_rcv_lists;
if (dev && dev->type != ARPHRD_CAN)
return;
@@ -524,83 +521,69 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
if (dev && !net_eq(net, dev_net(dev)))
return;
- spin_lock(&net->can.can_rcvlists_lock);
-
- d = find_dev_rcv_lists(net, dev);
- if (!d) {
- pr_err("BUG: receive list not found for dev %s, id %03X, mask %03X\n",
- DNAME(dev), can_id, mask);
- goto out;
- }
+ spin_lock_bh(&net->can.rcvlists_lock);
- rl = find_rcv_list(&can_id, &mask, d);
+ dev_rcv_lists = can_dev_rcv_lists_find(net, dev);
+ rcv_list = can_rcv_list_find(&can_id, &mask, dev_rcv_lists);
/* Search the receiver list for the item to delete. This should
* exist, since no receiver may be unregistered that hasn't
* been registered before.
*/
-
- hlist_for_each_entry_rcu(r, rl, list) {
- if (r->can_id == can_id && r->mask == mask &&
- r->func == func && r->data == data)
+ hlist_for_each_entry_rcu(rcv, rcv_list, list) {
+ if (rcv->can_id == can_id && rcv->mask == mask &&
+ rcv->func == func && rcv->data == data)
break;
}
/* Check for bugs in CAN protocol implementations using af_can.c:
- * 'r' will be NULL if no matching list item was found for removal.
+ * 'rcv' will be NULL if no matching list item was found for removal.
*/
-
- if (!r) {
+ if (!rcv) {
WARN(1, "BUG: receive list entry not found for dev %s, id %03X, mask %03X\n",
DNAME(dev), can_id, mask);
goto out;
}
- hlist_del_rcu(&r->list);
- d->entries--;
+ hlist_del_rcu(&rcv->list);
+ dev_rcv_lists->entries--;
- if (can_pstats->rcv_entries > 0)
- can_pstats->rcv_entries--;
-
- /* remove device structure requested by NETDEV_UNREGISTER */
- if (d->remove_on_zero_entries && !d->entries) {
- kfree(d);
- dev->ml_priv = NULL;
- }
+ if (rcv_lists_stats->rcv_entries > 0)
+ rcv_lists_stats->rcv_entries--;
out:
- spin_unlock(&net->can.can_rcvlists_lock);
+ spin_unlock_bh(&net->can.rcvlists_lock);
/* schedule the receiver item for deletion */
- if (r) {
- if (r->sk)
- sock_hold(r->sk);
- call_rcu(&r->rcu, can_rx_delete_receiver);
+ if (rcv) {
+ if (rcv->sk)
+ sock_hold(rcv->sk);
+ call_rcu(&rcv->rcu, can_rx_delete_receiver);
}
}
EXPORT_SYMBOL(can_rx_unregister);
-static inline void deliver(struct sk_buff *skb, struct receiver *r)
+static inline void deliver(struct sk_buff *skb, struct receiver *rcv)
{
- r->func(skb, r->data);
- r->matches++;
+ rcv->func(skb, rcv->data);
+ rcv->matches++;
}
-static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb)
+static int can_rcv_filter(struct can_dev_rcv_lists *dev_rcv_lists, struct sk_buff *skb)
{
- struct receiver *r;
+ struct receiver *rcv;
int matches = 0;
struct can_frame *cf = (struct can_frame *)skb->data;
canid_t can_id = cf->can_id;
- if (d->entries == 0)
+ if (dev_rcv_lists->entries == 0)
return 0;
if (can_id & CAN_ERR_FLAG) {
/* check for error message frame entries only */
- hlist_for_each_entry_rcu(r, &d->rx[RX_ERR], list) {
- if (can_id & r->mask) {
- deliver(skb, r);
+ hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_ERR], list) {
+ if (can_id & rcv->mask) {
+ deliver(skb, rcv);
matches++;
}
}
@@ -608,23 +591,23 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb)
}
/* check for unfiltered entries */
- hlist_for_each_entry_rcu(r, &d->rx[RX_ALL], list) {
- deliver(skb, r);
+ hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_ALL], list) {
+ deliver(skb, rcv);
matches++;
}
/* check for can_id/mask entries */
- hlist_for_each_entry_rcu(r, &d->rx[RX_FIL], list) {
- if ((can_id & r->mask) == r->can_id) {
- deliver(skb, r);
+ hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_FIL], list) {
+ if ((can_id & rcv->mask) == rcv->can_id) {
+ deliver(skb, rcv);
matches++;
}
}
/* check for inverted can_id/mask entries */
- hlist_for_each_entry_rcu(r, &d->rx[RX_INV], list) {
- if ((can_id & r->mask) != r->can_id) {
- deliver(skb, r);
+ hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_INV], list) {
+ if ((can_id & rcv->mask) != rcv->can_id) {
+ deliver(skb, rcv);
matches++;
}
}
@@ -634,16 +617,16 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb)
return matches;
if (can_id & CAN_EFF_FLAG) {
- hlist_for_each_entry_rcu(r, &d->rx_eff[effhash(can_id)], list) {
- if (r->can_id == can_id) {
- deliver(skb, r);
+ hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx_eff[effhash(can_id)], list) {
+ if (rcv->can_id == can_id) {
+ deliver(skb, rcv);
matches++;
}
}
} else {
can_id &= CAN_SFF_MASK;
- hlist_for_each_entry_rcu(r, &d->rx_sff[can_id], list) {
- deliver(skb, r);
+ hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx_sff[can_id], list) {
+ deliver(skb, rcv);
matches++;
}
}
@@ -653,14 +636,14 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb)
static void can_receive(struct sk_buff *skb, struct net_device *dev)
{
- struct can_dev_rcv_lists *d;
+ struct can_dev_rcv_lists *dev_rcv_lists;
struct net *net = dev_net(dev);
- struct s_stats *can_stats = net->can.can_stats;
+ struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
int matches;
/* update statistics */
- can_stats->rx_frames++;
- can_stats->rx_frames_delta++;
+ pkg_stats->rx_frames++;
+ pkg_stats->rx_frames_delta++;
/* create non-zero unique skb identifier together with *skb */
while (!(can_skb_prv(skb)->skbcnt))
@@ -669,12 +652,11 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
rcu_read_lock();
/* deliver the packet to sockets listening on all devices */
- matches = can_rcv_filter(net->can.can_rx_alldev_list, skb);
+ matches = can_rcv_filter(net->can.rx_alldev_list, skb);
/* find receive list for this device */
- d = find_dev_rcv_lists(net, dev);
- if (d)
- matches += can_rcv_filter(d, skb);
+ dev_rcv_lists = can_dev_rcv_lists_find(net, dev);
+ matches += can_rcv_filter(dev_rcv_lists, skb);
rcu_read_unlock();
@@ -682,8 +664,8 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
consume_skb(skb);
if (matches > 0) {
- can_stats->matches++;
- can_stats->matches_delta++;
+ pkg_stats->matches++;
+ pkg_stats->matches_delta++;
}
}
@@ -789,41 +771,14 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct can_dev_rcv_lists *d;
if (dev->type != ARPHRD_CAN)
return NOTIFY_DONE;
switch (msg) {
case NETDEV_REGISTER:
-
- /* create new dev_rcv_lists for this device */
- d = kzalloc(sizeof(*d), GFP_KERNEL);
- if (!d)
- return NOTIFY_DONE;
- BUG_ON(dev->ml_priv);
- dev->ml_priv = d;
-
- break;
-
- case NETDEV_UNREGISTER:
- spin_lock(&dev_net(dev)->can.can_rcvlists_lock);
-
- d = dev->ml_priv;
- if (d) {
- if (d->entries) {
- d->remove_on_zero_entries = 1;
- } else {
- kfree(d);
- dev->ml_priv = NULL;
- }
- } else {
- pr_err("can: notifier: receive list not found for dev %s\n",
- dev->name);
- }
-
- spin_unlock(&dev_net(dev)->can.can_rcvlists_lock);
-
+ WARN(!dev->ml_priv,
+ "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n");
break;
}
@@ -832,66 +787,51 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
static int can_pernet_init(struct net *net)
{
- spin_lock_init(&net->can.can_rcvlists_lock);
- net->can.can_rx_alldev_list =
- kzalloc(sizeof(*net->can.can_rx_alldev_list), GFP_KERNEL);
- if (!net->can.can_rx_alldev_list)
+ spin_lock_init(&net->can.rcvlists_lock);
+ net->can.rx_alldev_list =
+ kzalloc(sizeof(*net->can.rx_alldev_list), GFP_KERNEL);
+ if (!net->can.rx_alldev_list)
goto out;
- net->can.can_stats = kzalloc(sizeof(*net->can.can_stats), GFP_KERNEL);
- if (!net->can.can_stats)
- goto out_free_alldev_list;
- net->can.can_pstats = kzalloc(sizeof(*net->can.can_pstats), GFP_KERNEL);
- if (!net->can.can_pstats)
- goto out_free_can_stats;
+ net->can.pkg_stats = kzalloc(sizeof(*net->can.pkg_stats), GFP_KERNEL);
+ if (!net->can.pkg_stats)
+ goto out_free_rx_alldev_list;
+ net->can.rcv_lists_stats = kzalloc(sizeof(*net->can.rcv_lists_stats), GFP_KERNEL);
+ if (!net->can.rcv_lists_stats)
+ goto out_free_pkg_stats;
if (IS_ENABLED(CONFIG_PROC_FS)) {
/* the statistics are updated every second (timer triggered) */
if (stats_timer) {
- timer_setup(&net->can.can_stattimer, can_stat_update,
+ timer_setup(&net->can.stattimer, can_stat_update,
0);
- mod_timer(&net->can.can_stattimer,
+ mod_timer(&net->can.stattimer,
round_jiffies(jiffies + HZ));
}
- net->can.can_stats->jiffies_init = jiffies;
+ net->can.pkg_stats->jiffies_init = jiffies;
can_init_proc(net);
}
return 0;
- out_free_can_stats:
- kfree(net->can.can_stats);
- out_free_alldev_list:
- kfree(net->can.can_rx_alldev_list);
+ out_free_pkg_stats:
+ kfree(net->can.pkg_stats);
+ out_free_rx_alldev_list:
+ kfree(net->can.rx_alldev_list);
out:
return -ENOMEM;
}
static void can_pernet_exit(struct net *net)
{
- struct net_device *dev;
-
if (IS_ENABLED(CONFIG_PROC_FS)) {
can_remove_proc(net);
if (stats_timer)
- del_timer_sync(&net->can.can_stattimer);
+ del_timer_sync(&net->can.stattimer);
}
- /* remove created dev_rcv_lists from still registered CAN devices */
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
- if (dev->type == ARPHRD_CAN && dev->ml_priv) {
- struct can_dev_rcv_lists *d = dev->ml_priv;
-
- BUG_ON(d->entries);
- kfree(d);
- dev->ml_priv = NULL;
- }
- }
- rcu_read_unlock();
-
- kfree(net->can.can_rx_alldev_list);
- kfree(net->can.can_stats);
- kfree(net->can.can_pstats);
+ kfree(net->can.rx_alldev_list);
+ kfree(net->can.pkg_stats);
+ kfree(net->can.rcv_lists_stats);
}
/* af_can module init/exit functions */
diff --git a/net/can/af_can.h b/net/can/af_can.h
index 9cdb79083623..7c2d9161e224 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -60,25 +60,10 @@ struct receiver {
struct rcu_head rcu;
};
-#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS)
-#define CAN_EFF_RCV_HASH_BITS 10
-#define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS)
-
-enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX };
-
-/* per device receive filters linked at dev->ml_priv */
-struct can_dev_rcv_lists {
- struct hlist_head rx[RX_MAX];
- struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ];
- struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ];
- int remove_on_zero_entries;
- int entries;
-};
-
/* statistic structures */
/* can be reset e.g. by can_init_stats() */
-struct s_stats {
+struct can_pkg_stats {
unsigned long jiffies_init;
unsigned long rx_frames;
@@ -103,7 +88,7 @@ struct s_stats {
};
/* persistent statistics */
-struct s_pstats {
+struct can_rcv_lists_stats {
unsigned long stats_reset;
unsigned long user_reset;
unsigned long rcv_entries;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 28fd1a1c8487..c96fa0f33db3 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1294,7 +1294,7 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
/* no bound device as default => check msg_name */
DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name);
- if (msg->msg_namelen < sizeof(*addr))
+ if (msg->msg_namelen < CAN_REQUIRED_SIZE(*addr, can_ifindex))
return -EINVAL;
if (addr->can_family != AF_CAN)
@@ -1536,7 +1536,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
struct net *net = sock_net(sk);
int ret = 0;
- if (len < sizeof(*addr))
+ if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex))
return -EINVAL;
lock_sock(sk);
diff --git a/net/can/j1939/Kconfig b/net/can/j1939/Kconfig
new file mode 100644
index 000000000000..2998298b71ec
--- /dev/null
+++ b/net/can/j1939/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# SAE J1939 network layer core configuration
+#
+
+config CAN_J1939
+ tristate "SAE J1939"
+ depends on CAN
+ help
+ SAE J1939
+ Say Y to have in-kernel support for the j1939 socket type. This
+ allows communication according to SAE j1939.
+ The relevant parts in kernel are
+ SAE j1939-21 (datalink & transport protocol)
+ & SAE j1939-81 (network management).
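For orientation, this is roughly how the new socket type is meant to be used from user space (a minimal sketch, not part of this patch; it assumes the UAPI constants CAN_J1939, J1939_NO_NAME and J1939_NO_PGN from <linux/can/j1939.h>):

/* Minimal user-space sketch: open a J1939 socket bound to a static
 * source address.  Assumes <stdint.h>, <linux/can.h>,
 * <linux/can/j1939.h>, <net/if.h>, <sys/socket.h> and <unistd.h>.
 */
static int j1939_open(const char *ifname, uint8_t addr)
{
	struct sockaddr_can baddr = {
		.can_family = AF_CAN,
		.can_ifindex = if_nametoindex(ifname),
		.can_addr.j1939 = {
			.name = J1939_NO_NAME,	/* no address claiming */
			.addr = addr,		/* static source address */
			.pgn = J1939_NO_PGN,	/* receive any PGN */
		},
	};
	int sock = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);

	if (sock < 0)
		return -1;
	if (bind(sock, (struct sockaddr *)&baddr, sizeof(baddr)) < 0) {
		close(sock);
		return -1;
	}
	return sock;
}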
diff --git a/net/can/j1939/Makefile b/net/can/j1939/Makefile
new file mode 100644
index 000000000000..19181bdae173
--- /dev/null
+++ b/net/can/j1939/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_CAN_J1939) += can-j1939.o
+
+can-j1939-objs := \
+ address-claim.o \
+ bus.o \
+ main.o \
+ socket.o \
+ transport.o
diff --git a/net/can/j1939/address-claim.c b/net/can/j1939/address-claim.c
new file mode 100644
index 000000000000..f33c47327927
--- /dev/null
+++ b/net/can/j1939/address-claim.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+// Kurt Van Dijck <[email protected]>
+// Copyright (c) 2010-2011 EIA Electronics,
+// Pieter Beyens <[email protected]>
+// Copyright (c) 2017-2019 Pengutronix,
+// Marc Kleine-Budde <[email protected]>
+// Copyright (c) 2017-2019 Pengutronix,
+// Oleksij Rempel <[email protected]>
+
+/* J1939 Address Claiming.
+ * Address Claiming in the kernel
+ * - keeps track of the AC states of ECU's,
+ * - resolves NAME<=>SA taking into account the AC states of ECU's.
+ *
+ * All Address Claim msgs (including host-originated msg) are processed
+ * at the receive path (a sent msg is always received again via CAN echo).
+ * As such, the processing of AC msgs is done in the order on which msgs
+ * are sent on the bus.
+ *
+ * This module doesn't send msgs itself (e.g. replies on Address Claims),
+ * this is the responsibility of a user space application or daemon.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include "j1939-priv.h"
+
+static inline name_t j1939_skb_to_name(const struct sk_buff *skb)
+{
+ return le64_to_cpup((__le64 *)skb->data);
+}
+
+static inline bool j1939_ac_msg_is_request(struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ int req_pgn;
+
+ if (skb->len < 3 || skcb->addr.pgn != J1939_PGN_REQUEST)
+ return false;
+
+ req_pgn = skb->data[0] | (skb->data[1] << 8) | (skb->data[2] << 16);
+
+ return req_pgn == J1939_PGN_ADDRESS_CLAIMED;
+}
+
+static int j1939_ac_verify_outgoing(struct j1939_priv *priv,
+ struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+
+ if (skb->len != 8) {
+ netdev_notice(priv->ndev, "tx address claim with dlc %i\n",
+ skb->len);
+ return -EPROTO;
+ }
+
+ if (skcb->addr.src_name != j1939_skb_to_name(skb)) {
+ netdev_notice(priv->ndev, "tx address claim with different name\n");
+ return -EPROTO;
+ }
+
+ if (skcb->addr.sa == J1939_NO_ADDR) {
+ netdev_notice(priv->ndev, "tx address claim with broadcast sa\n");
+ return -EPROTO;
+ }
+
+ /* ac must always be a broadcast */
+ if (skcb->addr.dst_name || skcb->addr.da != J1939_NO_ADDR) {
+ netdev_notice(priv->ndev, "tx address claim with dest, not broadcast\n");
+ return -EPROTO;
+ }
+ return 0;
+}
+
+int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ int ret;
+ u8 addr;
+
+ /* network mgmt: address claiming msgs */
+ if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) {
+ struct j1939_ecu *ecu;
+
+ ret = j1939_ac_verify_outgoing(priv, skb);
+ /* return both when failure & when successful */
+ if (ret < 0)
+ return ret;
+ ecu = j1939_ecu_get_by_name(priv, skcb->addr.src_name);
+ if (!ecu)
+ return -ENODEV;
+
+ if (ecu->addr != skcb->addr.sa)
+ /* hold further traffic for ecu, remove from parent */
+ j1939_ecu_unmap(ecu);
+ j1939_ecu_put(ecu);
+ } else if (skcb->addr.src_name) {
+ /* assign source address */
+ addr = j1939_name_to_addr(priv, skcb->addr.src_name);
+ if (!j1939_address_is_unicast(addr) &&
+ !j1939_ac_msg_is_request(skb)) {
+ netdev_notice(priv->ndev, "tx drop: invalid sa for name 0x%016llx\n",
+ skcb->addr.src_name);
+ return -EADDRNOTAVAIL;
+ }
+ skcb->addr.sa = addr;
+ }
+
+ /* assign destination address */
+ if (skcb->addr.dst_name) {
+ addr = j1939_name_to_addr(priv, skcb->addr.dst_name);
+ if (!j1939_address_is_unicast(addr)) {
+ netdev_notice(priv->ndev, "tx drop: invalid da for name 0x%016llx\n",
+ skcb->addr.dst_name);
+ return -EADDRNOTAVAIL;
+ }
+ skcb->addr.da = addr;
+ }
+ return 0;
+}
+
+static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_ecu *ecu, *prev;
+ name_t name;
+
+ if (skb->len != 8) {
+ netdev_notice(priv->ndev, "rx address claim with wrong dlc %i\n",
+ skb->len);
+ return;
+ }
+
+ name = j1939_skb_to_name(skb);
+ skcb->addr.src_name = name;
+ if (!name) {
+ netdev_notice(priv->ndev, "rx address claim without name\n");
+ return;
+ }
+
+ if (!j1939_address_is_valid(skcb->addr.sa)) {
+ netdev_notice(priv->ndev, "rx address claim with broadcast sa\n");
+ return;
+ }
+
+ write_lock_bh(&priv->lock);
+
+ /* A few words on the ECU ref counting:
+ *
+ * First we get an ECU handle, either with
+ * j1939_ecu_get_by_name_locked() (increments the ref counter)
+ * or j1939_ecu_create_locked() (initializes an ECU object
+ * with a ref counter of 1).
+ *
+ * j1939_ecu_unmap_locked() will decrement the ref counter,
+ * but only if the ECU was mapped before. So "ecu" still
+ * belongs to us.
+ *
+ * j1939_ecu_timer_start() will increment the ref counter
+ * before it starts the timer, so we can put the ecu when
+ * leaving this function.
+ */
+ ecu = j1939_ecu_get_by_name_locked(priv, name);
+ if (!ecu && j1939_address_is_unicast(skcb->addr.sa))
+ ecu = j1939_ecu_create_locked(priv, name);
+
+ if (IS_ERR_OR_NULL(ecu))
+ goto out_unlock_bh;
+
+ /* cancel pending (previous) address claim */
+ j1939_ecu_timer_cancel(ecu);
+
+ if (j1939_address_is_idle(skcb->addr.sa)) {
+ j1939_ecu_unmap_locked(ecu);
+ goto out_ecu_put;
+ }
+
+ /* save new addr */
+ if (ecu->addr != skcb->addr.sa)
+ j1939_ecu_unmap_locked(ecu);
+ ecu->addr = skcb->addr.sa;
+
+ prev = j1939_ecu_get_by_addr_locked(priv, skcb->addr.sa);
+ if (prev) {
+ if (ecu->name > prev->name) {
+ j1939_ecu_unmap_locked(ecu);
+ j1939_ecu_put(prev);
+ goto out_ecu_put;
+ } else {
+ /* kick prev if less or equal */
+ j1939_ecu_unmap_locked(prev);
+ j1939_ecu_put(prev);
+ }
+ }
+
+ j1939_ecu_timer_start(ecu);
+ out_ecu_put:
+ j1939_ecu_put(ecu);
+ out_unlock_bh:
+ write_unlock_bh(&priv->lock);
+}
+
+void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_ecu *ecu;
+
+ /* network mgmt */
+ if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) {
+ j1939_ac_process(priv, skb);
+ } else if (j1939_address_is_unicast(skcb->addr.sa)) {
+ /* assign source name */
+ ecu = j1939_ecu_get_by_addr(priv, skcb->addr.sa);
+ if (ecu) {
+ skcb->addr.src_name = ecu->name;
+ j1939_ecu_put(ecu);
+ }
+ }
+
+ /* assign destination name */
+ ecu = j1939_ecu_get_by_addr(priv, skcb->addr.da);
+ if (ecu) {
+ skcb->addr.dst_name = ecu->name;
+ j1939_ecu_put(ecu);
+ }
+}
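To make the tx rules in j1939_ac_verify_outgoing() above concrete, here is a hedged user-space sketch of transmitting a claim: an 8-byte little-endian NAME, sent broadcast to PGN J1939_PGN_ADDRESS_CLAIMED on a socket that was bound with that same NAME and a proposed source address. The constants come from the UAPI header and are assumptions here, not something this file defines:

/* Sketch only: transmit an address claim on an already bound
 * CAN_J1939 socket.  Assumes <stdint.h>, <sys/socket.h> and
 * <linux/can/j1939.h>; the socket must have been bound with
 * .name == name so the kernel-side verification passes.
 */
static int send_address_claim(int sock, uint64_t name, int ifindex)
{
	struct sockaddr_can dst = {
		.can_family = AF_CAN,
		.can_ifindex = ifindex,
		.can_addr.j1939 = {
			.name = J1939_NO_NAME,
			.addr = J1939_NO_ADDR,	/* must be broadcast */
			.pgn = J1939_PGN_ADDRESS_CLAIMED,
		},
	};
	uint8_t data[8];
	int i;

	/* payload is the NAME in little-endian order, dlc must be 8 */
	for (i = 0; i < 8; i++)
		data[i] = (name >> (8 * i)) & 0xff;

	return sendto(sock, data, sizeof(data), 0,
		      (struct sockaddr *)&dst, sizeof(dst));
}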
diff --git a/net/can/j1939/bus.c b/net/can/j1939/bus.c
new file mode 100644
index 000000000000..486687901602
--- /dev/null
+++ b/net/can/j1939/bus.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+// Kurt Van Dijck <[email protected]>
+// Copyright (c) 2017-2019 Pengutronix,
+// Marc Kleine-Budde <[email protected]>
+// Copyright (c) 2017-2019 Pengutronix,
+// Oleksij Rempel <[email protected]>
+
+/* bus for j1939 remote devices
+ * Since rtnetlink, no real bus is used.
+ */
+
+#include <net/sock.h>
+
+#include "j1939-priv.h"
+
+static void __j1939_ecu_release(struct kref *kref)
+{
+ struct j1939_ecu *ecu = container_of(kref, struct j1939_ecu, kref);
+ struct j1939_priv *priv = ecu->priv;
+
+ list_del(&ecu->list);
+ kfree(ecu);
+ j1939_priv_put(priv);
+}
+
+void j1939_ecu_put(struct j1939_ecu *ecu)
+{
+ kref_put(&ecu->kref, __j1939_ecu_release);
+}
+
+static void j1939_ecu_get(struct j1939_ecu *ecu)
+{
+ kref_get(&ecu->kref);
+}
+
+static bool j1939_ecu_is_mapped_locked(struct j1939_ecu *ecu)
+{
+ struct j1939_priv *priv = ecu->priv;
+
+ lockdep_assert_held(&priv->lock);
+
+ return j1939_ecu_find_by_addr_locked(priv, ecu->addr) == ecu;
+}
+
+/* ECU device interface */
+/* map ECU to a bus address space */
+static void j1939_ecu_map_locked(struct j1939_ecu *ecu)
+{
+ struct j1939_priv *priv = ecu->priv;
+ struct j1939_addr_ent *ent;
+
+ lockdep_assert_held(&priv->lock);
+
+ if (!j1939_address_is_unicast(ecu->addr))
+ return;
+
+ ent = &priv->ents[ecu->addr];
+
+ if (ent->ecu) {
+ netdev_warn(priv->ndev, "Trying to map already mapped ECU, addr: 0x%02x, name: 0x%016llx. Skip it.\n",
+ ecu->addr, ecu->name);
+ return;
+ }
+
+ j1939_ecu_get(ecu);
+ ent->ecu = ecu;
+ ent->nusers += ecu->nusers;
+}
+
+/* unmap ECU from a bus address space */
+void j1939_ecu_unmap_locked(struct j1939_ecu *ecu)
+{
+ struct j1939_priv *priv = ecu->priv;
+ struct j1939_addr_ent *ent;
+
+ lockdep_assert_held(&priv->lock);
+
+ if (!j1939_address_is_unicast(ecu->addr))
+ return;
+
+ if (!j1939_ecu_is_mapped_locked(ecu))
+ return;
+
+ ent = &priv->ents[ecu->addr];
+ ent->ecu = NULL;
+ ent->nusers -= ecu->nusers;
+ j1939_ecu_put(ecu);
+}
+
+void j1939_ecu_unmap(struct j1939_ecu *ecu)
+{
+ write_lock_bh(&ecu->priv->lock);
+ j1939_ecu_unmap_locked(ecu);
+ write_unlock_bh(&ecu->priv->lock);
+}
+
+void j1939_ecu_unmap_all(struct j1939_priv *priv)
+{
+ int i;
+
+ write_lock_bh(&priv->lock);
+ for (i = 0; i < ARRAY_SIZE(priv->ents); i++)
+ if (priv->ents[i].ecu)
+ j1939_ecu_unmap_locked(priv->ents[i].ecu);
+ write_unlock_bh(&priv->lock);
+}
+
+void j1939_ecu_timer_start(struct j1939_ecu *ecu)
+{
+ /* The ECU is held here and released in the
+ * j1939_ecu_timer_handler() or j1939_ecu_timer_cancel().
+ */
+ j1939_ecu_get(ecu);
+
+ /* Schedule timer in 250 msec to commit address change. */
+ hrtimer_start(&ecu->ac_timer, ms_to_ktime(250),
+ HRTIMER_MODE_REL_SOFT);
+}
+
+void j1939_ecu_timer_cancel(struct j1939_ecu *ecu)
+{
+ if (hrtimer_cancel(&ecu->ac_timer))
+ j1939_ecu_put(ecu);
+}
+
+static enum hrtimer_restart j1939_ecu_timer_handler(struct hrtimer *hrtimer)
+{
+ struct j1939_ecu *ecu =
+ container_of(hrtimer, struct j1939_ecu, ac_timer);
+ struct j1939_priv *priv = ecu->priv;
+
+ write_lock_bh(&priv->lock);
+ /* TODO: can we test if ecu->addr is unicast before starting
+ * the timer?
+ */
+ j1939_ecu_map_locked(ecu);
+
+ /* The corresponding j1939_ecu_get() is in
+ * j1939_ecu_timer_start().
+ */
+ j1939_ecu_put(ecu);
+ write_unlock_bh(&priv->lock);
+
+ return HRTIMER_NORESTART;
+}
+
+struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name)
+{
+ struct j1939_ecu *ecu;
+
+ lockdep_assert_held(&priv->lock);
+
+ ecu = kzalloc(sizeof(*ecu), gfp_any());
+ if (!ecu)
+ return ERR_PTR(-ENOMEM);
+ kref_init(&ecu->kref);
+ ecu->addr = J1939_IDLE_ADDR;
+ ecu->name = name;
+
+ hrtimer_init(&ecu->ac_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
+ ecu->ac_timer.function = j1939_ecu_timer_handler;
+ INIT_LIST_HEAD(&ecu->list);
+
+ j1939_priv_get(priv);
+ ecu->priv = priv;
+ list_add_tail(&ecu->list, &priv->ecus);
+
+ return ecu;
+}
+
+struct j1939_ecu *j1939_ecu_find_by_addr_locked(struct j1939_priv *priv,
+ u8 addr)
+{
+ lockdep_assert_held(&priv->lock);
+
+ return priv->ents[addr].ecu;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_addr_locked(struct j1939_priv *priv, u8 addr)
+{
+ struct j1939_ecu *ecu;
+
+ lockdep_assert_held(&priv->lock);
+
+ if (!j1939_address_is_unicast(addr))
+ return NULL;
+
+ ecu = j1939_ecu_find_by_addr_locked(priv, addr);
+ if (ecu)
+ j1939_ecu_get(ecu);
+
+ return ecu;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_addr(struct j1939_priv *priv, u8 addr)
+{
+ struct j1939_ecu *ecu;
+
+ read_lock_bh(&priv->lock);
+ ecu = j1939_ecu_get_by_addr_locked(priv, addr);
+ read_unlock_bh(&priv->lock);
+
+ return ecu;
+}
+
+/* get pointer to ecu without increasing ref counter */
+static struct j1939_ecu *j1939_ecu_find_by_name_locked(struct j1939_priv *priv,
+ name_t name)
+{
+ struct j1939_ecu *ecu;
+
+ lockdep_assert_held(&priv->lock);
+
+ list_for_each_entry(ecu, &priv->ecus, list) {
+ if (ecu->name == name)
+ return ecu;
+ }
+
+ return NULL;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_name_locked(struct j1939_priv *priv,
+ name_t name)
+{
+ struct j1939_ecu *ecu;
+
+ lockdep_assert_held(&priv->lock);
+
+ if (!name)
+ return NULL;
+
+ ecu = j1939_ecu_find_by_name_locked(priv, name);
+ if (ecu)
+ j1939_ecu_get(ecu);
+
+ return ecu;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_name(struct j1939_priv *priv, name_t name)
+{
+ struct j1939_ecu *ecu;
+
+ read_lock_bh(&priv->lock);
+ ecu = j1939_ecu_get_by_name_locked(priv, name);
+ read_unlock_bh(&priv->lock);
+
+ return ecu;
+}
+
+u8 j1939_name_to_addr(struct j1939_priv *priv, name_t name)
+{
+ struct j1939_ecu *ecu;
+ int addr = J1939_IDLE_ADDR;
+
+ if (!name)
+ return J1939_NO_ADDR;
+
+ read_lock_bh(&priv->lock);
+ ecu = j1939_ecu_find_by_name_locked(priv, name);
+ if (ecu && j1939_ecu_is_mapped_locked(ecu))
+ /* ecu's SA is registered */
+ addr = ecu->addr;
+
+ read_unlock_bh(&priv->lock);
+
+ return addr;
+}
+
+/* TX addr/name accounting
+ * Transport protocol needs to know if a SA is local or not
+ * These functions originate from userspace manipulating sockets,
+ * so locking is straightforward
+ */
+
+int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa)
+{
+ struct j1939_ecu *ecu;
+ int err = 0;
+
+ write_lock_bh(&priv->lock);
+
+ if (j1939_address_is_unicast(sa))
+ priv->ents[sa].nusers++;
+
+ if (!name)
+ goto done;
+
+ ecu = j1939_ecu_get_by_name_locked(priv, name);
+ if (!ecu)
+ ecu = j1939_ecu_create_locked(priv, name);
+ err = PTR_ERR_OR_ZERO(ecu);
+ if (err)
+ goto done;
+
+ ecu->nusers++;
+ /* TODO: do we care if ecu->addr != sa? */
+ if (j1939_ecu_is_mapped_locked(ecu))
+ /* ecu's sa is active already */
+ priv->ents[ecu->addr].nusers++;
+
+ done:
+ write_unlock_bh(&priv->lock);
+
+ return err;
+}
+
+void j1939_local_ecu_put(struct j1939_priv *priv, name_t name, u8 sa)
+{
+ struct j1939_ecu *ecu;
+
+ write_lock_bh(&priv->lock);
+
+ if (j1939_address_is_unicast(sa))
+ priv->ents[sa].nusers--;
+
+ if (!name)
+ goto done;
+
+ ecu = j1939_ecu_find_by_name_locked(priv, name);
+ if (WARN_ON_ONCE(!ecu))
+ goto done;
+
+ ecu->nusers--;
+ /* TODO: do we care if ecu->addr != sa? */
+ if (j1939_ecu_is_mapped_locked(ecu))
+ /* ecu's sa is active already */
+ priv->ents[ecu->addr].nusers--;
+ j1939_ecu_put(ecu);
+
+ done:
+ write_unlock_bh(&priv->lock);
+}
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
new file mode 100644
index 000000000000..12369b604ce9
--- /dev/null
+++ b/net/can/j1939/j1939-priv.h
@@ -0,0 +1,338 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2010-2011 EIA Electronics,
+// Kurt Van Dijck <[email protected]>
+// Copyright (c) 2017-2019 Pengutronix,
+// Marc Kleine-Budde <[email protected]>
+// Copyright (c) 2017-2019 Pengutronix,
+// Oleksij Rempel <[email protected]>
+
+#ifndef _J1939_PRIV_H_
+#define _J1939_PRIV_H_
+
+#include <linux/can/j1939.h>
+#include <net/sock.h>
+
+/* Timeout to receive the abort signal over loopback. In case the CAN
+ * bus is open, the timeout should be triggered.
+ */
+#define J1939_XTP_ABORT_TIMEOUT_MS 500
+#define J1939_SIMPLE_ECHO_TIMEOUT_MS (10 * 1000)
+
+struct j1939_session;
+enum j1939_sk_errqueue_type {
+ J1939_ERRQUEUE_ACK,
+ J1939_ERRQUEUE_SCHED,
+ J1939_ERRQUEUE_ABORT,
+};
+
+/* j1939 devices */
+struct j1939_ecu {
+ struct list_head list;
+ name_t name;
+ u8 addr;
+
+ /* indicates that this ecu successfully claimed @sa as its address */
+ struct hrtimer ac_timer;
+ struct kref kref;
+ struct j1939_priv *priv;
+
+ /* count users, to help transport protocol decide for interaction */
+ int nusers;
+};
+
+struct j1939_priv {
+ struct list_head ecus;
+ /* local list entry in priv
+ * These allow irq (& softirq) context lookups on j1939 devices
+ * This approach (separate lists) is chosen because the two alternatives
+ * are no easier, or are even wrong:
+ * 1) using the pure kobject methods involves mutexes, which are not
+ * allowed in irq context;
+ * 2) duplicating data structures would require a lot of synchronization
+ * code.
+ */
+
+ /* segments need a lock to protect the above list */
+ rwlock_t lock;
+
+ struct net_device *ndev;
+
+ /* list of 256 ecu ptrs, that cache the claimed addresses.
+ * also protected by the above lock
+ */
+ struct j1939_addr_ent {
+ struct j1939_ecu *ecu;
+ /* count users, to help transport protocol */
+ int nusers;
+ } ents[256];
+
+ struct kref kref;
+
+ /* List of active sessions to prevent start of conflicting
+ * one.
+ *
+ * Do not start two sessions of same type, addresses and
+ * direction.
+ */
+ struct list_head active_session_list;
+
+ /* protects active_session_list */
+ spinlock_t active_session_list_lock;
+
+ unsigned int tp_max_packet_size;
+
+ /* lock for j1939_socks list */
+ spinlock_t j1939_socks_lock;
+ struct list_head j1939_socks;
+
+ struct kref rx_kref;
+};
+
+void j1939_ecu_put(struct j1939_ecu *ecu);
+
+/* keep the cache of what is local */
+int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa);
+void j1939_local_ecu_put(struct j1939_priv *priv, name_t name, u8 sa);
+
+static inline bool j1939_address_is_unicast(u8 addr)
+{
+ return addr <= J1939_MAX_UNICAST_ADDR;
+}
+
+static inline bool j1939_address_is_idle(u8 addr)
+{
+ return addr == J1939_IDLE_ADDR;
+}
+
+static inline bool j1939_address_is_valid(u8 addr)
+{
+ return addr != J1939_NO_ADDR;
+}
+
+static inline bool j1939_pgn_is_pdu1(pgn_t pgn)
+{
+ /* ignore dp & res bits for this */
+ return (pgn & 0xff00) < 0xf000;
+}
+
+/* utility to correctly unmap an ECU */
+void j1939_ecu_unmap_locked(struct j1939_ecu *ecu);
+void j1939_ecu_unmap(struct j1939_ecu *ecu);
+
+u8 j1939_name_to_addr(struct j1939_priv *priv, name_t name);
+struct j1939_ecu *j1939_ecu_find_by_addr_locked(struct j1939_priv *priv,
+ u8 addr);
+struct j1939_ecu *j1939_ecu_get_by_addr(struct j1939_priv *priv, u8 addr);
+struct j1939_ecu *j1939_ecu_get_by_addr_locked(struct j1939_priv *priv,
+ u8 addr);
+struct j1939_ecu *j1939_ecu_get_by_name(struct j1939_priv *priv, name_t name);
+struct j1939_ecu *j1939_ecu_get_by_name_locked(struct j1939_priv *priv,
+ name_t name);
+
+enum j1939_transfer_type {
+ J1939_TP,
+ J1939_ETP,
+ J1939_SIMPLE,
+};
+
+struct j1939_addr {
+ name_t src_name;
+ name_t dst_name;
+ pgn_t pgn;
+
+ u8 sa;
+ u8 da;
+
+ u8 type;
+};
+
+/* control buffer of the sk_buff */
+struct j1939_sk_buff_cb {
+ /* Offset in bytes within one ETP session */
+ u32 offset;
+
+ /* for tx, MSG_SYN will be used to sync on sockets */
+ u32 msg_flags;
+ u32 tskey;
+
+ struct j1939_addr addr;
+
+ /* Flags for quick lookups during skb processing.
+ * These are set in the receive path only.
+ */
+#define J1939_ECU_LOCAL_SRC BIT(0)
+#define J1939_ECU_LOCAL_DST BIT(1)
+ u8 flags;
+
+ priority_t priority;
+};
+
+static inline
+struct j1939_sk_buff_cb *j1939_skb_to_cb(const struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct j1939_sk_buff_cb) > sizeof(skb->cb));
+
+ return (struct j1939_sk_buff_cb *)skb->cb;
+}
+
+int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb);
+void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb);
+bool j1939_sk_recv_match(struct j1939_priv *priv,
+ struct j1939_sk_buff_cb *skcb);
+void j1939_sk_send_loop_abort(struct sock *sk, int err);
+void j1939_sk_errqueue(struct j1939_session *session,
+ enum j1939_sk_errqueue_type type);
+void j1939_sk_queue_activate_next(struct j1939_session *session);
+
+/* stack entries */
+struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
+ struct sk_buff *skb, size_t size);
+int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb);
+int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb);
+void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb);
+void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb);
+
+/* network management */
+struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name);
+
+void j1939_ecu_timer_start(struct j1939_ecu *ecu);
+void j1939_ecu_timer_cancel(struct j1939_ecu *ecu);
+void j1939_ecu_unmap_all(struct j1939_priv *priv);
+
+struct j1939_priv *j1939_netdev_start(struct net_device *ndev);
+void j1939_netdev_stop(struct j1939_priv *priv);
+
+void j1939_priv_put(struct j1939_priv *priv);
+void j1939_priv_get(struct j1939_priv *priv);
+
+/* notify/alert all j1939 sockets bound to ifindex */
+void j1939_sk_netdev_event_netdown(struct j1939_priv *priv);
+int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk);
+void j1939_tp_init(struct j1939_priv *priv);
+
+/* decrement pending skb for a j1939 socket */
+void j1939_sock_pending_del(struct sock *sk);
+
+enum j1939_session_state {
+ J1939_SESSION_NEW,
+ J1939_SESSION_ACTIVE,
+ /* waiting for abort signal on the bus */
+ J1939_SESSION_WAITING_ABORT,
+ J1939_SESSION_ACTIVE_MAX,
+ J1939_SESSION_DONE,
+};
+
+struct j1939_session {
+ struct j1939_priv *priv;
+ struct list_head active_session_list_entry;
+ struct list_head sk_session_queue_entry;
+ struct kref kref;
+ struct sock *sk;
+
+ /* ifindex, src, dst, pgn define the session block
+ * they are _never_ modified after insertion in the list
+ * this decreases locking problems a _lot_
+ */
+ struct j1939_sk_buff_cb skcb;
+ struct sk_buff_head skb_queue;
+
+ /* all tx related stuff (last_txcmd, pkt.tx)
+ * is protected (modified only) with the txtimer hrtimer
+ * 'total' & 'block' are never changed,
+ * last_cmd, last & block are protected by ->lock
+ * this means that the tx may run after cts is received that should
+ * have stopped tx, but this time discrepancy is never avoided anyhow
+ */
+ u8 last_cmd, last_txcmd;
+ bool transmission;
+ bool extd;
+ /* Total message size, number of bytes */
+ unsigned int total_message_size;
+ /* Total number of bytes queued from the socket to the session */
+ unsigned int total_queued_size;
+ unsigned int tx_retry;
+
+ int err;
+ u32 tskey;
+ enum j1939_session_state state;
+
+ /* Packet counters for a (extended) transfer session. Each packet
+ * carries at most 7 bytes of payload.
+ */
+ struct {
+ /* total - total number of packets for this session */
+ unsigned int total;
+ /* last - last packet of a transfer block after which
+ * responder should send ETP.CM_CTS and originator
+ * ETP.CM_DPO
+ */
+ unsigned int last;
+ /* tx - number of packets sent by the originator node.
+ * this counter can be set back if the responder node
+ * didn't receive all packets sent by the originator.
+ */
+ unsigned int tx;
+ unsigned int tx_acked;
+ /* rx - number of packets received */
+ unsigned int rx;
+ /* block - number of packets expected in one block */
+ unsigned int block;
+ /* dpo - ETP.CM_DPO, Data Packet Offset */
+ unsigned int dpo;
+ } pkt;
+ struct hrtimer txtimer, rxtimer;
+};
+
+struct j1939_sock {
+ struct sock sk; /* must be first to skip with memset */
+ struct j1939_priv *priv;
+ struct list_head list;
+
+#define J1939_SOCK_BOUND BIT(0)
+#define J1939_SOCK_CONNECTED BIT(1)
+#define J1939_SOCK_PROMISC BIT(2)
+#define J1939_SOCK_ERRQUEUE BIT(3)
+ int state;
+
+ int ifindex;
+ struct j1939_addr addr;
+ struct j1939_filter *filters;
+ int nfilters;
+ pgn_t pgn_rx_filter;
+
+ /* j1939 may emit messages with equal PGNs (but different CAN IDs)
+ * out of order when the transport protocol kicks in.
+ * To allow emitting in order, keep a 'pending' number of packets
+ */
+ atomic_t skb_pending;
+ wait_queue_head_t waitq;
+
+ /* lock for the sk_session_queue list */
+ spinlock_t sk_session_queue_lock;
+ struct list_head sk_session_queue;
+};
+
+static inline struct j1939_sock *j1939_sk(const struct sock *sk)
+{
+ return container_of(sk, struct j1939_sock, sk);
+}
+
+void j1939_session_get(struct j1939_session *session);
+void j1939_session_put(struct j1939_session *session);
+void j1939_session_skb_queue(struct j1939_session *session,
+ struct sk_buff *skb);
+int j1939_session_activate(struct j1939_session *session);
+void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec);
+void j1939_session_timers_cancel(struct j1939_session *session);
+
+#define J1939_MAX_TP_PACKET_SIZE (7 * 0xff)
+#define J1939_MAX_ETP_PACKET_SIZE (7 * 0x00ffffff)
+
+#define J1939_REGULAR 0
+#define J1939_EXTENDED 1
+
+/* CAN protocol */
+extern const struct can_proto j1939_can_proto;
+
+#endif /* _J1939_PRIV_H_ */
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
new file mode 100644
index 000000000000..def2f813ffce
--- /dev/null
+++ b/net/can/j1939/main.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+// Pieter Beyens <[email protected]>
+// Copyright (c) 2010-2011 EIA Electronics,
+// Kurt Van Dijck <[email protected]>
+// Copyright (c) 2018 Protonic,
+// Robin van der Gracht <[email protected]>
+// Copyright (c) 2017-2019 Pengutronix,
+// Marc Kleine-Budde <[email protected]>
+// Copyright (c) 2017-2019 Pengutronix,
+// Oleksij Rempel <[email protected]>
+
+/* Core of can-j1939 that links j1939 to CAN. */
+
+#include <linux/can/can-ml.h>
+#include <linux/can/core.h>
+#include <linux/can/skb.h>
+#include <linux/if_arp.h>
+#include <linux/module.h>
+
+#include "j1939-priv.h"
+
+MODULE_DESCRIPTION("PF_CAN SAE J1939");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("EIA Electronics (Kurt Van Dijck & Pieter Beyens)");
+MODULE_ALIAS("can-proto-" __stringify(CAN_J1939));
+
+/* LOWLEVEL CAN interface */
+
+/* CAN_HDR: #bytes before can_frame data part */
+#define J1939_CAN_HDR (offsetof(struct can_frame, data))
+
+/* CAN_FTR: #bytes beyond data part */
+#define J1939_CAN_FTR (sizeof(struct can_frame) - J1939_CAN_HDR - \
+ sizeof(((struct can_frame *)0)->data))
+
+/* lowest layer */
+static void j1939_can_recv(struct sk_buff *iskb, void *data)
+{
+ struct j1939_priv *priv = data;
+ struct sk_buff *skb;
+ struct j1939_sk_buff_cb *skcb, *iskcb;
+ struct can_frame *cf;
+
+ /* create a copy of the skb
+ * j1939 only delivers the real data bytes,
+ * the header goes into sockaddr.
+ * j1939 may not touch the incoming skb in such a way
+ */
+ skb = skb_clone(iskb, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ can_skb_set_owner(skb, iskb->sk);
+
+ /* get a pointer to the header of the skb
+ * the skb payload (pointer) is moved, so that the next skb_data
+ * returns the actual payload
+ */
+ cf = (void *)skb->data;
+ skb_pull(skb, J1939_CAN_HDR);
+
+ /* fix length, set to dlc, with 8 maximum */
+ skb_trim(skb, min_t(uint8_t, cf->can_dlc, 8));
+
+ /* set addr */
+ skcb = j1939_skb_to_cb(skb);
+ memset(skcb, 0, sizeof(*skcb));
+
+ iskcb = j1939_skb_to_cb(iskb);
+ skcb->tskey = iskcb->tskey;
+ skcb->priority = (cf->can_id >> 26) & 0x7;
+ skcb->addr.sa = cf->can_id;
+ skcb->addr.pgn = (cf->can_id >> 8) & J1939_PGN_MAX;
+ /* set default message type */
+ skcb->addr.type = J1939_TP;
+ if (j1939_pgn_is_pdu1(skcb->addr.pgn)) {
+ /* Type 1: with destination address */
+ skcb->addr.da = skcb->addr.pgn;
+ /* normalize pgn: strip dst address */
+ skcb->addr.pgn &= 0x3ff00;
+ } else {
+ /* set broadcast address */
+ skcb->addr.da = J1939_NO_ADDR;
+ }
+
+ /* update localflags */
+ read_lock_bh(&priv->lock);
+ if (j1939_address_is_unicast(skcb->addr.sa) &&
+ priv->ents[skcb->addr.sa].nusers)
+ skcb->flags |= J1939_ECU_LOCAL_SRC;
+ if (j1939_address_is_unicast(skcb->addr.da) &&
+ priv->ents[skcb->addr.da].nusers)
+ skcb->flags |= J1939_ECU_LOCAL_DST;
+ read_unlock_bh(&priv->lock);
+
+ /* deliver into the j1939 stack ... */
+ j1939_ac_recv(priv, skb);
+
+ if (j1939_tp_recv(priv, skb))
+ /* this means the transport layer processed the message */
+ goto done;
+
+ j1939_simple_recv(priv, skb);
+ j1939_sk_recv(priv, skb);
+ done:
+ kfree_skb(skb);
+}
+
+/* NETDEV MANAGEMENT */
+
+/* values for can_rx_(un)register */
+#define J1939_CAN_ID CAN_EFF_FLAG
+#define J1939_CAN_MASK (CAN_EFF_FLAG | CAN_RTR_FLAG)
+
+static DEFINE_SPINLOCK(j1939_netdev_lock);
+
+static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
+{
+ struct j1939_priv *priv;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return NULL;
+
+ rwlock_init(&priv->lock);
+ INIT_LIST_HEAD(&priv->ecus);
+ priv->ndev = ndev;
+ kref_init(&priv->kref);
+ kref_init(&priv->rx_kref);
+ dev_hold(ndev);
+
+ netdev_dbg(priv->ndev, "%s : 0x%p\n", __func__, priv);
+
+ return priv;
+}
+
+static inline void j1939_priv_set(struct net_device *ndev,
+ struct j1939_priv *priv)
+{
+ struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+
+ can_ml_priv->j1939_priv = priv;
+}
+
+static void __j1939_priv_release(struct kref *kref)
+{
+ struct j1939_priv *priv = container_of(kref, struct j1939_priv, kref);
+ struct net_device *ndev = priv->ndev;
+
+ netdev_dbg(priv->ndev, "%s: 0x%p\n", __func__, priv);
+
+ dev_put(ndev);
+ kfree(priv);
+}
+
+void j1939_priv_put(struct j1939_priv *priv)
+{
+ kref_put(&priv->kref, __j1939_priv_release);
+}
+
+void j1939_priv_get(struct j1939_priv *priv)
+{
+ kref_get(&priv->kref);
+}
+
+static int j1939_can_rx_register(struct j1939_priv *priv)
+{
+ struct net_device *ndev = priv->ndev;
+ int ret;
+
+ j1939_priv_get(priv);
+ ret = can_rx_register(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK,
+ j1939_can_recv, priv, "j1939", NULL);
+ if (ret < 0) {
+ j1939_priv_put(priv);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void j1939_can_rx_unregister(struct j1939_priv *priv)
+{
+ struct net_device *ndev = priv->ndev;
+
+ can_rx_unregister(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK,
+ j1939_can_recv, priv);
+
+ j1939_priv_put(priv);
+}
+
+static void __j1939_rx_release(struct kref *kref)
+ __releases(&j1939_netdev_lock)
+{
+ struct j1939_priv *priv = container_of(kref, struct j1939_priv,
+ rx_kref);
+
+ j1939_can_rx_unregister(priv);
+ j1939_ecu_unmap_all(priv);
+ j1939_priv_set(priv->ndev, NULL);
+ spin_unlock(&j1939_netdev_lock);
+}
+
+/* get pointer to priv without increasing ref counter */
+static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev)
+{
+ struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+
+ return can_ml_priv->j1939_priv;
+}
+
+static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev)
+{
+ struct j1939_priv *priv;
+
+ lockdep_assert_held(&j1939_netdev_lock);
+
+ if (ndev->type != ARPHRD_CAN)
+ return NULL;
+
+ priv = j1939_ndev_to_priv(ndev);
+ if (priv)
+ j1939_priv_get(priv);
+
+ return priv;
+}
+
+static struct j1939_priv *j1939_priv_get_by_ndev(struct net_device *ndev)
+{
+ struct j1939_priv *priv;
+
+ spin_lock(&j1939_netdev_lock);
+ priv = j1939_priv_get_by_ndev_locked(ndev);
+ spin_unlock(&j1939_netdev_lock);
+
+ return priv;
+}
+
+struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
+{
+ struct j1939_priv *priv, *priv_new;
+ int ret;
+
+ priv = j1939_priv_get_by_ndev(ndev);
+ if (priv) {
+ kref_get(&priv->rx_kref);
+ return priv;
+ }
+
+ priv = j1939_priv_create(ndev);
+ if (!priv)
+ return ERR_PTR(-ENOMEM);
+
+ j1939_tp_init(priv);
+ spin_lock_init(&priv->j1939_socks_lock);
+ INIT_LIST_HEAD(&priv->j1939_socks);
+
+ spin_lock(&j1939_netdev_lock);
+ priv_new = j1939_priv_get_by_ndev_locked(ndev);
+ if (priv_new) {
+ /* Someone was faster than us, use their priv and roll
+ * back ours.
+ */
+ spin_unlock(&j1939_netdev_lock);
+ dev_put(ndev);
+ kfree(priv);
+ kref_get(&priv_new->rx_kref);
+ return priv_new;
+ }
+ j1939_priv_set(ndev, priv);
+ spin_unlock(&j1939_netdev_lock);
+
+ ret = j1939_can_rx_register(priv);
+ if (ret < 0)
+ goto out_priv_put;
+
+ return priv;
+
+ out_priv_put:
+ j1939_priv_set(ndev, NULL);
+ dev_put(ndev);
+ kfree(priv);
+
+ return ERR_PTR(ret);
+}
+
+void j1939_netdev_stop(struct j1939_priv *priv)
+{
+ kref_put_lock(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock);
+ j1939_priv_put(priv);
+}
+
+int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ int ret, dlc;
+ canid_t canid;
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct can_frame *cf;
+
+ /* apply sanity checks */
+ if (j1939_pgn_is_pdu1(skcb->addr.pgn))
+ skcb->addr.pgn &= J1939_PGN_PDU1_MAX;
+ else
+ skcb->addr.pgn &= J1939_PGN_MAX;
+
+ if (skcb->priority > 7)
+ skcb->priority = 6;
+
+ ret = j1939_ac_fixup(priv, skb);
+ if (unlikely(ret))
+ goto failed;
+ dlc = skb->len;
+
+ /* re-claim the CAN_HDR from the SKB */
+ cf = skb_push(skb, J1939_CAN_HDR);
+
+ /* make it a full can frame again */
+ skb_put(skb, J1939_CAN_FTR + (8 - dlc));
+
+ canid = CAN_EFF_FLAG |
+ (skcb->priority << 26) |
+ (skcb->addr.pgn << 8) |
+ skcb->addr.sa;
+ if (j1939_pgn_is_pdu1(skcb->addr.pgn))
+ canid |= skcb->addr.da << 8;
+
+ cf->can_id = canid;
+ cf->can_dlc = dlc;
+
+ return can_send(skb, 1);
+
+ failed:
+ kfree_skb(skb);
+ return ret;
+}
+
+static int j1939_netdev_notify(struct notifier_block *nb,
+ unsigned long msg, void *data)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(data);
+ struct j1939_priv *priv;
+
+ priv = j1939_priv_get_by_ndev(ndev);
+ if (!priv)
+ goto notify_done;
+
+ if (ndev->type != ARPHRD_CAN)
+ goto notify_put;
+
+ switch (msg) {
+ case NETDEV_DOWN:
+ j1939_cancel_active_session(priv, NULL);
+ j1939_sk_netdev_event_netdown(priv);
+ j1939_ecu_unmap_all(priv);
+ break;
+ }
+
+notify_put:
+ j1939_priv_put(priv);
+
+notify_done:
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block j1939_netdev_notifier = {
+ .notifier_call = j1939_netdev_notify,
+};
+
+/* MODULE interface */
+static __init int j1939_module_init(void)
+{
+ int ret;
+
+ pr_info("can: SAE J1939\n");
+
+ ret = register_netdevice_notifier(&j1939_netdev_notifier);
+ if (ret)
+ goto fail_notifier;
+
+ ret = can_proto_register(&j1939_can_proto);
+ if (ret < 0) {
+ pr_err("can: registration of j1939 protocol failed\n");
+ goto fail_sk;
+ }
+
+ return 0;
+
+ fail_sk:
+ unregister_netdevice_notifier(&j1939_netdev_notifier);
+ fail_notifier:
+ return ret;
+}
+
+static __exit void j1939_module_exit(void)
+{
+ can_proto_unregister(&j1939_can_proto);
+
+ unregister_netdevice_notifier(&j1939_netdev_notifier);
+}
+
+module_init(j1939_module_init);
+module_exit(j1939_module_exit);
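As an illustration of the identifier layout used by j1939_send_one() above (not part of the patch): the priority occupies bits 26..28, the PGN bits 8..25 and the source address the low byte; for PDU1 PGNs the destination address is placed in the low byte of the PGN field. A small helper mirroring that logic, assuming the PGN has already been normalized as done earlier in j1939_send_one():

/* illustration only: pack a 29-bit J1939 CAN identifier */
static canid_t j1939_build_canid(u8 priority, pgn_t pgn, u8 da, u8 sa)
{
	canid_t canid = CAN_EFF_FLAG | ((canid_t)priority << 26) |
			((canid_t)pgn << 8) | sa;

	/* PDU1 (destination specific): DA goes into the PGN low byte,
	 * which j1939_send_one() has already cleared via the PDU1 mask.
	 */
	if ((pgn & 0xff00) < 0xf000)
		canid |= (canid_t)da << 8;

	return canid;
}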
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
new file mode 100644
index 000000000000..37c1040bcb9c
--- /dev/null
+++ b/net/can/j1939/socket.c
@@ -0,0 +1,1160 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+// Pieter Beyens <[email protected]>
+// Copyright (c) 2010-2011 EIA Electronics,
+// Kurt Van Dijck <[email protected]>
+// Copyright (c) 2018 Protonic,
+// Robin van der Gracht <[email protected]>
+// Copyright (c) 2017-2019 Pengutronix,
+// Marc Kleine-Budde <[email protected]>
+// Copyright (c) 2017-2019 Pengutronix,
+// Oleksij Rempel <[email protected]>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/can/core.h>
+#include <linux/can/skb.h>
+#include <linux/errqueue.h>
+#include <linux/if_arp.h>
+
+#include "j1939-priv.h"
+
+#define J1939_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_addr.j1939)
+
+/* conversion function between struct sock::sk_priority from linux and
+ * j1939 priority field
+ */
+static inline priority_t j1939_prio(u32 sk_priority)
+{
+ sk_priority = min(sk_priority, 7U);
+
+ return 7 - sk_priority;
+}
+
+static inline u32 j1939_to_sk_priority(priority_t prio)
+{
+ return 7 - prio;
+}
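A quick worked example of the mapping above (illustration, not part of the patch): Linux sk_priority and the J1939 priority field run in opposite directions, so sk_priority 0 is the lowest J1939 priority and anything >= 7 the highest.

/* j1939_prio(0)  == 7   lowest J1939 priority
 * j1939_prio(7)  == 0   highest J1939 priority
 * j1939_prio(42) == 0   clamped to 7 before inverting
 * j1939_to_sk_priority(6) == 1, the default set in j1939_sk_init()
 */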
+
+/* function to see if pgn is to be evaluated */
+static inline bool j1939_pgn_is_valid(pgn_t pgn)
+{
+ return pgn <= J1939_PGN_MAX;
+}
+
+/* test function to avoid non-zero DA placeholder for pdu1 pgn's */
+static inline bool j1939_pgn_is_clean_pdu(pgn_t pgn)
+{
+ if (j1939_pgn_is_pdu1(pgn))
+ return !(pgn & 0xff);
+ else
+ return true;
+}
+
+static inline void j1939_sock_pending_add(struct sock *sk)
+{
+ struct j1939_sock *jsk = j1939_sk(sk);
+
+ atomic_inc(&jsk->skb_pending);
+}
+
+static int j1939_sock_pending_get(struct sock *sk)
+{
+ struct j1939_sock *jsk = j1939_sk(sk);
+
+ return atomic_read(&jsk->skb_pending);
+}
+
+void j1939_sock_pending_del(struct sock *sk)
+{
+ struct j1939_sock *jsk = j1939_sk(sk);
+
+ /* atomic_dec_return returns the new value */
+ if (!atomic_dec_return(&jsk->skb_pending))
+ wake_up(&jsk->waitq); /* no pending SKB's */
+}
+
+static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
+{
+ jsk->state |= J1939_SOCK_BOUND;
+ j1939_priv_get(priv);
+ jsk->priv = priv;
+
+ spin_lock_bh(&priv->j1939_socks_lock);
+ list_add_tail(&jsk->list, &priv->j1939_socks);
+ spin_unlock_bh(&priv->j1939_socks_lock);
+}
+
+static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
+{
+ spin_lock_bh(&priv->j1939_socks_lock);
+ list_del_init(&jsk->list);
+ spin_unlock_bh(&priv->j1939_socks_lock);
+
+ jsk->priv = NULL;
+ j1939_priv_put(priv);
+ jsk->state &= ~J1939_SOCK_BOUND;
+}
+
+static bool j1939_sk_queue_session(struct j1939_session *session)
+{
+ struct j1939_sock *jsk = j1939_sk(session->sk);
+ bool empty;
+
+ spin_lock_bh(&jsk->sk_session_queue_lock);
+ empty = list_empty(&jsk->sk_session_queue);
+ j1939_session_get(session);
+ list_add_tail(&session->sk_session_queue_entry, &jsk->sk_session_queue);
+ spin_unlock_bh(&jsk->sk_session_queue_lock);
+ j1939_sock_pending_add(&jsk->sk);
+
+ return empty;
+}
+
+static struct
+j1939_session *j1939_sk_get_incomplete_session(struct j1939_sock *jsk)
+{
+ struct j1939_session *session = NULL;
+
+ spin_lock_bh(&jsk->sk_session_queue_lock);
+ if (!list_empty(&jsk->sk_session_queue)) {
+ session = list_last_entry(&jsk->sk_session_queue,
+ struct j1939_session,
+ sk_session_queue_entry);
+ if (session->total_queued_size == session->total_message_size)
+ session = NULL;
+ else
+ j1939_session_get(session);
+ }
+ spin_unlock_bh(&jsk->sk_session_queue_lock);
+
+ return session;
+}
+
+static void j1939_sk_queue_drop_all(struct j1939_priv *priv,
+ struct j1939_sock *jsk, int err)
+{
+ struct j1939_session *session, *tmp;
+
+ netdev_dbg(priv->ndev, "%s: err: %i\n", __func__, err);
+ spin_lock_bh(&jsk->sk_session_queue_lock);
+ list_for_each_entry_safe(session, tmp, &jsk->sk_session_queue,
+ sk_session_queue_entry) {
+ list_del_init(&session->sk_session_queue_entry);
+ session->err = err;
+ j1939_session_put(session);
+ }
+ spin_unlock_bh(&jsk->sk_session_queue_lock);
+}
+
+static void j1939_sk_queue_activate_next_locked(struct j1939_session *session)
+{
+ struct j1939_sock *jsk;
+ struct j1939_session *first;
+ int err;
+
+ /* RX sessions don't have a socket (yet) */
+ if (!session->sk)
+ return;
+
+ jsk = j1939_sk(session->sk);
+ lockdep_assert_held(&jsk->sk_session_queue_lock);
+
+ err = session->err;
+
+ first = list_first_entry_or_null(&jsk->sk_session_queue,
+ struct j1939_session,
+ sk_session_queue_entry);
+
+ /* Someone else has already activated the next session */
+ if (first != session)
+ return;
+
+activate_next:
+ list_del_init(&first->sk_session_queue_entry);
+ j1939_session_put(first);
+ first = list_first_entry_or_null(&jsk->sk_session_queue,
+ struct j1939_session,
+ sk_session_queue_entry);
+ if (!first)
+ return;
+
+ if (WARN_ON_ONCE(j1939_session_activate(first))) {
+ first->err = -EBUSY;
+ goto activate_next;
+ } else {
+ /* Give the receiver some time (arbitrarily chosen) to recover */
+ int time_ms = 0;
+
+ if (err)
+ time_ms = 10 + prandom_u32_max(16);
+
+ j1939_tp_schedule_txtimer(first, time_ms);
+ }
+}
+
+void j1939_sk_queue_activate_next(struct j1939_session *session)
+{
+ struct j1939_sock *jsk;
+
+ if (!session->sk)
+ return;
+
+ jsk = j1939_sk(session->sk);
+
+ spin_lock_bh(&jsk->sk_session_queue_lock);
+ j1939_sk_queue_activate_next_locked(session);
+ spin_unlock_bh(&jsk->sk_session_queue_lock);
+}
+
+static bool j1939_sk_match_dst(struct j1939_sock *jsk,
+ const struct j1939_sk_buff_cb *skcb)
+{
+ if ((jsk->state & J1939_SOCK_PROMISC))
+ return true;
+
+ /* Destination address filter */
+ if (jsk->addr.src_name && skcb->addr.dst_name) {
+ if (jsk->addr.src_name != skcb->addr.dst_name)
+ return false;
+ } else {
+ /* receive (all sockets) if
+ * - all packets that match our bind() address
+ * - all broadcast on a socket if SO_BROADCAST
+ * is set
+ */
+ if (j1939_address_is_unicast(skcb->addr.da)) {
+ if (jsk->addr.sa != skcb->addr.da)
+ return false;
+ } else if (!sock_flag(&jsk->sk, SOCK_BROADCAST)) {
+ /* receiving broadcast without SO_BROADCAST
+ * flag is not allowed
+ */
+ return false;
+ }
+ }
+
+ /* Source address filter */
+ if (jsk->state & J1939_SOCK_CONNECTED) {
+ /* receive (all sockets) if
+ * - all packets that match our connect() name or address
+ */
+ if (jsk->addr.dst_name && skcb->addr.src_name) {
+ if (jsk->addr.dst_name != skcb->addr.src_name)
+ return false;
+ } else {
+ if (jsk->addr.da != skcb->addr.sa)
+ return false;
+ }
+ }
+
+ /* PGN filter */
+ if (j1939_pgn_is_valid(jsk->pgn_rx_filter) &&
+ jsk->pgn_rx_filter != skcb->addr.pgn)
+ return false;
+
+ return true;
+}
+
+/* matches skb control buffer (addr) with a j1939 filter */
+static bool j1939_sk_match_filter(struct j1939_sock *jsk,
+ const struct j1939_sk_buff_cb *skcb)
+{
+ const struct j1939_filter *f = jsk->filters;
+ int nfilter = jsk->nfilters;
+
+ if (!nfilter)
+ /* receive all when no filters are assigned */
+ return true;
+
+ for (; nfilter; ++f, --nfilter) {
+ if ((skcb->addr.pgn & f->pgn_mask) != f->pgn)
+ continue;
+ if ((skcb->addr.sa & f->addr_mask) != f->addr)
+ continue;
+ if ((skcb->addr.src_name & f->name_mask) != f->name)
+ continue;
+ return true;
+ }
+ return false;
+}
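The matching above is driven by filters installed from user space. A hedged sketch of that side (SOL_CAN_J1939, SO_J1939_FILTER and the struct j1939_filter layout come from the UAPI header and are assumptions here, not defined in this file):

/* Sketch: only accept PGN 0x0f004 from source address 0x23.
 * name/name_mask are left 0, i.e. the NAME is not matched.
 */
struct j1939_filter filt = {
	.pgn = 0x0f004,
	.pgn_mask = 0x3ffff,	/* match the full PGN */
	.addr = 0x23,
	.addr_mask = 0xff,
};

setsockopt(sock, SOL_CAN_J1939, SO_J1939_FILTER, &filt, sizeof(filt));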
+
+static bool j1939_sk_recv_match_one(struct j1939_sock *jsk,
+ const struct j1939_sk_buff_cb *skcb)
+{
+ if (!(jsk->state & J1939_SOCK_BOUND))
+ return false;
+
+ if (!j1939_sk_match_dst(jsk, skcb))
+ return false;
+
+ if (!j1939_sk_match_filter(jsk, skcb))
+ return false;
+
+ return true;
+}
+
+static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb)
+{
+ const struct j1939_sk_buff_cb *oskcb = j1939_skb_to_cb(oskb);
+ struct j1939_sk_buff_cb *skcb;
+ struct sk_buff *skb;
+
+ if (oskb->sk == &jsk->sk)
+ return;
+
+ if (!j1939_sk_recv_match_one(jsk, oskcb))
+ return;
+
+ skb = skb_clone(oskb, GFP_ATOMIC);
+ if (!skb) {
+ pr_warn("skb clone failed\n");
+ return;
+ }
+ can_skb_set_owner(skb, oskb->sk);
+
+ skcb = j1939_skb_to_cb(skb);
+ skcb->msg_flags &= ~(MSG_DONTROUTE);
+ if (skb->sk)
+ skcb->msg_flags |= MSG_DONTROUTE;
+
+ if (sock_queue_rcv_skb(&jsk->sk, skb) < 0)
+ kfree_skb(skb);
+}
+
+bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
+{
+ struct j1939_sock *jsk;
+ bool match = false;
+
+ spin_lock_bh(&priv->j1939_socks_lock);
+ list_for_each_entry(jsk, &priv->j1939_socks, list) {
+ match = j1939_sk_recv_match_one(jsk, skcb);
+ if (match)
+ break;
+ }
+ spin_unlock_bh(&priv->j1939_socks_lock);
+
+ return match;
+}
+
+void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_sock *jsk;
+
+ spin_lock_bh(&priv->j1939_socks_lock);
+ list_for_each_entry(jsk, &priv->j1939_socks, list) {
+ j1939_sk_recv_one(jsk, skb);
+ }
+ spin_unlock_bh(&priv->j1939_socks_lock);
+}
+
+static int j1939_sk_init(struct sock *sk)
+{
+ struct j1939_sock *jsk = j1939_sk(sk);
+
+ /* Ensure that "sk" is first member in "struct j1939_sock", so that we
+ * can skip it during memset().
+ */
+ BUILD_BUG_ON(offsetof(struct j1939_sock, sk) != 0);
+ memset((void *)jsk + sizeof(jsk->sk), 0x0,
+ sizeof(*jsk) - sizeof(jsk->sk));
+
+ INIT_LIST_HEAD(&jsk->list);
+ init_waitqueue_head(&jsk->waitq);
+ jsk->sk.sk_priority = j1939_to_sk_priority(6);
+ jsk->sk.sk_reuse = 1; /* per default */
+ jsk->addr.sa = J1939_NO_ADDR;
+ jsk->addr.da = J1939_NO_ADDR;
+ jsk->addr.pgn = J1939_NO_PGN;
+ jsk->pgn_rx_filter = J1939_NO_PGN;
+ atomic_set(&jsk->skb_pending, 0);
+ spin_lock_init(&jsk->sk_session_queue_lock);
+ INIT_LIST_HEAD(&jsk->sk_session_queue);
+
+ return 0;
+}
+
+static int j1939_sk_sanity_check(struct sockaddr_can *addr, int len)
+{
+ if (!addr)
+ return -EDESTADDRREQ;
+ if (len < J1939_MIN_NAMELEN)
+ return -EINVAL;
+ if (addr->can_family != AF_CAN)
+ return -EINVAL;
+ if (!addr->can_ifindex)
+ return -ENODEV;
+ if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) &&
+ !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
+{
+ struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+ struct j1939_sock *jsk = j1939_sk(sock->sk);
+ struct j1939_priv *priv = jsk->priv;
+ struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
+ int ret = 0;
+
+ ret = j1939_sk_sanity_check(addr, len);
+ if (ret)
+ return ret;
+
+ lock_sock(sock->sk);
+
+ /* Already bound to an interface? */
+ if (jsk->state & J1939_SOCK_BOUND) {
+ /* A re-bind() to a different interface is not
+ * supported.
+ */
+ if (jsk->ifindex != addr->can_ifindex) {
+ ret = -EINVAL;
+ goto out_release_sock;
+ }
+
+ /* drop old references */
+ j1939_jsk_del(priv, jsk);
+ j1939_local_ecu_put(priv, jsk->addr.src_name, jsk->addr.sa);
+ } else {
+ struct net_device *ndev;
+
+ ndev = dev_get_by_index(net, addr->can_ifindex);
+ if (!ndev) {
+ ret = -ENODEV;
+ goto out_release_sock;
+ }
+
+ if (ndev->type != ARPHRD_CAN) {
+ dev_put(ndev);
+ ret = -ENODEV;
+ goto out_release_sock;
+ }
+
+ priv = j1939_netdev_start(ndev);
+ dev_put(ndev);
+ if (IS_ERR(priv)) {
+ ret = PTR_ERR(priv);
+ goto out_release_sock;
+ }
+
+ jsk->ifindex = addr->can_ifindex;
+ }
+
+ /* set default transmit pgn */
+ if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
+ jsk->pgn_rx_filter = addr->can_addr.j1939.pgn;
+ jsk->addr.src_name = addr->can_addr.j1939.name;
+ jsk->addr.sa = addr->can_addr.j1939.addr;
+
+ /* get new references */
+ ret = j1939_local_ecu_get(priv, jsk->addr.src_name, jsk->addr.sa);
+ if (ret) {
+ j1939_netdev_stop(priv);
+ goto out_release_sock;
+ }
+
+ j1939_jsk_add(priv, jsk);
+
+ out_release_sock: /* fall through */
+ release_sock(sock->sk);
+
+ return ret;
+}
+
+static int j1939_sk_connect(struct socket *sock, struct sockaddr *uaddr,
+ int len, int flags)
+{
+ struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+ struct j1939_sock *jsk = j1939_sk(sock->sk);
+ int ret = 0;
+
+ ret = j1939_sk_sanity_check(addr, len);
+ if (ret)
+ return ret;
+
+ lock_sock(sock->sk);
+
+ /* bind() before connect() is mandatory */
+ if (!(jsk->state & J1939_SOCK_BOUND)) {
+ ret = -EINVAL;
+ goto out_release_sock;
+ }
+
+ /* A connect() to a different interface is not supported. */
+ if (jsk->ifindex != addr->can_ifindex) {
+ ret = -EINVAL;
+ goto out_release_sock;
+ }
+
+ if (!addr->can_addr.j1939.name &&
+ addr->can_addr.j1939.addr == J1939_NO_ADDR &&
+ !sock_flag(&jsk->sk, SOCK_BROADCAST)) {
+ /* broadcast, but SO_BROADCAST not set */
+ ret = -EACCES;
+ goto out_release_sock;
+ }
+
+ jsk->addr.dst_name = addr->can_addr.j1939.name;
+ jsk->addr.da = addr->can_addr.j1939.addr;
+
+ if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
+ jsk->addr.pgn = addr->can_addr.j1939.pgn;
+
+ jsk->state |= J1939_SOCK_CONNECTED;
+
+ out_release_sock: /* fall through */
+ release_sock(sock->sk);
+
+ return ret;
+}
+
+static void j1939_sk_sock2sockaddr_can(struct sockaddr_can *addr,
+ const struct j1939_sock *jsk, int peer)
+{
+ addr->can_family = AF_CAN;
+ addr->can_ifindex = jsk->ifindex;
+ addr->can_addr.j1939.pgn = jsk->addr.pgn;
+ if (peer) {
+ addr->can_addr.j1939.name = jsk->addr.dst_name;
+ addr->can_addr.j1939.addr = jsk->addr.da;
+ } else {
+ addr->can_addr.j1939.name = jsk->addr.src_name;
+ addr->can_addr.j1939.addr = jsk->addr.sa;
+ }
+}
+
+static int j1939_sk_getname(struct socket *sock, struct sockaddr *uaddr,
+ int peer)
+{
+ struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+ struct sock *sk = sock->sk;
+ struct j1939_sock *jsk = j1939_sk(sk);
+ int ret = 0;
+
+ lock_sock(sk);
+
+ if (peer && !(jsk->state & J1939_SOCK_CONNECTED)) {
+ ret = -EADDRNOTAVAIL;
+ goto failure;
+ }
+
+ j1939_sk_sock2sockaddr_can(addr, jsk, peer);
+ ret = J1939_MIN_NAMELEN;
+
+ failure:
+ release_sock(sk);
+
+ return ret;
+}
+
+static int j1939_sk_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct j1939_sock *jsk;
+
+ if (!sk)
+ return 0;
+
+ jsk = j1939_sk(sk);
+ lock_sock(sk);
+
+ if (jsk->state & J1939_SOCK_BOUND) {
+ struct j1939_priv *priv = jsk->priv;
+
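+ /* wait until all pending transmissions are done; when interrupted,
+ * cancel the active session and drop everything still queued
+ */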
+ if (wait_event_interruptible(jsk->waitq,
+ !j1939_sock_pending_get(&jsk->sk))) {
+ j1939_cancel_active_session(priv, sk);
+ j1939_sk_queue_drop_all(priv, jsk, ESHUTDOWN);
+ }
+
+ j1939_jsk_del(priv, jsk);
+
+ j1939_local_ecu_put(priv, jsk->addr.src_name,
+ jsk->addr.sa);
+
+ j1939_netdev_stop(priv);
+ }
+
+ sock_orphan(sk);
+ sock->sk = NULL;
+
+ release_sock(sk);
+ sock_put(sk);
+
+ return 0;
+}
+
+static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, char __user *optval,
+ unsigned int optlen, int flag)
+{
+ int tmp;
+
+ if (optlen != sizeof(tmp))
+ return -EINVAL;
+ if (copy_from_user(&tmp, optval, optlen))
+ return -EFAULT;
+ lock_sock(&jsk->sk);
+ if (tmp)
+ jsk->state |= flag;
+ else
+ jsk->state &= ~flag;
+ release_sock(&jsk->sk);
+ return tmp;
+}
+
+static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, unsigned int optlen)
+{
+ struct sock *sk = sock->sk;
+ struct j1939_sock *jsk = j1939_sk(sk);
+ int tmp, count = 0, ret = 0;
+ struct j1939_filter *filters = NULL, *ofilters;
+
+ if (level != SOL_CAN_J1939)
+ return -EINVAL;
+
+ switch (optname) {
+ case SO_J1939_FILTER:
+ if (optval) {
+ struct j1939_filter *f;
+ int c;
+
+ if (optlen % sizeof(*filters) != 0)
+ return -EINVAL;
+
+ if (optlen > J1939_FILTER_MAX *
+ sizeof(struct j1939_filter))
+ return -EINVAL;
+
+ count = optlen / sizeof(*filters);
+ filters = memdup_user(optval, optlen);
+ if (IS_ERR(filters))
+ return PTR_ERR(filters);
+
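+ /* pre-mask the filter values so they can be compared
+ * directly in j1939_sk_match_filter()
+ */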
+ for (f = filters, c = count; c; f++, c--) {
+ f->name &= f->name_mask;
+ f->pgn &= f->pgn_mask;
+ f->addr &= f->addr_mask;
+ }
+ }
+
+ lock_sock(&jsk->sk);
+ ofilters = jsk->filters;
+ jsk->filters = filters;
+ jsk->nfilters = count;
+ release_sock(&jsk->sk);
+ kfree(ofilters);
+ return 0;
+ case SO_J1939_PROMISC:
+ return j1939_sk_setsockopt_flag(jsk, optval, optlen,
+ J1939_SOCK_PROMISC);
+ case SO_J1939_ERRQUEUE:
+ ret = j1939_sk_setsockopt_flag(jsk, optval, optlen,
+ J1939_SOCK_ERRQUEUE);
+ if (ret < 0)
+ return ret;
+
+ if (!(jsk->state & J1939_SOCK_ERRQUEUE))
+ skb_queue_purge(&sk->sk_error_queue);
+ return ret;
+ case SO_J1939_SEND_PRIO:
+ if (optlen != sizeof(tmp))
+ return -EINVAL;
+ if (copy_from_user(&tmp, optval, optlen))
+ return -EFAULT;
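+ /* valid priorities are 0..7; the two highest ones
+ * (0 and 1) are restricted to CAP_NET_ADMIN
+ */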
+ if (tmp < 0 || tmp > 7)
+ return -EDOM;
+ if (tmp < 2 && !capable(CAP_NET_ADMIN))
+ return -EPERM;
+ lock_sock(&jsk->sk);
+ jsk->sk.sk_priority = j1939_to_sk_priority(tmp);
+ release_sock(&jsk->sk);
+ return 0;
+ default:
+ return -ENOPROTOOPT;
+ }
+}
+
+static int j1939_sk_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ struct sock *sk = sock->sk;
+ struct j1939_sock *jsk = j1939_sk(sk);
+ int ret, ulen;
+ /* set defaults for using 'int' properties */
+ int tmp = 0;
+ int len = sizeof(tmp);
+ void *val = &tmp;
+
+ if (level != SOL_CAN_J1939)
+ return -EINVAL;
+ if (get_user(ulen, optlen))
+ return -EFAULT;
+ if (ulen < 0)
+ return -EINVAL;
+
+ lock_sock(&jsk->sk);
+ switch (optname) {
+ case SO_J1939_PROMISC:
+ tmp = (jsk->state & J1939_SOCK_PROMISC) ? 1 : 0;
+ break;
+ case SO_J1939_ERRQUEUE:
+ tmp = (jsk->state & J1939_SOCK_ERRQUEUE) ? 1 : 0;
+ break;
+ case SO_J1939_SEND_PRIO:
+ tmp = j1939_prio(jsk->sk.sk_priority);
+ break;
+ default:
+ ret = -ENOPROTOOPT;
+ goto no_copy;
+ }
+
+ /* copy to user, based on 'len' & 'val';
+ * most sockopts are 'int' properties and leave 'len' & 'val'
+ * unchanged, modifying only 'tmp'
+ */
+ if (len > ulen)
+ ret = -EFAULT;
+ else if (put_user(len, optlen))
+ ret = -EFAULT;
+ else if (copy_to_user(optval, val, len))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ no_copy:
+ release_sock(&jsk->sk);
+ return ret;
+}
+
+static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t size, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct sk_buff *skb;
+ struct j1939_sk_buff_cb *skcb;
+ int ret = 0;
+
+ if (flags & ~(MSG_DONTWAIT | MSG_ERRQUEUE))
+ return -EINVAL;
+
+ if (flags & MSG_ERRQUEUE)
+ return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939,
+ SCM_J1939_ERRQUEUE);
+
+ skb = skb_recv_datagram(sk, flags, 0, &ret);
+ if (!skb)
+ return ret;
+
+ if (size < skb->len)
+ msg->msg_flags |= MSG_TRUNC;
+ else
+ size = skb->len;
+
+ ret = memcpy_to_msg(msg, skb->data, size);
+ if (ret < 0) {
+ skb_free_datagram(sk, skb);
+ return ret;
+ }
+
+ skcb = j1939_skb_to_cb(skb);
+ if (j1939_address_is_valid(skcb->addr.da))
+ put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_ADDR,
+ sizeof(skcb->addr.da), &skcb->addr.da);
+
+ if (skcb->addr.dst_name)
+ put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_NAME,
+ sizeof(skcb->addr.dst_name), &skcb->addr.dst_name);
+
+ put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_PRIO,
+ sizeof(skcb->priority), &skcb->priority);
+
+ if (msg->msg_name) {
+ struct sockaddr_can *paddr = msg->msg_name;
+
+ msg->msg_namelen = J1939_MIN_NAMELEN;
+ memset(msg->msg_name, 0, msg->msg_namelen);
+ paddr->can_family = AF_CAN;
+ paddr->can_ifindex = skb->skb_iif;
+ paddr->can_addr.j1939.name = skcb->addr.src_name;
+ paddr->can_addr.j1939.addr = skcb->addr.sa;
+ paddr->can_addr.j1939.pgn = skcb->addr.pgn;
+ }
+
+ sock_recv_ts_and_drops(msg, sk, skb);
+ msg->msg_flags |= skcb->msg_flags;
+ skb_free_datagram(sk, skb);
+
+ return size;
+}
+
+static struct sk_buff *j1939_sk_alloc_skb(struct net_device *ndev,
+ struct sock *sk,
+ struct msghdr *msg, size_t size,
+ int *errcode)
+{
+ struct j1939_sock *jsk = j1939_sk(sk);
+ struct j1939_sk_buff_cb *skcb;
+ struct sk_buff *skb;
+ int ret;
+
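+ /* reserve room for struct can_skb_priv and the CAN frame header
+ * (struct can_frame without its data[] payload) in front of the
+ * J1939 payload
+ */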
+ skb = sock_alloc_send_skb(sk,
+ size +
+ sizeof(struct can_frame) -
+ sizeof(((struct can_frame *)NULL)->data) +
+ sizeof(struct can_skb_priv),
+ msg->msg_flags & MSG_DONTWAIT, &ret);
+ if (!skb)
+ goto failure;
+
+ can_skb_reserve(skb);
+ can_skb_prv(skb)->ifindex = ndev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
+ skb_reserve(skb, offsetof(struct can_frame, data));
+
+ ret = memcpy_from_msg(skb_put(skb, size), msg, size);
+ if (ret < 0)
+ goto free_skb;
+
+ skb->dev = ndev;
+
+ skcb = j1939_skb_to_cb(skb);
+ memset(skcb, 0, sizeof(*skcb));
+ skcb->addr = jsk->addr;
+ skcb->priority = j1939_prio(sk->sk_priority);
+
+ if (msg->msg_name) {
+ struct sockaddr_can *addr = msg->msg_name;
+
+ if (addr->can_addr.j1939.name ||
+ addr->can_addr.j1939.addr != J1939_NO_ADDR) {
+ skcb->addr.dst_name = addr->can_addr.j1939.name;
+ skcb->addr.da = addr->can_addr.j1939.addr;
+ }
+ if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
+ skcb->addr.pgn = addr->can_addr.j1939.pgn;
+ }
+
+ *errcode = ret;
+ return skb;
+
+free_skb:
+ kfree_skb(skb);
+failure:
+ *errcode = ret;
+ return NULL;
+}
+
+static size_t j1939_sk_opt_stats_get_size(void)
+{
+ return
+ nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
+ 0;
+}
+
+static struct sk_buff *
+j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
+{
+ struct sk_buff *stats;
+ u32 size;
+
+ stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC);
+ if (!stats)
+ return NULL;
+
+ if (session->skcb.addr.type == J1939_SIMPLE)
+ size = session->total_message_size;
+ else
+ size = min(session->pkt.tx_acked * 7,
+ session->total_message_size);
+
+ nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+
+ return stats;
+}
+
+void j1939_sk_errqueue(struct j1939_session *session,
+ enum j1939_sk_errqueue_type type)
+{
+ struct j1939_priv *priv = session->priv;
+ struct sock *sk = session->sk;
+ struct j1939_sock *jsk;
+ struct sock_exterr_skb *serr;
+ struct sk_buff *skb;
+ char *state = "UNK";
+ int err;
+
+ /* currently we have no sk for the RX session */
+ if (!sk)
+ return;
+
+ jsk = j1939_sk(sk);
+
+ if (!(jsk->state & J1939_SOCK_ERRQUEUE))
+ return;
+
+ skb = j1939_sk_get_timestamping_opt_stats(session);
+ if (!skb)
+ return;
+
+ skb->tstamp = ktime_get_real();
+
+ BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
+
+ serr = SKB_EXT_ERR(skb);
+ memset(serr, 0, sizeof(*serr));
+ switch (type) {
+ case J1939_ERRQUEUE_ACK:
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) {
+ kfree_skb(skb);
+ return;
+ }
+
+ serr->ee.ee_errno = ENOMSG;
+ serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+ serr->ee.ee_info = SCM_TSTAMP_ACK;
+ state = "ACK";
+ break;
+ case J1939_ERRQUEUE_SCHED:
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) {
+ kfree_skb(skb);
+ return;
+ }
+
+ serr->ee.ee_errno = ENOMSG;
+ serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+ serr->ee.ee_info = SCM_TSTAMP_SCHED;
+ state = "SCH";
+ break;
+ case J1939_ERRQUEUE_ABORT:
+ serr->ee.ee_errno = session->err;
+ serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+ serr->ee.ee_info = J1939_EE_INFO_TX_ABORT;
+ state = "ABT";
+ break;
+ default:
+ netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
+ }
+
+ serr->opt_stats = true;
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ serr->ee.ee_data = session->tskey;
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n",
+ __func__, session, session->tskey, state);
+ err = sock_queue_err_skb(sk, skb);
+
+ if (err)
+ kfree_skb(skb);
+}
+
+void j1939_sk_send_loop_abort(struct sock *sk, int err)
+{
+ sk->sk_err = err;
+
+ sk->sk_error_report(sk);
+}
+
+static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk,
+ struct msghdr *msg, size_t size)
+
+{
+ struct j1939_sock *jsk = j1939_sk(sk);
+ struct j1939_session *session = j1939_sk_get_incomplete_session(jsk);
+ struct sk_buff *skb;
+ size_t segment_size, todo_size;
+ int ret = 0;
+
+ if (session &&
+ session->total_message_size != session->total_queued_size + size) {
+ j1939_session_put(session);
+ return -EIO;
+ }
+
+ todo_size = size;
+
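+ /* queue the message in segments of at most J1939_MAX_TP_PACKET_SIZE
+ * bytes; the first segment creates the (E)TP session
+ */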
+ while (todo_size) {
+ struct j1939_sk_buff_cb *skcb;
+
+ segment_size = min_t(size_t, J1939_MAX_TP_PACKET_SIZE,
+ todo_size);
+
+ /* Allocate skb for one segment */
+ skb = j1939_sk_alloc_skb(priv->ndev, sk, msg, segment_size,
+ &ret);
+ if (ret)
+ break;
+
+ skcb = j1939_skb_to_cb(skb);
+
+ if (!session) {
+ /* at this point the size should be full size
+ * of the session
+ */
+ skcb->offset = 0;
+ session = j1939_tp_send(priv, skb, size);
+ if (IS_ERR(session)) {
+ ret = PTR_ERR(session);
+ goto kfree_skb;
+ }
+ if (j1939_sk_queue_session(session)) {
+ /* try to activate the session if we are
+ * first in the queue
+ */
+ if (!j1939_session_activate(session)) {
+ j1939_tp_schedule_txtimer(session, 0);
+ } else {
+ ret = -EBUSY;
+ session->err = ret;
+ j1939_sk_queue_drop_all(priv, jsk,
+ EBUSY);
+ break;
+ }
+ }
+ } else {
+ skcb->offset = session->total_queued_size;
+ j1939_session_skb_queue(session, skb);
+ }
+
+ todo_size -= segment_size;
+ session->total_queued_size += segment_size;
+ }
+
+ switch (ret) {
+ case 0: /* OK */
+ if (todo_size)
+ netdev_warn(priv->ndev,
+ "no error found and not completely queued?! %zu\n",
+ todo_size);
+ ret = size;
+ break;
+ case -ERESTARTSYS:
+ ret = -EINTR;
+ /* fall through */
+ case -EAGAIN: /* OK */
+ if (todo_size != size)
+ ret = size - todo_size;
+ break;
+ default: /* ERROR */
+ break;
+ }
+
+ if (session)
+ j1939_session_put(session);
+
+ return ret;
+
+ kfree_skb:
+ kfree_skb(skb);
+ return ret;
+}
+
+static int j1939_sk_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t size)
+{
+ struct sock *sk = sock->sk;
+ struct j1939_sock *jsk = j1939_sk(sk);
+ struct j1939_priv *priv = jsk->priv;
+ int ifindex;
+ int ret;
+
+ /* various socket state tests */
+ if (!(jsk->state & J1939_SOCK_BOUND))
+ return -EBADFD;
+
+ ifindex = jsk->ifindex;
+
+ if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR)
+ /* no source address assigned yet */
+ return -EBADFD;
+
+ /* deal with provided destination address info */
+ if (msg->msg_name) {
+ struct sockaddr_can *addr = msg->msg_name;
+
+ if (msg->msg_namelen < J1939_MIN_NAMELEN)
+ return -EINVAL;
+
+ if (addr->can_family != AF_CAN)
+ return -EINVAL;
+
+ if (addr->can_ifindex && addr->can_ifindex != ifindex)
+ return -EBADFD;
+
+ if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) &&
+ !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn))
+ return -EINVAL;
+
+ if (!addr->can_addr.j1939.name &&
+ addr->can_addr.j1939.addr == J1939_NO_ADDR &&
+ !sock_flag(sk, SOCK_BROADCAST))
+ /* broadcast, but SO_BROADCAST not set */
+ return -EACCES;
+ } else {
+ if (!jsk->addr.dst_name && jsk->addr.da == J1939_NO_ADDR &&
+ !sock_flag(sk, SOCK_BROADCAST))
+ /* broadcast, but SO_BROADCAST not set */
+ return -EACCES;
+ }
+
+ ret = j1939_sk_send_loop(priv, sk, msg, size);
+
+ return ret;
+}
+
+void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
+{
+ struct j1939_sock *jsk;
+ int error_code = ENETDOWN;
+
+ spin_lock_bh(&priv->j1939_socks_lock);
+ list_for_each_entry(jsk, &priv->j1939_socks, list) {
+ jsk->sk.sk_err = error_code;
+ if (!sock_flag(&jsk->sk, SOCK_DEAD))
+ jsk->sk.sk_error_report(&jsk->sk);
+
+ j1939_sk_queue_drop_all(priv, jsk, error_code);
+ }
+ spin_unlock_bh(&priv->j1939_socks_lock);
+}
+
+static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+{
+ /* no ioctls for socket layer -> hand it down to NIC layer */
+ return -ENOIOCTLCMD;
+}
+
+static const struct proto_ops j1939_ops = {
+ .family = PF_CAN,
+ .release = j1939_sk_release,
+ .bind = j1939_sk_bind,
+ .connect = j1939_sk_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = j1939_sk_getname,
+ .poll = datagram_poll,
+ .ioctl = j1939_sk_no_ioctlcmd,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = j1939_sk_setsockopt,
+ .getsockopt = j1939_sk_getsockopt,
+ .sendmsg = j1939_sk_sendmsg,
+ .recvmsg = j1939_sk_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+static struct proto j1939_proto __read_mostly = {
+ .name = "CAN_J1939",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct j1939_sock),
+ .init = j1939_sk_init,
+};
+
+const struct can_proto j1939_can_proto = {
+ .type = SOCK_DGRAM,
+ .protocol = CAN_J1939,
+ .ops = &j1939_ops,
+ .prot = &j1939_proto,
+};
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
new file mode 100644
index 000000000000..fe000ea757ea
--- /dev/null
+++ b/net/can/j1939/transport.c
@@ -0,0 +1,2027 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+// Kurt Van Dijck <[email protected]>
+// Copyright (c) 2018 Protonic,
+// Robin van der Gracht <[email protected]>
+// Copyright (c) 2017-2019 Pengutronix,
+// Marc Kleine-Budde <[email protected]>
+// Copyright (c) 2017-2019 Pengutronix,
+// Oleksij Rempel <[email protected]>
+
+#include <linux/can/skb.h>
+
+#include "j1939-priv.h"
+
+#define J1939_XTP_TX_RETRY_LIMIT 100
+
+#define J1939_ETP_PGN_CTL 0xc800
+#define J1939_ETP_PGN_DAT 0xc700
+#define J1939_TP_PGN_CTL 0xec00
+#define J1939_TP_PGN_DAT 0xeb00
+
+#define J1939_TP_CMD_RTS 0x10
+#define J1939_TP_CMD_CTS 0x11
+#define J1939_TP_CMD_EOMA 0x13
+#define J1939_TP_CMD_BAM 0x20
+#define J1939_TP_CMD_ABORT 0xff
+
+#define J1939_ETP_CMD_RTS 0x14
+#define J1939_ETP_CMD_CTS 0x15
+#define J1939_ETP_CMD_DPO 0x16
+#define J1939_ETP_CMD_EOMA 0x17
+#define J1939_ETP_CMD_ABORT 0xff
+
+enum j1939_xtp_abort {
+ J1939_XTP_NO_ABORT = 0,
+ J1939_XTP_ABORT_BUSY = 1,
+ /* Already in one or more connection managed sessions and
+ * cannot support another.
+ *
+ * EALREADY:
+ * Operation already in progress
+ */
+
+ J1939_XTP_ABORT_RESOURCE = 2,
+ /* System resources were needed for another task so this
+ * connection managed session was terminated.
+ *
+ * EMSGSIZE:
+ * The socket type requires that message be sent atomically,
+ * and the size of the message to be sent made this
+ * impossible.
+ */
+
+ J1939_XTP_ABORT_TIMEOUT = 3,
+ /* A timeout occurred and this is the connection abort to
+ * close the session.
+ *
+ * EHOSTUNREACH:
+ * The destination host cannot be reached (probably because
+ * the host is down or a remote router cannot reach it).
+ */
+
+ J1939_XTP_ABORT_GENERIC = 4,
+ /* CTS messages received when data transfer is in progress
+ *
+ * EBADMSG:
+ * Not a data message
+ */
+
+ J1939_XTP_ABORT_FAULT = 5,
+ /* Maximal retransmit request limit reached
+ *
+ * ENOTRECOVERABLE:
+ * State not recoverable
+ */
+
+ J1939_XTP_ABORT_UNEXPECTED_DATA = 6,
+ /* Unexpected data transfer packet
+ *
+ * ENOTCONN:
+ * Transport endpoint is not connected
+ */
+
+ J1939_XTP_ABORT_BAD_SEQ = 7,
+ /* Bad sequence number (and software is not able to recover)
+ *
+ * EILSEQ:
+ * Illegal byte sequence
+ */
+
+ J1939_XTP_ABORT_DUP_SEQ = 8,
+ /* Duplicate sequence number (and software is not able to
+ * recover)
+ */
+
+ J1939_XTP_ABORT_EDPO_UNEXPECTED = 9,
+ /* Unexpected EDPO packet (ETP) or Message size > 1785 bytes
+ * (TP)
+ */
+
+ J1939_XTP_ABORT_BAD_EDPO_PGN = 10,
+ /* Unexpected EDPO PGN (PGN in EDPO is bad) */
+
+ J1939_XTP_ABORT_EDPO_OUTOF_CTS = 11,
+ /* EDPO number of packets is greater than CTS */
+
+ J1939_XTP_ABORT_BAD_EDPO_OFFSET = 12,
+ /* Bad EDPO offset */
+
+ J1939_XTP_ABORT_OTHER_DEPRECATED = 13,
+ /* Deprecated. Use 250 instead (Any other reason) */
+
+ J1939_XTP_ABORT_ECTS_UNXPECTED_PGN = 14,
+ /* Unexpected ECTS PGN (PGN in ECTS is bad) */
+
+ J1939_XTP_ABORT_ECTS_TOO_BIG = 15,
+ /* ECTS requested packets exceeds message size */
+
+ J1939_XTP_ABORT_OTHER = 250,
+ /* Any other reason (if a Connection Abort reason is
+ * identified that is not listed in the table use code 250)
+ */
+};
+
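+/* protocol tuning: maximum packets granted per CTS, delay between
+ * consecutive data packets (in ms) and padding of short data frames
+ * to 8 bytes
+ */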
+static unsigned int j1939_tp_block = 255;
+static unsigned int j1939_tp_packet_delay;
+static unsigned int j1939_tp_padding = 1;
+
+/* helpers */
+static const char *j1939_xtp_abort_to_str(enum j1939_xtp_abort abort)
+{
+ switch (abort) {
+ case J1939_XTP_ABORT_BUSY:
+ return "Already in one or more connection managed sessions and cannot support another.";
+ case J1939_XTP_ABORT_RESOURCE:
+ return "System resources were needed for another task so this connection managed session was terminated.";
+ case J1939_XTP_ABORT_TIMEOUT:
+ return "A timeout occurred and this is the connection abort to close the session.";
+ case J1939_XTP_ABORT_GENERIC:
+ return "CTS messages received when data transfer is in progress";
+ case J1939_XTP_ABORT_FAULT:
+ return "Maximal retransmit request limit reached";
+ case J1939_XTP_ABORT_UNEXPECTED_DATA:
+ return "Unexpected data transfer packet";
+ case J1939_XTP_ABORT_BAD_SEQ:
+ return "Bad sequence number (and software is not able to recover)";
+ case J1939_XTP_ABORT_DUP_SEQ:
+ return "Duplicate sequence number (and software is not able to recover)";
+ case J1939_XTP_ABORT_EDPO_UNEXPECTED:
+ return "Unexpected EDPO packet (ETP) or Message size > 1785 bytes (TP)";
+ case J1939_XTP_ABORT_BAD_EDPO_PGN:
+ return "Unexpected EDPO PGN (PGN in EDPO is bad)";
+ case J1939_XTP_ABORT_EDPO_OUTOF_CTS:
+ return "EDPO number of packets is greater than CTS";
+ case J1939_XTP_ABORT_BAD_EDPO_OFFSET:
+ return "Bad EDPO offset";
+ case J1939_XTP_ABORT_OTHER_DEPRECATED:
+ return "Deprecated. Use 250 instead (Any other reason)";
+ case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN:
+ return "Unexpected ECTS PGN (PGN in ECTS is bad)";
+ case J1939_XTP_ABORT_ECTS_TOO_BIG:
+ return "ECTS requested packets exceeds message size";
+ case J1939_XTP_ABORT_OTHER:
+ return "Any other reason (if a Connection Abort reason is identified that is not listed in the table use code 250)";
+ default:
+ return "<unknown>";
+ }
+}
+
+static int j1939_xtp_abort_to_errno(struct j1939_priv *priv,
+ enum j1939_xtp_abort abort)
+{
+ int err;
+
+ switch (abort) {
+ case J1939_XTP_NO_ABORT:
+ WARN_ON_ONCE(abort == J1939_XTP_NO_ABORT);
+ err = 0;
+ break;
+ case J1939_XTP_ABORT_BUSY:
+ err = EALREADY;
+ break;
+ case J1939_XTP_ABORT_RESOURCE:
+ err = EMSGSIZE;
+ break;
+ case J1939_XTP_ABORT_TIMEOUT:
+ err = EHOSTUNREACH;
+ break;
+ case J1939_XTP_ABORT_GENERIC:
+ err = EBADMSG;
+ break;
+ case J1939_XTP_ABORT_FAULT:
+ err = ENOTRECOVERABLE;
+ break;
+ case J1939_XTP_ABORT_UNEXPECTED_DATA:
+ err = ENOTCONN;
+ break;
+ case J1939_XTP_ABORT_BAD_SEQ:
+ err = EILSEQ;
+ break;
+ case J1939_XTP_ABORT_DUP_SEQ:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_EDPO_UNEXPECTED:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_BAD_EDPO_PGN:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_EDPO_OUTOF_CTS:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_BAD_EDPO_OFFSET:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_OTHER_DEPRECATED:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_ECTS_TOO_BIG:
+ err = EPROTO;
+ break;
+ case J1939_XTP_ABORT_OTHER:
+ err = EPROTO;
+ break;
+ default:
+ netdev_warn(priv->ndev, "Unknown abort code %i", abort);
+ err = EPROTO;
+ }
+
+ return err;
+}
+
+static inline void j1939_session_list_lock(struct j1939_priv *priv)
+{
+ spin_lock_bh(&priv->active_session_list_lock);
+}
+
+static inline void j1939_session_list_unlock(struct j1939_priv *priv)
+{
+ spin_unlock_bh(&priv->active_session_list_lock);
+}
+
+void j1939_session_get(struct j1939_session *session)
+{
+ kref_get(&session->kref);
+}
+
+/* session completion functions */
+static void __j1939_session_drop(struct j1939_session *session)
+{
+ if (!session->transmission)
+ return;
+
+ j1939_sock_pending_del(session->sk);
+}
+
+static void j1939_session_destroy(struct j1939_session *session)
+{
+ if (session->err)
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT);
+ else
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK);
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ skb_queue_purge(&session->skb_queue);
+ __j1939_session_drop(session);
+ j1939_priv_put(session->priv);
+ kfree(session);
+}
+
+static void __j1939_session_release(struct kref *kref)
+{
+ struct j1939_session *session = container_of(kref, struct j1939_session,
+ kref);
+
+ j1939_session_destroy(session);
+}
+
+void j1939_session_put(struct j1939_session *session)
+{
+ kref_put(&session->kref, __j1939_session_release);
+}
+
+static void j1939_session_txtimer_cancel(struct j1939_session *session)
+{
+ if (hrtimer_cancel(&session->txtimer))
+ j1939_session_put(session);
+}
+
+static void j1939_session_rxtimer_cancel(struct j1939_session *session)
+{
+ if (hrtimer_cancel(&session->rxtimer))
+ j1939_session_put(session);
+}
+
+void j1939_session_timers_cancel(struct j1939_session *session)
+{
+ j1939_session_txtimer_cancel(session);
+ j1939_session_rxtimer_cancel(session);
+}
+
+static inline bool j1939_cb_is_broadcast(const struct j1939_sk_buff_cb *skcb)
+{
+ return (!skcb->addr.dst_name && (skcb->addr.da == 0xff));
+}
+
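+/* free the oldest queued skb once all of its data has been acknowledged */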
+static void j1939_session_skb_drop_old(struct j1939_session *session)
+{
+ struct sk_buff *do_skb;
+ struct j1939_sk_buff_cb *do_skcb;
+ unsigned int offset_start;
+ unsigned long flags;
+
+ if (skb_queue_len(&session->skb_queue) < 2)
+ return;
+
+ offset_start = session->pkt.tx_acked * 7;
+
+ spin_lock_irqsave(&session->skb_queue.lock, flags);
+ do_skb = skb_peek(&session->skb_queue);
+ do_skcb = j1939_skb_to_cb(do_skb);
+
+ if ((do_skcb->offset + do_skb->len) < offset_start) {
+ __skb_unlink(do_skb, &session->skb_queue);
+ kfree_skb(do_skb);
+ }
+ spin_unlock_irqrestore(&session->skb_queue.lock, flags);
+}
+
+void j1939_session_skb_queue(struct j1939_session *session,
+ struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_priv *priv = session->priv;
+
+ j1939_ac_fixup(priv, skb);
+
+ if (j1939_address_is_unicast(skcb->addr.da) &&
+ priv->ents[skcb->addr.da].nusers)
+ skcb->flags |= J1939_ECU_LOCAL_DST;
+
+ skcb->flags |= J1939_ECU_LOCAL_SRC;
+
+ skb_queue_tail(&session->skb_queue, skb);
+}
+
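+/* find the queued skb that holds the data at the current data packet offset */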
+static struct sk_buff *j1939_session_skb_find(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ struct sk_buff *skb = NULL;
+ struct sk_buff *do_skb;
+ struct j1939_sk_buff_cb *do_skcb;
+ unsigned int offset_start;
+ unsigned long flags;
+
+ offset_start = session->pkt.dpo * 7;
+
+ spin_lock_irqsave(&session->skb_queue.lock, flags);
+ skb_queue_walk(&session->skb_queue, do_skb) {
+ do_skcb = j1939_skb_to_cb(do_skb);
+
+ if (offset_start >= do_skcb->offset &&
+ offset_start < (do_skcb->offset + do_skb->len)) {
+ skb = do_skb;
+ }
+ }
+ spin_unlock_irqrestore(&session->skb_queue.lock, flags);
+
+ if (!skb)
+ netdev_dbg(priv->ndev, "%s: 0x%p: no skb found for start: %i, queue size: %i\n",
+ __func__, session, offset_start,
+ skb_queue_len(&session->skb_queue));
+
+ return skb;
+}
+
+/* see if we are receiver
+ * returns 0 for broadcasts, although we will receive them
+ */
+static inline int j1939_tp_im_receiver(const struct j1939_sk_buff_cb *skcb)
+{
+ return skcb->flags & J1939_ECU_LOCAL_DST;
+}
+
+/* see if we are sender */
+static inline int j1939_tp_im_transmitter(const struct j1939_sk_buff_cb *skcb)
+{
+ return skcb->flags & J1939_ECU_LOCAL_SRC;
+}
+
+/* see if we are involved as either receiver or transmitter */
+static int j1939_tp_im_involved(const struct j1939_sk_buff_cb *skcb, bool swap)
+{
+ if (swap)
+ return j1939_tp_im_receiver(skcb);
+ else
+ return j1939_tp_im_transmitter(skcb);
+}
+
+static int j1939_tp_im_involved_anydir(struct j1939_sk_buff_cb *skcb)
+{
+ return skcb->flags & (J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST);
+}
+
+/* extract pgn from flow-ctl message */
+static inline pgn_t j1939_xtp_ctl_to_pgn(const u8 *dat)
+{
+ pgn_t pgn;
+
+ pgn = (dat[7] << 16) | (dat[6] << 8) | (dat[5] << 0);
+ if (j1939_pgn_is_pdu1(pgn))
+ pgn &= 0xffff00;
+ return pgn;
+}
+
+static inline unsigned int j1939_tp_ctl_to_size(const u8 *dat)
+{
+ return (dat[2] << 8) + (dat[1] << 0);
+}
+
+static inline unsigned int j1939_etp_ctl_to_packet(const u8 *dat)
+{
+ return (dat[4] << 16) | (dat[3] << 8) | (dat[2] << 0);
+}
+
+static inline unsigned int j1939_etp_ctl_to_size(const u8 *dat)
+{
+ return (dat[4] << 24) | (dat[3] << 16) |
+ (dat[2] << 8) | (dat[1] << 0);
+}
+
+/* find existing session:
+ * reverse: swap cb's src & dst
+ * there is no problem with matching broadcasts, since
+ * broadcasts (no dst, no da) would never call this
+ * with reverse == true
+ */
+static bool j1939_session_match(struct j1939_addr *se_addr,
+ struct j1939_addr *sk_addr, bool reverse)
+{
+ if (se_addr->type != sk_addr->type)
+ return false;
+
+ if (reverse) {
+ if (se_addr->src_name) {
+ if (se_addr->src_name != sk_addr->dst_name)
+ return false;
+ } else if (se_addr->sa != sk_addr->da) {
+ return false;
+ }
+
+ if (se_addr->dst_name) {
+ if (se_addr->dst_name != sk_addr->src_name)
+ return false;
+ } else if (se_addr->da != sk_addr->sa) {
+ return false;
+ }
+ } else {
+ if (se_addr->src_name) {
+ if (se_addr->src_name != sk_addr->src_name)
+ return false;
+ } else if (se_addr->sa != sk_addr->sa) {
+ return false;
+ }
+
+ if (se_addr->dst_name) {
+ if (se_addr->dst_name != sk_addr->dst_name)
+ return false;
+ } else if (se_addr->da != sk_addr->da) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static struct
+j1939_session *j1939_session_get_by_addr_locked(struct j1939_priv *priv,
+ struct list_head *root,
+ struct j1939_addr *addr,
+ bool reverse, bool transmitter)
+{
+ struct j1939_session *session;
+
+ lockdep_assert_held(&priv->active_session_list_lock);
+
+ list_for_each_entry(session, root, active_session_list_entry) {
+ j1939_session_get(session);
+ if (j1939_session_match(&session->skcb.addr, addr, reverse) &&
+ session->transmission == transmitter)
+ return session;
+ j1939_session_put(session);
+ }
+
+ return NULL;
+}
+
+static struct
+j1939_session *j1939_session_get_simple(struct j1939_priv *priv,
+ struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+
+ lockdep_assert_held(&priv->active_session_list_lock);
+
+ list_for_each_entry(session, &priv->active_session_list,
+ active_session_list_entry) {
+ j1939_session_get(session);
+ if (session->skcb.addr.type == J1939_SIMPLE &&
+ session->tskey == skcb->tskey && session->sk == skb->sk)
+ return session;
+ j1939_session_put(session);
+ }
+
+ return NULL;
+}
+
+static struct
+j1939_session *j1939_session_get_by_addr(struct j1939_priv *priv,
+ struct j1939_addr *addr,
+ bool reverse, bool transmitter)
+{
+ struct j1939_session *session;
+
+ j1939_session_list_lock(priv);
+ session = j1939_session_get_by_addr_locked(priv,
+ &priv->active_session_list,
+ addr, reverse, transmitter);
+ j1939_session_list_unlock(priv);
+
+ return session;
+}
+
+static void j1939_skbcb_swap(struct j1939_sk_buff_cb *skcb)
+{
+ u8 tmp = 0;
+
+ swap(skcb->addr.dst_name, skcb->addr.src_name);
+ swap(skcb->addr.da, skcb->addr.sa);
+
+ /* swap SRC and DST flags, leave other untouched */
+ if (skcb->flags & J1939_ECU_LOCAL_SRC)
+ tmp |= J1939_ECU_LOCAL_DST;
+ if (skcb->flags & J1939_ECU_LOCAL_DST)
+ tmp |= J1939_ECU_LOCAL_SRC;
+ skcb->flags &= ~(J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST);
+ skcb->flags |= tmp;
+}
+
+static struct
+sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv,
+ const struct j1939_sk_buff_cb *re_skcb,
+ bool ctl,
+ bool swap_src_dst)
+{
+ struct sk_buff *skb;
+ struct j1939_sk_buff_cb *skcb;
+
+ skb = alloc_skb(sizeof(struct can_frame) + sizeof(struct can_skb_priv),
+ GFP_ATOMIC);
+ if (unlikely(!skb))
+ return ERR_PTR(-ENOMEM);
+
+ skb->dev = priv->ndev;
+ can_skb_reserve(skb);
+ can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+ /* reserve CAN header */
+ skb_reserve(skb, offsetof(struct can_frame, data));
+
+ /* only copy the J1939 cb; re_skcb may be smaller than skb->cb */
+ memcpy(skb->cb, re_skcb, sizeof(*re_skcb));
+ skcb = j1939_skb_to_cb(skb);
+ if (swap_src_dst)
+ j1939_skbcb_swap(skcb);
+
+ if (ctl) {
+ if (skcb->addr.type == J1939_ETP)
+ skcb->addr.pgn = J1939_ETP_PGN_CTL;
+ else
+ skcb->addr.pgn = J1939_TP_PGN_CTL;
+ } else {
+ if (skcb->addr.type == J1939_ETP)
+ skcb->addr.pgn = J1939_ETP_PGN_DAT;
+ else
+ skcb->addr.pgn = J1939_TP_PGN_DAT;
+ }
+
+ return skb;
+}
+
+/* TP transmit packet functions */
+static int j1939_tp_tx_dat(struct j1939_session *session,
+ const u8 *dat, int len)
+{
+ struct j1939_priv *priv = session->priv;
+ struct sk_buff *skb;
+
+ skb = j1939_tp_tx_dat_new(priv, &session->skcb,
+ false, false);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ skb_put_data(skb, dat, len);
+ if (j1939_tp_padding && len < 8)
+ memset(skb_put(skb, 8 - len), 0xff, 8 - len);
+
+ return j1939_send_one(priv, skb);
+}
+
+static int j1939_xtp_do_tx_ctl(struct j1939_priv *priv,
+ const struct j1939_sk_buff_cb *re_skcb,
+ bool swap_src_dst, pgn_t pgn, const u8 *dat)
+{
+ struct sk_buff *skb;
+ u8 *skdat;
+
+ if (!j1939_tp_im_involved(re_skcb, swap_src_dst))
+ return 0;
+
+ skb = j1939_tp_tx_dat_new(priv, re_skcb, true, swap_src_dst);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ skdat = skb_put(skb, 8);
+ memcpy(skdat, dat, 5);
+ skdat[5] = (pgn >> 0);
+ skdat[6] = (pgn >> 8);
+ skdat[7] = (pgn >> 16);
+
+ return j1939_send_one(priv, skb);
+}
+
+static inline int j1939_tp_tx_ctl(struct j1939_session *session,
+ bool swap_src_dst, const u8 *dat)
+{
+ struct j1939_priv *priv = session->priv;
+
+ return j1939_xtp_do_tx_ctl(priv, &session->skcb,
+ swap_src_dst,
+ session->skcb.addr.pgn, dat);
+}
+
+static int j1939_xtp_tx_abort(struct j1939_priv *priv,
+ const struct j1939_sk_buff_cb *re_skcb,
+ bool swap_src_dst,
+ enum j1939_xtp_abort err,
+ pgn_t pgn)
+{
+ u8 dat[5];
+
+ if (!j1939_tp_im_involved(re_skcb, swap_src_dst))
+ return 0;
+
+ memset(dat, 0xff, sizeof(dat));
+ dat[0] = J1939_TP_CMD_ABORT;
+ dat[1] = err;
+ return j1939_xtp_do_tx_ctl(priv, re_skcb, swap_src_dst, pgn, dat);
+}
+
+void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec)
+{
+ j1939_session_get(session);
+ hrtimer_start(&session->txtimer, ms_to_ktime(msec),
+ HRTIMER_MODE_REL_SOFT);
+}
+
+static inline void j1939_tp_set_rxtimeout(struct j1939_session *session,
+ int msec)
+{
+ j1939_session_rxtimer_cancel(session);
+ j1939_session_get(session);
+ hrtimer_start(&session->rxtimer, ms_to_ktime(msec),
+ HRTIMER_MODE_REL_SOFT);
+}
+
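+/* transmitter: open the session with RTS, or BAM for broadcast transfers */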
+static int j1939_session_tx_rts(struct j1939_session *session)
+{
+ u8 dat[8];
+ int ret;
+
+ memset(dat, 0xff, sizeof(dat));
+
+ dat[1] = (session->total_message_size >> 0);
+ dat[2] = (session->total_message_size >> 8);
+ dat[3] = session->pkt.total;
+
+ if (session->skcb.addr.type == J1939_ETP) {
+ dat[0] = J1939_ETP_CMD_RTS;
+ dat[1] = (session->total_message_size >> 0);
+ dat[2] = (session->total_message_size >> 8);
+ dat[3] = (session->total_message_size >> 16);
+ dat[4] = (session->total_message_size >> 24);
+ } else if (j1939_cb_is_broadcast(&session->skcb)) {
+ dat[0] = J1939_TP_CMD_BAM;
+ /* fake cts for broadcast */
+ session->pkt.tx = 0;
+ } else {
+ dat[0] = J1939_TP_CMD_RTS;
+ dat[4] = dat[3];
+ }
+
+ if (dat[0] == session->last_txcmd)
+ /* done already */
+ return 0;
+
+ ret = j1939_tp_tx_ctl(session, false, dat);
+ if (ret < 0)
+ return ret;
+
+ session->last_txcmd = dat[0];
+ if (dat[0] == J1939_TP_CMD_BAM)
+ j1939_tp_schedule_txtimer(session, 50);
+
+ j1939_tp_set_rxtimeout(session, 1250);
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ return 0;
+}
+
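+/* transmitter, ETP only: announce the offset of the next data block with DPO */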
+static int j1939_session_tx_dpo(struct j1939_session *session)
+{
+ unsigned int pkt;
+ u8 dat[8];
+ int ret;
+
+ memset(dat, 0xff, sizeof(dat));
+
+ dat[0] = J1939_ETP_CMD_DPO;
+ session->pkt.dpo = session->pkt.tx_acked;
+ pkt = session->pkt.dpo;
+ dat[1] = session->pkt.last - session->pkt.tx_acked;
+ dat[2] = (pkt >> 0);
+ dat[3] = (pkt >> 8);
+ dat[4] = (pkt >> 16);
+
+ ret = j1939_tp_tx_ctl(session, false, dat);
+ if (ret < 0)
+ return ret;
+
+ session->last_txcmd = dat[0];
+ j1939_tp_set_rxtimeout(session, 1250);
+ session->pkt.tx = session->pkt.tx_acked;
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ return 0;
+}
+
+static int j1939_session_tx_dat(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ struct j1939_sk_buff_cb *skcb;
+ int offset, pkt_done, pkt_end;
+ unsigned int len, pdelay;
+ struct sk_buff *se_skb;
+ const u8 *tpdat;
+ int ret = 0;
+ u8 dat[8];
+
+ se_skb = j1939_session_skb_find(session);
+ if (!se_skb)
+ return -ENOBUFS;
+
+ skcb = j1939_skb_to_cb(se_skb);
+ tpdat = se_skb->data;
+ ret = 0;
+ pkt_done = 0;
+ if (session->skcb.addr.type != J1939_ETP &&
+ j1939_cb_is_broadcast(&session->skcb))
+ pkt_end = session->pkt.total;
+ else
+ pkt_end = session->pkt.last;
+
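+ /* each TP.DT/ETP.DT frame carries a one byte sequence number
+ * followed by up to 7 bytes of payload
+ */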
+ while (session->pkt.tx < pkt_end) {
+ dat[0] = session->pkt.tx - session->pkt.dpo + 1;
+ offset = (session->pkt.tx * 7) - skcb->offset;
+ len = se_skb->len - offset;
+ if (len > 7)
+ len = 7;
+
+ memcpy(&dat[1], &tpdat[offset], len);
+ ret = j1939_tp_tx_dat(session, dat, len + 1);
+ if (ret < 0) {
+ /* ENOBUFS == CAN interface TX queue is full */
+ if (ret != -ENOBUFS)
+ netdev_alert(priv->ndev,
+ "%s: 0x%p: queue data error: %i\n",
+ __func__, session, ret);
+ break;
+ }
+
+ session->last_txcmd = 0xff;
+ pkt_done++;
+ session->pkt.tx++;
+ pdelay = j1939_cb_is_broadcast(&session->skcb) ? 50 :
+ j1939_tp_packet_delay;
+
+ if (session->pkt.tx < session->pkt.total && pdelay) {
+ j1939_tp_schedule_txtimer(session, pdelay);
+ break;
+ }
+ }
+
+ if (pkt_done)
+ j1939_tp_set_rxtimeout(session, 250);
+
+ return ret;
+}
+
+static int j1939_xtp_txnext_transmiter(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ int ret = 0;
+
+ if (!j1939_tp_im_transmitter(&session->skcb)) {
+ netdev_alert(priv->ndev, "%s: 0x%p: called although we are not the transmitter!\n",
+ __func__, session);
+ return -EINVAL;
+ }
+
+ switch (session->last_cmd) {
+ case 0:
+ ret = j1939_session_tx_rts(session);
+ break;
+
+ case J1939_ETP_CMD_CTS:
+ if (session->last_txcmd != J1939_ETP_CMD_DPO) {
+ ret = j1939_session_tx_dpo(session);
+ if (ret)
+ return ret;
+ }
+
+ /* fall through */
+ case J1939_TP_CMD_CTS:
+ case 0xff: /* did some data */
+ case J1939_ETP_CMD_DPO:
+ case J1939_TP_CMD_BAM:
+ ret = j1939_session_tx_dat(session);
+
+ break;
+ default:
+ netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n",
+ __func__, session, session->last_cmd);
+ }
+
+ return ret;
+}
+
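+/* receiver: grant the transmitter the next block of packets with CTS */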
+static int j1939_session_tx_cts(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ unsigned int pkt, len;
+ int ret;
+ u8 dat[8];
+
+ if (!j1939_sk_recv_match(priv, &session->skcb))
+ return -ENOENT;
+
+ len = session->pkt.total - session->pkt.rx;
+ len = min3(len, session->pkt.block, j1939_tp_block ?: 255);
+ memset(dat, 0xff, sizeof(dat));
+
+ if (session->skcb.addr.type == J1939_ETP) {
+ pkt = session->pkt.rx + 1;
+ dat[0] = J1939_ETP_CMD_CTS;
+ dat[1] = len;
+ dat[2] = (pkt >> 0);
+ dat[3] = (pkt >> 8);
+ dat[4] = (pkt >> 16);
+ } else {
+ dat[0] = J1939_TP_CMD_CTS;
+ dat[1] = len;
+ dat[2] = session->pkt.rx + 1;
+ }
+
+ if (dat[0] == session->last_txcmd)
+ /* done already */
+ return 0;
+
+ ret = j1939_tp_tx_ctl(session, true, dat);
+ if (ret < 0)
+ return ret;
+
+ if (len)
+ /* only mark cts done when len is set */
+ session->last_txcmd = dat[0];
+ j1939_tp_set_rxtimeout(session, 1250);
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ return 0;
+}
+
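+/* receiver: acknowledge the completely received message with EOMA */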
+static int j1939_session_tx_eoma(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ u8 dat[8];
+ int ret;
+
+ if (!j1939_sk_recv_match(priv, &session->skcb))
+ return -ENOENT;
+
+ memset(dat, 0xff, sizeof(dat));
+
+ if (session->skcb.addr.type == J1939_ETP) {
+ dat[0] = J1939_ETP_CMD_EOMA;
+ dat[1] = session->total_message_size >> 0;
+ dat[2] = session->total_message_size >> 8;
+ dat[3] = session->total_message_size >> 16;
+ dat[4] = session->total_message_size >> 24;
+ } else {
+ dat[0] = J1939_TP_CMD_EOMA;
+ dat[1] = session->total_message_size;
+ dat[2] = session->total_message_size >> 8;
+ dat[3] = session->pkt.total;
+ }
+
+ if (dat[0] == session->last_txcmd)
+ /* done already */
+ return 0;
+
+ ret = j1939_tp_tx_ctl(session, true, dat);
+ if (ret < 0)
+ return ret;
+
+ session->last_txcmd = dat[0];
+
+ /* wait for the EOMA packet to come in */
+ j1939_tp_set_rxtimeout(session, 1250);
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ return 0;
+}
+
+static int j1939_xtp_txnext_receiver(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ int ret = 0;
+
+ if (!j1939_tp_im_receiver(&session->skcb)) {
+ netdev_alert(priv->ndev, "%s: 0x%p: called although we are not the receiver!\n",
+ __func__, session);
+ return -EINVAL;
+ }
+
+ switch (session->last_cmd) {
+ case J1939_TP_CMD_RTS:
+ case J1939_ETP_CMD_RTS:
+ ret = j1939_session_tx_cts(session);
+ break;
+
+ case J1939_ETP_CMD_CTS:
+ case J1939_TP_CMD_CTS:
+ case 0xff: /* did some data */
+ case J1939_ETP_CMD_DPO:
+ if ((session->skcb.addr.type == J1939_TP &&
+ j1939_cb_is_broadcast(&session->skcb)))
+ break;
+
+ if (session->pkt.rx >= session->pkt.total) {
+ ret = j1939_session_tx_eoma(session);
+ } else if (session->pkt.rx >= session->pkt.last) {
+ session->last_txcmd = 0;
+ ret = j1939_session_tx_cts(session);
+ }
+ break;
+ default:
+ netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n",
+ __func__, session, session->last_cmd);
+ }
+
+ return ret;
+}
+
+static int j1939_simple_txnext(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ struct sk_buff *se_skb = j1939_session_skb_find(session);
+ struct sk_buff *skb;
+ int ret;
+
+ if (!se_skb)
+ return 0;
+
+ skb = skb_clone(se_skb, GFP_ATOMIC);
+ if (!skb)
+ return -ENOMEM;
+
+ can_skb_set_owner(skb, se_skb->sk);
+
+ j1939_tp_set_rxtimeout(session, J1939_SIMPLE_ECHO_TIMEOUT_MS);
+
+ ret = j1939_send_one(priv, skb);
+ if (ret)
+ return ret;
+
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED);
+ j1939_sk_queue_activate_next(session);
+
+ return 0;
+}
+
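+/* remove the session from the active list and drop the list's reference;
+ * returns true if the session was still active
+ */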
+static bool j1939_session_deactivate_locked(struct j1939_session *session)
+{
+ bool active = false;
+
+ lockdep_assert_held(&session->priv->active_session_list_lock);
+
+ if (session->state >= J1939_SESSION_ACTIVE &&
+ session->state < J1939_SESSION_ACTIVE_MAX) {
+ active = true;
+
+ list_del_init(&session->active_session_list_entry);
+ session->state = J1939_SESSION_DONE;
+ j1939_session_put(session);
+ }
+
+ return active;
+}
+
+static bool j1939_session_deactivate(struct j1939_session *session)
+{
+ bool active;
+
+ j1939_session_list_lock(session->priv);
+ active = j1939_session_deactivate_locked(session);
+ j1939_session_list_unlock(session->priv);
+
+ return active;
+}
+
+static void
+j1939_session_deactivate_activate_next(struct j1939_session *session)
+{
+ if (j1939_session_deactivate(session))
+ j1939_sk_queue_activate_next(session);
+}
+
+static void j1939_session_cancel(struct j1939_session *session,
+ enum j1939_xtp_abort err)
+{
+ struct j1939_priv *priv = session->priv;
+
+ WARN_ON_ONCE(!err);
+
+ session->err = j1939_xtp_abort_to_errno(priv, err);
+ /* do not send aborts on incoming broadcasts */
+ if (!j1939_cb_is_broadcast(&session->skcb)) {
+ session->state = J1939_SESSION_WAITING_ABORT;
+ j1939_xtp_tx_abort(priv, &session->skcb,
+ !session->transmission,
+ err, session->skcb.addr.pgn);
+ }
+
+ if (session->sk)
+ j1939_sk_send_loop_abort(session->sk, session->err);
+}
+
+static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
+{
+ struct j1939_session *session =
+ container_of(hrtimer, struct j1939_session, txtimer);
+ struct j1939_priv *priv = session->priv;
+ int ret = 0;
+
+ if (session->skcb.addr.type == J1939_SIMPLE) {
+ ret = j1939_simple_txnext(session);
+ } else {
+ if (session->transmission)
+ ret = j1939_xtp_txnext_transmiter(session);
+ else
+ ret = j1939_xtp_txnext_receiver(session);
+ }
+
+ switch (ret) {
+ case -ENOBUFS:
+ /* Retry limit is currently arbitrarily chosen */
+ if (session->tx_retry < J1939_XTP_TX_RETRY_LIMIT) {
+ session->tx_retry++;
+ j1939_tp_schedule_txtimer(session,
+ 10 + prandom_u32_max(16));
+ } else {
+ netdev_alert(priv->ndev, "%s: 0x%p: tx retry count reached\n",
+ __func__, session);
+ session->err = -ENETUNREACH;
+ j1939_session_rxtimer_cancel(session);
+ j1939_session_deactivate_activate_next(session);
+ }
+ break;
+ case -ENETDOWN:
+ /* In this case we should get a netdev_event(), all active
+ * sessions will be cleared by
+ * j1939_cancel_all_active_sessions(). So handle this as an
+ * error, but let j1939_cancel_all_active_sessions() do the
+ * cleanup including propagation of the error to user space.
+ */
+ break;
+ case 0:
+ session->tx_retry = 0;
+ break;
+ default:
+ netdev_alert(priv->ndev, "%s: 0x%p: tx aborted with unknown reason: %i\n",
+ __func__, session, ret);
+ if (session->skcb.addr.type != J1939_SIMPLE) {
+ j1939_tp_set_rxtimeout(session,
+ J1939_XTP_ABORT_TIMEOUT_MS);
+ j1939_session_cancel(session, J1939_XTP_ABORT_OTHER);
+ } else {
+ session->err = ret;
+ j1939_session_rxtimer_cancel(session);
+ j1939_session_deactivate_activate_next(session);
+ }
+ }
+
+ j1939_session_put(session);
+
+ return HRTIMER_NORESTART;
+}
+
+static void j1939_session_completed(struct j1939_session *session)
+{
+ struct sk_buff *skb;
+
+ if (!session->transmission) {
+ skb = j1939_session_skb_find(session);
+ /* distribute among j1939 receivers */
+ j1939_sk_recv(session->priv, skb);
+ }
+
+ j1939_session_deactivate_activate_next(session);
+}
+
+static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer)
+{
+ struct j1939_session *session = container_of(hrtimer,
+ struct j1939_session,
+ rxtimer);
+ struct j1939_priv *priv = session->priv;
+
+ if (session->state == J1939_SESSION_WAITING_ABORT) {
+ netdev_alert(priv->ndev, "%s: 0x%p: abort rx timeout. Force session deactivation\n",
+ __func__, session);
+
+ j1939_session_deactivate_activate_next(session);
+
+ } else if (session->skcb.addr.type == J1939_SIMPLE) {
+ netdev_alert(priv->ndev, "%s: 0x%p: Timeout. Failed to send simple message.\n",
+ __func__, session);
+
+ /* The message is probably stuck in the CAN controller and can
+ * be sent as soon as the CAN bus is in a working state again.
+ */
+ session->err = -ETIME;
+ j1939_session_deactivate(session);
+ } else {
+ netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n",
+ __func__, session);
+
+ j1939_session_list_lock(session->priv);
+ if (session->state >= J1939_SESSION_ACTIVE &&
+ session->state < J1939_SESSION_ACTIVE_MAX) {
+ j1939_session_get(session);
+ hrtimer_start(&session->rxtimer,
+ ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS),
+ HRTIMER_MODE_REL_SOFT);
+ j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT);
+ }
+ j1939_session_list_unlock(session->priv);
+ }
+
+ j1939_session_put(session);
+
+ return HRTIMER_NORESTART;
+}
+
+static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session,
+ const struct sk_buff *skb)
+{
+ const struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ pgn_t pgn = j1939_xtp_ctl_to_pgn(skb->data);
+ struct j1939_priv *priv = session->priv;
+ enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT;
+ u8 cmd = skb->data[0];
+
+ if (session->skcb.addr.pgn == pgn)
+ return false;
+
+ switch (cmd) {
+ case J1939_TP_CMD_BAM:
+ abort = J1939_XTP_NO_ABORT;
+ break;
+
+ case J1939_ETP_CMD_RTS:
+ case J1939_TP_CMD_RTS: /* fall through */
+ abort = J1939_XTP_ABORT_BUSY;
+ break;
+
+ case J1939_ETP_CMD_CTS:
+ case J1939_TP_CMD_CTS: /* fall through */
+ abort = J1939_XTP_ABORT_ECTS_UNXPECTED_PGN;
+ break;
+
+ case J1939_ETP_CMD_DPO:
+ abort = J1939_XTP_ABORT_BAD_EDPO_PGN;
+ break;
+
+ case J1939_ETP_CMD_EOMA:
+ case J1939_TP_CMD_EOMA: /* fall through */
+ abort = J1939_XTP_ABORT_OTHER;
+ break;
+
+ case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */
+ abort = J1939_XTP_NO_ABORT;
+ break;
+
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ netdev_warn(priv->ndev, "%s: 0x%p: CMD 0x%02x with PGN 0x%05x for running session with different PGN 0x%05x.\n",
+ __func__, session, cmd, pgn, session->skcb.addr.pgn);
+ if (abort != J1939_XTP_NO_ABORT)
+ j1939_xtp_tx_abort(priv, skcb, true, abort, pgn);
+
+ return true;
+}
+
+static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb,
+ bool reverse, bool transmitter)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+ u8 abort = skb->data[1];
+
+ session = j1939_session_get_by_addr(priv, &skcb->addr, reverse,
+ transmitter);
+ if (!session)
+ return;
+
+ if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+ goto abort_put;
+
+ netdev_info(priv->ndev, "%s: 0x%p: 0x%05x: (%u) %s\n", __func__,
+ session, j1939_xtp_ctl_to_pgn(skb->data), abort,
+ j1939_xtp_abort_to_str(abort));
+
+ j1939_session_timers_cancel(session);
+ session->err = j1939_xtp_abort_to_errno(priv, abort);
+ if (session->sk)
+ j1939_sk_send_loop_abort(session->sk, session->err);
+ j1939_session_deactivate_activate_next(session);
+
+abort_put:
+ j1939_session_put(session);
+}
+
+/* abort packets may come in 2 directions */
+static void
+j1939_xtp_rx_abort(struct j1939_priv *priv, struct sk_buff *skb,
+ bool transmitter)
+{
+ j1939_xtp_rx_abort_one(priv, skb, false, transmitter);
+ j1939_xtp_rx_abort_one(priv, skb, true, transmitter);
+}
+
+static void
+j1939_xtp_rx_eoma_one(struct j1939_session *session, struct sk_buff *skb)
+{
+ if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+ return;
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ session->pkt.tx_acked = session->pkt.total;
+ j1939_session_timers_cancel(session);
+ /* transmitted without problems */
+ j1939_session_completed(session);
+}
+
+static void
+j1939_xtp_rx_eoma(struct j1939_priv *priv, struct sk_buff *skb,
+ bool transmitter)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+
+ session = j1939_session_get_by_addr(priv, &skcb->addr, true,
+ transmitter);
+ if (!session)
+ return;
+
+ j1939_xtp_rx_eoma_one(session, skb);
+ j1939_session_put(session);
+}
+
+static void
+j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb)
+{
+ enum j1939_xtp_abort err = J1939_XTP_ABORT_FAULT;
+ unsigned int pkt;
+ const u8 *dat;
+
+ dat = skb->data;
+
+ if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+ return;
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ if (session->last_cmd == dat[0]) {
+ err = J1939_XTP_ABORT_DUP_SEQ;
+ goto out_session_cancel;
+ }
+
+ if (session->skcb.addr.type == J1939_ETP)
+ pkt = j1939_etp_ctl_to_packet(dat);
+ else
+ pkt = dat[2];
+
+ if (!pkt)
+ goto out_session_cancel;
+ else if (dat[1] > session->pkt.block /* 0xff for etp */)
+ goto out_session_cancel;
+
+ /* set packet counters only when not CTS(0) */
+ session->pkt.tx_acked = pkt - 1;
+ j1939_session_skb_drop_old(session);
+ session->pkt.last = session->pkt.tx_acked + dat[1];
+ if (session->pkt.last > session->pkt.total)
+ /* safety measure */
+ session->pkt.last = session->pkt.total;
+ /* TODO: do not set tx here, do it in txtimer */
+ session->pkt.tx = session->pkt.tx_acked;
+
+ session->last_cmd = dat[0];
+ if (dat[1]) {
+ j1939_tp_set_rxtimeout(session, 1250);
+ if (session->transmission) {
+ if (session->pkt.tx_acked)
+ j1939_sk_errqueue(session,
+ J1939_ERRQUEUE_SCHED);
+ j1939_session_txtimer_cancel(session);
+ j1939_tp_schedule_txtimer(session, 0);
+ }
+ } else {
+ /* CTS(0) */
+ j1939_tp_set_rxtimeout(session, 550);
+ }
+ return;
+
+ out_session_cancel:
+ j1939_session_timers_cancel(session);
+ j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+ j1939_session_cancel(session, err);
+}
+
+static void
+j1939_xtp_rx_cts(struct j1939_priv *priv, struct sk_buff *skb, bool transmitter)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+
+ session = j1939_session_get_by_addr(priv, &skcb->addr, true,
+ transmitter);
+ if (!session)
+ return;
+ j1939_xtp_rx_cts_one(session, skb);
+ j1939_session_put(session);
+}
+
+static struct j1939_session *j1939_session_new(struct j1939_priv *priv,
+ struct sk_buff *skb, size_t size)
+{
+ struct j1939_session *session;
+ struct j1939_sk_buff_cb *skcb;
+
+ session = kzalloc(sizeof(*session), gfp_any());
+ if (!session)
+ return NULL;
+
+ INIT_LIST_HEAD(&session->active_session_list_entry);
+ INIT_LIST_HEAD(&session->sk_session_queue_entry);
+ kref_init(&session->kref);
+
+ j1939_priv_get(priv);
+ session->priv = priv;
+ session->total_message_size = size;
+ session->state = J1939_SESSION_NEW;
+
+ skb_queue_head_init(&session->skb_queue);
+ skb_queue_tail(&session->skb_queue, skb);
+
+ skcb = j1939_skb_to_cb(skb);
+ memcpy(&session->skcb, skcb, sizeof(session->skcb));
+
+ hrtimer_init(&session->txtimer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_SOFT);
+ session->txtimer.function = j1939_tp_txtimer;
+ hrtimer_init(&session->rxtimer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_SOFT);
+ session->rxtimer.function = j1939_tp_rxtimer;
+
+ netdev_dbg(priv->ndev, "%s: 0x%p: sa: %02x, da: %02x\n",
+ __func__, session, skcb->addr.sa, skcb->addr.da);
+
+ return session;
+}
+
+static struct
+j1939_session *j1939_session_fresh_new(struct j1939_priv *priv,
+ int size,
+ const struct j1939_sk_buff_cb *rel_skcb)
+{
+ struct sk_buff *skb;
+ struct j1939_sk_buff_cb *skcb;
+ struct j1939_session *session;
+
+ skb = alloc_skb(size + sizeof(struct can_skb_priv), GFP_ATOMIC);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb->dev = priv->ndev;
+ can_skb_reserve(skb);
+ can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+ skcb = j1939_skb_to_cb(skb);
+ memcpy(skcb, rel_skcb, sizeof(*skcb));
+
+ session = j1939_session_new(priv, skb, skb->len);
+ if (!session) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ /* alloc data area */
+ skb_put(skb, size);
+ /* skb is refcounted in j1939_session_new() */
+ return session;
+}
+
+int j1939_session_activate(struct j1939_session *session)
+{
+ struct j1939_priv *priv = session->priv;
+ struct j1939_session *active = NULL;
+ int ret = 0;
+
+ j1939_session_list_lock(priv);
+ if (session->skcb.addr.type != J1939_SIMPLE)
+ active = j1939_session_get_by_addr_locked(priv,
+ &priv->active_session_list,
+ &session->skcb.addr, false,
+ session->transmission);
+ if (active) {
+ j1939_session_put(active);
+ ret = -EAGAIN;
+ } else {
+ WARN_ON_ONCE(session->state != J1939_SESSION_NEW);
+ list_add_tail(&session->active_session_list_entry,
+ &priv->active_session_list);
+ j1939_session_get(session);
+ session->state = J1939_SESSION_ACTIVE;
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n",
+ __func__, session);
+ }
+ j1939_session_list_unlock(priv);
+
+ return ret;
+}
+
+static struct
+j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
+ struct sk_buff *skb)
+{
+ enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT;
+ struct j1939_sk_buff_cb skcb = *j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+ const u8 *dat;
+ pgn_t pgn;
+ int len;
+
+ netdev_dbg(priv->ndev, "%s\n", __func__);
+
+ dat = skb->data;
+ pgn = j1939_xtp_ctl_to_pgn(dat);
+ skcb.addr.pgn = pgn;
+
+ if (!j1939_sk_recv_match(priv, &skcb))
+ return NULL;
+
+ if (skcb.addr.type == J1939_ETP) {
+ len = j1939_etp_ctl_to_size(dat);
+ if (len > J1939_MAX_ETP_PACKET_SIZE)
+ abort = J1939_XTP_ABORT_FAULT;
+ else if (len > priv->tp_max_packet_size)
+ abort = J1939_XTP_ABORT_RESOURCE;
+ else if (len <= J1939_MAX_TP_PACKET_SIZE)
+ abort = J1939_XTP_ABORT_FAULT;
+ } else {
+ len = j1939_tp_ctl_to_size(dat);
+ if (len > J1939_MAX_TP_PACKET_SIZE)
+ abort = J1939_XTP_ABORT_FAULT;
+ else if (len > priv->tp_max_packet_size)
+ abort = J1939_XTP_ABORT_RESOURCE;
+ }
+
+ if (abort != J1939_XTP_NO_ABORT) {
+ j1939_xtp_tx_abort(priv, &skcb, true, abort, pgn);
+ return NULL;
+ }
+
+ session = j1939_session_fresh_new(priv, len, &skcb);
+ if (!session) {
+ j1939_xtp_tx_abort(priv, &skcb, true,
+ J1939_XTP_ABORT_RESOURCE, pgn);
+ return NULL;
+ }
+
+ /* initialize the control buffer: plain copy */
+ session->pkt.total = (len + 6) / 7;
+ session->pkt.block = 0xff;
+ if (skcb.addr.type != J1939_ETP) {
+ if (dat[3] != session->pkt.total)
+ netdev_alert(priv->ndev, "%s: 0x%p: strange total, %u != %u\n",
+ __func__, session, session->pkt.total,
+ dat[3]);
+ session->pkt.total = dat[3];
+ session->pkt.block = min(dat[3], dat[4]);
+ }
+
+ session->pkt.rx = 0;
+ session->pkt.tx = 0;
+
+ WARN_ON_ONCE(j1939_session_activate(session));
+
+ return session;
+}
+
+static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
+ struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_priv *priv = session->priv;
+
+ if (!session->transmission) {
+ if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+ return -EBUSY;
+
+ /* RTS on active session */
+ j1939_session_timers_cancel(session);
+ j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+ j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
+ }
+
+ if (session->last_cmd != 0) {
+ /* we received a second rts on the same connection */
+ netdev_alert(priv->ndev, "%s: 0x%p: connection exists (%02x %02x). last cmd: %x\n",
+ __func__, session, skcb->addr.sa, skcb->addr.da,
+ session->last_cmd);
+
+ j1939_session_timers_cancel(session);
+ j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+ j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
+
+ return -EBUSY;
+ }
+
+ if (session->skcb.addr.sa != skcb->addr.sa ||
+ session->skcb.addr.da != skcb->addr.da)
+ netdev_warn(priv->ndev, "%s: 0x%p: session->skcb.addr.sa=0x%02x skcb->addr.sa=0x%02x session->skcb.addr.da=0x%02x skcb->addr.da=0x%02x\n",
+ __func__, session,
+ session->skcb.addr.sa, skcb->addr.sa,
+ session->skcb.addr.da, skcb->addr.da);
+ /* make sure 'sa' & 'da' are correct!
+ * They may not be filled in yet for outgoing skbs,
+ * since those never went through the Address Claim procedure.
+ */
+ session->skcb.addr.sa = skcb->addr.sa;
+ session->skcb.addr.da = skcb->addr.da;
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ return 0;
+}
+
+static void j1939_xtp_rx_rts(struct j1939_priv *priv, struct sk_buff *skb,
+ bool transmitter)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+ u8 cmd = skb->data[0];
+
+ session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+ transmitter);
+
+ if (!session) {
+ if (transmitter) {
+ /* If we're the transmitter and this function is called,
+ * we received our own RTS. A session has already been
+ * created.
+ *
+ * For some reason, however, it might already have been
+ * destroyed. So don't create a new one here (using
+ * "j1939_xtp_rx_rts_session_new()") as this would be a
+ * receiver session.
+ *
+ * The reasons the session might already be destroyed
+ * are:
+ * - user space closed the socket and the session was
+ * aborted
+ * - session was aborted due to external abort message
+ */
+ return;
+ }
+ session = j1939_xtp_rx_rts_session_new(priv, skb);
+ if (!session)
+ return;
+ } else {
+ if (j1939_xtp_rx_rts_session_active(session, skb)) {
+ j1939_session_put(session);
+ return;
+ }
+ }
+ session->last_cmd = cmd;
+
+ j1939_tp_set_rxtimeout(session, 1250);
+
+ if (cmd != J1939_TP_CMD_BAM && !session->transmission) {
+ j1939_session_txtimer_cancel(session);
+ j1939_tp_schedule_txtimer(session, 0);
+ }
+
+ j1939_session_put(session);
+}
+
+static void j1939_xtp_rx_dpo_one(struct j1939_session *session,
+ struct sk_buff *skb)
+{
+ const u8 *dat = skb->data;
+
+ if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+ return;
+
+ netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+ /* transmitted without problems */
+ session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data);
+ session->last_cmd = dat[0];
+ j1939_tp_set_rxtimeout(session, 750);
+}
+
+static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb,
+ bool transmitter)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+
+ session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+ transmitter);
+ if (!session) {
+ netdev_info(priv->ndev,
+ "%s: no connection found\n", __func__);
+ return;
+ }
+
+ j1939_xtp_rx_dpo_one(session, skb);
+ j1939_session_put(session);
+}
+
+static void j1939_xtp_rx_dat_one(struct j1939_session *session,
+ struct sk_buff *skb)
+{
+ struct j1939_priv *priv = session->priv;
+ struct j1939_sk_buff_cb *skcb;
+ struct sk_buff *se_skb;
+ const u8 *dat;
+ u8 *tpdat;
+ int offset;
+ int nbytes;
+ bool final = false;
+ bool do_cts_eoma = false;
+ int packet;
+
+ skcb = j1939_skb_to_cb(skb);
+ dat = skb->data;
+ if (skb->len <= 1)
+ /* makes no sense */
+ goto out_session_cancel;
+
+ switch (session->last_cmd) {
+ case 0xff:
+ break;
+ case J1939_ETP_CMD_DPO:
+ if (skcb->addr.type == J1939_ETP)
+ break;
+ /* fall through */
+ case J1939_TP_CMD_BAM: /* fall through */
+ case J1939_TP_CMD_CTS: /* fall through */
+ if (skcb->addr.type != J1939_ETP)
+ break;
+ /* fall through */
+ default:
+ netdev_info(priv->ndev, "%s: 0x%p: last %02x\n", __func__,
+ session, session->last_cmd);
+ goto out_session_cancel;
+ }
+
+ packet = (dat[0] - 1 + session->pkt.dpo);
+ if (packet > session->pkt.total ||
+ (session->pkt.rx + 1) > session->pkt.total) {
+ netdev_info(priv->ndev, "%s: 0x%p: should have been completed\n",
+ __func__, session);
+ goto out_session_cancel;
+ }
+ se_skb = j1939_session_skb_find(session);
+ if (!se_skb) {
+ netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__,
+ session);
+ goto out_session_cancel;
+ }
+
+ skcb = j1939_skb_to_cb(se_skb);
+ offset = packet * 7 - skcb->offset;
+ nbytes = se_skb->len - offset;
+ if (nbytes > 7)
+ nbytes = 7;
+ if (nbytes <= 0 || (nbytes + 1) > skb->len) {
+ netdev_info(priv->ndev, "%s: 0x%p: nbytes %i, len %i\n",
+ __func__, session, nbytes, skb->len);
+ goto out_session_cancel;
+ }
+
+ tpdat = se_skb->data;
+ memcpy(&tpdat[offset], &dat[1], nbytes);
+ if (packet == session->pkt.rx)
+ session->pkt.rx++;
+
+ if (skcb->addr.type != J1939_ETP &&
+ j1939_cb_is_broadcast(&session->skcb)) {
+ if (session->pkt.rx >= session->pkt.total)
+ final = true;
+ } else {
+ /* never final, an EOMA must follow */
+ if (session->pkt.rx >= session->pkt.last)
+ do_cts_eoma = true;
+ }
+
+ if (final) {
+ j1939_session_completed(session);
+ } else if (do_cts_eoma) {
+ j1939_tp_set_rxtimeout(session, 1250);
+ if (!session->transmission)
+ j1939_tp_schedule_txtimer(session, 0);
+ } else {
+ j1939_tp_set_rxtimeout(session, 250);
+ }
+ session->last_cmd = 0xff;
+ j1939_session_put(session);
+
+ return;
+
+ out_session_cancel:
+ j1939_session_timers_cancel(session);
+ j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+ j1939_session_cancel(session, J1939_XTP_ABORT_FAULT);
+ j1939_session_put(session);
+}
+
+static void j1939_xtp_rx_dat(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb;
+ struct j1939_session *session;
+
+ skcb = j1939_skb_to_cb(skb);
+
+ if (j1939_tp_im_transmitter(skcb)) {
+ session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+ true);
+ if (!session)
+ netdev_info(priv->ndev, "%s: no tx connection found\n",
+ __func__);
+ else
+ j1939_xtp_rx_dat_one(session, skb);
+ }
+
+ if (j1939_tp_im_receiver(skcb)) {
+ session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+ false);
+ if (!session)
+ netdev_info(priv->ndev, "%s: no rx connection found\n",
+ __func__);
+ else
+ j1939_xtp_rx_dat_one(session, skb);
+ }
+}
+
+/* j1939 main intf */
+struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
+ struct sk_buff *skb, size_t size)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ struct j1939_session *session;
+ int ret;
+
+ if (skcb->addr.pgn == J1939_TP_PGN_DAT ||
+ skcb->addr.pgn == J1939_TP_PGN_CTL ||
+ skcb->addr.pgn == J1939_ETP_PGN_DAT ||
+ skcb->addr.pgn == J1939_ETP_PGN_CTL)
+ /* avoid conflict */
+ return ERR_PTR(-EDOM);
+
+ if (size > priv->tp_max_packet_size)
+ return ERR_PTR(-EMSGSIZE);
+
+ if (size <= 8)
+ skcb->addr.type = J1939_SIMPLE;
+ else if (size > J1939_MAX_TP_PACKET_SIZE)
+ skcb->addr.type = J1939_ETP;
+ else
+ skcb->addr.type = J1939_TP;
+
+ if (skcb->addr.type == J1939_ETP &&
+ j1939_cb_is_broadcast(skcb))
+ return ERR_PTR(-EDESTADDRREQ);
+
+ /* fill in addresses from names */
+ ret = j1939_ac_fixup(priv, skb);
+ if (unlikely(ret))
+ return ERR_PTR(ret);
+
+ /* fix DST flags; they may be needed soon */
+ if (j1939_address_is_unicast(skcb->addr.da) &&
+ priv->ents[skcb->addr.da].nusers)
+ skcb->flags |= J1939_ECU_LOCAL_DST;
+
+ /* src is always local, I'm sending ... */
+ skcb->flags |= J1939_ECU_LOCAL_SRC;
+
+ /* prepare new session */
+ session = j1939_session_new(priv, skb, size);
+ if (!session)
+ return ERR_PTR(-ENOMEM);
+
+ /* skb is refcounted in j1939_session_new() */
+ session->sk = skb->sk;
+ session->transmission = true;
+ session->pkt.total = (size + 6) / 7;
+ session->pkt.block = skcb->addr.type == J1939_ETP ? 255 :
+ min(j1939_tp_block ?: 255, session->pkt.total);
+
+ if (j1939_cb_is_broadcast(&session->skcb))
+ /* set the end-packet for broadcast */
+ session->pkt.last = session->pkt.total;
+
+ skcb->tskey = session->sk->sk_tskey++;
+ session->tskey = skcb->tskey;
+
+ return session;
+}
+
+static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ int extd = J1939_TP;
+ u8 cmd = skb->data[0];
+
+ switch (cmd) {
+ case J1939_ETP_CMD_RTS:
+ extd = J1939_ETP;
+ /* fall through */
+ case J1939_TP_CMD_BAM: /* fall through */
+ case J1939_TP_CMD_RTS: /* fall through */
+ if (skcb->addr.type != extd)
+ return;
+
+ if (cmd == J1939_TP_CMD_RTS && j1939_cb_is_broadcast(skcb)) {
+ netdev_alert(priv->ndev, "%s: rts without destination (%02x)\n",
+ __func__, skcb->addr.sa);
+ return;
+ }
+
+ if (j1939_tp_im_transmitter(skcb))
+ j1939_xtp_rx_rts(priv, skb, true);
+
+ if (j1939_tp_im_receiver(skcb))
+ j1939_xtp_rx_rts(priv, skb, false);
+
+ break;
+
+ case J1939_ETP_CMD_CTS:
+ extd = J1939_ETP;
+ /* fall through */
+ case J1939_TP_CMD_CTS:
+ if (skcb->addr.type != extd)
+ return;
+
+ if (j1939_tp_im_transmitter(skcb))
+ j1939_xtp_rx_cts(priv, skb, false);
+
+ if (j1939_tp_im_receiver(skcb))
+ j1939_xtp_rx_cts(priv, skb, true);
+
+ break;
+
+ case J1939_ETP_CMD_DPO:
+ if (skcb->addr.type != J1939_ETP)
+ return;
+
+ if (j1939_tp_im_transmitter(skcb))
+ j1939_xtp_rx_dpo(priv, skb, true);
+
+ if (j1939_tp_im_receiver(skcb))
+ j1939_xtp_rx_dpo(priv, skb, false);
+
+ break;
+
+ case J1939_ETP_CMD_EOMA:
+ extd = J1939_ETP;
+ /* fall through */
+ case J1939_TP_CMD_EOMA:
+ if (skcb->addr.type != extd)
+ return;
+
+ if (j1939_tp_im_transmitter(skcb))
+ j1939_xtp_rx_eoma(priv, skb, false);
+
+ if (j1939_tp_im_receiver(skcb))
+ j1939_xtp_rx_eoma(priv, skb, true);
+
+ break;
+
+ case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */
+ if (j1939_tp_im_transmitter(skcb))
+ j1939_xtp_rx_abort(priv, skb, true);
+
+ if (j1939_tp_im_receiver(skcb))
+ j1939_xtp_rx_abort(priv, skb, false);
+
+ break;
+ default:
+ return;
+ }
+}
+
+int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+
+ if (!j1939_tp_im_involved_anydir(skcb))
+ return 0;
+
+ switch (skcb->addr.pgn) {
+ case J1939_ETP_PGN_DAT:
+ skcb->addr.type = J1939_ETP;
+ /* fall through */
+ case J1939_TP_PGN_DAT:
+ j1939_xtp_rx_dat(priv, skb);
+ break;
+
+ case J1939_ETP_PGN_CTL:
+ skcb->addr.type = J1939_ETP;
+ /* fall through */
+ case J1939_TP_PGN_CTL:
+ if (skb->len < 8)
+ return 0; /* Don't care. Nothing to extract here */
+
+ j1939_tp_cmd_recv(priv, skb);
+ break;
+ default:
+ return 0; /* no problem */
+ }
+ return 1; /* "I processed the message" */
+}
+
+void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+ struct j1939_session *session;
+
+ if (!skb->sk)
+ return;
+
+ j1939_session_list_lock(priv);
+ session = j1939_session_get_simple(priv, skb);
+ j1939_session_list_unlock(priv);
+ if (!session) {
+ netdev_warn(priv->ndev,
+ "%s: Received already invalidated message\n",
+ __func__);
+ return;
+ }
+
+ j1939_session_timers_cancel(session);
+ j1939_session_deactivate(session);
+ j1939_session_put(session);
+}
+
+int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk)
+{
+ struct j1939_session *session, *saved;
+
+ netdev_dbg(priv->ndev, "%s, sk: %p\n", __func__, sk);
+ j1939_session_list_lock(priv);
+ list_for_each_entry_safe(session, saved,
+ &priv->active_session_list,
+ active_session_list_entry) {
+ if (!sk || sk == session->sk) {
+ j1939_session_timers_cancel(session);
+ session->err = ESHUTDOWN;
+ j1939_session_deactivate_locked(session);
+ }
+ }
+ j1939_session_list_unlock(priv);
+ return NOTIFY_DONE;
+}
+
+void j1939_tp_init(struct j1939_priv *priv)
+{
+ spin_lock_init(&priv->active_session_list_lock);
+ INIT_LIST_HEAD(&priv->active_session_list);
+ priv->tp_max_packet_size = J1939_MAX_ETP_PACKET_SIZE;
+}
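A hedged userspace sketch of how j1939_tp_send() above classifies an outgoing message; the interface name, addresses and PGN below are made-up values, and the sockaddr layout is the CAN_J1939 one introduced by this series. A 100-byte payload exceeds the 8-byte J1939_SIMPLE limit, so the stack opens a TP session with pkt.total = (100 + 6) / 7 = 15 packets; anything above J1939_MAX_TP_PACKET_SIZE would switch to ETP instead.

#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <sys/socket.h>
#include <linux/can.h>
#include <linux/can/j1939.h>

int main(void)
{
	struct sockaddr_can baddr = {
		.can_family = AF_CAN,
		.can_ifindex = if_nametoindex("can0"),	/* assumed interface */
		.can_addr.j1939 = {
			.name = J1939_NO_NAME,
			.pgn = J1939_NO_PGN,
			.addr = 0x20,		/* assumed source address */
		},
	};
	struct sockaddr_can daddr = {
		.can_family = AF_CAN,
		.can_addr.j1939 = {
			.name = J1939_NO_NAME,
			.pgn = 0x12300,		/* assumed PDU1 PGN */
			.addr = 0x30,		/* assumed destination address */
		},
	};
	unsigned char buf[100];	/* > 8 bytes: j1939_tp_send() picks J1939_TP */
	int sock = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);

	memset(buf, 0xaa, sizeof(buf));
	if (sock < 0 || bind(sock, (struct sockaddr *)&baddr, sizeof(baddr)))
		return 1;
	if (sendto(sock, buf, sizeof(buf), 0,
		   (struct sockaddr *)&daddr, sizeof(daddr)) < 0)
		return 1;
	close(sock);
	return 0;
}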
diff --git a/net/can/proc.c b/net/can/proc.c
index edb822c31902..e6881bfc3ed1 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -45,6 +45,7 @@
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/if_arp.h>
+#include <linux/can/can-ml.h>
#include <linux/can/core.h>
#include "af_can.h"
@@ -78,21 +79,21 @@ static const char rx_list_name[][8] = {
static void can_init_stats(struct net *net)
{
- struct s_stats *can_stats = net->can.can_stats;
- struct s_pstats *can_pstats = net->can.can_pstats;
+ struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
+ struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
/*
* This memset function is called from a timer context (when
* can_stattimer is active which is the default) OR in a process
* context (reading the proc_fs when can_stattimer is disabled).
*/
- memset(can_stats, 0, sizeof(struct s_stats));
- can_stats->jiffies_init = jiffies;
+ memset(pkg_stats, 0, sizeof(struct can_pkg_stats));
+ pkg_stats->jiffies_init = jiffies;
- can_pstats->stats_reset++;
+ rcv_lists_stats->stats_reset++;
if (user_reset) {
user_reset = 0;
- can_pstats->user_reset++;
+ rcv_lists_stats->user_reset++;
}
}
@@ -118,8 +119,8 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
void can_stat_update(struct timer_list *t)
{
- struct net *net = from_timer(net, t, can.can_stattimer);
- struct s_stats *can_stats = net->can.can_stats;
+ struct net *net = from_timer(net, t, can.stattimer);
+ struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
unsigned long j = jiffies; /* snapshot */
/* restart counting in timer context on user request */
@@ -127,57 +128,57 @@ void can_stat_update(struct timer_list *t)
can_init_stats(net);
/* restart counting on jiffies overflow */
- if (j < can_stats->jiffies_init)
+ if (j < pkg_stats->jiffies_init)
can_init_stats(net);
/* prevent overflow in calc_rate() */
- if (can_stats->rx_frames > (ULONG_MAX / HZ))
+ if (pkg_stats->rx_frames > (ULONG_MAX / HZ))
can_init_stats(net);
/* prevent overflow in calc_rate() */
- if (can_stats->tx_frames > (ULONG_MAX / HZ))
+ if (pkg_stats->tx_frames > (ULONG_MAX / HZ))
can_init_stats(net);
/* matches overflow - very improbable */
- if (can_stats->matches > (ULONG_MAX / 100))
+ if (pkg_stats->matches > (ULONG_MAX / 100))
can_init_stats(net);
/* calc total values */
- if (can_stats->rx_frames)
- can_stats->total_rx_match_ratio = (can_stats->matches * 100) /
- can_stats->rx_frames;
+ if (pkg_stats->rx_frames)
+ pkg_stats->total_rx_match_ratio = (pkg_stats->matches * 100) /
+ pkg_stats->rx_frames;
- can_stats->total_tx_rate = calc_rate(can_stats->jiffies_init, j,
- can_stats->tx_frames);
- can_stats->total_rx_rate = calc_rate(can_stats->jiffies_init, j,
- can_stats->rx_frames);
+ pkg_stats->total_tx_rate = calc_rate(pkg_stats->jiffies_init, j,
+ pkg_stats->tx_frames);
+ pkg_stats->total_rx_rate = calc_rate(pkg_stats->jiffies_init, j,
+ pkg_stats->rx_frames);
/* calc current values */
- if (can_stats->rx_frames_delta)
- can_stats->current_rx_match_ratio =
- (can_stats->matches_delta * 100) /
- can_stats->rx_frames_delta;
+ if (pkg_stats->rx_frames_delta)
+ pkg_stats->current_rx_match_ratio =
+ (pkg_stats->matches_delta * 100) /
+ pkg_stats->rx_frames_delta;
- can_stats->current_tx_rate = calc_rate(0, HZ, can_stats->tx_frames_delta);
- can_stats->current_rx_rate = calc_rate(0, HZ, can_stats->rx_frames_delta);
+ pkg_stats->current_tx_rate = calc_rate(0, HZ, pkg_stats->tx_frames_delta);
+ pkg_stats->current_rx_rate = calc_rate(0, HZ, pkg_stats->rx_frames_delta);
/* check / update maximum values */
- if (can_stats->max_tx_rate < can_stats->current_tx_rate)
- can_stats->max_tx_rate = can_stats->current_tx_rate;
+ if (pkg_stats->max_tx_rate < pkg_stats->current_tx_rate)
+ pkg_stats->max_tx_rate = pkg_stats->current_tx_rate;
- if (can_stats->max_rx_rate < can_stats->current_rx_rate)
- can_stats->max_rx_rate = can_stats->current_rx_rate;
+ if (pkg_stats->max_rx_rate < pkg_stats->current_rx_rate)
+ pkg_stats->max_rx_rate = pkg_stats->current_rx_rate;
- if (can_stats->max_rx_match_ratio < can_stats->current_rx_match_ratio)
- can_stats->max_rx_match_ratio = can_stats->current_rx_match_ratio;
+ if (pkg_stats->max_rx_match_ratio < pkg_stats->current_rx_match_ratio)
+ pkg_stats->max_rx_match_ratio = pkg_stats->current_rx_match_ratio;
/* clear values for 'current rate' calculation */
- can_stats->tx_frames_delta = 0;
- can_stats->rx_frames_delta = 0;
- can_stats->matches_delta = 0;
+ pkg_stats->tx_frames_delta = 0;
+ pkg_stats->rx_frames_delta = 0;
+ pkg_stats->matches_delta = 0;
/* restart timer (one second) */
- mod_timer(&net->can.can_stattimer, round_jiffies(jiffies + HZ));
+ mod_timer(&net->can.stattimer, round_jiffies(jiffies + HZ));
}
/*
@@ -212,60 +213,60 @@ static void can_print_recv_banner(struct seq_file *m)
static int can_stats_proc_show(struct seq_file *m, void *v)
{
struct net *net = m->private;
- struct s_stats *can_stats = net->can.can_stats;
- struct s_pstats *can_pstats = net->can.can_pstats;
+ struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
+ struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
seq_putc(m, '\n');
- seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats->tx_frames);
- seq_printf(m, " %8ld received frames (RXF)\n", can_stats->rx_frames);
- seq_printf(m, " %8ld matched frames (RXMF)\n", can_stats->matches);
+ seq_printf(m, " %8ld transmitted frames (TXF)\n", pkg_stats->tx_frames);
+ seq_printf(m, " %8ld received frames (RXF)\n", pkg_stats->rx_frames);
+ seq_printf(m, " %8ld matched frames (RXMF)\n", pkg_stats->matches);
seq_putc(m, '\n');
- if (net->can.can_stattimer.function == can_stat_update) {
+ if (net->can.stattimer.function == can_stat_update) {
seq_printf(m, " %8ld %% total match ratio (RXMR)\n",
- can_stats->total_rx_match_ratio);
+ pkg_stats->total_rx_match_ratio);
seq_printf(m, " %8ld frames/s total tx rate (TXR)\n",
- can_stats->total_tx_rate);
+ pkg_stats->total_tx_rate);
seq_printf(m, " %8ld frames/s total rx rate (RXR)\n",
- can_stats->total_rx_rate);
+ pkg_stats->total_rx_rate);
seq_putc(m, '\n');
seq_printf(m, " %8ld %% current match ratio (CRXMR)\n",
- can_stats->current_rx_match_ratio);
+ pkg_stats->current_rx_match_ratio);
seq_printf(m, " %8ld frames/s current tx rate (CTXR)\n",
- can_stats->current_tx_rate);
+ pkg_stats->current_tx_rate);
seq_printf(m, " %8ld frames/s current rx rate (CRXR)\n",
- can_stats->current_rx_rate);
+ pkg_stats->current_rx_rate);
seq_putc(m, '\n');
seq_printf(m, " %8ld %% max match ratio (MRXMR)\n",
- can_stats->max_rx_match_ratio);
+ pkg_stats->max_rx_match_ratio);
seq_printf(m, " %8ld frames/s max tx rate (MTXR)\n",
- can_stats->max_tx_rate);
+ pkg_stats->max_tx_rate);
seq_printf(m, " %8ld frames/s max rx rate (MRXR)\n",
- can_stats->max_rx_rate);
+ pkg_stats->max_rx_rate);
seq_putc(m, '\n');
}
seq_printf(m, " %8ld current receive list entries (CRCV)\n",
- can_pstats->rcv_entries);
+ rcv_lists_stats->rcv_entries);
seq_printf(m, " %8ld maximum receive list entries (MRCV)\n",
- can_pstats->rcv_entries_max);
+ rcv_lists_stats->rcv_entries_max);
- if (can_pstats->stats_reset)
+ if (rcv_lists_stats->stats_reset)
seq_printf(m, "\n %8ld statistic resets (STR)\n",
- can_pstats->stats_reset);
+ rcv_lists_stats->stats_reset);
- if (can_pstats->user_reset)
+ if (rcv_lists_stats->user_reset)
seq_printf(m, " %8ld user statistic resets (USTR)\n",
- can_pstats->user_reset);
+ rcv_lists_stats->user_reset);
seq_putc(m, '\n');
return 0;
@@ -274,20 +275,20 @@ static int can_stats_proc_show(struct seq_file *m, void *v)
static int can_reset_stats_proc_show(struct seq_file *m, void *v)
{
struct net *net = m->private;
- struct s_pstats *can_pstats = net->can.can_pstats;
- struct s_stats *can_stats = net->can.can_stats;
+ struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
+ struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
user_reset = 1;
- if (net->can.can_stattimer.function == can_stat_update) {
+ if (net->can.stattimer.function == can_stat_update) {
seq_printf(m, "Scheduled statistic reset #%ld.\n",
- can_pstats->stats_reset + 1);
+ rcv_lists_stats->stats_reset + 1);
} else {
- if (can_stats->jiffies_init != jiffies)
+ if (pkg_stats->jiffies_init != jiffies)
can_init_stats(net);
seq_printf(m, "Performed statistic reset #%ld.\n",
- can_pstats->stats_reset);
+ rcv_lists_stats->stats_reset);
}
return 0;
}
@@ -300,11 +301,11 @@ static int can_version_proc_show(struct seq_file *m, void *v)
static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
struct net_device *dev,
- struct can_dev_rcv_lists *d)
+ struct can_dev_rcv_lists *dev_rcv_lists)
{
- if (!hlist_empty(&d->rx[idx])) {
+ if (!hlist_empty(&dev_rcv_lists->rx[idx])) {
can_print_recv_banner(m);
- can_print_rcvlist(m, &d->rx[idx], dev);
+ can_print_rcvlist(m, &dev_rcv_lists->rx[idx], dev);
} else
seq_printf(m, " (%s: no entry)\n", DNAME(dev));
@@ -315,7 +316,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
/* double cast to prevent GCC warning */
int idx = (int)(long)PDE_DATA(m->file->f_inode);
struct net_device *dev;
- struct can_dev_rcv_lists *d;
+ struct can_dev_rcv_lists *dev_rcv_lists;
struct net *net = m->private;
seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]);
@@ -323,8 +324,8 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
rcu_read_lock();
/* receive list for 'all' CAN devices (dev == NULL) */
- d = net->can.can_rx_alldev_list;
- can_rcvlist_proc_show_one(m, idx, NULL, d);
+ dev_rcv_lists = net->can.rx_alldev_list;
+ can_rcvlist_proc_show_one(m, idx, NULL, dev_rcv_lists);
/* receive list for registered CAN devices */
for_each_netdev_rcu(net, dev) {
@@ -366,7 +367,7 @@ static inline void can_rcvlist_proc_show_array(struct seq_file *m,
static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
{
struct net_device *dev;
- struct can_dev_rcv_lists *d;
+ struct can_dev_rcv_lists *dev_rcv_lists;
struct net *net = m->private;
/* RX_SFF */
@@ -375,15 +376,16 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
rcu_read_lock();
/* sff receive list for 'all' CAN devices (dev == NULL) */
- d = net->can.can_rx_alldev_list;
- can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff));
+ dev_rcv_lists = net->can.rx_alldev_list;
+ can_rcvlist_proc_show_array(m, NULL, dev_rcv_lists->rx_sff,
+ ARRAY_SIZE(dev_rcv_lists->rx_sff));
/* sff receive list for registered CAN devices */
for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_CAN && dev->ml_priv) {
- d = dev->ml_priv;
- can_rcvlist_proc_show_array(m, dev, d->rx_sff,
- ARRAY_SIZE(d->rx_sff));
+ dev_rcv_lists = dev->ml_priv;
+ can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_sff,
+ ARRAY_SIZE(dev_rcv_lists->rx_sff));
}
}
@@ -396,7 +398,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
{
struct net_device *dev;
- struct can_dev_rcv_lists *d;
+ struct can_dev_rcv_lists *dev_rcv_lists;
struct net *net = m->private;
/* RX_EFF */
@@ -405,15 +407,16 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
rcu_read_lock();
/* eff receive list for 'all' CAN devices (dev == NULL) */
- d = net->can.can_rx_alldev_list;
- can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff));
+ dev_rcv_lists = net->can.rx_alldev_list;
+ can_rcvlist_proc_show_array(m, NULL, dev_rcv_lists->rx_eff,
+ ARRAY_SIZE(dev_rcv_lists->rx_eff));
/* eff receive list for registered CAN devices */
for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_CAN && dev->ml_priv) {
- d = dev->ml_priv;
- can_rcvlist_proc_show_array(m, dev, d->rx_eff,
- ARRAY_SIZE(d->rx_eff));
+ dev_rcv_lists = dev->ml_priv;
+ can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_eff,
+ ARRAY_SIZE(dev_rcv_lists->rx_eff));
}
}
diff --git a/net/can/raw.c b/net/can/raw.c
index fdbc36140e9b..59c039d73c6d 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -396,7 +396,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
int err = 0;
int notify_enetdown = 0;
- if (len < sizeof(*addr))
+ if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex))
return -EINVAL;
if (addr->can_family != AF_CAN)
return -EINVAL;
@@ -733,7 +733,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (msg->msg_name) {
DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name);
- if (msg->msg_namelen < sizeof(*addr))
+ if (msg->msg_namelen < CAN_REQUIRED_SIZE(*addr, can_ifindex))
return -EINVAL;
if (addr->can_family != AF_CAN)
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 94c7f77ecb6b..da5639a5bd3b 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -12,6 +12,9 @@
static atomic_t cache_idx;
+#define SK_STORAGE_CREATE_FLAG_MASK \
+ (BPF_F_NO_PREALLOC | BPF_F_CLONE)
+
struct bucket {
struct hlist_head list;
raw_spinlock_t lock;
@@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
kfree_rcu(sk_storage, rcu);
}
-/* sk_storage->lock must be held and sk_storage->list cannot be empty */
static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
struct bpf_sk_storage_elem *selem)
{
@@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
return 0;
}
-/* Called by __sk_destruct() */
+/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
struct bpf_sk_storage_elem *selem;
@@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
smap = (struct bpf_sk_storage_map *)map;
+ /* Note that this map might be concurrently cloned from
+ * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
+ * RCU read section to finish before proceeding. New RCU
+ * read sections should be prevented via bpf_map_inc_not_zero.
+ */
synchronize_rcu();
/* bpf prog and the userspace can no longer access this map
@@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
{
- if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries ||
+ if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
+ !(attr->map_flags & BPF_F_NO_PREALLOC) ||
+ attr->max_entries ||
attr->key_size != sizeof(int) || !attr->value_size ||
/* Enforce BTF for userspace sk dumping */
!attr->btf_key_type_id || !attr->btf_value_type_id)
@@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
return err;
}
+static struct bpf_sk_storage_elem *
+bpf_sk_storage_clone_elem(struct sock *newsk,
+ struct bpf_sk_storage_map *smap,
+ struct bpf_sk_storage_elem *selem)
+{
+ struct bpf_sk_storage_elem *copy_selem;
+
+ copy_selem = selem_alloc(smap, newsk, NULL, true);
+ if (!copy_selem)
+ return NULL;
+
+ if (map_value_has_spin_lock(&smap->map))
+ copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
+ SDATA(selem)->data, true);
+ else
+ copy_map_value(&smap->map, SDATA(copy_selem)->data,
+ SDATA(selem)->data);
+
+ return copy_selem;
+}
+
+int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
+{
+ struct bpf_sk_storage *new_sk_storage = NULL;
+ struct bpf_sk_storage *sk_storage;
+ struct bpf_sk_storage_elem *selem;
+ int ret = 0;
+
+ RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
+
+ rcu_read_lock();
+ sk_storage = rcu_dereference(sk->sk_bpf_storage);
+
+ if (!sk_storage || hlist_empty(&sk_storage->list))
+ goto out;
+
+ hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
+ struct bpf_sk_storage_elem *copy_selem;
+ struct bpf_sk_storage_map *smap;
+ struct bpf_map *map;
+
+ smap = rcu_dereference(SDATA(selem)->smap);
+ if (!(smap->map.map_flags & BPF_F_CLONE))
+ continue;
+
+ /* Note that for lockless listeners adding a new element
+ * here can race with cleanup in bpf_sk_storage_map_free.
+ * Try to grab map refcnt to make sure that it's still
+ * alive and prevent concurrent removal.
+ */
+ map = bpf_map_inc_not_zero(&smap->map, false);
+ if (IS_ERR(map))
+ continue;
+
+ copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
+ if (!copy_selem) {
+ ret = -ENOMEM;
+ bpf_map_put(map);
+ goto out;
+ }
+
+ if (new_sk_storage) {
+ selem_link_map(smap, copy_selem);
+ __selem_link_sk(new_sk_storage, copy_selem);
+ } else {
+ ret = sk_storage_alloc(newsk, smap, copy_selem);
+ if (ret) {
+ kfree(copy_selem);
+ atomic_sub(smap->elem_size,
+ &newsk->sk_omem_alloc);
+ bpf_map_put(map);
+ goto out;
+ }
+
+ new_sk_storage = rcu_dereference(copy_selem->sk_storage);
+ }
+ bpf_map_put(map);
+ }
+
+out:
+ rcu_read_unlock();
+
+ /* In case of an error, don't free anything explicitly here; the
+ * caller is responsible for calling bpf_sk_storage_free.
+ */
+
+ return ret;
+}
+
BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags)
{
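A minimal sketch of how the new BPF_F_CLONE flag is consumed from the BPF side: an sk_storage map created with BPF_F_NO_PREALLOC | BPF_F_CLONE gets its per-socket value copied into the child socket by bpf_sk_storage_clone() when sk_clone_lock() runs (e.g. on accept()). The map name, value type and program section below are illustrative assumptions, not part of this patch.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
	__type(key, int);
	__type(value, __u64);
} pkt_cnt SEC(".maps");

SEC("cgroup_skb/egress")
int count_egress(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;
	__u64 *cnt;

	if (!sk)
		return 1;
	sk = bpf_sk_fullsock(sk);
	if (!sk)
		return 1;
	/* with BPF_F_CLONE the counter is copied into the storage of the
	 * cloned (child) socket instead of starting from scratch
	 */
	cnt = bpf_sk_storage_get(&pkt_cnt, sk, 0, BPF_SK_STORAGE_GET_F_CREATE);
	if (cnt)
		(*cnt)++;
	return 1;
}

char _license[] SEC("license") = "GPL";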
diff --git a/net/core/dev.c b/net/core/dev.c
index 49589ed2018d..b1afafee3e2a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8126,12 +8126,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
bpf_chk = generic_xdp_install;
if (fd >= 0) {
+ u32 prog_id;
+
if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time");
return -EEXIST;
}
- if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
- __dev_xdp_query(dev, bpf_op, query)) {
+
+ prog_id = __dev_xdp_query(dev, bpf_op, query);
+ if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) {
NL_SET_ERR_MSG(extack, "XDP program already attached");
return -EBUSY;
}
@@ -8146,6 +8149,14 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
bpf_prog_put(prog);
return -EINVAL;
}
+
+ if (prog->aux->id == prog_id) {
+ bpf_prog_put(prog);
+ return 0;
+ }
+ } else {
+ if (!__dev_xdp_query(dev, bpf_op, query))
+ return 0;
}
err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
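The dev_change_xdp_fd() hunk above adds two early exits: installing the program that is already attached (matching prog id) now returns 0 without reinstalling it, and detaching (fd < 0) when nothing is attached becomes a no-op. A hedged userspace sketch using libbpf's bpf_set_link_xdp_fd(); ifindex and prog_fd are assumed to come from the caller.

#include <bpf/libbpf.h>

/* Re-running this with a program that is already attached is now
 * harmless: the kernel sees that prog->aux->id matches and returns 0.
 */
static int xdp_attach(int ifindex, int prog_fd)
{
	return bpf_set_link_xdp_fd(ifindex, prog_fd, 0);
}

/* Detaching when no program is attached is likewise a no-op now. */
static int xdp_detach(int ifindex)
{
	return bpf_set_link_xdp_fd(ifindex, -1, 0);
}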
diff --git a/net/core/filter.c b/net/core/filter.c
index b91988f8b94e..ed6563622ce3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5903,7 +5903,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
default:
return -EPROTONOSUPPORT;
}
- if (mss <= 0)
+ if (mss == 0)
return -ENOENT;
return cookie | ((u64)mss << 32);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ea8e8d332d85..2b40b5a9425b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4087,6 +4087,9 @@ static const u8 skb_ext_type_len[] = {
#ifdef CONFIG_XFRM
[SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
#endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
+#endif
};
static __always_inline unsigned int skb_ext_total_length(void)
@@ -4098,6 +4101,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
#ifdef CONFIG_XFRM
skb_ext_type_len[SKB_EXT_SEC_PATH] +
#endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ skb_ext_type_len[TC_SKB_EXT] +
+#endif
0;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 545fac19a711..07863edbe6fc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
goto out;
}
RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
-#ifdef CONFIG_BPF_SYSCALL
- RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
-#endif
+
+ if (bpf_sk_storage_clone(sk, newsk)) {
+ sk_free_unlock_clone(newsk);
+ newsk = NULL;
+ goto out;
+ }
newsk->sk_err = 0;
newsk->sk_err_soft = 0;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 65122bbccd27..dde9d762edee 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1545,10 +1545,34 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in
dp->user_features = 0;
}
-static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
+DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+
+static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
- if (a[OVS_DP_ATTR_USER_FEATURES])
- dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+ u32 user_features = 0;
+
+ if (a[OVS_DP_ATTR_USER_FEATURES]) {
+ user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+
+ if (user_features & ~(OVS_DP_F_VPORT_PIDS |
+ OVS_DP_F_UNALIGNED |
+ OVS_DP_F_TC_RECIRC_SHARING))
+ return -EOPNOTSUPP;
+
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
+ return -EOPNOTSUPP;
+#endif
+ }
+
+ dp->user_features = user_features;
+
+ if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
+ static_branch_enable(&tc_recirc_sharing_support);
+ else
+ static_branch_disable(&tc_recirc_sharing_support);
+
+ return 0;
}
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
@@ -1610,7 +1634,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.port_no = OVSP_LOCAL;
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
- ovs_dp_change(dp, a);
+ err = ovs_dp_change(dp, a);
+ if (err)
+ goto err_destroy_meters;
/* So far only local changes have been made, now need the lock. */
ovs_lock();
@@ -1736,7 +1762,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(dp))
goto err_unlock_free;
- ovs_dp_change(dp, info->attrs);
+ err = ovs_dp_change(dp, info->attrs);
+ if (err)
+ goto err_unlock_free;
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_DP_CMD_SET);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 751d34accdf9..81e85dde8217 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -218,6 +218,8 @@ static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_family dp_vport_genl_family;
+DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 9d81d2c7bf82..38147e6a20f5 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -842,6 +842,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key)
{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ struct tc_skb_ext *tc_ext;
+#endif
int res, err;
/* Extract metadata from packet. */
@@ -874,7 +877,17 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
if (res < 0)
return res;
key->mac_proto = res;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ if (static_branch_unlikely(&tc_recirc_sharing_support)) {
+ tc_ext = skb_ext_find(skb, TC_SKB_EXT);
+ key->recirc_id = tc_ext ? tc_ext->chain : 0;
+ } else {
+ key->recirc_id = 0;
+ }
+#else
key->recirc_id = 0;
+#endif
err = key_extract(skb, key);
if (!err)
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 2977137c28eb..1a5bf3fa4578 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -559,7 +559,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
ret = -EDESTADDRREQ;
break;
}
- if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) ||
+ if (ipv4_is_multicast(sin->sin_addr.s_addr) ||
sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) {
ret = -EINVAL;
break;
@@ -593,7 +593,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
addr4 = sin6->sin6_addr.s6_addr32[3];
if (addr4 == htonl(INADDR_ANY) ||
addr4 == htonl(INADDR_BROADCAST) ||
- IN_MULTICAST(ntohl(addr4))) {
+ ipv4_is_multicast(addr4)) {
ret = -EPROTOTYPE;
break;
}
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 0f4398e7f2a7..6dbb763bc1fd 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -181,7 +181,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_len < sizeof(struct sockaddr_in) ||
sin->sin_addr.s_addr == htonl(INADDR_ANY) ||
sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
- IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+ ipv4_is_multicast(sin->sin_addr.s_addr))
return -EINVAL;
ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr);
binding_addr = &v6addr;
@@ -206,7 +206,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
addr4 = sin6->sin6_addr.s6_addr32[3];
if (addr4 == htonl(INADDR_ANY) ||
addr4 == htonl(INADDR_BROADCAST) ||
- IN_MULTICAST(ntohl(addr4)))
+ ipv4_is_multicast(addr4))
return -EINVAL;
}
/* The scope ID must be specified for link local address. */
diff --git a/net/rds/send.c b/net/rds/send.c
index 9ce552abf9e9..82dcd8b84fe7 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1144,7 +1144,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
case AF_INET:
if (usin->sin_addr.s_addr == htonl(INADDR_ANY) ||
usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
- IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) {
+ ipv4_is_multicast(usin->sin_addr.s_addr)) {
ret = -EINVAL;
goto out;
}
@@ -1175,7 +1175,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
addr4 = sin6->sin6_addr.s6_addr32[3];
if (addr4 == htonl(INADDR_ANY) ||
addr4 == htonl(INADDR_BROADCAST) ||
- IN_MULTICAST(ntohl(addr4))) {
+ ipv4_is_multicast(addr4)) {
ret = -EINVAL;
goto out;
}
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index afd2ba157a13..b3faafeafab9 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -963,6 +963,19 @@ config NET_IFE_SKBTCINDEX
tristate "Support to encoding decoding skb tcindex on IFE action"
depends on NET_ACT_IFE
+config NET_TC_SKB_EXT
+ bool "TC recirculation support"
+ depends on NET_CLS_ACT
+ default y if NET_CLS_ACT
+ select SKB_EXTENSIONS
+
+ help
+ Say Y here to allow tc chain misses to continue in the OvS datapath
+ with the correct recirc_id, and hardware chain misses to continue in
+ the correct chain in the tc software datapath.
+
+ Say N here if you won't be using tc<->ovs offload or tc chains offload.
+
endif # NET_SCHED
config NET_SCH_FIFO
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 6315e0f8d26e..89c04c52af3d 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -40,6 +40,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
[TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE },
[TCA_POLICE_AVRATE] = { .type = NLA_U32 },
[TCA_POLICE_RESULT] = { .type = NLA_U32 },
+ [TCA_POLICE_RATE64] = { .type = NLA_U64 },
+ [TCA_POLICE_PEAKRATE64] = { .type = NLA_U64 },
};
static int tcf_police_init(struct net *net, struct nlattr *nla,
@@ -58,6 +60,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
struct tcf_police_params *new;
bool exists = false;
u32 index;
+ u64 rate64, prate64;
if (nla == NULL)
return -EINVAL;
@@ -155,14 +158,18 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
if (R_tab) {
new->rate_present = true;
- psched_ratecfg_precompute(&new->rate, &R_tab->rate, 0);
+ rate64 = tb[TCA_POLICE_RATE64] ?
+ nla_get_u64(tb[TCA_POLICE_RATE64]) : 0;
+ psched_ratecfg_precompute(&new->rate, &R_tab->rate, rate64);
qdisc_put_rtab(R_tab);
} else {
new->rate_present = false;
}
if (P_tab) {
new->peak_present = true;
- psched_ratecfg_precompute(&new->peak, &P_tab->rate, 0);
+ prate64 = tb[TCA_POLICE_PEAKRATE64] ?
+ nla_get_u64(tb[TCA_POLICE_PEAKRATE64]) : 0;
+ psched_ratecfg_precompute(&new->peak, &P_tab->rate, prate64);
qdisc_put_rtab(P_tab);
} else {
new->peak_present = false;
@@ -313,10 +320,22 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
lockdep_is_held(&police->tcf_lock));
opt.mtu = p->tcfp_mtu;
opt.burst = PSCHED_NS2TICKS(p->tcfp_burst);
- if (p->rate_present)
+ if (p->rate_present) {
psched_ratecfg_getrate(&opt.rate, &p->rate);
- if (p->peak_present)
+ if ((police->params->rate.rate_bytes_ps >= (1ULL << 32)) &&
+ nla_put_u64_64bit(skb, TCA_POLICE_RATE64,
+ police->params->rate.rate_bytes_ps,
+ TCA_POLICE_PAD))
+ goto nla_put_failure;
+ }
+ if (p->peak_present) {
psched_ratecfg_getrate(&opt.peakrate, &p->peak);
+ if ((police->params->peak.rate_bytes_ps >= (1ULL << 32)) &&
+ nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64,
+ police->params->peak.rate_bytes_ps,
+ TCA_POLICE_PAD))
+ goto nla_put_failure;
+ }
if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
goto nla_put_failure;
if (p->tcfp_result &&
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 671ca905dbb5..05c4fe1c3ca2 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1514,6 +1514,18 @@ reclassify:
goto reset;
} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
first_tp = res->goto_tp;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ {
+ struct tc_skb_ext *ext;
+
+ ext = skb_ext_add(skb, TC_SKB_EXT);
+ if (WARN_ON_ONCE(!ext))
+ return TC_ACT_SHOT;
+
+ ext->chain = err & TC_ACT_EXT_VAL_MASK;
+ }
+#endif
goto reset;
}
#endif
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 810645b5c086..93b58fde99b7 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -299,7 +299,7 @@ static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q)
{
struct ethtool_link_ksettings ecmd;
int speed = SPEED_10;
- int port_rate = -1;
+ int port_rate;
int err;
err = __ethtool_get_link_ksettings(dev, &ecmd);
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 550fdf18d3b3..3b7f721c023b 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -228,14 +228,11 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
ret = 0;
err_free_raw:
- memset(rawkey, 0, keybytes);
- kfree(rawkey);
+ kzfree(rawkey);
err_free_out:
- memset(outblockdata, 0, blocksize);
- kfree(outblockdata);
+ kzfree(outblockdata);
err_free_in:
- memset(inblockdata, 0, blocksize);
- kfree(inblockdata);
+ kzfree(inblockdata);
err_free_cipher:
crypto_free_sync_skcipher(cipher);
err_return:
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index e188139f0464..41c106e45f01 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -159,12 +159,8 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq)
spin_lock_irqsave(&ctx->lock, flags);
info = ctx->retransmit_hint;
- if (info && !before(acked_seq, info->end_seq)) {
+ if (info && !before(acked_seq, info->end_seq))
ctx->retransmit_hint = NULL;
- list_del(&info->list);
- destroy_record(info);
- deleted_records++;
- }
list_for_each_entry_safe(info, temp, &ctx->records_list, list) {
if (before(acked_seq, info->end_seq))
@@ -838,22 +834,18 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
struct net_device *netdev;
char *iv, *rec_seq;
struct sk_buff *skb;
- int rc = -EINVAL;
__be64 rcd_sn;
+ int rc;
if (!ctx)
- goto out;
+ return -EINVAL;
- if (ctx->priv_ctx_tx) {
- rc = -EEXIST;
- goto out;
- }
+ if (ctx->priv_ctx_tx)
+ return -EEXIST;
start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL);
- if (!start_marker_record) {
- rc = -ENOMEM;
- goto out;
- }
+ if (!start_marker_record)
+ return -ENOMEM;
offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL);
if (!offload_ctx) {
@@ -939,17 +931,11 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
if (skb)
TCP_SKB_CB(skb)->eor = 1;
- /* We support starting offload on multiple sockets
- * concurrently, so we only need a read lock here.
- * This lock must precede get_netdev_for_sock to prevent races between
- * NETDEV_DOWN and setsockopt.
- */
- down_read(&device_offload_lock);
netdev = get_netdev_for_sock(sk);
if (!netdev) {
pr_err_ratelimited("%s: netdev not found\n", __func__);
rc = -EINVAL;
- goto release_lock;
+ goto disable_cad;
}
if (!(netdev->features & NETIF_F_HW_TLS_TX)) {
@@ -960,10 +946,15 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
/* Avoid offloading if the device is down
* We don't want to offload new flows after
* the NETDEV_DOWN event
+ *
+ * device_offload_lock is taken in tls_device's NETDEV_DOWN
+ * handler, thus protecting against the device going down before
+ * ctx was added to tls_device_list.
*/
+ down_read(&device_offload_lock);
if (!(netdev->flags & IFF_UP)) {
rc = -EINVAL;
- goto release_netdev;
+ goto release_lock;
}
ctx->priv_ctx_tx = offload_ctx;
@@ -971,9 +962,10 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
&ctx->crypto_send.info,
tcp_sk(sk)->write_seq);
if (rc)
- goto release_netdev;
+ goto release_lock;
tls_device_attach(ctx, sk, netdev);
+ up_read(&device_offload_lock);
/* following this assignment tls_is_sk_tx_device_offloaded
* will return true and the context might be accessed
@@ -981,13 +973,14 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
*/
smp_store_release(&sk->sk_validate_xmit_skb, tls_validate_xmit_skb);
dev_put(netdev);
- up_read(&device_offload_lock);
- goto out;
-release_netdev:
- dev_put(netdev);
+ return 0;
+
release_lock:
up_read(&device_offload_lock);
+release_netdev:
+ dev_put(netdev);
+disable_cad:
clean_acked_data_disable(inet_csk(sk));
crypto_free_aead(offload_ctx->aead_send);
free_rec_seq:
@@ -999,7 +992,6 @@ free_offload_ctx:
ctx->priv_ctx_tx = NULL;
free_marker_record:
kfree(start_marker_record);
-out:
return rc;
}
@@ -1012,17 +1004,10 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
if (ctx->crypto_recv.info.version != TLS_1_2_VERSION)
return -EOPNOTSUPP;
- /* We support starting offload on multiple sockets
- * concurrently, so we only need a read lock here.
- * This lock must precede get_netdev_for_sock to prevent races between
- * NETDEV_DOWN and setsockopt.
- */
- down_read(&device_offload_lock);
netdev = get_netdev_for_sock(sk);
if (!netdev) {
pr_err_ratelimited("%s: netdev not found\n", __func__);
- rc = -EINVAL;
- goto release_lock;
+ return -EINVAL;
}
if (!(netdev->features & NETIF_F_HW_TLS_RX)) {
@@ -1033,16 +1018,21 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
/* Avoid offloading if the device is down
* We don't want to offload new flows after
* the NETDEV_DOWN event
+ *
+ * device_offload_lock is taken in tls_device's NETDEV_DOWN
+ * handler, thus protecting against the device going down before
+ * ctx was added to tls_device_list.
*/
+ down_read(&device_offload_lock);
if (!(netdev->flags & IFF_UP)) {
rc = -EINVAL;
- goto release_netdev;
+ goto release_lock;
}
context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL);
if (!context) {
rc = -ENOMEM;
- goto release_netdev;
+ goto release_lock;
}
context->resync_nh_reset = 1;
@@ -1058,7 +1048,11 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
goto free_sw_resources;
tls_device_attach(ctx, sk, netdev);
- goto release_netdev;
+ up_read(&device_offload_lock);
+
+ dev_put(netdev);
+
+ return 0;
free_sw_resources:
up_read(&device_offload_lock);
@@ -1066,10 +1060,10 @@ free_sw_resources:
down_read(&device_offload_lock);
release_ctx:
ctx->priv_ctx_rx = NULL;
-release_netdev:
- dev_put(netdev);
release_lock:
up_read(&device_offload_lock);
+release_netdev:
+ dev_put(netdev);
return rc;
}
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 277f7c209fed..ac88877dcade 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -286,19 +286,14 @@ static void tls_sk_proto_cleanup(struct sock *sk,
kfree(ctx->tx.rec_seq);
kfree(ctx->tx.iv);
tls_sw_release_resources_tx(sk);
-#ifdef CONFIG_TLS_DEVICE
} else if (ctx->tx_conf == TLS_HW) {
tls_device_free_resources_tx(sk);
-#endif
}
if (ctx->rx_conf == TLS_SW)
tls_sw_release_resources_rx(sk);
-
-#ifdef CONFIG_TLS_DEVICE
- if (ctx->rx_conf == TLS_HW)
+ else if (ctx->rx_conf == TLS_HW)
tls_device_offload_cleanup_rx(sk);
-#endif
}
static void tls_sk_proto_close(struct sock *sk, long timeout)
@@ -331,7 +326,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
tls_sw_strparser_done(ctx);
if (ctx->rx_conf == TLS_SW)
tls_sw_free_ctx_rx(ctx);
- ctx->sk_proto_close(sk, timeout);
+ ctx->sk_proto->close(sk, timeout);
if (free_ctx)
tls_ctx_free(sk, ctx);
@@ -451,7 +446,8 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
struct tls_context *ctx = tls_get_ctx(sk);
if (level != SOL_TLS)
- return ctx->getsockopt(sk, level, optname, optval, optlen);
+ return ctx->sk_proto->getsockopt(sk, level,
+ optname, optval, optlen);
return do_tls_getsockopt(sk, optname, optval, optlen);
}
@@ -536,26 +532,18 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
}
if (tx) {
-#ifdef CONFIG_TLS_DEVICE
rc = tls_set_device_offload(sk, ctx);
conf = TLS_HW;
if (rc) {
-#else
- {
-#endif
rc = tls_set_sw_offload(sk, ctx, 1);
if (rc)
goto err_crypto_info;
conf = TLS_SW;
}
} else {
-#ifdef CONFIG_TLS_DEVICE
rc = tls_set_device_offload_rx(sk, ctx);
conf = TLS_HW;
if (rc) {
-#else
- {
-#endif
rc = tls_set_sw_offload(sk, ctx, 0);
if (rc)
goto err_crypto_info;
@@ -609,7 +597,8 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
struct tls_context *ctx = tls_get_ctx(sk);
if (level != SOL_TLS)
- return ctx->setsockopt(sk, level, optname, optval, optlen);
+ return ctx->sk_proto->setsockopt(sk, level, optname, optval,
+ optlen);
return do_tls_setsockopt(sk, optname, optval, optlen);
}
@@ -624,10 +613,7 @@ static struct tls_context *create_ctx(struct sock *sk)
return NULL;
rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
- ctx->setsockopt = sk->sk_prot->setsockopt;
- ctx->getsockopt = sk->sk_prot->getsockopt;
- ctx->sk_proto_close = sk->sk_prot->close;
- ctx->unhash = sk->sk_prot->unhash;
+ ctx->sk_proto = sk->sk_prot;
return ctx;
}
@@ -683,9 +669,6 @@ static int tls_hw_prot(struct sock *sk)
spin_unlock_bh(&device_spinlock);
tls_build_proto(sk);
- ctx->hash = sk->sk_prot->hash;
- ctx->unhash = sk->sk_prot->unhash;
- ctx->sk_proto_close = sk->sk_prot->close;
ctx->sk_destruct = sk->sk_destruct;
sk->sk_destruct = tls_hw_sk_destruct;
ctx->rx_conf = TLS_HW_RECORD;
@@ -717,7 +700,7 @@ static void tls_hw_unhash(struct sock *sk)
}
}
spin_unlock_bh(&device_spinlock);
- ctx->unhash(sk);
+ ctx->sk_proto->unhash(sk);
}
static int tls_hw_hash(struct sock *sk)
@@ -726,7 +709,7 @@ static int tls_hw_hash(struct sock *sk)
struct tls_device *dev;
int err;
- err = ctx->hash(sk);
+ err = ctx->sk_proto->hash(sk);
spin_lock_bh(&device_spinlock);
list_for_each_entry(dev, &device_list, dev_list) {
if (dev->hash) {
@@ -816,7 +799,6 @@ static int tls_init(struct sock *sk)
ctx->tx_conf = TLS_BASE;
ctx->rx_conf = TLS_BASE;
- ctx->sk_proto = sk->sk_prot;
update_sk_prot(sk, ctx);
out:
write_unlock_bh(&sk->sk_callback_lock);
@@ -828,12 +810,10 @@ static void tls_update(struct sock *sk, struct proto *p)
struct tls_context *ctx;
ctx = tls_get_ctx(sk);
- if (likely(ctx)) {
- ctx->sk_proto_close = p->close;
+ if (likely(ctx))
ctx->sk_proto = p;
- } else {
+ else
sk->sk_prot = p;
- }
}
static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
@@ -927,9 +907,7 @@ static int __init tls_register(void)
tls_sw_proto_ops = inet_stream_ops;
tls_sw_proto_ops.splice_read = tls_sw_splice_read;
-#ifdef CONFIG_TLS_DEVICE
tls_device_init();
-#endif
tcp_register_ulp(&tcp_tls_ulp_ops);
return 0;
@@ -938,9 +916,7 @@ static int __init tls_register(void)
static void __exit tls_unregister(void)
{
tcp_unregister_ulp(&tcp_tls_ulp_ops);
-#ifdef CONFIG_TLS_DEVICE
tls_device_cleanup();
-#endif
}
module_init(tls_register);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 91d21b048a9b..c2b5e0d2ba1a 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1489,13 +1489,12 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
int pad, err = 0;
if (!ctx->decrypted) {
-#ifdef CONFIG_TLS_DEVICE
if (tls_ctx->rx_conf == TLS_HW) {
err = tls_device_decrypted(sk, skb);
if (err < 0)
return err;
}
-#endif
+
/* Still not decrypted after tls_device */
if (!ctx->decrypted) {
err = decrypt_internal(sk, skb, dest, NULL, chunk, zc,
@@ -2014,10 +2013,9 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
ret = -EINVAL;
goto read_failure;
}
-#ifdef CONFIG_TLS_DEVICE
+
tls_device_rx_resync_new_rec(strp->sk, data_len + TLS_HEADER_SIZE,
TCP_SKB_CB(skb)->seq + rxm->offset);
-#endif
return data_len + TLS_HEADER_SIZE;
read_failure:
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 94cc0fa3e848..5bb70c692b1e 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -307,8 +307,13 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
spin_unlock_bh(&vvs->rx_lock);
- /* We send a credit update only when the space available seen
- * by the transmitter is less than VIRTIO_VSOCK_MAX_PKT_BUF_SIZE
+ /* To reduce the number of credit update messages,
+ * don't update credits as long as lots of space is available.
+ * Note: the limit chosen here is arbitrary. Setting the limit
+ * too high causes extra messages. Too low causes transmitter
+ * stalls. As stalls are in theory more expensive than extra
+ * messages, we set the limit to a high value. TODO: experiment
+ * with different values.
*/
if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
virtio_transport_send_credit_update(vsk,
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 0e0062127124..947b8ff0227e 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -14,7 +14,7 @@
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/idr.h>
-#include <linux/highmem.h>
+#include <linux/vmalloc.h>
#include "xdp_umem.h"
#include "xsk_queue.h"
@@ -106,14 +106,22 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
umem->dev = dev;
umem->queue_id = queue_id;
+ if (flags & XDP_USE_NEED_WAKEUP) {
+ umem->flags |= XDP_UMEM_USES_NEED_WAKEUP;
+ /* Tx needs to be explicitly woken up the first time.
+ * This also covers drivers that do not implement the
+ * feature; they will always have to call sendto().
+ */
+ xsk_set_tx_need_wakeup(umem);
+ }
+
dev_hold(dev);
if (force_copy)
/* For copy-mode, we are done. */
return 0;
- if (!dev->netdev_ops->ndo_bpf ||
- !dev->netdev_ops->ndo_xsk_async_xmit) {
+ if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_wakeup) {
err = -EOPNOTSUPP;
goto err_unreg_umem;
}
@@ -170,7 +178,30 @@ static void xdp_umem_unmap_pages(struct xdp_umem *umem)
unsigned int i;
for (i = 0; i < umem->npgs; i++)
- kunmap(umem->pgs[i]);
+ if (PageHighMem(umem->pgs[i]))
+ vunmap(umem->pages[i].addr);
+}
+
+static int xdp_umem_map_pages(struct xdp_umem *umem)
+{
+ unsigned int i;
+ void *addr;
+
+ for (i = 0; i < umem->npgs; i++) {
+ if (PageHighMem(umem->pgs[i]))
+ addr = vmap(&umem->pgs[i], 1, VM_MAP, PAGE_KERNEL);
+ else
+ addr = page_address(umem->pgs[i]);
+
+ if (!addr) {
+ xdp_umem_unmap_pages(umem);
+ return -ENOMEM;
+ }
+
+ umem->pages[i].addr = addr;
+ }
+
+ return 0;
}
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
@@ -309,10 +340,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
+ bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
unsigned int chunks, chunks_per_page;
u64 addr = mr->addr, size = mr->len;
- int size_chk, err, i;
+ int size_chk, err;
if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
/* Strictly speaking we could support this, if:
@@ -324,7 +356,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
}
- if (!is_power_of_2(chunk_size))
+ if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNK_FLAG |
+ XDP_UMEM_USES_NEED_WAKEUP))
+ return -EINVAL;
+
+ if (!unaligned_chunks && !is_power_of_2(chunk_size))
return -EINVAL;
if (!PAGE_ALIGNED(addr)) {
@@ -341,9 +377,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (chunks == 0)
return -EINVAL;
- chunks_per_page = PAGE_SIZE / chunk_size;
- if (chunks < chunks_per_page || chunks % chunks_per_page)
- return -EINVAL;
+ if (!unaligned_chunks) {
+ chunks_per_page = PAGE_SIZE / chunk_size;
+ if (chunks < chunks_per_page || chunks % chunks_per_page)
+ return -EINVAL;
+ }
headroom = ALIGN(headroom, 64);
@@ -352,13 +390,15 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
umem->address = (unsigned long)addr;
- umem->chunk_mask = ~((u64)chunk_size - 1);
+ umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK
+ : ~((u64)chunk_size - 1);
umem->size = size;
umem->headroom = headroom;
umem->chunk_size_nohr = chunk_size - headroom;
umem->npgs = size / PAGE_SIZE;
umem->pgs = NULL;
umem->user = NULL;
+ umem->flags = mr->flags;
INIT_LIST_HEAD(&umem->xsk_list);
spin_lock_init(&umem->xsk_list_lock);
@@ -378,10 +418,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
goto out_pin;
}
- for (i = 0; i < umem->npgs; i++)
- umem->pages[i].addr = kmap(umem->pgs[i]);
+ err = xdp_umem_map_pages(umem);
+ if (!err)
+ return 0;
- return 0;
+ kfree(umem->pages);
out_pin:
xdp_umem_unpin_pages(umem);
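For illustration, a minimal user-space sketch of registering a umem with the new flags word; the struct member and flag name follow the uapi additions in this series and should be treated as assumptions when building against older headers:

/* Hypothetical sketch: register a umem with unaligned chunks enabled.
 * Assumes the if_xdp.h uapi from this series (flags member and
 * XDP_UMEM_UNALIGNED_CHUNK_FLAG); not a replacement for libbpf's
 * xsk_umem__create().
 */
#include <sys/socket.h>
#include <linux/if_xdp.h>

static int register_umem(int xsk_fd, void *buf, __u64 len, __u32 frame_size)
{
	struct xdp_umem_reg mr = {
		.addr = (__u64)(unsigned long)buf,
		.len = len,
		/* With the unaligned flag, chunk_size need not be a power of
		 * two, but it must still fall in the min-chunk..PAGE_SIZE
		 * range checked in xdp_umem_reg() above.
		 */
		.chunk_size = frame_size,
		.headroom = 0,
		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
	};

	/* Older kernels only understand the v1 layout; the optlen and flags
	 * checks added above reject what they cannot handle.
	 */
	return setsockopt(xsk_fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
}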
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 59b57d708697..c2f1af3b6a7c 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -45,7 +45,7 @@ EXPORT_SYMBOL(xsk_umem_has_addrs);
u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
{
- return xskq_peek_addr(umem->fq, addr);
+ return xskq_peek_addr(umem->fq, addr, umem);
}
EXPORT_SYMBOL(xsk_umem_peek_addr);
@@ -55,21 +55,103 @@ void xsk_umem_discard_addr(struct xdp_umem *umem)
}
EXPORT_SYMBOL(xsk_umem_discard_addr);
+void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
+{
+ if (umem->need_wakeup & XDP_WAKEUP_RX)
+ return;
+
+ umem->fq->ring->flags |= XDP_RING_NEED_WAKEUP;
+ umem->need_wakeup |= XDP_WAKEUP_RX;
+}
+EXPORT_SYMBOL(xsk_set_rx_need_wakeup);
+
+void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
+{
+ struct xdp_sock *xs;
+
+ if (umem->need_wakeup & XDP_WAKEUP_TX)
+ return;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
+ }
+ rcu_read_unlock();
+
+ umem->need_wakeup |= XDP_WAKEUP_TX;
+}
+EXPORT_SYMBOL(xsk_set_tx_need_wakeup);
+
+void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
+{
+ if (!(umem->need_wakeup & XDP_WAKEUP_RX))
+ return;
+
+ umem->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP;
+ umem->need_wakeup &= ~XDP_WAKEUP_RX;
+}
+EXPORT_SYMBOL(xsk_clear_rx_need_wakeup);
+
+void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
+{
+ struct xdp_sock *xs;
+
+ if (!(umem->need_wakeup & XDP_WAKEUP_TX))
+ return;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+ xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
+ }
+ rcu_read_unlock();
+
+ umem->need_wakeup &= ~XDP_WAKEUP_TX;
+}
+EXPORT_SYMBOL(xsk_clear_tx_need_wakeup);
+
+bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
+{
+ return umem->flags & XDP_UMEM_USES_NEED_WAKEUP;
+}
+EXPORT_SYMBOL(xsk_umem_uses_need_wakeup);
+
+/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for
+ * each page. This is only required in copy mode.
+ */
+static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,
+ u32 len, u32 metalen)
+{
+ void *to_buf = xdp_umem_get_data(umem, addr);
+
+ addr = xsk_umem_add_offset_to_addr(addr);
+ if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) {
+ void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr;
+ u64 page_start = addr & ~(PAGE_SIZE - 1);
+ u64 first_len = PAGE_SIZE - (addr - page_start);
+
+ memcpy(to_buf, from_buf, first_len + metalen);
+ memcpy(next_pg_addr, from_buf + first_len, len - first_len);
+
+ return;
+ }
+
+ memcpy(to_buf, from_buf, len + metalen);
+}
+
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- void *to_buf, *from_buf;
+ u64 offset = xs->umem->headroom;
+ u64 addr, memcpy_addr;
+ void *from_buf;
u32 metalen;
- u64 addr;
int err;
- if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+ if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
xs->rx_dropped++;
return -ENOSPC;
}
- addr += xs->umem->headroom;
-
if (unlikely(xdp_data_meta_unsupported(xdp))) {
from_buf = xdp->data;
metalen = 0;
@@ -78,9 +160,11 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
metalen = xdp->data - xdp->data_meta;
}
- to_buf = xdp_umem_get_data(xs->umem, addr);
- memcpy(to_buf, from_buf, len + metalen);
- addr += metalen;
+ memcpy_addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
+ __xsk_rcv_memcpy(xs->umem, memcpy_addr, from_buf, len, metalen);
+
+ offset += metalen;
+ addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
err = xskq_produce_batch_desc(xs->rx, addr, len);
if (!err) {
xskq_discard_addr(xs->umem->fq);
@@ -102,10 +186,23 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
return err;
}
+static bool xsk_is_bound(struct xdp_sock *xs)
+{
+ if (READ_ONCE(xs->state) == XSK_BOUND) {
+ /* Matches smp_wmb() in bind(). */
+ smp_rmb();
+ return true;
+ }
+ return false;
+}
+
int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
u32 len;
+ if (!xsk_is_bound(xs))
+ return -EINVAL;
+
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
@@ -125,6 +222,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
u32 metalen = xdp->data - xdp->data_meta;
u32 len = xdp->data_end - xdp->data;
+ u64 offset = xs->umem->headroom;
void *buffer;
u64 addr;
int err;
@@ -136,17 +234,17 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
goto out_unlock;
}
- if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+ if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
err = -ENOSPC;
goto out_drop;
}
- addr += xs->umem->headroom;
-
+ addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
buffer = xdp_umem_get_data(xs->umem, addr);
memcpy(buffer, xdp->data_meta, len + metalen);
- addr += metalen;
+
+ addr = xsk_umem_adjust_offset(xs->umem, addr, metalen);
err = xskq_produce_batch_desc(xs->rx, addr, len);
if (err)
goto out_drop;
@@ -190,7 +288,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
rcu_read_lock();
list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
- if (!xskq_peek_desc(xs->tx, desc))
+ if (!xskq_peek_desc(xs->tx, desc, umem))
continue;
if (xskq_produce_addr_lazy(umem->cq, desc->addr))
@@ -212,7 +310,8 @@ static int xsk_zc_xmit(struct sock *sk)
struct xdp_sock *xs = xdp_sk(sk);
struct net_device *dev = xs->dev;
- return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id);
+ return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
+ XDP_WAKEUP_TX);
}
static void xsk_destruct_skb(struct sk_buff *skb)
@@ -243,7 +342,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
- while (xskq_peek_desc(xs->tx, &desc)) {
+ while (xskq_peek_desc(xs->tx, &desc, xs->umem)) {
char *buffer;
u64 addr;
u32 len;
@@ -272,7 +371,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
skb->dev = xs->dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
+ skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
skb->destructor = xsk_destruct_skb;
err = dev_direct_xmit(skb, xs->queue_id);
@@ -301,7 +400,7 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
- if (unlikely(!xs->dev))
+ if (unlikely(!xsk_is_bound(xs)))
return -ENXIO;
if (unlikely(!(xs->dev->flags & IFF_UP)))
return -ENETDOWN;
@@ -317,8 +416,19 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait)
{
unsigned int mask = datagram_poll(file, sock, wait);
- struct sock *sk = sock->sk;
- struct xdp_sock *xs = xdp_sk(sk);
+ struct xdp_sock *xs = xdp_sk(sock->sk);
+ struct net_device *dev;
+ struct xdp_umem *umem;
+
+ if (unlikely(!xsk_is_bound(xs)))
+ return mask;
+
+ dev = xs->dev;
+ umem = xs->umem;
+
+ if (umem->need_wakeup)
+ dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
+ umem->need_wakeup);
if (xs->rx && !xskq_empty_desc(xs->rx))
mask |= POLLIN | POLLRDNORM;
@@ -342,7 +452,7 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
/* Make sure queue is ready before it can be seen by others */
smp_wmb();
- *queue = q;
+ WRITE_ONCE(*queue, q);
return 0;
}
@@ -350,10 +460,9 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
{
struct net_device *dev = xs->dev;
- if (!dev || xs->state != XSK_BOUND)
+ if (xs->state != XSK_BOUND)
return;
-
- xs->state = XSK_UNBOUND;
+ WRITE_ONCE(xs->state, XSK_UNBOUND);
/* Wait for driver to stop using the xdp socket. */
xdp_del_sk_umem(xs->umem, xs);
@@ -362,6 +471,52 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
dev_put(dev);
}
+static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
+ struct xdp_sock ***map_entry)
+{
+ struct xsk_map *map = NULL;
+ struct xsk_map_node *node;
+
+ *map_entry = NULL;
+
+ spin_lock_bh(&xs->map_list_lock);
+ node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node,
+ node);
+ if (node) {
+ WARN_ON(xsk_map_inc(node->map));
+ map = node->map;
+ *map_entry = node->map_entry;
+ }
+ spin_unlock_bh(&xs->map_list_lock);
+ return map;
+}
+
+static void xsk_delete_from_maps(struct xdp_sock *xs)
+{
+ /* This function removes the current XDP socket from all the
+ * maps it resides in. We need to take extra care here, due to
+ * the two locks involved. Each map has a lock synchronizing
+ * updates to the entries, and each socket has a lock that
+ * synchronizes access to the list of maps (map_list). For
+ * deadlock avoidance the locks need to be taken in the order
+ * "map lock"->"socket map list lock". We start off by
+ * accessing the socket map list, and take a reference to the
+ * map to guarantee existence between the
+ * xsk_get_map_list_entry() and xsk_map_try_sock_delete()
+ * calls. Then we ask the map to remove the socket, which
+ * tries to remove the socket from the map. Note that there
+ * might be updates to the map between
+ * xsk_get_map_list_entry() and xsk_map_try_sock_delete().
+ */
+ struct xdp_sock **map_entry = NULL;
+ struct xsk_map *map;
+
+ while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
+ xsk_map_try_sock_delete(map, xs, map_entry);
+ xsk_map_put(map);
+ }
+}
+
static int xsk_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -381,7 +536,10 @@ static int xsk_release(struct socket *sock)
sock_prot_inuse_add(net, sk->sk_prot, -1);
local_bh_enable();
+ xsk_delete_from_maps(xs);
+ mutex_lock(&xs->mutex);
xsk_unbind_dev(xs);
+ mutex_unlock(&xs->mutex);
xskq_destroy(xs->rx);
xskq_destroy(xs->tx);
@@ -412,6 +570,24 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd)
return sock;
}
+/* Check if umem pages are contiguous.
+ * In zero-copy mode, use the DMA address to do the page contiguity check.
+ * For all other modes we use addr (kernel virtual address).
+ * Store the result in the low bits of addr.
+ */
+static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags)
+{
+ struct xdp_umem_page *pgs = umem->pages;
+ int i, is_contig;
+
+ for (i = 0; i < umem->npgs - 1; i++) {
+ is_contig = (flags & XDP_ZEROCOPY) ?
+ (pgs[i].dma + PAGE_SIZE == pgs[i + 1].dma) :
+ (pgs[i].addr + PAGE_SIZE == pgs[i + 1].addr);
+ pgs[i].addr += is_contig << XSK_NEXT_PG_CONTIG_SHIFT;
+ }
+}
+
static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -427,7 +603,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
return -EINVAL;
flags = sxdp->sxdp_flags;
- if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY))
+ if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
+ XDP_USE_NEED_WAKEUP))
return -EINVAL;
rtnl_lock();
@@ -454,7 +631,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
struct xdp_sock *umem_xs;
struct socket *sock;
- if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) {
+ if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
+ (flags & XDP_USE_NEED_WAKEUP)) {
/* Cannot specify flags for shared sockets. */
err = -EINVAL;
goto out_unlock;
@@ -473,19 +651,19 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
umem_xs = xdp_sk(sock->sk);
- if (!umem_xs->umem) {
- /* No umem to inherit. */
+ if (!xsk_is_bound(umem_xs)) {
err = -EBADF;
sockfd_put(sock);
goto out_unlock;
- } else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
+ }
+ if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
err = -EINVAL;
sockfd_put(sock);
goto out_unlock;
}
xdp_get_umem(umem_xs->umem);
- xs->umem = umem_xs->umem;
+ WRITE_ONCE(xs->umem, umem_xs->umem);
sockfd_put(sock);
} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
err = -EINVAL;
@@ -500,6 +678,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
if (err)
goto out_unlock;
+
+ xsk_check_page_contiguity(xs->umem, flags);
}
xs->dev = dev;
@@ -510,16 +690,28 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xdp_add_sk_umem(xs->umem, xs);
out_unlock:
- if (err)
+ if (err) {
dev_put(dev);
- else
- xs->state = XSK_BOUND;
+ } else {
+ /* Matches smp_rmb() in bind() for shared umem
+ * sockets, and xsk_is_bound().
+ */
+ smp_wmb();
+ WRITE_ONCE(xs->state, XSK_BOUND);
+ }
out_release:
mutex_unlock(&xs->mutex);
rtnl_unlock();
return err;
}
+struct xdp_umem_reg_v1 {
+ __u64 addr; /* Start of packet data area */
+ __u64 len; /* Length of packet data area */
+ __u32 chunk_size;
+ __u32 headroom;
+};
+
static int xsk_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
@@ -549,15 +741,24 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
}
q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
err = xsk_init_queue(entries, q, false);
+ if (!err && optname == XDP_TX_RING)
+ /* Tx needs to be explicitly woken up the first time */
+ xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
mutex_unlock(&xs->mutex);
return err;
}
case XDP_UMEM_REG:
{
- struct xdp_umem_reg mr;
+ size_t mr_size = sizeof(struct xdp_umem_reg);
+ struct xdp_umem_reg mr = {};
struct xdp_umem *umem;
- if (copy_from_user(&mr, optval, sizeof(mr)))
+ if (optlen < sizeof(struct xdp_umem_reg_v1))
+ return -EINVAL;
+ else if (optlen < sizeof(mr))
+ mr_size = sizeof(struct xdp_umem_reg_v1);
+
+ if (copy_from_user(&mr, optval, mr_size))
return -EFAULT;
mutex_lock(&xs->mutex);
@@ -574,7 +775,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
/* Make sure umem is ready before it can be seen by others */
smp_wmb();
- xs->umem = umem;
+ WRITE_ONCE(xs->umem, umem);
mutex_unlock(&xs->mutex);
return 0;
}
@@ -610,6 +811,20 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
return -ENOPROTOOPT;
}
+static void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring)
+{
+ ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
+ ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
+ ring->desc = offsetof(struct xdp_rxtx_ring, desc);
+}
+
+static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring)
+{
+ ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer);
+ ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
+ ring->desc = offsetof(struct xdp_umem_ring, desc);
+}
+
static int xsk_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -649,26 +864,49 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
case XDP_MMAP_OFFSETS:
{
struct xdp_mmap_offsets off;
+ struct xdp_mmap_offsets_v1 off_v1;
+ bool flags_supported = true;
+ void *to_copy;
- if (len < sizeof(off))
+ if (len < sizeof(off_v1))
return -EINVAL;
+ else if (len < sizeof(off))
+ flags_supported = false;
+
+ if (flags_supported) {
+ /* xdp_ring_offset is identical to xdp_ring_offset_v1
+ * except for the flags field added to the end.
+ */
+ xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
+ &off.rx);
+ xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
+ &off.tx);
+ xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
+ &off.fr);
+ xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
+ &off.cr);
+ off.rx.flags = offsetof(struct xdp_rxtx_ring,
+ ptrs.flags);
+ off.tx.flags = offsetof(struct xdp_rxtx_ring,
+ ptrs.flags);
+ off.fr.flags = offsetof(struct xdp_umem_ring,
+ ptrs.flags);
+ off.cr.flags = offsetof(struct xdp_umem_ring,
+ ptrs.flags);
+
+ len = sizeof(off);
+ to_copy = &off;
+ } else {
+ xsk_enter_rxtx_offsets(&off_v1.rx);
+ xsk_enter_rxtx_offsets(&off_v1.tx);
+ xsk_enter_umem_offsets(&off_v1.fr);
+ xsk_enter_umem_offsets(&off_v1.cr);
+
+ len = sizeof(off_v1);
+ to_copy = &off_v1;
+ }
- off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
- off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
- off.rx.desc = offsetof(struct xdp_rxtx_ring, desc);
- off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
- off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
- off.tx.desc = offsetof(struct xdp_rxtx_ring, desc);
-
- off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
- off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
- off.fr.desc = offsetof(struct xdp_umem_ring, desc);
- off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
- off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
- off.cr.desc = offsetof(struct xdp_umem_ring, desc);
-
- len = sizeof(off);
- if (copy_to_user(optval, &off, len))
+ if (copy_to_user(optval, to_copy, len))
return -EFAULT;
if (put_user(len, optlen))
return -EFAULT;
@@ -713,7 +951,7 @@ static int xsk_mmap(struct file *file, struct socket *sock,
unsigned long pfn;
struct page *qpg;
- if (xs->state != XSK_READY)
+ if (READ_ONCE(xs->state) != XSK_READY)
return -EBUSY;
if (offset == XDP_PGOFF_RX_RING) {
@@ -855,6 +1093,9 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
spin_lock_init(&xs->rx_lock);
spin_lock_init(&xs->tx_completion_lock);
+ INIT_LIST_HEAD(&xs->map_list);
+ spin_lock_init(&xs->map_list_lock);
+
mutex_lock(&net->xdp.lock);
sk_add_node_rcu(sk, &net->xdp.list);
mutex_unlock(&net->xdp.lock);
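From user space, the need_wakeup protocol boils down to checking the new flags word of each producer ring before issuing a syscall. A minimal sketch of the intended pattern, assuming libbpf's xsk helpers from this series (xsk_ring_prod__needs_wakeup(), xsk_socket__fd()):

/* Sketch only: drive Tx and the fill ring with the need_wakeup flag.
 * Assumes libbpf's xsk.h from this series; error handling omitted.
 */
#include <poll.h>
#include <sys/socket.h>
#include <bpf/xsk.h>

static void kick_if_needed(struct xsk_socket *xsk, struct xsk_ring_prod *tx,
			   struct xsk_ring_prod *fq)
{
	/* Tx: only pay for the syscall when the driver asked for it. */
	if (xsk_ring_prod__needs_wakeup(tx))
		sendto(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);

	/* Rx side: when the fill ring runs dry, the driver sets the flag
	 * and expects poll() (or recvmsg) to wake it up again.
	 */
	if (xsk_ring_prod__needs_wakeup(fq)) {
		struct pollfd pfd = {
			.fd = xsk_socket__fd(xsk),
			.events = POLLIN,
		};

		poll(&pfd, 1, 1000);
	}
}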
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
index ba8120610426..4cfd106bdb53 100644
--- a/net/xdp/xsk.h
+++ b/net/xdp/xsk.h
@@ -4,6 +4,19 @@
#ifndef XSK_H_
#define XSK_H_
+struct xdp_ring_offset_v1 {
+ __u64 producer;
+ __u64 consumer;
+ __u64 desc;
+};
+
+struct xdp_mmap_offsets_v1 {
+ struct xdp_ring_offset_v1 rx;
+ struct xdp_ring_offset_v1 tx;
+ struct xdp_ring_offset_v1 fr;
+ struct xdp_ring_offset_v1 cr;
+};
+
static inline struct xdp_sock *xdp_sk(struct sock *sk)
{
return (struct xdp_sock *)sk;
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index d5e06c8e0cbf..f59791ba43a0 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
du.id = umem->id;
du.size = umem->size;
du.num_pages = umem->npgs;
- du.chunk_size = (__u32)(~umem->chunk_mask + 1);
+ du.chunk_size = umem->chunk_size_nohr + umem->headroom;
du.headroom = umem->headroom;
du.ifindex = umem->dev ? umem->dev->ifindex : 0;
du.queue_id = umem->queue_id;
@@ -97,6 +97,7 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
msg->xdiag_ino = sk_ino;
sock_diag_save_cookie(sk, msg->xdiag_cookie);
+ mutex_lock(&xs->mutex);
if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
goto out_nlmsg_trim;
@@ -117,10 +118,12 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
goto out_nlmsg_trim;
+ mutex_unlock(&xs->mutex);
nlmsg_end(nlskb, nlh);
return 0;
out_nlmsg_trim:
+ mutex_unlock(&xs->mutex);
nlmsg_cancel(nlskb, nlh);
return -EMSGSIZE;
}
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 909c5168ed0f..eddae4688862 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -16,6 +16,7 @@
struct xdp_ring {
u32 producer ____cacheline_aligned_in_smp;
u32 consumer ____cacheline_aligned_in_smp;
+ u32 flags;
};
/* Used for the RX and TX queues for packets */
@@ -133,6 +134,17 @@ static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)
/* UMEM queue */
+static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
+ u64 length)
+{
+ bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
+ bool next_pg_contig =
+ (unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr &
+ XSK_NEXT_PG_CONTIG_MASK;
+
+ return cross_pg && !next_pg_contig;
+}
+
static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
{
if (addr >= q->size) {
@@ -143,23 +155,51 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
return true;
}
-static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)
+static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
+ u64 length,
+ struct xdp_umem *umem)
+{
+ u64 base_addr = xsk_umem_extract_addr(addr);
+
+ addr = xsk_umem_add_offset_to_addr(addr);
+ if (base_addr >= q->size || addr >= q->size ||
+ xskq_crosses_non_contig_pg(umem, addr, length)) {
+ q->invalid_descs++;
+ return false;
+ }
+
+ return true;
+}
+
+static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr,
+ struct xdp_umem *umem)
{
while (q->cons_tail != q->cons_head) {
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
*addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
+
+ if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
+ if (xskq_is_valid_addr_unaligned(q, *addr,
+ umem->chunk_size_nohr,
+ umem))
+ return addr;
+ goto out;
+ }
+
if (xskq_is_valid_addr(q, *addr))
return addr;
+out:
q->cons_tail++;
}
return NULL;
}
-static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
+static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr,
+ struct xdp_umem *umem)
{
if (q->cons_tail == q->cons_head) {
smp_mb(); /* D, matches A */
@@ -170,7 +210,7 @@ static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
smp_rmb();
}
- return xskq_validate_addr(q, addr);
+ return xskq_validate_addr(q, addr, umem);
}
static inline void xskq_discard_addr(struct xsk_queue *q)
@@ -229,8 +269,21 @@ static inline int xskq_reserve_addr(struct xsk_queue *q)
/* Rx/Tx queue */
-static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
+static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,
+ struct xdp_umem *umem)
{
+ if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
+ if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem))
+ return false;
+
+ if (d->len > umem->chunk_size_nohr || d->options) {
+ q->invalid_descs++;
+ return false;
+ }
+
+ return true;
+ }
+
if (!xskq_is_valid_addr(q, d->addr))
return false;
@@ -244,14 +297,15 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
}
static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
- struct xdp_desc *desc)
+ struct xdp_desc *desc,
+ struct xdp_umem *umem)
{
while (q->cons_tail != q->cons_head) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
*desc = READ_ONCE(ring->desc[idx]);
- if (xskq_is_valid_desc(q, desc))
+ if (xskq_is_valid_desc(q, desc, umem))
return desc;
q->cons_tail++;
@@ -261,7 +315,8 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
}
static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
- struct xdp_desc *desc)
+ struct xdp_desc *desc,
+ struct xdp_umem *umem)
{
if (q->cons_tail == q->cons_head) {
smp_mb(); /* D, matches A */
@@ -272,7 +327,7 @@ static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
smp_rmb(); /* C, matches B */
}
- return xskq_validate_desc(q, desc);
+ return xskq_validate_desc(q, desc, umem);
}
static inline void xskq_discard_desc(struct xsk_queue *q)
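The unaligned-chunk validation above relies on descriptor addresses that keep the chunk base in their lower bits and carry an offset in the upper bits. A small sketch of that encoding, assuming the XSK_UNALIGNED_BUF_OFFSET_SHIFT/XSK_UNALIGNED_BUF_ADDR_MASK constants added by this series (the shift is assumed to be 48 here):

/* Sketch of the unaligned-chunk address convention:
 *
 *   63            48 47                          0
 *  +----------------+----------------------------+
 *  |     offset     |        base address        |
 *  +----------------+----------------------------+
 */
#include <linux/if_xdp.h>

static inline __u64 example_encode(__u64 base, __u64 offset)
{
	return base | (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}

static inline __u64 example_base(__u64 addr)
{
	/* What xsk_umem_extract_addr() is expected to return. */
	return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
}

static inline __u64 example_full(__u64 addr)
{
	/* Base plus offset: what xsk_umem_add_offset_to_addr() returns. */
	return example_base(addr) + (addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}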
diff --git a/samples/bpf/syscall_nrs.c b/samples/bpf/syscall_nrs.c
index 516e255cbe8f..88f940052450 100644
--- a/samples/bpf/syscall_nrs.c
+++ b/samples/bpf/syscall_nrs.c
@@ -9,5 +9,11 @@ void syscall_defines(void)
COMMENT("Linux system call numbers.");
SYSNR(__NR_write);
SYSNR(__NR_read);
+#ifdef __NR_mmap2
+ SYSNR(__NR_mmap2);
+#endif
+#ifdef __NR_mmap
SYSNR(__NR_mmap);
+#endif
+
}
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
index f57f4e1ea1ec..35cb0eed3be5 100644
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5_kern.c
@@ -68,12 +68,25 @@ PROG(SYS__NR_read)(struct pt_regs *ctx)
return 0;
}
+#ifdef __NR_mmap2
+PROG(SYS__NR_mmap2)(struct pt_regs *ctx)
+{
+ char fmt[] = "mmap2\n";
+
+ bpf_trace_printk(fmt, sizeof(fmt));
+ return 0;
+}
+#endif
+
+#ifdef __NR_mmap
PROG(SYS__NR_mmap)(struct pt_regs *ctx)
{
char fmt[] = "mmap\n";
+
bpf_trace_printk(fmt, sizeof(fmt));
return 0;
}
+#endif
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 93eaaf7239b2..102eace22956 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -67,8 +67,14 @@ static int opt_ifindex;
static int opt_queue;
static int opt_poll;
static int opt_interval = 1;
+static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
+static u32 opt_umem_flags;
+static int opt_unaligned_chunks;
+static int opt_mmap_flags;
static u32 opt_xdp_bind_flags;
static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+static int opt_timeout = 1000;
+static bool opt_need_wakeup = true;
static __u32 prog_id;
struct xsk_umem_info {
@@ -282,7 +288,9 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
.frame_size = opt_xsk_frame_size,
.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
+ .flags = opt_umem_flags
};
+
int ret;
umem = calloc(1, sizeof(*umem));
@@ -291,6 +299,7 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
&cfg);
+
if (ret)
exit_with_error(-ret);
@@ -352,6 +361,8 @@ static struct option long_options[] = {
{"zero-copy", no_argument, 0, 'z'},
{"copy", no_argument, 0, 'c'},
{"frame-size", required_argument, 0, 'f'},
+ {"no-need-wakeup", no_argument, 0, 'm'},
+ {"unaligned", no_argument, 0, 'u'},
{0, 0, 0, 0}
};
@@ -372,6 +383,9 @@ static void usage(const char *prog)
" -z, --zero-copy Force zero-copy mode.\n"
" -c, --copy Force copy mode.\n"
" -f, --frame-size=n Set the frame size (must be a power of two, default is %d).\n"
+ " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
+ " -f, --frame-size=n Set the frame size (must be a power of two in aligned mode, default is %d).\n"
+ " -u, --unaligned Enable unaligned chunk placement\n"
"\n";
fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
exit(EXIT_FAILURE);
@@ -384,8 +398,8 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:", long_options,
- &option_index);
+ c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:mu",
+ long_options, &option_index);
if (c == -1)
break;
@@ -424,12 +438,21 @@ static void parse_command_line(int argc, char **argv)
case 'c':
opt_xdp_bind_flags |= XDP_COPY;
break;
+ case 'u':
+ opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+ opt_unaligned_chunks = 1;
+ opt_mmap_flags = MAP_HUGETLB;
+ break;
case 'F':
opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
case 'f':
opt_xsk_frame_size = atoi(optarg);
+ case 'm':
+ opt_need_wakeup = false;
+ opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP;
break;
+
default:
usage(basename(argv[0]));
}
@@ -442,7 +465,8 @@ static void parse_command_line(int argc, char **argv)
usage(basename(argv[0]));
}
- if (opt_xsk_frame_size & (opt_xsk_frame_size - 1)) {
+ if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) &&
+ !opt_unaligned_chunks) {
fprintf(stderr, "--frame-size=%d is not a power of two\n",
opt_xsk_frame_size);
usage(basename(argv[0]));
@@ -459,8 +483,10 @@ static void kick_tx(struct xsk_socket_info *xsk)
exit_with_error(errno);
}
-static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk)
+static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
+ struct pollfd *fds)
{
+ struct xsk_umem_info *umem = xsk->umem;
u32 idx_cq = 0, idx_fq = 0;
unsigned int rcvd;
size_t ndescs;
@@ -468,27 +494,30 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk)
if (!xsk->outstanding_tx)
return;
- kick_tx(xsk);
+ if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
+ kick_tx(xsk);
+
ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
xsk->outstanding_tx;
/* re-add completed Tx buffers */
- rcvd = xsk_ring_cons__peek(&xsk->umem->cq, ndescs, &idx_cq);
+ rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq);
if (rcvd > 0) {
unsigned int i;
int ret;
- ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+ ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
while (ret != rcvd) {
if (ret < 0)
exit_with_error(-ret);
- ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd,
- &idx_fq);
+ if (xsk_ring_prod__needs_wakeup(&umem->fq))
+ ret = poll(fds, num_socks, opt_timeout);
+ ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
}
+
for (i = 0; i < rcvd; i++)
- *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) =
- *xsk_ring_cons__comp_addr(&xsk->umem->cq,
- idx_cq++);
+ *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) =
+ *xsk_ring_cons__comp_addr(&umem->cq, idx_cq++);
xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
@@ -505,7 +534,8 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk)
if (!xsk->outstanding_tx)
return;
- kick_tx(xsk);
+ if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
+ kick_tx(xsk);
rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx);
if (rcvd > 0) {
@@ -515,30 +545,38 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk)
}
}
-static void rx_drop(struct xsk_socket_info *xsk)
+static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
{
unsigned int rcvd, i;
u32 idx_rx = 0, idx_fq = 0;
int ret;
rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
- if (!rcvd)
+ if (!rcvd) {
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+ ret = poll(fds, num_socks, opt_timeout);
return;
+ }
ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
while (ret != rcvd) {
if (ret < 0)
exit_with_error(-ret);
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+ ret = poll(fds, num_socks, opt_timeout);
ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
}
for (i = 0; i < rcvd; i++) {
u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
+ u64 orig = xsk_umem__extract_addr(addr);
+
+ addr = xsk_umem__add_offset_to_addr(addr);
char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
hex_dump(pkt, len, addr);
- *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = addr;
+ *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
}
xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
@@ -549,42 +587,65 @@ static void rx_drop(struct xsk_socket_info *xsk)
static void rx_drop_all(void)
{
struct pollfd fds[MAX_SOCKS + 1];
- int i, ret, timeout, nfds = 1;
+ int i, ret;
memset(fds, 0, sizeof(fds));
for (i = 0; i < num_socks; i++) {
fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
fds[i].events = POLLIN;
- timeout = 1000; /* 1sn */
}
for (;;) {
if (opt_poll) {
- ret = poll(fds, nfds, timeout);
+ ret = poll(fds, num_socks, opt_timeout);
if (ret <= 0)
continue;
}
for (i = 0; i < num_socks; i++)
- rx_drop(xsks[i]);
+ rx_drop(xsks[i], fds);
+ }
+}
+
+static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb)
+{
+ u32 idx;
+
+ if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == BATCH_SIZE) {
+ unsigned int i;
+
+ for (i = 0; i < BATCH_SIZE; i++) {
+ xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr =
+ (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
+ xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
+ sizeof(pkt_data) - 1;
+ }
+
+ xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
+ xsk->outstanding_tx += BATCH_SIZE;
+ frame_nb += BATCH_SIZE;
+ frame_nb %= NUM_FRAMES;
}
+
+ complete_tx_only(xsk);
}
-static void tx_only(struct xsk_socket_info *xsk)
+static void tx_only_all(void)
{
- int timeout, ret, nfds = 1;
- struct pollfd fds[nfds + 1];
- u32 idx, frame_nb = 0;
+ struct pollfd fds[MAX_SOCKS];
+ u32 frame_nb[MAX_SOCKS] = {};
+ int i, ret;
memset(fds, 0, sizeof(fds));
- fds[0].fd = xsk_socket__fd(xsk->xsk);
- fds[0].events = POLLOUT;
- timeout = 1000; /* 1sn */
+ for (i = 0; i < num_socks; i++) {
+ fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
+ fds[0].events = POLLOUT;
+ }
for (;;) {
if (opt_poll) {
- ret = poll(fds, nfds, timeout);
+ ret = poll(fds, num_socks, opt_timeout);
if (ret <= 0)
continue;
@@ -592,69 +653,78 @@ static void tx_only(struct xsk_socket_info *xsk)
continue;
}
- if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) ==
- BATCH_SIZE) {
- unsigned int i;
-
- for (i = 0; i < BATCH_SIZE; i++) {
- xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr
- = (frame_nb + i) * opt_xsk_frame_size;
- xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
- sizeof(pkt_data) - 1;
- }
-
- xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
- xsk->outstanding_tx += BATCH_SIZE;
- frame_nb += BATCH_SIZE;
- frame_nb %= NUM_FRAMES;
- }
-
- complete_tx_only(xsk);
+ for (i = 0; i < num_socks; i++)
+ tx_only(xsks[i], frame_nb[i]);
}
}
-static void l2fwd(struct xsk_socket_info *xsk)
+static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
{
- for (;;) {
- unsigned int rcvd, i;
- u32 idx_rx = 0, idx_tx = 0;
- int ret;
+ unsigned int rcvd, i;
+ u32 idx_rx = 0, idx_tx = 0;
+ int ret;
- for (;;) {
- complete_tx_l2fwd(xsk);
+ complete_tx_l2fwd(xsk, fds);
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE,
- &idx_rx);
- if (rcvd > 0)
- break;
- }
+ rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ if (!rcvd) {
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
+ ret = poll(fds, num_socks, opt_timeout);
+ return;
+ }
+ ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
+ while (ret != rcvd) {
+ if (ret < 0)
+ exit_with_error(-ret);
+ if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+ kick_tx(xsk);
ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
- while (ret != rcvd) {
- if (ret < 0)
- exit_with_error(-ret);
- ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
- }
+ }
- for (i = 0; i < rcvd; i++) {
- u64 addr = xsk_ring_cons__rx_desc(&xsk->rx,
- idx_rx)->addr;
- u32 len = xsk_ring_cons__rx_desc(&xsk->rx,
- idx_rx++)->len;
- char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
+ for (i = 0; i < rcvd; i++) {
+ u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
+ u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
+ u64 orig = xsk_umem__extract_addr(addr);
- swap_mac_addresses(pkt);
+ addr = xsk_umem__add_offset_to_addr(addr);
+ char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
- hex_dump(pkt, len, addr);
- xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = addr;
- xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
- }
+ swap_mac_addresses(pkt);
+
+ hex_dump(pkt, len, addr);
+ xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig;
+ xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
+ }
+
+ xsk_ring_prod__submit(&xsk->tx, rcvd);
+ xsk_ring_cons__release(&xsk->rx, rcvd);
+
+ xsk->rx_npkts += rcvd;
+ xsk->outstanding_tx += rcvd;
+}
- xsk_ring_prod__submit(&xsk->tx, rcvd);
- xsk_ring_cons__release(&xsk->rx, rcvd);
+static void l2fwd_all(void)
+{
+ struct pollfd fds[MAX_SOCKS];
+ int i, ret;
+
+ memset(fds, 0, sizeof(fds));
- xsk->rx_npkts += rcvd;
- xsk->outstanding_tx += rcvd;
+ for (i = 0; i < num_socks; i++) {
+ fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
+ fds[i].events = POLLOUT | POLLIN;
+ }
+
+ for (;;) {
+ if (opt_poll) {
+ ret = poll(fds, num_socks, opt_timeout);
+ if (ret <= 0)
+ continue;
+ }
+
+ for (i = 0; i < num_socks; i++)
+ l2fwd(xsks[i], fds);
}
}
@@ -674,11 +744,14 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}
- ret = posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
- NUM_FRAMES * opt_xsk_frame_size);
- if (ret)
- exit_with_error(ret);
-
+ /* Reserve memory for the umem. Use hugepages if unaligned chunk mode */
+ bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0);
+ if (bufs == MAP_FAILED) {
+ printf("ERROR: mmap failed\n");
+ exit(EXIT_FAILURE);
+ }
/* Create sockets... */
umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
xsks[num_socks++] = xsk_configure_socket(umem);
@@ -705,9 +778,9 @@ int main(int argc, char **argv)
if (opt_bench == BENCH_RXDROP)
rx_drop_all();
else if (opt_bench == BENCH_TXONLY)
- tx_only(xsks[0]);
+ tx_only_all();
else
- l2fwd(xsks[0]);
+ l2fwd_all();
return 0;
}
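The rx_drop() and l2fwd() changes keep two values per received descriptor because, in unaligned mode, the address read from the Rx ring still has the offset encoded in its upper bits: the untouched encoding is what goes back onto the fill or Tx ring, while the flattened address is what indexes the packet data. A hedged, self-contained sketch of that round trip using the libbpf helpers introduced here:

/* Sketch: consume one Rx descriptor in unaligned-chunk mode.
 * Assumes libbpf's xsk.h from this series; umem_buffer is the mmapped
 * umem area. Mirrors the pattern used in rx_drop() above.
 */
#include <bpf/xsk.h>

static void handle_rx_desc(struct xsk_ring_cons *rx, struct xsk_ring_prod *fq,
			   void *umem_buffer, __u32 idx_rx, __u32 idx_fq)
{
	__u64 addr = xsk_ring_cons__rx_desc(rx, idx_rx)->addr;
	__u32 len = xsk_ring_cons__rx_desc(rx, idx_rx)->len;
	__u64 orig = xsk_umem__extract_addr(addr);	/* base chunk address */
	char *pkt;

	/* Fold the offset bits into the address before touching the data. */
	addr = xsk_umem__add_offset_to_addr(addr);
	pkt = xsk_umem__get_data(umem_buffer, addr);

	/* ... process pkt[0..len) here ... */
	(void)pkt;
	(void)len;

	/* Recycle the still offset-encoded address on the fill ring. */
	*xsk_ring_prod__fill_addr(fq, idx_fq) = orig;
}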
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index c31193340108..0d8f41db8cd6 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -115,10 +115,12 @@ gen_btf()
LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1}
# dump .BTF section into raw binary file to link with final vmlinux
- bin_arch=$(${OBJDUMP} -f ${1} | grep architecture | \
+ bin_arch=$(LANG=C ${OBJDUMP} -f ${1} | grep architecture | \
cut -d, -f1 | cut -d' ' -f2)
+ bin_format=$(LANG=C ${OBJDUMP} -f ${1} | grep 'file format' | \
+ awk '{print $4}')
${OBJCOPY} --dump-section .BTF=.btf.vmlinux.bin ${1} 2>/dev/null
- ${OBJCOPY} -I binary -O ${CONFIG_OUTPUT_FORMAT} -B ${bin_arch} \
+ ${OBJCOPY} -I binary -O ${bin_format} -B ${bin_arch} \
--rename-section .data=.BTF .btf.vmlinux.bin ${2}
}
diff --git a/tools/bpf/.gitignore b/tools/bpf/.gitignore
index dfe2bd5a4b95..59024197e71d 100644
--- a/tools/bpf/.gitignore
+++ b/tools/bpf/.gitignore
@@ -1,4 +1,5 @@
FEATURE-DUMP.bpf
+feature
bpf_asm
bpf_dbg
bpf_exp.yacc.*
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 53b60ad452f5..fbf5e4a0cb9c 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -81,10 +81,11 @@ $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
clean: bpftool_clean
$(call QUIET_CLEAN, bpf-progs)
- $(Q)rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
+ $(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
$(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
$(call QUIET_CLEAN, core-gen)
- $(Q)rm -f $(OUTPUT)FEATURE-DUMP.bpf
+ $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpf
+ $(Q)$(RM) -r -- $(OUTPUT)feature
install: $(PROGS) bpftool_install
$(call QUIET_INSTALL, bpf_jit_disasm)
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 8248b8dd89d4..b13926432b84 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -3,3 +3,5 @@
bpftool*.8
bpf-helpers.*
FEATURE-DUMP.bpftool
+feature
+libbpf
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index 6694a0fc8f99..39615f8e145b 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -19,6 +19,7 @@ SYNOPSIS
BTF COMMANDS
=============
+| **bpftool** **btf** { **show** | **list** } [**id** *BTF_ID*]
| **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*]
| **bpftool** **btf help**
|
@@ -29,6 +30,12 @@ BTF COMMANDS
DESCRIPTION
===========
+ **bpftool btf { show | list }** [**id** *BTF_ID*]
+ Show information about loaded BTF objects. If a BTF ID is
+ specified, show information only about the given BTF object;
+ otherwise, list all BTF objects currently loaded on the
+ system.
+
**bpftool btf dump** *BTF_SRC*
Dump BTF entries from a given *BTF_SRC*.
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 61d1d270eb5e..1c0f7146aab0 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -36,6 +36,7 @@ MAP COMMANDS
| **bpftool** **map pop** *MAP*
| **bpftool** **map enqueue** *MAP* **value** *VALUE*
| **bpftool** **map dequeue** *MAP*
+| **bpftool** **map freeze** *MAP*
| **bpftool** **map help**
|
| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
@@ -127,6 +128,14 @@ DESCRIPTION
**bpftool map dequeue** *MAP*
Dequeue and print **value** from the queue.
+ **bpftool map freeze** *MAP*
+ Freeze the map as read-only from user space. Entries from a
+ frozen map can no longer be updated or deleted with the
+ **bpf\ ()** system call. This operation is not reversible,
+ and the map remains immutable from user space until its
+ destruction. However, read and write permissions for BPF
+ programs to the map remain unchanged.
+
**bpftool map help**
Print short help message.
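The freeze subcommand is a front end for the BPF_MAP_FREEZE command of the bpf() syscall. A minimal sketch of doing the same from C, assuming libbpf exposes the bpf_map_freeze() wrapper (treat the helper name as an assumption if targeting an older libbpf):

/* Sketch: freeze a pinned map from C. Assumes libbpf provides
 * bpf_map_freeze() as a thin wrapper around BPF_MAP_FREEZE.
 */
#include <stdio.h>
#include <unistd.h>
#include <bpf/bpf.h>

int freeze_pinned_map(const char *path)
{
	int fd, err;

	fd = bpf_obj_get(path);
	if (fd < 0) {
		fprintf(stderr, "cannot open %s\n", path);
		return -1;
	}

	/* After this, user-space updates and deletes fail, while BPF
	 * program access to the map is unchanged.
	 */
	err = bpf_map_freeze(fd);
	close(fd);
	return err;
}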
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index d8e5237a2085..8651b00b81ea 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -15,17 +15,22 @@ SYNOPSIS
*OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
*COMMANDS* :=
- { **show** | **list** } [ **dev** name ] | **help**
+ { **show** | **list** | **attach** | **detach** | **help** }
NET COMMANDS
============
-| **bpftool** **net { show | list } [ dev name ]**
+| **bpftool** **net { show | list }** [ **dev** *NAME* ]
+| **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ]
+| **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME*
| **bpftool** **net help**
+|
+| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
+| *ATTACH_TYPE* := { **xdp** | **xdpgeneric** | **xdpdrv** | **xdpoffload** }
DESCRIPTION
===========
- **bpftool net { show | list } [ dev name ]**
+ **bpftool net { show | list }** [ **dev** *NAME* ]
List bpf program attachments in the kernel networking subsystem.
Currently, only device driver xdp attachments and tc filter
@@ -47,6 +52,24 @@ DESCRIPTION
all bpf programs attached to non clsact qdiscs, and finally all
bpf programs attached to root and clsact qdisc.
+ **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ]
+ Attach bpf program *PROG* to network interface *NAME* with
+ type specified by *ATTACH_TYPE*. A previously attached bpf program
+ can be replaced by running the command again with the **overwrite** option.
+ Currently, only XDP-related modes are supported for *ATTACH_TYPE*.
+
+ *ATTACH_TYPE* can be of:
+ **xdp** - try native XDP and fall back to generic XDP if the NIC driver does not support it;
+ **xdpgeneric** - generic XDP; runs at the generic XDP hook once the packet has already entered the receive path as an skb;
+ **xdpdrv** - native XDP; runs at the earliest point in the driver's receive path;
+ **xdpoffload** - offloaded XDP; runs directly on the NIC for each received packet;
+
+ **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME*
+ Detach the bpf program attached to network interface *NAME* with
+ type specified by *ATTACH_TYPE*. To detach the bpf program, the same
+ *ATTACH_TYPE* previously used for attaching must be specified.
+ Currently, only XDP-related modes are supported for *ATTACH_TYPE*.
+
**bpftool net help**
Print short help message.
@@ -137,6 +160,34 @@ EXAMPLES
}
]
+|
+| **# bpftool net attach xdpdrv id 16 dev enp6s0np0**
+| **# bpftool net**
+
+::
+
+ xdp:
+ enp6s0np0(4) driver id 16
+
+|
+| **# bpftool net attach xdpdrv id 16 dev enp6s0np0**
+| **# bpftool net attach xdpdrv id 20 dev enp6s0np0 overwrite**
+| **# bpftool net**
+
+::
+
+ xdp:
+ enp6s0np0(4) driver id 20
+
+|
+| **# bpftool net attach xdpdrv id 16 dev enp6s0np0**
+| **# bpftool net detach xdpdrv dev enp6s0np0**
+| **# bpftool net**
+
+::
+
+ xdp:
+
SEE ALSO
========
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 4c9d1ffc3fc7..39bc6f0f4f0b 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -17,27 +17,30 @@ endif
BPF_DIR = $(srctree)/tools/lib/bpf/
ifneq ($(OUTPUT),)
- BPF_PATH = $(OUTPUT)
+ LIBBPF_OUTPUT = $(OUTPUT)/libbpf/
+ LIBBPF_PATH = $(LIBBPF_OUTPUT)
else
- BPF_PATH = $(BPF_DIR)
+ LIBBPF_PATH = $(BPF_DIR)
endif
-LIBBPF = $(BPF_PATH)libbpf.a
+LIBBPF = $(LIBBPF_PATH)libbpf.a
-BPFTOOL_VERSION := $(shell make --no-print-directory -sC ../../.. kernelversion)
+BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
$(LIBBPF): FORCE
- $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a
+ $(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT))
+ $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a
$(LIBBPF)-clean:
$(call QUIET_CLEAN, libbpf)
- $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null
+ $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null
prefix ?= /usr/local
bash_compdir ?= /usr/share/bash-completion/completions
CFLAGS += -O2
-CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers
+CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
+CFLAGS += $(filter-out -Wswitch-enum,$(EXTRA_WARNINGS))
CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
-I$(srctree)/kernel/bpf/ \
-I$(srctree)/tools/include \
@@ -52,7 +55,7 @@ ifneq ($(EXTRA_LDFLAGS),)
LDFLAGS += $(EXTRA_LDFLAGS)
endif
-LIBS = -lelf -lz $(LIBBPF)
+LIBS = $(LIBBPF) -lelf -lz
INSTALL ?= install
RM ?= rm -f
@@ -114,16 +117,18 @@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
$(OUTPUT)feature.o: | zdep
$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
$(OUTPUT)%.o: %.c
$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
clean: $(LIBBPF)-clean
$(call QUIET_CLEAN, bpftool)
- $(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+ $(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+ $(Q)$(RM) -r -- $(OUTPUT)libbpf/
$(call QUIET_CLEAN, core-gen)
- $(Q)$(RM) $(OUTPUT)FEATURE-DUMP.bpftool
+ $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
+ $(Q)$(RM) -r -- $(OUTPUT)feature/
install: $(OUTPUT)bpftool
$(call QUIET_INSTALL, bpftool)
@@ -134,8 +139,8 @@ install: $(OUTPUT)bpftool
uninstall:
$(call QUIET_UNINST, bpftool)
- $(Q)$(RM) $(DESTDIR)$(prefix)/sbin/bpftool
- $(Q)$(RM) $(DESTDIR)$(bash_compdir)/bpftool
+ $(Q)$(RM) -- $(DESTDIR)$(prefix)/sbin/bpftool
+ $(Q)$(RM) -- $(DESTDIR)$(bash_compdir)/bpftool
doc:
$(call descend,Documentation)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index df16c5415444..70493a6da206 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -73,8 +73,8 @@ _bpftool_get_prog_tags()
_bpftool_get_btf_ids()
{
- COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
- command sed -n 's/.*"btf_id": \(.*\),\?$/\1/p' )" -- "$cur" ) )
+ COMPREPLY+=( $( compgen -W "$( bpftool -jp btf 2>&1 | \
+ command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
}
_bpftool_get_obj_map_names()
@@ -201,6 +201,10 @@ _bpftool()
_bpftool_get_prog_tags
return 0
;;
+ dev)
+ _sysfs_get_netdevs
+ return 0
+ ;;
file|pinned)
_filedir
return 0
@@ -399,10 +403,6 @@ _bpftool()
_filedir
return 0
;;
- dev)
- _sysfs_get_netdevs
- return 0
- ;;
*)
COMPREPLY=( $( compgen -W "map" -- "$cur" ) )
_bpftool_once_attr 'type'
@@ -449,7 +449,7 @@ _bpftool()
map)
local MAP_TYPE='id pinned'
case $command in
- show|list|dump|peek|pop|dequeue)
+ show|list|dump|peek|pop|dequeue|freeze)
case $prev in
$command)
COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
@@ -498,10 +498,6 @@ _bpftool()
key|value|flags|name|entries)
return 0
;;
- dev)
- _sysfs_get_netdevs
- return 0
- ;;
*)
_bpftool_once_attr 'type'
_bpftool_once_attr 'key'
@@ -642,7 +638,7 @@ _bpftool()
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'delete dump getnext help \
lookup pin event_pipe show list update create \
- peek push enqueue pop dequeue' -- \
+ peek push enqueue pop dequeue freeze' -- \
"$cur" ) )
;;
esac
@@ -674,7 +670,7 @@ _bpftool()
map)
_bpftool_get_map_ids
;;
- dump)
+ $command)
_bpftool_get_btf_ids
;;
esac
@@ -702,9 +698,21 @@ _bpftool()
;;
esac
;;
+ show|list)
+ case $prev in
+ $command)
+ COMPREPLY+=( $( compgen -W "id" -- "$cur" ) )
+ ;;
+ id)
+ _bpftool_get_btf_ids
+ ;;
+ esac
+ return 0
+ ;;
*)
[[ $prev == $object ]] && \
- COMPREPLY=( $( compgen -W 'dump help' -- "$cur" ) )
+ COMPREPLY=( $( compgen -W 'dump help show list' \
+ -- "$cur" ) )
;;
esac
;;
@@ -778,18 +786,67 @@ _bpftool()
esac
;;
net)
+ local PROG_TYPE='id pinned tag'
+ local ATTACH_TYPES='xdp xdpgeneric xdpdrv xdpoffload'
case $command in
+ show|list)
+ [[ $prev != "$command" ]] && return 0
+ COMPREPLY=( $( compgen -W 'dev' -- "$cur" ) )
+ return 0
+ ;;
+ attach)
+ case $cword in
+ 3)
+ COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- "$cur" ) )
+ return 0
+ ;;
+ 4)
+ COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ 5)
+ case $prev in
+ id)
+ _bpftool_get_prog_ids
+ ;;
+ pinned)
+ _filedir
+ ;;
+ esac
+ return 0
+ ;;
+ 6)
+ COMPREPLY=( $( compgen -W 'dev' -- "$cur" ) )
+ return 0
+ ;;
+ 8)
+ _bpftool_once_attr 'overwrite'
+ return 0
+ ;;
+ esac
+ ;;
+ detach)
+ case $cword in
+ 3)
+ COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- "$cur" ) )
+ return 0
+ ;;
+ 4)
+ COMPREPLY=( $( compgen -W 'dev' -- "$cur" ) )
+ return 0
+ ;;
+ esac
+ ;;
*)
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'help \
- show list' -- "$cur" ) )
+ show list attach detach' -- "$cur" ) )
;;
esac
;;
feature)
case $command in
probe)
- [[ $prev == "dev" ]] && _sysfs_get_netdevs && return 0
[[ $prev == "prefix" ]] && return 0
if _bpftool_search_list 'macros'; then
COMPREPLY+=( $( compgen -W 'prefix' -- "$cur" ) )
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 1b8ec91899e6..9a9376d1d3df 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -11,6 +11,7 @@
#include <bpf.h>
#include <libbpf.h>
#include <linux/btf.h>
+#include <linux/hashtable.h>
#include "btf.h"
#include "json_writer.h"
@@ -35,6 +36,16 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_DATASEC] = "DATASEC",
};
+struct btf_attach_table {
+ DECLARE_HASHTABLE(table, 16);
+};
+
+struct btf_attach_point {
+ __u32 obj_id;
+ __u32 btf_id;
+ struct hlist_node hash;
+};
+
static const char *btf_int_enc_str(__u8 encoding)
{
switch (encoding) {
@@ -449,7 +460,7 @@ static int do_dump(int argc, char **argv)
btf_id = strtoul(*argv, &endptr, 0);
if (*endptr) {
- p_err("can't parse %s as ID", **argv);
+ p_err("can't parse %s as ID", *argv);
return -1;
}
NEXT_ARG();
@@ -522,6 +533,330 @@ done:
return err;
}
+static int btf_parse_fd(int *argc, char ***argv)
+{
+ unsigned int id;
+ char *endptr;
+ int fd;
+
+ if (!is_prefix(*argv[0], "id")) {
+ p_err("expected 'id', got: '%s'?", **argv);
+ return -1;
+ }
+ NEXT_ARGP();
+
+ id = strtoul(**argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as ID", **argv);
+ return -1;
+ }
+ NEXT_ARGP();
+
+ fd = bpf_btf_get_fd_by_id(id);
+ if (fd < 0)
+ p_err("can't get BTF object by id (%u): %s",
+ id, strerror(errno));
+
+ return fd;
+}
+
+static void delete_btf_table(struct btf_attach_table *tab)
+{
+ struct btf_attach_point *obj;
+ struct hlist_node *tmp;
+
+ unsigned int bkt;
+
+ hash_for_each_safe(tab->table, bkt, tmp, obj, hash) {
+ hash_del(&obj->hash);
+ free(obj);
+ }
+}
+
+static int
+build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type,
+ void *info, __u32 *len)
+{
+ static const char * const names[] = {
+ [BPF_OBJ_UNKNOWN] = "unknown",
+ [BPF_OBJ_PROG] = "prog",
+ [BPF_OBJ_MAP] = "map",
+ };
+ struct btf_attach_point *obj_node;
+ __u32 btf_id, id = 0;
+ int err;
+ int fd;
+
+ while (true) {
+ switch (type) {
+ case BPF_OBJ_PROG:
+ err = bpf_prog_get_next_id(id, &id);
+ break;
+ case BPF_OBJ_MAP:
+ err = bpf_map_get_next_id(id, &id);
+ break;
+ default:
+ err = -1;
+ p_err("unexpected object type: %d", type);
+ goto err_free;
+ }
+ if (err) {
+ if (errno == ENOENT) {
+ err = 0;
+ break;
+ }
+ p_err("can't get next %s: %s%s", names[type],
+ strerror(errno),
+ errno == EINVAL ? " -- kernel too old?" : "");
+ goto err_free;
+ }
+
+ switch (type) {
+ case BPF_OBJ_PROG:
+ fd = bpf_prog_get_fd_by_id(id);
+ break;
+ case BPF_OBJ_MAP:
+ fd = bpf_map_get_fd_by_id(id);
+ break;
+ default:
+ err = -1;
+ p_err("unexpected object type: %d", type);
+ goto err_free;
+ }
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ p_err("can't get %s by id (%u): %s", names[type], id,
+ strerror(errno));
+ err = -1;
+ goto err_free;
+ }
+
+ memset(info, 0, *len);
+ err = bpf_obj_get_info_by_fd(fd, info, len);
+ close(fd);
+ if (err) {
+ p_err("can't get %s info: %s", names[type],
+ strerror(errno));
+ goto err_free;
+ }
+
+ switch (type) {
+ case BPF_OBJ_PROG:
+ btf_id = ((struct bpf_prog_info *)info)->btf_id;
+ break;
+ case BPF_OBJ_MAP:
+ btf_id = ((struct bpf_map_info *)info)->btf_id;
+ break;
+ default:
+ err = -1;
+ p_err("unexpected object type: %d", type);
+ goto err_free;
+ }
+ if (!btf_id)
+ continue;
+
+ obj_node = calloc(1, sizeof(*obj_node));
+ if (!obj_node) {
+ p_err("failed to allocate memory: %s", strerror(errno));
+ goto err_free;
+ }
+
+ obj_node->obj_id = id;
+ obj_node->btf_id = btf_id;
+ hash_add(tab->table, &obj_node->hash, obj_node->btf_id);
+ }
+
+ return 0;
+
+err_free:
+ delete_btf_table(tab);
+ return err;
+}
+
+static int
+build_btf_tables(struct btf_attach_table *btf_prog_table,
+ struct btf_attach_table *btf_map_table)
+{
+ struct bpf_prog_info prog_info;
+ __u32 prog_len = sizeof(prog_info);
+ struct bpf_map_info map_info;
+ __u32 map_len = sizeof(map_info);
+ int err = 0;
+
+ err = build_btf_type_table(btf_prog_table, BPF_OBJ_PROG, &prog_info,
+ &prog_len);
+ if (err)
+ return err;
+
+ err = build_btf_type_table(btf_map_table, BPF_OBJ_MAP, &map_info,
+ &map_len);
+ if (err) {
+ delete_btf_table(btf_prog_table);
+ return err;
+ }
+
+ return 0;
+}
+
+static void
+show_btf_plain(struct bpf_btf_info *info, int fd,
+ struct btf_attach_table *btf_prog_table,
+ struct btf_attach_table *btf_map_table)
+{
+ struct btf_attach_point *obj;
+ int n;
+
+ printf("%u: ", info->id);
+ printf("size %uB", info->btf_size);
+
+ n = 0;
+ hash_for_each_possible(btf_prog_table->table, obj, hash, info->id) {
+ if (obj->btf_id == info->id)
+ printf("%s%u", n++ == 0 ? " prog_ids " : ",",
+ obj->obj_id);
+ }
+
+ n = 0;
+ hash_for_each_possible(btf_map_table->table, obj, hash, info->id) {
+ if (obj->btf_id == info->id)
+ printf("%s%u", n++ == 0 ? " map_ids " : ",",
+ obj->obj_id);
+ }
+
+ printf("\n");
+}
+
+static void
+show_btf_json(struct bpf_btf_info *info, int fd,
+ struct btf_attach_table *btf_prog_table,
+ struct btf_attach_table *btf_map_table)
+{
+ struct btf_attach_point *obj;
+
+ jsonw_start_object(json_wtr); /* btf object */
+ jsonw_uint_field(json_wtr, "id", info->id);
+ jsonw_uint_field(json_wtr, "size", info->btf_size);
+
+ jsonw_name(json_wtr, "prog_ids");
+ jsonw_start_array(json_wtr); /* prog_ids */
+ hash_for_each_possible(btf_prog_table->table, obj, hash,
+ info->id) {
+ if (obj->btf_id == info->id)
+ jsonw_uint(json_wtr, obj->obj_id);
+ }
+ jsonw_end_array(json_wtr); /* prog_ids */
+
+ jsonw_name(json_wtr, "map_ids");
+ jsonw_start_array(json_wtr); /* map_ids */
+ hash_for_each_possible(btf_map_table->table, obj, hash,
+ info->id) {
+ if (obj->btf_id == info->id)
+ jsonw_uint(json_wtr, obj->obj_id);
+ }
+ jsonw_end_array(json_wtr); /* map_ids */
+ jsonw_end_object(json_wtr); /* btf object */
+}
+
+static int
+show_btf(int fd, struct btf_attach_table *btf_prog_table,
+ struct btf_attach_table *btf_map_table)
+{
+ struct bpf_btf_info info = {};
+ __u32 len = sizeof(info);
+ int err;
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ p_err("can't get BTF object info: %s", strerror(errno));
+ return -1;
+ }
+
+ if (json_output)
+ show_btf_json(&info, fd, btf_prog_table, btf_map_table);
+ else
+ show_btf_plain(&info, fd, btf_prog_table, btf_map_table);
+
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ struct btf_attach_table btf_prog_table;
+ struct btf_attach_table btf_map_table;
+ int err, fd = -1;
+ __u32 id = 0;
+
+ if (argc == 2) {
+ fd = btf_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return -1;
+ }
+
+ if (argc) {
+ if (fd >= 0)
+ close(fd);
+ return BAD_ARG();
+ }
+
+ hash_init(btf_prog_table.table);
+ hash_init(btf_map_table.table);
+ err = build_btf_tables(&btf_prog_table, &btf_map_table);
+ if (err) {
+ if (fd >= 0)
+ close(fd);
+ return err;
+ }
+
+ if (fd >= 0) {
+ err = show_btf(fd, &btf_prog_table, &btf_map_table);
+ close(fd);
+ goto exit_free;
+ }
+
+ if (json_output)
+ jsonw_start_array(json_wtr); /* root array */
+
+ while (true) {
+ err = bpf_btf_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT) {
+ err = 0;
+ break;
+ }
+ p_err("can't get next BTF object: %s%s",
+ strerror(errno),
+ errno == EINVAL ? " -- kernel too old?" : "");
+ err = -1;
+ break;
+ }
+
+ fd = bpf_btf_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ p_err("can't get BTF object by id (%u): %s",
+ id, strerror(errno));
+ err = -1;
+ break;
+ }
+
+ err = show_btf(fd, &btf_prog_table, &btf_map_table);
+ close(fd);
+ if (err)
+ break;
+ }
+
+ if (json_output)
+ jsonw_end_array(json_wtr); /* root array */
+
+exit_free:
+ delete_btf_table(&btf_prog_table);
+ delete_btf_table(&btf_map_table);
+
+ return err;
+}
+
static int do_help(int argc, char **argv)
{
if (json_output) {
@@ -530,7 +865,8 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s btf dump BTF_SRC [format FORMAT]\n"
+ "Usage: %s btf { show | list } [id BTF_ID]\n"
+ " %s btf dump BTF_SRC [format FORMAT]\n"
" %s btf help\n"
"\n"
" BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n"
@@ -539,12 +875,14 @@ static int do_help(int argc, char **argv)
" " HELP_SPEC_PROGRAM "\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name, bin_name);
+ bin_name, bin_name, bin_name);
return 0;
}
static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
{ "help", do_help },
{ "dump", do_dump },
{ 0 }
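
For reference, a minimal user-space sketch (not part of the patch) of the iteration the new "bpftool btf show" command performs, built on the bpf_btf_get_next_id() helper this series adds to libbpf plus the existing bpf_btf_get_fd_by_id() and bpf_obj_get_info_by_fd(); the function name list_btf_objects is made up for illustration:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

int list_btf_objects(void)
{
	struct bpf_btf_info info;
	__u32 id = 0, len;
	int fd, err;

	while (!bpf_btf_get_next_id(id, &id)) {
		fd = bpf_btf_get_fd_by_id(id);
		if (fd < 0) {
			if (errno == ENOENT)
				continue;	/* object went away, keep walking */
			return -errno;
		}

		memset(&info, 0, sizeof(info));
		len = sizeof(info);
		err = bpf_obj_get_info_by_fd(fd, &info, &len);
		if (!err)
			printf("%u: size %uB\n", info.id, info.btf_size);
		close(fd);
	}

	return 0;
}
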
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 8cafb9b31467..d66131f69689 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -26,9 +26,9 @@ static void btf_dumper_ptr(const void *data, json_writer_t *jw,
bool is_plain_text)
{
if (is_plain_text)
- jsonw_printf(jw, "%p", *(unsigned long *)data);
+ jsonw_printf(jw, "%p", data);
else
- jsonw_printf(jw, "%u", *(unsigned long *)data);
+ jsonw_printf(jw, "%lu", *(unsigned long *)data);
}
static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id,
@@ -216,7 +216,7 @@ static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset,
switch (BTF_INT_ENCODING(*int_type)) {
case 0:
if (BTF_INT_BITS(*int_type) == 64)
- jsonw_printf(jw, "%lu", *(__u64 *)data);
+ jsonw_printf(jw, "%llu", *(__u64 *)data);
else if (BTF_INT_BITS(*int_type) == 32)
jsonw_printf(jw, "%u", *(__u32 *)data);
else if (BTF_INT_BITS(*int_type) == 16)
@@ -229,7 +229,7 @@ static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset,
break;
case BTF_INT_SIGNED:
if (BTF_INT_BITS(*int_type) == 64)
- jsonw_printf(jw, "%ld", *(long long *)data);
+ jsonw_printf(jw, "%lld", *(long long *)data);
else if (BTF_INT_BITS(*int_type) == 32)
jsonw_printf(jw, "%d", *(int *)data);
else if (BTF_INT_BITS(*int_type) == 16)
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 44352b5aca85..1ef45e55039e 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -120,8 +120,8 @@ static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
int level)
{
+ const char *attach_flags_str;
__u32 prog_ids[1024] = {0};
- char *attach_flags_str;
__u32 prog_cnt, iter;
__u32 attach_flags;
char buf[32];
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 6a71324be628..88264abaa738 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -29,7 +29,7 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
-void __printf(1, 2) p_err(const char *fmt, ...)
+void p_err(const char *fmt, ...)
{
va_list ap;
@@ -47,7 +47,7 @@ void __printf(1, 2) p_err(const char *fmt, ...)
va_end(ap);
}
-void __printf(1, 2) p_info(const char *fmt, ...)
+void p_info(const char *fmt, ...)
{
va_list ap;
diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c
index 6046dcab51cc..86501cd3c763 100644
--- a/tools/bpf/bpftool/json_writer.c
+++ b/tools/bpf/bpftool/json_writer.c
@@ -15,7 +15,6 @@
#include <malloc.h>
#include <inttypes.h>
#include <stdint.h>
-#include <linux/compiler.h>
#include "json_writer.h"
@@ -153,8 +152,7 @@ void jsonw_name(json_writer_t *self, const char *name)
putc(' ', self->out);
}
-void __printf(2, 0)
-jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap)
+void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap)
{
jsonw_eor(self);
putc('"', self->out);
@@ -162,7 +160,7 @@ jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap)
putc('"', self->out);
}
-void __printf(2, 3) jsonw_printf(json_writer_t *self, const char *fmt, ...)
+void jsonw_printf(json_writer_t *self, const char *fmt, ...)
{
va_list ap;
diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h
index cb9a1993681c..35cf1f00f96c 100644
--- a/tools/bpf/bpftool/json_writer.h
+++ b/tools/bpf/bpftool/json_writer.h
@@ -14,6 +14,7 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdarg.h>
+#include <linux/compiler.h>
/* Opaque class structure */
typedef struct json_writer json_writer_t;
@@ -30,8 +31,9 @@ void jsonw_pretty(json_writer_t *self, bool on);
void jsonw_name(json_writer_t *self, const char *name);
/* Add value */
-void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap);
-void jsonw_printf(json_writer_t *self, const char *fmt, ...);
+void __printf(2, 0) jsonw_vprintf_enquote(json_writer_t *self, const char *fmt,
+ va_list ap);
+void __printf(2, 3) jsonw_printf(json_writer_t *self, const char *fmt, ...);
void jsonw_string(json_writer_t *self, const char *value);
void jsonw_bool(json_writer_t *self, bool value);
void jsonw_float(json_writer_t *self, double number);
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index e916ff25697f..93d008687020 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -139,7 +139,7 @@ int detect_common_prefix(const char *arg, ...)
strncat(msg, "'", sizeof(msg) - strlen(msg) - 1);
if (count >= 2) {
- p_err(msg);
+ p_err("%s", msg);
return -1;
}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 7031a4bf87a0..af9ad56c303a 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -98,8 +98,8 @@ extern int bpf_flags;
extern struct pinned_obj_table prog_table;
extern struct pinned_obj_table map_table;
-void p_err(const char *fmt, ...);
-void p_info(const char *fmt, ...);
+void __printf(1, 2) p_err(const char *fmt, ...);
+void __printf(1, 2) p_info(const char *fmt, ...);
bool is_prefix(const char *pfx, const char *str);
int detect_common_prefix(const char *arg, ...);
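
Context for the p_err()/p_info() changes here and in common.c: moving __printf() onto the declarations lets the compiler check format arguments at every call site, which is exactly what catches the p_err(msg) case fixed in main.c. A standalone illustration (the __printf macro shown mirrors tools/include/linux/compiler.h; the sample program is hypothetical):

#include <stdarg.h>
#include <stdio.h>

#define __printf(a, b) __attribute__((format(printf, a, b)))

/* attribute on the declaration means every caller gets format checking */
void __printf(1, 2) p_err(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
}

int main(void)
{
	const char *msg = "100% done";

	p_err("%s\n", msg);	/* OK: literal format string              */
	/* p_err(msg); */	/* would now warn: non-literal format      */
	return 0;
}
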
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index bfbbc6b4cb83..de61d73b9030 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -481,9 +481,11 @@ static int parse_elem(char **argv, struct bpf_map_info *info,
static int show_map_close_json(int fd, struct bpf_map_info *info)
{
- char *memlock;
+ char *memlock, *frozen_str;
+ int frozen = 0;
memlock = get_fdinfo(fd, "memlock");
+ frozen_str = get_fdinfo(fd, "frozen");
jsonw_start_object(json_wtr);
@@ -533,6 +535,12 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
}
close(fd);
+ if (frozen_str) {
+ frozen = atoi(frozen_str);
+ free(frozen_str);
+ }
+ jsonw_int_field(json_wtr, "frozen", frozen);
+
if (info->btf_id)
jsonw_int_field(json_wtr, "btf_id", info->btf_id);
@@ -555,9 +563,11 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
static int show_map_close_plain(int fd, struct bpf_map_info *info)
{
- char *memlock;
+ char *memlock, *frozen_str;
+ int frozen = 0;
memlock = get_fdinfo(fd, "memlock");
+ frozen_str = get_fdinfo(fd, "frozen");
printf("%u: ", info->id);
if (info->type < ARRAY_SIZE(map_type_name))
@@ -610,9 +620,23 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
printf("\n\tpinned %s", obj->path);
}
}
+ printf("\n");
+
+ if (frozen_str) {
+ frozen = atoi(frozen_str);
+ free(frozen_str);
+ }
+
+ if (!info->btf_id && !frozen)
+ return 0;
+
+ printf("\t");
if (info->btf_id)
- printf("\n\tbtf_id %d", info->btf_id);
+ printf("btf_id %d", info->btf_id);
+
+ if (frozen)
+ printf("%sfrozen", info->btf_id ? " " : "");
printf("\n");
return 0;
@@ -1238,6 +1262,35 @@ exit_free:
return err;
}
+static int do_freeze(int argc, char **argv)
+{
+ int err, fd;
+
+ if (!REQ_ARGS(2))
+ return -1;
+
+ fd = map_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return -1;
+
+ if (argc) {
+ close(fd);
+ return BAD_ARG();
+ }
+
+ err = bpf_map_freeze(fd);
+ close(fd);
+ if (err) {
+ p_err("failed to freeze map: %s", strerror(errno));
+ return err;
+ }
+
+ if (json_output)
+ jsonw_null(json_wtr);
+
+ return 0;
+}
+
static int do_help(int argc, char **argv)
{
if (json_output) {
@@ -1262,6 +1315,7 @@ static int do_help(int argc, char **argv)
" %s %s pop MAP\n"
" %s %s enqueue MAP value VALUE\n"
" %s %s dequeue MAP\n"
+ " %s %s freeze MAP\n"
" %s %s help\n"
"\n"
" " HELP_SPEC_MAP "\n"
@@ -1280,7 +1334,8 @@ static int do_help(int argc, char **argv)
bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
+ bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+ bin_name, argv[-2]);
return 0;
}
@@ -1302,6 +1357,7 @@ static const struct cmd cmds[] = {
{ "enqueue", do_update },
{ "pop", do_pop_dequeue },
{ "dequeue", do_pop_dequeue },
+ { "freeze", do_freeze },
{ 0 }
};
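
A minimal sketch of driving the same BPF_MAP_FREEZE operation from C through libbpf's bpf_map_freeze(); this is roughly what "bpftool map freeze pinned /sys/fs/bpf/my_map" does for a pinned map (the path is a made-up example):

#include <errno.h>
#include <unistd.h>
#include <bpf/bpf.h>

/* Freeze a pinned map so user space can no longer update it; program-side
 * writes remain possible unless the map was also created with
 * BPF_F_RDONLY_PROG.
 */
int freeze_pinned_map(const char *path)
{
	int fd, err;

	fd = bpf_obj_get(path);		/* e.g. "/sys/fs/bpf/my_map" (hypothetical) */
	if (fd < 0)
		return -errno;

	err = bpf_map_freeze(fd);	/* wraps the BPF_MAP_FREEZE command */
	close(fd);
	return err;
}
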
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
index 3f108ab17797..4c5531d1a450 100644
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -157,7 +157,7 @@ int do_event_pipe(int argc, char **argv)
NEXT_ARG();
ctx.cpu = strtoul(*argv, &endptr, 0);
if (*endptr) {
- p_err("can't parse %s as CPU ID", **argv);
+ p_err("can't parse %s as CPU ID", *argv);
goto err_close_map;
}
@@ -168,7 +168,7 @@ int do_event_pipe(int argc, char **argv)
NEXT_ARG();
ctx.idx = strtoul(*argv, &endptr, 0);
if (*endptr) {
- p_err("can't parse %s as index", **argv);
+ p_err("can't parse %s as index", *argv);
goto err_close_map;
}
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index 67e99c56bc88..4f52d3151616 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -55,6 +55,35 @@ struct bpf_attach_info {
__u32 flow_dissector_id;
};
+enum net_attach_type {
+ NET_ATTACH_TYPE_XDP,
+ NET_ATTACH_TYPE_XDP_GENERIC,
+ NET_ATTACH_TYPE_XDP_DRIVER,
+ NET_ATTACH_TYPE_XDP_OFFLOAD,
+};
+
+static const char * const attach_type_strings[] = {
+ [NET_ATTACH_TYPE_XDP] = "xdp",
+ [NET_ATTACH_TYPE_XDP_GENERIC] = "xdpgeneric",
+ [NET_ATTACH_TYPE_XDP_DRIVER] = "xdpdrv",
+ [NET_ATTACH_TYPE_XDP_OFFLOAD] = "xdpoffload",
+};
+
+const size_t net_attach_type_size = ARRAY_SIZE(attach_type_strings);
+
+static enum net_attach_type parse_attach_type(const char *str)
+{
+ enum net_attach_type type;
+
+ for (type = 0; type < net_attach_type_size; type++) {
+ if (attach_type_strings[type] &&
+ is_prefix(str, attach_type_strings[type]))
+ return type;
+ }
+
+ return net_attach_type_size;
+}
+
static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb)
{
struct bpf_netdev_t *netinfo = cookie;
@@ -197,7 +226,7 @@ static int query_flow_dissector(struct bpf_attach_info *attach_info)
fd = open("/proc/self/ns/net", O_RDONLY);
if (fd < 0) {
- p_err("can't open /proc/self/ns/net: %d",
+ p_err("can't open /proc/self/ns/net: %s",
strerror(errno));
return -1;
}
@@ -223,6 +252,134 @@ static int query_flow_dissector(struct bpf_attach_info *attach_info)
return 0;
}
+static int net_parse_dev(int *argc, char ***argv)
+{
+ int ifindex;
+
+ if (is_prefix(**argv, "dev")) {
+ NEXT_ARGP();
+
+ ifindex = if_nametoindex(**argv);
+ if (!ifindex)
+ p_err("invalid devname %s", **argv);
+
+ NEXT_ARGP();
+ } else {
+ p_err("expected 'dev', got: '%s'?", **argv);
+ return -1;
+ }
+
+ return ifindex;
+}
+
+static int do_attach_detach_xdp(int progfd, enum net_attach_type attach_type,
+ int ifindex, bool overwrite)
+{
+ __u32 flags = 0;
+
+ if (!overwrite)
+ flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+ if (attach_type == NET_ATTACH_TYPE_XDP_GENERIC)
+ flags |= XDP_FLAGS_SKB_MODE;
+ if (attach_type == NET_ATTACH_TYPE_XDP_DRIVER)
+ flags |= XDP_FLAGS_DRV_MODE;
+ if (attach_type == NET_ATTACH_TYPE_XDP_OFFLOAD)
+ flags |= XDP_FLAGS_HW_MODE;
+
+ return bpf_set_link_xdp_fd(ifindex, progfd, flags);
+}
+
+static int do_attach(int argc, char **argv)
+{
+ enum net_attach_type attach_type;
+ int progfd, ifindex, err = 0;
+ bool overwrite = false;
+
+ /* parse attach args */
+ if (!REQ_ARGS(5))
+ return -EINVAL;
+
+ attach_type = parse_attach_type(*argv);
+ if (attach_type == net_attach_type_size) {
+ p_err("invalid net attach/detach type: %s", *argv);
+ return -EINVAL;
+ }
+ NEXT_ARG();
+
+ progfd = prog_parse_fd(&argc, &argv);
+ if (progfd < 0)
+ return -EINVAL;
+
+ ifindex = net_parse_dev(&argc, &argv);
+ if (ifindex < 1) {
+ close(progfd);
+ return -EINVAL;
+ }
+
+ if (argc) {
+ if (is_prefix(*argv, "overwrite")) {
+ overwrite = true;
+ } else {
+ p_err("expected 'overwrite', got: '%s'?", *argv);
+ close(progfd);
+ return -EINVAL;
+ }
+ }
+
+ /* attach xdp prog */
+ if (is_prefix("xdp", attach_type_strings[attach_type]))
+ err = do_attach_detach_xdp(progfd, attach_type, ifindex,
+ overwrite);
+
+ if (err < 0) {
+ p_err("interface %s attach failed: %s",
+ attach_type_strings[attach_type], strerror(-err));
+ return err;
+ }
+
+ if (json_output)
+ jsonw_null(json_wtr);
+
+ return 0;
+}
+
+static int do_detach(int argc, char **argv)
+{
+ enum net_attach_type attach_type;
+ int progfd, ifindex, err = 0;
+
+ /* parse detach args */
+ if (!REQ_ARGS(3))
+ return -EINVAL;
+
+ attach_type = parse_attach_type(*argv);
+ if (attach_type == net_attach_type_size) {
+ p_err("invalid net attach/detach type: %s", *argv);
+ return -EINVAL;
+ }
+ NEXT_ARG();
+
+ ifindex = net_parse_dev(&argc, &argv);
+ if (ifindex < 1)
+ return -EINVAL;
+
+ /* detach xdp prog */
+ progfd = -1;
+ if (is_prefix("xdp", attach_type_strings[attach_type]))
+ err = do_attach_detach_xdp(progfd, attach_type, ifindex, false);

+
+ if (err < 0) {
+ p_err("interface %s detach failed: %s",
+ attach_type_strings[attach_type], strerror(-err));
+ return err;
+ }
+
+ if (json_output)
+ jsonw_null(json_wtr);
+
+ return 0;
+}
+
static int do_show(int argc, char **argv)
{
struct bpf_attach_info attach_info = {};
@@ -232,13 +389,9 @@ static int do_show(int argc, char **argv)
char err_buf[256];
if (argc == 2) {
- if (strcmp(argv[0], "dev") != 0)
- usage();
- filter_idx = if_nametoindex(argv[1]);
- if (filter_idx == 0) {
- fprintf(stderr, "invalid dev name %s\n", argv[1]);
+ filter_idx = net_parse_dev(&argc, &argv);
+ if (filter_idx < 1)
return -1;
- }
} else if (argc != 0) {
usage();
}
@@ -305,13 +458,20 @@ static int do_help(int argc, char **argv)
fprintf(stderr,
"Usage: %s %s { show | list } [dev <devname>]\n"
+ " %s %s attach ATTACH_TYPE PROG dev <devname> [ overwrite ]\n"
+ " %s %s detach ATTACH_TYPE dev <devname>\n"
" %s %s help\n"
+ "\n"
+ " " HELP_SPEC_PROGRAM "\n"
+ " ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
+ "\n"
"Note: Only xdp and tc attachments are supported now.\n"
" For progs attached to cgroups, use \"bpftool cgroup\"\n"
" to dump program attachments. For program types\n"
" sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n"
" consult iproute2.\n",
- bin_name, argv[-2], bin_name, argv[-2]);
+ bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+ bin_name, argv[-2]);
return 0;
}
@@ -319,6 +479,8 @@ static int do_help(int argc, char **argv)
static const struct cmd cmds[] = {
{ "show", do_show },
{ "list", do_show },
+ { "attach", do_attach },
+ { "detach", do_detach },
{ "help", do_help },
{ 0 }
};
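
The new attach/detach commands boil down to a single libbpf call. A hedged sketch of the equivalent from C, matching what do_attach() above ends up doing for "bpftool net attach xdpgeneric id <ID> dev <ifname>" (interface name and program fd are placeholders for objects set up elsewhere):

#include <errno.h>
#include <net/if.h>
#include <linux/if_link.h>
#include <bpf/libbpf.h>

/* Attach an already-loaded XDP program in generic (SKB) mode, refusing to
 * replace a program that is already attached -- the default behaviour,
 * i.e. without the new "overwrite" keyword.
 */
int attach_xdp_generic(const char *ifname, int prog_fd)
{
	int ifindex = if_nametoindex(ifname);

	if (!ifindex)
		return -errno;

	return bpf_set_link_xdp_fd(ifindex, prog_fd,
				   XDP_FLAGS_SKB_MODE |
				   XDP_FLAGS_UPDATE_IF_NOEXIST);
}
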
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
index f2a545e667c4..b2046f33e23f 100644
--- a/tools/bpf/bpftool/perf.c
+++ b/tools/bpf/bpftool/perf.c
@@ -104,6 +104,8 @@ static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type,
jsonw_string_field(json_wtr, "filename", buf);
jsonw_lluint_field(json_wtr, "offset", probe_offset);
break;
+ default:
+ break;
}
jsonw_end_object(json_wtr);
}
@@ -140,6 +142,8 @@ static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type,
printf("uretprobe filename %s offset %llu\n", buf,
probe_offset);
break;
+ default:
+ break;
}
}
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index 0d35f18006a1..95c072b70d0e 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -6,9 +6,11 @@
/*
* Common definitions for all gcc versions go here.
*/
+#ifndef GCC_VERSION
#define GCC_VERSION (__GNUC__ * 10000 \
+ __GNUC_MINOR__ * 100 \
+ __GNUC_PATCHLEVEL__)
+#endif
#if GCC_VERSION >= 70000 && !defined(__CHECKER__)
# define __fallthrough __attribute__ ((fallthrough))
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0e66371bea13..77c6be96d676 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -106,6 +106,7 @@ enum bpf_cmd {
BPF_TASK_FD_QUERY,
BPF_MAP_LOOKUP_AND_DELETE_ELEM,
BPF_MAP_FREEZE,
+ BPF_BTF_GET_NEXT_ID,
};
enum bpf_map_type {
@@ -284,6 +285,9 @@ enum bpf_attach_type {
*/
#define BPF_F_TEST_RND_HI32 (1U << 2)
+/* The verifier internal test flag. Behavior is undefined */
+#define BPF_F_TEST_STATE_FREQ (1U << 3)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* two extensions:
*
@@ -337,6 +341,9 @@ enum bpf_attach_type {
#define BPF_F_RDONLY_PROG (1U << 7)
#define BPF_F_WRONLY_PROG (1U << 8)
+/* Clone map from listener for newly accepted socket */
+#define BPF_F_CLONE (1U << 9)
+
/* flags for BPF_PROG_QUERY */
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
@@ -576,6 +583,8 @@ union bpf_attr {
* limited to five).
*
* Each time the helper is called, it appends a line to the trace.
+ * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
+ * open; use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
* The format of the trace is customizable, and the exact output
* one will get depends on the options set in
* *\/sys/kernel/debug/tracing/trace_options* (see also the
@@ -1014,7 +1023,7 @@ union bpf_attr {
* The realm of the route for the packet associated to *skb*, or 0
* if none was found.
*
- * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* Description
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -1076,7 +1085,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
+ * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags)
* Description
* Walk a user or a kernel stack and return its id. To achieve
* this, the helper needs *ctx*, which is a pointer to the context
@@ -1725,7 +1734,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_override_return(struct pt_reg *regs, u64 rc)
+ * int bpf_override_return(struct pt_regs *regs, u64 rc)
* Description
* Used for error injection, this helper uses kprobes to override
* the return value of the probed function, and to set it to *rc*.
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index faaa5ca2a117..be328c59389d 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -16,6 +16,18 @@
#define XDP_SHARED_UMEM (1 << 0)
#define XDP_COPY (1 << 1) /* Force copy-mode */
#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */
+/* If this option is set, the driver might go to sleep and in that case
+ * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be
+ * set. If it is set, the application needs to explicitly wake up the
+ * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are
+ * running the driver and the application on the same core, you should
+ * use this option so that the kernel will yield to the user space
+ * application.
+ */
+#define XDP_USE_NEED_WAKEUP (1 << 3)
+
+/* Flags for xsk_umem_config flags */
+#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
struct sockaddr_xdp {
__u16 sxdp_family;
@@ -25,10 +37,14 @@ struct sockaddr_xdp {
__u32 sxdp_shared_umem_fd;
};
+/* XDP_RING flags */
+#define XDP_RING_NEED_WAKEUP (1 << 0)
+
struct xdp_ring_offset {
__u64 producer;
__u64 consumer;
__u64 desc;
+ __u64 flags;
};
struct xdp_mmap_offsets {
@@ -53,6 +69,7 @@ struct xdp_umem_reg {
__u64 len; /* Length of packet data area */
__u32 chunk_size;
__u32 headroom;
+ __u32 flags;
};
struct xdp_statistics {
@@ -74,6 +91,11 @@ struct xdp_options {
#define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL
#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL
+/* Masks for unaligned chunks mode */
+#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48
+#define XSK_UNALIGNED_BUF_ADDR_MASK \
+ ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
+
/* Rx/Tx descriptor */
struct xdp_desc {
__u64 addr;
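
Usage-wise, the new XDP_USE_NEED_WAKEUP bind flag pairs with the per-ring flags word introduced above: the application only issues a syscall when the kernel asks for one. A small sketch, assuming an AF_XDP socket and TX producer ring already set up with xsk_socket__create():

#include <sys/socket.h>
#include <bpf/xsk.h>

/* Kick the kernel TX path only when the driver requested a wakeup (see
 * xsk_ring_prod__needs_wakeup() added to xsk.h further down in this
 * patch); without XDP_USE_NEED_WAKEUP this sendto() would have to be
 * issued unconditionally after every batch.
 */
static void kick_tx_if_needed(struct xsk_socket *xsk, struct xsk_ring_prod *tx)
{
	if (xsk_ring_prod__needs_wakeup(tx))
		sendto(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
}
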
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 9312066a1ae3..c6f94cffe06e 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -1,9 +1,10 @@
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# Most of this file is copied from tools/lib/traceevent/Makefile
-BPF_VERSION = 0
-BPF_PATCHLEVEL = 0
-BPF_EXTRAVERSION = 4
+LIBBPF_VERSION := $(shell \
+ grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \
+ sort -rV | head -n1 | cut -d'_' -f2)
+LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION)))
MAKEFLAGS += --no-print-directory
@@ -79,15 +80,9 @@ export prefix libdir src obj
libdir_SQ = $(subst ','\'',$(libdir))
libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-VERSION = $(BPF_VERSION)
-PATCHLEVEL = $(BPF_PATCHLEVEL)
-EXTRAVERSION = $(BPF_EXTRAVERSION)
-
OBJ = $@
N =
-LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION)
-
LIB_TARGET = libbpf.a libbpf.so.$(LIBBPF_VERSION)
LIB_FILE = libbpf.a libbpf.so*
PC_FILE = libbpf.pc
@@ -113,6 +108,7 @@ override CFLAGS += -Werror -Wall
override CFLAGS += -fPIC
override CFLAGS += $(INCLUDES)
override CFLAGS += -fvisibility=hidden
+override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
ifeq ($(VERBOSE),1)
Q =
@@ -138,7 +134,9 @@ LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
PC_FILE := $(addprefix $(OUTPUT),$(PC_FILE))
GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \
- awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}')
+ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
+ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}' | \
+ sort -u | wc -l)
VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
@@ -178,10 +176,10 @@ $(BPF_IN): force elfdep bpfdep
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN)
- $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(VERSION) \
+ $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
-Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@
@ln -sf $(@F) $(OUTPUT)libbpf.so
- @ln -sf $(@F) $(OUTPUT)libbpf.so.$(VERSION)
+ @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
$(OUTPUT)libbpf.a: $(BPF_IN)
$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
@@ -205,6 +203,7 @@ check_abi: $(OUTPUT)libbpf.so
"Please make sure all LIBBPF_API symbols are" \
"versioned in $(VERSION_SCRIPT)." >&2; \
readelf -s --wide $(OUTPUT)libbpf-in.o | \
+ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}'| \
sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
readelf -s --wide $(OUTPUT)libbpf.so | \
@@ -257,7 +256,8 @@ config-clean:
clean:
$(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \
- *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd *.pc LIBBPF-CFLAGS
+ *.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \
+ *.pc LIBBPF-CFLAGS
$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index c7d7993c44bb..cbb933532981 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -568,7 +568,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
return ret;
}
-int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
+static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
{
union bpf_attr attr;
int err;
@@ -576,26 +576,26 @@ int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
memset(&attr, 0, sizeof(attr));
attr.start_id = start_id;
- err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr));
+ err = sys_bpf(cmd, &attr, sizeof(attr));
if (!err)
*next_id = attr.next_id;
return err;
}
-int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
+int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
{
- union bpf_attr attr;
- int err;
-
- memset(&attr, 0, sizeof(attr));
- attr.start_id = start_id;
+ return bpf_obj_get_next_id(start_id, next_id, BPF_PROG_GET_NEXT_ID);
+}
- err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr));
- if (!err)
- *next_id = attr.next_id;
+int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
+{
+ return bpf_obj_get_next_id(start_id, next_id, BPF_MAP_GET_NEXT_ID);
+}
- return err;
+int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
+{
+ return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID);
}
int bpf_prog_get_fd_by_id(__u32 id)
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index ff42ca043dc8..0db01334740f 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -156,6 +156,7 @@ LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
__u32 *retval, __u32 *duration);
LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index f9d316e873d8..d04c7cb623ed 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -183,4 +183,10 @@ LIBBPF_0.0.4 {
perf_buffer__new;
perf_buffer__new_raw;
perf_buffer__poll;
+ xsk_umem__create;
} LIBBPF_0.0.3;
+
+LIBBPF_0.0.5 {
+ global:
+ bpf_btf_get_next_id;
+} LIBBPF_0.0.4;
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 680e63066cf3..842c4fd55859 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -74,23 +74,6 @@ struct xsk_nl_info {
int fd;
};
-/* For 32-bit systems, we need to use mmap2 as the offsets are 64-bit.
- * Unfortunately, it is not part of glibc.
- */
-static inline void *xsk_mmap(void *addr, size_t length, int prot, int flags,
- int fd, __u64 offset)
-{
-#ifdef __NR_mmap2
- unsigned int page_shift = __builtin_ffs(getpagesize()) - 1;
- long ret = syscall(__NR_mmap2, addr, length, prot, flags, fd,
- (off_t)(offset >> page_shift));
-
- return (void *)ret;
-#else
- return mmap(addr, length, prot, flags, fd, offset);
-#endif
-}
-
int xsk_umem__fd(const struct xsk_umem *umem)
{
return umem ? umem->fd : -EINVAL;
@@ -116,6 +99,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg,
cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+ cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
return;
}
@@ -123,6 +107,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg,
cfg->comp_size = usr_cfg->comp_size;
cfg->frame_size = usr_cfg->frame_size;
cfg->frame_headroom = usr_cfg->frame_headroom;
+ cfg->flags = usr_cfg->flags;
}
static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
@@ -149,9 +134,10 @@ static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
return 0;
}
-int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
- struct xsk_ring_prod *fill, struct xsk_ring_cons *comp,
- const struct xsk_umem_config *usr_config)
+int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
+ __u64 size, struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *usr_config)
{
struct xdp_mmap_offsets off;
struct xdp_umem_reg mr;
@@ -182,6 +168,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
mr.len = size;
mr.chunk_size = umem->config.frame_size;
mr.headroom = umem->config.frame_headroom;
+ mr.flags = umem->config.flags;
err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
if (err) {
@@ -210,10 +197,9 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
goto out_socket;
}
- map = xsk_mmap(NULL, off.fr.desc +
- umem->config.fill_size * sizeof(__u64),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
- umem->fd, XDP_UMEM_PGOFF_FILL_RING);
+ map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
+ XDP_UMEM_PGOFF_FILL_RING);
if (map == MAP_FAILED) {
err = -errno;
goto out_socket;
@@ -224,13 +210,13 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
fill->size = umem->config.fill_size;
fill->producer = map + off.fr.producer;
fill->consumer = map + off.fr.consumer;
+ fill->flags = map + off.fr.flags;
fill->ring = map + off.fr.desc;
fill->cached_cons = umem->config.fill_size;
- map = xsk_mmap(NULL,
- off.cr.desc + umem->config.comp_size * sizeof(__u64),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
- umem->fd, XDP_UMEM_PGOFF_COMPLETION_RING);
+ map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
+ XDP_UMEM_PGOFF_COMPLETION_RING);
if (map == MAP_FAILED) {
err = -errno;
goto out_mmap;
@@ -241,6 +227,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
comp->size = umem->config.comp_size;
comp->producer = map + off.cr.producer;
comp->consumer = map + off.cr.consumer;
+ comp->flags = map + off.cr.flags;
comp->ring = map + off.cr.desc;
*umem_ptr = umem;
@@ -255,6 +242,29 @@ out_umem_alloc:
return err;
}
+struct xsk_umem_config_v1 {
+ __u32 fill_size;
+ __u32 comp_size;
+ __u32 frame_size;
+ __u32 frame_headroom;
+};
+
+int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
+ __u64 size, struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *usr_config)
+{
+ struct xsk_umem_config config;
+
+ memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1));
+ config.flags = 0;
+
+ return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
+ &config);
+}
+asm(".symver xsk_umem__create_v0_0_2, xsk_umem__create@LIBBPF_0.0.2");
+asm(".symver xsk_umem__create_v0_0_4, xsk_umem__create@@LIBBPF_0.0.4");
+
static int xsk_load_xdp_prog(struct xsk_socket *xsk)
{
static const int log_buf_size = 16 * 1024;
@@ -550,11 +560,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
}
if (rx) {
- rx_map = xsk_mmap(NULL, off.rx.desc +
- xsk->config.rx_size * sizeof(struct xdp_desc),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE,
- xsk->fd, XDP_PGOFF_RX_RING);
+ rx_map = mmap(NULL, off.rx.desc +
+ xsk->config.rx_size * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ xsk->fd, XDP_PGOFF_RX_RING);
if (rx_map == MAP_FAILED) {
err = -errno;
goto out_socket;
@@ -564,16 +573,16 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
rx->size = xsk->config.rx_size;
rx->producer = rx_map + off.rx.producer;
rx->consumer = rx_map + off.rx.consumer;
+ rx->flags = rx_map + off.rx.flags;
rx->ring = rx_map + off.rx.desc;
}
xsk->rx = rx;
if (tx) {
- tx_map = xsk_mmap(NULL, off.tx.desc +
- xsk->config.tx_size * sizeof(struct xdp_desc),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE,
- xsk->fd, XDP_PGOFF_TX_RING);
+ tx_map = mmap(NULL, off.tx.desc +
+ xsk->config.tx_size * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ xsk->fd, XDP_PGOFF_TX_RING);
if (tx_map == MAP_FAILED) {
err = -errno;
goto out_mmap_rx;
@@ -583,6 +592,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
tx->size = xsk->config.tx_size;
tx->producer = tx_map + off.tx.producer;
tx->consumer = tx_map + off.tx.consumer;
+ tx->flags = tx_map + off.tx.flags;
tx->ring = tx_map + off.tx.desc;
tx->cached_cons = xsk->config.tx_size;
}
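
The two asm(".symver") lines above keep binaries linked against older libbpf resolving the original xsk_umem__create(), while new links bind to the flags-aware v0_0_4 variant. A generic, self-contained illustration of the idiom (library and version-node names are invented and would need a matching version script, so this is a sketch, not libbpf's build setup):

/* build as: gcc -shared -fPIC -Wl,--version-script=ver.map example.c
 * with ver.map declaring the LIB_1 and LIB_2 version nodes.
 */
int do_work_v1(int x)
{
	return x;		/* old ABI: no scaling */
}

int do_work_v2(int x)
{
	return x * 2;		/* new ABI */
}

asm(".symver do_work_v1, do_work@LIB_1");	/* old binaries keep this        */
asm(".symver do_work_v2, do_work@@LIB_2");	/* "@@" = default for new links  */
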
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 833a6e60d065..584f6820a639 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -32,6 +32,7 @@ struct name { \
__u32 *producer; \
__u32 *consumer; \
void *ring; \
+ __u32 *flags; \
}
DEFINE_XSK_RING(xsk_ring_prod);
@@ -76,6 +77,11 @@ xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
return &descs[idx & rx->mask];
}
+static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r)
+{
+ return *r->flags & XDP_RING_NEED_WAKEUP;
+}
+
static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
{
__u32 free_entries = r->cached_cons - r->cached_prod;
@@ -162,6 +168,21 @@ static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
return &((char *)umem_area)[addr];
}
+static inline __u64 xsk_umem__extract_addr(__u64 addr)
+{
+ return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+}
+
+static inline __u64 xsk_umem__extract_offset(__u64 addr)
+{
+ return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+}
+
+static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr)
+{
+ return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr);
+}
+
LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem);
LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
@@ -170,12 +191,14 @@ LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */
#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
+#define XSK_UMEM__DEFAULT_FLAGS 0
struct xsk_umem_config {
__u32 fill_size;
__u32 comp_size;
__u32 frame_size;
__u32 frame_headroom;
+ __u32 flags;
};
/* Flags for the libbpf_flags field. */
@@ -195,6 +218,16 @@ LIBBPF_API int xsk_umem__create(struct xsk_umem **umem,
struct xsk_ring_prod *fill,
struct xsk_ring_cons *comp,
const struct xsk_umem_config *config);
+LIBBPF_API int xsk_umem__create_v0_0_2(struct xsk_umem **umem,
+ void *umem_area, __u64 size,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *config);
+LIBBPF_API int xsk_umem__create_v0_0_4(struct xsk_umem **umem,
+ void *umem_area, __u64 size,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *config);
LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
const char *ifname, __u32 queue_id,
struct xsk_umem *umem,
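
With XDP_UMEM_UNALIGNED_CHUNK_FLAG set, descriptor addresses carry a 16-bit offset in their upper bits, which is what the extract/add helpers above undo. A sketch of translating one RX descriptor into a data pointer (umem area, ring and index are placeholders for an already configured socket):

#include <bpf/xsk.h>

static void *rx_desc_to_data(void *umem_area, const struct xsk_ring_cons *rx,
			     __u32 idx, __u32 *len)
{
	const struct xdp_desc *desc = xsk_ring_cons__rx_desc(rx, idx);
	/* fold the offset (upper 16 bits) back into the base address */
	__u64 addr = xsk_umem__add_offset_to_addr(desc->addr);

	*len = desc->len;
	return xsk_umem__get_data(umem_area, addr);
}
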
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 90f70d2c7c22..60c9338cd9b4 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -42,4 +42,5 @@ xdping
test_sockopt
test_sockopt_sk
test_sockopt_multi
+test_sockopt_inherit
test_tcp_rtt
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index d69c541e2039..9eef5edf17be 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -29,7 +29,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_cgroup_storage test_select_reuseport test_section_names \
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk \
- test_sockopt_multi test_tcp_rtt
+ test_sockopt_multi test_sockopt_inherit test_tcp_rtt
BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -66,7 +66,8 @@ TEST_PROGS := test_kmod.sh \
test_tcp_check_syncookie.sh \
test_tc_tunnel.sh \
test_tc_edt.sh \
- test_xdping.sh
+ test_xdping.sh \
+ test_bpftool_build.sh
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
@@ -115,6 +116,7 @@ $(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
$(OUTPUT)/test_sockopt: cgroup_helpers.c
$(OUTPUT)/test_sockopt_sk: cgroup_helpers.c
$(OUTPUT)/test_sockopt_multi: cgroup_helpers.c
+$(OUTPUT)/test_sockopt_inherit: cgroup_helpers.c
$(OUTPUT)/test_tcp_rtt: cgroup_helpers.c
.PHONY: force
diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h
index 05f036df8a4c..fbe28008450f 100644
--- a/tools/testing/selftests/bpf/bpf_endian.h
+++ b/tools/testing/selftests/bpf/bpf_endian.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __BPF_ENDIAN__
#define __BPF_ENDIAN__
@@ -29,6 +29,10 @@
# define __bpf_htonl(x) __builtin_bswap32(x)
# define __bpf_constant_ntohl(x) ___constant_swab32(x)
# define __bpf_constant_htonl(x) ___constant_swab32(x)
+# define __bpf_be64_to_cpu(x) __builtin_bswap64(x)
+# define __bpf_cpu_to_be64(x) __builtin_bswap64(x)
+# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x)
+# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# define __bpf_ntohs(x) (x)
# define __bpf_htons(x) (x)
@@ -38,6 +42,10 @@
# define __bpf_htonl(x) (x)
# define __bpf_constant_ntohl(x) (x)
# define __bpf_constant_htonl(x) (x)
+# define __bpf_be64_to_cpu(x) (x)
+# define __bpf_cpu_to_be64(x) (x)
+# define __bpf_constant_be64_to_cpu(x) (x)
+# define __bpf_constant_cpu_to_be64(x) (x)
#else
# error "Fix your compiler's __BYTE_ORDER__?!"
#endif
@@ -54,5 +62,11 @@
#define bpf_ntohl(x) \
(__builtin_constant_p(x) ? \
__bpf_constant_ntohl(x) : __bpf_ntohl(x))
+#define bpf_cpu_to_be64(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x))
+#define bpf_be64_to_cpu(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x))
#endif /* __BPF_ENDIAN__ */
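
The new 64-bit conversions follow the same constant-folding pattern as the existing 16/32-bit ones. A trivial hedged example of BPF program code using them (the functions are illustrative only):

#include <linux/types.h>
#include "bpf_endian.h"

/* Convert a 64-bit cookie between host and network (big-endian) order;
 * with a compile-time constant argument the swap is folded away.
 */
static __u64 cookie_to_wire(__u64 host_cookie)
{
	return bpf_cpu_to_be64(host_cookie);
}

static __u64 cookie_from_wire(__u64 wire_cookie)
{
	return bpf_be64_to_cpu(wire_cookie);
}
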
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 8b503ea142f0..6c4930bc6e2e 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __BPF_HELPERS_H
#define __BPF_HELPERS_H
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index fb5840a62548..f10029821e16 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -48,16 +48,17 @@ void test_bpf_obj_id(void)
/* test_obj_id.o is a dumb prog. It should never fail
* to load.
*/
- if (err)
- error_cnt++;
- assert(!err);
+ if (CHECK_FAIL(err))
+ continue;
/* Insert a magic value to the map */
map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
- assert(map_fds[i] >= 0);
+ if (CHECK_FAIL(map_fds[i] < 0))
+ goto done;
err = bpf_map_update_elem(map_fds[i], &array_key,
&array_magic_value, 0);
- assert(!err);
+ if (CHECK_FAIL(err))
+ goto done;
/* Check getting map info */
info_len = sizeof(struct bpf_map_info) * 2;
@@ -96,9 +97,11 @@ void test_bpf_obj_id(void)
prog_infos[i].map_ids = ptr_to_u64(map_ids + i);
prog_infos[i].nr_map_ids = 2;
err = clock_gettime(CLOCK_REALTIME, &real_time_ts);
- assert(!err);
+ if (CHECK_FAIL(err))
+ goto done;
err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts);
- assert(!err);
+ if (CHECK_FAIL(err))
+ goto done;
err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i],
&info_len);
load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
@@ -224,7 +227,8 @@ void test_bpf_obj_id(void)
nr_id_found++;
err = bpf_map_lookup_elem(map_fd, &array_key, &array_value);
- assert(!err);
+ if (CHECK_FAIL(err))
+ goto done;
err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
CHECK(err || info_len != sizeof(struct bpf_map_info) ||
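
The selftest churn that follows replaces open-coded error_cnt++ bumps and assert() calls with CHECK_FAIL() from test_progs.h. A simplified approximation of the idea (not the exact macro, which lives in test_progs.h and hooks into the test framework's failure accounting):

#include <errno.h>
#include <stdio.h>

static int error_cnt;	/* stand-in for the framework's failure counter */

/* Evaluate the condition, record a failure and log the location when it
 * is true, hand the truth value back so callers can branch on it, and
 * preserve errno across the whole thing.
 */
#define CHECK_FAIL(condition) ({					\
	int __ret = !!(condition);					\
	int __save_errno = errno;					\
	if (__ret) {							\
		error_cnt++;						\
		fprintf(stdout, "%s:FAIL:%d\n", __func__, __LINE__);	\
	}								\
	errno = __save_errno;						\
	__ret;								\
})
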
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index 1a1eae356f81..1c01ee2600a9 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -28,8 +28,6 @@ static int check_load(const char *file, enum bpf_prog_type type)
attr.prog_flags = BPF_F_TEST_RND_HI32;
err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
bpf_object__close(obj);
- if (err)
- error_cnt++;
return err;
}
@@ -105,12 +103,7 @@ void test_bpf_verif_scale(void)
continue;
err = check_load(test->file, test->attach_type);
- if (test->fails) { /* expected to fail */
- if (err)
- error_cnt--;
- else
- error_cnt++;
- }
+ CHECK_FAIL(err && !test->fails);
}
if (env.verifier_stats)
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 6892b88ae065..92563898867c 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -344,7 +344,6 @@ struct test tests[] = {
.tcp.dest = 8080,
},
.keys = {
- .nhoff = 0,
.nhoff = ETH_HLEN,
.thoff = ETH_HLEN + sizeof(struct iphdr) +
sizeof(struct iphdr),
@@ -452,10 +451,8 @@ void test_flow_dissector(void)
err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector",
"jmp_table", "last_dissection", &prog_fd, &keys_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct bpf_flow_keys flow_keys;
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index 3d59b3c841fe..eba9a970703b 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -135,10 +135,7 @@ void test_get_stack_raw_tp(void)
exp_cnt -= err;
}
- goto close_prog_noerr;
close_prog:
- error_cnt++;
-close_prog_noerr:
if (!IS_ERR_OR_NULL(link))
bpf_link__destroy(link);
if (!IS_ERR_OR_NULL(pb))
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index d011079fb0bf..c680926fce73 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -7,10 +7,8 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration)
uint64_t num;
map_fd = bpf_find_map(__func__, obj, "result_number");
- if (map_fd < 0) {
- error_cnt++;
+ if (CHECK_FAIL(map_fd < 0))
return;
- }
struct {
char *name;
@@ -44,10 +42,8 @@ static void test_global_data_string(struct bpf_object *obj, __u32 duration)
char str[32];
map_fd = bpf_find_map(__func__, obj, "result_string");
- if (map_fd < 0) {
- error_cnt++;
+ if (CHECK_FAIL(map_fd < 0))
return;
- }
struct {
char *name;
@@ -81,10 +77,8 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration)
struct foo val;
map_fd = bpf_find_map(__func__, obj, "result_struct");
- if (map_fd < 0) {
- error_cnt++;
+ if (CHECK_FAIL(map_fd < 0))
return;
- }
struct {
char *name;
@@ -112,16 +106,12 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
__u8 *buff;
map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
- if (!map || !bpf_map__is_internal(map)) {
- error_cnt++;
+ if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
return;
- }
map_fd = bpf_map__fd(map);
- if (map_fd < 0) {
- error_cnt++;
+ if (CHECK_FAIL(map_fd < 0))
return;
- }
buff = malloc(bpf_map__def(map)->value_size);
if (buff)
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index 20ddca830e68..eaf64595be88 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -30,10 +30,8 @@ static void test_l4lb(const char *file)
u32 *magic = (u32 *)buf;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
map_fd = bpf_find_map(__func__, obj, "vip_map");
if (map_fd < 0)
@@ -72,10 +70,9 @@ static void test_l4lb(const char *file)
bytes += stats[i].bytes;
pkts += stats[i].pkts;
}
- if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
- error_cnt++;
+ if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 ||
+ pkts != NUM_ITER * 2))
printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts);
- }
out:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index ee99368c595c..8f91f1881d11 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -8,14 +8,12 @@ static void *parallel_map_access(void *arg)
for (i = 0; i < 10000; i++) {
err = bpf_map_lookup_elem_flags(map_fd, &key, vars, BPF_F_LOCK);
- if (err) {
+ if (CHECK_FAIL(err)) {
printf("lookup failed\n");
- error_cnt++;
goto out;
}
- if (vars[0] != 0) {
+ if (CHECK_FAIL(vars[0] != 0)) {
printf("lookup #%d var[0]=%d\n", i, vars[0]);
- error_cnt++;
goto out;
}
rnd = vars[1];
@@ -24,7 +22,7 @@ static void *parallel_map_access(void *arg)
continue;
printf("lookup #%d var[1]=%d var[%d]=%d\n",
i, rnd, j, vars[j]);
- error_cnt++;
+ CHECK_FAIL(vars[j] != rnd);
goto out;
}
}
@@ -42,34 +40,36 @@ void test_map_lock(void)
void *ret;
err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
- if (err) {
+ if (CHECK_FAIL(err)) {
printf("test_map_lock:bpf_prog_load errno %d\n", errno);
goto close_prog;
}
map_fd[0] = bpf_find_map(__func__, obj, "hash_map");
- if (map_fd[0] < 0)
+ if (CHECK_FAIL(map_fd[0] < 0))
goto close_prog;
map_fd[1] = bpf_find_map(__func__, obj, "array_map");
- if (map_fd[1] < 0)
+ if (CHECK_FAIL(map_fd[1] < 0))
goto close_prog;
bpf_map_update_elem(map_fd[0], &key, vars, BPF_F_LOCK);
for (i = 0; i < 4; i++)
- assert(pthread_create(&thread_id[i], NULL,
- &spin_lock_thread, &prog_fd) == 0);
+ if (CHECK_FAIL(pthread_create(&thread_id[i], NULL,
+ &spin_lock_thread, &prog_fd)))
+ goto close_prog;
for (i = 4; i < 6; i++)
- assert(pthread_create(&thread_id[i], NULL,
- &parallel_map_access, &map_fd[i - 4]) == 0);
+ if (CHECK_FAIL(pthread_create(&thread_id[i], NULL,
+ &parallel_map_access,
+ &map_fd[i - 4])))
+ goto close_prog;
for (i = 0; i < 4; i++)
- assert(pthread_join(thread_id[i], &ret) == 0 &&
- ret == (void *)&prog_fd);
+ if (CHECK_FAIL(pthread_join(thread_id[i], &ret) ||
+ ret != (void *)&prog_fd))
+ goto close_prog;
for (i = 4; i < 6; i++)
- assert(pthread_join(thread_id[i], &ret) == 0 &&
- ret == (void *)&map_fd[i - 4]);
- goto close_prog_noerr;
+ if (CHECK_FAIL(pthread_join(thread_id[i], &ret) ||
+ ret != (void *)&map_fd[i - 4]))
+ goto close_prog;
close_prog:
- error_cnt++;
-close_prog_noerr:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
index 4ecfd721a044..a2537dfa899c 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
@@ -9,10 +9,8 @@ void test_pkt_access(void)
int err, prog_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, &retval, &duration);
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
index ac0d43435806..5f7aea605019 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
@@ -9,10 +9,8 @@ void test_pkt_md_access(void)
int err, prog_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, &retval, &duration);
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index e60cd5ff1f55..faccc66f4e39 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -27,10 +27,8 @@ static void test_queue_stack_map_by_type(int type)
return;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
map_in_fd = bpf_find_map(__func__, obj, "map_in");
if (map_in_fd < 0)
@@ -43,10 +41,8 @@ static void test_queue_stack_map_by_type(int type)
/* Push 32 elements to the input map */
for (i = 0; i < MAP_SIZE; i++) {
err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
goto out;
- }
}
/* The eBPF program pushes iph.saddr in the output map,
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index 4a4f428d1a78..5c78e2b5a917 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -10,10 +10,8 @@ void test_reference_tracking(void)
int err = 0;
obj = bpf_object__open(file);
- if (IS_ERR(obj)) {
- error_cnt++;
+ if (CHECK_FAIL(IS_ERR(obj)))
return;
- }
bpf_object__for_each_program(prog, obj) {
const char *title;
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 1575f0a1f586..b607112c64e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -8,7 +8,7 @@ static void sigusr1_handler(int signum)
sigusr1_received++;
}
-static int test_send_signal_common(struct perf_event_attr *attr,
+static void test_send_signal_common(struct perf_event_attr *attr,
int prog_type,
const char *test_name)
{
@@ -23,13 +23,13 @@ static int test_send_signal_common(struct perf_event_attr *attr,
if (CHECK(pipe(pipe_c2p), test_name,
"pipe pipe_c2p error: %s\n", strerror(errno)))
- goto no_fork_done;
+ return;
if (CHECK(pipe(pipe_p2c), test_name,
"pipe pipe_p2c error: %s\n", strerror(errno))) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
- goto no_fork_done;
+ return;
}
pid = fork();
@@ -38,7 +38,7 @@ static int test_send_signal_common(struct perf_event_attr *attr,
close(pipe_c2p[1]);
close(pipe_p2c[0]);
close(pipe_p2c[1]);
- goto no_fork_done;
+ return;
}
if (pid == 0) {
@@ -125,7 +125,7 @@ static int test_send_signal_common(struct perf_event_attr *attr,
goto disable_pmu;
}
- err = CHECK(buf[0] != '2', test_name, "incorrect result\n");
+ CHECK(buf[0] != '2', test_name, "incorrect result\n");
/* notify child safe to exit */
write(pipe_p2c[1], buf, 1);
@@ -138,11 +138,9 @@ prog_load_failure:
close(pipe_c2p[0]);
close(pipe_p2c[1]);
wait(NULL);
-no_fork_done:
- return err;
}
-static int test_send_signal_tracepoint(void)
+static void test_send_signal_tracepoint(void)
{
const char *id_path = "/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id";
struct perf_event_attr attr = {
@@ -159,21 +157,21 @@ static int test_send_signal_tracepoint(void)
if (CHECK(efd < 0, "tracepoint",
"open syscalls/sys_enter_nanosleep/id failure: %s\n",
strerror(errno)))
- return -1;
+ return;
bytes = read(efd, buf, sizeof(buf));
close(efd);
if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint",
"read syscalls/sys_enter_nanosleep/id failure: %s\n",
strerror(errno)))
- return -1;
+ return;
attr.config = strtol(buf, NULL, 0);
- return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint");
+ test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint");
}
-static int test_send_signal_perf(void)
+static void test_send_signal_perf(void)
{
struct perf_event_attr attr = {
.sample_period = 1,
@@ -181,11 +179,11 @@ static int test_send_signal_perf(void)
.config = PERF_COUNT_SW_CPU_CLOCK,
};
- return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
- "perf_sw_event");
+ test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
+ "perf_sw_event");
}
-static int test_send_signal_nmi(void)
+static void test_send_signal_nmi(void)
{
struct perf_event_attr attr = {
.sample_freq = 50,
@@ -204,25 +202,24 @@ static int test_send_signal_nmi(void)
if (errno == ENOENT) {
printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n",
__func__);
- return 0;
+ test__skip();
+ return;
}
/* Let the test fail with a more informative message */
} else {
close(pmu_fd);
}
- return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
- "perf_hw_event");
+ test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
+ "perf_hw_event");
}
void test_send_signal(void)
{
- int ret = 0;
-
if (test__start_subtest("send_signal_tracepoint"))
- ret |= test_send_signal_tracepoint();
+ test_send_signal_tracepoint();
if (test__start_subtest("send_signal_perf"))
- ret |= test_send_signal_perf();
+ test_send_signal_perf();
if (test__start_subtest("send_signal_nmi"))
- ret |= test_send_signal_nmi();
+ test_send_signal_nmi();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
index 114ebe6a438e..1ae00cd3174e 100644
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -11,19 +11,19 @@ void test_spinlock(void)
void *ret;
err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
- if (err) {
+ if (CHECK_FAIL(err)) {
printf("test_spin_lock:bpf_prog_load errno %d\n", errno);
goto close_prog;
}
for (i = 0; i < 4; i++)
- assert(pthread_create(&thread_id[i], NULL,
- &spin_lock_thread, &prog_fd) == 0);
+ if (CHECK_FAIL(pthread_create(&thread_id[i], NULL,
+ &spin_lock_thread, &prog_fd)))
+ goto close_prog;
+
for (i = 0; i < 4; i++)
- assert(pthread_join(thread_id[i], &ret) == 0 &&
- ret == (void *)&prog_fd);
- goto close_prog_noerr;
+ if (CHECK_FAIL(pthread_join(thread_id[i], &ret) ||
+ ret != (void *)&prog_fd))
+ goto close_prog;
close_prog:
- error_cnt++;
-close_prog_noerr:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index ac44fda84833..d841dced971f 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -51,9 +51,10 @@ retry:
"err %d errno %d\n", err, errno))
goto disable_pmu;
- assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
- == 0);
- assert(system("./urandom_read") == 0);
+ if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")))
+ goto disable_pmu;
+ if (CHECK_FAIL(system("./urandom_read")))
+ goto disable_pmu;
/* disable stack trace collection */
key = 0;
val = 1;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 9557b7dfb782..f62aa0eb959b 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -82,9 +82,10 @@ retry:
"err %d errno %d\n", err, errno))
goto disable_pmu;
- assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
- == 0);
- assert(system("taskset 0x1 ./urandom_read 100000") == 0);
+ if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")))
+ goto disable_pmu;
+ if (CHECK_FAIL(system("taskset 0x1 ./urandom_read 100000")))
+ goto disable_pmu;
/* disable stack trace collection */
key = 0;
val = 1;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index fc539335c5b3..37269d23df93 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -26,19 +26,19 @@ void test_stacktrace_map(void)
/* find map fds */
control_map_fd = bpf_find_map(__func__, obj, "control_map");
- if (control_map_fd < 0)
+ if (CHECK_FAIL(control_map_fd < 0))
goto disable_pmu;
stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
- if (stackid_hmap_fd < 0)
+ if (CHECK_FAIL(stackid_hmap_fd < 0))
goto disable_pmu;
stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
- if (stackmap_fd < 0)
+ if (CHECK_FAIL(stackmap_fd < 0))
goto disable_pmu;
stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
- if (stack_amap_fd < 0)
+ if (CHECK_FAIL(stack_amap_fd < 0))
goto disable_pmu;
/* give some time for bpf program run */
@@ -55,23 +55,20 @@ void test_stacktrace_map(void)
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu_noerr;
+ goto disable_pmu;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu_noerr;
+ goto disable_pmu;
stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
"err %d errno %d\n", err, errno))
- goto disable_pmu_noerr;
+ goto disable_pmu;
- goto disable_pmu_noerr;
disable_pmu:
- error_cnt++;
-disable_pmu_noerr:
bpf_link__destroy(link);
close_prog:
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index fbfa8e76cf63..404a5498e1a3 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -26,15 +26,15 @@ void test_stacktrace_map_raw_tp(void)
/* find map fds */
control_map_fd = bpf_find_map(__func__, obj, "control_map");
- if (control_map_fd < 0)
+ if (CHECK_FAIL(control_map_fd < 0))
goto close_prog;
stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
- if (stackid_hmap_fd < 0)
+ if (CHECK_FAIL(stackid_hmap_fd < 0))
goto close_prog;
stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
- if (stackmap_fd < 0)
+ if (CHECK_FAIL(stackmap_fd < 0))
goto close_prog;
/* give some time for bpf program run */
@@ -58,10 +58,7 @@ void test_stacktrace_map_raw_tp(void)
"err %d errno %d\n", err, errno))
goto close_prog;
- goto close_prog_noerr;
close_prog:
- error_cnt++;
-close_prog_noerr:
if (!IS_ERR_OR_NULL(link))
bpf_link__destroy(link);
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
index 958a3d88de99..1bdc1d86a50c 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
@@ -70,9 +70,6 @@ void test_task_fd_query_rawtp(void)
if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
goto close_prog;
- goto close_prog_noerr;
close_prog:
- error_cnt++;
-close_prog_noerr:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
index f9b70e81682b..3f131b8fe328 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
@@ -62,14 +62,9 @@ static void test_task_fd_query_tp_core(const char *probe_name,
fd_type, buf))
goto close_pmu;
- close(pmu_fd);
- goto close_prog_noerr;
-
close_pmu:
close(pmu_fd);
close_prog:
- error_cnt++;
-close_prog_noerr:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
index bb8759d69099..594307dffd13 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
@@ -10,10 +10,8 @@ void test_tcp_estats(void)
err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
CHECK(err, "", "err %d errno %d\n", err, errno);
- if (err) {
- error_cnt++;
+ if (err)
return;
- }
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c
index a74167289545..dcb5ecac778e 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp.c
@@ -16,10 +16,8 @@ void test_xdp(void)
int err, prog_fd, map_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
map_fd = bpf_find_map(__func__, obj, "vip2tnl");
if (map_fd < 0)
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 922aa0a19764..3744196d7cba 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -10,10 +10,8 @@ void test_xdp_adjust_tail(void)
int err, prog_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
index 15f7c272edb0..c9404e6b226e 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -31,10 +31,8 @@ void test_xdp_noinline(void)
u32 *magic = (u32 *)buf;
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (err) {
- error_cnt++;
+ if (CHECK_FAIL(err))
return;
- }
map_fd = bpf_find_map(__func__, obj, "vip_map");
if (map_fd < 0)
@@ -73,8 +71,8 @@ void test_xdp_noinline(void)
bytes += stats[i].bytes;
pkts += stats[i].pkts;
}
- if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
- error_cnt++;
+ if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 ||
+ pkts != NUM_ITER * 2)) {
printf("test_xdp_noinline:FAIL:stats %lld %lld\n",
bytes, pkts);
}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
new file mode 100644
index 000000000000..dede0fcd6102
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
+
+#define SOL_CUSTOM 0xdeadbeef
+#define CUSTOM_INHERIT1 0
+#define CUSTOM_INHERIT2 1
+#define CUSTOM_LISTENER 2
+
+struct sockopt_inherit {
+ __u8 val;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
+ __type(key, int);
+ __type(value, struct sockopt_inherit);
+} cloned1_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
+ __type(key, int);
+ __type(value, struct sockopt_inherit);
+} cloned2_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct sockopt_inherit);
+} listener_only_map SEC(".maps");
+
+static __inline struct sockopt_inherit *get_storage(struct bpf_sockopt *ctx)
+{
+ if (ctx->optname == CUSTOM_INHERIT1)
+ return bpf_sk_storage_get(&cloned1_map, ctx->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ else if (ctx->optname == CUSTOM_INHERIT2)
+ return bpf_sk_storage_get(&cloned2_map, ctx->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ else
+ return bpf_sk_storage_get(&listener_only_map, ctx->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+}
+
+SEC("cgroup/getsockopt")
+int _getsockopt(struct bpf_sockopt *ctx)
+{
+ __u8 *optval_end = ctx->optval_end;
+ struct sockopt_inherit *storage;
+ __u8 *optval = ctx->optval;
+
+ if (ctx->level != SOL_CUSTOM)
+ return 1; /* only interested in SOL_CUSTOM */
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ storage = get_storage(ctx);
+ if (!storage)
+ return 0; /* EPERM, couldn't get sk storage */
+
+ ctx->retval = 0; /* Reset system call return value to zero */
+
+ optval[0] = storage->val;
+ ctx->optlen = 1;
+
+ return 1;
+}
+
+SEC("cgroup/setsockopt")
+int _setsockopt(struct bpf_sockopt *ctx)
+{
+ __u8 *optval_end = ctx->optval_end;
+ struct sockopt_inherit *storage;
+ __u8 *optval = ctx->optval;
+
+ if (ctx->level != SOL_CUSTOM)
+ return 1; /* only interested in SOL_CUSTOM */
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ storage = get_storage(ctx);
+ if (!storage)
+ return 0; /* EPERM, couldn't get sk storage */
+
+ storage->val = optval[0];
+ ctx->optlen = -1;
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
index a334a0e882e4..41a3ebcd593d 100644
--- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
+++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
@@ -12,10 +12,6 @@
#define SR6_FLAG_ALERT (1 << 4)
-#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
- 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
-#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
- 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
#define BPF_PACKET_HEADER __attribute__((packed))
struct ip6_t {
@@ -276,8 +272,8 @@ int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
return 0;
// check if egress TLV value is correct
- if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
- ntohll(egr_addr.lo) == 0x4)
+ if (bpf_be64_to_cpu(egr_addr.hi) == 0xfd00000000000000 &&
+ bpf_be64_to_cpu(egr_addr.lo) == 0x4)
return 1;
}
@@ -308,8 +304,8 @@ int __encap_srh(struct __sk_buff *skb)
#pragma clang loop unroll(full)
for (unsigned long long lo = 0; lo < 4; lo++) {
- seg->lo = htonll(4 - lo);
- seg->hi = htonll(hi);
+ seg->lo = bpf_cpu_to_be64(4 - lo);
+ seg->hi = bpf_cpu_to_be64(hi);
seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
}
@@ -349,8 +345,8 @@ int __add_egr_x(struct __sk_buff *skb)
if (err)
return BPF_DROP;
- addr.lo = htonll(lo);
- addr.hi = htonll(hi);
+ addr.lo = bpf_cpu_to_be64(lo);
+ addr.hi = bpf_cpu_to_be64(hi);
err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
(void *)&addr, sizeof(addr));
if (err)
diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
index 1dbe1d4d467e..c4d104428643 100644
--- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
@@ -12,10 +12,6 @@
#define SR6_FLAG_ALERT (1 << 4)
-#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
- 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
-#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
- 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
#define BPF_PACKET_HEADER __attribute__((packed))
struct ip6_t {
@@ -251,8 +247,8 @@ int __add_egr_x(struct __sk_buff *skb)
if (err)
return BPF_DROP;
- addr.lo = htonll(lo);
- addr.hi = htonll(hi);
+ addr.lo = bpf_cpu_to_be64(lo);
+ addr.hi = bpf_cpu_to_be64(hi);
err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
(void *)&addr, sizeof(addr));
if (err)
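Both seg6 test programs drop their open-coded htonll()/ntohll() macros in favor of the 64-bit byte-order helpers from the selftests' bpf_endian.h. A rough sketch of the equivalence, assuming the usual selftest includes; on little-endian the helper byte-swaps, on big-endian it is a no-op, which is exactly what the removed macros computed from two 32-bit swaps:

	static __inline __u64 egress_hi(void)
	{
		/* equivalent to the removed htonll(0xfd00000000000000ULL) */
		return bpf_cpu_to_be64(0xfd00000000000000ULL);
	}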
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
new file mode 100755
index 000000000000..4ba5a34bff56
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+ERROR=0
+TMPDIR=
+
+# If one build fails, continue but return non-0 on exit.
+return_value() {
+ if [ -d "$TMPDIR" ] ; then
+ rm -rf -- $TMPDIR
+ fi
+ exit $ERROR
+}
+trap return_value EXIT
+
+case $1 in
+ -h|--help)
+ echo -e "$0 [-j <n>]"
+ echo -e "\tTest the different ways of building bpftool."
+ echo -e ""
+ echo -e "\tOptions:"
+ echo -e "\t\t-j <n>:\tPass -j flag to 'make'."
+ exit
+ ;;
+esac
+
+J=$*
+
+# Assume the script is located under tools/testing/selftests/bpf/. We want to
+# start build attempts from the top of the kernel repository.
+SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0)
+SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH)
+KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
+cd $KDIR_ROOT_DIR
+
+check() {
+ local dir=$(realpath $1)
+
+ echo -n "binary: "
+	# Returns non-zero if the file is found (and "false" is run)
+ find $dir -type f -executable -name bpftool -print -exec false {} + && \
+ ERROR=1 && printf "FAILURE: Did not find bpftool\n"
+}
+
+make_and_clean() {
+ echo -e "\$PWD: $PWD"
+ echo -e "command: make -s $* >/dev/null"
+ make $J -s $* >/dev/null
+ if [ $? -ne 0 ] ; then
+ ERROR=1
+ fi
+ if [ $# -ge 1 ] ; then
+ check ${@: -1}
+ else
+ check .
+ fi
+ (
+ if [ $# -ge 1 ] ; then
+ cd ${@: -1}
+ fi
+ make -s clean
+ )
+ echo
+}
+
+make_with_tmpdir() {
+ local ARGS
+
+ TMPDIR=$(mktemp -d)
+ if [ $# -ge 2 ] ; then
+ ARGS=${@:1:(($# - 1))}
+ fi
+ echo -e "\$PWD: $PWD"
+ echo -e "command: make -s $ARGS ${@: -1}=$TMPDIR/ >/dev/null"
+ make $J -s $ARGS ${@: -1}=$TMPDIR/ >/dev/null
+ if [ $? -ne 0 ] ; then
+ ERROR=1
+ fi
+ check $TMPDIR
+ rm -rf -- $TMPDIR
+ echo
+}
+
+echo "Trying to build bpftool"
+echo -e "... through kbuild\n"
+
+if [ -f ".config" ] ; then
+ make_and_clean tools/bpf
+
+	## $OUTPUT is overwritten in the kbuild Makefile, and thus cannot be passed
+	## down from the toplevel Makefile to bpftool's Makefile.
+
+ # make_with_tmpdir tools/bpf OUTPUT
+ echo -e "skip: make tools/bpf OUTPUT=<dir> (not supported)\n"
+
+ make_with_tmpdir tools/bpf O
+else
+ echo -e "skip: make tools/bpf (no .config found)\n"
+ echo -e "skip: make tools/bpf OUTPUT=<dir> (not supported)\n"
+ echo -e "skip: make tools/bpf O=<dir> (no .config found)\n"
+fi
+
+echo -e "... from kernel source tree\n"
+
+make_and_clean -C tools/bpf/bpftool
+
+make_with_tmpdir -C tools/bpf/bpftool OUTPUT
+
+make_with_tmpdir -C tools/bpf/bpftool O
+
+echo -e "... from tools/\n"
+cd tools/
+
+make_and_clean bpf
+
+## In tools/bpf/Makefile, function "descend" is called and passes $(O) and
+## $(OUTPUT). We would like $(OUTPUT) to have "bpf/bpftool/" appended before
+## calling bpftool's Makefile, but this is not the case as the "descend"
+## function focuses on $(O)/$(subdir). However, in the present case, updating
+## $(O) to have $(OUTPUT) recomputed from it in bpftool's Makefile does not
+## work, because $(O) is not defined from command line and $(OUTPUT) is not
+## updated in tools/scripts/Makefile.include.
+##
+## Workarounds would require a) editing "descend" or using an alternative way to
+## call bpftool's Makefile, b) modifying the conditions that update $(OUTPUT) and
+## other variables in tools/scripts/Makefile.include (at the risk of breaking
+## the build of other tools), or c) manually appending the "bpf/bpftool" suffix to
+## $(OUTPUT) in bpf's Makefile, which may break if targets for other directories
+## use "descend" in the future.
+
+# make_with_tmpdir bpf OUTPUT
+echo -e "skip: make bpf OUTPUT=<dir> (not supported)\n"
+
+make_with_tmpdir bpf O
+
+echo -e "... from bpftool's dir\n"
+cd bpf/bpftool
+
+make_and_clean
+
+make_with_tmpdir OUTPUT
+
+make_with_tmpdir O
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 425f9ed27c3b..15a666329a34 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -1353,7 +1353,7 @@ try:
bpftool_prog_list_wait(expected=1)
ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
- fail(ifnameB != simB1['ifname'], "program not bound to originial device")
+ fail(ifnameB != simB1['ifname'], "program not bound to original device")
simB1.remove()
bpftool_prog_list_wait(expected=1)
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 12895d03d58b..e8616e778cb5 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -8,22 +8,20 @@
/* defined in test_progs.h */
struct test_env env;
-int error_cnt, pass_cnt;
struct prog_test_def {
const char *test_name;
int test_num;
void (*run_test)(void);
bool force_log;
- int pass_cnt;
int error_cnt;
+ int skip_cnt;
bool tested;
const char *subtest_name;
int subtest_num;
/* store counts before subtest started */
- int old_pass_cnt;
int old_error_cnt;
};
@@ -47,6 +45,7 @@ static void dump_test_log(const struct prog_test_def *test, bool failed)
if (env.verbose || test->force_log || failed) {
if (env.log_cnt) {
+ env.log_buf[env.log_cnt] = '\0';
fprintf(env.stdout, "%s", env.log_buf);
if (env.log_buf[env.log_cnt - 1] != '\n')
fprintf(env.stdout, "\n");
@@ -56,15 +55,24 @@ static void dump_test_log(const struct prog_test_def *test, bool failed)
fseeko(stdout, 0, SEEK_SET); /* rewind */
}
+static void skip_account(void)
+{
+ if (env.test->skip_cnt) {
+ env.skip_cnt++;
+ env.test->skip_cnt = 0;
+ }
+}
+
void test__end_subtest()
{
struct prog_test_def *test = env.test;
- int sub_error_cnt = error_cnt - test->old_error_cnt;
+ int sub_error_cnt = test->error_cnt - test->old_error_cnt;
if (sub_error_cnt)
env.fail_cnt++;
else
env.sub_succ_cnt++;
+ skip_account();
dump_test_log(test, sub_error_cnt);
@@ -95,8 +103,7 @@ bool test__start_subtest(const char *name)
return false;
test->subtest_name = name;
- env.test->old_pass_cnt = pass_cnt;
- env.test->old_error_cnt = error_cnt;
+ env.test->old_error_cnt = env.test->error_cnt;
return true;
}
@@ -105,6 +112,16 @@ void test__force_log() {
env.test->force_log = true;
}
+void test__skip(void)
+{
+ env.test->skip_cnt++;
+}
+
+void test__fail(void)
+{
+ env.test->error_cnt++;
+}
+
struct ipv4_packet pkt_v4 = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
.iph.ihl = 5,
@@ -129,7 +146,7 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name)
map = bpf_object__find_map_by_name(obj, name);
if (!map) {
printf("%s:FAIL:map '%s' not found\n", test, name);
- error_cnt++;
+ test__fail();
return -1;
}
return bpf_map__fd(map);
@@ -488,8 +505,6 @@ int main(int argc, char **argv)
stdio_hijack();
for (i = 0; i < prog_test_cnt; i++) {
struct prog_test_def *test = &prog_test_defs[i];
- int old_pass_cnt = pass_cnt;
- int old_error_cnt = error_cnt;
env.test = test;
test->test_num = i + 1;
@@ -504,12 +519,11 @@ int main(int argc, char **argv)
test__end_subtest();
test->tested = true;
- test->pass_cnt = pass_cnt - old_pass_cnt;
- test->error_cnt = error_cnt - old_error_cnt;
if (test->error_cnt)
env.fail_cnt++;
else
env.succ_cnt++;
+ skip_account();
dump_test_log(test, test->error_cnt);
@@ -518,11 +532,11 @@ int main(int argc, char **argv)
test->error_cnt ? "FAIL" : "OK");
}
stdio_restore();
- printf("Summary: %d/%d PASSED, %d FAILED\n",
- env.succ_cnt, env.sub_succ_cnt, env.fail_cnt);
+ printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+ env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
free(env.test_selector.num_set);
free(env.subtest_selector.num_set);
- return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
+ return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
}
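With the global error_cnt/pass_cnt counters removed, per-test state lives in struct prog_test_def and is updated through test__fail() and test__skip(); skip_account() then folds a test's skip count into the new SKIPPED column of the summary. A hedged sketch of how a test might use the skip path when a prerequisite is missing (the probe and helper names are hypothetical, not from this patch):

	void test_hypothetical_feature(void)
	{
		if (!feature_available()) {	/* hypothetical probe */
			test__skip();		/* shows up in the "%d SKIPPED" summary */
			return;
		}
		if (CHECK_FAIL(exercise_feature()))	/* hypothetical helper */
			return;
	}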
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 37d427f5a1e5..c8edb9464ba6 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -38,8 +38,6 @@ typedef __u16 __sum16;
#include "trace_helpers.h"
#include "flow_dissector_load.h"
-struct prog_test_def;
-
struct test_selector {
const char *name;
bool *num_set;
@@ -64,14 +62,15 @@ struct test_env {
int succ_cnt; /* successful tests */
int sub_succ_cnt; /* successful sub-tests */
int fail_cnt; /* total failed tests + sub-tests */
+ int skip_cnt; /* skipped tests */
};
-extern int error_cnt;
-extern int pass_cnt;
extern struct test_env env;
extern void test__force_log();
extern bool test__start_subtest(const char *name);
+extern void test__skip(void);
+extern void test__fail(void);
#define MAGIC_BYTES 123
@@ -94,17 +93,25 @@ extern struct ipv6_packet pkt_v6;
#define _CHECK(condition, tag, duration, format...) ({ \
int __ret = !!(condition); \
if (__ret) { \
- error_cnt++; \
+ test__fail(); \
printf("%s:FAIL:%s ", __func__, tag); \
printf(format); \
} else { \
- pass_cnt++; \
printf("%s:PASS:%s %d nsec\n", \
__func__, tag, duration); \
} \
__ret; \
})
+#define CHECK_FAIL(condition) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ test__fail(); \
+ printf("%s:FAIL:%d\n", __func__, __LINE__); \
+ } \
+ __ret; \
+})
+
#define CHECK(condition, tag, format...) \
_CHECK(condition, tag, duration, format)
#define CHECK_ATTR(condition, tag, format...) \
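The two macros coexist: CHECK() keeps its tag, duration, and custom message for PASS/FAIL lines, while the new CHECK_FAIL() only reports the failing function and line number, which is enough for the mechanical assert() conversions above. A small illustrative fragment (run_step() is a hypothetical helper):

	void test_contrast(void)
	{
		__u32 duration = 0;	/* _CHECK() reports this on PASS */
		int err = run_step();	/* hypothetical helper */

		if (CHECK(err, "run_step", "err %d errno %d\n", err, errno))
			return;		/* tagged, prints the formatted message on failure */
		if (CHECK_FAIL(err))
			return;		/* untagged, prints only __func__ and __LINE__ */
	}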
diff --git a/tools/testing/selftests/bpf/test_sockopt_inherit.c b/tools/testing/selftests/bpf/test_sockopt_inherit.c
new file mode 100644
index 000000000000..1bf699815b9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockopt_inherit.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <pthread.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH "/sockopt_inherit"
+#define SOL_CUSTOM 0xdeadbeef
+#define CUSTOM_INHERIT1 0
+#define CUSTOM_INHERIT2 1
+#define CUSTOM_LISTENER 2
+
+static int connect_to_server(int server_fd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int fd;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ goto out;
+ }
+
+ if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
+		log_err("Failed to connect to server");
+ goto out;
+ }
+
+ return fd;
+
+out:
+ close(fd);
+ return -1;
+}
+
+static int verify_sockopt(int fd, int optname, const char *msg, char expected)
+{
+ socklen_t optlen = 1;
+ char buf = 0;
+ int err;
+
+ err = getsockopt(fd, SOL_CUSTOM, optname, &buf, &optlen);
+ if (err) {
+ log_err("%s: failed to call getsockopt", msg);
+ return 1;
+ }
+
+ printf("%s %d: got=0x%x ? expected=0x%x\n", msg, optname, buf, expected);
+
+ if (buf != expected) {
+ log_err("%s: unexpected getsockopt value %d != %d", msg,
+ buf, expected);
+ return 1;
+ }
+
+ return 0;
+}
+
+static void *server_thread(void *arg)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int fd = *(int *)arg;
+ int client_fd;
+ int err = 0;
+
+ if (listen(fd, 1) < 0)
+		error(1, errno, "Failed to listen on socket");
+
+ err += verify_sockopt(fd, CUSTOM_INHERIT1, "listen", 1);
+ err += verify_sockopt(fd, CUSTOM_INHERIT2, "listen", 1);
+ err += verify_sockopt(fd, CUSTOM_LISTENER, "listen", 1);
+
+ client_fd = accept(fd, (struct sockaddr *)&addr, &len);
+ if (client_fd < 0)
+ error(1, errno, "Failed to accept client");
+
+ err += verify_sockopt(client_fd, CUSTOM_INHERIT1, "accept", 1);
+ err += verify_sockopt(client_fd, CUSTOM_INHERIT2, "accept", 1);
+ err += verify_sockopt(client_fd, CUSTOM_LISTENER, "accept", 0);
+
+ close(client_fd);
+
+ return (void *)(long)err;
+}
+
+static int start_server(void)
+{
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ };
+ char buf;
+ int err;
+ int fd;
+ int i;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0) {
+ log_err("Failed to create server socket");
+ return -1;
+ }
+
+ for (i = CUSTOM_INHERIT1; i <= CUSTOM_LISTENER; i++) {
+ buf = 0x01;
+ err = setsockopt(fd, SOL_CUSTOM, i, &buf, 1);
+ if (err) {
+ log_err("Failed to call setsockopt(%d)", i);
+ close(fd);
+ return -1;
+ }
+ }
+
+ if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ log_err("Failed to bind socket");
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+{
+ enum bpf_attach_type attach_type;
+ enum bpf_prog_type prog_type;
+ struct bpf_program *prog;
+ int err;
+
+ err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
+ if (err) {
+		log_err("Failed to deduce types for %s BPF program", title);
+ return -1;
+ }
+
+ prog = bpf_object__find_program_by_title(obj, title);
+ if (!prog) {
+ log_err("Failed to find %s BPF program", title);
+ return -1;
+ }
+
+ err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
+ attach_type, 0);
+ if (err) {
+ log_err("Failed to attach %s BPF program", title);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int run_test(int cgroup_fd)
+{
+ struct bpf_prog_load_attr attr = {
+ .file = "./sockopt_inherit.o",
+ };
+ int server_fd = -1, client_fd;
+ struct bpf_object *obj;
+ void *server_err;
+ pthread_t tid;
+ int ignored;
+ int err;
+
+ err = bpf_prog_load_xattr(&attr, &obj, &ignored);
+ if (err) {
+ log_err("Failed to load BPF object");
+ return -1;
+ }
+
+ err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
+ if (err)
+ goto close_bpf_object;
+
+ err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
+ if (err)
+ goto close_bpf_object;
+
+ server_fd = start_server();
+ if (server_fd < 0) {
+ err = -1;
+ goto close_bpf_object;
+ }
+
+ pthread_create(&tid, NULL, server_thread, (void *)&server_fd);
+
+ client_fd = connect_to_server(server_fd);
+ if (client_fd < 0) {
+ err = -1;
+ goto close_server_fd;
+ }
+
+ err += verify_sockopt(client_fd, CUSTOM_INHERIT1, "connect", 0);
+ err += verify_sockopt(client_fd, CUSTOM_INHERIT2, "connect", 0);
+ err += verify_sockopt(client_fd, CUSTOM_LISTENER, "connect", 0);
+
+ pthread_join(tid, &server_err);
+
+ err += (int)(long)server_err;
+
+ close(client_fd);
+
+close_server_fd:
+ close(server_fd);
+close_bpf_object:
+ bpf_object__close(obj);
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ int cgroup_fd;
+ int err = EXIT_SUCCESS;
+
+ if (setup_cgroup_environment())
+ return err;
+
+ cgroup_fd = create_and_get_cgroup(CG_PATH);
+ if (cgroup_fd < 0)
+ goto cleanup_cgroup_env;
+
+ if (join_cgroup(CG_PATH))
+ goto cleanup_cgroup;
+
+ if (run_test(cgroup_fd))
+ err = EXIT_FAILURE;
+
+ printf("test_sockopt_inherit: %s\n",
+ err == EXIT_SUCCESS ? "PASSED" : "FAILED");
+
+cleanup_cgroup:
+ close(cgroup_fd);
+cleanup_cgroup_env:
+ cleanup_cgroup_environment();
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index a3bebd7c68dd..fc33ae36b760 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -13,6 +13,7 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "bpf_endian.h"
#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "cgroup_helpers.h"
@@ -100,7 +101,7 @@ static struct sysctl_test tests[] = {
.descr = "ctx:write sysctl:write read ok",
.insns = {
/* If (write) */
- BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
offsetof(struct bpf_sysctl, write)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),
@@ -214,7 +215,8 @@ static struct sysctl_test tests[] = {
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6),
/* buf == "tcp_mem\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x006d656d5f706374ULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x7463705f6d656d00ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -255,7 +257,8 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
/* buf[0:7] == "tcp_me\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x00656d5f706374ULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x7463705f6d650000ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -298,12 +301,14 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14),
/* buf[0:8] == "net/ipv4" && */
- BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x6e65742f69707634ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),
/* buf[8:16] == "/tcp_mem" && */
- BPF_LD_IMM64(BPF_REG_8, 0x6d656d5f7063742fULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x2f7463705f6d656dULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
@@ -350,12 +355,14 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10),
/* buf[0:8] == "net/ipv4" && */
- BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x6e65742f69707634ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
/* buf[8:16] == "/tcp_me\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x00656d5f7063742fULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x2f7463705f6d6500ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -396,7 +403,8 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
/* buf[0:8] == "net/ip\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x000070692f74656eULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x6e65742f69700000ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -431,7 +439,8 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),
/* buf[0:6] == "Linux\n\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x4c696e75780a0000ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -469,7 +478,8 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),
/* buf[0:6] == "Linux\n\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x4c696e75780a0000ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -507,7 +517,8 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
/* buf[0:6] == "Linux\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x000078756e694cULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x4c696e7578000000ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -650,7 +661,8 @@ static struct sysctl_test tests[] = {
/* buf[0:4] == "606\0") */
BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x00363036, 2),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_9,
+ bpf_ntohl(0x36303600), 2),
/* return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -685,17 +697,20 @@ static struct sysctl_test tests[] = {
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 23, 14),
/* buf[0:8] == "3000000 " && */
- BPF_LD_IMM64(BPF_REG_8, 0x2030303030303033ULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x3330303030303020ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),
/* buf[8:16] == "4000000 " && */
- BPF_LD_IMM64(BPF_REG_8, 0x2030303030303034ULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x3430303030303020ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
/* buf[16:24] == "6000000\0") */
- BPF_LD_IMM64(BPF_REG_8, 0x0030303030303036ULL),
+ BPF_LD_IMM64(BPF_REG_8,
+ bpf_be64_to_cpu(0x3630303030303000ULL)),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
@@ -735,7 +750,8 @@ static struct sysctl_test tests[] = {
/* buf[0:3] == "60\0") */
BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x003036, 2),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_9,
+ bpf_ntohl(0x36300000), 2),
/* return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -757,7 +773,8 @@ static struct sysctl_test tests[] = {
/* sysctl_set_new_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x36303000)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
@@ -791,7 +808,7 @@ static struct sysctl_test tests[] = {
/* sysctl_set_new_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, FIXUP_SYSCTL_VALUE),
+ BPF_LD_IMM64(BPF_REG_0, FIXUP_SYSCTL_VALUE),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
@@ -825,8 +842,9 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x36303000)),
+ BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -869,7 +887,8 @@ static struct sysctl_test tests[] = {
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
/* "600 602\0" */
- BPF_LD_IMM64(BPF_REG_0, 0x0032303620303036ULL),
+ BPF_LD_IMM64(BPF_REG_0,
+ bpf_be64_to_cpu(0x3630302036303200ULL)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -937,7 +956,8 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x36303000)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -969,8 +989,9 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00373730),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x30373700)),
+ BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1012,7 +1033,8 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x36303000)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1052,7 +1074,8 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x090a0c0d),
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x0d0c0a09)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1092,7 +1115,9 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* " -6\0" */
+ /* " -6\0" */
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x0a2d3600)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1132,8 +1157,10 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* " -6\0" */
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ /* " -6\0" */
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x0a2d3600)),
+ BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1175,8 +1202,10 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x65667830), /* "0xfe" */
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ /* "0xfe" */
+ BPF_MOV64_IMM(BPF_REG_0,
+ bpf_ntohl(0x30786665)),
+ BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1218,11 +1247,14 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) 9223372036854775807 */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
- BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL),
+ BPF_LD_IMM64(BPF_REG_0,
+ bpf_be64_to_cpu(0x3932323333373230ULL)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
- BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL),
+ BPF_LD_IMM64(BPF_REG_0,
+ bpf_be64_to_cpu(0x3336383534373735ULL)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
- BPF_LD_IMM64(BPF_REG_0, 0x0000000000373038ULL),
+ BPF_LD_IMM64(BPF_REG_0,
+ bpf_be64_to_cpu(0x3830370000000000ULL)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1266,11 +1298,14 @@ static struct sysctl_test tests[] = {
/* arg1 (buf) 9223372036854775808 */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
- BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL),
+ BPF_LD_IMM64(BPF_REG_0,
+ bpf_be64_to_cpu(0x3932323333373230ULL)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
- BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL),
+ BPF_LD_IMM64(BPF_REG_0,
+ bpf_be64_to_cpu(0x3336383534373735ULL)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
- BPF_LD_IMM64(BPF_REG_0, 0x0000000000383038ULL),
+ BPF_LD_IMM64(BPF_REG_0,
+ bpf_be64_to_cpu(0x3830380000000000ULL)),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
@@ -1344,20 +1379,24 @@ static size_t probe_prog_length(const struct bpf_insn *fp)
static int fixup_sysctl_value(const char *buf, size_t buf_len,
struct bpf_insn *prog, size_t insn_num)
{
- uint32_t value_num = 0;
+ union {
+ uint8_t raw[sizeof(uint64_t)];
+ uint64_t num;
+ } value = {};
uint8_t c, i;
- if (buf_len > sizeof(value_num)) {
+ if (buf_len > sizeof(value)) {
log_err("Value is too big (%zd) to use in fixup", buf_len);
return -1;
}
-
- for (i = 0; i < buf_len; ++i) {
- c = buf[i];
- value_num |= (c << i * 8);
+ if (prog[insn_num].code != (BPF_LD | BPF_DW | BPF_IMM)) {
+ log_err("Can fixup only BPF_LD_IMM64 insns");
+ return -1;
}
- prog[insn_num].imm = value_num;
+ memcpy(value.raw, buf, buf_len);
+ prog[insn_num].imm = (uint32_t)value.num;
+ prog[insn_num + 1].imm = (uint32_t)(value.num >> 32);
return 0;
}
@@ -1499,6 +1538,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test)
goto err;
}
+ errno = 0;
if (access_sysctl(sysctl_path, test) == -1) {
if (test->result == OP_EPERM && errno == EPERM)
goto out;
@@ -1507,7 +1547,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test)
}
if (test->result != SUCCESS) {
- log_err("Unexpected failure");
+ log_err("Unexpected success");
goto err;
}
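The rewritten immediates above encode short ASCII strings; expressing them through bpf_be64_to_cpu()/bpf_ntohl() lets the constants be written in string (big-endian) byte order and still match the value a BPF_LDX_MEM(BPF_DW, ...) load produces on either endianness. A small sketch of the idea, assuming bpf_endian.h plus standard headers:

	#include <stdint.h>
	#include <string.h>

	/* "tcp_mem\0" is the byte sequence 74 63 70 5f 6d 65 6d 00 */
	static uint64_t imm_for_tcp_mem(void)
	{
		return bpf_be64_to_cpu(0x7463705f6d656d00ULL);
	}

	static uint64_t loaded_from_buf(const char buf[8])
	{
		uint64_t v;

		memcpy(&v, buf, sizeof(v));	/* what the BPF_DW load reads */
		return v;			/* equals imm_for_tcp_mem() for buf = "tcp_mem" */
	}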
diff --git a/tools/testing/selftests/bpf/test_tcp_rtt.c b/tools/testing/selftests/bpf/test_tcp_rtt.c
index 90c3862f74a8..93916a69823e 100644
--- a/tools/testing/selftests/bpf/test_tcp_rtt.c
+++ b/tools/testing/selftests/bpf/test_tcp_rtt.c
@@ -6,6 +6,7 @@
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
+#include <netinet/tcp.h>
#include <pthread.h>
#include <linux/filter.h>
@@ -34,6 +35,30 @@ static void send_byte(int fd)
error(1, errno, "Failed to send single byte");
}
+static int wait_for_ack(int fd, int retries)
+{
+ struct tcp_info info;
+ socklen_t optlen;
+ int i, err;
+
+ for (i = 0; i < retries; i++) {
+ optlen = sizeof(info);
+ err = getsockopt(fd, SOL_TCP, TCP_INFO, &info, &optlen);
+ if (err < 0) {
+ log_err("Failed to lookup TCP stats");
+ return err;
+ }
+
+ if (info.tcpi_unacked == 0)
+ return 0;
+
+ usleep(10);
+ }
+
+ log_err("Did not receive ACK");
+ return -1;
+}
+
static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
__u32 dsack_dups, __u32 delivered, __u32 delivered_ce,
__u32 icsk_retransmits)
@@ -149,6 +174,11 @@ static int run_test(int cgroup_fd, int server_fd)
/*icsk_retransmits=*/0);
send_byte(client_fd);
+ if (wait_for_ack(client_fd, 100) < 0) {
+ err = -1;
+ goto close_client_fd;
+ }
+
err += verify_sk(map_fd, client_fd, "first payload byte",
/*invoked=*/2,
@@ -157,6 +187,7 @@ static int run_test(int cgroup_fd, int server_fd)
/*delivered_ce=*/0,
/*icsk_retransmits=*/0);
+close_client_fd:
close(client_fd);
close_bpf_object:
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 44e2d640b088..d27fd929abb9 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -61,6 +61,7 @@
#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
static bool unpriv_disabled = false;
static int skips;
+static bool verbose = false;
struct bpf_test {
const char *descr;
@@ -92,7 +93,8 @@ struct bpf_test {
enum {
UNDEF,
ACCEPT,
- REJECT
+ REJECT,
+ VERBOSE_ACCEPT,
} result, result_unpriv;
enum bpf_prog_type prog_type;
uint8_t flags;
@@ -859,6 +861,36 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
return 0;
}
+static bool cmp_str_seq(const char *log, const char *exp)
+{
+ char needle[80];
+ const char *p, *q;
+ int len;
+
+ do {
+ p = strchr(exp, '\t');
+ if (!p)
+ p = exp + strlen(exp);
+
+ len = p - exp;
+ if (len >= sizeof(needle) || !len) {
+ printf("FAIL\nTestcase bug\n");
+ return false;
+ }
+ strncpy(needle, exp, len);
+ needle[len] = 0;
+ q = strstr(log, needle);
+ if (!q) {
+ printf("FAIL\nUnexpected verifier log in successful load!\n"
+ "EXP: %s\nRES:\n", needle);
+ return false;
+ }
+ log = q + len;
+ exp = p + 1;
+ } while (*p);
+ return true;
+}
+
static void do_test_single(struct bpf_test *test, bool unpriv,
int *passes, int *errors)
{
@@ -897,14 +929,20 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
pflags |= BPF_F_STRICT_ALIGNMENT;
if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
pflags |= BPF_F_ANY_ALIGNMENT;
+ if (test->flags & ~3)
+ pflags |= test->flags;
+ expected_ret = unpriv && test->result_unpriv != UNDEF ?
+ test->result_unpriv : test->result;
+ expected_err = unpriv && test->errstr_unpriv ?
+ test->errstr_unpriv : test->errstr;
memset(&attr, 0, sizeof(attr));
attr.prog_type = prog_type;
attr.expected_attach_type = test->expected_attach_type;
attr.insns = prog;
attr.insns_cnt = prog_len;
attr.license = "GPL";
- attr.log_level = 4;
+ attr.log_level = verbose || expected_ret == VERBOSE_ACCEPT ? 1 : 4;
attr.prog_flags = pflags;
fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
@@ -914,14 +952,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
goto close_fds;
}
- expected_ret = unpriv && test->result_unpriv != UNDEF ?
- test->result_unpriv : test->result;
- expected_err = unpriv && test->errstr_unpriv ?
- test->errstr_unpriv : test->errstr;
-
alignment_prevented_execution = 0;
- if (expected_ret == ACCEPT) {
+ if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) {
if (fd_prog < 0) {
printf("FAIL\nFailed to load prog '%s'!\n",
strerror(errno));
@@ -932,6 +965,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
(test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS))
alignment_prevented_execution = 1;
#endif
+ if (expected_ret == VERBOSE_ACCEPT && !cmp_str_seq(bpf_vlog, expected_err)) {
+ goto fail_log;
+ }
} else {
if (fd_prog >= 0) {
printf("FAIL\nUnexpected success to load!\n");
@@ -957,6 +993,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
}
}
+ if (verbose)
+ printf(", verifier log:\n%s", bpf_vlog);
+
run_errs = 0;
run_successes = 0;
if (!alignment_prevented_execution && fd_prog >= 0) {
@@ -1097,17 +1136,24 @@ int main(int argc, char **argv)
{
unsigned int from = 0, to = ARRAY_SIZE(tests);
bool unpriv = !is_admin();
+ int arg = 1;
+
+ if (argc > 1 && strcmp(argv[1], "-v") == 0) {
+ arg++;
+ verbose = true;
+ argc--;
+ }
if (argc == 3) {
- unsigned int l = atoi(argv[argc - 2]);
- unsigned int u = atoi(argv[argc - 1]);
+ unsigned int l = atoi(argv[arg]);
+ unsigned int u = atoi(argv[arg + 1]);
if (l < to && u < to) {
from = l;
to = u + 1;
}
} else if (argc == 2) {
- unsigned int t = atoi(argv[argc - 1]);
+ unsigned int t = atoi(argv[arg]);
if (t < to) {
from = t;
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
new file mode 100644
index 000000000000..02151f8c940f
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -0,0 +1,194 @@
+{
+ "precise: test 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), /* map_value_ptr -= map_value_ptr */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_9),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .fixup_map_array_48b = { 1 },
+ .result = VERBOSE_ACCEPT,
+ .errstr =
+ "26: (85) call bpf_probe_read#4\
+ last_idx 26 first_idx 20\
+ regs=4 stack=0 before 25\
+ regs=4 stack=0 before 24\
+ regs=4 stack=0 before 23\
+ regs=4 stack=0 before 22\
+ regs=4 stack=0 before 20\
+ parent didn't have regs=4 stack=0 marks\
+ last_idx 19 first_idx 10\
+ regs=4 stack=0 before 19\
+ regs=200 stack=0 before 18\
+ regs=300 stack=0 before 17\
+ regs=201 stack=0 before 15\
+ regs=201 stack=0 before 14\
+ regs=200 stack=0 before 13\
+ regs=200 stack=0 before 12\
+ regs=200 stack=0 before 11\
+ regs=200 stack=0 before 10\
+ parent already had regs=0 stack=0 marks",
+},
+{
+ "precise: test 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), /* map_value_ptr -= map_value_ptr */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_9),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .fixup_map_array_48b = { 1 },
+ .result = VERBOSE_ACCEPT,
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .errstr =
+ "26: (85) call bpf_probe_read#4\
+ last_idx 26 first_idx 22\
+ regs=4 stack=0 before 25\
+ regs=4 stack=0 before 24\
+ regs=4 stack=0 before 23\
+ regs=4 stack=0 before 22\
+ parent didn't have regs=4 stack=0 marks\
+ last_idx 20 first_idx 20\
+ regs=4 stack=0 before 20\
+ parent didn't have regs=4 stack=0 marks\
+ last_idx 19 first_idx 17\
+ regs=4 stack=0 before 19\
+ regs=200 stack=0 before 18\
+ regs=300 stack=0 before 17\
+ parent already had regs=0 stack=0 marks",
+},
+{
+ "precise: cross frame pruning",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .errstr = "!read_ok",
+ .result = REJECT,
+},
+{
+ "precise: ST insn causing spi > allocated_stack",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_MOV64_IMM(BPF_REG_0, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .errstr = "5: (2d) if r4 > r0 goto pc+0\
+ last_idx 5 first_idx 5\
+ parent didn't have regs=10 stack=0 marks\
+ last_idx 4 first_idx 2\
+ regs=10 stack=0 before 4\
+ regs=10 stack=0 before 3\
+ regs=0 stack=1 before 2\
+ last_idx 5 first_idx 5\
+ parent didn't have regs=1 stack=0 marks",
+ .result = VERBOSE_ACCEPT,
+ .retval = -1,
+},
+{
+ "precise: STX insn causing spi > allocated_stack",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_MOV64_IMM(BPF_REG_0, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .errstr = "last_idx 6 first_idx 6\
+ parent didn't have regs=10 stack=0 marks\
+ last_idx 5 first_idx 3\
+ regs=10 stack=0 before 5\
+ regs=10 stack=0 before 4\
+ regs=0 stack=1 before 3\
+ last_idx 6 first_idx 6\
+ parent didn't have regs=1 stack=0 marks\
+ last_idx 5 first_idx 3\
+ regs=1 stack=0 before 5",
+ .result = VERBOSE_ACCEPT,
+ .retval = -1,
+},
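For these VERBOSE_ACCEPT entries, .errstr is not an error message: it is a tab-separated sequence of substrings that cmp_str_seq() (added to test_verifier.c above) must find in the verifier log in order. An illustrative fragment reusing lines from the first test's expected log; cmp_str_seq() is static to test_verifier.c, so this is only a sketch of the matching rule:

	const char *log = "26: (85) call bpf_probe_read#4\n"
			  "last_idx 26 first_idx 20\n"
			  "regs=4 stack=0 before 25\n";
	const char *exp = "call bpf_probe_read#4\t"
			  "last_idx 26 first_idx 20\t"
			  "regs=4 stack=0 before 25";
	/* cmp_str_seq(log, exp) succeeds: each tab-separated needle is searched
	 * for after the previous match, so the expected lines must appear in
	 * the verifier log in exactly this order. */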