aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/dsa/dsa.txt255
-rw-r--r--Documentation/devicetree/bindings/net/dsa/dsa.yaml92
-rw-r--r--Documentation/networking/bareudp.rst6
-rw-r--r--Documentation/networking/dccp.rst3
-rw-r--r--arch/riscv/net/bpf_jit.h483
-rw-r--r--arch/riscv/net/bpf_jit_comp32.c14
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c293
-rw-r--r--arch/riscv/net/bpf_jit_core.c6
-rw-r--r--arch/s390/net/bpf_jit_comp.c63
-rw-r--r--crypto/af_alg.c7
-rw-r--r--drivers/crypto/chelsio/chtls/chtls_main.c18
-rw-r--r--drivers/infiniband/hw/qedr/main.c20
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c97
-rw-r--r--drivers/isdn/mISDN/socket.c8
-rw-r--r--drivers/net/bareudp.c21
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c12
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.h3
-rw-r--r--drivers/net/dsa/mv88e6xxx/global1.c17
-rw-r--r--drivers/net/dsa/mv88e6xxx/global1.h2
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_admin_defs.h47
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.c19
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.h13
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_eth_com.c51
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_eth_com.h3
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_ethtool.c4
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c178
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.h3
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h5
-rw-r--r--drivers/net/ethernet/broadcom/tg3.h2
-rw-r--r--drivers/net/ethernet/brocade/bna/bfi.h2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h8
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c52
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c107
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/smt.c2
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c65
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h3
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h13
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpni.c36
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpni.h16
-rw-r--r--drivers/net/ethernet/freescale/enetc/Kconfig2
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.c156
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.h36
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_ethtool.c84
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_hw.h23
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_pf.c10
-rw-r--r--drivers/net/ethernet/hisilicon/hix5hd2_gmac.c6
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h11
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h67
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c1229
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.h24
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c740
-rw-r--r--drivers/net/ethernet/intel/ice/ice_hw_autogen.h9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c24
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c774
-rw-r--r--drivers/net/ethernet/intel/ice/ice_nvm.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_nvm.h4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h57
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c25
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c3
-rw-r--r--drivers/net/ethernet/mscc/ocelot_vsc7514.c28
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c9
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.h2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_if.h88
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c24
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.h14
-rw-r--r--drivers/net/ethernet/qlogic/qed/Makefile37
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_chain.c369
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_cxt.c5
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev.c273
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev_api.h32
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_hsi.h2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iscsi.c39
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ll2.c44
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_rdma.c1
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_roce.c3
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sp_commands.c4
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_spq.c90
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede.h175
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_fp.c174
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c185
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c2
-rw-r--r--drivers/net/ethernet/sfc/ef10.c162
-rw-r--r--drivers/net/ethernet/sfc/efx.c72
-rw-r--r--drivers/net/ethernet/sfc/net_driver.h11
-rw-r--r--drivers/net/hyperv/hyperv_net.h1
-rw-r--r--drivers/net/hyperv/netvsc_drv.c43
-rw-r--r--drivers/net/phy/mdio-mux-gpio.c11
-rw-r--r--drivers/net/phy/phy_device.c4
-rw-r--r--drivers/net/phy/phylink.c372
-rw-r--r--drivers/net/vrf.c138
-rw-r--r--include/linux/bpf-netns.h3
-rw-r--r--include/linux/bpf.h15
-rw-r--r--include/linux/bpf_types.h2
-rw-r--r--include/linux/bpfilter.h6
-rw-r--r--include/linux/btf_ids.h40
-rw-r--r--include/linux/filter.h150
-rw-r--r--include/linux/icmp.h3
-rw-r--r--include/linux/ipv6.h1
-rw-r--r--include/linux/mroute.h5
-rw-r--r--include/linux/mroute6.h8
-rw-r--r--include/linux/net.h4
-rw-r--r--include/linux/netfilter.h6
-rw-r--r--include/linux/netfilter/x_tables.h4
-rw-r--r--include/linux/phylink.h103
-rw-r--r--include/linux/qed/qed_chain.h328
-rw-r--r--include/linux/qed/qed_if.h9
-rw-r--r--include/linux/skbuff.h4
-rw-r--r--include/linux/sockptr.h132
-rw-r--r--include/net/bareudp.h1
-rw-r--r--include/net/devlink.h4
-rw-r--r--include/net/dsa.h23
-rw-r--r--include/net/flow_dissector.h9
-rw-r--r--include/net/inet_connection_sock.h3
-rw-r--r--include/net/ip.h7
-rw-r--r--include/net/ipv6.h6
-rw-r--r--include/net/sctp/structs.h2
-rw-r--r--include/net/sock.h7
-rw-r--r--include/net/tcp.h6
-rw-r--r--include/net/udp.h2
-rw-r--r--include/net/xdp.h42
-rw-r--r--include/net/xfrm.h8
-rw-r--r--include/trace/events/xdp.h16
-rw-r--r--include/uapi/linux/bpf.h97
-rw-r--r--include/uapi/linux/icmpv6.h1
-rw-r--r--include/uapi/linux/if_link.h1
-rw-r--r--include/uapi/linux/in6.h1
-rw-r--r--include/uapi/linux/pkt_cls.h3
-rw-r--r--kernel/bpf/btf.c6
-rw-r--r--kernel/bpf/core.c55
-rw-r--r--kernel/bpf/cpumap.c167
-rw-r--r--kernel/bpf/map_iter.c7
-rw-r--r--kernel/bpf/net_namespace.c131
-rw-r--r--kernel/bpf/syscall.c9
-rw-r--r--kernel/bpf/task_iter.c12
-rw-r--r--kernel/bpf/verifier.c13
-rw-r--r--lib/test_bpf.c20
-rw-r--r--net/atm/common.c6
-rw-r--r--net/atm/common.h2
-rw-r--r--net/atm/pvc.c2
-rw-r--r--net/atm/svc.c6
-rw-r--r--net/ax25/af_ax25.c6
-rw-r--r--net/bluetooth/hci_sock.c8
-rw-r--r--net/bluetooth/l2cap_sock.c22
-rw-r--r--net/bluetooth/rfcomm/sock.c12
-rw-r--r--net/bluetooth/sco.c6
-rw-r--r--net/bpfilter/bpfilter_kern.c55
-rw-r--r--net/bridge/netfilter/ebtables.c46
-rw-r--r--net/caif/caif_socket.c8
-rw-r--r--net/can/j1939/socket.c12
-rw-r--r--net/can/raw.c16
-rw-r--r--net/core/dev.c14
-rw-r--r--net/core/devlink.c170
-rw-r--r--net/core/filter.c234
-rw-r--r--net/core/flow_dissector.c17
-rw-r--r--net/core/sock.c37
-rw-r--r--net/dccp/dccp.h2
-rw-r--r--net/dccp/proto.c29
-rw-r--r--net/decnet/af_decnet.c13
-rw-r--r--net/dsa/dsa2.c8
-rw-r--r--net/dsa/master.c12
-rw-r--r--net/ieee802154/socket.c6
-rw-r--r--net/ipv4/bpfilter/sockopt.c16
-rw-r--r--net/ipv4/icmp.c26
-rw-r--r--net/ipv4/inet_hashtables.c60
-rw-r--r--net/ipv4/ip_options.c43
-rw-r--r--net/ipv4/ip_sockglue.c80
-rw-r--r--net/ipv4/ipmr.c14
-rw-r--r--net/ipv4/netfilter/arp_tables.c33
-rw-r--r--net/ipv4/netfilter/ip_tables.c29
-rw-r--r--net/ipv4/raw.c8
-rw-r--r--net/ipv4/tcp.c30
-rw-r--r--net/ipv4/tcp_ipv4.c8
-rw-r--r--net/ipv4/udp.c108
-rw-r--r--net/ipv4/udp_impl.h4
-rw-r--r--net/ipv6/datagram.c16
-rw-r--r--net/ipv6/inet6_hashtables.c66
-rw-r--r--net/ipv6/ip6_flowlabel.c315
-rw-r--r--net/ipv6/ip6mr.c17
-rw-r--r--net/ipv6/ipv6_sockglue.c215
-rw-r--r--net/ipv6/netfilter/ip6_tables.c28
-rw-r--r--net/ipv6/raw.c10
-rw-r--r--net/ipv6/route.c7
-rw-r--r--net/ipv6/tcp_ipv6.c4
-rw-r--r--net/ipv6/udp.c104
-rw-r--r--net/ipv6/udp_impl.h4
-rw-r--r--net/iucv/af_iucv.c4
-rw-r--r--net/kcm/kcmsock.c6
-rw-r--r--net/l2tp/l2tp_core.c119
-rw-r--r--net/l2tp/l2tp_core.h82
-rw-r--r--net/l2tp/l2tp_debugfs.c34
-rw-r--r--net/l2tp/l2tp_eth.c19
-rw-r--r--net/l2tp/l2tp_ip.c31
-rw-r--r--net/l2tp/l2tp_ip6.c37
-rw-r--r--net/l2tp/l2tp_netlink.c257
-rw-r--r--net/l2tp/l2tp_ppp.c97
-rw-r--r--net/llc/af_llc.c4
-rw-r--r--net/mptcp/options.c5
-rw-r--r--net/mptcp/protocol.c29
-rw-r--r--net/mptcp/protocol.h19
-rw-r--r--net/mptcp/subflow.c91
-rw-r--r--net/mptcp/token.c14
-rw-r--r--net/ncsi/ncsi-rsp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c4
-rw-r--r--net/netfilter/nf_sockopt.c2
-rw-r--r--net/netfilter/x_tables.c20
-rw-r--r--net/netlink/af_netlink.c11
-rw-r--r--net/netrom/af_netrom.c4
-rw-r--r--net/nfc/llcp_sock.c6
-rw-r--r--net/openvswitch/datapath.c23
-rw-r--r--net/openvswitch/datapath.h4
-rw-r--r--net/packet/af_packet.c39
-rw-r--r--net/phonet/pep.c4
-rw-r--r--net/rds/af_rds.c30
-rw-r--r--net/rds/rdma.c14
-rw-r--r--net/rds/rds.h6
-rw-r--r--net/rose/af_rose.c4
-rw-r--r--net/rxrpc/af_rxrpc.c8
-rw-r--r--net/rxrpc/ar-internal.h4
-rw-r--r--net/rxrpc/key.c9
-rw-r--r--net/sched/cls_flower.c16
-rw-r--r--net/sctp/protocol.c12
-rw-r--r--net/sctp/socket.c54
-rw-r--r--net/smc/af_smc.c4
-rw-r--r--net/socket.c24
-rw-r--r--net/tipc/socket.c8
-rw-r--r--net/tls/tls_main.c17
-rw-r--r--net/vmw_vsock/af_vsock.c4
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--net/xdp/xsk.c8
-rw-r--r--net/xfrm/xfrm_state.c6
-rw-r--r--samples/bpf/offwaketime_kern.c7
-rw-r--r--samples/bpf/test_overhead_kprobe_kern.c12
-rw-r--r--samples/bpf/tracex1_kern.c9
-rw-r--r--samples/bpf/tracex5_kern.c4
-rw-r--r--samples/bpf/xdp_redirect_cpu_kern.c25
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c209
-rwxr-xr-xscripts/bpf_helpers_doc.py9
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst2
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool2
-rw-r--r--tools/bpf/bpftool/common.c138
-rw-r--r--tools/bpf/bpftool/gen.c5
-rw-r--r--tools/bpf/bpftool/main.h4
-rw-r--r--tools/bpf/bpftool/prog.c3
-rw-r--r--tools/bpf/bpftool/skeleton/pid_iter.bpf.c3
-rw-r--r--tools/include/linux/btf_ids.h51
-rw-r--r--tools/include/uapi/linux/bpf.h97
-rw-r--r--tools/lib/bpf/bpf_helpers.h2
-rw-r--r--tools/lib/bpf/libbpf.c5
-rw-r--r--tools/lib/bpf/libbpf.h2
-rw-r--r--tools/lib/bpf/libbpf.map2
-rw-r--r--tools/lib/bpf/libbpf_probes.c3
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c58
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/resolve_btfids.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_lookup.c1282
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c70
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_netlink.c6
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp4.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp6.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup.c641
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c36
-rwxr-xr-xtools/testing/selftests/bpf/test_kmod.sh12
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_seg6local.sh2
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c492
-rwxr-xr-xtools/testing/selftests/net/vrf_strict_mode_test.sh6
269 files changed, 11416 insertions, 4259 deletions
diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.txt b/Documentation/devicetree/bindings/net/dsa/dsa.txt
index f66bb7ecdb82..bf7328aba330 100644
--- a/Documentation/devicetree/bindings/net/dsa/dsa.txt
+++ b/Documentation/devicetree/bindings/net/dsa/dsa.txt
@@ -1,257 +1,4 @@
Distributed Switch Architecture Device Tree Bindings
----------------------------------------------------
-Switches are true Linux devices and can be probed by any means. Once
-probed, they register to the DSA framework, passing a node
-pointer. This node is expected to fulfil the following binding, and
-may contain additional properties as required by the device it is
-embedded within.
-
-Required properties:
-
-- ports : A container for child nodes representing switch ports.
-
-Optional properties:
-
-- dsa,member : A two element list indicates which DSA cluster, and position
- within the cluster a switch takes. <0 0> is cluster 0,
- switch 0. <0 1> is cluster 0, switch 1. <1 0> is cluster 1,
- switch 0. A switch not part of any cluster (single device
- hanging off a CPU port) must not specify this property
-
-The ports container has the following properties
-
-Required properties:
-
-- #address-cells : Must be 1
-- #size-cells : Must be 0
-
-Each port children node must have the following mandatory properties:
-- reg : Describes the port address in the switch
-
-An uplink/downlink port between switches in the cluster has the following
-mandatory property:
-
-- link : Should be a list of phandles to other switch's DSA
- port. This port is used as the outgoing port
- towards the phandle ports. The full routing
- information must be given, not just the one hop
- routes to neighbouring switches.
-
-A CPU port has the following mandatory property:
-
-- ethernet : Should be a phandle to a valid Ethernet device node.
- This host device is what the switch port is
- connected to.
-
-A user port has the following optional property:
-
-- label : Describes the label associated with this port, which
- will become the netdev name.
-
-Port child nodes may also contain the following optional standardised
-properties, described in binding documents:
-
-- phy-handle : Phandle to a PHY on an MDIO bus. See
- Documentation/devicetree/bindings/net/ethernet.txt
- for details.
-
-- phy-mode : See
- Documentation/devicetree/bindings/net/ethernet.txt
- for details.
-
-- fixed-link : Fixed-link subnode describing a link to a non-MDIO
- managed entity. See
- Documentation/devicetree/bindings/net/fixed-link.txt
- for details.
-
-The MAC address will be determined using the optional properties
-defined in ethernet.txt.
-
-Example
-
-The following example shows three switches on three MDIO busses,
-linked into one DSA cluster.
-
-&mdio1 {
- #address-cells = <1>;
- #size-cells = <0>;
-
- switch0: switch0@0 {
- compatible = "marvell,mv88e6085";
- reg = <0>;
-
- dsa,member = <0 0>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
- port@0 {
- reg = <0>;
- label = "lan0";
- };
-
- port@1 {
- reg = <1>;
- label = "lan1";
- local-mac-address = [00 00 00 00 00 00];
- };
-
- port@2 {
- reg = <2>;
- label = "lan2";
- };
-
- switch0port5: port@5 {
- reg = <5>;
- phy-mode = "rgmii-txid";
- link = <&switch1port6
- &switch2port9>;
- fixed-link {
- speed = <1000>;
- full-duplex;
- };
- };
-
- port@6 {
- reg = <6>;
- ethernet = <&fec1>;
- fixed-link {
- speed = <100>;
- full-duplex;
- };
- };
- };
- };
-};
-
-&mdio2 {
- #address-cells = <1>;
- #size-cells = <0>;
-
- switch1: switch1@0 {
- compatible = "marvell,mv88e6085";
- reg = <0>;
-
- dsa,member = <0 1>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
- port@0 {
- reg = <0>;
- label = "lan3";
- phy-handle = <&switch1phy0>;
- };
-
- port@1 {
- reg = <1>;
- label = "lan4";
- phy-handle = <&switch1phy1>;
- };
-
- port@2 {
- reg = <2>;
- label = "lan5";
- phy-handle = <&switch1phy2>;
- };
-
- switch1port5: port@5 {
- reg = <5>;
- link = <&switch2port9>;
- phy-mode = "rgmii-txid";
- fixed-link {
- speed = <1000>;
- full-duplex;
- };
- };
-
- switch1port6: port@6 {
- reg = <6>;
- phy-mode = "rgmii-txid";
- link = <&switch0port5>;
- fixed-link {
- speed = <1000>;
- full-duplex;
- };
- };
- };
- mdio-bus {
- #address-cells = <1>;
- #size-cells = <0>;
- switch1phy0: switch1phy0@0 {
- reg = <0>;
- };
- switch1phy1: switch1phy0@1 {
- reg = <1>;
- };
- switch1phy2: switch1phy0@2 {
- reg = <2>;
- };
- };
- };
-};
-
-&mdio4 {
- #address-cells = <1>;
- #size-cells = <0>;
-
- switch2: switch2@0 {
- compatible = "marvell,mv88e6085";
- reg = <0>;
-
- dsa,member = <0 2>;
-
- ports {
- #address-cells = <1>;
- #size-cells = <0>;
- port@0 {
- reg = <0>;
- label = "lan6";
- };
-
- port@1 {
- reg = <1>;
- label = "lan7";
- };
-
- port@2 {
- reg = <2>;
- label = "lan8";
- };
-
- port@3 {
- reg = <3>;
- label = "optical3";
- fixed-link {
- speed = <1000>;
- full-duplex;
- link-gpios = <&gpio6 2
- GPIO_ACTIVE_HIGH>;
- };
- };
-
- port@4 {
- reg = <4>;
- label = "optical4";
- fixed-link {
- speed = <1000>;
- full-duplex;
- link-gpios = <&gpio6 3
- GPIO_ACTIVE_HIGH>;
- };
- };
-
- switch2port9: port@9 {
- reg = <9>;
- phy-mode = "rgmii-txid";
- link = <&switch1port5
- &switch0port5>;
- fixed-link {
- speed = <1000>;
- full-duplex;
- };
- };
- };
- };
-};
+See Documentation/devicetree/bindings/net/dsa/dsa.yaml for the documenation.
diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml
new file mode 100644
index 000000000000..faea214339ca
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/dsa/dsa.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ethernet Switch Device Tree Bindings
+
+maintainers:
+ - Andrew Lunn <[email protected]>
+ - Florian Fainelli <[email protected]>
+ - Vivien Didelot <[email protected]>
+
+description:
+ This binding represents Ethernet Switches which have a dedicated CPU
+ port. That port is usually connected to an Ethernet Controller of the
+ SoC. Such setups are typical for embedded devices.
+
+select: false
+
+properties:
+ $nodename:
+ pattern: "^switch(@.*)?$"
+
+ dsa,member:
+ minItems: 2
+ maxItems: 2
+ description:
+ A two element list indicates which DSA cluster, and position within the
+ cluster a switch takes. <0 0> is cluster 0, switch 0. <0 1> is cluster 0,
+ switch 1. <1 0> is cluster 1, switch 0. A switch not part of any cluster
+ (single device hanging off a CPU port) must not specify this property
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+
+patternProperties:
+ "^(ethernet-)?ports$":
+ type: object
+ properties:
+ '#address-cells':
+ const: 1
+ '#size-cells':
+ const: 0
+
+ patternProperties:
+ "^(ethernet-)?port@[0-9]+$":
+ type: object
+ description: Ethernet switch ports
+
+ properties:
+ reg:
+ description: Port number
+
+ label:
+ description:
+ Describes the label associated with this port, which will become
+ the netdev name
+ $ref: /schemas/types.yaml#definitions/string
+
+ link:
+ description:
+ Should be a list of phandles to other switch's DSA port. This
+ port is used as the outgoing port towards the phandle ports. The
+ full routing information must be given, not just the one hop
+ routes to neighbouring switches
+ $ref: /schemas/types.yaml#definitions/phandle-array
+
+ ethernet:
+ description:
+ Should be a phandle to a valid Ethernet device node. This host
+ device is what the switch port is connected to
+ $ref: /schemas/types.yaml#definitions/phandle
+
+ phy-handle: true
+
+ phy-mode: true
+
+ fixed-link: true
+
+ mac-address: true
+
+ required:
+ - reg
+
+ additionalProperties: false
+
+oneOf:
+ - required:
+ - ports
+ - required:
+ - ethernet-ports
+
+...
diff --git a/Documentation/networking/bareudp.rst b/Documentation/networking/bareudp.rst
index 0e00636d8d74..465a8b251bfe 100644
--- a/Documentation/networking/bareudp.rst
+++ b/Documentation/networking/bareudp.rst
@@ -48,7 +48,5 @@ enabled.
The bareudp device could be used along with OVS or flower filter in TC.
The OVS or TC flower layer must set the tunnel information in SKB dst field before
sending packet buffer to the bareudp device for transmission. On reception the
-bareudp device decapsulates the udp header and passes the inner packet to the
-network stack. If RX_COLLECT_METADATA flag is enabled in the device the tunnel
-information will be stored in the SKB dst field before the packet buffer is
-passed to the network stack.
+bareudp device extracts and stores the tunnel information in SKB dst field before
+passing the packet buffer to the network stack.
diff --git a/Documentation/networking/dccp.rst b/Documentation/networking/dccp.rst
index dde16be04456..91e5c33ba3ff 100644
--- a/Documentation/networking/dccp.rst
+++ b/Documentation/networking/dccp.rst
@@ -192,6 +192,9 @@ FIONREAD
Works as in udp(7): returns in the ``int`` argument pointer the size of
the next pending datagram in bytes, or 0 when no datagram is pending.
+SIOCOUTQ
+ Returns the number of unsent data bytes in the socket send queue as ``int``
+ into the buffer specified by the argument pointer.
Other tunables
==============
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index 20e235d06f66..75c1e9996867 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -13,6 +13,11 @@
#include <linux/filter.h>
#include <asm/cacheflush.h>
+static inline bool rvc_enabled(void)
+{
+ return IS_ENABLED(CONFIG_RISCV_ISA_C);
+}
+
enum {
RV_REG_ZERO = 0, /* The constant value 0 */
RV_REG_RA = 1, /* Return address */
@@ -48,9 +53,21 @@ enum {
RV_REG_T6 = 31,
};
+static inline bool is_creg(u8 reg)
+{
+ return (1 << reg) & (BIT(RV_REG_FP) |
+ BIT(RV_REG_S1) |
+ BIT(RV_REG_A0) |
+ BIT(RV_REG_A1) |
+ BIT(RV_REG_A2) |
+ BIT(RV_REG_A3) |
+ BIT(RV_REG_A4) |
+ BIT(RV_REG_A5));
+}
+
struct rv_jit_context {
struct bpf_prog *prog;
- u32 *insns; /* RV insns */
+ u16 *insns; /* RV insns */
int ninsns;
int epilogue_offset;
int *offset; /* BPF to RV */
@@ -58,6 +75,12 @@ struct rv_jit_context {
int stack_size;
};
+/* Convert from ninsns to bytes. */
+static inline int ninsns_rvoff(int ninsns)
+{
+ return ninsns << 1;
+}
+
struct rv_jit_data {
struct bpf_binary_header *header;
u8 *image;
@@ -74,8 +97,22 @@ static inline void bpf_flush_icache(void *start, void *end)
flush_icache_range((unsigned long)start, (unsigned long)end);
}
+/* Emit a 4-byte riscv instruction. */
static inline void emit(const u32 insn, struct rv_jit_context *ctx)
{
+ if (ctx->insns) {
+ ctx->insns[ctx->ninsns] = insn;
+ ctx->insns[ctx->ninsns + 1] = (insn >> 16);
+ }
+
+ ctx->ninsns += 2;
+}
+
+/* Emit a 2-byte riscv compressed instruction. */
+static inline void emitc(const u16 insn, struct rv_jit_context *ctx)
+{
+ BUILD_BUG_ON(!rvc_enabled());
+
if (ctx->insns)
ctx->insns[ctx->ninsns] = insn;
@@ -86,7 +123,7 @@ static inline int epilogue_offset(struct rv_jit_context *ctx)
{
int to = ctx->epilogue_offset, from = ctx->ninsns;
- return (to - from) << 2;
+ return ninsns_rvoff(to - from);
}
/* Return -1 or inverted cond. */
@@ -117,6 +154,36 @@ static inline int invert_bpf_cond(u8 cond)
return -1;
}
+static inline bool is_6b_int(long val)
+{
+ return -(1L << 5) <= val && val < (1L << 5);
+}
+
+static inline bool is_7b_uint(unsigned long val)
+{
+ return val < (1UL << 7);
+}
+
+static inline bool is_8b_uint(unsigned long val)
+{
+ return val < (1UL << 8);
+}
+
+static inline bool is_9b_uint(unsigned long val)
+{
+ return val < (1UL << 9);
+}
+
+static inline bool is_10b_int(long val)
+{
+ return -(1L << 9) <= val && val < (1L << 9);
+}
+
+static inline bool is_10b_uint(unsigned long val)
+{
+ return val < (1UL << 10);
+}
+
static inline bool is_12b_int(long val)
{
return -(1L << 11) <= val && val < (1L << 11);
@@ -149,7 +216,7 @@ static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx)
off++; /* BPF branch is from PC+1, RV is from PC */
from = (insn > 0) ? ctx->offset[insn - 1] : 0;
to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
- return (to - from) << 2;
+ return ninsns_rvoff(to - from);
}
/* Instruction formats. */
@@ -207,6 +274,59 @@ static inline u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1,
return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode);
}
+/* RISC-V compressed instruction formats. */
+
+static inline u16 rv_cr_insn(u8 funct4, u8 rd, u8 rs2, u8 op)
+{
+ return (funct4 << 12) | (rd << 7) | (rs2 << 2) | op;
+}
+
+static inline u16 rv_ci_insn(u8 funct3, u32 imm6, u8 rd, u8 op)
+{
+ u32 imm;
+
+ imm = ((imm6 & 0x20) << 7) | ((imm6 & 0x1f) << 2);
+ return (funct3 << 13) | (rd << 7) | op | imm;
+}
+
+static inline u16 rv_css_insn(u8 funct3, u32 uimm, u8 rs2, u8 op)
+{
+ return (funct3 << 13) | (uimm << 7) | (rs2 << 2) | op;
+}
+
+static inline u16 rv_ciw_insn(u8 funct3, u32 uimm, u8 rd, u8 op)
+{
+ return (funct3 << 13) | (uimm << 5) | ((rd & 0x7) << 2) | op;
+}
+
+static inline u16 rv_cl_insn(u8 funct3, u32 imm_hi, u8 rs1, u32 imm_lo, u8 rd,
+ u8 op)
+{
+ return (funct3 << 13) | (imm_hi << 10) | ((rs1 & 0x7) << 7) |
+ (imm_lo << 5) | ((rd & 0x7) << 2) | op;
+}
+
+static inline u16 rv_cs_insn(u8 funct3, u32 imm_hi, u8 rs1, u32 imm_lo, u8 rs2,
+ u8 op)
+{
+ return (funct3 << 13) | (imm_hi << 10) | ((rs1 & 0x7) << 7) |
+ (imm_lo << 5) | ((rs2 & 0x7) << 2) | op;
+}
+
+static inline u16 rv_ca_insn(u8 funct6, u8 rd, u8 funct2, u8 rs2, u8 op)
+{
+ return (funct6 << 10) | ((rd & 0x7) << 7) | (funct2 << 5) |
+ ((rs2 & 0x7) << 2) | op;
+}
+
+static inline u16 rv_cb_insn(u8 funct3, u32 imm6, u8 funct2, u8 rd, u8 op)
+{
+ u32 imm;
+
+ imm = ((imm6 & 0x20) << 7) | ((imm6 & 0x1f) << 2);
+ return (funct3 << 13) | (funct2 << 10) | ((rd & 0x7) << 7) | op | imm;
+}
+
/* Instructions shared by both RV32 and RV64. */
static inline u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0)
@@ -414,6 +534,135 @@ static inline u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f);
}
+/* RVC instrutions. */
+
+static inline u16 rvc_addi4spn(u8 rd, u32 imm10)
+{
+ u32 imm;
+
+ imm = ((imm10 & 0x30) << 2) | ((imm10 & 0x3c0) >> 4) |
+ ((imm10 & 0x4) >> 1) | ((imm10 & 0x8) >> 3);
+ return rv_ciw_insn(0x0, imm, rd, 0x0);
+}
+
+static inline u16 rvc_lw(u8 rd, u32 imm7, u8 rs1)
+{
+ u32 imm_hi, imm_lo;
+
+ imm_hi = (imm7 & 0x38) >> 3;
+ imm_lo = ((imm7 & 0x4) >> 1) | ((imm7 & 0x40) >> 6);
+ return rv_cl_insn(0x2, imm_hi, rs1, imm_lo, rd, 0x0);
+}
+
+static inline u16 rvc_sw(u8 rs1, u32 imm7, u8 rs2)
+{
+ u32 imm_hi, imm_lo;
+
+ imm_hi = (imm7 & 0x38) >> 3;
+ imm_lo = ((imm7 & 0x4) >> 1) | ((imm7 & 0x40) >> 6);
+ return rv_cs_insn(0x6, imm_hi, rs1, imm_lo, rs2, 0x0);
+}
+
+static inline u16 rvc_addi(u8 rd, u32 imm6)
+{
+ return rv_ci_insn(0, imm6, rd, 0x1);
+}
+
+static inline u16 rvc_li(u8 rd, u32 imm6)
+{
+ return rv_ci_insn(0x2, imm6, rd, 0x1);
+}
+
+static inline u16 rvc_addi16sp(u32 imm10)
+{
+ u32 imm;
+
+ imm = ((imm10 & 0x200) >> 4) | (imm10 & 0x10) | ((imm10 & 0x40) >> 3) |
+ ((imm10 & 0x180) >> 6) | ((imm10 & 0x20) >> 5);
+ return rv_ci_insn(0x3, imm, RV_REG_SP, 0x1);
+}
+
+static inline u16 rvc_lui(u8 rd, u32 imm6)
+{
+ return rv_ci_insn(0x3, imm6, rd, 0x1);
+}
+
+static inline u16 rvc_srli(u8 rd, u32 imm6)
+{
+ return rv_cb_insn(0x4, imm6, 0, rd, 0x1);
+}
+
+static inline u16 rvc_srai(u8 rd, u32 imm6)
+{
+ return rv_cb_insn(0x4, imm6, 0x1, rd, 0x1);
+}
+
+static inline u16 rvc_andi(u8 rd, u32 imm6)
+{
+ return rv_cb_insn(0x4, imm6, 0x2, rd, 0x1);
+}
+
+static inline u16 rvc_sub(u8 rd, u8 rs)
+{
+ return rv_ca_insn(0x23, rd, 0, rs, 0x1);
+}
+
+static inline u16 rvc_xor(u8 rd, u8 rs)
+{
+ return rv_ca_insn(0x23, rd, 0x1, rs, 0x1);
+}
+
+static inline u16 rvc_or(u8 rd, u8 rs)
+{
+ return rv_ca_insn(0x23, rd, 0x2, rs, 0x1);
+}
+
+static inline u16 rvc_and(u8 rd, u8 rs)
+{
+ return rv_ca_insn(0x23, rd, 0x3, rs, 0x1);
+}
+
+static inline u16 rvc_slli(u8 rd, u32 imm6)
+{
+ return rv_ci_insn(0, imm6, rd, 0x2);
+}
+
+static inline u16 rvc_lwsp(u8 rd, u32 imm8)
+{
+ u32 imm;
+
+ imm = ((imm8 & 0xc0) >> 6) | (imm8 & 0x3c);
+ return rv_ci_insn(0x2, imm, rd, 0x2);
+}
+
+static inline u16 rvc_jr(u8 rs1)
+{
+ return rv_cr_insn(0x8, rs1, RV_REG_ZERO, 0x2);
+}
+
+static inline u16 rvc_mv(u8 rd, u8 rs)
+{
+ return rv_cr_insn(0x8, rd, rs, 0x2);
+}
+
+static inline u16 rvc_jalr(u8 rs1)
+{
+ return rv_cr_insn(0x9, rs1, RV_REG_ZERO, 0x2);
+}
+
+static inline u16 rvc_add(u8 rd, u8 rs)
+{
+ return rv_cr_insn(0x9, rd, rs, 0x2);
+}
+
+static inline u16 rvc_swsp(u32 imm8, u8 rs2)
+{
+ u32 imm;
+
+ imm = (imm8 & 0x3c) | ((imm8 & 0xc0) >> 6);
+ return rv_css_insn(0x6, imm, rs2, 0x2);
+}
+
/*
* RV64-only instructions.
*
@@ -503,6 +752,234 @@ static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
}
+/* RV64-only RVC instructions. */
+
+static inline u16 rvc_ld(u8 rd, u32 imm8, u8 rs1)
+{
+ u32 imm_hi, imm_lo;
+
+ imm_hi = (imm8 & 0x38) >> 3;
+ imm_lo = (imm8 & 0xc0) >> 6;
+ return rv_cl_insn(0x3, imm_hi, rs1, imm_lo, rd, 0x0);
+}
+
+static inline u16 rvc_sd(u8 rs1, u32 imm8, u8 rs2)
+{
+ u32 imm_hi, imm_lo;
+
+ imm_hi = (imm8 & 0x38) >> 3;
+ imm_lo = (imm8 & 0xc0) >> 6;
+ return rv_cs_insn(0x7, imm_hi, rs1, imm_lo, rs2, 0x0);
+}
+
+static inline u16 rvc_subw(u8 rd, u8 rs)
+{
+ return rv_ca_insn(0x27, rd, 0, rs, 0x1);
+}
+
+static inline u16 rvc_addiw(u8 rd, u32 imm6)
+{
+ return rv_ci_insn(0x1, imm6, rd, 0x1);
+}
+
+static inline u16 rvc_ldsp(u8 rd, u32 imm9)
+{
+ u32 imm;
+
+ imm = ((imm9 & 0x1c0) >> 6) | (imm9 & 0x38);
+ return rv_ci_insn(0x3, imm, rd, 0x2);
+}
+
+static inline u16 rvc_sdsp(u32 imm9, u8 rs2)
+{
+ u32 imm;
+
+ imm = (imm9 & 0x38) | ((imm9 & 0x1c0) >> 6);
+ return rv_css_insn(0x7, imm, rs2, 0x2);
+}
+
+#endif /* __riscv_xlen == 64 */
+
+/* Helper functions that emit RVC instructions when possible. */
+
+static inline void emit_jalr(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd == RV_REG_RA && rs && !imm)
+ emitc(rvc_jalr(rs), ctx);
+ else if (rvc_enabled() && !rd && rs && !imm)
+ emitc(rvc_jr(rs), ctx);
+ else
+ emit(rv_jalr(rd, rs, imm), ctx);
+}
+
+static inline void emit_mv(u8 rd, u8 rs, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd && rs)
+ emitc(rvc_mv(rd, rs), ctx);
+ else
+ emit(rv_addi(rd, rs, 0), ctx);
+}
+
+static inline void emit_add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd && rd == rs1 && rs2)
+ emitc(rvc_add(rd, rs2), ctx);
+ else
+ emit(rv_add(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_addi(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd == RV_REG_SP && rd == rs && is_10b_int(imm) && imm && !(imm & 0xf))
+ emitc(rvc_addi16sp(imm), ctx);
+ else if (rvc_enabled() && is_creg(rd) && rs == RV_REG_SP && is_10b_uint(imm) &&
+ !(imm & 0x3) && imm)
+ emitc(rvc_addi4spn(rd, imm), ctx);
+ else if (rvc_enabled() && rd && rd == rs && imm && is_6b_int(imm))
+ emitc(rvc_addi(rd, imm), ctx);
+ else
+ emit(rv_addi(rd, rs, imm), ctx);
+}
+
+static inline void emit_li(u8 rd, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd && is_6b_int(imm))
+ emitc(rvc_li(rd, imm), ctx);
+ else
+ emit(rv_addi(rd, RV_REG_ZERO, imm), ctx);
+}
+
+static inline void emit_lui(u8 rd, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd && rd != RV_REG_SP && is_6b_int(imm) && imm)
+ emitc(rvc_lui(rd, imm), ctx);
+ else
+ emit(rv_lui(rd, imm), ctx);
+}
+
+static inline void emit_slli(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd && rd == rs && imm && (u32)imm < __riscv_xlen)
+ emitc(rvc_slli(rd, imm), ctx);
+ else
+ emit(rv_slli(rd, rs, imm), ctx);
+}
+
+static inline void emit_andi(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs && is_6b_int(imm))
+ emitc(rvc_andi(rd, imm), ctx);
+ else
+ emit(rv_andi(rd, rs, imm), ctx);
+}
+
+static inline void emit_srli(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs && imm && (u32)imm < __riscv_xlen)
+ emitc(rvc_srli(rd, imm), ctx);
+ else
+ emit(rv_srli(rd, rs, imm), ctx);
+}
+
+static inline void emit_srai(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs && imm && (u32)imm < __riscv_xlen)
+ emitc(rvc_srai(rd, imm), ctx);
+ else
+ emit(rv_srai(rd, rs, imm), ctx);
+}
+
+static inline void emit_sub(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+ emitc(rvc_sub(rd, rs2), ctx);
+ else
+ emit(rv_sub(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_or(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+ emitc(rvc_or(rd, rs2), ctx);
+ else
+ emit(rv_or(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_and(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+ emitc(rvc_and(rd, rs2), ctx);
+ else
+ emit(rv_and(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_xor(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+ emitc(rvc_xor(rd, rs2), ctx);
+ else
+ emit(rv_xor(rd, rs1, rs2), ctx);
+}
+
+static inline void emit_lw(u8 rd, s32 off, u8 rs1, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rs1 == RV_REG_SP && rd && is_8b_uint(off) && !(off & 0x3))
+ emitc(rvc_lwsp(rd, off), ctx);
+ else if (rvc_enabled() && is_creg(rd) && is_creg(rs1) && is_7b_uint(off) && !(off & 0x3))
+ emitc(rvc_lw(rd, off, rs1), ctx);
+ else
+ emit(rv_lw(rd, off, rs1), ctx);
+}
+
+static inline void emit_sw(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rs1 == RV_REG_SP && is_8b_uint(off) && !(off & 0x3))
+ emitc(rvc_swsp(off, rs2), ctx);
+ else if (rvc_enabled() && is_creg(rs1) && is_creg(rs2) && is_7b_uint(off) && !(off & 0x3))
+ emitc(rvc_sw(rs1, off, rs2), ctx);
+ else
+ emit(rv_sw(rs1, off, rs2), ctx);
+}
+
+/* RV64-only helper functions. */
+#if __riscv_xlen == 64
+
+static inline void emit_addiw(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rd && rd == rs && is_6b_int(imm))
+ emitc(rvc_addiw(rd, imm), ctx);
+ else
+ emit(rv_addiw(rd, rs, imm), ctx);
+}
+
+static inline void emit_ld(u8 rd, s32 off, u8 rs1, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rs1 == RV_REG_SP && rd && is_9b_uint(off) && !(off & 0x7))
+ emitc(rvc_ldsp(rd, off), ctx);
+ else if (rvc_enabled() && is_creg(rd) && is_creg(rs1) && is_8b_uint(off) && !(off & 0x7))
+ emitc(rvc_ld(rd, off, rs1), ctx);
+ else
+ emit(rv_ld(rd, off, rs1), ctx);
+}
+
+static inline void emit_sd(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && rs1 == RV_REG_SP && is_9b_uint(off) && !(off & 0x7))
+ emitc(rvc_sdsp(off, rs2), ctx);
+ else if (rvc_enabled() && is_creg(rs1) && is_creg(rs2) && is_8b_uint(off) && !(off & 0x7))
+ emitc(rvc_sd(rs1, off, rs2), ctx);
+ else
+ emit(rv_sd(rs1, off, rs2), ctx);
+}
+
+static inline void emit_subw(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
+{
+ if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2))
+ emitc(rvc_subw(rd, rs2), ctx);
+ else
+ emit(rv_subw(rd, rs1, rs2), ctx);
+}
+
#endif /* __riscv_xlen == 64 */
void bpf_jit_build_prologue(struct rv_jit_context *ctx);
diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c
index b198eaa74456..bc5f2204693f 100644
--- a/arch/riscv/net/bpf_jit_comp32.c
+++ b/arch/riscv/net/bpf_jit_comp32.c
@@ -644,7 +644,7 @@ static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 rvoff,
e = ctx->ninsns;
/* Adjust for extra insns. */
- rvoff -= (e - s) << 2;
+ rvoff -= ninsns_rvoff(e - s);
emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
return 0;
}
@@ -713,7 +713,7 @@ static int emit_bcc(u8 op, u8 rd, u8 rs, int rvoff, struct rv_jit_context *ctx)
if (far) {
e = ctx->ninsns;
/* Adjust for extra insns. */
- rvoff -= (e - s) << 2;
+ rvoff -= ninsns_rvoff(e - s);
emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
}
return 0;
@@ -731,7 +731,7 @@ static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 rvoff,
e = ctx->ninsns;
/* Adjust for extra insns. */
- rvoff -= (e - s) << 2;
+ rvoff -= ninsns_rvoff(e - s);
if (emit_bcc(op, lo(rs1), lo(rs2), rvoff, ctx))
return -1;
@@ -795,7 +795,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
* if (index >= max_entries)
* goto out;
*/
- off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+ off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx);
/*
@@ -804,7 +804,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
* goto out;
*/
emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx);
- off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+ off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_bcc(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx);
/*
@@ -818,7 +818,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
if (is_12b_check(off, insn))
return -1;
emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx);
- off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+ off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_bcc(BPF_JEQ, RV_REG_T0, RV_REG_ZERO, off, ctx);
/*
@@ -1214,7 +1214,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
emit_imm32(tmp2, imm, ctx);
src = tmp2;
e = ctx->ninsns;
- rvoff -= (e - s) << 2;
+ rvoff -= ninsns_rvoff(e - s);
}
if (is64)
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 6cfd164cbe88..8a56b5293117 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -132,19 +132,23 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
*
* This also means that we need to process LSB to MSB.
*/
- s64 upper = (val + (1 << 11)) >> 12, lower = val & 0xfff;
+ s64 upper = (val + (1 << 11)) >> 12;
+ /* Sign-extend lower 12 bits to 64 bits since immediates for li, addiw,
+ * and addi are signed and RVC checks will perform signed comparisons.
+ */
+ s64 lower = ((val & 0xfff) << 52) >> 52;
int shift;
if (is_32b_int(val)) {
if (upper)
- emit(rv_lui(rd, upper), ctx);
+ emit_lui(rd, upper, ctx);
if (!upper) {
- emit(rv_addi(rd, RV_REG_ZERO, lower), ctx);
+ emit_li(rd, lower, ctx);
return;
}
- emit(rv_addiw(rd, rd, lower), ctx);
+ emit_addiw(rd, rd, lower, ctx);
return;
}
@@ -154,9 +158,9 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
emit_imm(rd, upper, ctx);
- emit(rv_slli(rd, rd, shift), ctx);
+ emit_slli(rd, rd, shift, ctx);
if (lower)
- emit(rv_addi(rd, rd, lower), ctx);
+ emit_addi(rd, rd, lower, ctx);
}
static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
@@ -164,43 +168,43 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;
if (seen_reg(RV_REG_RA, ctx)) {
- emit(rv_ld(RV_REG_RA, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_RA, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
- emit(rv_ld(RV_REG_FP, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_FP, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
if (seen_reg(RV_REG_S1, ctx)) {
- emit(rv_ld(RV_REG_S1, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_S1, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S2, ctx)) {
- emit(rv_ld(RV_REG_S2, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_S2, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S3, ctx)) {
- emit(rv_ld(RV_REG_S3, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_S3, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S4, ctx)) {
- emit(rv_ld(RV_REG_S4, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_S4, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S5, ctx)) {
- emit(rv_ld(RV_REG_S5, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_S5, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S6, ctx)) {
- emit(rv_ld(RV_REG_S6, store_offset, RV_REG_SP), ctx);
+ emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx);
store_offset -= 8;
}
- emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx);
+ emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx);
/* Set return value. */
if (!is_tail_call)
- emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
- emit(rv_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
- is_tail_call ? 4 : 0), /* skip TCC init */
- ctx);
+ emit_mv(RV_REG_A0, RV_REG_A5, ctx);
+ emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
+ is_tail_call ? 4 : 0, /* skip TCC init */
+ ctx);
}
static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
@@ -280,8 +284,8 @@ static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
{
- emit(rv_slli(reg, reg, 32), ctx);
- emit(rv_srli(reg, reg, 32), ctx);
+ emit_slli(reg, reg, 32, ctx);
+ emit_srli(reg, reg, 32, ctx);
}
static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
@@ -304,35 +308,35 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
if (is_12b_check(off, insn))
return -1;
emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
- off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+ off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);
/* if (TCC-- < 0)
* goto out;
*/
- emit(rv_addi(RV_REG_T1, tcc, -1), ctx);
- off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+ emit_addi(RV_REG_T1, tcc, -1, ctx);
+ off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);
/* prog = array->ptrs[index];
* if (!prog)
* goto out;
*/
- emit(rv_slli(RV_REG_T2, RV_REG_A2, 3), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_A1), ctx);
+ emit_slli(RV_REG_T2, RV_REG_A2, 3, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_A1, ctx);
off = offsetof(struct bpf_array, ptrs);
if (is_12b_check(off, insn))
return -1;
- emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx);
- off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+ emit_ld(RV_REG_T2, off, RV_REG_T2, ctx);
+ off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx);
/* goto *(prog->bpf_func + 4); */
off = offsetof(struct bpf_prog, bpf_func);
if (is_12b_check(off, insn))
return -1;
- emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx);
- emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
+ emit_ld(RV_REG_T3, off, RV_REG_T2, ctx);
+ emit_mv(RV_REG_TCC, RV_REG_T1, ctx);
__build_epilogue(true, ctx);
return 0;
}
@@ -360,9 +364,9 @@ static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
{
- emit(rv_addi(RV_REG_T2, *rd, 0), ctx);
+ emit_mv(RV_REG_T2, *rd, ctx);
emit_zext_32(RV_REG_T2, ctx);
- emit(rv_addi(RV_REG_T1, *rs, 0), ctx);
+ emit_mv(RV_REG_T1, *rs, ctx);
emit_zext_32(RV_REG_T1, ctx);
*rd = RV_REG_T2;
*rs = RV_REG_T1;
@@ -370,15 +374,15 @@ static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
{
- emit(rv_addiw(RV_REG_T2, *rd, 0), ctx);
- emit(rv_addiw(RV_REG_T1, *rs, 0), ctx);
+ emit_addiw(RV_REG_T2, *rd, 0, ctx);
+ emit_addiw(RV_REG_T1, *rs, 0, ctx);
*rd = RV_REG_T2;
*rs = RV_REG_T1;
}
static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx)
{
- emit(rv_addi(RV_REG_T2, *rd, 0), ctx);
+ emit_mv(RV_REG_T2, *rd, ctx);
emit_zext_32(RV_REG_T2, ctx);
emit_zext_32(RV_REG_T1, ctx);
*rd = RV_REG_T2;
@@ -386,7 +390,7 @@ static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx)
static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
{
- emit(rv_addiw(RV_REG_T2, *rd, 0), ctx);
+ emit_addiw(RV_REG_T2, *rd, 0, ctx);
*rd = RV_REG_T2;
}
@@ -432,7 +436,7 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
if (ret)
return ret;
rd = bpf_to_rv_reg(BPF_REG_0, ctx);
- emit(rv_addi(rd, RV_REG_A0, 0), ctx);
+ emit_mv(rd, RV_REG_A0, ctx);
return 0;
}
@@ -458,7 +462,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
emit_zext_32(rd, ctx);
break;
}
- emit(is64 ? rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx);
+ emit_mv(rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@@ -466,31 +470,35 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
/* dst = dst OP src */
case BPF_ALU | BPF_ADD | BPF_X:
case BPF_ALU64 | BPF_ADD | BPF_X:
- emit(is64 ? rv_add(rd, rd, rs) : rv_addw(rd, rd, rs), ctx);
+ emit_add(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_SUB | BPF_X:
case BPF_ALU64 | BPF_SUB | BPF_X:
- emit(is64 ? rv_sub(rd, rd, rs) : rv_subw(rd, rd, rs), ctx);
+ if (is64)
+ emit_sub(rd, rd, rs, ctx);
+ else
+ emit_subw(rd, rd, rs, ctx);
+
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_AND | BPF_X:
case BPF_ALU64 | BPF_AND | BPF_X:
- emit(rv_and(rd, rd, rs), ctx);
+ emit_and(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_OR | BPF_X:
case BPF_ALU64 | BPF_OR | BPF_X:
- emit(rv_or(rd, rd, rs), ctx);
+ emit_or(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_XOR | BPF_X:
case BPF_ALU64 | BPF_XOR | BPF_X:
- emit(rv_xor(rd, rd, rs), ctx);
+ emit_xor(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@@ -534,8 +542,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
/* dst = -dst */
case BPF_ALU | BPF_NEG:
case BPF_ALU64 | BPF_NEG:
- emit(is64 ? rv_sub(rd, RV_REG_ZERO, rd) :
- rv_subw(rd, RV_REG_ZERO, rd), ctx);
+ emit_sub(rd, RV_REG_ZERO, rd, ctx);
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@@ -544,8 +551,8 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU | BPF_END | BPF_FROM_LE:
switch (imm) {
case 16:
- emit(rv_slli(rd, rd, 48), ctx);
- emit(rv_srli(rd, rd, 48), ctx);
+ emit_slli(rd, rd, 48, ctx);
+ emit_srli(rd, rd, 48, ctx);
break;
case 32:
if (!aux->verifier_zext)
@@ -558,51 +565,51 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
break;
case BPF_ALU | BPF_END | BPF_FROM_BE:
- emit(rv_addi(RV_REG_T2, RV_REG_ZERO, 0), ctx);
+ emit_li(RV_REG_T2, 0, ctx);
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
- emit(rv_srli(rd, rd, 8), ctx);
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
if (imm == 16)
goto out_be;
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
- emit(rv_srli(rd, rd, 8), ctx);
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
- emit(rv_srli(rd, rd, 8), ctx);
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
if (imm == 32)
goto out_be;
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
- emit(rv_srli(rd, rd, 8), ctx);
-
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
- emit(rv_srli(rd, rd, 8), ctx);
-
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
- emit(rv_srli(rd, rd, 8), ctx);
-
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
- emit(rv_srli(rd, rd, 8), ctx);
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
out_be:
- emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit(rv_addi(rd, RV_REG_T2, 0), ctx);
+ emit_mv(rd, RV_REG_T2, ctx);
break;
/* dst = imm */
@@ -617,12 +624,10 @@ out_be:
case BPF_ALU | BPF_ADD | BPF_K:
case BPF_ALU64 | BPF_ADD | BPF_K:
if (is_12b_int(imm)) {
- emit(is64 ? rv_addi(rd, rd, imm) :
- rv_addiw(rd, rd, imm), ctx);
+ emit_addi(rd, rd, imm, ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
- emit(is64 ? rv_add(rd, rd, RV_REG_T1) :
- rv_addw(rd, rd, RV_REG_T1), ctx);
+ emit_add(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@@ -630,12 +635,10 @@ out_be:
case BPF_ALU | BPF_SUB | BPF_K:
case BPF_ALU64 | BPF_SUB | BPF_K:
if (is_12b_int(-imm)) {
- emit(is64 ? rv_addi(rd, rd, -imm) :
- rv_addiw(rd, rd, -imm), ctx);
+ emit_addi(rd, rd, -imm, ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
- emit(is64 ? rv_sub(rd, rd, RV_REG_T1) :
- rv_subw(rd, rd, RV_REG_T1), ctx);
+ emit_sub(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@@ -643,10 +646,10 @@ out_be:
case BPF_ALU | BPF_AND | BPF_K:
case BPF_ALU64 | BPF_AND | BPF_K:
if (is_12b_int(imm)) {
- emit(rv_andi(rd, rd, imm), ctx);
+ emit_andi(rd, rd, imm, ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
- emit(rv_and(rd, rd, RV_REG_T1), ctx);
+ emit_and(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@@ -657,7 +660,7 @@ out_be:
emit(rv_ori(rd, rd, imm), ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
- emit(rv_or(rd, rd, RV_REG_T1), ctx);
+ emit_or(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@@ -668,7 +671,7 @@ out_be:
emit(rv_xori(rd, rd, imm), ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
- emit(rv_xor(rd, rd, RV_REG_T1), ctx);
+ emit_xor(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
@@ -699,19 +702,28 @@ out_be:
break;
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU64 | BPF_LSH | BPF_K:
- emit(is64 ? rv_slli(rd, rd, imm) : rv_slliw(rd, rd, imm), ctx);
+ emit_slli(rd, rd, imm, ctx);
+
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_RSH | BPF_K:
case BPF_ALU64 | BPF_RSH | BPF_K:
- emit(is64 ? rv_srli(rd, rd, imm) : rv_srliw(rd, rd, imm), ctx);
+ if (is64)
+ emit_srli(rd, rd, imm, ctx);
+ else
+ emit(rv_srliw(rd, rd, imm), ctx);
+
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
case BPF_ALU | BPF_ARSH | BPF_K:
case BPF_ALU64 | BPF_ARSH | BPF_K:
- emit(is64 ? rv_srai(rd, rd, imm) : rv_sraiw(rd, rd, imm), ctx);
+ if (is64)
+ emit_srai(rd, rd, imm, ctx);
+ else
+ emit(rv_sraiw(rd, rd, imm), ctx);
+
if (!is64 && !aux->verifier_zext)
emit_zext_32(rd, ctx);
break;
@@ -757,13 +769,13 @@ out_be:
e = ctx->ninsns;
/* Adjust for extra insns */
- rvoff -= (e - s) << 2;
+ rvoff -= ninsns_rvoff(e - s);
}
if (BPF_OP(code) == BPF_JSET) {
/* Adjust for and */
rvoff -= 4;
- emit(rv_and(RV_REG_T1, rd, rs), ctx);
+ emit_and(RV_REG_T1, rd, rs, ctx);
emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
ctx);
} else {
@@ -810,7 +822,7 @@ out_be:
e = ctx->ninsns;
/* Adjust for extra insns */
- rvoff -= (e - s) << 2;
+ rvoff -= ninsns_rvoff(e - s);
emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
break;
@@ -819,19 +831,19 @@ out_be:
rvoff = rv_offset(i, off, ctx);
s = ctx->ninsns;
if (is_12b_int(imm)) {
- emit(rv_andi(RV_REG_T1, rd, imm), ctx);
+ emit_andi(RV_REG_T1, rd, imm, ctx);
} else {
emit_imm(RV_REG_T1, imm, ctx);
- emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
+ emit_and(RV_REG_T1, rd, RV_REG_T1, ctx);
}
/* For jset32, we should clear the upper 32 bits of t1, but
* sign-extension is sufficient here and saves one instruction,
* as t1 is used only in comparison against zero.
*/
if (!is64 && imm < 0)
- emit(rv_addiw(RV_REG_T1, RV_REG_T1, 0), ctx);
+ emit_addiw(RV_REG_T1, RV_REG_T1, 0, ctx);
e = ctx->ninsns;
- rvoff -= (e - s) << 2;
+ rvoff -= ninsns_rvoff(e - s);
emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
break;
@@ -887,7 +899,7 @@ out_be:
}
emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
if (insn_is_zext(&insn[1]))
return 1;
@@ -899,7 +911,7 @@ out_be:
}
emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
if (insn_is_zext(&insn[1]))
return 1;
@@ -911,20 +923,20 @@ out_be:
}
emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
if (insn_is_zext(&insn[1]))
return 1;
break;
case BPF_LDX | BPF_MEM | BPF_DW:
if (is_12b_int(off)) {
- emit(rv_ld(rd, off, rs), ctx);
+ emit_ld(rd, off, rs, ctx);
break;
}
emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
- emit(rv_ld(rd, 0, RV_REG_T1), ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ emit_ld(rd, 0, RV_REG_T1, ctx);
break;
/* ST: *(size *)(dst + off) = imm */
@@ -936,7 +948,7 @@ out_be:
}
emit_imm(RV_REG_T2, off, ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
break;
@@ -948,30 +960,30 @@ out_be:
}
emit_imm(RV_REG_T2, off, ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
break;
case BPF_ST | BPF_MEM | BPF_W:
emit_imm(RV_REG_T1, imm, ctx);
if (is_12b_int(off)) {
- emit(rv_sw(rd, off, RV_REG_T1), ctx);
+ emit_sw(rd, off, RV_REG_T1, ctx);
break;
}
emit_imm(RV_REG_T2, off, ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
- emit(rv_sw(RV_REG_T2, 0, RV_REG_T1), ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
+ emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
break;
case BPF_ST | BPF_MEM | BPF_DW:
emit_imm(RV_REG_T1, imm, ctx);
if (is_12b_int(off)) {
- emit(rv_sd(rd, off, RV_REG_T1), ctx);
+ emit_sd(rd, off, RV_REG_T1, ctx);
break;
}
emit_imm(RV_REG_T2, off, ctx);
- emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
- emit(rv_sd(RV_REG_T2, 0, RV_REG_T1), ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
+ emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
break;
/* STX: *(size *)(dst + off) = src */
@@ -982,7 +994,7 @@ out_be:
}
emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
emit(rv_sb(RV_REG_T1, 0, rs), ctx);
break;
case BPF_STX | BPF_MEM | BPF_H:
@@ -992,28 +1004,28 @@ out_be:
}
emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
emit(rv_sh(RV_REG_T1, 0, rs), ctx);
break;
case BPF_STX | BPF_MEM | BPF_W:
if (is_12b_int(off)) {
- emit(rv_sw(rd, off, rs), ctx);
+ emit_sw(rd, off, rs, ctx);
break;
}
emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
- emit(rv_sw(RV_REG_T1, 0, rs), ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+ emit_sw(RV_REG_T1, 0, rs, ctx);
break;
case BPF_STX | BPF_MEM | BPF_DW:
if (is_12b_int(off)) {
- emit(rv_sd(rd, off, rs), ctx);
+ emit_sd(rd, off, rs, ctx);
break;
}
emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
- emit(rv_sd(RV_REG_T1, 0, rs), ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+ emit_sd(RV_REG_T1, 0, rs, ctx);
break;
/* STX XADD: lock *(u32 *)(dst + off) += src */
case BPF_STX | BPF_XADD | BPF_W:
@@ -1021,10 +1033,10 @@ out_be:
case BPF_STX | BPF_XADD | BPF_DW:
if (off) {
if (is_12b_int(off)) {
- emit(rv_addi(RV_REG_T1, rd, off), ctx);
+ emit_addi(RV_REG_T1, rd, off, ctx);
} else {
emit_imm(RV_REG_T1, off, ctx);
- emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
}
rd = RV_REG_T1;
@@ -1073,52 +1085,53 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx)
/* First instruction is always setting the tail-call-counter
* (TCC) register. This instruction is skipped for tail calls.
+ * Force using a 4-byte (non-compressed) instruction.
*/
emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx);
- emit(rv_addi(RV_REG_SP, RV_REG_SP, -stack_adjust), ctx);
+ emit_addi(RV_REG_SP, RV_REG_SP, -stack_adjust, ctx);
if (seen_reg(RV_REG_RA, ctx)) {
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_RA), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_RA, ctx);
store_offset -= 8;
}
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_FP), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_FP, ctx);
store_offset -= 8;
if (seen_reg(RV_REG_S1, ctx)) {
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S1), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_S1, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S2, ctx)) {
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S2), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_S2, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S3, ctx)) {
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S3), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_S3, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S4, ctx)) {
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S4), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_S4, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S5, ctx)) {
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S5), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_S5, ctx);
store_offset -= 8;
}
if (seen_reg(RV_REG_S6, ctx)) {
- emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S6), ctx);
+ emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx);
store_offset -= 8;
}
- emit(rv_addi(RV_REG_FP, RV_REG_SP, stack_adjust), ctx);
+ emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx);
if (bpf_stack_adjust)
- emit(rv_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust), ctx);
+ emit_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust, ctx);
/* Program contains calls and tail calls, so RV_REG_TCC need
* to be saved across calls.
*/
if (seen_tail_call(ctx) && seen_call(ctx))
- emit(rv_addi(RV_REG_TCC_SAVED, RV_REG_TCC, 0), ctx);
+ emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx);
ctx->stack_size = stack_adjust;
}
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 709b94ece3ed..3630d447352c 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -73,7 +73,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
if (ctx->offset) {
extra_pass = true;
- image_size = sizeof(u32) * ctx->ninsns;
+ image_size = sizeof(*ctx->insns) * ctx->ninsns;
goto skip_init_ctx;
}
@@ -103,7 +103,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
if (jit_data->header)
break;
- image_size = sizeof(u32) * ctx->ninsns;
+ image_size = sizeof(*ctx->insns) * ctx->ninsns;
jit_data->header =
bpf_jit_binary_alloc(image_size,
&jit_data->image,
@@ -114,7 +114,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_offset;
}
- ctx->insns = (u32 *)jit_data->image;
+ ctx->insns = (u16 *)jit_data->image;
/*
* Now, when the image is allocated, the image can
* potentially shrink more (auipc/jalr -> jal).
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index f4242b894cf2..26f97a10e793 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -489,6 +489,24 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
} while (re <= last);
}
+static void bpf_skip(struct bpf_jit *jit, int size)
+{
+ if (size >= 6 && !is_valid_rel(size)) {
+ /* brcl 0xf,size */
+ EMIT6_PCREL_RIL(0xc0f4000000, size);
+ size -= 6;
+ } else if (size >= 4 && is_valid_rel(size)) {
+ /* brc 0xf,size */
+ EMIT4_PCREL(0xa7f40000, size);
+ size -= 4;
+ }
+ while (size >= 2) {
+ /* bcr 0,%0 */
+ _EMIT2(0x0700);
+ size -= 2;
+ }
+}
+
/*
* Emit function prologue
*
@@ -501,10 +519,11 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
} else {
- /* j tail_call_start: NOP if no tail calls are used */
- EMIT4_PCREL(0xa7f40000, 6);
- /* bcr 0,%0 */
- EMIT2(0x0700, 0, REG_0);
+ /*
+ * There are no tail calls. Insert nops in order to have
+ * tail_call_start at a predictable offset.
+ */
+ bpf_skip(jit, 6);
}
/* Tail calls have to skip above initialization */
jit->tail_call_start = jit->prg;
@@ -1268,8 +1287,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
last = (i == fp->len - 1) ? 1 : 0;
if (last)
break;
- /* j <exit> */
- EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
+ if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip))
+ /* brc 0xf, <exit> */
+ EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip);
+ else
+ /* brcl 0xf, <exit> */
+ EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip);
break;
/*
* Branch relative (number of skipped instructions) to offset on
@@ -1417,21 +1440,10 @@ branch_ks:
}
break;
branch_ku:
- is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* clfi or clgfi %dst,imm */
- EMIT6_IMM(is_jmp32 ? 0xc20f0000 : 0xc20e0000,
- dst_reg, imm);
- if (!is_first_pass(jit) &&
- can_use_rel(jit, addrs[i + off + 1])) {
- /* brc mask,off */
- EMIT4_PCREL_RIC(0xa7040000,
- mask >> 12, addrs[i + off + 1]);
- } else {
- /* brcl mask,off */
- EMIT6_PCREL_RILC(0xc0040000,
- mask >> 12, addrs[i + off + 1]);
- }
- break;
+ /* lgfi %w1,imm (load sign extend imm) */
+ src_reg = REG_1;
+ EMIT6_IMM(0xc0010000, src_reg, imm);
+ goto branch_xu;
branch_xs:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
if (!is_first_pass(jit) &&
@@ -1510,7 +1522,14 @@ static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
*/
static int bpf_set_addr(struct bpf_jit *jit, int i)
{
- if (!bpf_is_new_addr_sane(jit, i))
+ int delta;
+
+ if (is_codegen_pass(jit)) {
+ delta = jit->prg - jit->addrs[i];
+ if (delta < 0)
+ bpf_skip(jit, -delta);
+ }
+ if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i)))
return -1;
jit->addrs[i] = jit->prg;
return 0;
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 29f71428520b..892242a42c3e 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -197,8 +197,7 @@ unlock:
return err;
}
-static int alg_setkey(struct sock *sk, char __user *ukey,
- unsigned int keylen)
+static int alg_setkey(struct sock *sk, sockptr_t ukey, unsigned int keylen)
{
struct alg_sock *ask = alg_sk(sk);
const struct af_alg_type *type = ask->type;
@@ -210,7 +209,7 @@ static int alg_setkey(struct sock *sk, char __user *ukey,
return -ENOMEM;
err = -EFAULT;
- if (copy_from_user(key, ukey, keylen))
+ if (copy_from_sockptr(key, ukey, keylen))
goto out;
err = type->setkey(ask->private, key, keylen);
@@ -222,7 +221,7 @@ out:
}
static int alg_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c
index d98b89d0fa6e..c3058dcdb33c 100644
--- a/drivers/crypto/chelsio/chtls/chtls_main.c
+++ b/drivers/crypto/chelsio/chtls/chtls_main.c
@@ -488,7 +488,7 @@ static int chtls_getsockopt(struct sock *sk, int level, int optname,
}
static int do_chtls_setsockopt(struct sock *sk, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct tls_crypto_info *crypto_info, tmp_crypto_info;
struct chtls_sock *csk;
@@ -498,12 +498,12 @@ static int do_chtls_setsockopt(struct sock *sk, int optname,
csk = rcu_dereference_sk_user_data(sk);
- if (!optval || optlen < sizeof(*crypto_info)) {
+ if (sockptr_is_null(optval) || optlen < sizeof(*crypto_info)) {
rc = -EINVAL;
goto out;
}
- rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info));
+ rc = copy_from_sockptr(&tmp_crypto_info, optval, sizeof(*crypto_info));
if (rc) {
rc = -EFAULT;
goto out;
@@ -525,8 +525,9 @@ static int do_chtls_setsockopt(struct sock *sk, int optname,
/* Obtain version and type from previous copy */
crypto_info[0] = tmp_crypto_info;
/* Now copy the following data */
- rc = copy_from_user((char *)crypto_info + sizeof(*crypto_info),
- optval + sizeof(*crypto_info),
+ sockptr_advance(optval, sizeof(*crypto_info));
+ rc = copy_from_sockptr((char *)crypto_info + sizeof(*crypto_info),
+ optval,
sizeof(struct tls12_crypto_info_aes_gcm_128)
- sizeof(*crypto_info));
@@ -541,8 +542,9 @@ static int do_chtls_setsockopt(struct sock *sk, int optname,
}
case TLS_CIPHER_AES_GCM_256: {
crypto_info[0] = tmp_crypto_info;
- rc = copy_from_user((char *)crypto_info + sizeof(*crypto_info),
- optval + sizeof(*crypto_info),
+ sockptr_advance(optval, sizeof(*crypto_info));
+ rc = copy_from_sockptr((char *)crypto_info + sizeof(*crypto_info),
+ optval,
sizeof(struct tls12_crypto_info_aes_gcm_256)
- sizeof(*crypto_info));
@@ -565,7 +567,7 @@ out:
}
static int chtls_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct tls_context *ctx = tls_get_ctx(sk);
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index ccaedfd53e49..b1de8d608e4d 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -346,9 +346,14 @@ static void qedr_free_resources(struct qedr_dev *dev)
static int qedr_alloc_resources(struct qedr_dev *dev)
{
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .intended_use = QED_CHAIN_USE_TO_CONSUME,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U16,
+ .elem_size = sizeof(struct regpair *),
+ };
struct qedr_cnq *cnq;
__le16 *cons_pi;
- u16 n_entries;
int i, rc;
dev->sgid_tbl = kcalloc(QEDR_MAX_SGID, sizeof(union ib_gid),
@@ -382,7 +387,9 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
dev->sb_start = dev->ops->rdma_get_start_sb(dev->cdev);
/* Allocate CNQ PBLs */
- n_entries = min_t(u32, QED_RDMA_MAX_CNQ_SIZE, QEDR_ROCE_MAX_CNQ_SIZE);
+ params.num_elems = min_t(u32, QED_RDMA_MAX_CNQ_SIZE,
+ QEDR_ROCE_MAX_CNQ_SIZE);
+
for (i = 0; i < dev->num_cnq; i++) {
cnq = &dev->cnq_array[i];
@@ -391,13 +398,8 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
if (rc)
goto err3;
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_CONSUME,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U16,
- n_entries,
- sizeof(struct regpair *),
- &cnq->pbl, NULL);
+ rc = dev->ops->common->chain_alloc(dev->cdev, &cnq->pbl,
+ &params);
if (rc)
goto err4;
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 9b9e80266367..49b8a43e3fa2 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -891,6 +891,12 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
udata, struct qedr_ucontext, ibucontext);
struct qed_rdma_destroy_cq_out_params destroy_oparams;
struct qed_rdma_destroy_cq_in_params destroy_iparams;
+ struct qed_chain_init_params chain_params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .intended_use = QED_CHAIN_USE_TO_CONSUME,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U32,
+ .elem_size = sizeof(union rdma_cqe),
+ };
struct qedr_dev *dev = get_qedr_dev(ibdev);
struct qed_rdma_create_cq_in_params params;
struct qedr_create_cq_ureq ureq = {};
@@ -917,6 +923,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
chain_entries = qedr_align_cq_entries(entries);
chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
+ chain_params.num_elems = chain_entries;
/* calc db offset. user will add DPI base, kernel will add db addr */
db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
@@ -951,13 +958,8 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
} else {
cq->cq_type = QEDR_CQ_TYPE_KERNEL;
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_CONSUME,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- chain_entries,
- sizeof(union rdma_cqe),
- &cq->pbl, NULL);
+ rc = dev->ops->common->chain_alloc(dev->cdev, &cq->pbl,
+ &chain_params);
if (rc)
goto err0;
@@ -1446,6 +1448,12 @@ static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
struct ib_srq_init_attr *init_attr)
{
struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U32,
+ .elem_size = QEDR_SRQ_WQE_ELEM_SIZE,
+ };
dma_addr_t phy_prod_pair_addr;
u32 num_elems;
void *va;
@@ -1464,13 +1472,9 @@ static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
hw_srq->virt_prod_pair_addr = va;
num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- num_elems,
- QEDR_SRQ_WQE_ELEM_SIZE,
- &hw_srq->pbl, NULL);
+ params.num_elems = num_elems;
+
+ rc = dev->ops->common->chain_alloc(dev->cdev, &hw_srq->pbl, &params);
if (rc)
goto err0;
@@ -1901,29 +1905,28 @@ qedr_roce_create_kernel_qp(struct qedr_dev *dev,
u32 n_sq_elems, u32 n_rq_elems)
{
struct qed_rdma_create_qp_out_params out_params;
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U32,
+ };
int rc;
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- n_sq_elems,
- QEDR_SQE_ELEMENT_SIZE,
- &qp->sq.pbl, NULL);
+ params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
+ params.num_elems = n_sq_elems;
+ params.elem_size = QEDR_SQE_ELEMENT_SIZE;
+ rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
if (rc)
return rc;
in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- n_rq_elems,
- QEDR_RQE_ELEMENT_SIZE,
- &qp->rq.pbl, NULL);
+ params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
+ params.elem_size = n_rq_elems;
+ params.elem_size = QEDR_RQE_ELEMENT_SIZE;
+
+ rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
if (rc)
return rc;
@@ -1949,14 +1952,19 @@ qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
u32 n_sq_elems, u32 n_rq_elems)
{
struct qed_rdma_create_qp_out_params out_params;
- struct qed_chain_ext_pbl ext_pbl;
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U32,
+ };
int rc;
in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
QEDR_SQE_ELEMENT_SIZE,
+ QED_CHAIN_PAGE_SIZE,
QED_CHAIN_MODE_PBL);
in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
QEDR_RQE_ELEMENT_SIZE,
+ QED_CHAIN_PAGE_SIZE,
QED_CHAIN_MODE_PBL);
qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
@@ -1966,31 +1974,24 @@ qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
return -EINVAL;
/* Now we allocate the chain */
- ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
- ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- n_sq_elems,
- QEDR_SQE_ELEMENT_SIZE,
- &qp->sq.pbl, &ext_pbl);
+ params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
+ params.num_elems = n_sq_elems;
+ params.elem_size = QEDR_SQE_ELEMENT_SIZE;
+ params.ext_pbl_virt = out_params.sq_pbl_virt;
+ params.ext_pbl_phys = out_params.sq_pbl_phys;
+ rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
if (rc)
goto err;
- ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
- ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
-
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- n_rq_elems,
- QEDR_RQE_ELEMENT_SIZE,
- &qp->rq.pbl, &ext_pbl);
+ params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
+ params.num_elems = n_rq_elems;
+ params.elem_size = QEDR_RQE_ELEMENT_SIZE;
+ params.ext_pbl_virt = out_params.rq_pbl_virt;
+ params.ext_pbl_phys = out_params.rq_pbl_phys;
+ rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
if (rc)
goto err;
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 1b2b91479107..a6606736d8c5 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -401,20 +401,20 @@ data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
}
static int data_sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int len)
+ sockptr_t optval, unsigned int len)
{
struct sock *sk = sock->sk;
int err = 0, opt = 0;
if (*debug & DEBUG_SOCKET)
- printk(KERN_DEBUG "%s(%p, %d, %x, %p, %d)\n", __func__, sock,
- level, optname, optval, len);
+ printk(KERN_DEBUG "%s(%p, %d, %x, optval, %d)\n", __func__, sock,
+ level, optname, len);
lock_sock(sk);
switch (optname) {
case MISDN_TIME_STAMP:
- if (get_user(opt, (int __user *)optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(int))) {
err = -EFAULT;
break;
}
diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c
index 108a8cafc4f8..44eb2b1d0416 100644
--- a/drivers/net/bareudp.c
+++ b/drivers/net/bareudp.c
@@ -46,7 +46,6 @@ struct bareudp_dev {
__be16 port;
u16 sport_min;
bool multi_proto_mode;
- bool rx_collect_metadata;
struct socket __rcu *sock;
struct list_head next; /* bareudp node on namespace list */
struct gro_cells gro_cells;
@@ -126,14 +125,12 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
bareudp->dev->stats.rx_dropped++;
goto drop;
}
- if (bareudp->rx_collect_metadata) {
- tun_dst = udp_tun_rx_dst(skb, family, TUNNEL_KEY, 0, 0);
- if (!tun_dst) {
- bareudp->dev->stats.rx_dropped++;
- goto drop;
- }
- skb_dst_set(skb, &tun_dst->dst);
+ tun_dst = udp_tun_rx_dst(skb, family, TUNNEL_KEY, 0, 0);
+ if (!tun_dst) {
+ bareudp->dev->stats.rx_dropped++;
+ goto drop;
}
+ skb_dst_set(skb, &tun_dst->dst);
skb->dev = bareudp->dev;
oiph = skb_network_header(skb);
skb_reset_network_header(skb);
@@ -577,9 +574,6 @@ static int bareudp2info(struct nlattr *data[], struct bareudp_conf *conf,
if (data[IFLA_BAREUDP_MULTIPROTO_MODE])
conf->multi_proto_mode = true;
- if (data[IFLA_BAREUDP_RX_COLLECT_METADATA])
- conf->rx_collect_metadata = true;
-
return 0;
}
@@ -617,7 +611,6 @@ static int bareudp_configure(struct net *net, struct net_device *dev,
bareudp->ethertype = conf->ethertype;
bareudp->sport_min = conf->sport_min;
bareudp->multi_proto_mode = conf->multi_proto_mode;
- bareudp->rx_collect_metadata = conf->rx_collect_metadata;
err = register_netdevice(dev);
if (err)
@@ -676,7 +669,6 @@ static size_t bareudp_get_size(const struct net_device *dev)
nla_total_size(sizeof(__be16)) + /* IFLA_BAREUDP_ETHERTYPE */
nla_total_size(sizeof(__u16)) + /* IFLA_BAREUDP_SRCPORT_MIN */
nla_total_size(0) + /* IFLA_BAREUDP_MULTIPROTO_MODE */
- nla_total_size(0) + /* IFLA_BAREUDP_RX_COLLECT_METADATA */
0;
}
@@ -693,9 +685,6 @@ static int bareudp_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (bareudp->multi_proto_mode &&
nla_put_flag(skb, IFLA_BAREUDP_MULTIPROTO_MODE))
goto nla_put_failure;
- if (bareudp->rx_collect_metadata &&
- nla_put_flag(skb, IFLA_BAREUDP_RX_COLLECT_METADATA))
- goto nla_put_failure;
return 0;
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 6f019955ae42..b8a3e8c88c07 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2699,6 +2699,8 @@ static int mv88e6xxx_get_max_mtu(struct dsa_switch *ds, int port)
if (chip->info->ops->port_set_jumbo_size)
return 10240;
+ else if (chip->info->ops->set_max_frame_size)
+ return 1632;
return 1522;
}
@@ -2710,6 +2712,8 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
mv88e6xxx_reg_lock(chip);
if (chip->info->ops->port_set_jumbo_size)
ret = chip->info->ops->port_set_jumbo_size(chip, port, new_mtu);
+ else if (chip->info->ops->set_max_frame_size)
+ ret = chip->info->ops->set_max_frame_size(chip, new_mtu);
else
if (new_mtu > 1522)
ret = -EINVAL;
@@ -3450,6 +3454,7 @@ static const struct mv88e6xxx_ops mv88e6085_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.phylink_validate = mv88e6185_phylink_validate,
+ .set_max_frame_size = mv88e6185_g1_set_max_frame_size,
};
static const struct mv88e6xxx_ops mv88e6095_ops = {
@@ -3478,6 +3483,7 @@ static const struct mv88e6xxx_ops mv88e6095_ops = {
.vtu_getnext = mv88e6185_g1_vtu_getnext,
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
.phylink_validate = mv88e6185_phylink_validate,
+ .set_max_frame_size = mv88e6185_g1_set_max_frame_size,
};
static const struct mv88e6xxx_ops mv88e6097_ops = {
@@ -3494,7 +3500,6 @@ static const struct mv88e6xxx_ops mv88e6097_ops = {
.port_set_frame_mode = mv88e6351_port_set_frame_mode,
.port_set_egress_floods = mv88e6352_port_set_egress_floods,
.port_set_ether_type = mv88e6351_port_set_ether_type,
- .port_set_jumbo_size = mv88e6165_port_set_jumbo_size,
.port_egress_rate_limiting = mv88e6095_port_egress_rate_limiting,
.port_pause_limit = mv88e6097_port_pause_limit,
.port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
@@ -3516,6 +3521,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.phylink_validate = mv88e6185_phylink_validate,
+ .set_max_frame_size = mv88e6185_g1_set_max_frame_size,
};
static const struct mv88e6xxx_ops mv88e6123_ops = {
@@ -3550,6 +3556,7 @@ static const struct mv88e6xxx_ops mv88e6123_ops = {
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
.phylink_validate = mv88e6185_phylink_validate,
+ .set_max_frame_size = mv88e6185_g1_set_max_frame_size,
};
static const struct mv88e6xxx_ops mv88e6131_ops = {
@@ -3939,6 +3946,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = {
.vtu_getnext = mv88e6185_g1_vtu_getnext,
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
.phylink_validate = mv88e6185_phylink_validate,
+ .set_max_frame_size = mv88e6185_g1_set_max_frame_size,
};
static const struct mv88e6xxx_ops mv88e6190_ops = {
@@ -3959,6 +3967,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
.port_set_frame_mode = mv88e6351_port_set_frame_mode,
.port_set_egress_floods = mv88e6352_port_set_egress_floods,
.port_set_ether_type = mv88e6351_port_set_ether_type,
+ .port_set_jumbo_size = mv88e6165_port_set_jumbo_size,
.port_pause_limit = mv88e6390_port_pause_limit,
.port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
@@ -4017,6 +4026,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
.port_set_frame_mode = mv88e6351_port_set_frame_mode,
.port_set_egress_floods = mv88e6352_port_set_egress_floods,
.port_set_ether_type = mv88e6351_port_set_ether_type,
+ .port_set_jumbo_size = mv88e6165_port_set_jumbo_size,
.port_pause_limit = mv88e6390_port_pause_limit,
.port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 1c541b074256..2d70eac30e58 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -552,6 +552,9 @@ struct mv88e6xxx_ops {
void (*phylink_validate)(struct mv88e6xxx_chip *chip, int port,
unsigned long *mask,
struct phylink_link_state *state);
+
+ /* Max Frame Size */
+ int (*set_max_frame_size)(struct mv88e6xxx_chip *chip, int mtu);
};
struct mv88e6xxx_irq_ops {
diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
index ca3a7a7a73c3..f62aa83ca08d 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.c
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -196,6 +196,23 @@ int mv88e6185_g1_ppu_disable(struct mv88e6xxx_chip *chip)
return mv88e6185_g1_wait_ppu_disabled(chip);
}
+int mv88e6185_g1_set_max_frame_size(struct mv88e6xxx_chip *chip, int mtu)
+{
+ u16 val;
+ int err;
+
+ err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &val);
+ if (err)
+ return err;
+
+ val &= ~MV88E6185_G1_CTL1_MAX_FRAME_1632;
+
+ if (mtu > 1518)
+ val |= MV88E6185_G1_CTL1_MAX_FRAME_1632;
+
+ return mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, val);
+}
+
/* Offset 0x10: IP-PRI Mapping Register 0
* Offset 0x11: IP-PRI Mapping Register 1
* Offset 0x12: IP-PRI Mapping Register 2
diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
index 5324c6f4ae90..1e3546f8b072 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.h
+++ b/drivers/net/dsa/mv88e6xxx/global1.h
@@ -282,6 +282,8 @@ int mv88e6250_g1_reset(struct mv88e6xxx_chip *chip);
int mv88e6185_g1_ppu_enable(struct mv88e6xxx_chip *chip);
int mv88e6185_g1_ppu_disable(struct mv88e6xxx_chip *chip);
+int mv88e6185_g1_set_max_frame_size(struct mv88e6xxx_chip *chip, int mtu);
+
int mv88e6xxx_g1_stats_snapshot(struct mv88e6xxx_chip *chip, int port);
int mv88e6320_g1_stats_snapshot(struct mv88e6xxx_chip *chip, int port);
int mv88e6390_g1_stats_snapshot(struct mv88e6xxx_chip *chip, int port);
diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index 336742f6e3c3..b818a169c193 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -491,6 +491,36 @@ enum ena_admin_llq_stride_ctrl {
ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY = 2,
};
+enum ena_admin_accel_mode_feat {
+ ENA_ADMIN_DISABLE_META_CACHING = 0,
+ ENA_ADMIN_LIMIT_TX_BURST = 1,
+};
+
+struct ena_admin_accel_mode_get {
+ /* bit field of enum ena_admin_accel_mode_feat */
+ u16 supported_flags;
+
+ /* maximum burst size between two doorbells. The size is in bytes */
+ u16 max_tx_burst_size;
+};
+
+struct ena_admin_accel_mode_set {
+ /* bit field of enum ena_admin_accel_mode_feat */
+ u16 enabled_flags;
+
+ u16 reserved;
+};
+
+struct ena_admin_accel_mode_req {
+ union {
+ u32 raw[2];
+
+ struct ena_admin_accel_mode_get get;
+
+ struct ena_admin_accel_mode_set set;
+ } u;
+};
+
struct ena_admin_feature_llq_desc {
u32 max_llq_num;
@@ -536,10 +566,13 @@ struct ena_admin_feature_llq_desc {
/* the stride control the driver selected to use */
u16 descriptors_stride_ctrl_enabled;
- /* Maximum size in bytes taken by llq entries in a single tx burst.
- * Set to 0 when there is no such limit.
+ /* reserved */
+ u32 reserved1;
+
+ /* accelerated low latency queues requirement. driver needs to
+ * support those requirements in order to use accelerated llq
*/
- u32 max_tx_burst_size;
+ struct ena_admin_accel_mode_req accel_mode;
};
struct ena_admin_queue_ext_feature_fields {
@@ -816,7 +849,9 @@ struct ena_admin_host_info {
/* 0 : reserved
* 1 : rx_offset
* 2 : interrupt_moderation
- * 31:3 : reserved
+ * 3 : rx_buf_mirroring
+ * 4 : rss_configurable_function_key
+ * 31:5 : reserved
*/
u32 driver_supported_features;
};
@@ -1129,6 +1164,10 @@ struct ena_admin_ena_mmio_req_read_less_resp {
#define ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK BIT(1)
#define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_SHIFT 2
#define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK BIT(2)
+#define ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_SHIFT 3
+#define ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK BIT(3)
+#define ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_SHIFT 4
+#define ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK BIT(4)
/* aenq_common_desc */
#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0)
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 432f143559a1..435bf05a853c 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -403,6 +403,8 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
0x0, io_sq->llq_info.desc_list_entry_size);
io_sq->llq_buf_ctrl.descs_left_in_line =
io_sq->llq_info.descs_num_before_header;
+ io_sq->disable_meta_caching =
+ io_sq->llq_info.disable_meta_caching;
if (io_sq->llq_info.max_entries_in_tx_burst > 0)
io_sq->entries_in_tx_burst_left =
@@ -626,6 +628,10 @@ static int ena_com_set_llq(struct ena_com_dev *ena_dev)
cmd.u.llq.desc_num_before_header_enabled = llq_info->descs_num_before_header;
cmd.u.llq.descriptors_stride_ctrl_enabled = llq_info->desc_stride_ctrl;
+ cmd.u.llq.accel_mode.u.set.enabled_flags =
+ BIT(ENA_ADMIN_DISABLE_META_CACHING) |
+ BIT(ENA_ADMIN_LIMIT_TX_BURST);
+
ret = ena_com_execute_admin_command(admin_queue,
(struct ena_admin_aq_entry *)&cmd,
sizeof(cmd),
@@ -643,6 +649,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev,
struct ena_llq_configurations *llq_default_cfg)
{
struct ena_com_llq_info *llq_info = &ena_dev->llq_info;
+ struct ena_admin_accel_mode_get llq_accel_mode_get;
u16 supported_feat;
int rc;
@@ -742,9 +749,17 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev,
llq_default_cfg->llq_num_decs_before_header,
supported_feat, llq_info->descs_num_before_header);
}
+ /* Check for accelerated queue supported */
+ llq_accel_mode_get = llq_features->accel_mode.u.get;
+
+ llq_info->disable_meta_caching =
+ !!(llq_accel_mode_get.supported_flags &
+ BIT(ENA_ADMIN_DISABLE_META_CACHING));
- llq_info->max_entries_in_tx_burst =
- (u16)(llq_features->max_tx_burst_size / llq_default_cfg->llq_ring_entry_size_value);
+ if (llq_accel_mode_get.supported_flags & BIT(ENA_ADMIN_LIMIT_TX_BURST))
+ llq_info->max_entries_in_tx_burst =
+ llq_accel_mode_get.max_tx_burst_size /
+ llq_default_cfg->llq_ring_entry_size_value;
rc = ena_com_set_llq(ena_dev);
if (rc)
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index bc187adf54e4..4287d47b2b0b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -127,6 +127,7 @@ struct ena_com_llq_info {
u16 descs_num_before_header;
u16 descs_per_entry;
u16 max_entries_in_tx_burst;
+ bool disable_meta_caching;
};
struct ena_com_io_cq {
@@ -189,6 +190,8 @@ struct ena_com_io_sq {
enum queue_direction direction;
enum ena_admin_placement_policy_type mem_queue_type;
+ bool disable_meta_caching;
+
u32 msix_vector;
struct ena_com_tx_meta cached_tx_meta;
struct ena_com_llq_info llq_info;
@@ -230,11 +233,11 @@ struct ena_com_admin_sq {
};
struct ena_com_stats_admin {
- u32 aborted_cmd;
- u32 submitted_cmd;
- u32 completed_cmd;
- u32 out_of_space;
- u32 no_completion;
+ u64 aborted_cmd;
+ u64 submitted_cmd;
+ u64 completed_cmd;
+ u64 out_of_space;
+ u64 no_completion;
};
struct ena_com_admin_queue {
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
index ec8ea25e988d..ccd440589565 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
@@ -285,11 +285,10 @@ static u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq,
return count;
}
-static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
- struct ena_com_tx_ctx *ena_tx_ctx)
+static int ena_com_create_meta(struct ena_com_io_sq *io_sq,
+ struct ena_com_tx_meta *ena_meta)
{
struct ena_eth_io_tx_meta_desc *meta_desc = NULL;
- struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
meta_desc = get_sq_desc(io_sq);
memset(meta_desc, 0x0, sizeof(struct ena_eth_io_tx_meta_desc));
@@ -309,12 +308,13 @@ static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
/* Extended meta desc */
meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK;
- meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK;
meta_desc->len_ctrl |= (io_sq->phase <<
ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT) &
ENA_ETH_IO_TX_META_DESC_PHASE_MASK;
meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_FIRST_MASK;
+ meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK;
+
meta_desc->word2 |= ena_meta->l3_hdr_len &
ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK;
meta_desc->word2 |= (ena_meta->l3_hdr_offset <<
@@ -325,13 +325,36 @@ static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT) &
ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK;
- meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK;
+ return ena_com_sq_update_tail(io_sq);
+}
+
+static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
+ struct ena_com_tx_ctx *ena_tx_ctx,
+ bool *have_meta)
+{
+ struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
- /* Cached the meta desc */
- memcpy(&io_sq->cached_tx_meta, ena_meta,
- sizeof(struct ena_com_tx_meta));
+ /* When disable meta caching is set, don't bother to save the meta and
+ * compare it to the stored version, just create the meta
+ */
+ if (io_sq->disable_meta_caching) {
+ if (unlikely(!ena_tx_ctx->meta_valid))
+ return -EINVAL;
- return ena_com_sq_update_tail(io_sq);
+ *have_meta = true;
+ return ena_com_create_meta(io_sq, ena_meta);
+ }
+
+ if (ena_com_meta_desc_changed(io_sq, ena_tx_ctx)) {
+ *have_meta = true;
+ /* Cache the meta desc */
+ memcpy(&io_sq->cached_tx_meta, ena_meta,
+ sizeof(struct ena_com_tx_meta));
+ return ena_com_create_meta(io_sq, ena_meta);
+ }
+
+ *have_meta = false;
+ return 0;
}
static void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx,
@@ -402,12 +425,10 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
if (unlikely(rc))
return rc;
- have_meta = ena_tx_ctx->meta_valid && ena_com_meta_desc_changed(io_sq,
- ena_tx_ctx);
- if (have_meta) {
- rc = ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx);
- if (unlikely(rc))
- return rc;
+ rc = ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx, &have_meta);
+ if (unlikely(rc)) {
+ pr_err("failed to create and store tx meta desc\n");
+ return rc;
}
/* If the caller doesn't want to send packets */
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
index 8b1afd3b32f2..b6592cb93b04 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
@@ -157,7 +157,8 @@ static inline bool ena_com_is_doorbell_needed(struct ena_com_io_sq *io_sq,
llq_info = &io_sq->llq_info;
num_descs = ena_tx_ctx->num_bufs;
- if (unlikely(ena_com_meta_desc_changed(io_sq, ena_tx_ctx)))
+ if (llq_info->disable_meta_caching ||
+ unlikely(ena_com_meta_desc_changed(io_sq, ena_tx_ctx)))
++num_descs;
if (num_descs > llq_info->descs_num_before_header) {
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index e340b65af08c..430275bc0d04 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -164,13 +164,13 @@ static void ena_queue_stats(struct ena_adapter *adapter, u64 **data)
static void ena_dev_admin_queue_stats(struct ena_adapter *adapter, u64 **data)
{
const struct ena_stats *ena_stats;
- u32 *ptr;
+ u64 *ptr;
int i;
for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) {
ena_stats = &ena_stats_ena_com_strings[i];
- ptr = (u32 *)((uintptr_t)&adapter->ena_dev->admin_queue.stats +
+ ptr = (u64 *)((uintptr_t)&adapter->ena_dev->admin_queue.stats +
(uintptr_t)ena_stats->stat_offset);
*(*data)++ = *ptr;
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 3eb63b12dd68..6478c1e0d137 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -307,7 +307,7 @@ static int ena_xdp_xmit_buff(struct net_device *dev,
struct ena_rx_buffer *rx_info)
{
struct ena_adapter *adapter = netdev_priv(dev);
- struct ena_com_tx_ctx ena_tx_ctx = {0};
+ struct ena_com_tx_ctx ena_tx_ctx = {};
struct ena_tx_buffer *tx_info;
struct ena_ring *xdp_ring;
u16 next_to_use, req_id;
@@ -655,6 +655,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter,
txr->sgl_size = adapter->max_tx_sgl_size;
txr->smoothed_interval =
ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
+ txr->disable_meta_caching = adapter->disable_meta_caching;
/* Don't init RX queues for xdp queues */
if (!ENA_IS_XDP_INDEX(adapter, i)) {
@@ -959,8 +960,11 @@ static int ena_alloc_rx_page(struct ena_ring *rx_ring,
return -ENOMEM;
}
+ /* To enable NIC-side port-mirroring, AKA SPAN port,
+ * we make the buffer readable from the nic as well
+ */
dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
- DMA_FROM_DEVICE);
+ DMA_BIDIRECTIONAL);
if (unlikely(dma_mapping_error(rx_ring->dev, dma))) {
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->rx_stats.dma_mapping_err++;
@@ -993,10 +997,9 @@ static void ena_free_rx_page(struct ena_ring *rx_ring,
return;
}
- dma_unmap_page(rx_ring->dev,
- ena_buf->paddr - rx_ring->rx_headroom,
+ dma_unmap_page(rx_ring->dev, ena_buf->paddr - rx_ring->rx_headroom,
ENA_PAGE_SIZE,
- DMA_FROM_DEVICE);
+ DMA_BIDIRECTIONAL);
__free_page(page);
rx_info->page = NULL;
@@ -1431,7 +1434,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
do {
dma_unmap_page(rx_ring->dev,
dma_unmap_addr(&rx_info->ena_buf, paddr),
- ENA_PAGE_SIZE, DMA_FROM_DEVICE);
+ ENA_PAGE_SIZE, DMA_BIDIRECTIONAL);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
rx_info->page_offset, len, ENA_PAGE_SIZE);
@@ -1913,7 +1916,10 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
/* Update numa and unmask the interrupt only when schedule
* from the interrupt context (vs from sk_busy_loop)
*/
- if (napi_complete_done(napi, rx_work_done)) {
+ if (napi_complete_done(napi, rx_work_done) &&
+ READ_ONCE(ena_napi->interrupts_masked)) {
+ smp_rmb(); /* make sure interrupts_masked is read */
+ WRITE_ONCE(ena_napi->interrupts_masked, false);
/* We apply adaptive moderation on Rx path only.
* Tx uses static interrupt moderation.
*/
@@ -1961,6 +1967,9 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data)
ena_napi->first_interrupt = true;
+ WRITE_ONCE(ena_napi->interrupts_masked, true);
+ smp_wmb(); /* write interrupts_masked before calling napi */
+
napi_schedule_irqoff(&ena_napi->napi);
return IRQ_HANDLED;
@@ -2775,7 +2784,9 @@ int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
return dev_was_up ? ena_open(adapter->netdev) : 0;
}
-static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb)
+static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx,
+ struct sk_buff *skb,
+ bool disable_meta_caching)
{
u32 mss = skb_shinfo(skb)->gso_size;
struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
@@ -2819,7 +2830,9 @@ static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb)
ena_meta->l3_hdr_len = skb_network_header_len(skb);
ena_meta->l3_hdr_offset = skb_network_offset(skb);
ena_tx_ctx->meta_valid = 1;
-
+ } else if (disable_meta_caching) {
+ memset(ena_meta, 0, sizeof(*ena_meta));
+ ena_tx_ctx->meta_valid = 1;
} else {
ena_tx_ctx->meta_valid = 0;
}
@@ -3003,7 +3016,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
ena_tx_ctx.header_len = header_len;
/* set flags and meta data */
- ena_tx_csum(&ena_tx_ctx, skb);
+ ena_tx_csum(&ena_tx_ctx, skb, tx_ring->disable_meta_caching);
rc = ena_xmit_common(dev,
tx_ring,
@@ -3117,7 +3130,9 @@ static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pd
host_info->driver_supported_features =
ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
- ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK;
+ ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK |
+ ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK |
+ ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;
rc = ena_com_set_host_attributes(ena_dev);
if (rc) {
@@ -3270,10 +3285,71 @@ static int ena_device_validate_params(struct ena_adapter *adapter,
return 0;
}
+static void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
+{
+ llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
+ llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
+ llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
+ llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
+ llq_config->llq_ring_entry_size_value = 128;
+}
+
+static int ena_set_queues_placement_policy(struct pci_dev *pdev,
+ struct ena_com_dev *ena_dev,
+ struct ena_admin_feature_llq_desc *llq,
+ struct ena_llq_configurations *llq_default_configurations)
+{
+ int rc;
+ u32 llq_feature_mask;
+
+ llq_feature_mask = 1 << ENA_ADMIN_LLQ;
+ if (!(ena_dev->supported_features & llq_feature_mask)) {
+ dev_err(&pdev->dev,
+ "LLQ is not supported Fallback to host mode policy.\n");
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ return 0;
+ }
+
+ rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
+ if (unlikely(rc)) {
+ dev_err(&pdev->dev,
+ "Failed to configure the device mode. Fallback to host mode policy.\n");
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ }
+
+ return 0;
+}
+
+static int ena_map_llq_mem_bar(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
+ int bars)
+{
+ bool has_mem_bar = !!(bars & BIT(ENA_MEM_BAR));
+
+ if (!has_mem_bar) {
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+ dev_err(&pdev->dev,
+ "ENA device does not expose LLQ bar. Fallback to host mode policy.\n");
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ }
+
+ return 0;
+ }
+
+ ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
+ pci_resource_start(pdev, ENA_MEM_BAR),
+ pci_resource_len(pdev, ENA_MEM_BAR));
+
+ if (!ena_dev->mem_bar)
+ return -EFAULT;
+
+ return 0;
+}
+
static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
struct ena_com_dev_get_features_ctx *get_feat_ctx,
bool *wd_state)
{
+ struct ena_llq_configurations llq_config;
struct device *dev = &pdev->dev;
bool readless_supported;
u32 aenq_groups;
@@ -3364,6 +3440,15 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
*wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
+ set_default_llq_configurations(&llq_config);
+
+ rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
+ &llq_config);
+ if (rc) {
+ dev_err(&pdev->dev, "ena device init failed\n");
+ goto err_admin_init;
+ }
+
return 0;
err_admin_init:
@@ -3870,54 +3955,6 @@ static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev,
return max_num_io_queues;
}
-static int ena_set_queues_placement_policy(struct pci_dev *pdev,
- struct ena_com_dev *ena_dev,
- struct ena_admin_feature_llq_desc *llq,
- struct ena_llq_configurations *llq_default_configurations)
-{
- bool has_mem_bar;
- int rc;
- u32 llq_feature_mask;
-
- llq_feature_mask = 1 << ENA_ADMIN_LLQ;
- if (!(ena_dev->supported_features & llq_feature_mask)) {
- dev_err(&pdev->dev,
- "LLQ is not supported Fallback to host mode policy.\n");
- ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
- return 0;
- }
-
- has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR);
-
- rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
- if (unlikely(rc)) {
- dev_err(&pdev->dev,
- "Failed to configure the device mode. Fallback to host mode policy.\n");
- ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
- return 0;
- }
-
- /* Nothing to config, exit */
- if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
- return 0;
-
- if (!has_mem_bar) {
- dev_err(&pdev->dev,
- "ENA device does not expose LLQ bar. Fallback to host mode policy.\n");
- ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
- return 0;
- }
-
- ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
- pci_resource_start(pdev, ENA_MEM_BAR),
- pci_resource_len(pdev, ENA_MEM_BAR));
-
- if (!ena_dev->mem_bar)
- return -EFAULT;
-
- return 0;
-}
-
static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
struct net_device *netdev)
{
@@ -4033,14 +4070,6 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
pci_release_selected_regions(pdev, release_bars);
}
-static void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
-{
- llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
- llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
- llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
- llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
- llq_config->llq_ring_entry_size_value = 128;
-}
static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
{
@@ -4122,7 +4151,6 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
struct ena_com_dev_get_features_ctx get_feat_ctx;
- struct ena_llq_configurations llq_config;
struct ena_com_dev *ena_dev = NULL;
struct ena_adapter *adapter;
struct net_device *netdev;
@@ -4177,13 +4205,10 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_free_region;
}
- set_default_llq_configurations(&llq_config);
-
- rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx.llq,
- &llq_config);
+ rc = ena_map_llq_mem_bar(pdev, ena_dev, bars);
if (rc) {
- dev_err(&pdev->dev, "ena device init failed\n");
- goto err_device_destroy;
+ dev_err(&pdev->dev, "ena llq bar mapping failed\n");
+ goto err_free_ena_dev;
}
calc_queue_ctx.ena_dev = ena_dev;
@@ -4240,6 +4265,11 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
adapter->xdp_num_queues = 0;
adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ adapter->disable_meta_caching =
+ !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
+ BIT(ENA_ADMIN_DISABLE_META_CACHING));
+
adapter->wd_state = wd_state;
snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index ba030d260940..0c8504006247 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -167,6 +167,7 @@ struct ena_napi {
struct ena_ring *rx_ring;
struct ena_ring *xdp_ring;
bool first_interrupt;
+ bool interrupts_masked;
u32 qid;
struct dim dim;
};
@@ -297,6 +298,7 @@ struct ena_ring {
u8 tx_max_header_size;
bool first_interrupt;
+ bool disable_meta_caching;
u16 no_interrupt_event_cnt;
/* cpu for TPH */
@@ -398,6 +400,7 @@ struct ena_adapter {
bool wd_state;
bool dev_up_before_reset;
+ bool disable_meta_caching;
unsigned long last_keep_alive_jiffies;
struct u64_stats_sync syncp;
diff --git a/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h
index f80d2a47fa94..426e57e10a7f 100644
--- a/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h
+++ b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h
@@ -53,10 +53,15 @@
#define PCI_DEV_ID_ENA_LLQ_VF 0xec21
#endif
+#ifndef PCI_DEV_ID_ENA_RESRV0
+#define PCI_DEV_ID_ENA_RESRV0 0x0051
+#endif
+
#define ENA_PCI_ID_TABLE_ENTRY(devid) \
{PCI_DEVICE(PCI_VENDOR_ID_AMAZON, devid)},
static const struct pci_device_id ena_pci_tbl[] = {
+ ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_RESRV0)
ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_PF)
ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_PF)
ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_VF)
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index 6953d0546acb..1000c894064f 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -2847,7 +2847,7 @@ struct tg3_ocir {
u32 port1_flags;
u32 port2_flags;
u32 port3_flags;
- u32 reserved2[1];
+ u32 reserved2;
};
diff --git a/drivers/net/ethernet/brocade/bna/bfi.h b/drivers/net/ethernet/brocade/bna/bfi.h
index 09c912e984fe..f780d42c946d 100644
--- a/drivers/net/ethernet/brocade/bna/bfi.h
+++ b/drivers/net/ethernet/brocade/bna/bfi.h
@@ -389,7 +389,7 @@ struct bfi_msgq_mhdr {
u16 msg_token;
u16 num_entries;
u8 enet_id;
- u8 rsvd[1];
+ u8 rsvd;
} __packed;
#define bfi_msgq_mhdr_set(_mh, _mc, _mid, _tok, _enet_id) do { \
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index c59e9ccc2f18..adbc0d088070 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -532,6 +532,12 @@ static inline struct mbox_cmd *mbox_cmd_log_entry(struct mbox_cmd_log *log,
FW_HDR_FW_VER_BUILD_G(chip##FW_VERSION_BUILD))
#define FW_INTFVER(chip, intf) (FW_HDR_INTFVER_##intf)
+struct cxgb4_ethtool_lb_test {
+ struct completion completion;
+ int result;
+ int loopback;
+};
+
struct fw_info {
u8 chip;
char *fs_name;
@@ -685,6 +691,7 @@ struct port_info {
u16 nmirrorqsets;
u32 vi_mirror_count;
struct mutex vi_mirror_mutex; /* Sync access to Mirror VI info */
+ struct cxgb4_ethtool_lb_test ethtool_lb;
};
struct dentry;
@@ -1595,6 +1602,7 @@ void t4_free_sge_resources(struct adapter *adap);
void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q);
irq_handler_t t4_intr_handler(struct adapter *adap);
netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev);
+int cxgb4_selftest_lb_pkt(struct net_device *netdev);
int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
const struct pkt_gl *gl);
int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index b66a2e6cbbeb..12ef9ddd1e54 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -25,6 +25,15 @@ static void set_msglevel(struct net_device *dev, u32 val)
netdev2adap(dev)->msg_enable = val;
}
+enum cxgb4_ethtool_tests {
+ CXGB4_ETHTOOL_LB_TEST,
+ CXGB4_ETHTOOL_MAX_TEST,
+};
+
+static const char cxgb4_selftest_strings[CXGB4_ETHTOOL_MAX_TEST][ETH_GSTRING_LEN] = {
+ "Loop back test",
+};
+
static const char * const flash_region_strings[] = {
"All",
"Firmware",
@@ -166,6 +175,8 @@ static int get_sset_count(struct net_device *dev, int sset)
ARRAY_SIZE(loopback_stats_strings);
case ETH_SS_PRIV_FLAGS:
return ARRAY_SIZE(cxgb4_priv_flags_strings);
+ case ETH_SS_TEST:
+ return ARRAY_SIZE(cxgb4_selftest_strings);
default:
return -EOPNOTSUPP;
}
@@ -228,6 +239,9 @@ static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
} else if (stringset == ETH_SS_PRIV_FLAGS) {
memcpy(data, cxgb4_priv_flags_strings,
sizeof(cxgb4_priv_flags_strings));
+ } else if (stringset == ETH_SS_TEST) {
+ memcpy(data, cxgb4_selftest_strings,
+ sizeof(cxgb4_selftest_strings));
}
}
@@ -2056,6 +2070,43 @@ static int cxgb4_set_priv_flags(struct net_device *netdev, u32 flags)
return 0;
}
+static void cxgb4_lb_test(struct net_device *netdev, u64 *lb_status)
+{
+ int dev_state = netif_running(netdev);
+
+ if (dev_state) {
+ netif_tx_stop_all_queues(netdev);
+ netif_carrier_off(netdev);
+ }
+
+ *lb_status = cxgb4_selftest_lb_pkt(netdev);
+
+ if (dev_state) {
+ netif_tx_start_all_queues(netdev);
+ netif_carrier_on(netdev);
+ }
+}
+
+static void cxgb4_self_test(struct net_device *netdev,
+ struct ethtool_test *eth_test, u64 *data)
+{
+ struct port_info *pi = netdev_priv(netdev);
+ struct adapter *adap = pi->adapter;
+
+ memset(data, 0, sizeof(u64) * CXGB4_ETHTOOL_MAX_TEST);
+
+ if (!(adap->flags & CXGB4_FW_OK)) {
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+ return;
+ }
+
+ if (eth_test->flags == ETH_TEST_FL_OFFLINE)
+ cxgb4_lb_test(netdev, &data[CXGB4_ETHTOOL_LB_TEST]);
+
+ if (data[CXGB4_ETHTOOL_LB_TEST])
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+}
+
static const struct ethtool_ops cxgb_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
ETHTOOL_COALESCE_RX_MAX_FRAMES |
@@ -2090,6 +2141,7 @@ static const struct ethtool_ops cxgb_ethtool_ops = {
.get_rxfh_indir_size = get_rss_table_size,
.get_rxfh = get_rss_table,
.set_rxfh = set_rss_table,
+ .self_test = cxgb4_self_test,
.flash_device = set_flash,
.get_ts_info = get_ts_info,
.set_dump = set_dump,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index f9c6f13d7c99..3f0fdffdb2b5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2537,6 +2537,80 @@ static void ctrlq_check_stop(struct sge_ctrl_txq *q, struct fw_wr_hdr *wr)
}
}
+#define CXGB4_SELFTEST_LB_STR "CHELSIO_SELFTEST"
+
+int cxgb4_selftest_lb_pkt(struct net_device *netdev)
+{
+ struct port_info *pi = netdev_priv(netdev);
+ struct adapter *adap = pi->adapter;
+ struct cxgb4_ethtool_lb_test *lb;
+ int ret, i = 0, pkt_len, credits;
+ struct fw_eth_tx_pkt_wr *wr;
+ struct cpl_tx_pkt_core *cpl;
+ u32 ctrl0, ndesc, flits;
+ struct sge_eth_txq *q;
+ u8 *sgl;
+
+ pkt_len = ETH_HLEN + sizeof(CXGB4_SELFTEST_LB_STR);
+
+ flits = DIV_ROUND_UP(pkt_len + sizeof(struct cpl_tx_pkt) +
+ sizeof(*wr), sizeof(__be64));
+ ndesc = flits_to_desc(flits);
+
+ lb = &pi->ethtool_lb;
+ lb->loopback = 1;
+
+ q = &adap->sge.ethtxq[pi->first_qset];
+
+ reclaim_completed_tx(adap, &q->q, -1, true);
+ credits = txq_avail(&q->q) - ndesc;
+ if (unlikely(credits < 0))
+ return -ENOMEM;
+
+ wr = (void *)&q->q.desc[q->q.pidx];
+ memset(wr, 0, sizeof(struct tx_desc));
+
+ wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) |
+ FW_WR_IMMDLEN_V(pkt_len +
+ sizeof(*cpl)));
+ wr->equiq_to_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2)));
+ wr->r3 = cpu_to_be64(0);
+
+ cpl = (void *)(wr + 1);
+ sgl = (u8 *)(cpl + 1);
+
+ ctrl0 = TXPKT_OPCODE_V(CPL_TX_PKT_XT) | TXPKT_PF_V(adap->pf) |
+ TXPKT_INTF_V(pi->tx_chan + 4);
+
+ cpl->ctrl0 = htonl(ctrl0);
+ cpl->pack = htons(0);
+ cpl->len = htons(pkt_len);
+ cpl->ctrl1 = cpu_to_be64(TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F);
+
+ eth_broadcast_addr(sgl);
+ i += ETH_ALEN;
+ ether_addr_copy(&sgl[i], netdev->dev_addr);
+ i += ETH_ALEN;
+
+ snprintf(&sgl[i], sizeof(CXGB4_SELFTEST_LB_STR), "%s",
+ CXGB4_SELFTEST_LB_STR);
+
+ init_completion(&lb->completion);
+ txq_advance(&q->q, ndesc);
+ cxgb4_ring_tx_db(adap, &q->q, ndesc);
+
+ /* wait for the pkt to return */
+ ret = wait_for_completion_timeout(&lb->completion, 10 * HZ);
+ if (!ret)
+ ret = -ETIMEDOUT;
+ else
+ ret = lb->result;
+
+ lb->loopback = 0;
+
+ return ret;
+}
+
/**
* ctrl_xmit - send a packet through an SGE control Tx queue
* @q: the control queue
@@ -3413,6 +3487,31 @@ static void t4_tx_completion_handler(struct sge_rspq *rspq,
t4_sge_eth_txq_egress_update(adapter, txq, -1);
}
+static int cxgb4_validate_lb_pkt(struct port_info *pi, const struct pkt_gl *si)
+{
+ struct adapter *adap = pi->adapter;
+ struct cxgb4_ethtool_lb_test *lb;
+ struct sge *s = &adap->sge;
+ struct net_device *netdev;
+ u8 *data;
+ int i;
+
+ netdev = adap->port[pi->port_id];
+ lb = &pi->ethtool_lb;
+ data = si->va + s->pktshift;
+
+ i = ETH_ALEN;
+ if (!ether_addr_equal(data + i, netdev->dev_addr))
+ return -1;
+
+ i += ETH_ALEN;
+ if (strcmp(&data[i], CXGB4_SELFTEST_LB_STR))
+ lb->result = -EIO;
+
+ complete(&lb->completion);
+ return 0;
+}
+
/**
* t4_ethrx_handler - process an ingress ethernet packet
* @q: the response queue that received the packet
@@ -3436,6 +3535,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
struct port_info *pi;
int ret = 0;
+ pi = netdev_priv(q->netdev);
/* If we're looking at TX Queue CIDX Update, handle that separately
* and return.
*/
@@ -3463,6 +3563,12 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
if (err_vec)
rxq->stats.bad_rx_pkts++;
+ if (unlikely(pi->ethtool_lb.loopback && pkt->iff >= NCHAN)) {
+ ret = cxgb4_validate_lb_pkt(pi, si);
+ if (!ret)
+ return 0;
+ }
+
if (((pkt->l2info & htonl(RXF_TCP_F)) ||
tnl_hdr_len) &&
(q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) {
@@ -3476,7 +3582,6 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
rxq->stats.rx_drops++;
return 0;
}
- pi = netdev_priv(q->netdev);
/* Handle PTP Event Rx packet */
if (unlikely(pi->ptp_enable)) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.c b/drivers/net/ethernet/chelsio/cxgb4/smt.c
index cbe72ed27b1e..e617e4aabbcc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/smt.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/smt.c
@@ -55,7 +55,7 @@ struct smt_data *t4_init_smt(void)
for (i = 0; i < s->smt_size; ++i) {
s->smtab[i].idx = i;
s->smtab[i].state = SMT_STATE_UNUSED;
- memset(&s->smtab[i].src_mac, 0, ETH_ALEN);
+ eth_zero_addr(s->smtab[i].src_mac);
spin_lock_init(&s->smtab[i].lock);
s->smtab[i].refcnt = 0;
}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index d0cc1dc49aaa..c1bea9132f50 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -15,6 +15,7 @@
#include <linux/fsl/mc.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
+#include <net/pkt_cls.h>
#include <net/sock.h>
#include "dpaa2-eth.h"
@@ -2213,17 +2214,13 @@ static int update_xps(struct dpaa2_eth_priv *priv)
return err;
}
-static int dpaa2_eth_setup_tc(struct net_device *net_dev,
- enum tc_setup_type type, void *type_data)
+static int dpaa2_eth_setup_mqprio(struct net_device *net_dev,
+ struct tc_mqprio_qopt *mqprio)
{
struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
- struct tc_mqprio_qopt *mqprio = type_data;
u8 num_tc, num_queues;
int i;
- if (type != TC_SETUP_QDISC_MQPRIO)
- return -EOPNOTSUPP;
-
mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
num_queues = dpaa2_eth_queue_count(priv);
num_tc = mqprio->num_tc;
@@ -2255,6 +2252,60 @@ out:
return 0;
}
+#define bps_to_mbits(rate) (div_u64((rate), 1000000) * 8)
+
+static int dpaa2_eth_setup_tbf(struct net_device *net_dev, struct tc_tbf_qopt_offload *p)
+{
+ struct tc_tbf_qopt_offload_replace_params *cfg = &p->replace_params;
+ struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+ struct dpni_tx_shaping_cfg tx_cr_shaper = { 0 };
+ struct dpni_tx_shaping_cfg tx_er_shaper = { 0 };
+ int err;
+
+ if (p->command == TC_TBF_STATS)
+ return -EOPNOTSUPP;
+
+ /* Only per port Tx shaping */
+ if (p->parent != TC_H_ROOT)
+ return -EOPNOTSUPP;
+
+ if (p->command == TC_TBF_REPLACE) {
+ if (cfg->max_size > DPAA2_ETH_MAX_BURST_SIZE) {
+ netdev_err(net_dev, "burst size cannot be greater than %d\n",
+ DPAA2_ETH_MAX_BURST_SIZE);
+ return -EINVAL;
+ }
+
+ tx_cr_shaper.max_burst_size = cfg->max_size;
+ /* The TBF interface is in bytes/s, whereas DPAA2 expects the
+ * rate in Mbits/s
+ */
+ tx_cr_shaper.rate_limit = bps_to_mbits(cfg->rate.rate_bytes_ps);
+ }
+
+ err = dpni_set_tx_shaping(priv->mc_io, 0, priv->mc_token, &tx_cr_shaper,
+ &tx_er_shaper, 0);
+ if (err) {
+ netdev_err(net_dev, "dpni_set_tx_shaping() = %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int dpaa2_eth_setup_tc(struct net_device *net_dev,
+ enum tc_setup_type type, void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_QDISC_MQPRIO:
+ return dpaa2_eth_setup_mqprio(net_dev, type_data);
+ case TC_SETUP_QDISC_TBF:
+ return dpaa2_eth_setup_tbf(net_dev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct net_device_ops dpaa2_eth_ops = {
.ndo_open = dpaa2_eth_open,
.ndo_start_xmit = dpaa2_eth_tx,
@@ -3719,7 +3770,7 @@ static int netdev_init(struct net_device *net_dev)
net_dev->features = NETIF_F_RXCSUM |
NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_SG | NETIF_F_HIGHDMA |
- NETIF_F_LLTX;
+ NETIF_F_LLTX | NETIF_F_HW_TC;
net_dev->hw_features = net_dev->features;
return 0;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 9138a35a68f9..7f3c41dc98f2 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -43,6 +43,9 @@
*/
#define DPAA2_ETH_FQ_TAILDROP_THRESH (1024 * 1024)
+/* Maximum burst size value for Tx shaping */
+#define DPAA2_ETH_MAX_BURST_SIZE 0xF7FF
+
/* Maximum number of Tx confirmation frames to be processed
* in a single NAPI call
*/
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
index fd069f67be9b..593e3812af93 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
@@ -626,4 +626,17 @@ struct dpni_cmd_set_congestion_notification {
__le32 threshold_exit;
};
+#define DPNI_COUPLED_SHIFT 0
+#define DPNI_COUPLED_SIZE 1
+
+struct dpni_cmd_set_tx_shaping {
+ __le16 tx_cr_max_burst_size;
+ __le16 tx_er_max_burst_size;
+ __le32 pad;
+ __le32 tx_cr_rate_limit;
+ __le32 tx_er_rate_limit;
+ /* from LSB: coupled:1 */
+ u8 coupled;
+};
+
#endif /* _FSL_DPNI_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c
index 426100607854..68ed4c41b282 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c
@@ -1963,3 +1963,39 @@ int dpni_clear_qos_table(struct fsl_mc_io *mc_io,
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+
+/**
+ * dpni_set_tx_shaping() - Set the transmit shaping
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @tx_cr_shaper: TX committed rate shaping configuration
+ * @tx_er_shaper: TX excess rate shaping configuration
+ * @coupled: Committed and excess rate shapers are coupled
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpni_set_tx_shaping(struct fsl_mc_io *mc_io,
+ u32 cmd_flags,
+ u16 token,
+ const struct dpni_tx_shaping_cfg *tx_cr_shaper,
+ const struct dpni_tx_shaping_cfg *tx_er_shaper,
+ int coupled)
+{
+ struct dpni_cmd_set_tx_shaping *cmd_params;
+ struct fsl_mc_command cmd = { 0 };
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_TX_SHAPING,
+ cmd_flags,
+ token);
+ cmd_params = (struct dpni_cmd_set_tx_shaping *)cmd.params;
+ cmd_params->tx_cr_max_burst_size = cpu_to_le16(tx_cr_shaper->max_burst_size);
+ cmd_params->tx_er_max_burst_size = cpu_to_le16(tx_er_shaper->max_burst_size);
+ cmd_params->tx_cr_rate_limit = cpu_to_le32(tx_cr_shaper->rate_limit);
+ cmd_params->tx_er_rate_limit = cpu_to_le32(tx_er_shaper->rate_limit);
+ dpni_set_field(cmd_params->coupled, COUPLED, coupled);
+
+ /* send command to mc*/
+ return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h
index e874d8084142..39387991a1f9 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h
@@ -1062,5 +1062,21 @@ int dpni_get_api_version(struct fsl_mc_io *mc_io,
u32 cmd_flags,
u16 *major_ver,
u16 *minor_ver);
+/**
+ * struct dpni_tx_shaping - Structure representing DPNI tx shaping configuration
+ * @rate_limit: Rate in Mbps
+ * @max_burst_size: Burst size in bytes (up to 64KB)
+ */
+struct dpni_tx_shaping_cfg {
+ u32 rate_limit;
+ u16 max_burst_size;
+};
+
+int dpni_set_tx_shaping(struct fsl_mc_io *mc_io,
+ u32 cmd_flags,
+ u16 token,
+ const struct dpni_tx_shaping_cfg *tx_cr_shaper,
+ const struct dpni_tx_shaping_cfg *tx_er_shaper,
+ int coupled);
#endif /* __FSL_DPNI_H */
diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index 2b43848e1363..37b804f8bd76 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -4,6 +4,7 @@ config FSL_ENETC
depends on PCI && PCI_MSI
select FSL_ENETC_MDIO
select PHYLIB
+ select DIMLIB
help
This driver supports NXP ENETC gigabit ethernet controller PCIe
physical function (PF) devices, managing ENETC Ports at a privileged
@@ -15,6 +16,7 @@ config FSL_ENETC_VF
tristate "ENETC VF driver"
depends on PCI && PCI_MSI
select PHYLIB
+ select DIMLIB
help
This driver supports NXP ENETC gigabit ethernet controller PCIe
virtual function (VF) devices enabled by the ENETC PF driver.
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 3f32b85ba2cf..f50353cbb4db 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -265,6 +265,7 @@ static irqreturn_t enetc_msix(int irq, void *data)
/* disable interrupts */
enetc_wr_reg(v->rbier, 0);
+ enetc_wr_reg(v->ricr1, v->rx_ictt);
for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS)
enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), 0);
@@ -278,6 +279,34 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget);
static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
struct napi_struct *napi, int work_limit);
+static void enetc_rx_dim_work(struct work_struct *w)
+{
+ struct dim *dim = container_of(w, struct dim, work);
+ struct dim_cq_moder moder =
+ net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
+ struct enetc_int_vector *v =
+ container_of(dim, struct enetc_int_vector, rx_dim);
+
+ v->rx_ictt = enetc_usecs_to_cycles(moder.usec);
+ dim->state = DIM_START_MEASURE;
+}
+
+static void enetc_rx_net_dim(struct enetc_int_vector *v)
+{
+ struct dim_sample dim_sample;
+
+ v->comp_cnt++;
+
+ if (!v->rx_napi_work)
+ return;
+
+ dim_update_sample(v->comp_cnt,
+ v->rx_ring.stats.packets,
+ v->rx_ring.stats.bytes,
+ &dim_sample);
+ net_dim(&v->rx_dim, dim_sample);
+}
+
static int enetc_poll(struct napi_struct *napi, int budget)
{
struct enetc_int_vector
@@ -293,12 +322,19 @@ static int enetc_poll(struct napi_struct *napi, int budget)
work_done = enetc_clean_rx_ring(&v->rx_ring, napi, budget);
if (work_done == budget)
complete = false;
+ if (work_done)
+ v->rx_napi_work = true;
if (!complete)
return budget;
napi_complete_done(napi, work_done);
+ if (likely(v->rx_dim_en))
+ enetc_rx_net_dim(v);
+
+ v->rx_napi_work = false;
+
/* enable interrupts */
enetc_wr_reg(v->rbier, ENETC_RBIER_RXTIE);
@@ -1064,8 +1100,8 @@ void enetc_init_si_rings_params(struct enetc_ndev_priv *priv)
struct enetc_si *si = priv->si;
int cpus = num_online_cpus();
- priv->tx_bd_count = ENETC_BDR_DEFAULT_SIZE;
- priv->rx_bd_count = ENETC_BDR_DEFAULT_SIZE;
+ priv->tx_bd_count = ENETC_TX_RING_DEFAULT_SIZE;
+ priv->rx_bd_count = ENETC_RX_RING_DEFAULT_SIZE;
/* Enable all available TX rings in order to configure as many
* priorities as possible, when needed.
@@ -1074,6 +1110,8 @@ void enetc_init_si_rings_params(struct enetc_ndev_priv *priv)
priv->num_rx_rings = min_t(int, cpus, si->num_rx_rings);
priv->num_tx_rings = si->num_tx_rings;
priv->bdr_int_num = cpus;
+ priv->ic_mode = ENETC_IC_RX_ADAPTIVE | ENETC_IC_TX_MANUAL;
+ priv->tx_ictt = ENETC_TXIC_TIMETHR;
/* SI specific */
si->cbd_ring.bd_count = ENETC_CBDR_DEFAULT_SIZE;
@@ -1140,7 +1178,7 @@ static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring)
tx_ring->next_to_clean = enetc_txbdr_rd(hw, idx, ENETC_TBCIR);
/* enable Tx ints by setting pkt thr to 1 */
- enetc_txbdr_wr(hw, idx, ENETC_TBICIR0, ENETC_TBICIR0_ICEN | 0x1);
+ enetc_txbdr_wr(hw, idx, ENETC_TBICR0, ENETC_TBICR0_ICEN | 0x1);
tbmr = ENETC_TBMR_EN;
if (tx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_TX)
@@ -1174,7 +1212,7 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
enetc_rxbdr_wr(hw, idx, ENETC_RBPIR, 0);
/* enable Rx ints by setting pkt thr to 1 */
- enetc_rxbdr_wr(hw, idx, ENETC_RBICIR0, ENETC_RBICIR0_ICEN | 0x1);
+ enetc_rxbdr_wr(hw, idx, ENETC_RBICR0, ENETC_RBICR0_ICEN | 0x1);
rbmr = ENETC_RBMR_EN;
@@ -1264,9 +1302,11 @@ static int enetc_setup_irqs(struct enetc_ndev_priv *priv)
dev_err(priv->dev, "request_irq() failed!\n");
goto irq_err;
}
+ disable_irq(irq);
v->tbier_base = hw->reg + ENETC_BDR(TX, 0, ENETC_TBIER);
v->rbier = hw->reg + ENETC_BDR(RX, i, ENETC_RBIER);
+ v->ricr1 = hw->reg + ENETC_BDR(RX, i, ENETC_RBICR1);
enetc_wr(hw, ENETC_SIMSIRRV(i), entry);
@@ -1306,23 +1346,42 @@ static void enetc_free_irqs(struct enetc_ndev_priv *priv)
}
}
-static void enetc_enable_interrupts(struct enetc_ndev_priv *priv)
+static void enetc_setup_interrupts(struct enetc_ndev_priv *priv)
{
+ struct enetc_hw *hw = &priv->si->hw;
+ u32 icpt, ictt;
int i;
/* enable Tx & Rx event indication */
+ if (priv->ic_mode &
+ (ENETC_IC_RX_MANUAL | ENETC_IC_RX_ADAPTIVE)) {
+ icpt = ENETC_RBICR0_SET_ICPT(ENETC_RXIC_PKTTHR);
+ /* init to non-0 minimum, will be adjusted later */
+ ictt = 0x1;
+ } else {
+ icpt = 0x1; /* enable Rx ints by setting pkt thr to 1 */
+ ictt = 0;
+ }
+
for (i = 0; i < priv->num_rx_rings; i++) {
- enetc_rxbdr_wr(&priv->si->hw, i,
- ENETC_RBIER, ENETC_RBIER_RXTIE);
+ enetc_rxbdr_wr(hw, i, ENETC_RBICR1, ictt);
+ enetc_rxbdr_wr(hw, i, ENETC_RBICR0, ENETC_RBICR0_ICEN | icpt);
+ enetc_rxbdr_wr(hw, i, ENETC_RBIER, ENETC_RBIER_RXTIE);
}
+ if (priv->ic_mode & ENETC_IC_TX_MANUAL)
+ icpt = ENETC_TBICR0_SET_ICPT(ENETC_TXIC_PKTTHR);
+ else
+ icpt = 0x1; /* enable Tx ints by setting pkt thr to 1 */
+
for (i = 0; i < priv->num_tx_rings; i++) {
- enetc_txbdr_wr(&priv->si->hw, i,
- ENETC_TBIER, ENETC_TBIER_TXTIE);
+ enetc_txbdr_wr(hw, i, ENETC_TBICR1, priv->tx_ictt);
+ enetc_txbdr_wr(hw, i, ENETC_TBICR0, ENETC_TBICR0_ICEN | icpt);
+ enetc_txbdr_wr(hw, i, ENETC_TBIER, ENETC_TBIER_TXTIE);
}
}
-static void enetc_disable_interrupts(struct enetc_ndev_priv *priv)
+static void enetc_clear_interrupts(struct enetc_ndev_priv *priv)
{
int i;
@@ -1369,10 +1428,33 @@ static int enetc_phy_connect(struct net_device *ndev)
return 0;
}
+void enetc_start(struct net_device *ndev)
+{
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ int i;
+
+ enetc_setup_interrupts(priv);
+
+ for (i = 0; i < priv->bdr_int_num; i++) {
+ int irq = pci_irq_vector(priv->si->pdev,
+ ENETC_BDR_INT_BASE_IDX + i);
+
+ napi_enable(&priv->int_vector[i]->napi);
+ enable_irq(irq);
+ }
+
+ if (ndev->phydev)
+ phy_start(ndev->phydev);
+ else
+ netif_carrier_on(ndev);
+
+ netif_tx_start_all_queues(ndev);
+}
+
int enetc_open(struct net_device *ndev)
{
struct enetc_ndev_priv *priv = netdev_priv(ndev);
- int i, err;
+ int err;
err = enetc_setup_irqs(priv);
if (err)
@@ -1390,8 +1472,6 @@ int enetc_open(struct net_device *ndev)
if (err)
goto err_alloc_rx;
- enetc_setup_bdrs(priv);
-
err = netif_set_real_num_tx_queues(ndev, priv->num_tx_rings);
if (err)
goto err_set_queues;
@@ -1400,17 +1480,8 @@ int enetc_open(struct net_device *ndev)
if (err)
goto err_set_queues;
- for (i = 0; i < priv->bdr_int_num; i++)
- napi_enable(&priv->int_vector[i]->napi);
-
- enetc_enable_interrupts(priv);
-
- if (ndev->phydev)
- phy_start(ndev->phydev);
- else
- netif_carrier_on(ndev);
-
- netif_tx_start_all_queues(ndev);
+ enetc_setup_bdrs(priv);
+ enetc_start(ndev);
return 0;
@@ -1427,28 +1498,39 @@ err_phy_connect:
return err;
}
-int enetc_close(struct net_device *ndev)
+void enetc_stop(struct net_device *ndev)
{
struct enetc_ndev_priv *priv = netdev_priv(ndev);
int i;
netif_tx_stop_all_queues(ndev);
- if (ndev->phydev) {
- phy_stop(ndev->phydev);
- phy_disconnect(ndev->phydev);
- } else {
- netif_carrier_off(ndev);
- }
-
for (i = 0; i < priv->bdr_int_num; i++) {
+ int irq = pci_irq_vector(priv->si->pdev,
+ ENETC_BDR_INT_BASE_IDX + i);
+
+ disable_irq(irq);
napi_synchronize(&priv->int_vector[i]->napi);
napi_disable(&priv->int_vector[i]->napi);
}
- enetc_disable_interrupts(priv);
+ if (ndev->phydev)
+ phy_stop(ndev->phydev);
+ else
+ netif_carrier_off(ndev);
+
+ enetc_clear_interrupts(priv);
+}
+
+int enetc_close(struct net_device *ndev)
+{
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+
+ enetc_stop(ndev);
enetc_clear_bdrs(priv);
+ if (ndev->phydev)
+ phy_disconnect(ndev->phydev);
enetc_free_rxtx_rings(priv);
enetc_free_rx_resources(priv);
enetc_free_tx_resources(priv);
@@ -1742,6 +1824,12 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv)
priv->int_vector[i] = v;
+ /* init defaults for adaptive IC */
+ if (priv->ic_mode & ENETC_IC_RX_ADAPTIVE) {
+ v->rx_ictt = 0x1;
+ v->rx_dim_en = true;
+ }
+ INIT_WORK(&v->rx_dim.work, enetc_rx_dim_work);
netif_napi_add(priv->ndev, &v->napi, enetc_poll,
NAPI_POLL_WEIGHT);
v->count_tx_rings = v_tx_rings;
@@ -1777,6 +1865,7 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv)
fail:
while (i--) {
netif_napi_del(&priv->int_vector[i]->napi);
+ cancel_work_sync(&priv->int_vector[i]->rx_dim.work);
kfree(priv->int_vector[i]);
}
@@ -1793,6 +1882,7 @@ void enetc_free_msix(struct enetc_ndev_priv *priv)
struct enetc_int_vector *v = priv->int_vector[i];
netif_napi_del(&v->napi);
+ cancel_work_sync(&v->rx_dim.work);
}
for (i = 0; i < priv->num_rx_rings; i++)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index b705464f6882..d309803cfeb6 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -10,6 +10,7 @@
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/phy.h>
+#include <linux/dim.h>
#include "enetc_hw.h"
@@ -44,8 +45,9 @@ struct enetc_ring_stats {
unsigned int rx_alloc_errs;
};
-#define ENETC_BDR_DEFAULT_SIZE 1024
-#define ENETC_DEFAULT_TX_WORK 256
+#define ENETC_RX_RING_DEFAULT_SIZE 512
+#define ENETC_TX_RING_DEFAULT_SIZE 256
+#define ENETC_DEFAULT_TX_WORK (ENETC_TX_RING_DEFAULT_SIZE / 2)
struct enetc_bdr {
struct device *dev; /* for DMA mapping */
@@ -189,14 +191,19 @@ static inline bool enetc_si_is_pf(struct enetc_si *si)
struct enetc_int_vector {
void __iomem *rbier;
void __iomem *tbier_base;
+ void __iomem *ricr1;
unsigned long tx_rings_map;
int count_tx_rings;
- struct napi_struct napi;
+ u32 rx_ictt;
+ u16 comp_cnt;
+ bool rx_dim_en, rx_napi_work;
+ struct napi_struct napi ____cacheline_aligned_in_smp;
+ struct dim rx_dim ____cacheline_aligned_in_smp;
char name[ENETC_INT_NAME_MAX];
- struct enetc_bdr rx_ring ____cacheline_aligned_in_smp;
+ struct enetc_bdr rx_ring;
struct enetc_bdr tx_ring[];
-};
+} ____cacheline_aligned_in_smp;
struct enetc_cls_rule {
struct ethtool_rx_flow_spec fs;
@@ -220,6 +227,21 @@ enum enetc_active_offloads {
ENETC_F_QCI = BIT(3),
};
+/* interrupt coalescing modes */
+enum enetc_ic_mode {
+ /* one interrupt per frame */
+ ENETC_IC_NONE = 0,
+ /* activated when int coalescing time is set to a non-0 value */
+ ENETC_IC_RX_MANUAL = BIT(0),
+ ENETC_IC_TX_MANUAL = BIT(1),
+ /* use dynamic interrupt moderation */
+ ENETC_IC_RX_ADAPTIVE = BIT(2),
+};
+
+#define ENETC_RXIC_PKTTHR min_t(u32, 256, ENETC_RX_RING_DEFAULT_SIZE / 2)
+#define ENETC_TXIC_PKTTHR min_t(u32, 128, ENETC_TX_RING_DEFAULT_SIZE / 2)
+#define ENETC_TXIC_TIMETHR enetc_usecs_to_cycles(600)
+
struct enetc_ndev_priv {
struct net_device *ndev;
struct device *dev; /* dma-mapping device */
@@ -244,6 +266,8 @@ struct enetc_ndev_priv {
struct device_node *phy_node;
phy_interface_t if_mode;
+ int ic_mode;
+ u32 tx_ictt;
};
/* Messaging */
@@ -273,6 +297,8 @@ void enetc_free_si_resources(struct enetc_ndev_priv *priv);
int enetc_open(struct net_device *ndev);
int enetc_close(struct net_device *ndev);
+void enetc_start(struct net_device *ndev);
+void enetc_stop(struct net_device *ndev);
netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev);
struct net_device_stats *enetc_get_stats(struct net_device *ndev);
int enetc_set_features(struct net_device *ndev,
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index 34bd1f3fb415..1dab83fbca77 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -14,12 +14,14 @@ static const u32 enetc_si_regs[] = {
static const u32 enetc_txbdr_regs[] = {
ENETC_TBMR, ENETC_TBSR, ENETC_TBBAR0, ENETC_TBBAR1,
- ENETC_TBPIR, ENETC_TBCIR, ENETC_TBLENR, ENETC_TBIER
+ ENETC_TBPIR, ENETC_TBCIR, ENETC_TBLENR, ENETC_TBIER, ENETC_TBICR0,
+ ENETC_TBICR1
};
static const u32 enetc_rxbdr_regs[] = {
ENETC_RBMR, ENETC_RBSR, ENETC_RBBSR, ENETC_RBCIR, ENETC_RBBAR0,
- ENETC_RBBAR1, ENETC_RBPIR, ENETC_RBLENR, ENETC_RBICIR0, ENETC_RBIER
+ ENETC_RBBAR1, ENETC_RBPIR, ENETC_RBLENR, ENETC_RBIER, ENETC_RBICR0,
+ ENETC_RBICR1
};
static const u32 enetc_port_regs[] = {
@@ -561,6 +563,74 @@ static void enetc_get_ringparam(struct net_device *ndev,
}
}
+static int enetc_get_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *ic)
+{
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ struct enetc_int_vector *v = priv->int_vector[0];
+
+ ic->tx_coalesce_usecs = enetc_cycles_to_usecs(priv->tx_ictt);
+ ic->rx_coalesce_usecs = enetc_cycles_to_usecs(v->rx_ictt);
+
+ ic->tx_max_coalesced_frames = ENETC_TXIC_PKTTHR;
+ ic->rx_max_coalesced_frames = ENETC_RXIC_PKTTHR;
+
+ ic->use_adaptive_rx_coalesce = priv->ic_mode & ENETC_IC_RX_ADAPTIVE;
+
+ return 0;
+}
+
+static int enetc_set_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *ic)
+{
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ u32 rx_ictt, tx_ictt;
+ int i, ic_mode;
+ bool changed;
+
+ tx_ictt = enetc_usecs_to_cycles(ic->tx_coalesce_usecs);
+ rx_ictt = enetc_usecs_to_cycles(ic->rx_coalesce_usecs);
+
+ if (ic->rx_max_coalesced_frames != ENETC_RXIC_PKTTHR)
+ return -EOPNOTSUPP;
+
+ if (ic->tx_max_coalesced_frames != ENETC_TXIC_PKTTHR)
+ return -EOPNOTSUPP;
+
+ ic_mode = ENETC_IC_NONE;
+ if (ic->use_adaptive_rx_coalesce) {
+ ic_mode |= ENETC_IC_RX_ADAPTIVE;
+ rx_ictt = 0x1;
+ } else {
+ ic_mode |= rx_ictt ? ENETC_IC_RX_MANUAL : 0;
+ }
+
+ ic_mode |= tx_ictt ? ENETC_IC_TX_MANUAL : 0;
+
+ /* commit the settings */
+ changed = (ic_mode != priv->ic_mode) || (priv->tx_ictt != tx_ictt);
+
+ priv->ic_mode = ic_mode;
+ priv->tx_ictt = tx_ictt;
+
+ for (i = 0; i < priv->bdr_int_num; i++) {
+ struct enetc_int_vector *v = priv->int_vector[i];
+
+ v->rx_ictt = rx_ictt;
+ v->rx_dim_en = !!(ic_mode & ENETC_IC_RX_ADAPTIVE);
+ }
+
+ if (netif_running(ndev) && changed) {
+ /* reconfigure the operation mode of h/w interrupts,
+ * traffic needs to be paused in the process
+ */
+ enetc_stop(ndev);
+ enetc_start(ndev);
+ }
+
+ return 0;
+}
+
static int enetc_get_ts_info(struct net_device *ndev,
struct ethtool_ts_info *info)
{
@@ -617,6 +687,9 @@ static int enetc_set_wol(struct net_device *dev,
}
static const struct ethtool_ops enetc_pf_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_MAX_FRAMES |
+ ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
.get_regs_len = enetc_get_reglen,
.get_regs = enetc_get_regs,
.get_sset_count = enetc_get_sset_count,
@@ -629,6 +702,8 @@ static const struct ethtool_ops enetc_pf_ethtool_ops = {
.get_rxfh = enetc_get_rxfh,
.set_rxfh = enetc_set_rxfh,
.get_ringparam = enetc_get_ringparam,
+ .get_coalesce = enetc_get_coalesce,
+ .set_coalesce = enetc_set_coalesce,
.get_link_ksettings = phy_ethtool_get_link_ksettings,
.set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_link = ethtool_op_get_link,
@@ -638,6 +713,9 @@ static const struct ethtool_ops enetc_pf_ethtool_ops = {
};
static const struct ethtool_ops enetc_vf_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_MAX_FRAMES |
+ ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
.get_regs_len = enetc_get_reglen,
.get_regs = enetc_get_regs,
.get_sset_count = enetc_get_sset_count,
@@ -649,6 +727,8 @@ static const struct ethtool_ops enetc_vf_ethtool_ops = {
.get_rxfh = enetc_get_rxfh,
.set_rxfh = enetc_set_rxfh,
.get_ringparam = enetc_get_ringparam,
+ .get_coalesce = enetc_get_coalesce,
+ .set_coalesce = enetc_set_coalesce,
.get_link = ethtool_op_get_link,
.get_ts_info = enetc_get_ts_info,
};
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index 135bf46354ea..17cf7c94fdb5 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -121,8 +121,11 @@ enum enetc_bdr_type {TX, RX};
#define ENETC_RBIER 0xa0
#define ENETC_RBIER_RXTIE BIT(0)
#define ENETC_RBIDR 0xa4
-#define ENETC_RBICIR0 0xa8
-#define ENETC_RBICIR0_ICEN BIT(31)
+#define ENETC_RBICR0 0xa8
+#define ENETC_RBICR0_ICEN BIT(31)
+#define ENETC_RBICR0_ICPT_MASK 0x1ff
+#define ENETC_RBICR0_SET_ICPT(n) ((n) & ENETC_RBICR0_ICPT_MASK)
+#define ENETC_RBICR1 0xac
/* TX BDR reg offsets */
#define ENETC_TBMR 0
@@ -141,8 +144,11 @@ enum enetc_bdr_type {TX, RX};
#define ENETC_TBIER 0xa0
#define ENETC_TBIER_TXTIE BIT(0)
#define ENETC_TBIDR 0xa4
-#define ENETC_TBICIR0 0xa8
-#define ENETC_TBICIR0_ICEN BIT(31)
+#define ENETC_TBICR0 0xa8
+#define ENETC_TBICR0_ICEN BIT(31)
+#define ENETC_TBICR0_ICPT_MASK 0xf
+#define ENETC_TBICR0_SET_ICPT(n) ((ilog2(n) + 1) & ENETC_TBICR0_ICPT_MASK)
+#define ENETC_TBICR1 0xac
#define ENETC_RTBLENR_LEN(n) ((n) & ~0x7)
@@ -787,6 +793,15 @@ struct enetc_cbd {
};
#define ENETC_CLK 400000000ULL
+static inline u32 enetc_cycles_to_usecs(u32 cycles)
+{
+ return (u32)div_u64(cycles * 1000000ULL, ENETC_CLK);
+}
+
+static inline u32 enetc_usecs_to_cycles(u32 usecs)
+{
+ return (u32)div_u64(usecs * ENETC_CLK, 1000000ULL);
+}
/* port time gating control register */
#define ENETC_QBV_PTGCR_OFFSET 0x11a00
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 5a08f66b123c..1d2158fd9a28 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -966,6 +966,13 @@ static int enetc_configure_serdes(struct enetc_ndev_priv *priv)
return 0;
}
+static void enetc_teardown_serdes(struct enetc_ndev_priv *priv)
+{
+ struct enetc_pf *pf = enetc_si_priv(priv->si);
+
+ enetc_imdio_remove(pf);
+}
+
static int enetc_pf_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
@@ -1045,6 +1052,7 @@ static int enetc_pf_probe(struct pci_dev *pdev,
return 0;
err_reg_netdev:
+ enetc_teardown_serdes(priv);
enetc_free_msix(priv);
err_alloc_msix:
enetc_free_si_resources(priv);
@@ -1071,7 +1079,7 @@ static void enetc_pf_remove(struct pci_dev *pdev)
priv = netdev_priv(si->ndev);
unregister_netdev(si->ndev);
- enetc_imdio_remove(pf);
+ enetc_teardown_serdes(priv);
enetc_mdio_remove(pf);
enetc_of_put_phy(pf);
diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index 4fb776920a93..8b2bf85039f1 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -1024,9 +1024,9 @@ static int hix5hd2_init_sg_desc_queue(struct hix5hd2_priv *priv)
struct sg_desc *desc;
dma_addr_t phys_addr;
- desc = (struct sg_desc *)dma_alloc_coherent(priv->dev,
- TX_DESC_NUM * sizeof(struct sg_desc),
- &phys_addr, GFP_KERNEL);
+ desc = dma_alloc_coherent(priv->dev,
+ TX_DESC_NUM * sizeof(struct sg_desc),
+ &phys_addr, GFP_KERNEL);
if (!desc)
return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 7486d010a619..c665220bb637 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -222,6 +222,8 @@ enum ice_state {
__ICE_OICR_INTR_DIS, /* Global OICR interrupt disabled */
__ICE_MDD_VF_PRINT_PENDING, /* set when MDD event handle */
__ICE_VF_RESETS_DISABLED, /* disable resets during ice_remove */
+ __ICE_LINK_DEFAULT_OVERRIDE_PENDING,
+ __ICE_PHY_INIT_COMPLETE,
__ICE_STATE_NBITS /* must be last */
};
@@ -357,12 +359,14 @@ enum ice_pf_flags {
ICE_FLAG_FD_ENA,
ICE_FLAG_ADV_FEATURES,
ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
+ ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
ICE_FLAG_NO_MEDIA,
ICE_FLAG_FW_LLDP_AGENT,
ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */
ICE_FLAG_LEGACY_RX,
ICE_FLAG_VF_TRUE_PROMISC_ENA,
ICE_FLAG_MDD_AUTO_RESET_VF,
+ ICE_FLAG_LINK_LENIENT_MODE_ENA,
ICE_PF_FLAGS_NBITS /* must be last */
};
@@ -423,6 +427,8 @@ struct ice_pf {
u16 empr_count; /* EMP reset count */
u16 pfr_count; /* PF reset count */
+ u8 wol_ena : 1; /* software state of WoL */
+ u32 wakeup_reason; /* last wakeup reason */
struct ice_hw_port_stats stats;
struct ice_hw_port_stats stats_prev;
struct ice_hw hw;
@@ -435,6 +441,10 @@ struct ice_pf {
u32 tx_timeout_recovery_level;
char int_name[ICE_INT_NAME_STR_LEN];
u32 sw_int_count;
+
+ __le64 nvm_phy_type_lo; /* NVM PHY type low */
+ __le64 nvm_phy_type_hi; /* NVM PHY type high */
+ struct ice_link_default_override_tlv link_dflt_override;
};
struct ice_netdev_priv {
@@ -568,6 +578,7 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
const char *ice_stat_str(enum ice_status stat_err);
const char *ice_aq_str(enum ice_aq_err aq_err);
+bool ice_is_wol_supported(struct ice_pf *pf);
int
ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add,
bool is_tun);
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 99c39249613a..b363e0223670 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -961,8 +961,11 @@ struct ice_aqc_get_phy_caps_data {
#define ICE_AQC_GET_PHY_EN_MOD_QUAL BIT(5)
#define ICE_AQC_PHY_EN_AUTO_FEC BIT(7)
#define ICE_AQC_PHY_CAPS_MASK ICE_M(0xff, 0)
- u8 low_power_ctrl;
+ u8 low_power_ctrl_an;
#define ICE_AQC_PHY_EN_D3COLD_LOW_POWER_AUTONEG BIT(0)
+#define ICE_AQC_PHY_AN_EN_CLAUSE28 BIT(1)
+#define ICE_AQC_PHY_AN_EN_CLAUSE73 BIT(2)
+#define ICE_AQC_PHY_AN_EN_CLAUSE37 BIT(3)
__le16 eee_cap;
#define ICE_AQC_PHY_EEE_EN_100BASE_TX BIT(0)
#define ICE_AQC_PHY_EEE_EN_1000BASE_T BIT(1)
@@ -983,12 +986,14 @@ struct ice_aqc_get_phy_caps_data {
#define ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN BIT(6)
#define ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN BIT(7)
#define ICE_AQC_PHY_FEC_MASK ICE_M(0xdf, 0)
- u8 rsvd1; /* Byte 35 reserved */
+ u8 module_compliance_enforcement;
+#define ICE_AQC_MOD_ENFORCE_STRICT_MODE BIT(0)
u8 extended_compliance_code;
#define ICE_MODULE_TYPE_TOTAL_BYTE 3
u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
#define ICE_AQC_MOD_TYPE_BYTE0_SFP_PLUS 0xA0
#define ICE_AQC_MOD_TYPE_BYTE0_QSFP_PLUS 0x80
+#define ICE_AQC_MOD_TYPE_IDENT 1
#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE BIT(0)
#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE BIT(1)
#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_SR BIT(4)
@@ -1032,11 +1037,11 @@ struct ice_aqc_set_phy_cfg_data {
#define ICE_AQ_PHY_ENA_AUTO_LINK_UPDT BIT(5)
#define ICE_AQ_PHY_ENA_LESM BIT(6)
#define ICE_AQ_PHY_ENA_AUTO_FEC BIT(7)
- u8 low_power_ctrl;
+ u8 low_power_ctrl_an;
__le16 eee_cap; /* Value from ice_aqc_get_phy_caps */
__le16 eeer_value;
u8 link_fec_opt; /* Use defines from ice_aqc_get_phy_caps */
- u8 rsvd1;
+ u8 module_compliance_enforcement;
};
/* Set MAC Config command data structure (direct 0x0603) */
@@ -1147,6 +1152,7 @@ struct ice_aqc_get_link_status_data {
#define ICE_AQ_LINK_PWR_QSFP_CLASS_3 2
#define ICE_AQ_LINK_PWR_QSFP_CLASS_4 3
__le16 link_speed;
+#define ICE_AQ_LINK_SPEED_M 0x7FF
#define ICE_AQ_LINK_SPEED_10MB BIT(0)
#define ICE_AQ_LINK_SPEED_100MB BIT(1)
#define ICE_AQ_LINK_SPEED_1000MB BIT(2)
@@ -1189,6 +1195,57 @@ struct ice_aqc_set_mac_lb {
u8 reserved[15];
};
+struct ice_aqc_link_topo_addr {
+ u8 lport_num;
+ u8 lport_num_valid;
+#define ICE_AQC_LINK_TOPO_PORT_NUM_VALID BIT(0)
+ u8 node_type_ctx;
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_S 0
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_M (0xF << ICE_AQC_LINK_TOPO_NODE_TYPE_S)
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_PHY 0
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL 1
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_MUX_CTRL 2
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_LED_CTRL 3
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_LED 4
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_THERMAL 5
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE 6
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_MEZZ 7
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_ID_EEPROM 8
+#define ICE_AQC_LINK_TOPO_NODE_CTX_S 4
+#define ICE_AQC_LINK_TOPO_NODE_CTX_M \
+ (0xF << ICE_AQC_LINK_TOPO_NODE_CTX_S)
+#define ICE_AQC_LINK_TOPO_NODE_CTX_GLOBAL 0
+#define ICE_AQC_LINK_TOPO_NODE_CTX_BOARD 1
+#define ICE_AQC_LINK_TOPO_NODE_CTX_PORT 2
+#define ICE_AQC_LINK_TOPO_NODE_CTX_NODE 3
+#define ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED 4
+#define ICE_AQC_LINK_TOPO_NODE_CTX_OVERRIDE 5
+ u8 index;
+ __le16 handle;
+#define ICE_AQC_LINK_TOPO_HANDLE_S 0
+#define ICE_AQC_LINK_TOPO_HANDLE_M (0x3FF << ICE_AQC_LINK_TOPO_HANDLE_S)
+/* Used to decode the handle field */
+#define ICE_AQC_LINK_TOPO_HANDLE_BRD_TYPE_M BIT(9)
+#define ICE_AQC_LINK_TOPO_HANDLE_BRD_TYPE_LOM BIT(9)
+#define ICE_AQC_LINK_TOPO_HANDLE_BRD_TYPE_MEZZ 0
+#define ICE_AQC_LINK_TOPO_HANDLE_NODE_S 0
+/* In case of a Mezzanine type */
+#define ICE_AQC_LINK_TOPO_HANDLE_MEZZ_NODE_M \
+ (0x3F << ICE_AQC_LINK_TOPO_HANDLE_NODE_S)
+#define ICE_AQC_LINK_TOPO_HANDLE_MEZZ_S 6
+#define ICE_AQC_LINK_TOPO_HANDLE_MEZZ_M (0x7 << ICE_AQC_LINK_TOPO_HANDLE_MEZZ_S)
+/* In case of a LOM type */
+#define ICE_AQC_LINK_TOPO_HANDLE_LOM_NODE_M \
+ (0x1FF << ICE_AQC_LINK_TOPO_HANDLE_NODE_S)
+};
+
+/* Get Link Topology Handle (direct, 0x06E0) */
+struct ice_aqc_get_link_topo {
+ struct ice_aqc_link_topo_addr addr;
+ u8 node_part_num;
+ u8 rsvd[9];
+};
+
/* Set Port Identification LED (direct, 0x06E9) */
struct ice_aqc_set_port_id_led {
u8 lport_num;
@@ -1759,6 +1816,7 @@ struct ice_aq_desc {
struct ice_aqc_set_event_mask set_event_mask;
struct ice_aqc_get_link_status get_link_status;
struct ice_aqc_event_lan_overflow lan_overflow;
+ struct ice_aqc_get_link_topo get_link_topo;
} params;
};
@@ -1858,6 +1916,7 @@ enum ice_adminq_opc {
ice_aqc_opc_get_link_status = 0x0607,
ice_aqc_opc_set_event_mask = 0x0613,
ice_aqc_opc_set_mac_lb = 0x0620,
+ ice_aqc_opc_get_link_topo = 0x06E0,
ice_aqc_opc_set_port_id_led = 0x06E9,
ice_aqc_opc_sff_eeprom = 0x06EE,
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 57fd815c41bc..c72cc77b8d67 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -20,7 +20,40 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw)
if (hw->vendor_id != PCI_VENDOR_ID_INTEL)
return ICE_ERR_DEVICE_NOT_SUPPORTED;
- hw->mac_type = ICE_MAC_GENERIC;
+ switch (hw->device_id) {
+ case ICE_DEV_ID_E810C_BACKPLANE:
+ case ICE_DEV_ID_E810C_QSFP:
+ case ICE_DEV_ID_E810C_SFP:
+ case ICE_DEV_ID_E810_XXV_SFP:
+ hw->mac_type = ICE_MAC_E810;
+ break;
+ case ICE_DEV_ID_E823C_10G_BASE_T:
+ case ICE_DEV_ID_E823C_BACKPLANE:
+ case ICE_DEV_ID_E823C_QSFP:
+ case ICE_DEV_ID_E823C_SFP:
+ case ICE_DEV_ID_E823C_SGMII:
+ case ICE_DEV_ID_E822C_10G_BASE_T:
+ case ICE_DEV_ID_E822C_BACKPLANE:
+ case ICE_DEV_ID_E822C_QSFP:
+ case ICE_DEV_ID_E822C_SFP:
+ case ICE_DEV_ID_E822C_SGMII:
+ case ICE_DEV_ID_E822L_10G_BASE_T:
+ case ICE_DEV_ID_E822L_BACKPLANE:
+ case ICE_DEV_ID_E822L_SFP:
+ case ICE_DEV_ID_E822L_SGMII:
+ case ICE_DEV_ID_E823L_10G_BASE_T:
+ case ICE_DEV_ID_E823L_1GBE:
+ case ICE_DEV_ID_E823L_BACKPLANE:
+ case ICE_DEV_ID_E823L_QSFP:
+ case ICE_DEV_ID_E823L_SFP:
+ hw->mac_type = ICE_MAC_GENERIC;
+ break;
+ default:
+ hw->mac_type = ICE_MAC_UNKNOWN;
+ break;
+ }
+
+ ice_debug(hw, ICE_DBG_INIT, "mac_type: %d\n", hw->mac_type);
return 0;
}
@@ -52,7 +85,8 @@ enum ice_status ice_clear_pf_cfg(struct ice_hw *hw)
* is returned in user specified buffer. Please interpret user specified
* buffer as "manage_mac_read" response.
* Response such as various MAC addresses are stored in HW struct (port.mac)
- * ice_aq_discover_caps is expected to be called before this function is called.
+ * ice_discover_dev_caps is expected to be called before this function is
+ * called.
*/
static enum ice_status
ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size,
@@ -116,11 +150,13 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
u16 pcaps_size = sizeof(*pcaps);
struct ice_aq_desc desc;
enum ice_status status;
+ struct ice_hw *hw;
cmd = &desc.params.get_phy;
if (!pcaps || (report_mode & ~ICE_AQC_REPORT_MODE_M) || !pi)
return ICE_ERR_PARAM;
+ hw = pi->hw;
ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_caps);
@@ -128,17 +164,94 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
cmd->param0 |= cpu_to_le16(ICE_AQC_GET_PHY_RQM);
cmd->param0 |= cpu_to_le16(report_mode);
- status = ice_aq_send_cmd(pi->hw, &desc, pcaps, pcaps_size, cd);
+ status = ice_aq_send_cmd(hw, &desc, pcaps, pcaps_size, cd);
+
+ ice_debug(hw, ICE_DBG_LINK, "get phy caps - report_mode = 0x%x\n",
+ report_mode);
+ ice_debug(hw, ICE_DBG_LINK, " phy_type_low = 0x%llx\n",
+ (unsigned long long)le64_to_cpu(pcaps->phy_type_low));
+ ice_debug(hw, ICE_DBG_LINK, " phy_type_high = 0x%llx\n",
+ (unsigned long long)le64_to_cpu(pcaps->phy_type_high));
+ ice_debug(hw, ICE_DBG_LINK, " caps = 0x%x\n", pcaps->caps);
+ ice_debug(hw, ICE_DBG_LINK, " low_power_ctrl_an = 0x%x\n",
+ pcaps->low_power_ctrl_an);
+ ice_debug(hw, ICE_DBG_LINK, " eee_cap = 0x%x\n", pcaps->eee_cap);
+ ice_debug(hw, ICE_DBG_LINK, " eeer_value = 0x%x\n",
+ pcaps->eeer_value);
+ ice_debug(hw, ICE_DBG_LINK, " link_fec_options = 0x%x\n",
+ pcaps->link_fec_options);
+ ice_debug(hw, ICE_DBG_LINK, " module_compliance_enforcement = 0x%x\n",
+ pcaps->module_compliance_enforcement);
+ ice_debug(hw, ICE_DBG_LINK, " extended_compliance_code = 0x%x\n",
+ pcaps->extended_compliance_code);
+ ice_debug(hw, ICE_DBG_LINK, " module_type[0] = 0x%x\n",
+ pcaps->module_type[0]);
+ ice_debug(hw, ICE_DBG_LINK, " module_type[1] = 0x%x\n",
+ pcaps->module_type[1]);
+ ice_debug(hw, ICE_DBG_LINK, " module_type[2] = 0x%x\n",
+ pcaps->module_type[2]);
if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP) {
pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low);
pi->phy.phy_type_high = le64_to_cpu(pcaps->phy_type_high);
+ memcpy(pi->phy.link_info.module_type, &pcaps->module_type,
+ sizeof(pi->phy.link_info.module_type));
}
return status;
}
/**
+ * ice_aq_get_link_topo_handle - get link topology node return status
+ * @pi: port information structure
+ * @node_type: requested node type
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get link topology node return status for specified node type (0x06E0)
+ *
+ * Node type cage can be used to determine if cage is present. If AQC
+ * returns error (ENOENT), then no cage present. If no cage present, then
+ * connection type is backplane or BASE-T.
+ */
+static enum ice_status
+ice_aq_get_link_topo_handle(struct ice_port_info *pi, u8 node_type,
+ struct ice_sq_cd *cd)
+{
+ struct ice_aqc_get_link_topo *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.get_link_topo;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo);
+
+ cmd->addr.node_type_ctx = (ICE_AQC_LINK_TOPO_NODE_CTX_PORT <<
+ ICE_AQC_LINK_TOPO_NODE_CTX_S);
+
+ /* set node type */
+ cmd->addr.node_type_ctx |= (ICE_AQC_LINK_TOPO_NODE_TYPE_M & node_type);
+
+ return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_is_media_cage_present
+ * @pi: port information structure
+ *
+ * Returns true if media cage is present, else false. If no cage, then
+ * media type is backplane or BASE-T.
+ */
+static bool ice_is_media_cage_present(struct ice_port_info *pi)
+{
+ /* Node type cage can be used to determine if cage is present. If AQC
+ * returns error (ENOENT), then no cage present. If no cage present then
+ * connection type is backplane or BASE-T.
+ */
+ return !ice_aq_get_link_topo_handle(pi,
+ ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE,
+ NULL);
+}
+
+/**
* ice_get_media_type - Gets media type
* @pi: port information structure
*/
@@ -155,6 +268,18 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
return ICE_MEDIA_UNKNOWN;
if (hw_link_info->phy_type_low) {
+ /* 1G SGMII is a special case where some DA cable PHYs
+ * may show this as an option when it really shouldn't
+ * be since SGMII is meant to be between a MAC and a PHY
+ * in a backplane. Try to detect this case and handle it
+ */
+ if (hw_link_info->phy_type_low == ICE_PHY_TYPE_LOW_1G_SGMII &&
+ (hw_link_info->module_type[ICE_AQC_MOD_TYPE_IDENT] ==
+ ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE ||
+ hw_link_info->module_type[ICE_AQC_MOD_TYPE_IDENT] ==
+ ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE))
+ return ICE_MEDIA_DA;
+
switch (hw_link_info->phy_type_low) {
case ICE_PHY_TYPE_LOW_1000BASE_SX:
case ICE_PHY_TYPE_LOW_1000BASE_LX:
@@ -163,7 +288,6 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
case ICE_PHY_TYPE_LOW_25GBASE_SR:
case ICE_PHY_TYPE_LOW_25GBASE_LR:
- case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
case ICE_PHY_TYPE_LOW_40GBASE_SR4:
case ICE_PHY_TYPE_LOW_40GBASE_LR4:
case ICE_PHY_TYPE_LOW_50GBASE_SR2:
@@ -175,6 +299,14 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
case ICE_PHY_TYPE_LOW_100GBASE_LR4:
case ICE_PHY_TYPE_LOW_100GBASE_SR2:
case ICE_PHY_TYPE_LOW_100GBASE_DR:
+ case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
+ case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
return ICE_MEDIA_FIBER;
case ICE_PHY_TYPE_LOW_100BASE_TX:
case ICE_PHY_TYPE_LOW_1000BASE_T:
@@ -194,6 +326,16 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
case ICE_PHY_TYPE_LOW_100GBASE_CP2:
return ICE_MEDIA_DA;
+ case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
+ case ICE_PHY_TYPE_LOW_40G_XLAUI:
+ case ICE_PHY_TYPE_LOW_50G_LAUI2:
+ case ICE_PHY_TYPE_LOW_50G_AUI2:
+ case ICE_PHY_TYPE_LOW_50G_AUI1:
+ case ICE_PHY_TYPE_LOW_100G_AUI4:
+ case ICE_PHY_TYPE_LOW_100G_CAUI4:
+ if (ice_is_media_cage_present(pi))
+ return ICE_MEDIA_DA;
+ fallthrough;
case ICE_PHY_TYPE_LOW_1000BASE_KX:
case ICE_PHY_TYPE_LOW_2500BASE_KX:
case ICE_PHY_TYPE_LOW_2500BASE_X:
@@ -211,8 +353,16 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
}
} else {
switch (hw_link_info->phy_type_high) {
+ case ICE_PHY_TYPE_HIGH_100G_AUI2:
+ case ICE_PHY_TYPE_HIGH_100G_CAUI2:
+ if (ice_is_media_cage_present(pi))
+ return ICE_MEDIA_DA;
+ fallthrough;
case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
return ICE_MEDIA_BACKPLANE;
+ case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
+ case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
+ return ICE_MEDIA_FIBER;
}
}
return ICE_MEDIA_UNKNOWN;
@@ -292,18 +442,21 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
li->lse_ena = !!(resp->cmd_flags & cpu_to_le16(ICE_AQ_LSE_IS_ENABLED));
- ice_debug(hw, ICE_DBG_LINK, "link_speed = 0x%x\n", li->link_speed);
- ice_debug(hw, ICE_DBG_LINK, "phy_type_low = 0x%llx\n",
+ ice_debug(hw, ICE_DBG_LINK, "get link info\n");
+ ice_debug(hw, ICE_DBG_LINK, " link_speed = 0x%x\n", li->link_speed);
+ ice_debug(hw, ICE_DBG_LINK, " phy_type_low = 0x%llx\n",
(unsigned long long)li->phy_type_low);
- ice_debug(hw, ICE_DBG_LINK, "phy_type_high = 0x%llx\n",
+ ice_debug(hw, ICE_DBG_LINK, " phy_type_high = 0x%llx\n",
(unsigned long long)li->phy_type_high);
- ice_debug(hw, ICE_DBG_LINK, "media_type = 0x%x\n", *hw_media_type);
- ice_debug(hw, ICE_DBG_LINK, "link_info = 0x%x\n", li->link_info);
- ice_debug(hw, ICE_DBG_LINK, "an_info = 0x%x\n", li->an_info);
- ice_debug(hw, ICE_DBG_LINK, "ext_info = 0x%x\n", li->ext_info);
- ice_debug(hw, ICE_DBG_LINK, "lse_ena = 0x%x\n", li->lse_ena);
- ice_debug(hw, ICE_DBG_LINK, "max_frame = 0x%x\n", li->max_frame_size);
- ice_debug(hw, ICE_DBG_LINK, "pacing = 0x%x\n", li->pacing);
+ ice_debug(hw, ICE_DBG_LINK, " media_type = 0x%x\n", *hw_media_type);
+ ice_debug(hw, ICE_DBG_LINK, " link_info = 0x%x\n", li->link_info);
+ ice_debug(hw, ICE_DBG_LINK, " an_info = 0x%x\n", li->an_info);
+ ice_debug(hw, ICE_DBG_LINK, " ext_info = 0x%x\n", li->ext_info);
+ ice_debug(hw, ICE_DBG_LINK, " fec_info = 0x%x\n", li->fec_info);
+ ice_debug(hw, ICE_DBG_LINK, " lse_ena = 0x%x\n", li->lse_ena);
+ ice_debug(hw, ICE_DBG_LINK, " max_frame = 0x%x\n",
+ li->max_frame_size);
+ ice_debug(hw, ICE_DBG_LINK, " pacing = 0x%x\n", li->pacing);
/* save link status information */
if (link)
@@ -1617,204 +1770,388 @@ static u32 ice_get_num_per_func(struct ice_hw *hw, u32 max)
}
/**
- * ice_parse_caps - parse function/device capabilities
+ * ice_parse_common_caps - parse common device/function capabilities
* @hw: pointer to the HW struct
- * @buf: pointer to a buffer containing function/device capability records
- * @cap_count: number of capability records in the list
- * @opc: type of capabilities list to parse
+ * @caps: pointer to common capabilities structure
+ * @elem: the capability element to parse
+ * @prefix: message prefix for tracing capabilities
+ *
+ * Given a capability element, extract relevant details into the common
+ * capability structure.
*
- * Helper function to parse function(0x000a)/device(0x000b) capabilities list.
+ * Returns: true if the capability matches one of the common capability ids,
+ * false otherwise.
+ */
+static bool
+ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps,
+ struct ice_aqc_list_caps_elem *elem, const char *prefix)
+{
+ u32 logical_id = le32_to_cpu(elem->logical_id);
+ u32 phys_id = le32_to_cpu(elem->phys_id);
+ u32 number = le32_to_cpu(elem->number);
+ u16 cap = le16_to_cpu(elem->cap);
+ bool found = true;
+
+ switch (cap) {
+ case ICE_AQC_CAPS_VALID_FUNCTIONS:
+ caps->valid_functions = number;
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: valid_functions (bitmap) = %d\n", prefix,
+ caps->valid_functions);
+ break;
+ case ICE_AQC_CAPS_SRIOV:
+ caps->sr_iov_1_1 = (number == 1);
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: sr_iov_1_1 = %d\n", prefix,
+ caps->sr_iov_1_1);
+ break;
+ case ICE_AQC_CAPS_DCB:
+ caps->dcb = (number == 1);
+ caps->active_tc_bitmap = logical_id;
+ caps->maxtc = phys_id;
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: dcb = %d\n", prefix, caps->dcb);
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: active_tc_bitmap = %d\n", prefix,
+ caps->active_tc_bitmap);
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: maxtc = %d\n", prefix, caps->maxtc);
+ break;
+ case ICE_AQC_CAPS_RSS:
+ caps->rss_table_size = number;
+ caps->rss_table_entry_width = logical_id;
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: rss_table_size = %d\n", prefix,
+ caps->rss_table_size);
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: rss_table_entry_width = %d\n", prefix,
+ caps->rss_table_entry_width);
+ break;
+ case ICE_AQC_CAPS_RXQS:
+ caps->num_rxq = number;
+ caps->rxq_first_id = phys_id;
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: num_rxq = %d\n", prefix,
+ caps->num_rxq);
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: rxq_first_id = %d\n", prefix,
+ caps->rxq_first_id);
+ break;
+ case ICE_AQC_CAPS_TXQS:
+ caps->num_txq = number;
+ caps->txq_first_id = phys_id;
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: num_txq = %d\n", prefix,
+ caps->num_txq);
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: txq_first_id = %d\n", prefix,
+ caps->txq_first_id);
+ break;
+ case ICE_AQC_CAPS_MSIX:
+ caps->num_msix_vectors = number;
+ caps->msix_vector_first_id = phys_id;
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: num_msix_vectors = %d\n", prefix,
+ caps->num_msix_vectors);
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: msix_vector_first_id = %d\n", prefix,
+ caps->msix_vector_first_id);
+ break;
+ case ICE_AQC_CAPS_MAX_MTU:
+ caps->max_mtu = number;
+ ice_debug(hw, ICE_DBG_INIT, "%s: max_mtu = %d\n",
+ prefix, caps->max_mtu);
+ break;
+ default:
+ /* Not one of the recognized common capabilities */
+ found = false;
+ }
+
+ return found;
+}
+
+/**
+ * ice_recalc_port_limited_caps - Recalculate port limited capabilities
+ * @hw: pointer to the HW structure
+ * @caps: pointer to capabilities structure to fix
+ *
+ * Re-calculate the capabilities that are dependent on the number of physical
+ * ports; i.e. some features are not supported or function differently on
+ * devices with more than 4 ports.
*/
static void
-ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
- enum ice_adminq_opc opc)
+ice_recalc_port_limited_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps)
+{
+ /* This assumes device capabilities are always scanned before function
+ * capabilities during the initialization flow.
+ */
+ if (hw->dev_caps.num_funcs > 4) {
+ /* Max 4 TCs per port */
+ caps->maxtc = 4;
+ ice_debug(hw, ICE_DBG_INIT,
+ "reducing maxtc to %d (based on #ports)\n",
+ caps->maxtc);
+ }
+}
+
+/**
+ * ice_parse_vf_func_caps - Parse ICE_AQC_CAPS_VF function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @cap: pointer to the capability element to parse
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_VF.
+ */
+static void
+ice_parse_vf_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+ struct ice_aqc_list_caps_elem *cap)
+{
+ u32 logical_id = le32_to_cpu(cap->logical_id);
+ u32 number = le32_to_cpu(cap->number);
+
+ func_p->num_allocd_vfs = number;
+ func_p->vf_base_id = logical_id;
+ ice_debug(hw, ICE_DBG_INIT, "func caps: num_allocd_vfs = %d\n",
+ func_p->num_allocd_vfs);
+ ice_debug(hw, ICE_DBG_INIT, "func caps: vf_base_id = %d\n",
+ func_p->vf_base_id);
+}
+
+/**
+ * ice_parse_vsi_func_caps - Parse ICE_AQC_CAPS_VSI function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @cap: pointer to the capability element to parse
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_VSI.
+ */
+static void
+ice_parse_vsi_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+ struct ice_aqc_list_caps_elem *cap)
+{
+ func_p->guar_num_vsi = ice_get_num_per_func(hw, ICE_MAX_VSI);
+ ice_debug(hw, ICE_DBG_INIT, "func caps: guar_num_vsi (fw) = %d\n",
+ le32_to_cpu(cap->number));
+ ice_debug(hw, ICE_DBG_INIT, "func caps: guar_num_vsi = %d\n",
+ func_p->guar_num_vsi);
+}
+
+/**
+ * ice_parse_fdir_func_caps - Parse ICE_AQC_CAPS_FD function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_FD.
+ */
+static void
+ice_parse_fdir_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p)
+{
+ u32 reg_val, val;
+
+ reg_val = rd32(hw, GLQF_FD_SIZE);
+ val = (reg_val & GLQF_FD_SIZE_FD_GSIZE_M) >>
+ GLQF_FD_SIZE_FD_GSIZE_S;
+ func_p->fd_fltr_guar =
+ ice_get_num_per_func(hw, val);
+ val = (reg_val & GLQF_FD_SIZE_FD_BSIZE_M) >>
+ GLQF_FD_SIZE_FD_BSIZE_S;
+ func_p->fd_fltr_best_effort = val;
+
+ ice_debug(hw, ICE_DBG_INIT,
+ "func caps: fd_fltr_guar = %d\n",
+ func_p->fd_fltr_guar);
+ ice_debug(hw, ICE_DBG_INIT,
+ "func caps: fd_fltr_best_effort = %d\n",
+ func_p->fd_fltr_best_effort);
+}
+
+/**
+ * ice_parse_func_caps - Parse function capabilities
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @buf: buffer containing the function capability records
+ * @cap_count: the number of capabilities
+ *
+ * Helper function to parse function (0x000A) capabilities list. For
+ * capabilities shared between device and function, this relies on
+ * ice_parse_common_caps.
+ *
+ * Loop through the list of provided capabilities and extract the relevant
+ * data into the function capabilities structured.
+ */
+static void
+ice_parse_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+ void *buf, u32 cap_count)
{
struct ice_aqc_list_caps_elem *cap_resp;
- struct ice_hw_func_caps *func_p = NULL;
- struct ice_hw_dev_caps *dev_p = NULL;
- struct ice_hw_common_caps *caps;
- char const *prefix;
u32 i;
- if (!buf)
- return;
-
cap_resp = (struct ice_aqc_list_caps_elem *)buf;
- if (opc == ice_aqc_opc_list_dev_caps) {
- dev_p = &hw->dev_caps;
- caps = &dev_p->common_cap;
- prefix = "dev cap";
- } else if (opc == ice_aqc_opc_list_func_caps) {
- func_p = &hw->func_caps;
- caps = &func_p->common_cap;
- prefix = "func cap";
- } else {
- ice_debug(hw, ICE_DBG_INIT, "wrong opcode\n");
- return;
- }
+ memset(func_p, 0, sizeof(*func_p));
- for (i = 0; caps && i < cap_count; i++, cap_resp++) {
- u32 logical_id = le32_to_cpu(cap_resp->logical_id);
- u32 phys_id = le32_to_cpu(cap_resp->phys_id);
- u32 number = le32_to_cpu(cap_resp->number);
- u16 cap = le16_to_cpu(cap_resp->cap);
+ for (i = 0; i < cap_count; i++) {
+ u16 cap = le16_to_cpu(cap_resp[i].cap);
+ bool found;
- switch (cap) {
- case ICE_AQC_CAPS_VALID_FUNCTIONS:
- caps->valid_functions = number;
- ice_debug(hw, ICE_DBG_INIT,
- "%s: valid_functions (bitmap) = %d\n", prefix,
- caps->valid_functions);
+ found = ice_parse_common_caps(hw, &func_p->common_cap,
+ &cap_resp[i], "func caps");
- /* store func count for resource management purposes */
- if (dev_p)
- dev_p->num_funcs = hweight32(number);
- break;
- case ICE_AQC_CAPS_SRIOV:
- caps->sr_iov_1_1 = (number == 1);
- ice_debug(hw, ICE_DBG_INIT,
- "%s: sr_iov_1_1 = %d\n", prefix,
- caps->sr_iov_1_1);
- break;
+ switch (cap) {
case ICE_AQC_CAPS_VF:
- if (dev_p) {
- dev_p->num_vfs_exposed = number;
- ice_debug(hw, ICE_DBG_INIT,
- "%s: num_vfs_exposed = %d\n", prefix,
- dev_p->num_vfs_exposed);
- } else if (func_p) {
- func_p->num_allocd_vfs = number;
- func_p->vf_base_id = logical_id;
- ice_debug(hw, ICE_DBG_INIT,
- "%s: num_allocd_vfs = %d\n", prefix,
- func_p->num_allocd_vfs);
- ice_debug(hw, ICE_DBG_INIT,
- "%s: vf_base_id = %d\n", prefix,
- func_p->vf_base_id);
- }
+ ice_parse_vf_func_caps(hw, func_p, &cap_resp[i]);
break;
case ICE_AQC_CAPS_VSI:
- if (dev_p) {
- dev_p->num_vsi_allocd_to_host = number;
- ice_debug(hw, ICE_DBG_INIT,
- "%s: num_vsi_allocd_to_host = %d\n",
- prefix,
- dev_p->num_vsi_allocd_to_host);
- } else if (func_p) {
- func_p->guar_num_vsi =
- ice_get_num_per_func(hw, ICE_MAX_VSI);
- ice_debug(hw, ICE_DBG_INIT,
- "%s: guar_num_vsi (fw) = %d\n",
- prefix, number);
- ice_debug(hw, ICE_DBG_INIT,
- "%s: guar_num_vsi = %d\n",
- prefix, func_p->guar_num_vsi);
- }
+ ice_parse_vsi_func_caps(hw, func_p, &cap_resp[i]);
break;
- case ICE_AQC_CAPS_DCB:
- caps->dcb = (number == 1);
- caps->active_tc_bitmap = logical_id;
- caps->maxtc = phys_id;
- ice_debug(hw, ICE_DBG_INIT,
- "%s: dcb = %d\n", prefix, caps->dcb);
- ice_debug(hw, ICE_DBG_INIT,
- "%s: active_tc_bitmap = %d\n", prefix,
- caps->active_tc_bitmap);
- ice_debug(hw, ICE_DBG_INIT,
- "%s: maxtc = %d\n", prefix, caps->maxtc);
- break;
- case ICE_AQC_CAPS_RSS:
- caps->rss_table_size = number;
- caps->rss_table_entry_width = logical_id;
- ice_debug(hw, ICE_DBG_INIT,
- "%s: rss_table_size = %d\n", prefix,
- caps->rss_table_size);
- ice_debug(hw, ICE_DBG_INIT,
- "%s: rss_table_entry_width = %d\n", prefix,
- caps->rss_table_entry_width);
+ case ICE_AQC_CAPS_FD:
+ ice_parse_fdir_func_caps(hw, func_p);
break;
- case ICE_AQC_CAPS_RXQS:
- caps->num_rxq = number;
- caps->rxq_first_id = phys_id;
- ice_debug(hw, ICE_DBG_INIT,
- "%s: num_rxq = %d\n", prefix,
- caps->num_rxq);
- ice_debug(hw, ICE_DBG_INIT,
- "%s: rxq_first_id = %d\n", prefix,
- caps->rxq_first_id);
+ default:
+ /* Don't list common capabilities as unknown */
+ if (!found)
+ ice_debug(hw, ICE_DBG_INIT,
+ "func caps: unknown capability[%d]: 0x%x\n",
+ i, cap);
break;
- case ICE_AQC_CAPS_TXQS:
- caps->num_txq = number;
- caps->txq_first_id = phys_id;
- ice_debug(hw, ICE_DBG_INIT,
- "%s: num_txq = %d\n", prefix,
- caps->num_txq);
- ice_debug(hw, ICE_DBG_INIT,
- "%s: txq_first_id = %d\n", prefix,
- caps->txq_first_id);
+ }
+ }
+
+ ice_recalc_port_limited_caps(hw, &func_p->common_cap);
+}
+
+/**
+ * ice_parse_valid_functions_cap - Parse ICE_AQC_CAPS_VALID_FUNCTIONS caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_VALID_FUNCTIONS for device capabilities.
+ */
+static void
+ice_parse_valid_functions_cap(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+ struct ice_aqc_list_caps_elem *cap)
+{
+ u32 number = le32_to_cpu(cap->number);
+
+ dev_p->num_funcs = hweight32(number);
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: num_funcs = %d\n",
+ dev_p->num_funcs);
+}
+
+/**
+ * ice_parse_vf_dev_caps - Parse ICE_AQC_CAPS_VF device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_VF for device capabilities.
+ */
+static void
+ice_parse_vf_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+ struct ice_aqc_list_caps_elem *cap)
+{
+ u32 number = le32_to_cpu(cap->number);
+
+ dev_p->num_vfs_exposed = number;
+ ice_debug(hw, ICE_DBG_INIT, "dev_caps: num_vfs_exposed = %d\n",
+ dev_p->num_vfs_exposed);
+}
+
+/**
+ * ice_parse_vsi_dev_caps - Parse ICE_AQC_CAPS_VSI device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_VSI for device capabilities.
+ */
+static void
+ice_parse_vsi_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+ struct ice_aqc_list_caps_elem *cap)
+{
+ u32 number = le32_to_cpu(cap->number);
+
+ dev_p->num_vsi_allocd_to_host = number;
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: num_vsi_allocd_to_host = %d\n",
+ dev_p->num_vsi_allocd_to_host);
+}
+
+/**
+ * ice_parse_fdir_dev_caps - Parse ICE_AQC_CAPS_FD device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_FD for device capabilities.
+ */
+static void
+ice_parse_fdir_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+ struct ice_aqc_list_caps_elem *cap)
+{
+ u32 number = le32_to_cpu(cap->number);
+
+ dev_p->num_flow_director_fltr = number;
+ ice_debug(hw, ICE_DBG_INIT, "dev caps: num_flow_director_fltr = %d\n",
+ dev_p->num_flow_director_fltr);
+}
+
+/**
+ * ice_parse_dev_caps - Parse device capabilities
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @buf: buffer containing the device capability records
+ * @cap_count: the number of capabilities
+ *
+ * Helper device to parse device (0x000B) capabilities list. For
+ * capabilities shared between device and device, this relies on
+ * ice_parse_common_caps.
+ *
+ * Loop through the list of provided capabilities and extract the relevant
+ * data into the device capabilities structured.
+ */
+static void
+ice_parse_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+ void *buf, u32 cap_count)
+{
+ struct ice_aqc_list_caps_elem *cap_resp;
+ u32 i;
+
+ cap_resp = (struct ice_aqc_list_caps_elem *)buf;
+
+ memset(dev_p, 0, sizeof(*dev_p));
+
+ for (i = 0; i < cap_count; i++) {
+ u16 cap = le16_to_cpu(cap_resp[i].cap);
+ bool found;
+
+ found = ice_parse_common_caps(hw, &dev_p->common_cap,
+ &cap_resp[i], "dev caps");
+
+ switch (cap) {
+ case ICE_AQC_CAPS_VALID_FUNCTIONS:
+ ice_parse_valid_functions_cap(hw, dev_p, &cap_resp[i]);
break;
- case ICE_AQC_CAPS_MSIX:
- caps->num_msix_vectors = number;
- caps->msix_vector_first_id = phys_id;
- ice_debug(hw, ICE_DBG_INIT,
- "%s: num_msix_vectors = %d\n", prefix,
- caps->num_msix_vectors);
- ice_debug(hw, ICE_DBG_INIT,
- "%s: msix_vector_first_id = %d\n", prefix,
- caps->msix_vector_first_id);
+ case ICE_AQC_CAPS_VF:
+ ice_parse_vf_dev_caps(hw, dev_p, &cap_resp[i]);
break;
- case ICE_AQC_CAPS_FD:
- if (dev_p) {
- dev_p->num_flow_director_fltr = number;
- ice_debug(hw, ICE_DBG_INIT,
- "%s: num_flow_director_fltr = %d\n",
- prefix,
- dev_p->num_flow_director_fltr);
- }
- if (func_p) {
- u32 reg_val, val;
-
- reg_val = rd32(hw, GLQF_FD_SIZE);
- val = (reg_val & GLQF_FD_SIZE_FD_GSIZE_M) >>
- GLQF_FD_SIZE_FD_GSIZE_S;
- func_p->fd_fltr_guar =
- ice_get_num_per_func(hw, val);
- val = (reg_val & GLQF_FD_SIZE_FD_BSIZE_M) >>
- GLQF_FD_SIZE_FD_BSIZE_S;
- func_p->fd_fltr_best_effort = val;
- ice_debug(hw, ICE_DBG_INIT,
- "%s: fd_fltr_guar = %d\n",
- prefix, func_p->fd_fltr_guar);
- ice_debug(hw, ICE_DBG_INIT,
- "%s: fd_fltr_best_effort = %d\n",
- prefix, func_p->fd_fltr_best_effort);
- }
+ case ICE_AQC_CAPS_VSI:
+ ice_parse_vsi_dev_caps(hw, dev_p, &cap_resp[i]);
break;
- case ICE_AQC_CAPS_MAX_MTU:
- caps->max_mtu = number;
- ice_debug(hw, ICE_DBG_INIT, "%s: max_mtu = %d\n",
- prefix, caps->max_mtu);
+ case ICE_AQC_CAPS_FD:
+ ice_parse_fdir_dev_caps(hw, dev_p, &cap_resp[i]);
break;
default:
- ice_debug(hw, ICE_DBG_INIT,
- "%s: unknown capability[%d]: 0x%x\n", prefix,
- i, cap);
+ /* Don't list common capabilities as unknown */
+ if (!found)
+ ice_debug(hw, ICE_DBG_INIT,
+ "dev caps: unknown capability[%d]: 0x%x\n",
+ i, cap);
break;
}
}
- /* Re-calculate capabilities that are dependent on the number of
- * physical ports; i.e. some features are not supported or function
- * differently on devices with more than 4 ports.
- */
- if (hw->dev_caps.num_funcs > 4) {
- /* Max 4 TCs per port */
- caps->maxtc = 4;
- ice_debug(hw, ICE_DBG_INIT,
- "%s: maxtc = %d (based on #ports)\n", prefix,
- caps->maxtc);
- }
+ ice_recalc_port_limited_caps(hw, &dev_p->common_cap);
}
/**
@@ -1860,81 +2197,69 @@ ice_aq_list_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count,
}
/**
- * ice_aq_discover_caps - query function/device capabilities
- * @hw: pointer to the HW struct
- * @buf: a virtual buffer to hold the capabilities
- * @buf_size: Size of the virtual buffer
- * @cap_count: cap count needed if AQ err==ENOMEM
- * @opc: capabilities type to discover - pass in the command opcode
- * @cd: pointer to command details structure or NULL
- *
- * Get the function(0x000a)/device(0x000b) capabilities description from
- * the firmware.
+ * ice_discover_dev_caps - Read and extract device capabilities
+ * @hw: pointer to the hardware structure
+ * @dev_caps: pointer to device capabilities structure
*
- * NOTE: this function has the side effect of updating the hw->dev_caps or
- * hw->func_caps by way of calling ice_parse_caps.
+ * Read the device capabilities and extract them into the dev_caps structure
+ * for later use.
*/
static enum ice_status
-ice_aq_discover_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count,
- enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+ice_discover_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_caps)
{
- u32 local_cap_count = 0;
enum ice_status status;
+ u32 cap_count = 0;
+ void *cbuf;
- status = ice_aq_list_caps(hw, buf, buf_size, &local_cap_count,
- opc, cd);
+ cbuf = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
+ if (!cbuf)
+ return ICE_ERR_NO_MEMORY;
+
+ /* Although the driver doesn't know the number of capabilities the
+ * device will return, we can simply send a 4KB buffer, the maximum
+ * possible size that firmware can return.
+ */
+ cap_count = ICE_AQ_MAX_BUF_LEN / sizeof(struct ice_aqc_list_caps_elem);
+
+ status = ice_aq_list_caps(hw, cbuf, ICE_AQ_MAX_BUF_LEN, &cap_count,
+ ice_aqc_opc_list_dev_caps, NULL);
if (!status)
- ice_parse_caps(hw, buf, local_cap_count, opc);
- else if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOMEM)
- *cap_count = local_cap_count;
+ ice_parse_dev_caps(hw, dev_caps, cbuf, cap_count);
+ kfree(cbuf);
return status;
}
/**
- * ice_discover_caps - get info about the HW
+ * ice_discover_func_caps - Read and extract function capabilities
* @hw: pointer to the hardware structure
- * @opc: capabilities type to discover - pass in the command opcode
+ * @func_caps: pointer to function capabilities structure
+ *
+ * Read the function capabilities and extract them into the func_caps structure
+ * for later use.
*/
static enum ice_status
-ice_discover_caps(struct ice_hw *hw, enum ice_adminq_opc opc)
+ice_discover_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_caps)
{
enum ice_status status;
- u32 cap_count;
- u16 cbuf_len;
- u8 retries;
-
- /* The driver doesn't know how many capabilities the device will return
- * so the buffer size required isn't known ahead of time. The driver
- * starts with cbuf_len and if this turns out to be insufficient, the
- * device returns ICE_AQ_RC_ENOMEM and also the cap_count it needs.
- * The driver then allocates the buffer based on the count and retries
- * the operation. So it follows that the retry count is 2.
- */
-#define ICE_GET_CAP_BUF_COUNT 40
-#define ICE_GET_CAP_RETRY_COUNT 2
-
- cap_count = ICE_GET_CAP_BUF_COUNT;
- retries = ICE_GET_CAP_RETRY_COUNT;
+ u32 cap_count = 0;
+ void *cbuf;
- do {
- void *cbuf;
-
- cbuf_len = (u16)(cap_count *
- sizeof(struct ice_aqc_list_caps_elem));
- cbuf = devm_kzalloc(ice_hw_to_dev(hw), cbuf_len, GFP_KERNEL);
- if (!cbuf)
- return ICE_ERR_NO_MEMORY;
-
- status = ice_aq_discover_caps(hw, cbuf, cbuf_len, &cap_count,
- opc, NULL);
- devm_kfree(ice_hw_to_dev(hw), cbuf);
+ cbuf = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
+ if (!cbuf)
+ return ICE_ERR_NO_MEMORY;
- if (!status || hw->adminq.sq_last_status != ICE_AQ_RC_ENOMEM)
- break;
+ /* Although the driver doesn't know the number of capabilities the
+ * device will return, we can simply send a 4KB buffer, the maximum
+ * possible size that firmware can return.
+ */
+ cap_count = ICE_AQ_MAX_BUF_LEN / sizeof(struct ice_aqc_list_caps_elem);
- /* If ENOMEM is returned, try again with bigger buffer */
- } while (--retries);
+ status = ice_aq_list_caps(hw, cbuf, ICE_AQ_MAX_BUF_LEN, &cap_count,
+ ice_aqc_opc_list_func_caps, NULL);
+ if (!status)
+ ice_parse_func_caps(hw, func_caps, cbuf, cap_count);
+ kfree(cbuf);
return status;
}
@@ -2011,11 +2336,11 @@ enum ice_status ice_get_caps(struct ice_hw *hw)
{
enum ice_status status;
- status = ice_discover_caps(hw, ice_aqc_opc_list_dev_caps);
- if (!status)
- status = ice_discover_caps(hw, ice_aqc_opc_list_func_caps);
+ status = ice_discover_dev_caps(hw, &hw->dev_caps);
+ if (status)
+ return status;
- return status;
+ return ice_discover_func_caps(hw, &hw->func_caps);
}
/**
@@ -2251,7 +2576,7 @@ ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high,
/**
* ice_aq_set_phy_cfg
* @hw: pointer to the HW struct
- * @lport: logical port number
+ * @pi: port info structure of the interested logical port
* @cfg: structure with PHY configuration data to be set
* @cd: pointer to command details structure or NULL
*
@@ -2261,7 +2586,7 @@ ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high,
* parameters. This status will be indicated by the command response (0x0601).
*/
enum ice_status
-ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport,
+ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi,
struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd)
{
struct ice_aq_desc desc;
@@ -2280,24 +2605,29 @@ ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport,
}
ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_phy_cfg);
- desc.params.set_phy.lport_num = lport;
+ desc.params.set_phy.lport_num = pi->lport;
desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
- ice_debug(hw, ICE_DBG_LINK, "phy_type_low = 0x%llx\n",
+ ice_debug(hw, ICE_DBG_LINK, "set phy cfg\n");
+ ice_debug(hw, ICE_DBG_LINK, " phy_type_low = 0x%llx\n",
(unsigned long long)le64_to_cpu(cfg->phy_type_low));
- ice_debug(hw, ICE_DBG_LINK, "phy_type_high = 0x%llx\n",
+ ice_debug(hw, ICE_DBG_LINK, " phy_type_high = 0x%llx\n",
(unsigned long long)le64_to_cpu(cfg->phy_type_high));
- ice_debug(hw, ICE_DBG_LINK, "caps = 0x%x\n", cfg->caps);
- ice_debug(hw, ICE_DBG_LINK, "low_power_ctrl = 0x%x\n",
- cfg->low_power_ctrl);
- ice_debug(hw, ICE_DBG_LINK, "eee_cap = 0x%x\n", cfg->eee_cap);
- ice_debug(hw, ICE_DBG_LINK, "eeer_value = 0x%x\n", cfg->eeer_value);
- ice_debug(hw, ICE_DBG_LINK, "link_fec_opt = 0x%x\n", cfg->link_fec_opt);
+ ice_debug(hw, ICE_DBG_LINK, " caps = 0x%x\n", cfg->caps);
+ ice_debug(hw, ICE_DBG_LINK, " low_power_ctrl_an = 0x%x\n",
+ cfg->low_power_ctrl_an);
+ ice_debug(hw, ICE_DBG_LINK, " eee_cap = 0x%x\n", cfg->eee_cap);
+ ice_debug(hw, ICE_DBG_LINK, " eeer_value = 0x%x\n", cfg->eeer_value);
+ ice_debug(hw, ICE_DBG_LINK, " link_fec_opt = 0x%x\n",
+ cfg->link_fec_opt);
status = ice_aq_send_cmd(hw, &desc, cfg, sizeof(*cfg), cd);
if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE)
status = 0;
+ if (!status)
+ pi->phy.curr_user_phy_cfg = *cfg;
+
return status;
}
@@ -2331,9 +2661,6 @@ enum ice_status ice_update_link_info(struct ice_port_info *pi)
status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
pcaps, NULL);
- if (!status)
- memcpy(li->module_type, &pcaps->module_type,
- sizeof(li->module_type));
devm_kfree(ice_hw_to_dev(hw), pcaps);
}
@@ -2342,28 +2669,101 @@ enum ice_status ice_update_link_info(struct ice_port_info *pi)
}
/**
- * ice_set_fc
+ * ice_cache_phy_user_req
* @pi: port information structure
- * @aq_failures: pointer to status code, specific to ice_set_fc routine
- * @ena_auto_link_update: enable automatic link update
+ * @cache_data: PHY logging data
+ * @cache_mode: PHY logging mode
*
- * Set the requested flow control mode.
+ * Log the user request on (FC, FEC, SPEED) for later use.
+ */
+static void
+ice_cache_phy_user_req(struct ice_port_info *pi,
+ struct ice_phy_cache_mode_data cache_data,
+ enum ice_phy_cache_mode cache_mode)
+{
+ if (!pi)
+ return;
+
+ switch (cache_mode) {
+ case ICE_FC_MODE:
+ pi->phy.curr_user_fc_req = cache_data.data.curr_user_fc_req;
+ break;
+ case ICE_SPEED_MODE:
+ pi->phy.curr_user_speed_req =
+ cache_data.data.curr_user_speed_req;
+ break;
+ case ICE_FEC_MODE:
+ pi->phy.curr_user_fec_req = cache_data.data.curr_user_fec_req;
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * ice_caps_to_fc_mode
+ * @caps: PHY capabilities
+ *
+ * Convert PHY FC capabilities to ice FC mode
+ */
+enum ice_fc_mode ice_caps_to_fc_mode(u8 caps)
+{
+ if (caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE &&
+ caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
+ return ICE_FC_FULL;
+
+ if (caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
+ return ICE_FC_TX_PAUSE;
+
+ if (caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
+ return ICE_FC_RX_PAUSE;
+
+ return ICE_FC_NONE;
+}
+
+/**
+ * ice_caps_to_fec_mode
+ * @caps: PHY capabilities
+ * @fec_options: Link FEC options
+ *
+ * Convert PHY FEC capabilities to ice FEC mode
+ */
+enum ice_fec_mode ice_caps_to_fec_mode(u8 caps, u8 fec_options)
+{
+ if (caps & ICE_AQC_PHY_EN_AUTO_FEC)
+ return ICE_FEC_AUTO;
+
+ if (fec_options & (ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN |
+ ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
+ ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN |
+ ICE_AQC_PHY_FEC_25G_KR_REQ))
+ return ICE_FEC_BASER;
+
+ if (fec_options & (ICE_AQC_PHY_FEC_25G_RS_528_REQ |
+ ICE_AQC_PHY_FEC_25G_RS_544_REQ |
+ ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN))
+ return ICE_FEC_RS;
+
+ return ICE_FEC_NONE;
+}
+
+/**
+ * ice_cfg_phy_fc - Configure PHY FC data based on FC mode
+ * @pi: port information structure
+ * @cfg: PHY configuration data to set FC mode
+ * @req_mode: FC mode to configure
*/
enum ice_status
-ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
+ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+ enum ice_fc_mode req_mode)
{
- struct ice_aqc_set_phy_cfg_data cfg = { 0 };
- struct ice_aqc_get_phy_caps_data *pcaps;
- enum ice_status status;
+ struct ice_phy_cache_mode_data cache_data;
u8 pause_mask = 0x0;
- struct ice_hw *hw;
- if (!pi)
- return ICE_ERR_PARAM;
- hw = pi->hw;
- *aq_failures = ICE_SET_FC_AQ_FAIL_NONE;
+ if (!pi || !cfg)
+ return ICE_ERR_BAD_PTR;
- switch (pi->fc.req_mode) {
+ switch (req_mode) {
case ICE_FC_FULL:
pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE;
pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE;
@@ -2378,6 +2778,42 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
break;
}
+ /* clear the old pause settings */
+ cfg->caps &= ~(ICE_AQC_PHY_EN_TX_LINK_PAUSE |
+ ICE_AQC_PHY_EN_RX_LINK_PAUSE);
+
+ /* set the new capabilities */
+ cfg->caps |= pause_mask;
+
+ /* Cache user FC request */
+ cache_data.data.curr_user_fc_req = req_mode;
+ ice_cache_phy_user_req(pi, cache_data, ICE_FC_MODE);
+
+ return 0;
+}
+
+/**
+ * ice_set_fc
+ * @pi: port information structure
+ * @aq_failures: pointer to status code, specific to ice_set_fc routine
+ * @ena_auto_link_update: enable automatic link update
+ *
+ * Set the requested flow control mode.
+ */
+enum ice_status
+ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
+{
+ struct ice_aqc_set_phy_cfg_data cfg = { 0 };
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ enum ice_status status;
+ struct ice_hw *hw;
+
+ if (!pi || !aq_failures)
+ return ICE_ERR_BAD_PTR;
+
+ *aq_failures = 0;
+ hw = pi->hw;
+
pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
return ICE_ERR_NO_MEMORY;
@@ -2390,12 +2826,12 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
goto out;
}
- /* clear the old pause settings */
- cfg.caps = pcaps->caps & ~(ICE_AQC_PHY_EN_TX_LINK_PAUSE |
- ICE_AQC_PHY_EN_RX_LINK_PAUSE);
+ ice_copy_phy_caps_to_cfg(pi, pcaps, &cfg);
- /* set the new capabilities */
- cfg.caps |= pause_mask;
+ /* Configure the set PHY data */
+ status = ice_cfg_phy_fc(pi, &cfg, pi->fc.req_mode);
+ if (status)
+ goto out;
/* If the capabilities have changed, then set the new config */
if (cfg.caps != pcaps->caps) {
@@ -2404,15 +2840,8 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
/* Auto restart link so settings take effect */
if (ena_auto_link_update)
cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
- /* Copy over all the old settings */
- cfg.phy_type_high = pcaps->phy_type_high;
- cfg.phy_type_low = pcaps->phy_type_low;
- cfg.low_power_ctrl = pcaps->low_power_ctrl;
- cfg.eee_cap = pcaps->eee_cap;
- cfg.eeer_value = pcaps->eeer_value;
- cfg.link_fec_opt = pcaps->link_fec_options;
-
- status = ice_aq_set_phy_cfg(hw, pi->lport, &cfg, NULL);
+
+ status = ice_aq_set_phy_cfg(hw, pi, &cfg, NULL);
if (status) {
*aq_failures = ICE_SET_FC_AQ_FAIL_SET;
goto out;
@@ -2442,7 +2871,44 @@ out:
}
/**
+ * ice_phy_caps_equals_cfg
+ * @phy_caps: PHY capabilities
+ * @phy_cfg: PHY configuration
+ *
+ * Helper function to determine if PHY capabilities matches PHY
+ * configuration
+ */
+bool
+ice_phy_caps_equals_cfg(struct ice_aqc_get_phy_caps_data *phy_caps,
+ struct ice_aqc_set_phy_cfg_data *phy_cfg)
+{
+ u8 caps_mask, cfg_mask;
+
+ if (!phy_caps || !phy_cfg)
+ return false;
+
+ /* These bits are not common between capabilities and configuration.
+ * Do not use them to determine equality.
+ */
+ caps_mask = ICE_AQC_PHY_CAPS_MASK & ~(ICE_AQC_PHY_AN_MODE |
+ ICE_AQC_GET_PHY_EN_MOD_QUAL);
+ cfg_mask = ICE_AQ_PHY_ENA_VALID_MASK & ~ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+ if (phy_caps->phy_type_low != phy_cfg->phy_type_low ||
+ phy_caps->phy_type_high != phy_cfg->phy_type_high ||
+ ((phy_caps->caps & caps_mask) != (phy_cfg->caps & cfg_mask)) ||
+ phy_caps->low_power_ctrl_an != phy_cfg->low_power_ctrl_an ||
+ phy_caps->eee_cap != phy_cfg->eee_cap ||
+ phy_caps->eeer_value != phy_cfg->eeer_value ||
+ phy_caps->link_fec_options != phy_cfg->link_fec_opt)
+ return false;
+
+ return true;
+}
+
+/**
* ice_copy_phy_caps_to_cfg - Copy PHY ability data to configuration data
+ * @pi: port information structure
* @caps: PHY ability structure to copy date from
* @cfg: PHY configuration structure to copy data to
*
@@ -2450,42 +2916,73 @@ out:
* data structure
*/
void
-ice_copy_phy_caps_to_cfg(struct ice_aqc_get_phy_caps_data *caps,
+ice_copy_phy_caps_to_cfg(struct ice_port_info *pi,
+ struct ice_aqc_get_phy_caps_data *caps,
struct ice_aqc_set_phy_cfg_data *cfg)
{
- if (!caps || !cfg)
+ if (!pi || !caps || !cfg)
return;
+ memset(cfg, 0, sizeof(*cfg));
cfg->phy_type_low = caps->phy_type_low;
cfg->phy_type_high = caps->phy_type_high;
cfg->caps = caps->caps;
- cfg->low_power_ctrl = caps->low_power_ctrl;
+ cfg->low_power_ctrl_an = caps->low_power_ctrl_an;
cfg->eee_cap = caps->eee_cap;
cfg->eeer_value = caps->eeer_value;
cfg->link_fec_opt = caps->link_fec_options;
+ cfg->module_compliance_enforcement =
+ caps->module_compliance_enforcement;
+
+ if (ice_fw_supports_link_override(pi->hw)) {
+ struct ice_link_default_override_tlv tlv;
+
+ if (ice_get_link_default_override(&tlv, pi))
+ return;
+
+ if (tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE)
+ cfg->module_compliance_enforcement |=
+ ICE_LINK_OVERRIDE_STRICT_MODE;
+ }
}
/**
* ice_cfg_phy_fec - Configure PHY FEC data based on FEC mode
+ * @pi: port information structure
* @cfg: PHY configuration data to set FEC mode
* @fec: FEC mode to configure
- *
- * Caller should copy ice_aqc_get_phy_caps_data.caps ICE_AQC_PHY_EN_AUTO_FEC
- * (bit 7) and ice_aqc_get_phy_caps_data.link_fec_options to cfg.caps
- * ICE_AQ_PHY_ENA_AUTO_FEC (bit 7) and cfg.link_fec_options before calling.
*/
-void
-ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec)
+enum ice_status
+ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+ enum ice_fec_mode fec)
{
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ enum ice_status status;
+
+ if (!pi || !cfg)
+ return ICE_ERR_BAD_PTR;
+
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps)
+ return ICE_ERR_NO_MEMORY;
+
+ status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps,
+ NULL);
+ if (status)
+ goto out;
+
+ cfg->caps |= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC;
+ cfg->link_fec_opt = pcaps->link_fec_options;
+
switch (fec) {
case ICE_FEC_BASER:
/* Clear RS bits, and AND BASE-R ability
* bits and OR request bits.
*/
cfg->link_fec_opt &= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN |
- ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN;
+ ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN;
cfg->link_fec_opt |= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
- ICE_AQC_PHY_FEC_25G_KR_REQ;
+ ICE_AQC_PHY_FEC_25G_KR_REQ;
break;
case ICE_FEC_RS:
/* Clear BASE-R bits, and AND RS ability
@@ -2493,7 +2990,7 @@ ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec)
*/
cfg->link_fec_opt &= ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN;
cfg->link_fec_opt |= ICE_AQC_PHY_FEC_25G_RS_528_REQ |
- ICE_AQC_PHY_FEC_25G_RS_544_REQ;
+ ICE_AQC_PHY_FEC_25G_RS_544_REQ;
break;
case ICE_FEC_NONE:
/* Clear all FEC option bits. */
@@ -2502,8 +2999,28 @@ ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec)
case ICE_FEC_AUTO:
/* AND auto FEC bit, and all caps bits. */
cfg->caps &= ICE_AQC_PHY_CAPS_MASK;
+ cfg->link_fec_opt |= pcaps->link_fec_options;
+ break;
+ default:
+ status = ICE_ERR_PARAM;
break;
}
+
+ if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(pi->hw)) {
+ struct ice_link_default_override_tlv tlv;
+
+ if (ice_get_link_default_override(&tlv, pi))
+ goto out;
+
+ if (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE) &&
+ (tlv.options & ICE_LINK_OVERRIDE_EN))
+ cfg->link_fec_opt = tlv.fec_options;
+ }
+
+out:
+ kfree(pcaps);
+
+ return status;
}
/**
@@ -3703,3 +4220,121 @@ ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
ice_debug(hw, ICE_DBG_SCHED, "query element failed\n");
return status;
}
+
+/**
+ * ice_fw_supports_link_override
+ * @hw: pointer to the hardware structure
+ *
+ * Checks if the firmware supports link override
+ */
+bool ice_fw_supports_link_override(struct ice_hw *hw)
+{
+ /* Currently, only supported for E810 devices */
+ if (hw->mac_type != ICE_MAC_E810)
+ return false;
+
+ if (hw->api_maj_ver == ICE_FW_API_LINK_OVERRIDE_MAJ) {
+ if (hw->api_min_ver > ICE_FW_API_LINK_OVERRIDE_MIN)
+ return true;
+ if (hw->api_min_ver == ICE_FW_API_LINK_OVERRIDE_MIN &&
+ hw->api_patch >= ICE_FW_API_LINK_OVERRIDE_PATCH)
+ return true;
+ } else if (hw->api_maj_ver > ICE_FW_API_LINK_OVERRIDE_MAJ) {
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * ice_get_link_default_override
+ * @ldo: pointer to the link default override struct
+ * @pi: pointer to the port info struct
+ *
+ * Gets the link default override for a port
+ */
+enum ice_status
+ice_get_link_default_override(struct ice_link_default_override_tlv *ldo,
+ struct ice_port_info *pi)
+{
+ u16 i, tlv, tlv_len, tlv_start, buf, offset;
+ struct ice_hw *hw = pi->hw;
+ enum ice_status status;
+
+ status = ice_get_pfa_module_tlv(hw, &tlv, &tlv_len,
+ ICE_SR_LINK_DEFAULT_OVERRIDE_PTR);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT,
+ "Failed to read link override TLV.\n");
+ return status;
+ }
+
+ /* Each port has its own config; calculate for our port */
+ tlv_start = tlv + pi->lport * ICE_SR_PFA_LINK_OVERRIDE_WORDS +
+ ICE_SR_PFA_LINK_OVERRIDE_OFFSET;
+
+ /* link options first */
+ status = ice_read_sr_word(hw, tlv_start, &buf);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT,
+ "Failed to read override link options.\n");
+ return status;
+ }
+ ldo->options = buf & ICE_LINK_OVERRIDE_OPT_M;
+ ldo->phy_config = (buf & ICE_LINK_OVERRIDE_PHY_CFG_M) >>
+ ICE_LINK_OVERRIDE_PHY_CFG_S;
+
+ /* link PHY config */
+ offset = tlv_start + ICE_SR_PFA_LINK_OVERRIDE_FEC_OFFSET;
+ status = ice_read_sr_word(hw, offset, &buf);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT,
+ "Failed to read override phy config.\n");
+ return status;
+ }
+ ldo->fec_options = buf & ICE_LINK_OVERRIDE_FEC_OPT_M;
+
+ /* PHY types low */
+ offset = tlv_start + ICE_SR_PFA_LINK_OVERRIDE_PHY_OFFSET;
+ for (i = 0; i < ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS; i++) {
+ status = ice_read_sr_word(hw, (offset + i), &buf);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT,
+ "Failed to read override link options.\n");
+ return status;
+ }
+ /* shift 16 bits at a time to fill 64 bits */
+ ldo->phy_type_low |= ((u64)buf << (i * 16));
+ }
+
+ /* PHY types high */
+ offset = tlv_start + ICE_SR_PFA_LINK_OVERRIDE_PHY_OFFSET +
+ ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS;
+ for (i = 0; i < ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS; i++) {
+ status = ice_read_sr_word(hw, (offset + i), &buf);
+ if (status) {
+ ice_debug(hw, ICE_DBG_INIT,
+ "Failed to read override link options.\n");
+ return status;
+ }
+ /* shift 16 bits at a time to fill 64 bits */
+ ldo->phy_type_high |= ((u64)buf << (i * 16));
+ }
+
+ return status;
+}
+
+/**
+ * ice_is_phy_caps_an_enabled - check if PHY capabilities autoneg is enabled
+ * @caps: get PHY capability data
+ */
+bool ice_is_phy_caps_an_enabled(struct ice_aqc_get_phy_caps_data *caps)
+{
+ if (caps->caps & ICE_AQC_PHY_AN_MODE ||
+ caps->low_power_ctrl_an & (ICE_AQC_PHY_AN_EN_CLAUSE28 |
+ ICE_AQC_PHY_AN_EN_CLAUSE73 |
+ ICE_AQC_PHY_AN_EN_CLAUSE37))
+ return true;
+
+ return false;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index d1238f43e872..33a681a75439 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -98,17 +98,33 @@ ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
struct ice_sq_cd *cd);
enum ice_status ice_clear_pf_cfg(struct ice_hw *hw);
enum ice_status
-ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport,
+ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi,
struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd);
+bool ice_fw_supports_link_override(struct ice_hw *hw);
+enum ice_status
+ice_get_link_default_override(struct ice_link_default_override_tlv *ldo,
+ struct ice_port_info *pi);
+bool ice_is_phy_caps_an_enabled(struct ice_aqc_get_phy_caps_data *caps);
+
+enum ice_fc_mode ice_caps_to_fc_mode(u8 caps);
+enum ice_fec_mode ice_caps_to_fec_mode(u8 caps, u8 fec_options);
enum ice_status
ice_set_fc(struct ice_port_info *pi, u8 *aq_failures,
bool ena_auto_link_update);
+enum ice_status
+ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+ enum ice_fc_mode fc);
+bool
+ice_phy_caps_equals_cfg(struct ice_aqc_get_phy_caps_data *caps,
+ struct ice_aqc_set_phy_cfg_data *cfg);
void
-ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec);
-void
-ice_copy_phy_caps_to_cfg(struct ice_aqc_get_phy_caps_data *caps,
+ice_copy_phy_caps_to_cfg(struct ice_port_info *pi,
+ struct ice_aqc_get_phy_caps_data *caps,
struct ice_aqc_set_phy_cfg_data *cfg);
enum ice_status
+ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+ enum ice_fec_mode fec);
+enum ice_status
ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
struct ice_sq_cd *cd);
enum ice_status
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 7066775769eb..06b93e97892d 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -966,12 +966,8 @@ static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_aqc_set_phy_cfg_data config = { 0 };
- struct ice_aqc_get_phy_caps_data *caps;
struct ice_vsi *vsi = np->vsi;
- u8 sw_cfg_caps, sw_cfg_fec;
struct ice_port_info *pi;
- enum ice_status status;
- int err = 0;
pi = vsi->port_info;
if (!pi)
@@ -983,54 +979,26 @@ static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
return -EOPNOTSUPP;
}
- /* Get last SW configuration */
- caps = kzalloc(sizeof(*caps), GFP_KERNEL);
- if (!caps)
- return -ENOMEM;
-
- status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG,
- caps, NULL);
- if (status) {
- err = -EAGAIN;
- goto done;
- }
-
- /* Copy SW configuration returned from PHY caps to PHY config */
- ice_copy_phy_caps_to_cfg(caps, &config);
- sw_cfg_caps = caps->caps;
- sw_cfg_fec = caps->link_fec_options;
-
- /* Get toloplogy caps, then copy PHY FEC topoloy caps to PHY config */
- memset(caps, 0, sizeof(*caps));
-
- status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
- caps, NULL);
- if (status) {
- err = -EAGAIN;
- goto done;
- }
-
- config.caps |= (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC);
- config.link_fec_opt = caps->link_fec_options;
+ /* Proceed only if requesting different FEC mode */
+ if (pi->phy.curr_user_fec_req == req_fec)
+ return 0;
- ice_cfg_phy_fec(&config, req_fec);
+ /* Copy the current user PHY configuration. The current user PHY
+ * configuration is initialized during probe from PHY capabilities
+ * software mode, and updated on set PHY configuration.
+ */
+ memcpy(&config, &pi->phy.curr_user_phy_cfg, sizeof(config));
- /* If FEC mode has changed, then set PHY configuration and enable AN. */
- if ((config.caps & ICE_AQ_PHY_ENA_AUTO_FEC) !=
- (sw_cfg_caps & ICE_AQC_PHY_EN_AUTO_FEC) ||
- config.link_fec_opt != sw_cfg_fec) {
- if (caps->caps & ICE_AQC_PHY_AN_MODE)
- config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+ ice_cfg_phy_fec(pi, &config, req_fec);
+ config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
- status = ice_aq_set_phy_cfg(pi->hw, pi->lport, &config, NULL);
+ if (ice_aq_set_phy_cfg(pi->hw, pi, &config, NULL))
+ return -EAGAIN;
- if (status)
- err = -EAGAIN;
- }
+ /* Save requested FEC config */
+ pi->phy.curr_user_fec_req = req_fec;
-done:
- kfree(caps);
- return err;
+ return 0;
}
/**
@@ -1228,6 +1196,17 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
bitmap_xor(change_flags, pf->flags, orig_flags, ICE_PF_FLAGS_NBITS);
+ /* Do not allow change to link-down-on-close when Total Port Shutdown
+ * is enabled.
+ */
+ if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, change_flags) &&
+ test_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags)) {
+ dev_err(dev, "Setting link-down-on-close not supported on this port\n");
+ set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
+ ret = -EINVAL;
+ goto ethtool_exit;
+ }
+
if (test_bit(ICE_FLAG_FW_LLDP_AGENT, change_flags)) {
if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) {
enum ice_status status;
@@ -1315,6 +1294,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
change_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags);
ret = -EAGAIN;
}
+ethtool_exit:
clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
return ret;
}
@@ -1419,6 +1399,77 @@ ice_get_ethtool_stats(struct net_device *netdev,
}
}
+#define ICE_PHY_TYPE_LOW_MASK_MIN_1G (ICE_PHY_TYPE_LOW_100BASE_TX | \
+ ICE_PHY_TYPE_LOW_100M_SGMII)
+
+#define ICE_PHY_TYPE_LOW_MASK_MIN_25G (ICE_PHY_TYPE_LOW_MASK_MIN_1G | \
+ ICE_PHY_TYPE_LOW_1000BASE_T | \
+ ICE_PHY_TYPE_LOW_1000BASE_SX | \
+ ICE_PHY_TYPE_LOW_1000BASE_LX | \
+ ICE_PHY_TYPE_LOW_1000BASE_KX | \
+ ICE_PHY_TYPE_LOW_1G_SGMII | \
+ ICE_PHY_TYPE_LOW_2500BASE_T | \
+ ICE_PHY_TYPE_LOW_2500BASE_X | \
+ ICE_PHY_TYPE_LOW_2500BASE_KX | \
+ ICE_PHY_TYPE_LOW_5GBASE_T | \
+ ICE_PHY_TYPE_LOW_5GBASE_KR | \
+ ICE_PHY_TYPE_LOW_10GBASE_T | \
+ ICE_PHY_TYPE_LOW_10G_SFI_DA | \
+ ICE_PHY_TYPE_LOW_10GBASE_SR | \
+ ICE_PHY_TYPE_LOW_10GBASE_LR | \
+ ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 | \
+ ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC | \
+ ICE_PHY_TYPE_LOW_10G_SFI_C2C)
+
+#define ICE_PHY_TYPE_LOW_MASK_100G (ICE_PHY_TYPE_LOW_100GBASE_CR4 | \
+ ICE_PHY_TYPE_LOW_100GBASE_SR4 | \
+ ICE_PHY_TYPE_LOW_100GBASE_LR4 | \
+ ICE_PHY_TYPE_LOW_100GBASE_KR4 | \
+ ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC | \
+ ICE_PHY_TYPE_LOW_100G_CAUI4 | \
+ ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC | \
+ ICE_PHY_TYPE_LOW_100G_AUI4 | \
+ ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 | \
+ ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 | \
+ ICE_PHY_TYPE_LOW_100GBASE_CP2 | \
+ ICE_PHY_TYPE_LOW_100GBASE_SR2 | \
+ ICE_PHY_TYPE_LOW_100GBASE_DR)
+
+#define ICE_PHY_TYPE_HIGH_MASK_100G (ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 | \
+ ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC |\
+ ICE_PHY_TYPE_HIGH_100G_CAUI2 | \
+ ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC | \
+ ICE_PHY_TYPE_HIGH_100G_AUI2)
+
+/**
+ * ice_mask_min_supported_speeds
+ * @phy_types_high: PHY type high
+ * @phy_types_low: PHY type low to apply minimum supported speeds mask
+ *
+ * Apply minimum supported speeds mask to PHY type low. These are the speeds
+ * for ethtool supported link mode.
+ */
+static
+void ice_mask_min_supported_speeds(u64 phy_types_high, u64 *phy_types_low)
+{
+ /* if QSFP connection with 100G speed, minimum supported speed is 25G */
+ if (*phy_types_low & ICE_PHY_TYPE_LOW_MASK_100G ||
+ phy_types_high & ICE_PHY_TYPE_HIGH_MASK_100G)
+ *phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_25G;
+ else
+ *phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_1G;
+}
+
+#define ice_ethtool_advertise_link_mode(aq_link_speed, ethtool_link_mode) \
+ do { \
+ if (req_speeds & (aq_link_speed) || \
+ (!req_speeds && \
+ (adv_phy_type_lo & phy_type_mask_lo || \
+ adv_phy_type_hi & phy_type_mask_hi))) \
+ ethtool_link_ksettings_add_link_mode(ks, advertising,\
+ ethtool_link_mode); \
+ } while (0)
+
/**
* ice_phy_type_to_ethtool - convert the phy_types to ethtool link modes
* @netdev: network interface device structure
@@ -1429,277 +1480,312 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
struct ethtool_link_ksettings *ks)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
- struct ice_link_status *hw_link_info;
- bool need_add_adv_mode = false;
struct ice_vsi *vsi = np->vsi;
- u64 phy_types_high;
- u64 phy_types_low;
+ struct ice_pf *pf = vsi->back;
+ u64 phy_type_mask_lo = 0;
+ u64 phy_type_mask_hi = 0;
+ u64 adv_phy_type_lo = 0;
+ u64 adv_phy_type_hi = 0;
+ u64 phy_types_high = 0;
+ u64 phy_types_low = 0;
+ u16 req_speeds;
+
+ req_speeds = vsi->port_info->phy.link_info.req_speeds;
+
+ /* Check if lenient mode is supported and enabled, or in strict mode.
+ *
+ * In lenient mode the Supported link modes are the PHY types without
+ * media. The Advertising link mode is either 1. the user requested
+ * speed, 2. the override PHY mask, or 3. the PHY types with media.
+ *
+ * In strict mode Supported link mode are the PHY type with media,
+ * and Advertising link modes are the media PHY type or the speed
+ * requested by user.
+ */
+ if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) {
+ struct ice_link_default_override_tlv *ldo;
- hw_link_info = &vsi->port_info->phy.link_info;
- phy_types_low = vsi->port_info->phy.phy_type_low;
- phy_types_high = vsi->port_info->phy.phy_type_high;
+ ldo = &pf->link_dflt_override;
+ phy_types_low = le64_to_cpu(pf->nvm_phy_type_lo);
+ phy_types_high = le64_to_cpu(pf->nvm_phy_type_hi);
+
+ ice_mask_min_supported_speeds(phy_types_high, &phy_types_low);
+
+ /* If override enabled and PHY mask set, then
+ * Advertising link mode is the intersection of the PHY
+ * types without media and the override PHY mask.
+ */
+ if (ldo->options & ICE_LINK_OVERRIDE_EN &&
+ (ldo->phy_type_low || ldo->phy_type_high)) {
+ adv_phy_type_lo =
+ le64_to_cpu(pf->nvm_phy_type_lo) &
+ ldo->phy_type_low;
+ adv_phy_type_hi =
+ le64_to_cpu(pf->nvm_phy_type_hi) &
+ ldo->phy_type_high;
+ }
+ } else {
+ phy_types_low = vsi->port_info->phy.phy_type_low;
+ phy_types_high = vsi->port_info->phy.phy_type_high;
+ }
+
+ /* If Advertising link mode PHY type is not using override PHY type,
+ * then use PHY type with media.
+ */
+ if (!adv_phy_type_lo && !adv_phy_type_hi) {
+ adv_phy_type_lo = vsi->port_info->phy.phy_type_low;
+ adv_phy_type_hi = vsi->port_info->phy.phy_type_high;
+ }
ethtool_link_ksettings_zero_link_mode(ks, supported);
ethtool_link_ksettings_zero_link_mode(ks, advertising);
- if (phy_types_low & ICE_PHY_TYPE_LOW_100BASE_TX ||
- phy_types_low & ICE_PHY_TYPE_LOW_100M_SGMII) {
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_100BASE_TX |
+ ICE_PHY_TYPE_LOW_100M_SGMII;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100baseT_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100MB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 100baseT_Full);
+
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100MB,
+ 100baseT_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_T ||
- phy_types_low & ICE_PHY_TYPE_LOW_1G_SGMII) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_T |
+ ICE_PHY_TYPE_LOW_1G_SGMII;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
1000baseT_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 1000baseT_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB,
+ 1000baseT_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_KX) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_KX;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
1000baseKX_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 1000baseKX_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB,
+ 1000baseKX_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_SX ||
- phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_LX) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_SX |
+ ICE_PHY_TYPE_LOW_1000BASE_LX;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
1000baseX_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 1000baseX_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB,
+ 1000baseX_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_T) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_2500BASE_T;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
2500baseT_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 2500baseT_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_2500MB,
+ 2500baseT_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_X ||
- phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_KX) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_2500BASE_X |
+ ICE_PHY_TYPE_LOW_2500BASE_KX;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
2500baseX_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 2500baseX_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_2500MB,
+ 2500baseX_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_T ||
- phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_KR) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_5GBASE_T |
+ ICE_PHY_TYPE_LOW_5GBASE_KR;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
5000baseT_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_5GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 5000baseT_Full);
- }
- if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_T ||
- phy_types_low & ICE_PHY_TYPE_LOW_10G_SFI_DA ||
- phy_types_low & ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC ||
- phy_types_low & ICE_PHY_TYPE_LOW_10G_SFI_C2C) {
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_5GB,
+ 5000baseT_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_T |
+ ICE_PHY_TYPE_LOW_10G_SFI_DA |
+ ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC |
+ ICE_PHY_TYPE_LOW_10G_SFI_C2C;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
10000baseT_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 10000baseT_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
+ 10000baseT_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_KR_CR1) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_KR_CR1;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
10000baseKR_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 10000baseKR_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
+ 10000baseKR_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_SR) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_SR;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
10000baseSR_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 10000baseSR_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
+ 10000baseSR_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_LR) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_LR;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
10000baseLR_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 10000baseLR_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB,
+ 10000baseLR_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_T ||
- phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR ||
- phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR_S ||
- phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR1 ||
- phy_types_low & ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC ||
- phy_types_low & ICE_PHY_TYPE_LOW_25G_AUI_C2C) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_T |
+ ICE_PHY_TYPE_LOW_25GBASE_CR |
+ ICE_PHY_TYPE_LOW_25GBASE_CR_S |
+ ICE_PHY_TYPE_LOW_25GBASE_CR1 |
+ ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC |
+ ICE_PHY_TYPE_LOW_25G_AUI_C2C;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
25000baseCR_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 25000baseCR_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB,
+ 25000baseCR_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_SR ||
- phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_LR) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_SR |
+ ICE_PHY_TYPE_LOW_25GBASE_LR;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
25000baseSR_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 25000baseSR_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB,
+ 25000baseSR_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR ||
- phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR_S ||
- phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR1) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_KR |
+ ICE_PHY_TYPE_LOW_25GBASE_KR_S |
+ ICE_PHY_TYPE_LOW_25GBASE_KR1;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
25000baseKR_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 25000baseKR_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB,
+ 25000baseKR_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_KR4) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_KR4;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseKR4_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 40000baseKR4_Full);
- }
- if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_CR4 ||
- phy_types_low & ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC ||
- phy_types_low & ICE_PHY_TYPE_LOW_40G_XLAUI) {
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
+ 40000baseKR4_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_CR4 |
+ ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC |
+ ICE_PHY_TYPE_LOW_40G_XLAUI;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseCR4_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 40000baseCR4_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
+ 40000baseCR4_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_SR4) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_SR4;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseSR4_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 40000baseSR4_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
+ 40000baseSR4_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_LR4) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_LR4;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseLR4_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 40000baseLR4_Full);
- }
- if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CR2 ||
- phy_types_low & ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC ||
- phy_types_low & ICE_PHY_TYPE_LOW_50G_LAUI2 ||
- phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC ||
- phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI2 ||
- phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CP ||
- phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_SR ||
- phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC ||
- phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI1) {
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB,
+ 40000baseLR4_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_CR2 |
+ ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC |
+ ICE_PHY_TYPE_LOW_50G_LAUI2 |
+ ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC |
+ ICE_PHY_TYPE_LOW_50G_AUI2 |
+ ICE_PHY_TYPE_LOW_50GBASE_CP |
+ ICE_PHY_TYPE_LOW_50GBASE_SR |
+ ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC |
+ ICE_PHY_TYPE_LOW_50G_AUI1;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
50000baseCR2_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 50000baseCR2_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB,
+ 50000baseCR2_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR2 ||
- phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_KR2 |
+ ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
50000baseKR2_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 50000baseKR2_Full);
- }
- if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_SR2 ||
- phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_LR2 ||
- phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_FR ||
- phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_LR) {
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB,
+ 50000baseKR2_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_SR2 |
+ ICE_PHY_TYPE_LOW_50GBASE_LR2 |
+ ICE_PHY_TYPE_LOW_50GBASE_FR |
+ ICE_PHY_TYPE_LOW_50GBASE_LR;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
50000baseSR2_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 50000baseSR2_Full);
- }
- if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR4 ||
- phy_types_low & ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC ||
- phy_types_low & ICE_PHY_TYPE_LOW_100G_CAUI4 ||
- phy_types_low & ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC ||
- phy_types_low & ICE_PHY_TYPE_LOW_100G_AUI4 ||
- phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 ||
- phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2 ||
- phy_types_high & ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC ||
- phy_types_high & ICE_PHY_TYPE_HIGH_100G_CAUI2 ||
- phy_types_high & ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC ||
- phy_types_high & ICE_PHY_TYPE_HIGH_100G_AUI2) {
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB,
+ 50000baseSR2_Full);
+ }
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_CR4 |
+ ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC |
+ ICE_PHY_TYPE_LOW_100G_CAUI4 |
+ ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC |
+ ICE_PHY_TYPE_LOW_100G_AUI4 |
+ ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 |
+ ICE_PHY_TYPE_LOW_100GBASE_CP2;
+ phy_type_mask_hi = ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC |
+ ICE_PHY_TYPE_HIGH_100G_CAUI2 |
+ ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC |
+ ICE_PHY_TYPE_HIGH_100G_AUI2;
+ if (phy_types_low & phy_type_mask_lo ||
+ phy_types_high & phy_type_mask_hi) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100000baseCR4_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
- need_add_adv_mode = true;
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
+ 100000baseCR4_Full);
}
- if (need_add_adv_mode) {
- need_add_adv_mode = false;
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 100000baseCR4_Full);
- }
- if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR4 ||
- phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR2) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_SR4 |
+ ICE_PHY_TYPE_LOW_100GBASE_SR2;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100000baseSR4_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
- need_add_adv_mode = true;
- }
- if (need_add_adv_mode) {
- need_add_adv_mode = false;
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 100000baseSR4_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
+ 100000baseSR4_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_LR4 ||
- phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_DR) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_LR4 |
+ ICE_PHY_TYPE_LOW_100GBASE_DR;
+ if (phy_types_low & phy_type_mask_lo) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100000baseLR4_ER4_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
- need_add_adv_mode = true;
- }
- if (need_add_adv_mode) {
- need_add_adv_mode = false;
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 100000baseLR4_ER4_Full);
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
+ 100000baseLR4_ER4_Full);
}
- if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR4 ||
- phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 ||
- phy_types_high & ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4) {
+
+ phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_KR4 |
+ ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4;
+ phy_type_mask_hi = ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4;
+ if (phy_types_low & phy_type_mask_lo ||
+ phy_types_high & phy_type_mask_hi) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100000baseKR4_Full);
- if (!hw_link_info->req_speeds ||
- hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
- need_add_adv_mode = true;
+ ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
+ 100000baseKR4_Full);
}
- if (need_add_adv_mode)
- ethtool_link_ksettings_add_link_mode(ks, advertising,
- 100000baseKR4_Full);
/* Autoneg PHY types */
if (phy_types_low & ICE_PHY_TYPE_LOW_100BASE_TX ||
@@ -2127,18 +2213,18 @@ static int
ice_set_link_ksettings(struct net_device *netdev,
const struct ethtool_link_ksettings *ks)
{
- u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT, lport = 0;
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ethtool_link_ksettings safe_ks, copy_ks;
struct ice_aqc_get_phy_caps_data *abilities;
+ u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT;
u16 adv_link_speed, curr_link_speed, idx;
struct ice_aqc_set_phy_cfg_data config;
struct ice_pf *pf = np->vsi->back;
struct ice_port_info *p;
u8 autoneg_changed = 0;
enum ice_status status;
- u64 phy_type_high;
- u64 phy_type_low;
+ u64 phy_type_high = 0;
+ u64 phy_type_low = 0;
int err = 0;
bool linkup;
@@ -2162,6 +2248,18 @@ ice_set_link_ksettings(struct net_device *netdev,
p->phy.link_info.link_info & ICE_AQ_LINK_UP)
return -EOPNOTSUPP;
+ abilities = kzalloc(sizeof(*abilities), GFP_KERNEL);
+ if (!abilities)
+ return -ENOMEM;
+
+ /* Get the PHY capabilities based on media */
+ status = ice_aq_get_phy_caps(p, false, ICE_AQC_REPORT_TOPO_CAP,
+ abilities, NULL);
+ if (status) {
+ err = -EAGAIN;
+ goto done;
+ }
+
/* copy the ksettings to copy_ks to avoid modifying the original */
memcpy(&copy_ks, ks, sizeof(copy_ks));
@@ -2178,8 +2276,12 @@ ice_set_link_ksettings(struct net_device *netdev,
*/
if (!bitmap_subset(copy_ks.link_modes.advertising,
safe_ks.link_modes.supported,
- __ETHTOOL_LINK_MODE_MASK_NBITS))
- return -EINVAL;
+ __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+ if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags))
+ netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
+ err = -EINVAL;
+ goto done;
+ }
/* get our own copy of the bits to check against */
memset(&safe_ks, 0, sizeof(safe_ks));
@@ -2196,33 +2298,27 @@ ice_set_link_ksettings(struct net_device *netdev,
/* If copy_ks.base and safe_ks.base are not the same now, then they are
* trying to set something that we do not support.
*/
- if (memcmp(&copy_ks.base, &safe_ks.base, sizeof(copy_ks.base)))
- return -EOPNOTSUPP;
+ if (memcmp(&copy_ks.base, &safe_ks.base, sizeof(copy_ks.base))) {
+ err = -EOPNOTSUPP;
+ goto done;
+ }
while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
timeout--;
- if (!timeout)
- return -EBUSY;
+ if (!timeout) {
+ err = -EBUSY;
+ goto done;
+ }
usleep_range(TEST_SET_BITS_SLEEP_MIN, TEST_SET_BITS_SLEEP_MAX);
}
- abilities = kzalloc(sizeof(*abilities), GFP_KERNEL);
- if (!abilities)
- return -ENOMEM;
-
- /* Get the current PHY config */
- status = ice_aq_get_phy_caps(p, false, ICE_AQC_REPORT_SW_CFG, abilities,
- NULL);
- if (status) {
- err = -EAGAIN;
- goto done;
- }
+ /* Copy the current user PHY configuration. The current user PHY
+ * configuration is initialized during probe from PHY capabilities
+ * software mode, and updated on set PHY configuration.
+ */
+ memcpy(&config, &p->phy.curr_user_phy_cfg, sizeof(config));
- /* Copy abilities to config in case autoneg is not set below */
- memset(&config, 0, sizeof(config));
- config.caps = abilities->caps & ~ICE_AQC_PHY_AN_MODE;
- if (abilities->caps & ICE_AQC_PHY_AN_MODE)
- config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+ config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
/* Check autoneg */
err = ice_setup_autoneg(p, &safe_ks, &config, autoneg, &autoneg_changed,
@@ -2257,29 +2353,44 @@ ice_set_link_ksettings(struct net_device *netdev,
goto done;
}
- /* copy over the rest of the abilities */
- config.low_power_ctrl = abilities->low_power_ctrl;
- config.eee_cap = abilities->eee_cap;
- config.eeer_value = abilities->eeer_value;
- config.link_fec_opt = abilities->link_fec_options;
-
/* save the requested speeds */
p->phy.link_info.req_speeds = adv_link_speed;
/* set link and auto negotiation so changes take effect */
config.caps |= ICE_AQ_PHY_ENA_LINK;
- if (phy_type_low || phy_type_high) {
- config.phy_type_high = cpu_to_le64(phy_type_high) &
- abilities->phy_type_high;
- config.phy_type_low = cpu_to_le64(phy_type_low) &
- abilities->phy_type_low;
- } else {
+ /* check if there is a PHY type for the requested advertised speed */
+ if (!(phy_type_low || phy_type_high)) {
+ netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
err = -EAGAIN;
- netdev_info(netdev, "Nothing changed. No PHY_TYPE is corresponded to advertised link speed.\n");
goto done;
}
+ /* intersect requested advertised speed PHY types with media PHY types
+ * for set PHY configuration
+ */
+ config.phy_type_high = cpu_to_le64(phy_type_high) &
+ abilities->phy_type_high;
+ config.phy_type_low = cpu_to_le64(phy_type_low) &
+ abilities->phy_type_low;
+
+ if (!(config.phy_type_high || config.phy_type_low)) {
+ /* If there is no intersection and lenient mode is enabled, then
+ * intersect the requested advertised speed with NVM media type
+ * PHY types.
+ */
+ if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) {
+ config.phy_type_high = cpu_to_le64(phy_type_high) &
+ pf->nvm_phy_type_hi;
+ config.phy_type_low = cpu_to_le64(phy_type_low) &
+ pf->nvm_phy_type_lo;
+ } else {
+ netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
+ err = -EAGAIN;
+ goto done;
+ }
+ }
+
/* If link is up put link down */
if (p->phy.link_info.link_info & ICE_AQ_LINK_UP) {
/* Tell the OS link is going down, the link will go
@@ -2291,12 +2402,15 @@ ice_set_link_ksettings(struct net_device *netdev,
}
/* make the aq call */
- status = ice_aq_set_phy_cfg(&pf->hw, lport, &config, NULL);
+ status = ice_aq_set_phy_cfg(&pf->hw, p, &config, NULL);
if (status) {
netdev_info(netdev, "Set phy config failed,\n");
err = -EAGAIN;
+ goto done;
}
+ /* Save speed request */
+ p->phy.curr_user_speed_req = adv_link_speed;
done:
kfree(abilities);
clear_bit(__ICE_CFG_BUSY, pf->state);
@@ -2873,8 +2987,8 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
if (status)
goto out;
- pause->autoneg = ((pcaps->caps & ICE_AQC_PHY_AN_MODE) ?
- AUTONEG_ENABLE : AUTONEG_DISABLE);
+ pause->autoneg = ice_is_phy_caps_an_enabled(pcaps) ? AUTONEG_ENABLE :
+ AUTONEG_DISABLE;
if (dcbx_cfg->pfc.pfcena)
/* PFC enabled so report LFC as off */
@@ -2942,8 +3056,8 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
return -EIO;
}
- is_an = ((pcaps->caps & ICE_AQC_PHY_AN_MODE) ?
- AUTONEG_ENABLE : AUTONEG_DISABLE);
+ is_an = ice_is_phy_caps_an_enabled(pcaps) ? AUTONEG_ENABLE :
+ AUTONEG_DISABLE;
kfree(pcaps);
@@ -3322,6 +3436,58 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
return 0;
}
+/**
+ * ice_get_wol - get current Wake on LAN configuration
+ * @netdev: network interface device structure
+ * @wol: Ethtool structure to retrieve WoL settings
+ */
+static void ice_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+
+ if (np->vsi->type != ICE_VSI_PF)
+ netdev_warn(netdev, "Wake on LAN is not supported on this interface!\n");
+
+ /* Get WoL settings based on the HW capability */
+ if (ice_is_wol_supported(pf)) {
+ wol->supported = WAKE_MAGIC;
+ wol->wolopts = pf->wol_ena ? WAKE_MAGIC : 0;
+ } else {
+ wol->supported = 0;
+ wol->wolopts = 0;
+ }
+}
+
+/**
+ * ice_set_wol - set Wake on LAN on supported device
+ * @netdev: network interface device structure
+ * @wol: Ethtool structure to set WoL
+ */
+static int ice_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+
+ if (vsi->type != ICE_VSI_PF || !ice_is_wol_supported(pf))
+ return -EOPNOTSUPP;
+
+ /* only magic packet is supported */
+ if (wol->wolopts && wol->wolopts != WAKE_MAGIC)
+ return -EOPNOTSUPP;
+
+ /* Set WoL only if there is a new value */
+ if (pf->wol_ena != !!wol->wolopts) {
+ pf->wol_ena = !!wol->wolopts;
+ device_set_wakeup_enable(ice_pf_to_dev(pf), pf->wol_ena);
+ netdev_dbg(netdev, "WoL magic packet %sabled\n",
+ pf->wol_ena ? "en" : "dis");
+ }
+
+ return 0;
+}
+
enum ice_container_type {
ICE_RX_CONTAINER,
ICE_TX_CONTAINER,
@@ -3805,6 +3971,8 @@ static const struct ethtool_ops ice_ethtool_ops = {
.get_drvinfo = ice_get_drvinfo,
.get_regs_len = ice_get_regs_len,
.get_regs = ice_get_regs,
+ .get_wol = ice_get_wol,
+ .set_wol = ice_set_wol,
.get_msglevel = ice_get_msglevel,
.set_msglevel = ice_set_msglevel,
.self_test = ice_self_test,
@@ -3847,6 +4015,8 @@ static const struct ethtool_ops ice_ethtool_safe_mode_ops = {
.get_drvinfo = ice_get_drvinfo,
.get_regs_len = ice_get_regs_len,
.get_regs = ice_get_regs,
+ .get_wol = ice_get_wol,
+ .set_wol = ice_set_wol,
.get_msglevel = ice_get_msglevel,
.set_msglevel = ice_set_msglevel,
.get_link = ethtool_op_get_link,
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 1086c9f778b4..92e4abca62a4 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -367,6 +367,15 @@
#define VSIQF_FD_CNT_FD_GCNT_M ICE_M(0x3FFF, 0)
#define VSIQF_HKEY_MAX_INDEX 12
#define VSIQF_HLUT_MAX_INDEX 15
+#define PFPM_APM 0x000B8080
+#define PFPM_APM_APME_M BIT(0)
+#define PFPM_WUFC 0x0009DC00
+#define PFPM_WUFC_MAG_M BIT(1)
+#define PFPM_WUS 0x0009DB80
+#define PFPM_WUS_LNKC_M BIT(0)
+#define PFPM_WUS_MAG_M BIT(1)
+#define PFPM_WUS_MNG_M BIT(3)
+#define PFPM_WUS_FW_RST_WK_M BIT(31)
#define VFINT_DYN_CTLN(_i) (0x00003800 + ((_i) * 4))
#define VFINT_DYN_CTLN_CLEARPBA_M BIT(1)
#define PRTRPB_RDPC 0x000AC260
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 8a4c7b8b95df..8f6a191839f1 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1468,6 +1468,30 @@ static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi)
}
/**
+ * ice_pf_state_is_nominal - checks the PF for nominal state
+ * @pf: pointer to PF to check
+ *
+ * Check the PF's state for a collection of bits that would indicate
+ * the PF is in a state that would inhibit normal operation for
+ * driver functionality.
+ *
+ * Returns true if PF is in a nominal state, false otherwise
+ */
+bool ice_pf_state_is_nominal(struct ice_pf *pf)
+{
+ DECLARE_BITMAP(check_bits, __ICE_STATE_NBITS) = { 0 };
+
+ if (!pf)
+ return false;
+
+ bitmap_set(check_bits, 0, __ICE_STATE_NOMINAL_CHECK_BITS);
+ if (bitmap_intersects(pf->state, check_bits, __ICE_STATE_NBITS))
+ return false;
+
+ return true;
+}
+
+/**
* ice_update_eth_stats - Update VSI-specific ethernet statistics counters
* @vsi: the VSI to be updated
*/
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index d80e6afa4511..981f3a156c24 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -8,6 +8,8 @@
const char *ice_vsi_type_str(enum ice_vsi_type vsi_type);
+bool ice_pf_state_is_nominal(struct ice_pf *pf);
+
void ice_update_eth_stats(struct ice_vsi *vsi);
int ice_vsi_cfg_rxqs(struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index a1cef089201a..16a4096bb780 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -612,6 +612,7 @@ static void ice_print_topo_conflict(struct ice_vsi *vsi)
void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
{
struct ice_aqc_get_phy_caps_data *caps;
+ const char *an_advertised;
enum ice_status status;
const char *fec_req;
const char *speed;
@@ -710,6 +711,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
caps = kzalloc(sizeof(*caps), GFP_KERNEL);
if (!caps) {
fec_req = "Unknown";
+ an_advertised = "Unknown";
goto done;
}
@@ -718,6 +720,8 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
if (status)
netdev_info(vsi->netdev, "Get phy capability failed.\n");
+ an_advertised = ice_is_phy_caps_an_enabled(caps) ? "On" : "Off";
+
if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
fec_req = "RS-FEC";
@@ -730,8 +734,8 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
kfree(caps);
done:
- netdev_info(vsi->netdev, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
- speed, fec_req, fec, an, fc);
+ netdev_info(vsi->netdev, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg Advertised: %s, Autoneg Negotiated: %s, Flow Control: %s\n",
+ speed, fec_req, fec, an_advertised, an, fc);
ice_print_topo_conflict(vsi);
}
@@ -796,10 +800,6 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
dev_dbg(dev, "Failed to update link status and re-enable link events for port %d\n",
pi->lport);
- /* if the old link up/down and speed is the same as the new */
- if (link_up == old_link && link_speed == old_link_speed)
- return result;
-
vsi = ice_get_main_vsi(pf);
if (!vsi || !vsi->port_info)
return -EINVAL;
@@ -817,6 +817,10 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
}
}
+ /* if the old link up/down and speed is the same as the new */
+ if (link_up == old_link && link_speed == old_link_speed)
+ return result;
+
ice_dcb_rebuild(pf);
ice_vsi_link_event(vsi, link_up);
ice_print_link_msg(vsi, link_up);
@@ -1129,10 +1133,15 @@ static void ice_service_task_complete(struct ice_pf *pf)
/**
* ice_service_task_stop - stop service task and cancel works
* @pf: board private structure
+ *
+ * Return 0 if the __ICE_SERVICE_DIS bit was not already set,
+ * 1 otherwise.
*/
-static void ice_service_task_stop(struct ice_pf *pf)
+static int ice_service_task_stop(struct ice_pf *pf)
{
- set_bit(__ICE_SERVICE_DIS, pf->state);
+ int ret;
+
+ ret = test_and_set_bit(__ICE_SERVICE_DIS, pf->state);
if (pf->serv_tmr.function)
del_timer_sync(&pf->serv_tmr);
@@ -1140,6 +1149,7 @@ static void ice_service_task_stop(struct ice_pf *pf)
cancel_work_sync(&pf->serv_task);
clear_bit(__ICE_SERVICE_SCHED, pf->state);
+ return ret;
}
/**
@@ -1374,25 +1384,23 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
goto out;
- cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ /* Use the current user PHY configuration. The current user PHY
+ * configuration is initialized during probe from PHY capabilities
+ * software mode, and updated on set PHY configuration.
+ */
+ cfg = kmemdup(&pi->phy.curr_user_phy_cfg, sizeof(*cfg), GFP_KERNEL);
if (!cfg) {
retcode = -ENOMEM;
goto out;
}
- cfg->phy_type_low = pcaps->phy_type_low;
- cfg->phy_type_high = pcaps->phy_type_high;
- cfg->caps = pcaps->caps | ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
- cfg->low_power_ctrl = pcaps->low_power_ctrl;
- cfg->eee_cap = pcaps->eee_cap;
- cfg->eeer_value = pcaps->eeer_value;
- cfg->link_fec_opt = pcaps->link_fec_options;
+ cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
if (link_up)
cfg->caps |= ICE_AQ_PHY_ENA_LINK;
else
cfg->caps &= ~ICE_AQ_PHY_ENA_LINK;
- retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi->lport, cfg, NULL);
+ retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL);
if (retcode) {
dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
vsi->vsi_num, retcode);
@@ -1406,8 +1414,312 @@ out:
}
/**
- * ice_check_media_subtask - Check for media; bring link up if detected.
+ * ice_init_nvm_phy_type - Initialize the NVM PHY type
+ * @pi: port info structure
+ *
+ * Initialize nvm_phy_type_[low|high] for link lenient mode support
+ */
+static int ice_init_nvm_phy_type(struct ice_port_info *pi)
+{
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_pf *pf = pi->hw->back;
+ enum ice_status status;
+ int err = 0;
+
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps)
+ return -ENOMEM;
+
+ status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_NVM_CAP, pcaps,
+ NULL);
+
+ if (status) {
+ dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
+ err = -EIO;
+ goto out;
+ }
+
+ pf->nvm_phy_type_hi = pcaps->phy_type_high;
+ pf->nvm_phy_type_lo = pcaps->phy_type_low;
+
+out:
+ kfree(pcaps);
+ return err;
+}
+
+/**
+ * ice_init_link_dflt_override - Initialize link default override
+ * @pi: port info structure
+ *
+ * Initialize link default override and PHY total port shutdown during probe
+ */
+static void ice_init_link_dflt_override(struct ice_port_info *pi)
+{
+ struct ice_link_default_override_tlv *ldo;
+ struct ice_pf *pf = pi->hw->back;
+
+ ldo = &pf->link_dflt_override;
+ if (ice_get_link_default_override(ldo, pi))
+ return;
+
+ if (!(ldo->options & ICE_LINK_OVERRIDE_PORT_DIS))
+ return;
+
+ /* Enable Total Port Shutdown (override/replace link-down-on-close
+ * ethtool private flag) for ports with Port Disable bit set.
+ */
+ set_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags);
+ set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
+}
+
+/**
+ * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings
+ * @pi: port info structure
+ *
+ * If default override is enabled, initialized the user PHY cfg speed and FEC
+ * settings using the default override mask from the NVM.
+ *
+ * The PHY should only be configured with the default override settings the
+ * first time media is available. The __ICE_LINK_DEFAULT_OVERRIDE_PENDING state
+ * is used to indicate that the user PHY cfg default override is initialized
+ * and the PHY has not been configured with the default override settings. The
+ * state is set here, and cleared in ice_configure_phy the first time the PHY is
+ * configured.
+ */
+static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi)
+{
+ struct ice_link_default_override_tlv *ldo;
+ struct ice_aqc_set_phy_cfg_data *cfg;
+ struct ice_phy_info *phy = &pi->phy;
+ struct ice_pf *pf = pi->hw->back;
+
+ ldo = &pf->link_dflt_override;
+
+ /* If link default override is enabled, use to mask NVM PHY capabilities
+ * for speed and FEC default configuration.
+ */
+ cfg = &phy->curr_user_phy_cfg;
+
+ if (ldo->phy_type_low || ldo->phy_type_high) {
+ cfg->phy_type_low = pf->nvm_phy_type_lo &
+ cpu_to_le64(ldo->phy_type_low);
+ cfg->phy_type_high = pf->nvm_phy_type_hi &
+ cpu_to_le64(ldo->phy_type_high);
+ }
+ cfg->link_fec_opt = ldo->fec_options;
+ phy->curr_user_fec_req = ICE_FEC_AUTO;
+
+ set_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state);
+}
+
+/**
+ * ice_init_phy_user_cfg - Initialize the PHY user configuration
+ * @pi: port info structure
+ *
+ * Initialize the current user PHY configuration, speed, FEC, and FC requested
+ * mode to default. The PHY defaults are from get PHY capabilities topology
+ * with media so call when media is first available. An error is returned if
+ * called when media is not available. The PHY initialization completed state is
+ * set here.
+ *
+ * These configurations are used when setting PHY
+ * configuration. The user PHY configuration is updated on set PHY
+ * configuration. Returns 0 on success, negative on failure
+ */
+static int ice_init_phy_user_cfg(struct ice_port_info *pi)
+{
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_phy_info *phy = &pi->phy;
+ struct ice_pf *pf = pi->hw->back;
+ enum ice_status status;
+ struct ice_vsi *vsi;
+ int err = 0;
+
+ if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
+ return -EIO;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ return -EINVAL;
+
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps)
+ return -ENOMEM;
+
+ status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps,
+ NULL);
+ if (status) {
+ dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
+ err = -EIO;
+ goto err_out;
+ }
+
+ ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg);
+
+ /* check if lenient mode is supported and enabled */
+ if (ice_fw_supports_link_override(&vsi->back->hw) &&
+ !(pcaps->module_compliance_enforcement &
+ ICE_AQC_MOD_ENFORCE_STRICT_MODE)) {
+ set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags);
+
+ /* if link default override is enabled, initialize user PHY
+ * configuration with link default override values
+ */
+ if (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN) {
+ ice_init_phy_cfg_dflt_override(pi);
+ goto out;
+ }
+ }
+
+ /* if link default override is not enabled, initialize PHY using
+ * topology with media
+ */
+ phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps,
+ pcaps->link_fec_options);
+ phy->curr_user_fc_req = ice_caps_to_fc_mode(pcaps->caps);
+
+out:
+ phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M;
+ set_bit(__ICE_PHY_INIT_COMPLETE, pf->state);
+err_out:
+ kfree(pcaps);
+ return err;
+}
+
+/**
+ * ice_configure_phy - configure PHY
+ * @vsi: VSI of PHY
+ *
+ * Set the PHY configuration. If the current PHY configuration is the same as
+ * the curr_user_phy_cfg, then do nothing to avoid link flap. Otherwise
+ * configure the based get PHY capabilities for topology with media.
+ */
+static int ice_configure_phy(struct ice_vsi *vsi)
+{
+ struct device *dev = ice_pf_to_dev(vsi->back);
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_aqc_set_phy_cfg_data *cfg;
+ struct ice_port_info *pi;
+ enum ice_status status;
+ int err = 0;
+
+ pi = vsi->port_info;
+ if (!pi)
+ return -EINVAL;
+
+ /* Ensure we have media as we cannot configure a medialess port */
+ if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
+ return -EPERM;
+
+ ice_print_topo_conflict(vsi);
+
+ if (vsi->port_info->phy.link_info.topo_media_conflict ==
+ ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
+ return -EPERM;
+
+ if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
+ return ice_force_phys_link_state(vsi, true);
+
+ pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+ if (!pcaps)
+ return -ENOMEM;
+
+ /* Get current PHY config */
+ status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
+ NULL);
+ if (status) {
+ dev_err(dev, "Failed to get PHY configuration, VSI %d error %s\n",
+ vsi->vsi_num, ice_stat_str(status));
+ err = -EIO;
+ goto done;
+ }
+
+ /* If PHY enable link is configured and configuration has not changed,
+ * there's nothing to do
+ */
+ if (pcaps->caps & ICE_AQC_PHY_EN_LINK &&
+ ice_phy_caps_equals_cfg(pcaps, &pi->phy.curr_user_phy_cfg))
+ goto done;
+
+ /* Use PHY topology as baseline for configuration */
+ memset(pcaps, 0, sizeof(*pcaps));
+ status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps,
+ NULL);
+ if (status) {
+ dev_err(dev, "Failed to get PHY topology, VSI %d error %s\n",
+ vsi->vsi_num, ice_stat_str(status));
+ err = -EIO;
+ goto done;
+ }
+
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (!cfg) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ ice_copy_phy_caps_to_cfg(pi, pcaps, cfg);
+
+ /* Speed - If default override pending, use curr_user_phy_cfg set in
+ * ice_init_phy_user_cfg_ldo.
+ */
+ if (test_and_clear_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING,
+ vsi->back->state)) {
+ cfg->phy_type_low = pi->phy.curr_user_phy_cfg.phy_type_low;
+ cfg->phy_type_high = pi->phy.curr_user_phy_cfg.phy_type_high;
+ } else {
+ u64 phy_low = 0, phy_high = 0;
+
+ ice_update_phy_type(&phy_low, &phy_high,
+ pi->phy.curr_user_speed_req);
+ cfg->phy_type_low = pcaps->phy_type_low & cpu_to_le64(phy_low);
+ cfg->phy_type_high = pcaps->phy_type_high &
+ cpu_to_le64(phy_high);
+ }
+
+ /* Can't provide what was requested; use PHY capabilities */
+ if (!cfg->phy_type_low && !cfg->phy_type_high) {
+ cfg->phy_type_low = pcaps->phy_type_low;
+ cfg->phy_type_high = pcaps->phy_type_high;
+ }
+
+ /* FEC */
+ ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req);
+
+ /* Can't provide what was requested; use PHY capabilities */
+ if (cfg->link_fec_opt !=
+ (cfg->link_fec_opt & pcaps->link_fec_options)) {
+ cfg->caps |= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC;
+ cfg->link_fec_opt = pcaps->link_fec_options;
+ }
+
+ /* Flow Control - always supported; no need to check against
+ * capabilities
+ */
+ ice_cfg_phy_fc(pi, cfg, pi->phy.curr_user_fc_req);
+
+ /* Enable link and link update */
+ cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
+
+ status = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL);
+ if (status) {
+ dev_err(dev, "Failed to set phy config, VSI %d error %s\n",
+ vsi->vsi_num, ice_stat_str(status));
+ err = -EIO;
+ }
+
+ kfree(cfg);
+done:
+ kfree(pcaps);
+ return err;
+}
+
+/**
+ * ice_check_media_subtask - Check for media
* @pf: pointer to PF struct
+ *
+ * If media is available, then initialize PHY user configuration if it is not
+ * been, and configure the PHY if the interface is up.
*/
static void ice_check_media_subtask(struct ice_pf *pf)
{
@@ -1415,15 +1727,12 @@ static void ice_check_media_subtask(struct ice_pf *pf)
struct ice_vsi *vsi;
int err;
- vsi = ice_get_main_vsi(pf);
- if (!vsi)
+ /* No need to check for media if it's already present */
+ if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags))
return;
- /* No need to check for media if it's already present or the interface
- * is down
- */
- if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) ||
- test_bit(__ICE_DOWN, vsi->state))
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
return;
/* Refresh link info and check if media is present */
@@ -1433,10 +1742,19 @@ static void ice_check_media_subtask(struct ice_pf *pf)
return;
if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
- err = ice_force_phys_link_state(vsi, true);
- if (err)
+ if (!test_bit(__ICE_PHY_INIT_COMPLETE, pf->state))
+ ice_init_phy_user_cfg(pi);
+
+ /* PHY settings are reset on media insertion, reconfigure
+ * PHY to preserve settings.
+ */
+ if (test_bit(__ICE_DOWN, vsi->state) &&
+ test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
return;
- clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+
+ err = ice_configure_phy(vsi);
+ if (!err)
+ clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
/* A Link Status Event will be generated; the event handler
* will complete bringing the interface up
@@ -2941,6 +3259,27 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf)
}
/**
+ * ice_is_wol_supported - get NVM state of WoL
+ * @pf: board private structure
+ *
+ * Check if WoL is supported based on the HW configuration.
+ * Returns true if NVM supports and enables WoL for this port, false otherwise
+ */
+bool ice_is_wol_supported(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ u16 wol_ctrl;
+
+ /* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control
+ * word) indicates WoL is not supported on the corresponding PF ID.
+ */
+ if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, &wol_ctrl))
+ return false;
+
+ return !(BIT(hw->pf_id) & wol_ctrl);
+}
+
+/**
* ice_vsi_recfg_qs - Change the number of queues on a VSI
* @vsi: VSI being changed
* @new_rx: new number of Rx queues
@@ -3288,6 +3627,33 @@ dflt_pkg_load:
}
/**
+ * ice_print_wake_reason - show the wake up cause in the log
+ * @pf: pointer to the PF struct
+ */
+static void ice_print_wake_reason(struct ice_pf *pf)
+{
+ u32 wus = pf->wakeup_reason;
+ const char *wake_str;
+
+ /* if no wake event, nothing to print */
+ if (!wus)
+ return;
+
+ if (wus & PFPM_WUS_LNKC_M)
+ wake_str = "Link\n";
+ else if (wus & PFPM_WUS_MAG_M)
+ wake_str = "Magic Packet\n";
+ else if (wus & PFPM_WUS_MNG_M)
+ wake_str = "Management\n";
+ else if (wus & PFPM_WUS_FW_RST_WK_M)
+ wake_str = "Firmware Reset\n";
+ else
+ wake_str = "Unknown\n";
+
+ dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str);
+}
+
+/**
* ice_probe - Device initialization routine
* @pdev: PCI device information struct
* @ent: entry in ice_pci_tbl
@@ -3468,8 +3834,53 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
goto err_alloc_sw_unroll;
}
+ err = ice_init_nvm_phy_type(pf->hw.port_info);
+ if (err) {
+ dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err);
+ goto err_alloc_sw_unroll;
+ }
+
+ err = ice_update_link_info(pf->hw.port_info);
+ if (err) {
+ dev_err(dev, "ice_update_link_info failed: %d\n", err);
+ goto err_alloc_sw_unroll;
+ }
+
+ ice_init_link_dflt_override(pf->hw.port_info);
+
+ /* if media available, initialize PHY settings */
+ if (pf->hw.port_info->phy.link_info.link_info &
+ ICE_AQ_MEDIA_AVAILABLE) {
+ err = ice_init_phy_user_cfg(pf->hw.port_info);
+ if (err) {
+ dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err);
+ goto err_alloc_sw_unroll;
+ }
+
+ if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) {
+ struct ice_vsi *vsi = ice_get_main_vsi(pf);
+
+ if (vsi)
+ ice_configure_phy(vsi);
+ }
+ } else {
+ set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+ }
+
ice_verify_cacheline_size(pf);
+ /* Save wakeup reason register for later use */
+ pf->wakeup_reason = rd32(hw, PFPM_WUS);
+
+ /* check for a power management event */
+ ice_print_wake_reason(pf);
+
+ /* clear wake status, all bits */
+ wr32(hw, PFPM_WUS, U32_MAX);
+
+ /* Disable WoL at init, wait for user to enable */
+ device_set_wakeup_enable(dev, false);
+
/* If no DDP driven features have to be setup, we are done with probe */
if (ice_is_safe_mode(pf))
goto probe_done;
@@ -3514,10 +3925,73 @@ err_init_pf_unroll:
err_exit_unroll:
ice_devlink_unregister(pf);
pci_disable_pcie_error_reporting(pdev);
+ pci_disable_device(pdev);
return err;
}
/**
+ * ice_set_wake - enable or disable Wake on LAN
+ * @pf: pointer to the PF struct
+ *
+ * Simple helper for WoL control
+ */
+static void ice_set_wake(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ bool wol = pf->wol_ena;
+
+ /* clear wake state, otherwise new wake events won't fire */
+ wr32(hw, PFPM_WUS, U32_MAX);
+
+ /* enable / disable APM wake up, no RMW needed */
+ wr32(hw, PFPM_APM, wol ? PFPM_APM_APME_M : 0);
+
+ /* set magic packet filter enabled */
+ wr32(hw, PFPM_WUFC, wol ? PFPM_WUFC_MAG_M : 0);
+}
+
+/**
+ * ice_setup_magic_mc_wake - setup device to wake on multicast magic packet
+ * @pf: pointer to the PF struct
+ *
+ * Issue firmware command to enable multicast magic wake, making
+ * sure that any locally administered address (LAA) is used for
+ * wake, and that PF reset doesn't undo the LAA.
+ */
+static void ice_setup_mc_magic_wake(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_hw *hw = &pf->hw;
+ enum ice_status status;
+ u8 mac_addr[ETH_ALEN];
+ struct ice_vsi *vsi;
+ u8 flags;
+
+ if (!pf->wol_ena)
+ return;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ return;
+
+ /* Get current MAC address in case it's an LAA */
+ if (vsi->netdev)
+ ether_addr_copy(mac_addr, vsi->netdev->dev_addr);
+ else
+ ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
+
+ flags = ICE_AQC_MAN_MAC_WR_MC_MAG_EN |
+ ICE_AQC_MAN_MAC_UPDATE_LAA_WOL |
+ ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP;
+
+ status = ice_aq_manage_mac_write(hw, mac_addr, flags, NULL);
+ if (status)
+ dev_err(dev, "Failed to enable Multicast Magic Packet wake, err %s aq_err %s\n",
+ ice_stat_str(status),
+ ice_aq_str(hw->adminq.sq_last_status));
+}
+
+/**
* ice_remove - Device removal routine
* @pdev: PCI device information struct
*/
@@ -3546,8 +4020,10 @@ static void ice_remove(struct pci_dev *pdev)
mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
if (!ice_is_safe_mode(pf))
ice_remove_arfs(pf);
+ ice_setup_mc_magic_wake(pf);
ice_devlink_destroy_port(pf);
ice_vsi_release_all(pf);
+ ice_set_wake(pf);
ice_free_irq_msix_misc(pf);
ice_for_each_vsi(pf, i) {
if (!pf->vsi[i])
@@ -3567,9 +4043,231 @@ static void ice_remove(struct pci_dev *pdev)
pci_wait_for_pending_transaction(pdev);
ice_clear_interrupt_scheme(pf);
pci_disable_pcie_error_reporting(pdev);
+ pci_disable_device(pdev);
+}
+
+/**
+ * ice_shutdown - PCI callback for shutting down device
+ * @pdev: PCI device information struct
+ */
+static void ice_shutdown(struct pci_dev *pdev)
+{
+ struct ice_pf *pf = pci_get_drvdata(pdev);
+
+ ice_remove(pdev);
+
+ if (system_state == SYSTEM_POWER_OFF) {
+ pci_wake_from_d3(pdev, pf->wol_ena);
+ pci_set_power_state(pdev, PCI_D3hot);
+ }
+}
+
+#ifdef CONFIG_PM
+/**
+ * ice_prepare_for_shutdown - prep for PCI shutdown
+ * @pf: board private structure
+ *
+ * Inform or close all dependent features in prep for PCI device shutdown
+ */
+static void ice_prepare_for_shutdown(struct ice_pf *pf)
+{
+ struct ice_hw *hw = &pf->hw;
+ u32 v;
+
+ /* Notify VFs of impending reset */
+ if (ice_check_sq_alive(hw, &hw->mailboxq))
+ ice_vc_notify_reset(pf);
+
+ dev_dbg(ice_pf_to_dev(pf), "Tearing down internal switch for shutdown\n");
+
+ /* disable the VSIs and their queues that are not already DOWN */
+ ice_pf_dis_all_vsi(pf, false);
+
+ ice_for_each_vsi(pf, v)
+ if (pf->vsi[v])
+ pf->vsi[v]->vsi_num = 0;
+
+ ice_shutdown_all_ctrlq(hw);
+}
+
+/**
+ * ice_reinit_interrupt_scheme - Reinitialize interrupt scheme
+ * @pf: board private structure to reinitialize
+ *
+ * This routine reinitialize interrupt scheme that was cleared during
+ * power management suspend callback.
+ *
+ * This should be called during resume routine to re-allocate the q_vectors
+ * and reacquire interrupts.
+ */
+static int ice_reinit_interrupt_scheme(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ int ret, v;
+
+ /* Since we clear MSIX flag during suspend, we need to
+ * set it back during resume...
+ */
+
+ ret = ice_init_interrupt_scheme(pf);
+ if (ret) {
+ dev_err(dev, "Failed to re-initialize interrupt %d\n", ret);
+ return ret;
+ }
+
+ /* Remap vectors and rings, after successful re-init interrupts */
+ ice_for_each_vsi(pf, v) {
+ if (!pf->vsi[v])
+ continue;
+
+ ret = ice_vsi_alloc_q_vectors(pf->vsi[v]);
+ if (ret)
+ goto err_reinit;
+ ice_vsi_map_rings_to_vectors(pf->vsi[v]);
+ }
+
+ ret = ice_req_irq_msix_misc(pf);
+ if (ret) {
+ dev_err(dev, "Setting up misc vector failed after device suspend %d\n",
+ ret);
+ goto err_reinit;
+ }
+
+ return 0;
+
+err_reinit:
+ while (v--)
+ if (pf->vsi[v])
+ ice_vsi_free_q_vectors(pf->vsi[v]);
+
+ return ret;
}
/**
+ * ice_suspend
+ * @dev: generic device information structure
+ *
+ * Power Management callback to quiesce the device and prepare
+ * for D3 transition.
+ */
+static int ice_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct ice_pf *pf;
+ int disabled, v;
+
+ pf = pci_get_drvdata(pdev);
+
+ if (!ice_pf_state_is_nominal(pf)) {
+ dev_err(dev, "Device is not ready, no need to suspend it\n");
+ return -EBUSY;
+ }
+
+ /* Stop watchdog tasks until resume completion.
+ * Even though it is most likely that the service task is
+ * disabled if the device is suspended or down, the service task's
+ * state is controlled by a different state bit, and we should
+ * store and honor whatever state that bit is in at this point.
+ */
+ disabled = ice_service_task_stop(pf);
+
+ /* Already suspended?, then there is nothing to do */
+ if (test_and_set_bit(__ICE_SUSPENDED, pf->state)) {
+ if (!disabled)
+ ice_service_task_restart(pf);
+ return 0;
+ }
+
+ if (test_bit(__ICE_DOWN, pf->state) ||
+ ice_is_reset_in_progress(pf->state)) {
+ dev_err(dev, "can't suspend device in reset or already down\n");
+ if (!disabled)
+ ice_service_task_restart(pf);
+ return 0;
+ }
+
+ ice_setup_mc_magic_wake(pf);
+
+ ice_prepare_for_shutdown(pf);
+
+ ice_set_wake(pf);
+
+ /* Free vectors, clear the interrupt scheme and release IRQs
+ * for proper hibernation, especially with large number of CPUs.
+ * Otherwise hibernation might fail when mapping all the vectors back
+ * to CPU0.
+ */
+ ice_free_irq_msix_misc(pf);
+ ice_for_each_vsi(pf, v) {
+ if (!pf->vsi[v])
+ continue;
+ ice_vsi_free_q_vectors(pf->vsi[v]);
+ }
+ ice_clear_interrupt_scheme(pf);
+
+ pci_wake_from_d3(pdev, pf->wol_ena);
+ pci_set_power_state(pdev, PCI_D3hot);
+ return 0;
+}
+
+/**
+ * ice_resume - PM callback for waking up from D3
+ * @dev: generic device information structure
+ */
+static int ice_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ enum ice_reset_req reset_type;
+ struct ice_pf *pf;
+ struct ice_hw *hw;
+ int ret;
+
+ pci_set_power_state(pdev, PCI_D0);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+
+ if (!pci_device_is_present(pdev))
+ return -ENODEV;
+
+ ret = pci_enable_device_mem(pdev);
+ if (ret) {
+ dev_err(dev, "Cannot enable device after suspend\n");
+ return ret;
+ }
+
+ pf = pci_get_drvdata(pdev);
+ hw = &pf->hw;
+
+ pf->wakeup_reason = rd32(hw, PFPM_WUS);
+ ice_print_wake_reason(pf);
+
+ /* We cleared the interrupt scheme when we suspended, so we need to
+ * restore it now to resume device functionality.
+ */
+ ret = ice_reinit_interrupt_scheme(pf);
+ if (ret)
+ dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret);
+
+ clear_bit(__ICE_DOWN, pf->state);
+ /* Now perform PF reset and rebuild */
+ reset_type = ICE_RESET_PFR;
+ /* re-enable service task for reset, but allow reset to schedule it */
+ clear_bit(__ICE_SERVICE_DIS, pf->state);
+
+ if (ice_schedule_reset(pf, reset_type))
+ dev_err(dev, "Reset during resume failed.\n");
+
+ clear_bit(__ICE_SUSPENDED, pf->state);
+ ice_service_task_restart(pf);
+
+ /* Restart the service task */
+ mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
+
+ return 0;
+}
+#endif /* CONFIG_PM */
+
+/**
* ice_pci_err_detected - warning that PCI error has been detected
* @pdev: PCI device information struct
* @err: the type of PCI error
@@ -3734,6 +4432,8 @@ static const struct pci_device_id ice_pci_tbl[] = {
};
MODULE_DEVICE_TABLE(pci, ice_pci_tbl);
+static __maybe_unused SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume);
+
static const struct pci_error_handlers ice_pci_err_handler = {
.error_detected = ice_pci_err_detected,
.slot_reset = ice_pci_err_slot_reset,
@@ -3747,6 +4447,10 @@ static struct pci_driver ice_driver = {
.id_table = ice_pci_tbl,
.probe = ice_probe,
.remove = ice_remove,
+#ifdef CONFIG_PM
+ .driver.pm = &ice_pm_ops,
+#endif /* CONFIG_PM */
+ .shutdown = ice_shutdown,
.sriov_configure = ice_sriov_configure,
.err_handler = &ice_pci_err_handler
};
@@ -5659,20 +6363,30 @@ int ice_open(struct net_device *netdev)
/* Set PHY if there is media, otherwise, turn off PHY */
if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
- err = ice_force_phys_link_state(vsi, true);
+ clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+ if (!test_bit(__ICE_PHY_INIT_COMPLETE, pf->state)) {
+ err = ice_init_phy_user_cfg(pi);
+ if (err) {
+ netdev_err(netdev, "Failed to initialize PHY settings, error %d\n",
+ err);
+ return err;
+ }
+ }
+
+ err = ice_configure_phy(vsi);
if (err) {
netdev_err(netdev, "Failed to set physical link up, error %d\n",
err);
return err;
}
} else {
+ set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
err = ice_aq_set_link_restart_an(pi, false, NULL);
if (err) {
netdev_err(netdev, "Failed to set PHY state, VSI %d error %d\n",
vsi->vsi_num, err);
return err;
}
- set_bit(ICE_FLAG_NO_MEDIA, vsi->back->flags);
}
err = ice_vsi_open(vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index b049c1c30c88..7c2a06892bbb 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -172,8 +172,7 @@ void ice_release_nvm(struct ice_hw *hw)
*
* Reads one 16 bit word from the Shadow RAM using the ice_read_sr_word_aq.
*/
-static enum ice_status
-ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
+enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
{
enum ice_status status;
@@ -197,7 +196,7 @@ ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
* Area (PFA) and returns the TLV pointer and length. The caller can
* use these to read the variable length TLV value.
*/
-static enum ice_status
+enum ice_status
ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
u16 module_type)
{
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h
index 165eda07b93d..999f273ba6ad 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.h
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.h
@@ -11,6 +11,10 @@ enum ice_status
ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data,
bool read_shadow_ram);
enum ice_status
+ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+ u16 module_type);
+enum ice_status
ice_read_pba_string(struct ice_hw *hw, u8 *pba_num, u32 pba_num_size);
enum ice_status ice_init_nvm(struct ice_hw *hw);
+enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data);
#endif /* _ICE_NVM_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index c1ad8622e65c..08c616d9fffd 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -87,6 +87,12 @@ enum ice_fc_mode {
ICE_FC_DFLT
};
+enum ice_phy_cache_mode {
+ ICE_FC_MODE = 0,
+ ICE_SPEED_MODE,
+ ICE_FEC_MODE
+};
+
enum ice_fec_mode {
ICE_FEC_NONE = 0,
ICE_FEC_RS,
@@ -94,6 +100,14 @@ enum ice_fec_mode {
ICE_FEC_AUTO
};
+struct ice_phy_cache_mode_data {
+ union {
+ enum ice_fec_mode curr_user_fec_req;
+ enum ice_fc_mode curr_user_fc_req;
+ u16 curr_user_speed_req;
+ } data;
+};
+
enum ice_set_fc_aq_failures {
ICE_SET_FC_AQ_FAIL_NONE = 0,
ICE_SET_FC_AQ_FAIL_GET,
@@ -104,6 +118,7 @@ enum ice_set_fc_aq_failures {
/* Various MAC types */
enum ice_mac_type {
ICE_MAC_UNKNOWN = 0,
+ ICE_MAC_E810,
ICE_MAC_GENERIC,
};
@@ -160,6 +175,13 @@ struct ice_phy_info {
u64 phy_type_high;
enum ice_media_type media_type;
u8 get_link_info;
+ /* Please refer to struct ice_aqc_get_link_status_data to get
+ * detail of enable bit in curr_user_speed_req
+ */
+ u16 curr_user_speed_req;
+ enum ice_fec_mode curr_user_fec_req;
+ enum ice_fc_mode curr_user_fc_req;
+ struct ice_aqc_set_phy_cfg_data curr_user_phy_cfg;
};
/* protocol enumeration for filters */
@@ -293,6 +315,28 @@ struct ice_nvm_info {
u8 blank_nvm_mode; /* is NVM empty (no FW present) */
};
+struct ice_link_default_override_tlv {
+ u8 options;
+#define ICE_LINK_OVERRIDE_OPT_M 0x3F
+#define ICE_LINK_OVERRIDE_STRICT_MODE BIT(0)
+#define ICE_LINK_OVERRIDE_EPCT_DIS BIT(1)
+#define ICE_LINK_OVERRIDE_PORT_DIS BIT(2)
+#define ICE_LINK_OVERRIDE_EN BIT(3)
+#define ICE_LINK_OVERRIDE_AUTO_LINK_DIS BIT(4)
+#define ICE_LINK_OVERRIDE_EEE_EN BIT(5)
+ u8 phy_config;
+#define ICE_LINK_OVERRIDE_PHY_CFG_S 8
+#define ICE_LINK_OVERRIDE_PHY_CFG_M (0xC3 << ICE_LINK_OVERRIDE_PHY_CFG_S)
+#define ICE_LINK_OVERRIDE_PAUSE_M 0x3
+#define ICE_LINK_OVERRIDE_LESM_EN BIT(6)
+#define ICE_LINK_OVERRIDE_AUTO_FEC_EN BIT(7)
+ u8 fec_options;
+#define ICE_LINK_OVERRIDE_FEC_OPT_M 0xFF
+ u8 rsvd1;
+ u64 phy_type_low;
+ u64 phy_type_high;
+};
+
#define ICE_NVM_VER_LEN 32
/* netlist version information */
@@ -444,6 +488,7 @@ struct ice_dcb_app_priority_table {
#define ICE_APP_SEL_ETHTYPE 0x1
#define ICE_APP_SEL_TCPIP 0x2
#define ICE_CEE_APP_SEL_ETHTYPE 0x0
+#define ICE_SR_LINK_DEFAULT_OVERRIDE_PTR 0x134
#define ICE_CEE_APP_SEL_TCPIP 0x1
struct ice_dcbx_cfg {
@@ -709,6 +754,7 @@ struct ice_hw_port_stats {
/* Checksum and Shadow RAM pointers */
#define ICE_SR_BOOT_CFG_PTR 0x132
+#define ICE_SR_NVM_WOL_CFG 0x19
#define ICE_NVM_OROM_VER_OFF 0x02
#define ICE_SR_PBA_BLOCK_PTR 0x16
#define ICE_SR_NVM_DEV_STARTER_VER 0x18
@@ -726,6 +772,17 @@ struct ice_hw_port_stats {
#define ICE_OROM_VER_MASK (0xff << ICE_OROM_VER_SHIFT)
#define ICE_SR_PFA_PTR 0x40
#define ICE_SR_SECTOR_SIZE_IN_WORDS 0x800
+
+/* Link override related */
+#define ICE_SR_PFA_LINK_OVERRIDE_WORDS 10
+#define ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS 4
+#define ICE_SR_PFA_LINK_OVERRIDE_OFFSET 2
+#define ICE_SR_PFA_LINK_OVERRIDE_FEC_OFFSET 1
+#define ICE_SR_PFA_LINK_OVERRIDE_PHY_OFFSET 2
+#define ICE_FW_API_LINK_OVERRIDE_MAJ 1
+#define ICE_FW_API_LINK_OVERRIDE_MIN 5
+#define ICE_FW_API_LINK_OVERRIDE_PATCH 2
+
#define ICE_SR_WORDS_IN_1KB 512
/* Hash redirection LUT for VSI - maximum array size */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 16a2f2526ccc..9df5ceb26ab9 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1593,31 +1593,6 @@ err_unroll_intr:
}
/**
- * ice_pf_state_is_nominal - checks the PF for nominal state
- * @pf: pointer to PF to check
- *
- * Check the PF's state for a collection of bits that would indicate
- * the PF is in a state that would inhibit normal operation for
- * driver functionality.
- *
- * Returns true if PF is in a nominal state.
- * Returns false otherwise
- */
-static bool ice_pf_state_is_nominal(struct ice_pf *pf)
-{
- DECLARE_BITMAP(check_bits, __ICE_STATE_NBITS) = { 0 };
-
- if (!pf)
- return false;
-
- bitmap_set(check_bits, 0, __ICE_STATE_NOMINAL_CHECK_BITS);
- if (bitmap_intersects(pf->state, check_bits, __ICE_STATE_NBITS))
- return false;
-
- return true;
-}
-
-/**
* ice_pci_sriov_ena - Enable or change number of VFs
* @pf: pointer to the PF structure
* @num_vfs: number of VFs to allocate
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index f6479384dc4f..de93cc6ebc1a 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -2046,14 +2046,13 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx)
{
struct skb_shared_hwtstamps *hwtstamps = NULL;
int result = RX_PROCESS_RESULT_NOTHING_TO_DO;
+ int current_head_index = *rx->head_cpu_ptr;
struct lan743x_rx_buffer_info *buffer_info;
struct lan743x_rx_descriptor *descriptor;
- int current_head_index = -1;
int extension_index = -1;
int first_index = -1;
int last_index = -1;
- current_head_index = *rx->head_cpu_ptr;
if (current_head_index < 0 || current_head_index >= rx->ring_size)
goto done;
diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
index 0ead1ef11c6c..65408bc994c4 100644
--- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c
+++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
@@ -358,20 +358,20 @@ static const struct reg_field ocelot_regfields[REGFIELD_MAX] = {
[SYS_RESET_CFG_CORE_ENA] = REG_FIELD(SYS_RESET_CFG, 2, 2),
[SYS_RESET_CFG_MEM_ENA] = REG_FIELD(SYS_RESET_CFG, 1, 1),
[SYS_RESET_CFG_MEM_INIT] = REG_FIELD(SYS_RESET_CFG, 0, 0),
- /* Replicated per number of ports (11), register size 4 per port */
- [QSYS_SWITCH_PORT_MODE_PORT_ENA] = REG_FIELD_ID(QSYS_SWITCH_PORT_MODE, 14, 14, 11, 4),
- [QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG] = REG_FIELD_ID(QSYS_SWITCH_PORT_MODE, 11, 13, 11, 4),
- [QSYS_SWITCH_PORT_MODE_YEL_RSRVD] = REG_FIELD_ID(QSYS_SWITCH_PORT_MODE, 10, 10, 11, 4),
- [QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE] = REG_FIELD_ID(QSYS_SWITCH_PORT_MODE, 9, 9, 11, 4),
- [QSYS_SWITCH_PORT_MODE_TX_PFC_ENA] = REG_FIELD_ID(QSYS_SWITCH_PORT_MODE, 1, 8, 11, 4),
- [QSYS_SWITCH_PORT_MODE_TX_PFC_MODE] = REG_FIELD_ID(QSYS_SWITCH_PORT_MODE, 0, 0, 11, 4),
- [SYS_PORT_MODE_DATA_WO_TS] = REG_FIELD_ID(SYS_PORT_MODE, 5, 6, 11, 4),
- [SYS_PORT_MODE_INCL_INJ_HDR] = REG_FIELD_ID(SYS_PORT_MODE, 3, 4, 11, 4),
- [SYS_PORT_MODE_INCL_XTR_HDR] = REG_FIELD_ID(SYS_PORT_MODE, 1, 2, 11, 4),
- [SYS_PORT_MODE_INCL_HDR_ERR] = REG_FIELD_ID(SYS_PORT_MODE, 0, 0, 11, 4),
- [SYS_PAUSE_CFG_PAUSE_START] = REG_FIELD_ID(SYS_PAUSE_CFG, 10, 18, 11, 4),
- [SYS_PAUSE_CFG_PAUSE_STOP] = REG_FIELD_ID(SYS_PAUSE_CFG, 1, 9, 11, 4),
- [SYS_PAUSE_CFG_PAUSE_ENA] = REG_FIELD_ID(SYS_PAUSE_CFG, 0, 1, 11, 4),
+ /* Replicated per number of ports (12), register size 4 per port */
+ [QSYS_SWITCH_PORT_MODE_PORT_ENA] = REG_FIELD_ID(QSYS_SWITCH_PORT_MODE, 14, 14, 12, 4),
+ [QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG] = REG_FIELD_ID(QSYS_SWITCH_PORT_MODE, 11, 13, 12, 4),
+ [QSYS_SWITCH_PORT_MODE_YEL_RSRVD] = REG_FIELD_ID(QSYS_SWITCH_PORT_MODE, 10, 10, 12, 4),
+ [QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE] = REG_FIELD_ID(QSYS_SWITCH_PORT_MODE, 9, 9, 12, 4),
+ [QSYS_SWITCH_PORT_MODE_TX_PFC_ENA] = REG_FIELD_ID(QSYS_SWITCH_PORT_MODE, 1, 8, 12, 4),
+ [QSYS_SWITCH_PORT_MODE_TX_PFC_MODE] = REG_FIELD_ID(QSYS_SWITCH_PORT_MODE, 0, 0, 12, 4),
+ [SYS_PORT_MODE_DATA_WO_TS] = REG_FIELD_ID(SYS_PORT_MODE, 5, 6, 12, 4),
+ [SYS_PORT_MODE_INCL_INJ_HDR] = REG_FIELD_ID(SYS_PORT_MODE, 3, 4, 12, 4),
+ [SYS_PORT_MODE_INCL_XTR_HDR] = REG_FIELD_ID(SYS_PORT_MODE, 1, 2, 12, 4),
+ [SYS_PORT_MODE_INCL_HDR_ERR] = REG_FIELD_ID(SYS_PORT_MODE, 0, 0, 12, 4),
+ [SYS_PAUSE_CFG_PAUSE_START] = REG_FIELD_ID(SYS_PAUSE_CFG, 10, 18, 12, 4),
+ [SYS_PAUSE_CFG_PAUSE_STOP] = REG_FIELD_ID(SYS_PAUSE_CFG, 1, 9, 12, 4),
+ [SYS_PAUSE_CFG_PAUSE_ENA] = REG_FIELD_ID(SYS_PAUSE_CFG, 0, 1, 12, 4),
};
static const struct ocelot_stat_layout ocelot_stats_layout[] = {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
index 2924cde440aa..85c686c16741 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -247,12 +247,11 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out_pci_disable_device;
}
- pci_set_master(pdev);
pcie_print_link_status(pdev);
err = ionic_map_bars(ionic);
if (err)
- goto err_out_pci_clear_master;
+ goto err_out_pci_disable_device;
/* Configure the device */
err = ionic_setup(ionic);
@@ -260,6 +259,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
dev_err(dev, "Cannot setup device: %d, aborting\n", err);
goto err_out_unmap_bars;
}
+ pci_set_master(pdev);
err = ionic_identify(ionic);
if (err) {
@@ -350,6 +350,7 @@ err_out_reset:
ionic_reset(ionic);
err_out_teardown:
ionic_dev_teardown(ionic);
+ pci_clear_master(pdev);
/* Don't fail the probe for these errors, keep
* the hw interface around for inspection
*/
@@ -358,8 +359,6 @@ err_out_teardown:
err_out_unmap_bars:
ionic_unmap_bars(ionic);
pci_release_regions(pdev);
-err_out_pci_clear_master:
- pci_clear_master(pdev);
err_out_pci_disable_device:
pci_disable_device(pdev);
err_out_debugfs_del_dev:
@@ -389,9 +388,9 @@ static void ionic_remove(struct pci_dev *pdev)
ionic_port_reset(ionic);
ionic_reset(ionic);
ionic_dev_teardown(ionic);
+ pci_clear_master(pdev);
ionic_unmap_bars(ionic);
pci_release_regions(pdev);
- pci_clear_master(pdev);
pci_disable_device(pdev);
ionic_debugfs_del_dev(ionic);
mutex_destroy(&ionic->dev_cmd_lock);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 525434f10025..d5cba502abca 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -10,8 +10,6 @@
#include "ionic_if.h"
#include "ionic_regs.h"
-#define IONIC_MIN_MTU ETH_MIN_MTU
-#define IONIC_MAX_MTU 9194
#define IONIC_MAX_TX_DESC 8192
#define IONIC_MAX_RX_DESC 16384
#define IONIC_MIN_TXRX_DESC 16
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 7e22ba4ed915..acc94b244cf3 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -59,6 +59,8 @@ enum ionic_cmd_opcode {
IONIC_CMD_QOS_CLASS_INIT = 241,
IONIC_CMD_QOS_CLASS_RESET = 242,
IONIC_CMD_QOS_CLASS_UPDATE = 243,
+ IONIC_CMD_QOS_CLEAR_STATS = 244,
+ IONIC_CMD_QOS_RESET = 245,
/* Firmware commands */
IONIC_CMD_FW_DOWNLOAD = 254,
@@ -90,8 +92,8 @@ enum ionic_status_code {
IONIC_RC_DEV_CMD = 18, /* Device cmd attempted on AdminQ */
IONIC_RC_ENOSUPP = 19, /* Operation not supported */
IONIC_RC_ERROR = 29, /* Generic error */
-
IONIC_RC_ERDMA = 30, /* Generic RDMA error */
+ IONIC_RC_EVFID = 31, /* VF ID does not exist */
};
enum ionic_notifyq_opcode {
@@ -103,7 +105,7 @@ enum ionic_notifyq_opcode {
};
/**
- * struct cmd - General admin command format
+ * struct ionic_admin_cmd - General admin command format
* @opcode: Opcode for the command
* @lif_index: LIF index
* @cmd_data: Opcode-specific command bytes
@@ -167,7 +169,7 @@ struct ionic_dev_init_cmd {
};
/**
- * struct init_comp - Device init command completion
+ * struct ionic_dev_init_comp - Device init command completion
* @status: Status of the command (enum ionic_status_code)
*/
struct ionic_dev_init_comp {
@@ -185,7 +187,7 @@ struct ionic_dev_reset_cmd {
};
/**
- * struct reset_comp - Reset command completion
+ * struct ionic_dev_reset_comp - Reset command completion
* @status: Status of the command (enum ionic_status_code)
*/
struct ionic_dev_reset_comp {
@@ -357,12 +359,12 @@ struct ionic_lif_logical_qtype {
* enum ionic_lif_state - LIF state
* @IONIC_LIF_DISABLE: LIF disabled
* @IONIC_LIF_ENABLE: LIF enabled
- * @IONIC_LIF_HANG_RESET: LIF hung, being reset
+ * @IONIC_LIF_QUIESCE: LIF Quiesced
*/
enum ionic_lif_state {
- IONIC_LIF_DISABLE = 0,
+ IONIC_LIF_QUIESCE = 0,
IONIC_LIF_ENABLE = 1,
- IONIC_LIF_HANG_RESET = 2,
+ IONIC_LIF_DISABLE = 2,
};
/**
@@ -371,6 +373,7 @@ enum ionic_lif_state {
* @name: LIF name
* @mtu: MTU
* @mac: Station MAC address
+ * @vlan: Default Vlan ID
* @features: Features (enum ionic_eth_hw_features)
* @queue_count: Queue counts per queue-type
*/
@@ -381,7 +384,7 @@ union ionic_lif_config {
char name[IONIC_IFNAMSIZ];
__le32 mtu;
u8 mac[6];
- u8 rsvd2[2];
+ __le16 vlan;
__le64 features;
__le32 queue_count[IONIC_QTYPE_MAX];
} __packed;
@@ -489,13 +492,13 @@ struct ionic_lif_init_comp {
u8 rsvd2[12];
};
- /**
- * struct ionic_q_identify_cmd - queue identify command
- * @opcode: opcode
- * @lif_type: LIF type (enum ionic_lif_type)
- * @type: Logical queue type (enum ionic_logical_qtype)
- * @ver: Highest queue type version that the driver supports
- */
+/**
+ * struct ionic_q_identify_cmd - queue identify command
+ * @opcode: opcode
+ * @lif_type: LIF type (enum ionic_lif_type)
+ * @type: Logical queue type (enum ionic_logical_qtype)
+ * @ver: Highest queue type version that the driver supports
+ */
struct ionic_q_identify_cmd {
u8 opcode;
u8 rsvd;
@@ -983,6 +986,14 @@ enum ionic_pkt_type {
IONIC_PKT_TYPE_IPV6 = 0x008,
IONIC_PKT_TYPE_IPV6_TCP = 0x018,
IONIC_PKT_TYPE_IPV6_UDP = 0x028,
+ /* below types are only used if encap offloads are enabled on lif */
+ IONIC_PKT_TYPE_ENCAP_NON_IP = 0x40,
+ IONIC_PKT_TYPE_ENCAP_IPV4 = 0x41,
+ IONIC_PKT_TYPE_ENCAP_IPV4_TCP = 0x43,
+ IONIC_PKT_TYPE_ENCAP_IPV4_UDP = 0x45,
+ IONIC_PKT_TYPE_ENCAP_IPV6 = 0x48,
+ IONIC_PKT_TYPE_ENCAP_IPV6_TCP = 0x58,
+ IONIC_PKT_TYPE_ENCAP_IPV6_UDP = 0x68,
};
enum ionic_eth_hw_features {
@@ -1003,6 +1014,9 @@ enum ionic_eth_hw_features {
IONIC_ETH_HW_TSO_IPXIP6 = BIT(14),
IONIC_ETH_HW_TSO_UDP = BIT(15),
IONIC_ETH_HW_TSO_UDP_CSUM = BIT(16),
+ IONIC_ETH_HW_RX_CSUM_GENEVE = BIT(17),
+ IONIC_ETH_HW_TX_CSUM_GENEVE = BIT(18),
+ IONIC_ETH_HW_TSO_GENEVE = BIT(19)
};
/**
@@ -1011,7 +1025,7 @@ enum ionic_eth_hw_features {
* @type: Queue type
* @lif_index: LIF index
* @index: Queue index
- * @oper: Operation (enum q_control_oper)
+ * @oper: Operation (enum ionic_q_control_oper)
*/
struct ionic_q_control_cmd {
u8 opcode;
@@ -1172,7 +1186,7 @@ enum ionic_port_loopback_mode {
* struct ionic_xcvr_status - Transceiver Status information
* @state: Transceiver status (enum ionic_xcvr_state)
* @phy: Physical connection type (enum ionic_phy_type)
- * @pid: Transceiver link mode (enum pid)
+ * @pid: Transceiver link mode (enum ionic_xcvr_pid)
* @sprom: Transceiver sprom contents
*/
struct ionic_xcvr_status {
@@ -1186,7 +1200,7 @@ struct ionic_xcvr_status {
* union ionic_port_config - Port configuration
* @speed: port speed (in Mbps)
* @mtu: mtu
- * @state: port admin state (enum port_admin_state)
+ * @state: port admin state (enum ionic_port_admin_state)
* @an_enable: autoneg enable
* @fec_type: fec type (enum ionic_port_fec_type)
* @pause_type: pause type (enum ionic_port_pause_type)
@@ -1874,12 +1888,14 @@ struct ionic_qos_identify_comp {
};
#define IONIC_QOS_TC_MAX 8
+#define IONIC_QOS_ALL_TC 0xFF
/* Capri max supported, should be renamed. */
#define IONIC_QOS_CLASS_MAX 7
#define IONIC_QOS_PCP_MAX 8
#define IONIC_QOS_CLASS_NAME_SZ 32
#define IONIC_QOS_DSCP_MAX 64
#define IONIC_QOS_ALL_PCP 0xFF
+#define IONIC_DSCP_BLOCK_SIZE 8
/**
* enum ionic_qos_class
@@ -1923,6 +1939,7 @@ enum ionic_qos_sched_type {
* IONIC_QOS_CONFIG_F_NO_DROP drop/nodrop
* IONIC_QOS_CONFIG_F_RW_DOT1Q_PCP enable dot1q pcp rewrite
* IONIC_QOS_CONFIG_F_RW_IP_DSCP enable ip dscp rewrite
+ * IONIC_QOS_CONFIG_F_NON_DISRUPTIVE Non-disruptive TC update
* @sched_type: QoS class scheduling type (enum ionic_qos_sched_type)
* @class_type: QoS class type (enum ionic_qos_class_type)
* @pause_type: QoS pause type (enum ionic_qos_pause_type)
@@ -1944,6 +1961,8 @@ union ionic_qos_config {
/* Used to rewrite PCP or DSCP value. */
#define IONIC_QOS_CONFIG_F_RW_DOT1Q_PCP BIT(2)
#define IONIC_QOS_CONFIG_F_RW_IP_DSCP BIT(3)
+/* Non-disruptive TC update */
+#define IONIC_QOS_CONFIG_F_NON_DISRUPTIVE BIT(4)
u8 flags;
u8 sched_type;
u8 class_type;
@@ -2019,6 +2038,16 @@ struct ionic_qos_reset_cmd {
u8 rsvd[62];
};
+/**
+ * struct ionic_qos_clear_port_stats_cmd - Qos config reset command
+ * @opcode: Opcode
+ */
+struct ionic_qos_clear_stats_cmd {
+ u8 opcode;
+ u8 group_bitmap;
+ u8 rsvd[62];
+};
+
typedef struct ionic_admin_comp ionic_qos_reset_comp;
/**
@@ -2164,7 +2193,7 @@ struct ionic_notifyq_event {
* struct ionic_link_change_event - Link change event notification
* @eid: event number
* @ecode: event code = IONIC_EVENT_LINK_CHANGE
- * @link_status: link up or down, with error bits (enum port_status)
+ * @link_status: link up/down, with error bits (enum ionic_port_status)
* @link_speed: speed of the network link
*
* Sent when the network link state changes between UP and DOWN
@@ -2377,6 +2406,16 @@ enum ionic_pb_buffer_drop_stats {
IONIC_BUFFER_DROP_MAX,
};
+enum ionic_oflow_drop_stats {
+ IONIC_OFLOW_OCCUPANCY_DROP,
+ IONIC_OFLOW_EMERGENCY_STOP_DROP,
+ IONIC_OFLOW_WRITE_BUFFER_ACK_FILL_UP_DROP,
+ IONIC_OFLOW_WRITE_BUFFER_ACK_FULL_DROP,
+ IONIC_OFLOW_WRITE_BUFFER_FULL_DROP,
+ IONIC_OFLOW_CONTROL_FIFO_FULL_DROP,
+ IONIC_OFLOW_DROP_MAX,
+};
+
/**
* struct port_pb_stats - packet buffers system stats
* uses ionic_pb_buffer_drop_stats for drop_counts[]
@@ -2390,12 +2429,20 @@ struct ionic_port_pb_stats {
__le64 input_queue_buffer_occupancy[IONIC_QOS_TC_MAX];
__le64 input_queue_port_monitor[IONIC_QOS_TC_MAX];
__le64 output_queue_port_monitor[IONIC_QOS_TC_MAX];
+ __le64 oflow_drop_counts[IONIC_OFLOW_DROP_MAX];
+ __le64 input_queue_good_pkts_in[IONIC_QOS_TC_MAX];
+ __le64 input_queue_good_pkts_out[IONIC_QOS_TC_MAX];
+ __le64 input_queue_err_pkts_in[IONIC_QOS_TC_MAX];
+ __le64 input_queue_fifo_depth[IONIC_QOS_TC_MAX];
+ __le64 input_queue_max_fifo_depth[IONIC_QOS_TC_MAX];
+ __le64 input_queue_peak_occupancy[IONIC_QOS_TC_MAX];
+ __le64 output_queue_buffer_occupancy[IONIC_QOS_TC_MAX];
};
/**
* struct ionic_port_identity - port identity structure
* @version: identity structure version
- * @type: type of port (enum port_type)
+ * @type: type of port (enum ionic_port_type)
* @num_lanes: number of lanes for the port
* @autoneg: autoneg supported
* @min_frame_size: minimum frame size supported
@@ -2637,6 +2684,7 @@ union ionic_dev_cmd {
struct ionic_qos_identify_cmd qos_identify;
struct ionic_qos_init_cmd qos_init;
struct ionic_qos_reset_cmd qos_reset;
+ struct ionic_qos_clear_stats_cmd qos_clear_stats;
struct ionic_q_identify_cmd q_identify;
struct ionic_q_init_cmd q_init;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index f49486b6d04d..3aa64030f4e2 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -5,6 +5,7 @@
#include <linux/dynamic_debug.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
#include <linux/rtnetlink.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
@@ -718,7 +719,7 @@ static bool ionic_notifyq_service(struct ionic_cq *cq,
eid = le64_to_cpu(comp->event.eid);
/* Have we run out of new completions to process? */
- if (eid <= lif->last_eid)
+ if ((s64)(eid - lif->last_eid) <= 0)
return false;
lif->last_eid = eid;
@@ -2022,16 +2023,22 @@ reset_out:
static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index)
{
struct device *dev = ionic->dev;
+ union ionic_lif_identity *lid;
struct net_device *netdev;
struct ionic_lif *lif;
int tbl_sz;
int err;
+ lid = kzalloc(sizeof(*lid), GFP_KERNEL);
+ if (!lid)
+ return ERR_PTR(-ENOMEM);
+
netdev = alloc_etherdev_mqs(sizeof(*lif),
ionic->ntxqs_per_lif, ionic->ntxqs_per_lif);
if (!netdev) {
dev_err(dev, "Cannot allocate netdev, aborting\n");
- return ERR_PTR(-ENOMEM);
+ err = -ENOMEM;
+ goto err_out_free_lid;
}
SET_NETDEV_DEV(netdev, dev);
@@ -2045,8 +2052,12 @@ static struct ionic_lif *ionic_lif_alloc(struct ionic *ionic, unsigned int index
netdev->watchdog_timeo = 2 * HZ;
netif_carrier_off(netdev);
- netdev->min_mtu = IONIC_MIN_MTU;
- netdev->max_mtu = IONIC_MAX_MTU;
+ lif->identity = lid;
+ lif->lif_type = IONIC_LIF_TYPE_CLASSIC;
+ ionic_lif_identify(ionic, lif->lif_type, lif->identity);
+ lif->netdev->min_mtu = le32_to_cpu(lif->identity->eth.min_frame_size);
+ lif->netdev->max_mtu =
+ le32_to_cpu(lif->identity->eth.max_frame_size) - ETH_HLEN - VLAN_HLEN;
lif->neqs = ionic->neqs_per_lif;
lif->nxqs = ionic->ntxqs_per_lif;
@@ -2113,6 +2124,8 @@ err_out_free_lif_info:
err_out_free_netdev:
free_netdev(lif->netdev);
lif = NULL;
+err_out_free_lid:
+ kfree(lid);
return ERR_PTR(err);
}
@@ -2132,7 +2145,6 @@ int ionic_lifs_alloc(struct ionic *ionic)
return -ENOMEM;
}
- lif->lif_type = IONIC_LIF_TYPE_CLASSIC;
ionic_lif_queue_identify(lif);
return 0;
@@ -2243,6 +2255,7 @@ static void ionic_lif_free(struct ionic_lif *lif)
ionic_lif_reset(lif);
/* free lif info */
+ kfree(lif->identity);
dma_free_coherent(dev, lif->info_sz, lif->info, lif->info_pa);
lif->info = NULL;
lif->info_pa = 0;
@@ -2620,6 +2633,7 @@ int ionic_lifs_register(struct ionic *ionic)
return err;
}
ionic->master_lif->registered = true;
+ ionic_lif_set_netdev_info(ionic->master_lif);
return 0;
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index ed126dd74e01..f1e7d3ef1c58 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -184,6 +184,7 @@ struct ionic_lif {
u16 lif_type;
unsigned int nucast;
+ union ionic_lif_identity *identity;
struct ionic_lif_info *info;
dma_addr_t info_pa;
u32 info_sz;
@@ -235,19 +236,6 @@ static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs)
return (usecs * mult) / div;
}
-static inline u32 ionic_coal_hw_to_usec(struct ionic *ionic, u32 units)
-{
- u32 mult = le32_to_cpu(ionic->ident.dev.intr_coal_mult);
- u32 div = le32_to_cpu(ionic->ident.dev.intr_coal_div);
-
- /* Div-by-zero should never be an issue, but check anyway */
- if (!div || !mult)
- return 0;
-
- /* Convert from device units to usec */
- return (units * div) / mult;
-}
-
typedef void (*ionic_reset_cb)(struct ionic_lif *lif, void *arg);
void ionic_link_status_check_request(struct ionic_lif *lif);
diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile
index 4176bbf2a22b..f947b105cf14 100644
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -3,12 +3,35 @@
obj-$(CONFIG_QED) := qed.o
-qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \
- qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o qed_l2.o \
- qed_selftest.o qed_dcbx.o qed_debug.o qed_ptp.o qed_mng_tlv.o
-qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o
-qed-$(CONFIG_QED_LL2) += qed_ll2.o
-qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o qed_iwarp.o
-qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o
+qed-y := \
+ qed_chain.o \
+ qed_cxt.o \
+ qed_dcbx.o \
+ qed_debug.o \
+ qed_dev.o \
+ qed_hw.o \
+ qed_init_fw_funcs.o \
+ qed_init_ops.o \
+ qed_int.o \
+ qed_l2.o \
+ qed_main.o \
+ qed_mcp.o \
+ qed_mng_tlv.o \
+ qed_ptp.o \
+ qed_selftest.o \
+ qed_sp_commands.o \
+ qed_spq.o
+
qed-$(CONFIG_QED_FCOE) += qed_fcoe.o
+qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o
+qed-$(CONFIG_QED_LL2) += qed_ll2.o
qed-$(CONFIG_QED_OOO) += qed_ooo.o
+
+qed-$(CONFIG_QED_RDMA) += \
+ qed_iwarp.o \
+ qed_rdma.o \
+ qed_roce.o
+
+qed-$(CONFIG_QED_SRIOV) += \
+ qed_sriov.o \
+ qed_vf.o
diff --git a/drivers/net/ethernet/qlogic/qed/qed_chain.c b/drivers/net/ethernet/qlogic/qed/qed_chain.c
new file mode 100644
index 000000000000..f8efd36d66e0
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_chain.c
@@ -0,0 +1,369 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+/* Copyright (c) 2020 Marvell International Ltd. */
+
+#include <linux/dma-mapping.h>
+#include <linux/qed/qed_chain.h>
+#include <linux/vmalloc.h>
+
+#include "qed_dev_api.h"
+
+static void qed_chain_init(struct qed_chain *chain,
+ const struct qed_chain_init_params *params,
+ u32 page_cnt)
+{
+ memset(chain, 0, sizeof(*chain));
+
+ chain->elem_size = params->elem_size;
+ chain->intended_use = params->intended_use;
+ chain->mode = params->mode;
+ chain->cnt_type = params->cnt_type;
+
+ chain->elem_per_page = ELEMS_PER_PAGE(params->elem_size,
+ params->page_size);
+ chain->usable_per_page = USABLE_ELEMS_PER_PAGE(params->elem_size,
+ params->page_size,
+ params->mode);
+ chain->elem_unusable = UNUSABLE_ELEMS_PER_PAGE(params->elem_size,
+ params->mode);
+
+ chain->elem_per_page_mask = chain->elem_per_page - 1;
+ chain->next_page_mask = chain->usable_per_page &
+ chain->elem_per_page_mask;
+
+ chain->page_size = params->page_size;
+ chain->page_cnt = page_cnt;
+ chain->capacity = chain->usable_per_page * page_cnt;
+ chain->size = chain->elem_per_page * page_cnt;
+
+ if (params->ext_pbl_virt) {
+ chain->pbl_sp.table_virt = params->ext_pbl_virt;
+ chain->pbl_sp.table_phys = params->ext_pbl_phys;
+
+ chain->b_external_pbl = true;
+ }
+}
+
+static void qed_chain_init_next_ptr_elem(const struct qed_chain *chain,
+ void *virt_curr, void *virt_next,
+ dma_addr_t phys_next)
+{
+ struct qed_chain_next *next;
+ u32 size;
+
+ size = chain->elem_size * chain->usable_per_page;
+ next = virt_curr + size;
+
+ DMA_REGPAIR_LE(next->next_phys, phys_next);
+ next->next_virt = virt_next;
+}
+
+static void qed_chain_init_mem(struct qed_chain *chain, void *virt_addr,
+ dma_addr_t phys_addr)
+{
+ chain->p_virt_addr = virt_addr;
+ chain->p_phys_addr = phys_addr;
+}
+
+static void qed_chain_free_next_ptr(struct qed_dev *cdev,
+ struct qed_chain *chain)
+{
+ struct device *dev = &cdev->pdev->dev;
+ struct qed_chain_next *next;
+ dma_addr_t phys, phys_next;
+ void *virt, *virt_next;
+ u32 size, i;
+
+ size = chain->elem_size * chain->usable_per_page;
+ virt = chain->p_virt_addr;
+ phys = chain->p_phys_addr;
+
+ for (i = 0; i < chain->page_cnt; i++) {
+ if (!virt)
+ break;
+
+ next = virt + size;
+ virt_next = next->next_virt;
+ phys_next = HILO_DMA_REGPAIR(next->next_phys);
+
+ dma_free_coherent(dev, chain->page_size, virt, phys);
+
+ virt = virt_next;
+ phys = phys_next;
+ }
+}
+
+static void qed_chain_free_single(struct qed_dev *cdev,
+ struct qed_chain *chain)
+{
+ if (!chain->p_virt_addr)
+ return;
+
+ dma_free_coherent(&cdev->pdev->dev, chain->page_size,
+ chain->p_virt_addr, chain->p_phys_addr);
+}
+
+static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *chain)
+{
+ struct device *dev = &cdev->pdev->dev;
+ struct addr_tbl_entry *entry;
+ u32 i;
+
+ if (!chain->pbl.pp_addr_tbl)
+ return;
+
+ for (i = 0; i < chain->page_cnt; i++) {
+ entry = chain->pbl.pp_addr_tbl + i;
+ if (!entry->virt_addr)
+ break;
+
+ dma_free_coherent(dev, chain->page_size, entry->virt_addr,
+ entry->dma_map);
+ }
+
+ if (!chain->b_external_pbl)
+ dma_free_coherent(dev, chain->pbl_sp.table_size,
+ chain->pbl_sp.table_virt,
+ chain->pbl_sp.table_phys);
+
+ vfree(chain->pbl.pp_addr_tbl);
+ chain->pbl.pp_addr_tbl = NULL;
+}
+
+/**
+ * qed_chain_free() - Free chain DMA memory.
+ *
+ * @cdev: Main device structure.
+ * @chain: Chain to free.
+ */
+void qed_chain_free(struct qed_dev *cdev, struct qed_chain *chain)
+{
+ switch (chain->mode) {
+ case QED_CHAIN_MODE_NEXT_PTR:
+ qed_chain_free_next_ptr(cdev, chain);
+ break;
+ case QED_CHAIN_MODE_SINGLE:
+ qed_chain_free_single(cdev, chain);
+ break;
+ case QED_CHAIN_MODE_PBL:
+ qed_chain_free_pbl(cdev, chain);
+ break;
+ default:
+ return;
+ }
+
+ qed_chain_init_mem(chain, NULL, 0);
+}
+
+static int
+qed_chain_alloc_sanity_check(struct qed_dev *cdev,
+ const struct qed_chain_init_params *params,
+ u32 page_cnt)
+{
+ u64 chain_size;
+
+ chain_size = ELEMS_PER_PAGE(params->elem_size, params->page_size);
+ chain_size *= page_cnt;
+
+ if (!chain_size)
+ return -EINVAL;
+
+ /* The actual chain size can be larger than the maximal possible value
+ * after rounding up the requested elements number to pages, and after
+ * taking into account the unusuable elements (next-ptr elements).
+ * The size of a "u16" chain can be (U16_MAX + 1) since the chain
+ * size/capacity fields are of u32 type.
+ */
+ switch (params->cnt_type) {
+ case QED_CHAIN_CNT_TYPE_U16:
+ if (chain_size > U16_MAX + 1)
+ break;
+
+ return 0;
+ case QED_CHAIN_CNT_TYPE_U32:
+ if (chain_size > U32_MAX)
+ break;
+
+ return 0;
+ default:
+ return -EINVAL;
+ }
+
+ DP_NOTICE(cdev,
+ "The actual chain size (0x%llx) is larger than the maximal possible value\n",
+ chain_size);
+
+ return -EINVAL;
+}
+
+static int qed_chain_alloc_next_ptr(struct qed_dev *cdev,
+ struct qed_chain *chain)
+{
+ struct device *dev = &cdev->pdev->dev;
+ void *virt, *virt_prev = NULL;
+ dma_addr_t phys;
+ u32 i;
+
+ for (i = 0; i < chain->page_cnt; i++) {
+ virt = dma_alloc_coherent(dev, chain->page_size, &phys,
+ GFP_KERNEL);
+ if (!virt)
+ return -ENOMEM;
+
+ if (i == 0) {
+ qed_chain_init_mem(chain, virt, phys);
+ qed_chain_reset(chain);
+ } else {
+ qed_chain_init_next_ptr_elem(chain, virt_prev, virt,
+ phys);
+ }
+
+ virt_prev = virt;
+ }
+
+ /* Last page's next element should point to the beginning of the
+ * chain.
+ */
+ qed_chain_init_next_ptr_elem(chain, virt_prev, chain->p_virt_addr,
+ chain->p_phys_addr);
+
+ return 0;
+}
+
+static int qed_chain_alloc_single(struct qed_dev *cdev,
+ struct qed_chain *chain)
+{
+ dma_addr_t phys;
+ void *virt;
+
+ virt = dma_alloc_coherent(&cdev->pdev->dev, chain->page_size,
+ &phys, GFP_KERNEL);
+ if (!virt)
+ return -ENOMEM;
+
+ qed_chain_init_mem(chain, virt, phys);
+ qed_chain_reset(chain);
+
+ return 0;
+}
+
+static int qed_chain_alloc_pbl(struct qed_dev *cdev, struct qed_chain *chain)
+{
+ struct device *dev = &cdev->pdev->dev;
+ struct addr_tbl_entry *addr_tbl;
+ dma_addr_t phys, pbl_phys;
+ __le64 *pbl_virt;
+ u32 page_cnt, i;
+ size_t size;
+ void *virt;
+
+ page_cnt = chain->page_cnt;
+
+ size = array_size(page_cnt, sizeof(*addr_tbl));
+ if (unlikely(size == SIZE_MAX))
+ return -EOVERFLOW;
+
+ addr_tbl = vzalloc(size);
+ if (!addr_tbl)
+ return -ENOMEM;
+
+ chain->pbl.pp_addr_tbl = addr_tbl;
+
+ if (chain->b_external_pbl)
+ goto alloc_pages;
+
+ size = array_size(page_cnt, sizeof(*pbl_virt));
+ if (unlikely(size == SIZE_MAX))
+ return -EOVERFLOW;
+
+ pbl_virt = dma_alloc_coherent(dev, size, &pbl_phys, GFP_KERNEL);
+ if (!pbl_virt)
+ return -ENOMEM;
+
+ chain->pbl_sp.table_virt = pbl_virt;
+ chain->pbl_sp.table_phys = pbl_phys;
+ chain->pbl_sp.table_size = size;
+
+alloc_pages:
+ for (i = 0; i < page_cnt; i++) {
+ virt = dma_alloc_coherent(dev, chain->page_size, &phys,
+ GFP_KERNEL);
+ if (!virt)
+ return -ENOMEM;
+
+ if (i == 0) {
+ qed_chain_init_mem(chain, virt, phys);
+ qed_chain_reset(chain);
+ }
+
+ /* Fill the PBL table with the physical address of the page */
+ pbl_virt[i] = cpu_to_le64(phys);
+
+ /* Keep the virtual address of the page */
+ addr_tbl[i].virt_addr = virt;
+ addr_tbl[i].dma_map = phys;
+ }
+
+ return 0;
+}
+
+/**
+ * qed_chain_alloc() - Allocate and initialize a chain.
+ *
+ * @cdev: Main device structure.
+ * @chain: Chain to be processed.
+ * @params: Chain initialization parameters.
+ *
+ * Return: 0 on success, negative errno otherwise.
+ */
+int qed_chain_alloc(struct qed_dev *cdev, struct qed_chain *chain,
+ struct qed_chain_init_params *params)
+{
+ u32 page_cnt;
+ int rc;
+
+ if (!params->page_size)
+ params->page_size = QED_CHAIN_PAGE_SIZE;
+
+ if (params->mode == QED_CHAIN_MODE_SINGLE)
+ page_cnt = 1;
+ else
+ page_cnt = QED_CHAIN_PAGE_CNT(params->num_elems,
+ params->elem_size,
+ params->page_size,
+ params->mode);
+
+ rc = qed_chain_alloc_sanity_check(cdev, params, page_cnt);
+ if (rc) {
+ DP_NOTICE(cdev,
+ "Cannot allocate a chain with the given arguments:\n");
+ DP_NOTICE(cdev,
+ "[use_mode %d, mode %d, cnt_type %d, num_elems %d, elem_size %zu, page_size %u]\n",
+ params->intended_use, params->mode, params->cnt_type,
+ params->num_elems, params->elem_size,
+ params->page_size);
+ return rc;
+ }
+
+ qed_chain_init(chain, params, page_cnt);
+
+ switch (params->mode) {
+ case QED_CHAIN_MODE_NEXT_PTR:
+ rc = qed_chain_alloc_next_ptr(cdev, chain);
+ break;
+ case QED_CHAIN_MODE_SINGLE:
+ rc = qed_chain_alloc_single(cdev, chain);
+ break;
+ case QED_CHAIN_MODE_PBL:
+ rc = qed_chain_alloc_pbl(cdev, chain);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!rc)
+ return 0;
+
+ qed_chain_free(cdev, chain);
+
+ return rc;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index 5362dc18b6c2..6c221e9c8799 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -2335,6 +2335,11 @@ qed_cxt_free_ilt_range(struct qed_hwfn *p_hwfn,
elem_size = SRQ_CXT_SIZE;
p_blk = &p_cli->pf_blks[SRQ_BLK];
break;
+ case QED_ELEM_XRC_SRQ:
+ p_cli = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_TSDM];
+ elem_size = XRC_SRQ_CXT_SIZE;
+ p_blk = &p_cli->pf_blks[SRQ_BLK];
+ break;
case QED_ELEM_TASK:
p_cli = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_CDUT];
elem_size = TYPE1_TASK_CXT_SIZE(p_hwfn);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 6516a1f921da..d9c7a1a6be94 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -4716,279 +4716,6 @@ void qed_hw_remove(struct qed_dev *cdev)
qed_mcp_nvm_info_free(p_hwfn);
}
-static void qed_chain_free_next_ptr(struct qed_dev *cdev,
- struct qed_chain *p_chain)
-{
- void *p_virt = p_chain->p_virt_addr, *p_virt_next = NULL;
- dma_addr_t p_phys = p_chain->p_phys_addr, p_phys_next = 0;
- struct qed_chain_next *p_next;
- u32 size, i;
-
- if (!p_virt)
- return;
-
- size = p_chain->elem_size * p_chain->usable_per_page;
-
- for (i = 0; i < p_chain->page_cnt; i++) {
- if (!p_virt)
- break;
-
- p_next = (struct qed_chain_next *)((u8 *)p_virt + size);
- p_virt_next = p_next->next_virt;
- p_phys_next = HILO_DMA_REGPAIR(p_next->next_phys);
-
- dma_free_coherent(&cdev->pdev->dev,
- QED_CHAIN_PAGE_SIZE, p_virt, p_phys);
-
- p_virt = p_virt_next;
- p_phys = p_phys_next;
- }
-}
-
-static void qed_chain_free_single(struct qed_dev *cdev,
- struct qed_chain *p_chain)
-{
- if (!p_chain->p_virt_addr)
- return;
-
- dma_free_coherent(&cdev->pdev->dev,
- QED_CHAIN_PAGE_SIZE,
- p_chain->p_virt_addr, p_chain->p_phys_addr);
-}
-
-static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *p_chain)
-{
- struct addr_tbl_entry *pp_addr_tbl = p_chain->pbl.pp_addr_tbl;
- u32 page_cnt = p_chain->page_cnt, i, pbl_size;
-
- if (!pp_addr_tbl)
- return;
-
- for (i = 0; i < page_cnt; i++) {
- if (!pp_addr_tbl[i].virt_addr || !pp_addr_tbl[i].dma_map)
- break;
-
- dma_free_coherent(&cdev->pdev->dev,
- QED_CHAIN_PAGE_SIZE,
- pp_addr_tbl[i].virt_addr,
- pp_addr_tbl[i].dma_map);
- }
-
- pbl_size = page_cnt * QED_CHAIN_PBL_ENTRY_SIZE;
-
- if (!p_chain->b_external_pbl)
- dma_free_coherent(&cdev->pdev->dev,
- pbl_size,
- p_chain->pbl_sp.p_virt_table,
- p_chain->pbl_sp.p_phys_table);
-
- vfree(p_chain->pbl.pp_addr_tbl);
- p_chain->pbl.pp_addr_tbl = NULL;
-}
-
-void qed_chain_free(struct qed_dev *cdev, struct qed_chain *p_chain)
-{
- switch (p_chain->mode) {
- case QED_CHAIN_MODE_NEXT_PTR:
- qed_chain_free_next_ptr(cdev, p_chain);
- break;
- case QED_CHAIN_MODE_SINGLE:
- qed_chain_free_single(cdev, p_chain);
- break;
- case QED_CHAIN_MODE_PBL:
- qed_chain_free_pbl(cdev, p_chain);
- break;
- }
-}
-
-static int
-qed_chain_alloc_sanity_check(struct qed_dev *cdev,
- enum qed_chain_cnt_type cnt_type,
- size_t elem_size, u32 page_cnt)
-{
- u64 chain_size = ELEMS_PER_PAGE(elem_size) * page_cnt;
-
- /* The actual chain size can be larger than the maximal possible value
- * after rounding up the requested elements number to pages, and after
- * taking into acount the unusuable elements (next-ptr elements).
- * The size of a "u16" chain can be (U16_MAX + 1) since the chain
- * size/capacity fields are of a u32 type.
- */
- if ((cnt_type == QED_CHAIN_CNT_TYPE_U16 &&
- chain_size > ((u32)U16_MAX + 1)) ||
- (cnt_type == QED_CHAIN_CNT_TYPE_U32 && chain_size > U32_MAX)) {
- DP_NOTICE(cdev,
- "The actual chain size (0x%llx) is larger than the maximal possible value\n",
- chain_size);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int
-qed_chain_alloc_next_ptr(struct qed_dev *cdev, struct qed_chain *p_chain)
-{
- void *p_virt = NULL, *p_virt_prev = NULL;
- dma_addr_t p_phys = 0;
- u32 i;
-
- for (i = 0; i < p_chain->page_cnt; i++) {
- p_virt = dma_alloc_coherent(&cdev->pdev->dev,
- QED_CHAIN_PAGE_SIZE,
- &p_phys, GFP_KERNEL);
- if (!p_virt)
- return -ENOMEM;
-
- if (i == 0) {
- qed_chain_init_mem(p_chain, p_virt, p_phys);
- qed_chain_reset(p_chain);
- } else {
- qed_chain_init_next_ptr_elem(p_chain, p_virt_prev,
- p_virt, p_phys);
- }
-
- p_virt_prev = p_virt;
- }
- /* Last page's next element should point to the beginning of the
- * chain.
- */
- qed_chain_init_next_ptr_elem(p_chain, p_virt_prev,
- p_chain->p_virt_addr,
- p_chain->p_phys_addr);
-
- return 0;
-}
-
-static int
-qed_chain_alloc_single(struct qed_dev *cdev, struct qed_chain *p_chain)
-{
- dma_addr_t p_phys = 0;
- void *p_virt = NULL;
-
- p_virt = dma_alloc_coherent(&cdev->pdev->dev,
- QED_CHAIN_PAGE_SIZE, &p_phys, GFP_KERNEL);
- if (!p_virt)
- return -ENOMEM;
-
- qed_chain_init_mem(p_chain, p_virt, p_phys);
- qed_chain_reset(p_chain);
-
- return 0;
-}
-
-static int
-qed_chain_alloc_pbl(struct qed_dev *cdev,
- struct qed_chain *p_chain,
- struct qed_chain_ext_pbl *ext_pbl)
-{
- u32 page_cnt = p_chain->page_cnt, size, i;
- dma_addr_t p_phys = 0, p_pbl_phys = 0;
- struct addr_tbl_entry *pp_addr_tbl;
- u8 *p_pbl_virt = NULL;
- void *p_virt = NULL;
-
- size = page_cnt * sizeof(*pp_addr_tbl);
- pp_addr_tbl = vzalloc(size);
- if (!pp_addr_tbl)
- return -ENOMEM;
-
- /* The allocation of the PBL table is done with its full size, since it
- * is expected to be successive.
- * qed_chain_init_pbl_mem() is called even in a case of an allocation
- * failure, since tbl was previously allocated, and it
- * should be saved to allow its freeing during the error flow.
- */
- size = page_cnt * QED_CHAIN_PBL_ENTRY_SIZE;
-
- if (!ext_pbl) {
- p_pbl_virt = dma_alloc_coherent(&cdev->pdev->dev,
- size, &p_pbl_phys, GFP_KERNEL);
- } else {
- p_pbl_virt = ext_pbl->p_pbl_virt;
- p_pbl_phys = ext_pbl->p_pbl_phys;
- p_chain->b_external_pbl = true;
- }
-
- qed_chain_init_pbl_mem(p_chain, p_pbl_virt, p_pbl_phys, pp_addr_tbl);
- if (!p_pbl_virt)
- return -ENOMEM;
-
- for (i = 0; i < page_cnt; i++) {
- p_virt = dma_alloc_coherent(&cdev->pdev->dev,
- QED_CHAIN_PAGE_SIZE,
- &p_phys, GFP_KERNEL);
- if (!p_virt)
- return -ENOMEM;
-
- if (i == 0) {
- qed_chain_init_mem(p_chain, p_virt, p_phys);
- qed_chain_reset(p_chain);
- }
-
- /* Fill the PBL table with the physical address of the page */
- *(dma_addr_t *)p_pbl_virt = p_phys;
- /* Keep the virtual address of the page */
- p_chain->pbl.pp_addr_tbl[i].virt_addr = p_virt;
- p_chain->pbl.pp_addr_tbl[i].dma_map = p_phys;
-
- p_pbl_virt += QED_CHAIN_PBL_ENTRY_SIZE;
- }
-
- return 0;
-}
-
-int qed_chain_alloc(struct qed_dev *cdev,
- enum qed_chain_use_mode intended_use,
- enum qed_chain_mode mode,
- enum qed_chain_cnt_type cnt_type,
- u32 num_elems,
- size_t elem_size,
- struct qed_chain *p_chain,
- struct qed_chain_ext_pbl *ext_pbl)
-{
- u32 page_cnt;
- int rc = 0;
-
- if (mode == QED_CHAIN_MODE_SINGLE)
- page_cnt = 1;
- else
- page_cnt = QED_CHAIN_PAGE_CNT(num_elems, elem_size, mode);
-
- rc = qed_chain_alloc_sanity_check(cdev, cnt_type, elem_size, page_cnt);
- if (rc) {
- DP_NOTICE(cdev,
- "Cannot allocate a chain with the given arguments:\n");
- DP_NOTICE(cdev,
- "[use_mode %d, mode %d, cnt_type %d, num_elems %d, elem_size %zu]\n",
- intended_use, mode, cnt_type, num_elems, elem_size);
- return rc;
- }
-
- qed_chain_init_params(p_chain, page_cnt, (u8) elem_size, intended_use,
- mode, cnt_type);
-
- switch (mode) {
- case QED_CHAIN_MODE_NEXT_PTR:
- rc = qed_chain_alloc_next_ptr(cdev, p_chain);
- break;
- case QED_CHAIN_MODE_SINGLE:
- rc = qed_chain_alloc_single(cdev, p_chain);
- break;
- case QED_CHAIN_MODE_PBL:
- rc = qed_chain_alloc_pbl(cdev, p_chain, ext_pbl);
- break;
- }
- if (rc)
- goto nomem;
-
- return 0;
-
-nomem:
- qed_chain_free(cdev, p_chain);
- return rc;
-}
-
int qed_fw_l2_queue(struct qed_hwfn *p_hwfn, u16 src_id, u16 *dst_id)
{
if (src_id >= RESC_NUM(p_hwfn, QED_L2_QUEUE)) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index 395d4932c262..d3c1f3879be8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -254,35 +254,9 @@ int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
dma_addr_t dest_addr,
u32 size_in_dwords, struct qed_dmae_params *p_params);
-/**
- * @brief qed_chain_alloc - Allocate and initialize a chain
- *
- * @param p_hwfn
- * @param intended_use
- * @param mode
- * @param num_elems
- * @param elem_size
- * @param p_chain
- * @param ext_pbl - a possible external PBL
- *
- * @return int
- */
-int
-qed_chain_alloc(struct qed_dev *cdev,
- enum qed_chain_use_mode intended_use,
- enum qed_chain_mode mode,
- enum qed_chain_cnt_type cnt_type,
- u32 num_elems,
- size_t elem_size,
- struct qed_chain *p_chain, struct qed_chain_ext_pbl *ext_pbl);
-
-/**
- * @brief qed_chain_free - Free chain DMA memory
- *
- * @param p_hwfn
- * @param p_chain
- */
-void qed_chain_free(struct qed_dev *cdev, struct qed_chain *p_chain);
+int qed_chain_alloc(struct qed_dev *cdev, struct qed_chain *chain,
+ struct qed_chain_init_params *params);
+void qed_chain_free(struct qed_dev *cdev, struct qed_chain *chain);
/**
* @@brief qed_fw_l2_queue - Get absolute L2 queue ID
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 1af3f65ab862..559df9f4d656 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -361,7 +361,7 @@ struct core_tx_update_ramrod_data {
u8 update_qm_pq_id_flg;
u8 reserved0;
__le16 qm_pq_id;
- __le32 reserved1[1];
+ __le32 reserved1;
};
/* Enum flag for what type of dcb data to update */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 25d2c882d7ac..4eae4ee3538f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -684,9 +684,13 @@ nomem:
static int qed_iscsi_allocate_connection(struct qed_hwfn *p_hwfn,
struct qed_iscsi_conn **p_out_conn)
{
- u16 uhq_num_elements = 0, xhq_num_elements = 0, r2tq_num_elements = 0;
struct scsi_terminate_extra_params *p_q_cnts = NULL;
struct qed_iscsi_pf_params *p_params = NULL;
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U16,
+ };
struct tcp_upload_params *p_tcp = NULL;
struct qed_iscsi_conn *p_conn = NULL;
int rc = 0;
@@ -727,34 +731,25 @@ static int qed_iscsi_allocate_connection(struct qed_hwfn *p_hwfn,
goto nomem_upload_param;
p_conn->tcp_upload_params_virt_addr = p_tcp;
- r2tq_num_elements = p_params->num_r2tq_pages_in_ring *
- QED_CHAIN_PAGE_SIZE / 0x80;
- rc = qed_chain_alloc(p_hwfn->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U16,
- r2tq_num_elements, 0x80, &p_conn->r2tq, NULL);
+ params.num_elems = p_params->num_r2tq_pages_in_ring *
+ QED_CHAIN_PAGE_SIZE / sizeof(struct iscsi_wqe);
+ params.elem_size = sizeof(struct iscsi_wqe);
+
+ rc = qed_chain_alloc(p_hwfn->cdev, &p_conn->r2tq, &params);
if (rc)
goto nomem_r2tq;
- uhq_num_elements = p_params->num_uhq_pages_in_ring *
+ params.num_elems = p_params->num_uhq_pages_in_ring *
QED_CHAIN_PAGE_SIZE / sizeof(struct iscsi_uhqe);
- rc = qed_chain_alloc(p_hwfn->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U16,
- uhq_num_elements,
- sizeof(struct iscsi_uhqe), &p_conn->uhq, NULL);
+ params.elem_size = sizeof(struct iscsi_uhqe);
+
+ rc = qed_chain_alloc(p_hwfn->cdev, &p_conn->uhq, &params);
if (rc)
goto nomem_uhq;
- xhq_num_elements = uhq_num_elements;
- rc = qed_chain_alloc(p_hwfn->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U16,
- xhq_num_elements,
- sizeof(struct iscsi_xhqe), &p_conn->xhq, NULL);
+ params.elem_size = sizeof(struct iscsi_xhqe);
+
+ rc = qed_chain_alloc(p_hwfn->cdev, &p_conn->xhq, &params);
if (rc)
goto nomem;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 6f4aec339cd4..0452b728c527 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -1125,6 +1125,12 @@ static int
qed_ll2_acquire_connection_rx(struct qed_hwfn *p_hwfn,
struct qed_ll2_info *p_ll2_info)
{
+ struct qed_chain_init_params params = {
+ .intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U16,
+ .num_elems = p_ll2_info->input.rx_num_desc,
+ };
+ struct qed_dev *cdev = p_hwfn->cdev;
struct qed_ll2_rx_packet *p_descq;
u32 capacity;
int rc = 0;
@@ -1132,13 +1138,10 @@ qed_ll2_acquire_connection_rx(struct qed_hwfn *p_hwfn,
if (!p_ll2_info->input.rx_num_desc)
goto out;
- rc = qed_chain_alloc(p_hwfn->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_NEXT_PTR,
- QED_CHAIN_CNT_TYPE_U16,
- p_ll2_info->input.rx_num_desc,
- sizeof(struct core_rx_bd),
- &p_ll2_info->rx_queue.rxq_chain, NULL);
+ params.mode = QED_CHAIN_MODE_NEXT_PTR;
+ params.elem_size = sizeof(struct core_rx_bd);
+
+ rc = qed_chain_alloc(cdev, &p_ll2_info->rx_queue.rxq_chain, &params);
if (rc) {
DP_NOTICE(p_hwfn, "Failed to allocate ll2 rxq chain\n");
goto out;
@@ -1154,13 +1157,10 @@ qed_ll2_acquire_connection_rx(struct qed_hwfn *p_hwfn,
}
p_ll2_info->rx_queue.descq_array = p_descq;
- rc = qed_chain_alloc(p_hwfn->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U16,
- p_ll2_info->input.rx_num_desc,
- sizeof(struct core_rx_fast_path_cqe),
- &p_ll2_info->rx_queue.rcq_chain, NULL);
+ params.mode = QED_CHAIN_MODE_PBL;
+ params.elem_size = sizeof(struct core_rx_fast_path_cqe);
+
+ rc = qed_chain_alloc(cdev, &p_ll2_info->rx_queue.rcq_chain, &params);
if (rc) {
DP_NOTICE(p_hwfn, "Failed to allocate ll2 rcq chain\n");
goto out;
@@ -1177,6 +1177,13 @@ out:
static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn,
struct qed_ll2_info *p_ll2_info)
{
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U16,
+ .num_elems = p_ll2_info->input.tx_num_desc,
+ .elem_size = sizeof(struct core_tx_bd),
+ };
struct qed_ll2_tx_packet *p_descq;
u32 desc_size;
u32 capacity;
@@ -1185,13 +1192,8 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn,
if (!p_ll2_info->input.tx_num_desc)
goto out;
- rc = qed_chain_alloc(p_hwfn->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U16,
- p_ll2_info->input.tx_num_desc,
- sizeof(struct core_tx_bd),
- &p_ll2_info->tx_queue.txq_chain, NULL);
+ rc = qed_chain_alloc(p_hwfn->cdev, &p_ll2_info->tx_queue.txq_chain,
+ &params);
if (rc)
goto out;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index e5648ca2838b..a4bcde522cdf 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -379,6 +379,7 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn)
qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->srq_map, 1);
qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->real_cid_map, 1);
qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->xrc_srq_map, 1);
+ qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->xrcd_map, 1);
kfree(p_rdma_info->port);
kfree(p_rdma_info->dev);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index a1423ec0edf7..f16a157bb95a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -757,8 +757,7 @@ static int qed_roce_sp_destroy_qp_requester(struct qed_hwfn *p_hwfn,
if (!qp->req_offloaded)
return 0;
- p_ramrod_res = (struct roce_destroy_qp_req_output_params *)
- dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+ p_ramrod_res = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
sizeof(*p_ramrod_res),
&ramrod_res_phys, GFP_KERNEL);
if (!p_ramrod_res) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index 8142f5669b26..aa71adcf31ee 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -366,11 +366,11 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
/* Place EQ address in RAMROD */
DMA_REGPAIR_LE(p_ramrod->event_ring_pbl_addr,
- p_hwfn->p_eq->chain.pbl_sp.p_phys_table);
+ qed_chain_get_pbl_phys(&p_hwfn->p_eq->chain));
page_cnt = (u8)qed_chain_get_page_cnt(&p_hwfn->p_eq->chain);
p_ramrod->event_ring_num_pages = page_cnt;
DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr,
- p_hwfn->p_consq->chain.pbl_sp.p_phys_table);
+ qed_chain_get_pbl_phys(&p_hwfn->p_consq->chain));
qed_tunn_set_pf_start_params(p_hwfn, p_tunn, &p_ramrod->tunnel_config);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index 92ab029789e5..0bc1a0aeb56e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -382,22 +382,26 @@ int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie)
int qed_eq_alloc(struct qed_hwfn *p_hwfn, u16 num_elem)
{
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .intended_use = QED_CHAIN_USE_TO_PRODUCE,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U16,
+ .num_elems = num_elem,
+ .elem_size = sizeof(union event_ring_element),
+ };
struct qed_eq *p_eq;
+ int ret;
/* Allocate EQ struct */
p_eq = kzalloc(sizeof(*p_eq), GFP_KERNEL);
if (!p_eq)
return -ENOMEM;
- /* Allocate and initialize EQ chain*/
- if (qed_chain_alloc(p_hwfn->cdev,
- QED_CHAIN_USE_TO_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U16,
- num_elem,
- sizeof(union event_ring_element),
- &p_eq->chain, NULL))
+ ret = qed_chain_alloc(p_hwfn->cdev, &p_eq->chain, &params);
+ if (ret) {
+ DP_NOTICE(p_hwfn, "Failed to allocate EQ chain\n");
goto eq_allocate_fail;
+ }
/* register EQ completion on the SP SB */
qed_int_register_cb(p_hwfn, qed_eq_completion,
@@ -408,7 +412,8 @@ int qed_eq_alloc(struct qed_hwfn *p_hwfn, u16 num_elem)
eq_allocate_fail:
kfree(p_eq);
- return -ENOMEM;
+
+ return ret;
}
void qed_eq_setup(struct qed_hwfn *p_hwfn)
@@ -529,33 +534,40 @@ void qed_spq_setup(struct qed_hwfn *p_hwfn)
int qed_spq_alloc(struct qed_hwfn *p_hwfn)
{
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_SINGLE,
+ .intended_use = QED_CHAIN_USE_TO_PRODUCE,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U16,
+ .elem_size = sizeof(struct slow_path_element),
+ };
+ struct qed_dev *cdev = p_hwfn->cdev;
struct qed_spq_entry *p_virt = NULL;
struct qed_spq *p_spq = NULL;
dma_addr_t p_phys = 0;
u32 capacity;
+ int ret;
/* SPQ struct */
p_spq = kzalloc(sizeof(struct qed_spq), GFP_KERNEL);
if (!p_spq)
return -ENOMEM;
- /* SPQ ring */
- if (qed_chain_alloc(p_hwfn->cdev,
- QED_CHAIN_USE_TO_PRODUCE,
- QED_CHAIN_MODE_SINGLE,
- QED_CHAIN_CNT_TYPE_U16,
- 0, /* N/A when the mode is SINGLE */
- sizeof(struct slow_path_element),
- &p_spq->chain, NULL))
- goto spq_allocate_fail;
+ /* SPQ ring */
+ ret = qed_chain_alloc(cdev, &p_spq->chain, &params);
+ if (ret) {
+ DP_NOTICE(p_hwfn, "Failed to allocate SPQ chain\n");
+ goto spq_chain_alloc_fail;
+ }
/* allocate and fill the SPQ elements (incl. ramrod data list) */
capacity = qed_chain_get_capacity(&p_spq->chain);
- p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+ ret = -ENOMEM;
+
+ p_virt = dma_alloc_coherent(&cdev->pdev->dev,
capacity * sizeof(struct qed_spq_entry),
&p_phys, GFP_KERNEL);
if (!p_virt)
- goto spq_allocate_fail;
+ goto spq_alloc_fail;
p_spq->p_virt = p_virt;
p_spq->p_phys = p_phys;
@@ -563,10 +575,12 @@ int qed_spq_alloc(struct qed_hwfn *p_hwfn)
return 0;
-spq_allocate_fail:
- qed_chain_free(p_hwfn->cdev, &p_spq->chain);
+spq_alloc_fail:
+ qed_chain_free(cdev, &p_spq->chain);
+spq_chain_alloc_fail:
kfree(p_spq);
- return -ENOMEM;
+
+ return ret;
}
void qed_spq_free(struct qed_hwfn *p_hwfn)
@@ -967,30 +981,40 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
return 0;
}
+#define QED_SPQ_CONSQ_ELEM_SIZE 0x80
+
int qed_consq_alloc(struct qed_hwfn *p_hwfn)
{
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .intended_use = QED_CHAIN_USE_TO_PRODUCE,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U16,
+ .num_elems = QED_CHAIN_PAGE_SIZE / QED_SPQ_CONSQ_ELEM_SIZE,
+ .elem_size = QED_SPQ_CONSQ_ELEM_SIZE,
+ };
struct qed_consq *p_consq;
+ int ret;
/* Allocate ConsQ struct */
p_consq = kzalloc(sizeof(*p_consq), GFP_KERNEL);
if (!p_consq)
return -ENOMEM;
- /* Allocate and initialize EQ chain*/
- if (qed_chain_alloc(p_hwfn->cdev,
- QED_CHAIN_USE_TO_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U16,
- QED_CHAIN_PAGE_SIZE / 0x80,
- 0x80, &p_consq->chain, NULL))
- goto consq_allocate_fail;
+ /* Allocate and initialize ConsQ chain */
+ ret = qed_chain_alloc(p_hwfn->cdev, &p_consq->chain, &params);
+ if (ret) {
+ DP_NOTICE(p_hwfn, "Failed to allocate ConsQ chain");
+ goto consq_alloc_fail;
+ }
p_hwfn->p_consq = p_consq;
+
return 0;
-consq_allocate_fail:
+consq_alloc_fail:
kfree(p_consq);
- return -ENOMEM;
+
+ return ret;
}
void qed_consq_setup(struct qed_hwfn *p_hwfn)
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index f1d7f73de902..803c1fcca8ad 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -176,16 +176,17 @@ struct qede_dev {
u32 dp_module;
u8 dp_level;
- unsigned long flags;
-#define IS_VF(edev) (test_bit(QEDE_FLAGS_IS_VF, &(edev)->flags))
+ unsigned long flags;
+#define IS_VF(edev) test_bit(QEDE_FLAGS_IS_VF, \
+ &(edev)->flags)
const struct qed_eth_ops *ops;
struct qede_ptp *ptp;
u64 ptp_skip_txts;
- struct qed_dev_eth_info dev_info;
-#define QEDE_MAX_RSS_CNT(edev) ((edev)->dev_info.num_queues)
-#define QEDE_MAX_TSS_CNT(edev) ((edev)->dev_info.num_queues)
+ struct qed_dev_eth_info dev_info;
+#define QEDE_MAX_RSS_CNT(edev) ((edev)->dev_info.num_queues)
+#define QEDE_MAX_TSS_CNT(edev) ((edev)->dev_info.num_queues)
#define QEDE_IS_BB(edev) \
((edev)->dev_info.common.dev_type == QED_DEV_TYPE_BB)
#define QEDE_IS_AH(edev) \
@@ -198,14 +199,16 @@ struct qede_dev {
u8 fp_num_rx;
u16 req_queues;
u16 num_queues;
-#define QEDE_QUEUE_CNT(edev) ((edev)->num_queues)
-#define QEDE_RSS_COUNT(edev) ((edev)->num_queues - (edev)->fp_num_tx)
+ u16 total_xdp_queues;
+
+#define QEDE_QUEUE_CNT(edev) ((edev)->num_queues)
+#define QEDE_RSS_COUNT(edev) ((edev)->num_queues - (edev)->fp_num_tx)
#define QEDE_RX_QUEUE_IDX(edev, i) (i)
-#define QEDE_TSS_COUNT(edev) ((edev)->num_queues - (edev)->fp_num_rx)
+#define QEDE_TSS_COUNT(edev) ((edev)->num_queues - (edev)->fp_num_rx)
struct qed_int_info int_info;
- /* Smaller private varaiant of the RTNL lock */
+ /* Smaller private variant of the RTNL lock */
struct mutex qede_lock;
u32 state; /* Protected by qede_lock */
u16 rx_buf_size;
@@ -226,22 +229,28 @@ struct qede_dev {
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
struct qede_stats stats;
-#define QEDE_RSS_INDIR_INITED BIT(0)
-#define QEDE_RSS_KEY_INITED BIT(1)
-#define QEDE_RSS_CAPS_INITED BIT(2)
- u32 rss_params_inited; /* bit-field to track initialized rss params */
- u16 rss_ind_table[128];
- u32 rss_key[10];
- u8 rss_caps;
-
- u16 q_num_rx_buffers; /* Must be a power of two */
- u16 q_num_tx_buffers; /* Must be a power of two */
-
- bool gro_disable;
- struct list_head vlan_list;
- u16 configured_vlans;
- u16 non_configured_vlans;
- bool accept_any_vlan;
+
+ /* Bitfield to track initialized RSS params */
+ u32 rss_params_inited;
+#define QEDE_RSS_INDIR_INITED BIT(0)
+#define QEDE_RSS_KEY_INITED BIT(1)
+#define QEDE_RSS_CAPS_INITED BIT(2)
+
+ u16 rss_ind_table[128];
+ u32 rss_key[10];
+ u8 rss_caps;
+
+ /* Both must be a power of two */
+ u16 q_num_rx_buffers;
+ u16 q_num_tx_buffers;
+
+ bool gro_disable;
+
+ struct list_head vlan_list;
+ u16 configured_vlans;
+ u16 non_configured_vlans;
+ bool accept_any_vlan;
+
struct delayed_work sp_task;
unsigned long sp_flags;
u16 vxlan_dst_port;
@@ -252,14 +261,14 @@ struct qede_dev {
struct qede_rdma_dev rdma_info;
- struct bpf_prog *xdp_prog;
+ struct bpf_prog *xdp_prog;
- unsigned long err_flags;
-#define QEDE_ERR_IS_HANDLED 31
-#define QEDE_ERR_ATTN_CLR_EN 0
-#define QEDE_ERR_GET_DBG_INFO 1
-#define QEDE_ERR_IS_RECOVERABLE 2
-#define QEDE_ERR_WARN 3
+ unsigned long err_flags;
+#define QEDE_ERR_IS_HANDLED 31
+#define QEDE_ERR_ATTN_CLR_EN 0
+#define QEDE_ERR_GET_DBG_INFO 1
+#define QEDE_ERR_IS_RECOVERABLE 2
+#define QEDE_ERR_WARN 3
struct qede_dump_info dump_info;
};
@@ -372,29 +381,34 @@ struct sw_tx_bd {
};
struct sw_tx_xdp {
- struct page *page;
- dma_addr_t mapping;
+ struct page *page;
+ struct xdp_frame *xdpf;
+ dma_addr_t mapping;
};
struct qede_tx_queue {
- u8 is_xdp;
- bool is_legacy;
- u16 sw_tx_cons;
- u16 sw_tx_prod;
- u16 num_tx_buffers; /* Slowpath only */
+ u8 is_xdp;
+ bool is_legacy;
+ u16 sw_tx_cons;
+ u16 sw_tx_prod;
+ u16 num_tx_buffers; /* Slowpath only */
- u64 xmit_pkts;
- u64 stopped_cnt;
- u64 tx_mem_alloc_err;
+ u64 xmit_pkts;
+ u64 stopped_cnt;
+ u64 tx_mem_alloc_err;
- __le16 *hw_cons_ptr;
+ __le16 *hw_cons_ptr;
/* Needed for the mapping of packets */
- struct device *dev;
+ struct device *dev;
+
+ void __iomem *doorbell_addr;
+ union db_prod tx_db;
+
+ /* Spinlock for XDP queues in case of XDP_REDIRECT */
+ spinlock_t xdp_tx_lock;
- void __iomem *doorbell_addr;
- union db_prod tx_db;
- int index; /* Slowpath only */
+ int index; /* Slowpath only */
#define QEDE_TXQ_XDP_TO_IDX(edev, txq) ((txq)->index - \
QEDE_MAX_TSS_CNT(edev))
#define QEDE_TXQ_IDX_TO_XDP(edev, idx) ((idx) + QEDE_MAX_TSS_CNT(edev))
@@ -406,22 +420,22 @@ struct qede_tx_queue {
#define QEDE_NDEV_TXQ_ID_TO_TXQ(edev, idx) \
(&((edev)->fp_array[QEDE_NDEV_TXQ_ID_TO_FP_ID(edev, idx)].txq \
[QEDE_NDEV_TXQ_ID_TO_TXQ_COS(edev, idx)]))
-#define QEDE_FP_TC0_TXQ(fp) (&((fp)->txq[0]))
+#define QEDE_FP_TC0_TXQ(fp) (&((fp)->txq[0]))
/* Regular Tx requires skb + metadata for release purpose,
* while XDP requires the pages and the mapped address.
*/
union {
- struct sw_tx_bd *skbs;
- struct sw_tx_xdp *xdp;
- } sw_tx_ring;
+ struct sw_tx_bd *skbs;
+ struct sw_tx_xdp *xdp;
+ } sw_tx_ring;
- struct qed_chain tx_pbl;
+ struct qed_chain tx_pbl;
/* Slowpath; Should be kept in end [unless missing padding] */
- void *handle;
- u16 cos;
- u16 ndev_txq_id;
+ void *handle;
+ u16 cos;
+ u16 ndev_txq_id;
};
#define BD_UNMAP_ADDR(bd) HILO_U64(le32_to_cpu((bd)->addr.hi), \
@@ -435,32 +449,37 @@ struct qede_tx_queue {
#define BD_UNMAP_LEN(bd) (le16_to_cpu((bd)->nbytes))
struct qede_fastpath {
- struct qede_dev *edev;
-#define QEDE_FASTPATH_TX BIT(0)
-#define QEDE_FASTPATH_RX BIT(1)
-#define QEDE_FASTPATH_XDP BIT(2)
-#define QEDE_FASTPATH_COMBINED (QEDE_FASTPATH_TX | QEDE_FASTPATH_RX)
- u8 type;
- u8 id;
- u8 xdp_xmit;
- struct napi_struct napi;
- struct qed_sb_info *sb_info;
- struct qede_rx_queue *rxq;
- struct qede_tx_queue *txq;
- struct qede_tx_queue *xdp_tx;
-
-#define VEC_NAME_SIZE (sizeof_field(struct net_device, name) + 8)
- char name[VEC_NAME_SIZE];
+ struct qede_dev *edev;
+
+ u8 type;
+#define QEDE_FASTPATH_TX BIT(0)
+#define QEDE_FASTPATH_RX BIT(1)
+#define QEDE_FASTPATH_XDP BIT(2)
+#define QEDE_FASTPATH_COMBINED (QEDE_FASTPATH_TX | QEDE_FASTPATH_RX)
+
+ u8 id;
+
+ u8 xdp_xmit;
+#define QEDE_XDP_TX BIT(0)
+#define QEDE_XDP_REDIRECT BIT(1)
+
+ struct napi_struct napi;
+ struct qed_sb_info *sb_info;
+ struct qede_rx_queue *rxq;
+ struct qede_tx_queue *txq;
+ struct qede_tx_queue *xdp_tx;
+
+ char name[IFNAMSIZ + 8];
};
/* Debug print definitions */
-#define DP_NAME(edev) ((edev)->ndev->name)
+#define DP_NAME(edev) netdev_name((edev)->ndev)
-#define XMIT_PLAIN 0
-#define XMIT_L4_CSUM BIT(0)
-#define XMIT_LSO BIT(1)
-#define XMIT_ENC BIT(2)
-#define XMIT_ENC_GSO_L4_CSUM BIT(3)
+#define XMIT_PLAIN 0
+#define XMIT_L4_CSUM BIT(0)
+#define XMIT_LSO BIT(1)
+#define XMIT_ENC BIT(2)
+#define XMIT_ENC_GSO_L4_CSUM BIT(3)
#define QEDE_CSUM_ERROR BIT(0)
#define QEDE_CSUM_UNNECESSARY BIT(1)
@@ -503,6 +522,8 @@ struct qede_reload_args {
/* Datapath functions definition */
netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev);
+int qede_xdp_transmit(struct net_device *dev, int n_frames,
+ struct xdp_frame **frames, u32 flags);
u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev);
netdev_features_t qede_features_check(struct sk_buff *skb,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 1c4ece0713f8..a2494bf85007 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -302,51 +302,94 @@ static inline void qede_update_tx_producer(struct qede_tx_queue *txq)
wmb();
}
-static int qede_xdp_xmit(struct qede_dev *edev, struct qede_fastpath *fp,
- struct sw_rx_data *metadata, u16 padding, u16 length)
+static int qede_xdp_xmit(struct qede_tx_queue *txq, dma_addr_t dma, u16 pad,
+ u16 len, struct page *page, struct xdp_frame *xdpf)
{
- struct qede_tx_queue *txq = fp->xdp_tx;
- struct eth_tx_1st_bd *first_bd;
- u16 idx = txq->sw_tx_prod;
+ struct eth_tx_1st_bd *bd;
+ struct sw_tx_xdp *xdp;
u16 val;
- if (!qed_chain_get_elem_left(&txq->tx_pbl)) {
+ if (unlikely(qed_chain_get_elem_used(&txq->tx_pbl) >=
+ txq->num_tx_buffers)) {
txq->stopped_cnt++;
return -ENOMEM;
}
- first_bd = (struct eth_tx_1st_bd *)qed_chain_produce(&txq->tx_pbl);
+ bd = qed_chain_produce(&txq->tx_pbl);
+ bd->data.nbds = 1;
+ bd->data.bd_flags.bitfields = BIT(ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT);
- memset(first_bd, 0, sizeof(*first_bd));
- first_bd->data.bd_flags.bitfields =
- BIT(ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT);
-
- val = (length & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) <<
+ val = (len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) <<
ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
- first_bd->data.bitfields |= cpu_to_le16(val);
- first_bd->data.nbds = 1;
+ bd->data.bitfields = cpu_to_le16(val);
/* We can safely ignore the offset, as it's 0 for XDP */
- BD_SET_UNMAP_ADDR_LEN(first_bd, metadata->mapping + padding, length);
+ BD_SET_UNMAP_ADDR_LEN(bd, dma + pad, len);
- /* Synchronize the buffer back to device, as program [probably]
- * has changed it.
- */
- dma_sync_single_for_device(&edev->pdev->dev,
- metadata->mapping + padding,
- length, PCI_DMA_TODEVICE);
+ xdp = txq->sw_tx_ring.xdp + txq->sw_tx_prod;
+ xdp->mapping = dma;
+ xdp->page = page;
+ xdp->xdpf = xdpf;
- txq->sw_tx_ring.xdp[idx].page = metadata->data;
- txq->sw_tx_ring.xdp[idx].mapping = metadata->mapping;
txq->sw_tx_prod = (txq->sw_tx_prod + 1) % txq->num_tx_buffers;
- /* Mark the fastpath for future XDP doorbell */
- fp->xdp_xmit = 1;
-
return 0;
}
+int qede_xdp_transmit(struct net_device *dev, int n_frames,
+ struct xdp_frame **frames, u32 flags)
+{
+ struct qede_dev *edev = netdev_priv(dev);
+ struct device *dmadev = &edev->pdev->dev;
+ struct qede_tx_queue *xdp_tx;
+ struct xdp_frame *xdpf;
+ dma_addr_t mapping;
+ int i, drops = 0;
+ u16 xdp_prod;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ if (unlikely(!netif_running(dev)))
+ return -ENETDOWN;
+
+ i = smp_processor_id() % edev->total_xdp_queues;
+ xdp_tx = edev->fp_array[i].xdp_tx;
+
+ spin_lock(&xdp_tx->xdp_tx_lock);
+
+ for (i = 0; i < n_frames; i++) {
+ xdpf = frames[i];
+
+ mapping = dma_map_single(dmadev, xdpf->data, xdpf->len,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dmadev, mapping))) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+
+ continue;
+ }
+
+ if (unlikely(qede_xdp_xmit(xdp_tx, mapping, 0, xdpf->len,
+ NULL, xdpf))) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ }
+ }
+
+ if (flags & XDP_XMIT_FLUSH) {
+ xdp_prod = qed_chain_get_prod_idx(&xdp_tx->tx_pbl);
+
+ xdp_tx->tx_db.data.bd_prod = cpu_to_le16(xdp_prod);
+ qede_update_tx_producer(xdp_tx);
+ }
+
+ spin_unlock(&xdp_tx->xdp_tx_lock);
+
+ return n_frames - drops;
+}
+
int qede_txq_has_work(struct qede_tx_queue *txq)
{
u16 hw_bd_cons;
@@ -362,20 +405,31 @@ int qede_txq_has_work(struct qede_tx_queue *txq)
static void qede_xdp_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
{
- u16 hw_bd_cons, idx;
+ struct sw_tx_xdp *xdp_info, *xdp_arr = txq->sw_tx_ring.xdp;
+ struct device *dev = &edev->pdev->dev;
+ struct xdp_frame *xdpf;
+ u16 hw_bd_cons;
hw_bd_cons = le16_to_cpu(*txq->hw_cons_ptr);
barrier();
while (hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl)) {
- qed_chain_consume(&txq->tx_pbl);
- idx = txq->sw_tx_cons;
+ xdp_info = xdp_arr + txq->sw_tx_cons;
+ xdpf = xdp_info->xdpf;
- dma_unmap_page(&edev->pdev->dev,
- txq->sw_tx_ring.xdp[idx].mapping,
- PAGE_SIZE, DMA_BIDIRECTIONAL);
- __free_page(txq->sw_tx_ring.xdp[idx].page);
+ if (xdpf) {
+ dma_unmap_single(dev, xdp_info->mapping, xdpf->len,
+ DMA_TO_DEVICE);
+ xdp_return_frame(xdpf);
+
+ xdp_info->xdpf = NULL;
+ } else {
+ dma_unmap_page(dev, xdp_info->mapping, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ __free_page(xdp_info->page);
+ }
+ qed_chain_consume(&txq->tx_pbl);
txq->sw_tx_cons = (txq->sw_tx_cons + 1) % txq->num_tx_buffers;
txq->xmit_pkts++;
}
@@ -1064,32 +1118,59 @@ static bool qede_rx_xdp(struct qede_dev *edev,
switch (act) {
case XDP_TX:
/* We need the replacement buffer before transmit. */
- if (qede_alloc_rx_buffer(rxq, true)) {
+ if (unlikely(qede_alloc_rx_buffer(rxq, true))) {
qede_recycle_rx_bd_ring(rxq, 1);
+
trace_xdp_exception(edev->ndev, prog, act);
- return false;
+ break;
}
/* Now if there's a transmission problem, we'd still have to
* throw current buffer, as replacement was already allocated.
*/
- if (qede_xdp_xmit(edev, fp, bd, *data_offset, *len)) {
- dma_unmap_page(rxq->dev, bd->mapping,
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (unlikely(qede_xdp_xmit(fp->xdp_tx, bd->mapping,
+ *data_offset, *len, bd->data,
+ NULL))) {
+ dma_unmap_page(rxq->dev, bd->mapping, PAGE_SIZE,
+ rxq->data_direction);
__free_page(bd->data);
+
trace_xdp_exception(edev->ndev, prog, act);
+ } else {
+ dma_sync_single_for_device(rxq->dev,
+ bd->mapping + *data_offset,
+ *len, rxq->data_direction);
+ fp->xdp_xmit |= QEDE_XDP_TX;
}
/* Regardless, we've consumed an Rx BD */
qede_rx_bd_ring_consume(rxq);
- return false;
+ break;
+ case XDP_REDIRECT:
+ /* We need the replacement buffer before transmit. */
+ if (unlikely(qede_alloc_rx_buffer(rxq, true))) {
+ qede_recycle_rx_bd_ring(rxq, 1);
+
+ trace_xdp_exception(edev->ndev, prog, act);
+ break;
+ }
+ dma_unmap_page(rxq->dev, bd->mapping, PAGE_SIZE,
+ rxq->data_direction);
+
+ if (unlikely(xdp_do_redirect(edev->ndev, &xdp, prog)))
+ DP_NOTICE(edev, "Failed to redirect the packet\n");
+ else
+ fp->xdp_xmit |= QEDE_XDP_REDIRECT;
+
+ qede_rx_bd_ring_consume(rxq);
+ break;
default:
bpf_warn_invalid_xdp_action(act);
- /* Fall through */
+ fallthrough;
case XDP_ABORTED:
trace_xdp_exception(edev->ndev, prog, act);
- /* Fall through */
+ fallthrough;
case XDP_DROP:
qede_recycle_rx_bd_ring(rxq, cqe->bd_num);
}
@@ -1353,6 +1434,9 @@ int qede_poll(struct napi_struct *napi, int budget)
napi);
struct qede_dev *edev = fp->edev;
int rx_work_done = 0;
+ u16 xdp_prod;
+
+ fp->xdp_xmit = 0;
if (likely(fp->type & QEDE_FASTPATH_TX)) {
int cos;
@@ -1380,14 +1464,16 @@ int qede_poll(struct napi_struct *napi, int budget)
}
}
- if (fp->xdp_xmit) {
- u16 xdp_prod = qed_chain_get_prod_idx(&fp->xdp_tx->tx_pbl);
+ if (fp->xdp_xmit & QEDE_XDP_TX) {
+ xdp_prod = qed_chain_get_prod_idx(&fp->xdp_tx->tx_pbl);
- fp->xdp_xmit = 0;
fp->xdp_tx->tx_db.data.bd_prod = cpu_to_le16(xdp_prod);
qede_update_tx_producer(fp->xdp_tx);
}
+ if (fp->xdp_xmit & QEDE_XDP_REDIRECT)
+ xdp_do_flush_map();
+
return rx_work_done;
}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 6f2171dc0dea..1aaae3203f5a 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -639,79 +639,81 @@ qede_setup_tc_offload(struct net_device *dev, enum tc_setup_type type,
}
static const struct net_device_ops qede_netdev_ops = {
- .ndo_open = qede_open,
- .ndo_stop = qede_close,
- .ndo_start_xmit = qede_start_xmit,
- .ndo_select_queue = qede_select_queue,
- .ndo_set_rx_mode = qede_set_rx_mode,
- .ndo_set_mac_address = qede_set_mac_addr,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_change_mtu = qede_change_mtu,
- .ndo_do_ioctl = qede_ioctl,
- .ndo_tx_timeout = qede_tx_timeout,
+ .ndo_open = qede_open,
+ .ndo_stop = qede_close,
+ .ndo_start_xmit = qede_start_xmit,
+ .ndo_select_queue = qede_select_queue,
+ .ndo_set_rx_mode = qede_set_rx_mode,
+ .ndo_set_mac_address = qede_set_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = qede_change_mtu,
+ .ndo_do_ioctl = qede_ioctl,
+ .ndo_tx_timeout = qede_tx_timeout,
#ifdef CONFIG_QED_SRIOV
- .ndo_set_vf_mac = qede_set_vf_mac,
- .ndo_set_vf_vlan = qede_set_vf_vlan,
- .ndo_set_vf_trust = qede_set_vf_trust,
+ .ndo_set_vf_mac = qede_set_vf_mac,
+ .ndo_set_vf_vlan = qede_set_vf_vlan,
+ .ndo_set_vf_trust = qede_set_vf_trust,
#endif
- .ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid,
- .ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid,
- .ndo_fix_features = qede_fix_features,
- .ndo_set_features = qede_set_features,
- .ndo_get_stats64 = qede_get_stats64,
+ .ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid,
+ .ndo_fix_features = qede_fix_features,
+ .ndo_set_features = qede_set_features,
+ .ndo_get_stats64 = qede_get_stats64,
#ifdef CONFIG_QED_SRIOV
- .ndo_set_vf_link_state = qede_set_vf_link_state,
- .ndo_set_vf_spoofchk = qede_set_vf_spoofchk,
- .ndo_get_vf_config = qede_get_vf_config,
- .ndo_set_vf_rate = qede_set_vf_rate,
+ .ndo_set_vf_link_state = qede_set_vf_link_state,
+ .ndo_set_vf_spoofchk = qede_set_vf_spoofchk,
+ .ndo_get_vf_config = qede_get_vf_config,
+ .ndo_set_vf_rate = qede_set_vf_rate,
#endif
- .ndo_udp_tunnel_add = udp_tunnel_nic_add_port,
- .ndo_udp_tunnel_del = udp_tunnel_nic_del_port,
- .ndo_features_check = qede_features_check,
- .ndo_bpf = qede_xdp,
+ .ndo_udp_tunnel_add = udp_tunnel_nic_add_port,
+ .ndo_udp_tunnel_del = udp_tunnel_nic_del_port,
+ .ndo_features_check = qede_features_check,
+ .ndo_bpf = qede_xdp,
#ifdef CONFIG_RFS_ACCEL
- .ndo_rx_flow_steer = qede_rx_flow_steer,
+ .ndo_rx_flow_steer = qede_rx_flow_steer,
#endif
- .ndo_setup_tc = qede_setup_tc_offload,
+ .ndo_xdp_xmit = qede_xdp_transmit,
+ .ndo_setup_tc = qede_setup_tc_offload,
};
static const struct net_device_ops qede_netdev_vf_ops = {
- .ndo_open = qede_open,
- .ndo_stop = qede_close,
- .ndo_start_xmit = qede_start_xmit,
- .ndo_select_queue = qede_select_queue,
- .ndo_set_rx_mode = qede_set_rx_mode,
- .ndo_set_mac_address = qede_set_mac_addr,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_change_mtu = qede_change_mtu,
- .ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid,
- .ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid,
- .ndo_fix_features = qede_fix_features,
- .ndo_set_features = qede_set_features,
- .ndo_get_stats64 = qede_get_stats64,
- .ndo_udp_tunnel_add = udp_tunnel_nic_add_port,
- .ndo_udp_tunnel_del = udp_tunnel_nic_del_port,
- .ndo_features_check = qede_features_check,
+ .ndo_open = qede_open,
+ .ndo_stop = qede_close,
+ .ndo_start_xmit = qede_start_xmit,
+ .ndo_select_queue = qede_select_queue,
+ .ndo_set_rx_mode = qede_set_rx_mode,
+ .ndo_set_mac_address = qede_set_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = qede_change_mtu,
+ .ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid,
+ .ndo_fix_features = qede_fix_features,
+ .ndo_set_features = qede_set_features,
+ .ndo_get_stats64 = qede_get_stats64,
+ .ndo_udp_tunnel_add = udp_tunnel_nic_add_port,
+ .ndo_udp_tunnel_del = udp_tunnel_nic_del_port,
+ .ndo_features_check = qede_features_check,
};
static const struct net_device_ops qede_netdev_vf_xdp_ops = {
- .ndo_open = qede_open,
- .ndo_stop = qede_close,
- .ndo_start_xmit = qede_start_xmit,
- .ndo_select_queue = qede_select_queue,
- .ndo_set_rx_mode = qede_set_rx_mode,
- .ndo_set_mac_address = qede_set_mac_addr,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_change_mtu = qede_change_mtu,
- .ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid,
- .ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid,
- .ndo_fix_features = qede_fix_features,
- .ndo_set_features = qede_set_features,
- .ndo_get_stats64 = qede_get_stats64,
- .ndo_udp_tunnel_add = udp_tunnel_nic_add_port,
- .ndo_udp_tunnel_del = udp_tunnel_nic_del_port,
- .ndo_features_check = qede_features_check,
- .ndo_bpf = qede_xdp,
+ .ndo_open = qede_open,
+ .ndo_stop = qede_close,
+ .ndo_start_xmit = qede_start_xmit,
+ .ndo_select_queue = qede_select_queue,
+ .ndo_set_rx_mode = qede_set_rx_mode,
+ .ndo_set_mac_address = qede_set_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = qede_change_mtu,
+ .ndo_vlan_rx_add_vid = qede_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = qede_vlan_rx_kill_vid,
+ .ndo_fix_features = qede_fix_features,
+ .ndo_set_features = qede_set_features,
+ .ndo_get_stats64 = qede_get_stats64,
+ .ndo_udp_tunnel_add = udp_tunnel_nic_add_port,
+ .ndo_udp_tunnel_del = udp_tunnel_nic_del_port,
+ .ndo_features_check = qede_features_check,
+ .ndo_bpf = qede_xdp,
+ .ndo_xdp_xmit = qede_xdp_transmit,
};
/* -------------------------------------------------------------------------
@@ -1442,6 +1444,11 @@ static void qede_set_tpa_param(struct qede_rx_queue *rxq)
/* This function allocates all memory needed per Rx queue */
static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
{
+ struct qed_chain_init_params params = {
+ .cnt_type = QED_CHAIN_CNT_TYPE_U16,
+ .num_elems = RX_RING_SIZE,
+ };
+ struct qed_dev *cdev = edev->cdev;
int i, rc, size;
rxq->num_rx_buffers = edev->q_num_rx_buffers;
@@ -1477,24 +1484,20 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
}
/* Allocate FW Rx ring */
- rc = edev->ops->common->chain_alloc(edev->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_NEXT_PTR,
- QED_CHAIN_CNT_TYPE_U16,
- RX_RING_SIZE,
- sizeof(struct eth_rx_bd),
- &rxq->rx_bd_ring, NULL);
+ params.mode = QED_CHAIN_MODE_NEXT_PTR;
+ params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
+ params.elem_size = sizeof(struct eth_rx_bd);
+
+ rc = edev->ops->common->chain_alloc(cdev, &rxq->rx_bd_ring, &params);
if (rc)
goto err;
/* Allocate FW completion ring */
- rc = edev->ops->common->chain_alloc(edev->cdev,
- QED_CHAIN_USE_TO_CONSUME,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U16,
- RX_RING_SIZE,
- sizeof(union eth_rx_cqe),
- &rxq->rx_comp_ring, NULL);
+ params.mode = QED_CHAIN_MODE_PBL;
+ params.intended_use = QED_CHAIN_USE_TO_CONSUME;
+ params.elem_size = sizeof(union eth_rx_cqe);
+
+ rc = edev->ops->common->chain_alloc(cdev, &rxq->rx_comp_ring, &params);
if (rc)
goto err;
@@ -1531,7 +1534,13 @@ static void qede_free_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
/* This function allocates all memory needed per Tx queue */
static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
{
- union eth_tx_bd_types *p_virt;
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U16,
+ .num_elems = edev->q_num_tx_buffers,
+ .elem_size = sizeof(union eth_tx_bd_types),
+ };
int size, rc;
txq->num_tx_buffers = edev->q_num_tx_buffers;
@@ -1549,13 +1558,7 @@ static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
goto err;
}
- rc = edev->ops->common->chain_alloc(edev->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U16,
- txq->num_tx_buffers,
- sizeof(*p_virt),
- &txq->tx_pbl, NULL);
+ rc = edev->ops->common->chain_alloc(edev->cdev, &txq->tx_pbl, &params);
if (rc)
goto err;
@@ -1711,6 +1714,7 @@ static void qede_init_fp(struct qede_dev *edev)
{
int queue_id, rxq_index = 0, txq_index = 0;
struct qede_fastpath *fp;
+ bool init_xdp = false;
for_each_queue(queue_id) {
fp = &edev->fp_array[queue_id];
@@ -1722,6 +1726,9 @@ static void qede_init_fp(struct qede_dev *edev)
fp->xdp_tx->index = QEDE_TXQ_IDX_TO_XDP(edev,
rxq_index);
fp->xdp_tx->is_xdp = 1;
+
+ spin_lock_init(&fp->xdp_tx->xdp_tx_lock);
+ init_xdp = true;
}
if (fp->type & QEDE_FASTPATH_RX) {
@@ -1737,6 +1744,13 @@ static void qede_init_fp(struct qede_dev *edev)
/* Driver have no error path from here */
WARN_ON(xdp_rxq_info_reg(&fp->rxq->xdp_rxq, edev->ndev,
fp->rxq->rxq_id) < 0);
+
+ if (xdp_rxq_info_reg_mem_model(&fp->rxq->xdp_rxq,
+ MEM_TYPE_PAGE_ORDER0,
+ NULL)) {
+ DP_NOTICE(edev,
+ "Failed to register XDP memory model\n");
+ }
}
if (fp->type & QEDE_FASTPATH_TX) {
@@ -1762,6 +1776,11 @@ static void qede_init_fp(struct qede_dev *edev)
snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
edev->ndev->name, queue_id);
}
+
+ if (init_xdp) {
+ edev->total_xdp_queues = QEDE_RSS_COUNT(edev);
+ DP_INFO(edev, "Total XDP queues: %u\n", edev->total_xdp_queues);
+ }
}
static int qede_set_real_num_queues(struct qede_dev *edev)
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 975f5c10ba47..d1da92ac7fbe 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3622,6 +3622,7 @@ static void rtl_hw_start_8125a_1(struct rtl8169_private *tp)
rtl_ephy_init(tp, e_info_8125a_1);
rtl_hw_start_8125_common(tp);
+ rtl_hw_aspm_clkreq_enable(tp, true);
}
static void rtl_hw_start_8125a_2(struct rtl8169_private *tp)
@@ -3649,6 +3650,7 @@ static void rtl_hw_start_8125a_2(struct rtl8169_private *tp)
rtl_ephy_init(tp, e_info_8125a_2);
rtl_hw_start_8125_common(tp);
+ rtl_hw_aspm_clkreq_enable(tp, true);
}
static void rtl_hw_start_8125b(struct rtl8169_private *tp)
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index cb7b634a1150..fa7229fff2ff 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -22,6 +22,7 @@
#include <linux/jhash.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
+#include <net/udp_tunnel.h>
/* Hardware control for EF10 architecture including 'Huntington'. */
@@ -38,6 +39,7 @@ struct efx_ef10_vlan {
};
static int efx_ef10_set_udp_tnl_ports(struct efx_nic *efx, bool unloading);
+static const struct udp_tunnel_nic_info efx_ef10_udp_tunnels;
static int efx_ef10_get_warm_boot_count(struct efx_nic *efx)
{
@@ -564,6 +566,9 @@ static int efx_ef10_probe(struct efx_nic *efx)
goto fail2;
mutex_init(&nic_data->udp_tunnels_lock);
+ for (i = 0; i < ARRAY_SIZE(nic_data->udp_tunnels); ++i)
+ nic_data->udp_tunnels[i].type =
+ TUNNEL_ENCAP_UDP_PORT_ENTRY_INVALID;
/* Reset (most) configuration for this function */
rc = efx_mcdi_reset(efx, RESET_TYPE_ALL);
@@ -665,6 +670,12 @@ static int efx_ef10_probe(struct efx_nic *efx)
if (rc)
goto fail_add_vid_0;
+ if (nic_data->datapath_caps &
+ (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN) &&
+ efx->mcdi->fn_flags &
+ (1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_TRUSTED))
+ efx->net_dev->udp_tunnel_nic_info = &efx_ef10_udp_tunnels;
+
return 0;
fail_add_vid_0:
@@ -3702,8 +3713,8 @@ static int efx_ef10_set_udp_tnl_ports(struct efx_nic *efx, bool unloading)
MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_ENTRIES_MAXNUM);
for (i = 0; i < ARRAY_SIZE(nic_data->udp_tunnels); ++i) {
- if (nic_data->udp_tunnels[i].count &&
- nic_data->udp_tunnels[i].port) {
+ if (nic_data->udp_tunnels[i].type !=
+ TUNNEL_ENCAP_UDP_PORT_ENTRY_INVALID) {
efx_dword_t entry;
EFX_POPULATE_DWORD_2(entry,
@@ -3789,79 +3800,34 @@ static int efx_ef10_udp_tnl_push_ports(struct efx_nic *efx)
return rc;
}
-static struct efx_udp_tunnel *__efx_ef10_udp_tnl_lookup_port(struct efx_nic *efx,
- __be16 port)
+static int efx_ef10_udp_tnl_set_port(struct net_device *dev,
+ unsigned int table, unsigned int entry,
+ struct udp_tunnel_info *ti)
{
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- size_t i;
+ struct efx_nic *efx = netdev_priv(dev);
+ struct efx_ef10_nic_data *nic_data;
+ int efx_tunnel_type, rc;
- for (i = 0; i < ARRAY_SIZE(nic_data->udp_tunnels); ++i) {
- if (!nic_data->udp_tunnels[i].count)
- continue;
- if (nic_data->udp_tunnels[i].port == port)
- return &nic_data->udp_tunnels[i];
- }
- return NULL;
-}
-
-static int efx_ef10_udp_tnl_add_port(struct efx_nic *efx,
- struct efx_udp_tunnel tnl)
-{
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- struct efx_udp_tunnel *match;
- char typebuf[8];
- size_t i;
- int rc;
+ if (ti->type == UDP_TUNNEL_TYPE_VXLAN)
+ efx_tunnel_type = TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN;
+ else
+ efx_tunnel_type = TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE;
+ nic_data = efx->nic_data;
if (!(nic_data->datapath_caps &
(1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN)))
- return 0;
-
- efx_get_udp_tunnel_type_name(tnl.type, typebuf, sizeof(typebuf));
- netif_dbg(efx, drv, efx->net_dev, "Adding UDP tunnel (%s) port %d\n",
- typebuf, ntohs(tnl.port));
+ return -EOPNOTSUPP;
mutex_lock(&nic_data->udp_tunnels_lock);
/* Make sure all TX are stopped while we add to the table, else we
* might race against an efx_features_check().
*/
efx_device_detach_sync(efx);
-
- match = __efx_ef10_udp_tnl_lookup_port(efx, tnl.port);
- if (match != NULL) {
- if (match->type == tnl.type) {
- netif_dbg(efx, drv, efx->net_dev,
- "Referencing existing tunnel entry\n");
- match->count++;
- /* No need to cause an MCDI update */
- rc = 0;
- goto unlock_out;
- }
- efx_get_udp_tunnel_type_name(match->type,
- typebuf, sizeof(typebuf));
- netif_dbg(efx, drv, efx->net_dev,
- "UDP port %d is already in use by %s\n",
- ntohs(tnl.port), typebuf);
- rc = -EEXIST;
- goto unlock_out;
- }
-
- for (i = 0; i < ARRAY_SIZE(nic_data->udp_tunnels); ++i)
- if (!nic_data->udp_tunnels[i].count) {
- nic_data->udp_tunnels[i] = tnl;
- nic_data->udp_tunnels[i].count = 1;
- rc = efx_ef10_set_udp_tnl_ports(efx, false);
- goto unlock_out;
- }
-
- netif_dbg(efx, drv, efx->net_dev,
- "Unable to add UDP tunnel (%s) port %d; insufficient resources.\n",
- typebuf, ntohs(tnl.port));
-
- rc = -ENOMEM;
-
-unlock_out:
+ nic_data->udp_tunnels[entry].type = efx_tunnel_type;
+ nic_data->udp_tunnels[entry].port = ti->port;
+ rc = efx_ef10_set_udp_tnl_ports(efx, false);
mutex_unlock(&nic_data->udp_tunnels_lock);
+
return rc;
}
@@ -3873,6 +3839,7 @@ unlock_out:
static bool efx_ef10_udp_tnl_has_port(struct efx_nic *efx, __be16 port)
{
struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ size_t i;
if (!(nic_data->datapath_caps &
(1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN)))
@@ -3884,58 +3851,51 @@ static bool efx_ef10_udp_tnl_has_port(struct efx_nic *efx, __be16 port)
*/
return false;
- return __efx_ef10_udp_tnl_lookup_port(efx, port) != NULL;
+ for (i = 0; i < ARRAY_SIZE(nic_data->udp_tunnels); ++i)
+ if (nic_data->udp_tunnels[i].type !=
+ TUNNEL_ENCAP_UDP_PORT_ENTRY_INVALID &&
+ nic_data->udp_tunnels[i].port == port)
+ return true;
+
+ return false;
}
-static int efx_ef10_udp_tnl_del_port(struct efx_nic *efx,
- struct efx_udp_tunnel tnl)
+static int efx_ef10_udp_tnl_unset_port(struct net_device *dev,
+ unsigned int table, unsigned int entry,
+ struct udp_tunnel_info *ti)
{
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- struct efx_udp_tunnel *match;
- char typebuf[8];
+ struct efx_nic *efx = netdev_priv(dev);
+ struct efx_ef10_nic_data *nic_data;
int rc;
- if (!(nic_data->datapath_caps &
- (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN)))
- return 0;
-
- efx_get_udp_tunnel_type_name(tnl.type, typebuf, sizeof(typebuf));
- netif_dbg(efx, drv, efx->net_dev, "Removing UDP tunnel (%s) port %d\n",
- typebuf, ntohs(tnl.port));
+ nic_data = efx->nic_data;
mutex_lock(&nic_data->udp_tunnels_lock);
/* Make sure all TX are stopped while we remove from the table, else we
* might race against an efx_features_check().
*/
efx_device_detach_sync(efx);
-
- match = __efx_ef10_udp_tnl_lookup_port(efx, tnl.port);
- if (match != NULL) {
- if (match->type == tnl.type) {
- if (--match->count) {
- /* Port is still in use, so nothing to do */
- netif_dbg(efx, drv, efx->net_dev,
- "UDP tunnel port %d remains active\n",
- ntohs(tnl.port));
- rc = 0;
- goto out_unlock;
- }
- rc = efx_ef10_set_udp_tnl_ports(efx, false);
- goto out_unlock;
- }
- efx_get_udp_tunnel_type_name(match->type,
- typebuf, sizeof(typebuf));
- netif_warn(efx, drv, efx->net_dev,
- "UDP port %d is actually in use by %s, not removing\n",
- ntohs(tnl.port), typebuf);
- }
- rc = -ENOENT;
-
-out_unlock:
+ nic_data->udp_tunnels[entry].type = TUNNEL_ENCAP_UDP_PORT_ENTRY_INVALID;
+ nic_data->udp_tunnels[entry].port = 0;
+ rc = efx_ef10_set_udp_tnl_ports(efx, false);
mutex_unlock(&nic_data->udp_tunnels_lock);
+
return rc;
}
+static const struct udp_tunnel_nic_info efx_ef10_udp_tunnels = {
+ .set_port = efx_ef10_udp_tnl_set_port,
+ .unset_port = efx_ef10_udp_tnl_unset_port,
+ .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
+ .tables = {
+ {
+ .n_entries = 16,
+ .tunnel_types = UDP_TUNNEL_TYPE_VXLAN |
+ UDP_TUNNEL_TYPE_GENEVE,
+ },
+ },
+};
+
/* EF10 may have multiple datapath firmware variants within a
* single version. Report which variants are running.
*/
@@ -4172,9 +4132,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.vlan_rx_add_vid = efx_ef10_vlan_rx_add_vid,
.vlan_rx_kill_vid = efx_ef10_vlan_rx_kill_vid,
.udp_tnl_push_ports = efx_ef10_udp_tnl_push_ports,
- .udp_tnl_add_port = efx_ef10_udp_tnl_add_port,
.udp_tnl_has_port = efx_ef10_udp_tnl_has_port,
- .udp_tnl_del_port = efx_ef10_udp_tnl_del_port,
#ifdef CONFIG_SFC_SRIOV
.sriov_configure = efx_ef10_sriov_configure,
.sriov_init = efx_ef10_sriov_init,
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index befd253af918..f16b4f236031 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -38,28 +38,6 @@
/**************************************************************************
*
- * Type name strings
- *
- **************************************************************************
- */
-
-/* UDP tunnel type names */
-static const char *const efx_udp_tunnel_type_names[] = {
- [TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan",
- [TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE] = "geneve",
-};
-
-void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen)
-{
- if (type < ARRAY_SIZE(efx_udp_tunnel_type_names) &&
- efx_udp_tunnel_type_names[type] != NULL)
- snprintf(buf, buflen, "%s", efx_udp_tunnel_type_names[type]);
- else
- snprintf(buf, buflen, "type %d", type);
-}
-
-/**************************************************************************
- *
* Configurable values
*
*************************************************************************/
@@ -612,52 +590,6 @@ static int efx_vlan_rx_kill_vid(struct net_device *net_dev, __be16 proto, u16 vi
return -EOPNOTSUPP;
}
-static int efx_udp_tunnel_type_map(enum udp_parsable_tunnel_type in)
-{
- switch (in) {
- case UDP_TUNNEL_TYPE_VXLAN:
- return TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN;
- case UDP_TUNNEL_TYPE_GENEVE:
- return TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE;
- default:
- return -1;
- }
-}
-
-static void efx_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti)
-{
- struct efx_nic *efx = netdev_priv(dev);
- struct efx_udp_tunnel tnl;
- int efx_tunnel_type;
-
- efx_tunnel_type = efx_udp_tunnel_type_map(ti->type);
- if (efx_tunnel_type < 0)
- return;
-
- tnl.type = (u16)efx_tunnel_type;
- tnl.port = ti->port;
-
- if (efx->type->udp_tnl_add_port)
- (void)efx->type->udp_tnl_add_port(efx, tnl);
-}
-
-static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti)
-{
- struct efx_nic *efx = netdev_priv(dev);
- struct efx_udp_tunnel tnl;
- int efx_tunnel_type;
-
- efx_tunnel_type = efx_udp_tunnel_type_map(ti->type);
- if (efx_tunnel_type < 0)
- return;
-
- tnl.type = (u16)efx_tunnel_type;
- tnl.port = ti->port;
-
- if (efx->type->udp_tnl_del_port)
- (void)efx->type->udp_tnl_del_port(efx, tnl);
-}
-
static const struct net_device_ops efx_netdev_ops = {
.ndo_open = efx_net_open,
.ndo_stop = efx_net_stop,
@@ -685,8 +617,8 @@ static const struct net_device_ops efx_netdev_ops = {
#ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer = efx_filter_rfs,
#endif
- .ndo_udp_tunnel_add = efx_udp_tunnel_add,
- .ndo_udp_tunnel_del = efx_udp_tunnel_del,
+ .ndo_udp_tunnel_add = udp_tunnel_nic_add_port,
+ .ndo_udp_tunnel_del = udp_tunnel_nic_del_port,
.ndo_xdp_xmit = efx_xdp_xmit,
.ndo_bpf = efx_xdp
};
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 0bf11ebb03cf..786fda559976 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -609,8 +609,6 @@ extern const unsigned int efx_reset_type_max;
#define RESET_TYPE(type) \
STRING_TABLE_LOOKUP(type, efx_reset_type)
-void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen);
-
enum efx_int_mode {
/* Be careful if altering to correct macro below */
EFX_INT_MODE_MSIX = 0,
@@ -1173,12 +1171,9 @@ struct efx_mtd_partition {
};
struct efx_udp_tunnel {
+#define TUNNEL_ENCAP_UDP_PORT_ENTRY_INVALID 0xffff
u16 type; /* TUNNEL_ENCAP_UDP_PORT_ENTRY_foo, see mcdi_pcol.h */
__be16 port;
- /* Count of repeated adds of the same port. Used only inside the list,
- * not in request arguments.
- */
- u16 count;
};
/**
@@ -1302,9 +1297,7 @@ struct efx_udp_tunnel {
* @tso_versions: Returns mask of firmware-assisted TSO versions supported.
* If %NULL, then device does not support any TSO version.
* @udp_tnl_push_ports: Push the list of UDP tunnel ports to the NIC if required.
- * @udp_tnl_add_port: Add a UDP tunnel port
* @udp_tnl_has_port: Check if a port has been added as UDP tunnel
- * @udp_tnl_del_port: Remove a UDP tunnel port
* @print_additional_fwver: Dump NIC-specific additional FW version info
* @revision: Hardware architecture revision
* @txd_ptr_tbl_base: TX descriptor ring base address
@@ -1474,9 +1467,7 @@ struct efx_nic_type {
int (*set_mac_address)(struct efx_nic *efx);
u32 (*tso_versions)(struct efx_nic *efx);
int (*udp_tnl_push_ports)(struct efx_nic *efx);
- int (*udp_tnl_add_port)(struct efx_nic *efx, struct efx_udp_tunnel tnl);
bool (*udp_tnl_has_port)(struct efx_nic *efx, __be16 port);
- int (*udp_tnl_del_port)(struct efx_nic *efx, struct efx_udp_tunnel tnl);
size_t (*print_additional_fwver)(struct efx_nic *efx, char *buf,
size_t len);
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index abda736e7c7d..2181d4538ab7 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -897,6 +897,7 @@ struct netvsc_ethtool_stats {
unsigned long rx_no_memory;
unsigned long stop_queue;
unsigned long wake_queue;
+ unsigned long vlan_error;
};
struct netvsc_ethtool_pcpu_stats {
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 6267f706e8ee..d27b90b4e2bb 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -605,6 +605,29 @@ static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx)
*hash_info = hash;
}
+ /* When using AF_PACKET we need to drop VLAN header from
+ * the frame and update the SKB to allow the HOST OS
+ * to transmit the 802.1Q packet
+ */
+ if (skb->protocol == htons(ETH_P_8021Q)) {
+ u16 vlan_tci;
+
+ skb_reset_mac_header(skb);
+ if (eth_type_vlan(eth_hdr(skb)->h_proto)) {
+ if (unlikely(__skb_vlan_pop(skb, &vlan_tci) != 0)) {
+ ++net_device_ctx->eth_stats.vlan_error;
+ goto drop;
+ }
+
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
+ /* Update the NDIS header pkt lengths */
+ packet->total_data_buflen -= VLAN_HLEN;
+ packet->total_bytes -= VLAN_HLEN;
+ rndis_msg->msg_len = packet->total_data_buflen;
+ rndis_msg->msg.pkt.data_len = packet->total_data_buflen;
+ }
+ }
+
if (skb_vlan_tag_present(skb)) {
struct ndis_pkt_8021q_info *vlan;
@@ -1427,6 +1450,7 @@ static const struct {
{ "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
{ "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
{ "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
+ { "vlan_error", offsetof(struct netvsc_ethtool_stats, vlan_error) },
}, pcpu_stats[] = {
{ "cpu%u_rx_packets",
offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) },
@@ -1934,6 +1958,23 @@ syncvf:
return ret;
}
+static int netvsc_get_regs_len(struct net_device *netdev)
+{
+ return VRSS_SEND_TAB_SIZE * sizeof(u32);
+}
+
+static void netvsc_get_regs(struct net_device *netdev,
+ struct ethtool_regs *regs, void *p)
+{
+ struct net_device_context *ndc = netdev_priv(netdev);
+ u32 *regs_buff = p;
+
+ /* increase the version, if buffer format is changed. */
+ regs->version = 1;
+
+ memcpy(regs_buff, ndc->tx_table, VRSS_SEND_TAB_SIZE * sizeof(u32));
+}
+
static u32 netvsc_get_msglevel(struct net_device *ndev)
{
struct net_device_context *ndev_ctx = netdev_priv(ndev);
@@ -1950,6 +1991,8 @@ static void netvsc_set_msglevel(struct net_device *ndev, u32 val)
static const struct ethtool_ops ethtool_ops = {
.get_drvinfo = netvsc_get_drvinfo,
+ .get_regs_len = netvsc_get_regs_len,
+ .get_regs = netvsc_get_regs,
.get_msglevel = netvsc_get_msglevel,
.set_msglevel = netvsc_set_msglevel,
.get_link = ethtool_op_get_link,
diff --git a/drivers/net/phy/mdio-mux-gpio.c b/drivers/net/phy/mdio-mux-gpio.c
index 6c8960df43b0..10a758fdc9e6 100644
--- a/drivers/net/phy/mdio-mux-gpio.c
+++ b/drivers/net/phy/mdio-mux-gpio.c
@@ -42,25 +42,21 @@ static int mdio_mux_gpio_probe(struct platform_device *pdev)
struct gpio_descs *gpios;
int r;
- gpios = gpiod_get_array(&pdev->dev, NULL, GPIOD_OUT_LOW);
+ gpios = devm_gpiod_get_array(&pdev->dev, NULL, GPIOD_OUT_LOW);
if (IS_ERR(gpios))
return PTR_ERR(gpios);
s = devm_kzalloc(&pdev->dev, sizeof(*s), GFP_KERNEL);
- if (!s) {
- gpiod_put_array(gpios);
+ if (!s)
return -ENOMEM;
- }
s->gpios = gpios;
r = mdio_mux_init(&pdev->dev, pdev->dev.of_node,
mdio_mux_gpio_switch_fn, &s->mux_handle, s, NULL);
- if (r != 0) {
- gpiod_put_array(s->gpios);
+ if (r != 0)
return r;
- }
pdev->dev.platform_data = s;
return 0;
@@ -70,7 +66,6 @@ static int mdio_mux_gpio_remove(struct platform_device *pdev)
{
struct mdio_mux_gpio_state *s = dev_get_platdata(&pdev->dev);
mdio_mux_uninit(s->mux_handle);
- gpiod_put_array(s->gpios);
return 0;
}
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 49e98a092b96..1b9523595839 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -734,8 +734,8 @@ static int get_phy_c45_ids(struct mii_bus *bus, int addr,
/* Find first non-zero Devices In package. Device zero is reserved
* for 802.3 c45 complied PHYs, so don't probe it at first.
*/
- for (i = 1; i < MDIO_MMD_NUM && devs_in_pkg == 0 &&
- (devs_in_pkg & 0x1fffffff) == 0x1fffffff; i++) {
+ for (i = 1; i < MDIO_MMD_NUM && (devs_in_pkg == 0 ||
+ (devs_in_pkg & 0x1fffffff) == 0x1fffffff); i++) {
if (i == MDIO_MMD_VEND1 || i == MDIO_MMD_VEND2) {
/* Check that there is a device present at this
* address before reading the devices-in-package
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index dae6c8b51d7f..32b4bd6a5b55 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -43,6 +43,7 @@ struct phylink {
const struct phylink_mac_ops *mac_ops;
const struct phylink_pcs_ops *pcs_ops;
struct phylink_config *config;
+ struct phylink_pcs *pcs;
struct device *dev;
unsigned int old_link_state:1;
@@ -241,8 +242,10 @@ static int phylink_parse_fixedlink(struct phylink *pl,
phylink_set(pl->supported, MII);
phylink_set(pl->supported, Pause);
phylink_set(pl->supported, Asym_Pause);
+ phylink_set(pl->supported, Autoneg);
if (s) {
__set_bit(s->bit, pl->supported);
+ __set_bit(s->bit, pl->link_config.lp_advertising);
} else {
phylink_warn(pl, "fixed link %s duplex %dMbps not recognised\n",
pl->link_config.duplex == DUPLEX_FULL ? "full" : "half",
@@ -419,40 +422,102 @@ static void phylink_mac_config(struct phylink *pl,
pl->mac_ops->mac_config(pl->config, pl->cur_link_an_mode, state);
}
-static void phylink_mac_config_up(struct phylink *pl,
- const struct phylink_link_state *state)
-{
- if (state->link)
- phylink_mac_config(pl, state);
-}
-
static void phylink_mac_pcs_an_restart(struct phylink *pl)
{
if (pl->link_config.an_enabled &&
phy_interface_mode_is_8023z(pl->link_config.interface) &&
phylink_autoneg_inband(pl->cur_link_an_mode)) {
if (pl->pcs_ops)
- pl->pcs_ops->pcs_an_restart(pl->config);
+ pl->pcs_ops->pcs_an_restart(pl->pcs);
else
pl->mac_ops->mac_an_restart(pl->config);
}
}
-static void phylink_pcs_config(struct phylink *pl, bool force_restart,
- const struct phylink_link_state *state)
+static void phylink_major_config(struct phylink *pl, bool restart,
+ const struct phylink_link_state *state)
{
- bool restart = force_restart;
+ int err;
- if (pl->pcs_ops && pl->pcs_ops->pcs_config(pl->config,
- pl->cur_link_an_mode,
- state->interface,
- state->advertising))
- restart = true;
+ phylink_dbg(pl, "major config %s\n", phy_modes(state->interface));
+
+ if (pl->mac_ops->mac_prepare) {
+ err = pl->mac_ops->mac_prepare(pl->config, pl->cur_link_an_mode,
+ state->interface);
+ if (err < 0) {
+ phylink_err(pl, "mac_prepare failed: %pe\n",
+ ERR_PTR(err));
+ return;
+ }
+ }
phylink_mac_config(pl, state);
+ if (pl->pcs_ops) {
+ err = pl->pcs_ops->pcs_config(pl->pcs, pl->cur_link_an_mode,
+ state->interface,
+ state->advertising,
+ !!(pl->link_config.pause &
+ MLO_PAUSE_AN));
+ if (err < 0)
+ phylink_err(pl, "pcs_config failed: %pe\n",
+ ERR_PTR(err));
+ if (err > 0)
+ restart = true;
+ }
if (restart)
phylink_mac_pcs_an_restart(pl);
+
+ if (pl->mac_ops->mac_finish) {
+ err = pl->mac_ops->mac_finish(pl->config, pl->cur_link_an_mode,
+ state->interface);
+ if (err < 0)
+ phylink_err(pl, "mac_prepare failed: %pe\n",
+ ERR_PTR(err));
+ }
+}
+
+/*
+ * Reconfigure for a change of inband advertisement.
+ * If we have a separate PCS, we only need to call its pcs_config() method,
+ * and then restart AN if it indicates something changed. Otherwise, we do
+ * the full MAC reconfiguration.
+ */
+static int phylink_change_inband_advert(struct phylink *pl)
+{
+ int ret;
+
+ if (test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state))
+ return 0;
+
+ if (!pl->pcs_ops) {
+ /* Legacy method */
+ phylink_mac_config(pl, &pl->link_config);
+ phylink_mac_pcs_an_restart(pl);
+ return 0;
+ }
+
+ phylink_dbg(pl, "%s: mode=%s/%s adv=%*pb pause=%02x\n", __func__,
+ phylink_an_mode_str(pl->cur_link_an_mode),
+ phy_modes(pl->link_config.interface),
+ __ETHTOOL_LINK_MODE_MASK_NBITS, pl->link_config.advertising,
+ pl->link_config.pause);
+
+ /* Modern PCS-based method; update the advert at the PCS, and
+ * restart negotiation if the pcs_config() helper indicates that
+ * the programmed advertisement has changed.
+ */
+ ret = pl->pcs_ops->pcs_config(pl->pcs, pl->cur_link_an_mode,
+ pl->link_config.interface,
+ pl->link_config.advertising,
+ !!(pl->link_config.pause & MLO_PAUSE_AN));
+ if (ret < 0)
+ return ret;
+
+ if (ret > 0)
+ phylink_mac_pcs_an_restart(pl);
+
+ return 0;
}
static void phylink_mac_pcs_get_state(struct phylink *pl,
@@ -469,7 +534,7 @@ static void phylink_mac_pcs_get_state(struct phylink *pl,
state->link = 1;
if (pl->pcs_ops)
- pl->pcs_ops->pcs_get_state(pl->config, state);
+ pl->pcs_ops->pcs_get_state(pl->pcs, state);
else
pl->mac_ops->mac_pcs_get_state(pl->config, state);
}
@@ -515,7 +580,7 @@ static void phylink_mac_initial_config(struct phylink *pl, bool force_restart)
link_state.link = false;
phylink_apply_manual_flow(pl, &link_state);
- phylink_pcs_config(pl, force_restart, &link_state);
+ phylink_major_config(pl, force_restart, &link_state);
}
static const char *phylink_pause_to_str(int pause)
@@ -540,7 +605,7 @@ static void phylink_link_up(struct phylink *pl,
pl->cur_interface = link_state.interface;
if (pl->pcs_ops && pl->pcs_ops->pcs_link_up)
- pl->pcs_ops->pcs_link_up(pl->config, pl->cur_link_an_mode,
+ pl->pcs_ops->pcs_link_up(pl->pcs, pl->cur_link_an_mode,
pl->cur_interface,
link_state.speed, link_state.duplex);
@@ -576,9 +641,15 @@ static void phylink_resolve(struct work_struct *w)
struct phylink *pl = container_of(w, struct phylink, resolve);
struct phylink_link_state link_state;
struct net_device *ndev = pl->netdev;
- int link_changed;
+ bool mac_config = false;
+ bool cur_link_state;
mutex_lock(&pl->state_mutex);
+ if (pl->netdev)
+ cur_link_state = netif_carrier_ok(ndev);
+ else
+ cur_link_state = pl->old_link_state;
+
if (pl->phylink_disable_state) {
pl->mac_link_dropped = false;
link_state.link = false;
@@ -589,12 +660,12 @@ static void phylink_resolve(struct work_struct *w)
case MLO_AN_PHY:
link_state = pl->phy_state;
phylink_apply_manual_flow(pl, &link_state);
- phylink_mac_config_up(pl, &link_state);
+ mac_config = link_state.link;
break;
case MLO_AN_FIXED:
phylink_get_fixed_state(pl, &link_state);
- phylink_mac_config_up(pl, &link_state);
+ mac_config = link_state.link;
break;
case MLO_AN_INBAND:
@@ -612,21 +683,36 @@ static void phylink_resolve(struct work_struct *w)
/* If we have a PHY, we need to update with
* the PHY flow control bits. */
link_state.pause = pl->phy_state.pause;
- phylink_apply_manual_flow(pl, &link_state);
- phylink_mac_config(pl, &link_state);
- } else {
- phylink_apply_manual_flow(pl, &link_state);
+ mac_config = true;
}
+ phylink_apply_manual_flow(pl, &link_state);
break;
}
}
- if (pl->netdev)
- link_changed = (link_state.link != netif_carrier_ok(ndev));
- else
- link_changed = (link_state.link != pl->old_link_state);
+ if (mac_config) {
+ if (link_state.interface != pl->link_config.interface) {
+ /* The interface has changed, force the link down and
+ * then reconfigure.
+ */
+ if (cur_link_state) {
+ phylink_link_down(pl);
+ cur_link_state = false;
+ }
+ phylink_major_config(pl, false, &link_state);
+ pl->link_config.interface = link_state.interface;
+ } else if (!pl->pcs_ops) {
+ /* The interface remains unchanged, only the speed,
+ * duplex or pause settings have changed. Call the
+ * old mac_config() method to configure the MAC/PCS
+ * only if we do not have a PCS installed (an
+ * unconverted user.)
+ */
+ phylink_mac_config(pl, &link_state);
+ }
+ }
- if (link_changed) {
+ if (link_state.link != cur_link_state) {
pl->old_link_state = link_state.link;
if (!link_state.link)
phylink_link_down(pl);
@@ -778,11 +864,26 @@ struct phylink *phylink_create(struct phylink_config *config,
}
EXPORT_SYMBOL_GPL(phylink_create);
-void phylink_add_pcs(struct phylink *pl, const struct phylink_pcs_ops *ops)
+/**
+ * phylink_set_pcs() - set the current PCS for phylink to use
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @pcs: a pointer to the &struct phylink_pcs
+ *
+ * Bind the MAC PCS to phylink. This may be called after phylink_create(),
+ * in mac_prepare() or mac_config() methods if it is desired to dynamically
+ * change the PCS.
+ *
+ * Please note that there are behavioural changes with the mac_config()
+ * callback if a PCS is present (denoting a newer setup) so removing a PCS
+ * is not supported, and if a PCS is going to be used, it must be registered
+ * by calling phylink_set_pcs() at the latest in the first mac_config() call.
+ */
+void phylink_set_pcs(struct phylink *pl, struct phylink_pcs *pcs)
{
- pl->pcs_ops = ops;
+ pl->pcs = pcs;
+ pl->pcs_ops = pcs->ops;
}
-EXPORT_SYMBOL_GPL(phylink_add_pcs);
+EXPORT_SYMBOL_GPL(phylink_set_pcs);
/**
* phylink_destroy() - cleanup and destroy the phylink instance
@@ -1127,6 +1228,8 @@ void phylink_start(struct phylink *pl)
break;
case MLO_AN_INBAND:
poll |= pl->config->pcs_poll;
+ if (pl->pcs)
+ poll |= pl->pcs->poll;
break;
}
if (poll)
@@ -1296,27 +1399,46 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
const struct ethtool_link_ksettings *kset)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(support);
- struct ethtool_link_ksettings our_kset;
struct phylink_link_state config;
- int ret;
+ const struct phy_setting *s;
ASSERT_RTNL();
- if (kset->base.autoneg != AUTONEG_DISABLE &&
- kset->base.autoneg != AUTONEG_ENABLE)
- return -EINVAL;
+ if (pl->phydev) {
+ /* We can rely on phylib for this update; we also do not need
+ * to update the pl->link_config settings:
+ * - the configuration returned via ksettings_get() will come
+ * from phylib whenever a PHY is present.
+ * - link_config.interface will be updated by the PHY calling
+ * back via phylink_phy_change() and a subsequent resolve.
+ * - initial link configuration for PHY mode comes from the
+ * last phy state updated via phylink_phy_change().
+ * - other configuration changes (e.g. pause modes) are
+ * performed directly via phylib.
+ * - if in in-band mode with a PHY, the link configuration
+ * is passed on the link from the PHY, and all of
+ * link_config.{speed,duplex,an_enabled,pause} are not used.
+ * - the only possible use would be link_config.advertising
+ * pause modes when in 1000base-X mode with a PHY, but in
+ * the presence of a PHY, this should not be changed as that
+ * should be determined from the media side advertisement.
+ */
+ return phy_ethtool_ksettings_set(pl->phydev, kset);
+ }
linkmode_copy(support, pl->supported);
config = pl->link_config;
+ config.an_enabled = kset->base.autoneg == AUTONEG_ENABLE;
- /* Mask out unsupported advertisements */
+ /* Mask out unsupported advertisements, and force the autoneg bit */
linkmode_and(config.advertising, kset->link_modes.advertising,
support);
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising,
+ config.an_enabled);
/* FIXME: should we reject autoneg if phy/mac does not support it? */
- if (kset->base.autoneg == AUTONEG_DISABLE) {
- const struct phy_setting *s;
-
+ switch (kset->base.autoneg) {
+ case AUTONEG_DISABLE:
/* Autonegotiation disabled, select a suitable speed and
* duplex.
*/
@@ -1325,90 +1447,73 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
if (!s)
return -EINVAL;
- /* If we have a fixed link (as specified by firmware), refuse
- * to change link parameters.
+ /* If we have a fixed link, refuse to change link parameters.
+ * If the link parameters match, accept them but do nothing.
*/
- if (pl->cur_link_an_mode == MLO_AN_FIXED &&
- (s->speed != pl->link_config.speed ||
- s->duplex != pl->link_config.duplex))
- return -EINVAL;
+ if (pl->cur_link_an_mode == MLO_AN_FIXED) {
+ if (s->speed != pl->link_config.speed ||
+ s->duplex != pl->link_config.duplex)
+ return -EINVAL;
+ return 0;
+ }
config.speed = s->speed;
config.duplex = s->duplex;
- config.an_enabled = false;
+ break;
- __clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising);
- } else {
- /* If we have a fixed link, refuse to enable autonegotiation */
- if (pl->cur_link_an_mode == MLO_AN_FIXED)
- return -EINVAL;
+ case AUTONEG_ENABLE:
+ /* If we have a fixed link, allow autonegotiation (since that
+ * is our default case) but do not allow the advertisement to
+ * be changed. If the advertisement matches, simply return.
+ */
+ if (pl->cur_link_an_mode == MLO_AN_FIXED) {
+ if (!linkmode_equal(config.advertising,
+ pl->link_config.advertising))
+ return -EINVAL;
+ return 0;
+ }
config.speed = SPEED_UNKNOWN;
config.duplex = DUPLEX_UNKNOWN;
- config.an_enabled = true;
+ break;
- __set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising);
+ default:
+ return -EINVAL;
}
- if (pl->phydev) {
- /* If we have a PHY, we process the kset change via phylib.
- * phylib will call our link state function if the PHY
- * parameters have changed, which will trigger a resolve
- * and update the MAC configuration.
- */
- our_kset = *kset;
- linkmode_copy(our_kset.link_modes.advertising,
- config.advertising);
- our_kset.base.speed = config.speed;
- our_kset.base.duplex = config.duplex;
+ /* We have ruled out the case with a PHY attached, and the
+ * fixed-link cases. All that is left are in-band links.
+ */
+ if (phylink_validate(pl, support, &config))
+ return -EINVAL;
- ret = phy_ethtool_ksettings_set(pl->phydev, &our_kset);
- if (ret)
- return ret;
+ /* If autonegotiation is enabled, we must have an advertisement */
+ if (config.an_enabled && phylink_is_empty_linkmode(config.advertising))
+ return -EINVAL;
- mutex_lock(&pl->state_mutex);
- /* Save the new configuration */
- linkmode_copy(pl->link_config.advertising,
- our_kset.link_modes.advertising);
+ mutex_lock(&pl->state_mutex);
+ pl->link_config.speed = config.speed;
+ pl->link_config.duplex = config.duplex;
+ pl->link_config.an_enabled = config.an_enabled;
+
+ if (pl->link_config.interface != config.interface) {
+ /* The interface changed, e.g. 1000base-X <-> 2500base-X */
+ /* We need to force the link down, then change the interface */
+ if (pl->old_link_state) {
+ phylink_link_down(pl);
+ pl->old_link_state = false;
+ }
+ if (!test_bit(PHYLINK_DISABLE_STOPPED,
+ &pl->phylink_disable_state))
+ phylink_major_config(pl, false, &config);
pl->link_config.interface = config.interface;
- pl->link_config.speed = our_kset.base.speed;
- pl->link_config.duplex = our_kset.base.duplex;
- pl->link_config.an_enabled = our_kset.base.autoneg !=
- AUTONEG_DISABLE;
- mutex_unlock(&pl->state_mutex);
- } else {
- /* For a fixed link, this isn't able to change any parameters,
- * which just leaves inband mode.
- */
- if (phylink_validate(pl, support, &config))
- return -EINVAL;
-
- /* If autonegotiation is enabled, we must have an advertisement */
- if (config.an_enabled &&
- phylink_is_empty_linkmode(config.advertising))
- return -EINVAL;
-
- mutex_lock(&pl->state_mutex);
linkmode_copy(pl->link_config.advertising, config.advertising);
- pl->link_config.interface = config.interface;
- pl->link_config.speed = config.speed;
- pl->link_config.duplex = config.duplex;
- pl->link_config.an_enabled = kset->base.autoneg !=
- AUTONEG_DISABLE;
-
- if (pl->cur_link_an_mode == MLO_AN_INBAND &&
- !test_bit(PHYLINK_DISABLE_STOPPED,
- &pl->phylink_disable_state)) {
- /* If in 802.3z mode, this updates the advertisement.
- *
- * If we are in SGMII mode without a PHY, there is no
- * advertisement; the only thing we have is the pause
- * modes which can only come from a PHY.
- */
- phylink_pcs_config(pl, true, &pl->link_config);
- }
- mutex_unlock(&pl->state_mutex);
+ } else if (!linkmode_equal(pl->link_config.advertising,
+ config.advertising)) {
+ linkmode_copy(pl->link_config.advertising, config.advertising);
+ phylink_change_inband_advert(pl);
}
+ mutex_unlock(&pl->state_mutex);
return 0;
}
@@ -1511,9 +1616,11 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl,
config->pause = pause_state;
- if (!pl->phydev && !test_bit(PHYLINK_DISABLE_STOPPED,
- &pl->phylink_disable_state))
- phylink_pcs_config(pl, true, &pl->link_config);
+ /* Update our in-band advertisement, triggering a renegotiation if
+ * the advertisement changed.
+ */
+ if (!pl->phydev)
+ phylink_change_inband_advert(pl);
mutex_unlock(&pl->state_mutex);
@@ -2336,6 +2443,43 @@ int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs,
EXPORT_SYMBOL_GPL(phylink_mii_c22_pcs_set_advertisement);
/**
+ * phylink_mii_c22_pcs_config() - configure clause 22 PCS
+ * @pcs: a pointer to a &struct mdio_device.
+ * @mode: link autonegotiation mode
+ * @interface: the PHY interface mode being configured
+ * @advertising: the ethtool advertisement mask
+ *
+ * Configure a Clause 22 PCS PHY with the appropriate negotiation
+ * parameters for the @mode, @interface and @advertising parameters.
+ * Returns negative error number on failure, zero if the advertisement
+ * has not changed, or positive if there is a change.
+ */
+int phylink_mii_c22_pcs_config(struct mdio_device *pcs, unsigned int mode,
+ phy_interface_t interface,
+ const unsigned long *advertising)
+{
+ bool changed;
+ u16 bmcr;
+ int ret;
+
+ ret = phylink_mii_c22_pcs_set_advertisement(pcs, interface,
+ advertising);
+ if (ret < 0)
+ return ret;
+
+ changed = ret > 0;
+
+ bmcr = mode == MLO_AN_INBAND ? BMCR_ANENABLE : 0;
+ ret = mdiobus_modify(pcs->bus, pcs->addr, MII_BMCR,
+ BMCR_ANENABLE, bmcr);
+ if (ret < 0)
+ return ret;
+
+ return changed ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(phylink_mii_c22_pcs_config);
+
+/**
* phylink_mii_c22_pcs_an_restart() - restart 802.3z autonegotiation
* @pcs: a pointer to a &struct mdio_device.
*
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 46599606ff10..60c1aadece89 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -260,52 +260,6 @@ static void vrf_map_unlock(struct vrf_map *vmap) __releases(&vmap->vmap_lock)
spin_unlock(&vmap->vmap_lock);
}
-static bool vrf_strict_mode(struct vrf_map *vmap)
-{
- bool strict_mode;
-
- vrf_map_lock(vmap);
- strict_mode = vmap->strict_mode;
- vrf_map_unlock(vmap);
-
- return strict_mode;
-}
-
-static int vrf_strict_mode_change(struct vrf_map *vmap, bool new_mode)
-{
- bool *cur_mode;
- int res = 0;
-
- vrf_map_lock(vmap);
-
- cur_mode = &vmap->strict_mode;
- if (*cur_mode == new_mode)
- goto unlock;
-
- if (*cur_mode) {
- /* disable strict mode */
- *cur_mode = false;
- } else {
- if (vmap->shared_tables) {
- /* we cannot allow strict_mode because there are some
- * vrfs that share one or more tables.
- */
- res = -EBUSY;
- goto unlock;
- }
-
- /* no tables are shared among vrfs, so we can go back
- * to 1:1 association between a vrf with its table.
- */
- *cur_mode = true;
- }
-
-unlock:
- vrf_map_unlock(vmap);
-
- return res;
-}
-
/* called with rtnl lock held */
static int
vrf_map_register_dev(struct net_device *dev, struct netlink_ext_ack *extack)
@@ -1790,6 +1744,53 @@ static int vrf_map_init(struct vrf_map *vmap)
return 0;
}
+#ifdef CONFIG_SYSCTL
+static bool vrf_strict_mode(struct vrf_map *vmap)
+{
+ bool strict_mode;
+
+ vrf_map_lock(vmap);
+ strict_mode = vmap->strict_mode;
+ vrf_map_unlock(vmap);
+
+ return strict_mode;
+}
+
+static int vrf_strict_mode_change(struct vrf_map *vmap, bool new_mode)
+{
+ bool *cur_mode;
+ int res = 0;
+
+ vrf_map_lock(vmap);
+
+ cur_mode = &vmap->strict_mode;
+ if (*cur_mode == new_mode)
+ goto unlock;
+
+ if (*cur_mode) {
+ /* disable strict mode */
+ *cur_mode = false;
+ } else {
+ if (vmap->shared_tables) {
+ /* we cannot allow strict_mode because there are some
+ * vrfs that share one or more tables.
+ */
+ res = -EBUSY;
+ goto unlock;
+ }
+
+ /* no tables are shared among vrfs, so we can go back
+ * to 1:1 association between a vrf with its table.
+ */
+ *cur_mode = true;
+ }
+
+unlock:
+ vrf_map_unlock(vmap);
+
+ return res;
+}
+
static int vrf_shared_table_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -1830,15 +1831,9 @@ static const struct ctl_table vrf_table[] = {
{ },
};
-/* Initialize per network namespace state */
-static int __net_init vrf_netns_init(struct net *net)
+static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf)
{
- struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id);
struct ctl_table *table;
- int res;
-
- nn_vrf->add_fib_rules = true;
- vrf_map_init(&nn_vrf->vmap);
table = kmemdup(vrf_table, sizeof(vrf_table), GFP_KERNEL);
if (!table)
@@ -1849,19 +1844,14 @@ static int __net_init vrf_netns_init(struct net *net)
nn_vrf->ctl_hdr = register_net_sysctl(net, "net/vrf", table);
if (!nn_vrf->ctl_hdr) {
- res = -ENOMEM;
- goto free_table;
+ kfree(table);
+ return -ENOMEM;
}
return 0;
-
-free_table:
- kfree(table);
-
- return res;
}
-static void __net_exit vrf_netns_exit(struct net *net)
+static void vrf_netns_exit_sysctl(struct net *net)
{
struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id);
struct ctl_table *table;
@@ -1870,6 +1860,32 @@ static void __net_exit vrf_netns_exit(struct net *net)
unregister_net_sysctl_table(nn_vrf->ctl_hdr);
kfree(table);
}
+#else
+static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf)
+{
+ return 0;
+}
+
+static void vrf_netns_exit_sysctl(struct net *net)
+{
+}
+#endif
+
+/* Initialize per network namespace state */
+static int __net_init vrf_netns_init(struct net *net)
+{
+ struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id);
+
+ nn_vrf->add_fib_rules = true;
+ vrf_map_init(&nn_vrf->vmap);
+
+ return vrf_netns_init_sysctl(net, nn_vrf);
+}
+
+static void __net_exit vrf_netns_exit(struct net *net)
+{
+ vrf_netns_exit_sysctl(net);
+}
static struct pernet_operations vrf_net_ops __net_initdata = {
.init = vrf_netns_init,
diff --git a/include/linux/bpf-netns.h b/include/linux/bpf-netns.h
index 47d5b0c708c9..722f799c1a2e 100644
--- a/include/linux/bpf-netns.h
+++ b/include/linux/bpf-netns.h
@@ -8,6 +8,7 @@
enum netns_bpf_attach_type {
NETNS_BPF_INVALID = -1,
NETNS_BPF_FLOW_DISSECTOR = 0,
+ NETNS_BPF_SK_LOOKUP,
MAX_NETNS_BPF_ATTACH_TYPE
};
@@ -17,6 +18,8 @@ to_netns_bpf_attach_type(enum bpf_attach_type attach_type)
switch (attach_type) {
case BPF_FLOW_DISSECTOR:
return NETNS_BPF_FLOW_DISSECTOR;
+ case BPF_SK_LOOKUP:
+ return NETNS_BPF_SK_LOOKUP;
default:
return NETNS_BPF_INVALID;
}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c67c88ad35f8..bae557ff2da8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -249,6 +249,7 @@ enum bpf_arg_type {
ARG_PTR_TO_INT, /* pointer to int */
ARG_PTR_TO_LONG, /* pointer to long */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
+ ARG_PTR_TO_SOCKET_OR_NULL, /* pointer to bpf_sock (fullsock) or NULL */
ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */
ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */
ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
@@ -667,6 +668,7 @@ struct bpf_jit_poke_descriptor {
struct bpf_ctx_arg_aux {
u32 offset;
enum bpf_reg_type reg_type;
+ u32 btf_id;
};
struct bpf_prog_aux {
@@ -928,6 +930,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
void bpf_prog_array_delete_safe(struct bpf_prog_array *progs,
struct bpf_prog *old_prog);
+int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index);
+int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
+ struct bpf_prog *prog);
int bpf_prog_array_copy_info(struct bpf_prog_array *array,
u32 *prog_ids, u32 request_cnt,
u32 *prog_cnt);
@@ -1272,6 +1277,7 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_flush(void);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
+bool cpu_map_prog_allowed(struct bpf_map *map);
/* Return map's numa specified by userspace */
static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
@@ -1432,6 +1438,11 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
return 0;
}
+static inline bool cpu_map_prog_allowed(struct bpf_map *map)
+{
+ return false;
+}
+
static inline struct bpf_prog *bpf_prog_get_type_path(const char *name,
enum bpf_prog_type type)
{
@@ -1531,7 +1542,6 @@ static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
void bpf_map_offload_map_free(struct bpf_map *map);
-void init_btf_sock_ids(struct btf *btf);
#else
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
union bpf_attr *attr)
@@ -1557,9 +1567,6 @@ static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
static inline void bpf_map_offload_map_free(struct bpf_map *map)
{
}
-static inline void init_btf_sock_ids(struct btf *btf)
-{
-}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
#if defined(CONFIG_BPF_STREAM_PARSER)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index a18ae82a298a..a52a5688418e 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -64,6 +64,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2,
#ifdef CONFIG_INET
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport,
struct sk_reuseport_md, struct sk_reuseport_kern)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_LOOKUP, sk_lookup,
+ struct bpf_sk_lookup, struct bpf_sk_lookup_kern)
#endif
#if defined(CONFIG_BPF_JIT)
BPF_PROG_TYPE(BPF_PROG_TYPE_STRUCT_OPS, bpf_struct_ops,
diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h
index 9b114c718a76..2ae3c8e1d83c 100644
--- a/include/linux/bpfilter.h
+++ b/include/linux/bpfilter.h
@@ -4,9 +4,10 @@
#include <uapi/linux/bpfilter.h>
#include <linux/usermode_driver.h>
+#include <linux/sockptr.h>
struct sock;
-int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval,
unsigned int optlen);
int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
int __user *optlen);
@@ -16,8 +17,7 @@ struct bpfilter_umh_ops {
struct umd_info info;
/* since ip_getsockopt() can run in parallel, serialize access to umh */
struct mutex lock;
- int (*sockopt)(struct sock *sk, int optname,
- char __user *optval,
+ int (*sockopt)(struct sock *sk, int optname, sockptr_t optval,
unsigned int optlen, bool is_set);
int (*start)(void);
};
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 1cdb56950ffe..4867d549e3c1 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -57,17 +57,20 @@ asm( \
* .zero 4
*
*/
-#define __BTF_ID_LIST(name) \
+#define __BTF_ID_LIST(name, scope) \
asm( \
".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
-".local " #name "; \n" \
+"." #scope " " #name "; \n" \
#name ":; \n" \
".popsection; \n"); \
#define BTF_ID_LIST(name) \
-__BTF_ID_LIST(name) \
+__BTF_ID_LIST(name, local) \
extern u32 name[];
+#define BTF_ID_LIST_GLOBAL(name) \
+__BTF_ID_LIST(name, globl)
+
/*
* The BTF_ID_UNUSED macro defines 4 zero bytes.
* It's used when we want to define 'unused' entry
@@ -90,7 +93,38 @@ asm( \
#define BTF_ID_LIST(name) static u32 name[5];
#define BTF_ID(prefix, name)
#define BTF_ID_UNUSED
+#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
#endif /* CONFIG_DEBUG_INFO_BTF */
+#ifdef CONFIG_NET
+/* Define a list of socket types which can be the argument for
+ * skc_to_*_sock() helpers. All these sockets should have
+ * sock_common as the first argument in its memory layout.
+ */
+#define BTF_SOCK_TYPE_xxx \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+
+enum {
+#define BTF_SOCK_TYPE(name, str) name,
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+MAX_BTF_SOCK_TYPE,
+};
+
+extern u32 btf_sock_ids[];
+#endif
+
#endif
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4d049c8e1fbe..d07a6e973a7d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -20,6 +20,7 @@
#include <linux/kallsyms.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>
+#include <linux/sockptr.h>
#include <crypto/sha.h>
#include <net/sch_generic.h>
@@ -1276,6 +1277,153 @@ struct bpf_sockopt_kern {
s32 retval;
};
-int copy_bpf_fprog_from_user(struct sock_fprog *dst, void __user *src, int len);
+int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
+
+struct bpf_sk_lookup_kern {
+ u16 family;
+ u16 protocol;
+ struct {
+ __be32 saddr;
+ __be32 daddr;
+ } v4;
+ struct {
+ const struct in6_addr *saddr;
+ const struct in6_addr *daddr;
+ } v6;
+ __be16 sport;
+ u16 dport;
+ struct sock *selected_sk;
+ bool no_reuseport;
+};
+
+extern struct static_key_false bpf_sk_lookup_enabled;
+
+/* Runners for BPF_SK_LOOKUP programs to invoke on socket lookup.
+ *
+ * Allowed return values for a BPF SK_LOOKUP program are SK_PASS and
+ * SK_DROP. Their meaning is as follows:
+ *
+ * SK_PASS && ctx.selected_sk != NULL: use selected_sk as lookup result
+ * SK_PASS && ctx.selected_sk == NULL: continue to htable-based socket lookup
+ * SK_DROP : terminate lookup with -ECONNREFUSED
+ *
+ * This macro aggregates return values and selected sockets from
+ * multiple BPF programs according to following rules in order:
+ *
+ * 1. If any program returned SK_PASS and a non-NULL ctx.selected_sk,
+ * macro result is SK_PASS and last ctx.selected_sk is used.
+ * 2. If any program returned SK_DROP return value,
+ * macro result is SK_DROP.
+ * 3. Otherwise result is SK_PASS and ctx.selected_sk is NULL.
+ *
+ * Caller must ensure that the prog array is non-NULL, and that the
+ * array as well as the programs it contains remain valid.
+ */
+#define BPF_PROG_SK_LOOKUP_RUN_ARRAY(array, ctx, func) \
+ ({ \
+ struct bpf_sk_lookup_kern *_ctx = &(ctx); \
+ struct bpf_prog_array_item *_item; \
+ struct sock *_selected_sk = NULL; \
+ bool _no_reuseport = false; \
+ struct bpf_prog *_prog; \
+ bool _all_pass = true; \
+ u32 _ret; \
+ \
+ migrate_disable(); \
+ _item = &(array)->items[0]; \
+ while ((_prog = READ_ONCE(_item->prog))) { \
+ /* restore most recent selection */ \
+ _ctx->selected_sk = _selected_sk; \
+ _ctx->no_reuseport = _no_reuseport; \
+ \
+ _ret = func(_prog, _ctx); \
+ if (_ret == SK_PASS && _ctx->selected_sk) { \
+ /* remember last non-NULL socket */ \
+ _selected_sk = _ctx->selected_sk; \
+ _no_reuseport = _ctx->no_reuseport; \
+ } else if (_ret == SK_DROP && _all_pass) { \
+ _all_pass = false; \
+ } \
+ _item++; \
+ } \
+ _ctx->selected_sk = _selected_sk; \
+ _ctx->no_reuseport = _no_reuseport; \
+ migrate_enable(); \
+ _all_pass || _selected_sk ? SK_PASS : SK_DROP; \
+ })
+
+static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
+ const __be32 saddr, const __be16 sport,
+ const __be32 daddr, const u16 dport,
+ struct sock **psk)
+{
+ struct bpf_prog_array *run_array;
+ struct sock *selected_sk = NULL;
+ bool no_reuseport = false;
+
+ rcu_read_lock();
+ run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]);
+ if (run_array) {
+ struct bpf_sk_lookup_kern ctx = {
+ .family = AF_INET,
+ .protocol = protocol,
+ .v4.saddr = saddr,
+ .v4.daddr = daddr,
+ .sport = sport,
+ .dport = dport,
+ };
+ u32 act;
+
+ act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN);
+ if (act == SK_PASS) {
+ selected_sk = ctx.selected_sk;
+ no_reuseport = ctx.no_reuseport;
+ } else {
+ selected_sk = ERR_PTR(-ECONNREFUSED);
+ }
+ }
+ rcu_read_unlock();
+ *psk = selected_sk;
+ return no_reuseport;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 dport,
+ struct sock **psk)
+{
+ struct bpf_prog_array *run_array;
+ struct sock *selected_sk = NULL;
+ bool no_reuseport = false;
+
+ rcu_read_lock();
+ run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]);
+ if (run_array) {
+ struct bpf_sk_lookup_kern ctx = {
+ .family = AF_INET6,
+ .protocol = protocol,
+ .v6.saddr = saddr,
+ .v6.daddr = daddr,
+ .sport = sport,
+ .dport = dport,
+ };
+ u32 act;
+
+ act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN);
+ if (act == SK_PASS) {
+ selected_sk = ctx.selected_sk;
+ no_reuseport = ctx.no_reuseport;
+ } else {
+ selected_sk = ERR_PTR(-ECONNREFUSED);
+ }
+ }
+ rcu_read_unlock();
+ *psk = selected_sk;
+ return no_reuseport;
+}
+#endif /* IS_ENABLED(CONFIG_IPV6) */
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/icmp.h b/include/linux/icmp.h
index 8fc38a34cb20..0af4d210ee31 100644
--- a/include/linux/icmp.h
+++ b/include/linux/icmp.h
@@ -37,6 +37,7 @@ static inline bool icmp_is_err(int type)
}
void ip_icmp_error_rfc4884(const struct sk_buff *skb,
- struct sock_ee_data_rfc4884 *out);
+ struct sock_ee_data_rfc4884 *out,
+ int thlen, int off);
#endif /* _LINUX_ICMP_H */
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 8d8f877e7f81..a44789d027cc 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -283,6 +283,7 @@ struct ipv6_pinfo {
autoflowlabel:1,
autoflowlabel_set:1,
mc_all:1,
+ recverr_rfc4884:1,
rtalert_isolate:1;
__u8 min_hopcount;
__u8 tclass;
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 9a36fad9e068..6cbbfe94348c 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -8,6 +8,7 @@
#include <net/fib_notifier.h>
#include <uapi/linux/mroute.h>
#include <linux/mroute_base.h>
+#include <linux/sockptr.h>
#ifdef CONFIG_IP_MROUTE
static inline int ip_mroute_opt(int opt)
@@ -15,7 +16,7 @@ static inline int ip_mroute_opt(int opt)
return opt >= MRT_BASE && opt <= MRT_MAX;
}
-int ip_mroute_setsockopt(struct sock *, int, char __user *, unsigned int);
+int ip_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int);
int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg);
int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
@@ -23,7 +24,7 @@ int ip_mr_init(void);
bool ipmr_rule_default(const struct fib_rule *rule);
#else
static inline int ip_mroute_setsockopt(struct sock *sock, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
return -ENOPROTOOPT;
}
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index c4a45859f586..bc351a85ce9b 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -8,6 +8,7 @@
#include <net/net_namespace.h>
#include <uapi/linux/mroute6.h>
#include <linux/mroute_base.h>
+#include <linux/sockptr.h>
#include <net/fib_rules.h>
#ifdef CONFIG_IPV6_MROUTE
@@ -25,7 +26,7 @@ static inline int ip6_mroute_opt(int opt)
struct sock;
#ifdef CONFIG_IPV6_MROUTE
-extern int ip6_mroute_setsockopt(struct sock *, int, char __user *, unsigned int);
+extern int ip6_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int);
extern int ip6_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
extern int ip6_mr_input(struct sk_buff *skb);
extern int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg);
@@ -33,9 +34,8 @@ extern int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *ar
extern int ip6_mr_init(void);
extern void ip6_mr_cleanup(void);
#else
-static inline
-int ip6_mroute_setsockopt(struct sock *sock,
- int optname, char __user *optval, unsigned int optlen)
+static inline int ip6_mroute_setsockopt(struct sock *sock, int optname,
+ sockptr_t optval, unsigned int optlen)
{
return -ENOPROTOOPT;
}
diff --git a/include/linux/net.h b/include/linux/net.h
index 858ff1d98154..d48ff1180879 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -21,6 +21,7 @@
#include <linux/rcupdate.h>
#include <linux/once.h>
#include <linux/fs.h>
+#include <linux/sockptr.h>
#include <uapi/linux/net.h>
@@ -162,7 +163,8 @@ struct proto_ops {
int (*listen) (struct socket *sock, int len);
int (*shutdown) (struct socket *sock, int flags);
int (*setsockopt)(struct socket *sock, int level,
- int optname, char __user *optval, unsigned int optlen);
+ int optname, sockptr_t optval,
+ unsigned int optlen);
int (*getsockopt)(struct socket *sock, int level,
int optname, char __user *optval, int __user *optlen);
void (*show_fdinfo)(struct seq_file *m, struct socket *sock);
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 711b4d4486f0..0101747de549 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -13,6 +13,7 @@
#include <linux/static_key.h>
#include <linux/netfilter_defs.h>
#include <linux/netdevice.h>
+#include <linux/sockptr.h>
#include <net/net_namespace.h>
static inline int NF_DROP_GETERR(int verdict)
@@ -163,7 +164,8 @@ struct nf_sockopt_ops {
/* Non-inclusive ranges: use 0/0/NULL to never get called. */
int set_optmin;
int set_optmax;
- int (*set)(struct sock *sk, int optval, void __user *user, unsigned int len);
+ int (*set)(struct sock *sk, int optval, sockptr_t arg,
+ unsigned int len);
int get_optmin;
int get_optmax;
int (*get)(struct sock *sk, int optval, void __user *user, int *len);
@@ -338,7 +340,7 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
}
/* Call setsockopt() */
-int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
+int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, sockptr_t opt,
unsigned int len);
int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
int *len);
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index b8b943ee7b8b..5deb099d156d 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -301,8 +301,8 @@ int xt_target_to_user(const struct xt_entry_target *t,
int xt_data_to_user(void __user *dst, const void *src,
int usersize, int size, int aligned_size);
-void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
- struct xt_counters_info *info);
+void *xt_copy_counters(sockptr_t arg, unsigned int len,
+ struct xt_counters_info *info);
struct xt_counters *xt_counters_alloc(unsigned int counters);
struct xt_table *xt_register_table(struct net *net,
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index b32b8b45421b..1aad2aea4610 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -76,7 +76,9 @@ struct phylink_config {
* struct phylink_mac_ops - MAC operations structure.
* @validate: Validate and update the link configuration.
* @mac_pcs_get_state: Read the current link state from the hardware.
+ * @mac_prepare: prepare for a major reconfiguration of the interface.
* @mac_config: configure the MAC for the selected mode and state.
+ * @mac_finish: finish a major reconfiguration of the interface.
* @mac_an_restart: restart 802.3z BaseX autonegotiation.
* @mac_link_down: take the link down.
* @mac_link_up: allow the link to come up.
@@ -89,8 +91,12 @@ struct phylink_mac_ops {
struct phylink_link_state *state);
void (*mac_pcs_get_state)(struct phylink_config *config,
struct phylink_link_state *state);
+ int (*mac_prepare)(struct phylink_config *config, unsigned int mode,
+ phy_interface_t iface);
void (*mac_config)(struct phylink_config *config, unsigned int mode,
const struct phylink_link_state *state);
+ int (*mac_finish)(struct phylink_config *config, unsigned int mode,
+ phy_interface_t iface);
void (*mac_an_restart)(struct phylink_config *config);
void (*mac_link_down)(struct phylink_config *config, unsigned int mode,
phy_interface_t interface);
@@ -146,6 +152,31 @@ void mac_pcs_get_state(struct phylink_config *config,
struct phylink_link_state *state);
/**
+ * mac_prepare() - prepare to change the PHY interface mode
+ * @config: a pointer to a &struct phylink_config.
+ * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND.
+ * @iface: interface mode to switch to
+ *
+ * phylink will call this method at the beginning of a full initialisation
+ * of the link, which includes changing the interface mode or at initial
+ * startup time. It may be called for the current mode. The MAC driver
+ * should perform whatever actions are required, e.g. disabling the
+ * Serdes PHY.
+ *
+ * This will be the first call in the sequence:
+ * - mac_prepare()
+ * - mac_config()
+ * - pcs_config()
+ * - possible pcs_an_restart()
+ * - mac_finish()
+ *
+ * Returns zero on success, or negative errno on failure which will be
+ * reported to the kernel log.
+ */
+int mac_prepare(struct phylink_config *config, unsigned int mode,
+ phy_interface_t iface);
+
+/**
* mac_config() - configure the MAC for the selected mode and state
* @config: a pointer to a &struct phylink_config.
* @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND.
@@ -221,6 +252,23 @@ void mac_config(struct phylink_config *config, unsigned int mode,
const struct phylink_link_state *state);
/**
+ * mac_finish() - finish a to change the PHY interface mode
+ * @config: a pointer to a &struct phylink_config.
+ * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND.
+ * @iface: interface mode to switch to
+ *
+ * phylink will call this if it called mac_prepare() to allow the MAC to
+ * complete any necessary steps after the MAC and PCS have been configured
+ * for the @mode and @iface. E.g. a MAC driver may wish to re-enable the
+ * Serdes PHY here if it was previously disabled by mac_prepare().
+ *
+ * Returns zero on success, or negative errno on failure which will be
+ * reported to the kernel log.
+ */
+int mac_finish(struct phylink_config *config, unsigned int mode,
+ phy_interface_t iface);
+
+/**
* mac_an_restart() - restart 802.3z BaseX autonegotiation
* @config: a pointer to a &struct phylink_config.
*/
@@ -273,6 +321,21 @@ void mac_link_up(struct phylink_config *config, struct phy_device *phy,
int speed, int duplex, bool tx_pause, bool rx_pause);
#endif
+struct phylink_pcs_ops;
+
+/**
+ * struct phylink_pcs - PHYLINK PCS instance
+ * @ops: a pointer to the &struct phylink_pcs_ops structure
+ * @poll: poll the PCS for link changes
+ *
+ * This structure is designed to be embedded within the PCS private data,
+ * and will be passed between phylink and the PCS.
+ */
+struct phylink_pcs {
+ const struct phylink_pcs_ops *ops;
+ bool poll;
+};
+
/**
* struct phylink_pcs_ops - MAC PCS operations structure.
* @pcs_get_state: read the current MAC PCS link state from the hardware.
@@ -282,20 +345,21 @@ void mac_link_up(struct phylink_config *config, struct phy_device *phy,
* (where necessary).
*/
struct phylink_pcs_ops {
- void (*pcs_get_state)(struct phylink_config *config,
+ void (*pcs_get_state)(struct phylink_pcs *pcs,
struct phylink_link_state *state);
- int (*pcs_config)(struct phylink_config *config, unsigned int mode,
+ int (*pcs_config)(struct phylink_pcs *pcs, unsigned int mode,
phy_interface_t interface,
- const unsigned long *advertising);
- void (*pcs_an_restart)(struct phylink_config *config);
- void (*pcs_link_up)(struct phylink_config *config, unsigned int mode,
+ const unsigned long *advertising,
+ bool permit_pause_to_mac);
+ void (*pcs_an_restart)(struct phylink_pcs *pcs);
+ void (*pcs_link_up)(struct phylink_pcs *pcs, unsigned int mode,
phy_interface_t interface, int speed, int duplex);
};
#if 0 /* For kernel-doc purposes only. */
/**
* pcs_get_state() - Read the current inband link state from the hardware
- * @config: a pointer to a &struct phylink_config.
+ * @pcs: a pointer to a &struct phylink_pcs.
* @state: a pointer to a &struct phylink_link_state.
*
* Read the current inband link state from the MAC PCS, reporting the
@@ -308,18 +372,20 @@ struct phylink_pcs_ops {
* When present, this overrides mac_pcs_get_state() in &struct
* phylink_mac_ops.
*/
-void pcs_get_state(struct phylink_config *config,
+void pcs_get_state(struct phylink_pcs *pcs,
struct phylink_link_state *state);
/**
* pcs_config() - Configure the PCS mode and advertisement
- * @config: a pointer to a &struct phylink_config.
+ * @pcs: a pointer to a &struct phylink_pcs.
* @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND.
* @interface: interface mode to be used
* @advertising: adertisement ethtool link mode mask
+ * @permit_pause_to_mac: permit forwarding pause resolution to MAC
*
* Configure the PCS for the operating mode, the interface mode, and set
- * the advertisement mask.
+ * the advertisement mask. @permit_pause_to_mac indicates whether the
+ * hardware may forward the pause mode resolution to the MAC.
*
* When operating in %MLO_AN_INBAND, inband should always be enabled,
* otherwise inband should be disabled.
@@ -331,21 +397,21 @@ void pcs_get_state(struct phylink_config *config,
*
* For most 10GBASE-R, there is no advertisement.
*/
-int (*pcs_config)(struct phylink_config *config, unsigned int mode,
- phy_interface_t interface, const unsigned long *advertising);
+int pcs_config(struct phylink_pcs *pcs, unsigned int mode,
+ phy_interface_t interface, const unsigned long *advertising);
/**
* pcs_an_restart() - restart 802.3z BaseX autonegotiation
- * @config: a pointer to a &struct phylink_config.
+ * @pcs: a pointer to a &struct phylink_pcs.
*
* When PCS ops are present, this overrides mac_an_restart() in &struct
* phylink_mac_ops.
*/
-void (*pcs_an_restart)(struct phylink_config *config);
+void pcs_an_restart(struct phylink_pcs *pcs);
/**
* pcs_link_up() - program the PCS for the resolved link configuration
- * @config: a pointer to a &struct phylink_config.
+ * @pcs: a pointer to a &struct phylink_pcs.
* @mode: link autonegotiation mode
* @interface: link &typedef phy_interface_t mode
* @speed: link speed
@@ -356,14 +422,14 @@ void (*pcs_an_restart)(struct phylink_config *config);
* mode without in-band AN needs to be manually configured for the link
* and duplex setting. Otherwise, this should be a no-op.
*/
-void (*pcs_link_up)(struct phylink_config *config, unsigned int mode,
- phy_interface_t interface, int speed, int duplex);
+void pcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
+ phy_interface_t interface, int speed, int duplex);
#endif
struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *,
phy_interface_t iface,
const struct phylink_mac_ops *mac_ops);
-void phylink_add_pcs(struct phylink *, const struct phylink_pcs_ops *ops);
+void phylink_set_pcs(struct phylink *, struct phylink_pcs *pcs);
void phylink_destroy(struct phylink *);
int phylink_connect_phy(struct phylink *, struct phy_device *);
@@ -412,6 +478,9 @@ void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs,
int phylink_mii_c22_pcs_set_advertisement(struct mdio_device *pcs,
phy_interface_t interface,
const unsigned long *advertising);
+int phylink_mii_c22_pcs_config(struct mdio_device *pcs, unsigned int mode,
+ phy_interface_t interface,
+ const unsigned long *advertising);
void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs);
void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs,
diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h
index 7071dc92b4e2..4d58dc8943f0 100644
--- a/include/linux/qed/qed_chain.h
+++ b/include/linux/qed/qed_chain.h
@@ -11,6 +11,7 @@
#include <asm/byteorder.h>
#include <linux/kernel.h>
#include <linux/list.h>
+#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/qed/common_hsi.h>
@@ -26,9 +27,9 @@ enum qed_chain_mode {
};
enum qed_chain_use_mode {
- QED_CHAIN_USE_TO_PRODUCE, /* Chain starts empty */
- QED_CHAIN_USE_TO_CONSUME, /* Chain starts full */
- QED_CHAIN_USE_TO_CONSUME_PRODUCE, /* Chain starts empty */
+ QED_CHAIN_USE_TO_PRODUCE, /* Chain starts empty */
+ QED_CHAIN_USE_TO_CONSUME, /* Chain starts full */
+ QED_CHAIN_USE_TO_CONSUME_PRODUCE, /* Chain starts empty */
};
enum qed_chain_cnt_type {
@@ -40,197 +41,230 @@ enum qed_chain_cnt_type {
};
struct qed_chain_next {
- struct regpair next_phys;
- void *next_virt;
+ struct regpair next_phys;
+ void *next_virt;
};
struct qed_chain_pbl_u16 {
- u16 prod_page_idx;
- u16 cons_page_idx;
+ u16 prod_page_idx;
+ u16 cons_page_idx;
};
struct qed_chain_pbl_u32 {
- u32 prod_page_idx;
- u32 cons_page_idx;
-};
-
-struct qed_chain_ext_pbl {
- dma_addr_t p_pbl_phys;
- void *p_pbl_virt;
+ u32 prod_page_idx;
+ u32 cons_page_idx;
};
struct qed_chain_u16 {
/* Cyclic index of next element to produce/consme */
- u16 prod_idx;
- u16 cons_idx;
+ u16 prod_idx;
+ u16 cons_idx;
};
struct qed_chain_u32 {
/* Cyclic index of next element to produce/consme */
- u32 prod_idx;
- u32 cons_idx;
+ u32 prod_idx;
+ u32 cons_idx;
};
struct addr_tbl_entry {
- void *virt_addr;
- dma_addr_t dma_map;
+ void *virt_addr;
+ dma_addr_t dma_map;
};
struct qed_chain {
- /* fastpath portion of the chain - required for commands such
+ /* Fastpath portion of the chain - required for commands such
* as produce / consume.
*/
+
/* Point to next element to produce/consume */
- void *p_prod_elem;
- void *p_cons_elem;
+ void *p_prod_elem;
+ void *p_cons_elem;
/* Fastpath portions of the PBL [if exists] */
+
struct {
/* Table for keeping the virtual and physical addresses of the
* chain pages, respectively to the physical addresses
* in the pbl table.
*/
- struct addr_tbl_entry *pp_addr_tbl;
+ struct addr_tbl_entry *pp_addr_tbl;
union {
- struct qed_chain_pbl_u16 u16;
- struct qed_chain_pbl_u32 u32;
- } c;
- } pbl;
+ struct qed_chain_pbl_u16 u16;
+ struct qed_chain_pbl_u32 u32;
+ } c;
+ } pbl;
union {
- struct qed_chain_u16 chain16;
- struct qed_chain_u32 chain32;
- } u;
+ struct qed_chain_u16 chain16;
+ struct qed_chain_u32 chain32;
+ } u;
/* Capacity counts only usable elements */
- u32 capacity;
- u32 page_cnt;
+ u32 capacity;
+ u32 page_cnt;
- enum qed_chain_mode mode;
+ enum qed_chain_mode mode;
/* Elements information for fast calculations */
- u16 elem_per_page;
- u16 elem_per_page_mask;
- u16 elem_size;
- u16 next_page_mask;
- u16 usable_per_page;
- u8 elem_unusable;
+ u16 elem_per_page;
+ u16 elem_per_page_mask;
+ u16 elem_size;
+ u16 next_page_mask;
+ u16 usable_per_page;
+ u8 elem_unusable;
- u8 cnt_type;
+ enum qed_chain_cnt_type cnt_type;
/* Slowpath of the chain - required for initialization and destruction,
* but isn't involved in regular functionality.
*/
+ u32 page_size;
+
/* Base address of a pre-allocated buffer for pbl */
struct {
- dma_addr_t p_phys_table;
- void *p_virt_table;
- } pbl_sp;
+ __le64 *table_virt;
+ dma_addr_t table_phys;
+ size_t table_size;
+ } pbl_sp;
/* Address of first page of the chain - the address is required
* for fastpath operation [consume/produce] but only for the SINGLE
* flavour which isn't considered fastpath [== SPQ].
*/
- void *p_virt_addr;
- dma_addr_t p_phys_addr;
+ void *p_virt_addr;
+ dma_addr_t p_phys_addr;
/* Total number of elements [for entire chain] */
- u32 size;
+ u32 size;
- u8 intended_use;
+ enum qed_chain_use_mode intended_use;
- bool b_external_pbl;
+ bool b_external_pbl;
};
-#define QED_CHAIN_PBL_ENTRY_SIZE (8)
-#define QED_CHAIN_PAGE_SIZE (0x1000)
-#define ELEMS_PER_PAGE(elem_size) (QED_CHAIN_PAGE_SIZE / (elem_size))
+struct qed_chain_init_params {
+ enum qed_chain_mode mode;
+ enum qed_chain_use_mode intended_use;
+ enum qed_chain_cnt_type cnt_type;
+
+ u32 page_size;
+ u32 num_elems;
+ size_t elem_size;
+
+ void *ext_pbl_virt;
+ dma_addr_t ext_pbl_phys;
+};
-#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode) \
- (((mode) == QED_CHAIN_MODE_NEXT_PTR) ? \
- (u8)(1 + ((sizeof(struct qed_chain_next) - 1) / \
- (elem_size))) : 0)
+#define QED_CHAIN_PAGE_SIZE SZ_4K
-#define USABLE_ELEMS_PER_PAGE(elem_size, mode) \
- ((u32)(ELEMS_PER_PAGE(elem_size) - \
- UNUSABLE_ELEMS_PER_PAGE(elem_size, mode)))
+#define ELEMS_PER_PAGE(elem_size, page_size) \
+ ((page_size) / (elem_size))
-#define QED_CHAIN_PAGE_CNT(elem_cnt, elem_size, mode) \
- DIV_ROUND_UP(elem_cnt, USABLE_ELEMS_PER_PAGE(elem_size, mode))
+#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode) \
+ (((mode) == QED_CHAIN_MODE_NEXT_PTR) ? \
+ (u8)(1 + ((sizeof(struct qed_chain_next) - 1) / (elem_size))) : \
+ 0)
-#define is_chain_u16(p) ((p)->cnt_type == QED_CHAIN_CNT_TYPE_U16)
-#define is_chain_u32(p) ((p)->cnt_type == QED_CHAIN_CNT_TYPE_U32)
+#define USABLE_ELEMS_PER_PAGE(elem_size, page_size, mode) \
+ ((u32)(ELEMS_PER_PAGE((elem_size), (page_size)) - \
+ UNUSABLE_ELEMS_PER_PAGE((elem_size), (mode))))
+
+#define QED_CHAIN_PAGE_CNT(elem_cnt, elem_size, page_size, mode) \
+ DIV_ROUND_UP((elem_cnt), \
+ USABLE_ELEMS_PER_PAGE((elem_size), (page_size), (mode)))
+
+#define is_chain_u16(p) \
+ ((p)->cnt_type == QED_CHAIN_CNT_TYPE_U16)
+#define is_chain_u32(p) \
+ ((p)->cnt_type == QED_CHAIN_CNT_TYPE_U32)
/* Accessors */
-static inline u16 qed_chain_get_prod_idx(struct qed_chain *p_chain)
+
+static inline u16 qed_chain_get_prod_idx(const struct qed_chain *chain)
{
- return p_chain->u.chain16.prod_idx;
+ return chain->u.chain16.prod_idx;
}
-static inline u16 qed_chain_get_cons_idx(struct qed_chain *p_chain)
+static inline u16 qed_chain_get_cons_idx(const struct qed_chain *chain)
{
- return p_chain->u.chain16.cons_idx;
+ return chain->u.chain16.cons_idx;
}
-static inline u32 qed_chain_get_cons_idx_u32(struct qed_chain *p_chain)
+static inline u32 qed_chain_get_prod_idx_u32(const struct qed_chain *chain)
{
- return p_chain->u.chain32.cons_idx;
+ return chain->u.chain32.prod_idx;
}
-static inline u16 qed_chain_get_elem_left(struct qed_chain *p_chain)
+static inline u32 qed_chain_get_cons_idx_u32(const struct qed_chain *chain)
{
- u16 elem_per_page = p_chain->elem_per_page;
- u32 prod = p_chain->u.chain16.prod_idx;
- u32 cons = p_chain->u.chain16.cons_idx;
+ return chain->u.chain32.cons_idx;
+}
+
+static inline u16 qed_chain_get_elem_used(const struct qed_chain *chain)
+{
+ u32 prod = qed_chain_get_prod_idx(chain);
+ u32 cons = qed_chain_get_cons_idx(chain);
+ u16 elem_per_page = chain->elem_per_page;
u16 used;
if (prod < cons)
prod += (u32)U16_MAX + 1;
used = (u16)(prod - cons);
- if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR)
- used -= prod / elem_per_page - cons / elem_per_page;
+ if (chain->mode == QED_CHAIN_MODE_NEXT_PTR)
+ used -= (u16)(prod / elem_per_page - cons / elem_per_page);
- return (u16)(p_chain->capacity - used);
+ return used;
}
-static inline u32 qed_chain_get_elem_left_u32(struct qed_chain *p_chain)
+static inline u16 qed_chain_get_elem_left(const struct qed_chain *chain)
{
- u16 elem_per_page = p_chain->elem_per_page;
- u64 prod = p_chain->u.chain32.prod_idx;
- u64 cons = p_chain->u.chain32.cons_idx;
+ return (u16)(chain->capacity - qed_chain_get_elem_used(chain));
+}
+
+static inline u32 qed_chain_get_elem_used_u32(const struct qed_chain *chain)
+{
+ u64 prod = qed_chain_get_prod_idx_u32(chain);
+ u64 cons = qed_chain_get_cons_idx_u32(chain);
+ u16 elem_per_page = chain->elem_per_page;
u32 used;
if (prod < cons)
prod += (u64)U32_MAX + 1;
used = (u32)(prod - cons);
- if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR)
+ if (chain->mode == QED_CHAIN_MODE_NEXT_PTR)
used -= (u32)(prod / elem_per_page - cons / elem_per_page);
- return p_chain->capacity - used;
+ return used;
}
-static inline u16 qed_chain_get_usable_per_page(struct qed_chain *p_chain)
+static inline u32 qed_chain_get_elem_left_u32(const struct qed_chain *chain)
{
- return p_chain->usable_per_page;
+ return chain->capacity - qed_chain_get_elem_used_u32(chain);
}
-static inline u8 qed_chain_get_unusable_per_page(struct qed_chain *p_chain)
+static inline u16 qed_chain_get_usable_per_page(const struct qed_chain *chain)
{
- return p_chain->elem_unusable;
+ return chain->usable_per_page;
}
-static inline u32 qed_chain_get_page_cnt(struct qed_chain *p_chain)
+static inline u8 qed_chain_get_unusable_per_page(const struct qed_chain *chain)
{
- return p_chain->page_cnt;
+ return chain->elem_unusable;
}
-static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain)
+static inline u32 qed_chain_get_page_cnt(const struct qed_chain *chain)
{
- return p_chain->pbl_sp.p_phys_table;
+ return chain->page_cnt;
+}
+
+static inline dma_addr_t qed_chain_get_pbl_phys(const struct qed_chain *chain)
+{
+ return chain->pbl_sp.table_phys;
}
/**
@@ -485,118 +519,6 @@ static inline void qed_chain_reset(struct qed_chain *p_chain)
}
/**
- * @brief qed_chain_init - Initalizes a basic chain struct
- *
- * @param p_chain
- * @param p_virt_addr
- * @param p_phys_addr physical address of allocated buffer's beginning
- * @param page_cnt number of pages in the allocated buffer
- * @param elem_size size of each element in the chain
- * @param intended_use
- * @param mode
- */
-static inline void qed_chain_init_params(struct qed_chain *p_chain,
- u32 page_cnt,
- u8 elem_size,
- enum qed_chain_use_mode intended_use,
- enum qed_chain_mode mode,
- enum qed_chain_cnt_type cnt_type)
-{
- /* chain fixed parameters */
- p_chain->p_virt_addr = NULL;
- p_chain->p_phys_addr = 0;
- p_chain->elem_size = elem_size;
- p_chain->intended_use = (u8)intended_use;
- p_chain->mode = mode;
- p_chain->cnt_type = (u8)cnt_type;
-
- p_chain->elem_per_page = ELEMS_PER_PAGE(elem_size);
- p_chain->usable_per_page = USABLE_ELEMS_PER_PAGE(elem_size, mode);
- p_chain->elem_per_page_mask = p_chain->elem_per_page - 1;
- p_chain->elem_unusable = UNUSABLE_ELEMS_PER_PAGE(elem_size, mode);
- p_chain->next_page_mask = (p_chain->usable_per_page &
- p_chain->elem_per_page_mask);
-
- p_chain->page_cnt = page_cnt;
- p_chain->capacity = p_chain->usable_per_page * page_cnt;
- p_chain->size = p_chain->elem_per_page * page_cnt;
-
- p_chain->pbl_sp.p_phys_table = 0;
- p_chain->pbl_sp.p_virt_table = NULL;
- p_chain->pbl.pp_addr_tbl = NULL;
-}
-
-/**
- * @brief qed_chain_init_mem -
- *
- * Initalizes a basic chain struct with its chain buffers
- *
- * @param p_chain
- * @param p_virt_addr virtual address of allocated buffer's beginning
- * @param p_phys_addr physical address of allocated buffer's beginning
- *
- */
-static inline void qed_chain_init_mem(struct qed_chain *p_chain,
- void *p_virt_addr, dma_addr_t p_phys_addr)
-{
- p_chain->p_virt_addr = p_virt_addr;
- p_chain->p_phys_addr = p_phys_addr;
-}
-
-/**
- * @brief qed_chain_init_pbl_mem -
- *
- * Initalizes a basic chain struct with its pbl buffers
- *
- * @param p_chain
- * @param p_virt_pbl pointer to a pre allocated side table which will hold
- * virtual page addresses.
- * @param p_phys_pbl pointer to a pre-allocated side table which will hold
- * physical page addresses.
- * @param pp_virt_addr_tbl
- * pointer to a pre-allocated side table which will hold
- * the virtual addresses of the chain pages.
- *
- */
-static inline void qed_chain_init_pbl_mem(struct qed_chain *p_chain,
- void *p_virt_pbl,
- dma_addr_t p_phys_pbl,
- struct addr_tbl_entry *pp_addr_tbl)
-{
- p_chain->pbl_sp.p_phys_table = p_phys_pbl;
- p_chain->pbl_sp.p_virt_table = p_virt_pbl;
- p_chain->pbl.pp_addr_tbl = pp_addr_tbl;
-}
-
-/**
- * @brief qed_chain_init_next_ptr_elem -
- *
- * Initalizes a next pointer element
- *
- * @param p_chain
- * @param p_virt_curr virtual address of a chain page of which the next
- * pointer element is initialized
- * @param p_virt_next virtual address of the next chain page
- * @param p_phys_next physical address of the next chain page
- *
- */
-static inline void
-qed_chain_init_next_ptr_elem(struct qed_chain *p_chain,
- void *p_virt_curr,
- void *p_virt_next, dma_addr_t p_phys_next)
-{
- struct qed_chain_next *p_next;
- u32 size;
-
- size = p_chain->elem_size * p_chain->usable_per_page;
- p_next = (struct qed_chain_next *)((u8 *)p_virt_curr + size);
-
- DMA_REGPAIR_LE(p_next->next_phys, p_phys_next);
-
- p_next->next_virt = p_virt_next;
-}
-
-/**
* @brief qed_chain_get_last_elem -
*
* Returns a pointer to the last element of the chain
@@ -703,7 +625,7 @@ static inline void qed_chain_pbl_zero_mem(struct qed_chain *p_chain)
for (i = 0; i < page_cnt; i++)
memset(p_chain->pbl.pp_addr_tbl[i].virt_addr, 0,
- QED_CHAIN_PAGE_SIZE);
+ p_chain->page_size);
}
#endif
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index a5c6854343e6..cd6a5c7e56eb 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -948,13 +948,8 @@ struct qed_common_ops {
u8 dp_level);
int (*chain_alloc)(struct qed_dev *cdev,
- enum qed_chain_use_mode intended_use,
- enum qed_chain_mode mode,
- enum qed_chain_cnt_type cnt_type,
- u32 num_elems,
- size_t elem_size,
- struct qed_chain *p_chain,
- struct qed_chain_ext_pbl *ext_pbl);
+ struct qed_chain *chain,
+ struct qed_chain_init_params *params);
void (*chain_free)(struct qed_dev *cdev,
struct qed_chain *p_chain);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6a82d4a8229e..fa817a105517 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1342,6 +1342,10 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container);
+void skb_flow_dissect_hash(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container);
+
static inline __u32 skb_get_hash(struct sk_buff *skb)
{
if (!skb->l4_hash && !skb->sw_hash)
diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
new file mode 100644
index 000000000000..7d5cdb2b30b5
--- /dev/null
+++ b/include/linux/sockptr.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020 Christoph Hellwig.
+ *
+ * Support for "universal" pointers that can point to either kernel or userspace
+ * memory.
+ */
+#ifndef _LINUX_SOCKPTR_H
+#define _LINUX_SOCKPTR_H
+
+#include <linux/compiler.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+typedef union {
+ void *kernel;
+ void __user *user;
+} sockptr_t;
+
+static inline bool sockptr_is_kernel(sockptr_t sockptr)
+{
+ return (unsigned long)sockptr.kernel >= TASK_SIZE;
+}
+
+static inline sockptr_t KERNEL_SOCKPTR(void *p)
+{
+ return (sockptr_t) { .kernel = p };
+}
+
+static inline int __must_check init_user_sockptr(sockptr_t *sp, void __user *p)
+{
+ if ((unsigned long)p >= TASK_SIZE)
+ return -EFAULT;
+ sp->user = p;
+ return 0;
+}
+#else /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
+typedef struct {
+ union {
+ void *kernel;
+ void __user *user;
+ };
+ bool is_kernel : 1;
+} sockptr_t;
+
+static inline bool sockptr_is_kernel(sockptr_t sockptr)
+{
+ return sockptr.is_kernel;
+}
+
+static inline sockptr_t KERNEL_SOCKPTR(void *p)
+{
+ return (sockptr_t) { .kernel = p, .is_kernel = true };
+}
+
+static inline int __must_check init_user_sockptr(sockptr_t *sp, void __user *p)
+{
+ sp->user = p;
+ sp->is_kernel = false;
+ return 0;
+}
+#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
+
+static inline bool sockptr_is_null(sockptr_t sockptr)
+{
+ return !sockptr.user && !sockptr.kernel;
+}
+
+static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size)
+{
+ if (!sockptr_is_kernel(src))
+ return copy_from_user(dst, src.user, size);
+ memcpy(dst, src.kernel, size);
+ return 0;
+}
+
+static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size)
+{
+ if (!sockptr_is_kernel(dst))
+ return copy_to_user(dst.user, src, size);
+ memcpy(dst.kernel, src, size);
+ return 0;
+}
+
+static inline void *memdup_sockptr(sockptr_t src, size_t len)
+{
+ void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
+
+ if (!p)
+ return ERR_PTR(-ENOMEM);
+ if (copy_from_sockptr(p, src, len)) {
+ kfree(p);
+ return ERR_PTR(-EFAULT);
+ }
+ return p;
+}
+
+static inline void *memdup_sockptr_nul(sockptr_t src, size_t len)
+{
+ char *p = kmalloc_track_caller(len + 1, GFP_KERNEL);
+
+ if (!p)
+ return ERR_PTR(-ENOMEM);
+ if (copy_from_sockptr(p, src, len)) {
+ kfree(p);
+ return ERR_PTR(-EFAULT);
+ }
+ p[len] = '\0';
+ return p;
+}
+
+static inline void sockptr_advance(sockptr_t sockptr, size_t len)
+{
+ if (sockptr_is_kernel(sockptr))
+ sockptr.kernel += len;
+ else
+ sockptr.user += len;
+}
+
+static inline long strncpy_from_sockptr(char *dst, sockptr_t src, size_t count)
+{
+ if (sockptr_is_kernel(src)) {
+ size_t len = min(strnlen(src.kernel, count - 1) + 1, count);
+
+ memcpy(dst, src.kernel, len);
+ return len;
+ }
+ return strncpy_from_user(dst, src.user, count);
+}
+
+#endif /* _LINUX_SOCKPTR_H */
diff --git a/include/net/bareudp.h b/include/net/bareudp.h
index 3dd5f9a8d01c..dc65a0d71d9b 100644
--- a/include/net/bareudp.h
+++ b/include/net/bareudp.h
@@ -12,7 +12,6 @@ struct bareudp_conf {
__be16 port;
u16 sport_min;
bool multi_proto_mode;
- bool rx_collect_metadata;
};
struct net_device *bareudp_dev_create(struct net *net, const char *name,
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 913e8679ae35..19d990c8edcc 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -40,7 +40,9 @@ struct devlink {
struct xarray snapshot_ids;
struct device *dev;
possible_net_t _net;
- struct mutex lock;
+ struct mutex lock; /* Serializes access to devlink instance specific objects such as
+ * port, sb, dpipe, resource, params, region, traps and more.
+ */
u8 reload_failed:1,
reload_enabled:1,
registered:1;
diff --git a/include/net/dsa.h b/include/net/dsa.h
index f1b63d06d132..75c8fac82017 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -94,8 +94,6 @@ struct dsa_device_ops {
struct dsa_netdevice_ops {
int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr,
int cmd);
- int (*ndo_get_phys_port_name)(struct net_device *dev, char *name,
- size_t len);
};
#define DSA_TAG_DRIVER_ALIAS "dsa_tag-"
@@ -719,33 +717,12 @@ static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr,
return ops->ndo_do_ioctl(dev, ifr, cmd);
}
-
-static inline int dsa_ndo_get_phys_port_name(struct net_device *dev,
- char *name, size_t len)
-{
- const struct dsa_netdevice_ops *ops;
- int err;
-
- err = __dsa_netdevice_ops_check(dev);
- if (err)
- return err;
-
- ops = dev->dsa_ptr->netdev_ops;
-
- return ops->ndo_get_phys_port_name(dev, name, len);
-}
#else
static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr,
int cmd)
{
return -EOPNOTSUPP;
}
-
-static inline int dsa_ndo_get_phys_port_name(struct net_device *dev,
- char *name, size_t len)
-{
- return -EOPNOTSUPP;
-}
#endif
void dsa_unregister_switch(struct dsa_switch *ds);
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 4b6e36288ddd..cc10b10dc3a1 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -243,6 +243,14 @@ struct flow_dissector_key_ct {
u32 ct_labels[4];
};
+/**
+ * struct flow_dissector_key_hash:
+ * @hash: hash value
+ */
+struct flow_dissector_key_hash {
+ u32 hash;
+};
+
enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
@@ -271,6 +279,7 @@ enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */
FLOW_DISSECTOR_KEY_META, /* struct flow_dissector_key_meta */
FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */
+ FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */
FLOW_DISSECTOR_KEY_MAX,
};
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 157c60cca0ca..1e209ce7d1bd 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -16,6 +16,7 @@
#include <linux/timer.h>
#include <linux/poll.h>
#include <linux/kernel.h>
+#include <linux/sockptr.h>
#include <net/inet_sock.h>
#include <net/request_sock.h>
@@ -45,7 +46,7 @@ struct inet_connection_sock_af_ops {
u16 net_frag_header_len;
u16 sockaddr_len;
int (*setsockopt)(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+ sockptr_t optval, unsigned int optlen);
int (*getsockopt)(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
void (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
diff --git a/include/net/ip.h b/include/net/ip.h
index 3d34acc95ca8..b09c48d862cc 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -23,6 +23,7 @@
#include <linux/in.h>
#include <linux/skbuff.h>
#include <linux/jhash.h>
+#include <linux/sockptr.h>
#include <net/inet_sock.h>
#include <net/route.h>
@@ -707,9 +708,7 @@ int __ip_options_compile(struct net *net, struct ip_options *opt,
int ip_options_compile(struct net *net, struct ip_options *opt,
struct sk_buff *skb);
int ip_options_get(struct net *net, struct ip_options_rcu **optp,
- unsigned char *data, int optlen);
-int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
- unsigned char __user *data, int optlen);
+ sockptr_t data, int optlen);
void ip_options_undo(struct ip_options *opt);
void ip_forward_options(struct sk_buff *skb);
int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev);
@@ -723,7 +722,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, int tlen, int offset);
int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
struct ipcm_cookie *ipc, bool allow_ipv6);
-int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
+int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
unsigned int optlen);
int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
int __user *optlen);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 262fc88dbd7e..bd1f396cc9c7 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -406,7 +406,7 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
struct ip6_flowlabel *fl,
struct ipv6_txoptions *fopt);
void fl6_free_socklist(struct sock *sk);
-int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen);
+int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen);
int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
int flags);
int ip6_flowlabel_init(void);
@@ -1084,8 +1084,8 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
* socket options (ipv6_sockglue.c)
*/
-int ipv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen);
int ipv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 233bbf7df5d6..b33f1aefad09 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -431,7 +431,7 @@ struct sctp_af {
int (*setsockopt) (struct sock *sk,
int level,
int optname,
- char __user *optval,
+ sockptr_t optval,
unsigned int optlen);
int (*getsockopt) (struct sock *sk,
int level,
diff --git a/include/net/sock.h b/include/net/sock.h
index 62e18fc8ac9f..2cc3ba667908 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -59,6 +59,7 @@
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/poll.h>
+#include <linux/sockptr.h>
#include <linux/atomic.h>
#include <linux/refcount.h>
@@ -1140,7 +1141,7 @@ struct proto {
void (*destroy)(struct sock *sk);
void (*shutdown)(struct sock *sk, int how);
int (*setsockopt)(struct sock *sk, int level,
- int optname, char __user *optval,
+ int optname, sockptr_t optval,
unsigned int optlen);
int (*getsockopt)(struct sock *sk, int level,
int optname, char __user *optval,
@@ -1669,7 +1670,7 @@ void sock_pfree(struct sk_buff *skb);
#endif
int sock_setsockopt(struct socket *sock, int level, int op,
- char __user *optval, unsigned int optlen);
+ sockptr_t optval, unsigned int optlen);
int sock_getsockopt(struct socket *sock, int level, int op,
char __user *optval, int __user *optlen);
@@ -1733,7 +1734,7 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname,
int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags);
int sock_common_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen);
+ sockptr_t optval, unsigned int optlen);
void sk_common_release(struct sock *sk);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9f7f7c0c1104..e0c35d56091f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -399,8 +399,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int tcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-int tcp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen);
void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
@@ -2002,7 +2002,7 @@ struct tcp_sock_af_ops {
const struct sk_buff *skb);
int (*md5_parse)(struct sock *sk,
int optname,
- char __user *optval,
+ sockptr_t optval,
int optlen);
#endif
};
diff --git a/include/net/udp.h b/include/net/udp.h
index 17a9e86a8076..295d52a73598 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -306,7 +306,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
int udp_lib_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
int udp_lib_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen,
+ sockptr_t optval, unsigned int optlen,
int (*push_pending_frames)(struct sock *));
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif);
diff --git a/include/net/xdp.h b/include/net/xdp.h
index d3005bef812f..dbe9c60797e1 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -104,6 +104,7 @@ struct xdp_frame {
struct net_device *dev_rx; /* used by cpumap */
};
+
static inline struct skb_shared_info *
xdp_get_shared_info_from_frame(struct xdp_frame *frame)
{
@@ -113,6 +114,12 @@ xdp_get_shared_info_from_frame(struct xdp_frame *frame)
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
}
+struct xdp_cpumap_stats {
+ unsigned int redirect;
+ unsigned int pass;
+ unsigned int drop;
+};
+
/* Clear kernel pointers in xdp_frame */
static inline void xdp_scrub_frame(struct xdp_frame *frame)
{
@@ -136,39 +143,48 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
xdp->frame_sz = frame->frame_sz;
}
-/* Convert xdp_buff to xdp_frame */
static inline
-struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
+int xdp_update_frame_from_buff(struct xdp_buff *xdp,
+ struct xdp_frame *xdp_frame)
{
- struct xdp_frame *xdp_frame;
- int metasize;
- int headroom;
-
- if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
- return xdp_convert_zc_to_xdp_frame(xdp);
+ int metasize, headroom;
/* Assure headroom is available for storing info */
headroom = xdp->data - xdp->data_hard_start;
metasize = xdp->data - xdp->data_meta;
metasize = metasize > 0 ? metasize : 0;
if (unlikely((headroom - metasize) < sizeof(*xdp_frame)))
- return NULL;
+ return -ENOSPC;
/* Catch if driver didn't reserve tailroom for skb_shared_info */
if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
XDP_WARN("Driver BUG: missing reserved tailroom");
- return NULL;
+ return -ENOSPC;
}
- /* Store info in top of packet */
- xdp_frame = xdp->data_hard_start;
-
xdp_frame->data = xdp->data;
xdp_frame->len = xdp->data_end - xdp->data;
xdp_frame->headroom = headroom - sizeof(*xdp_frame);
xdp_frame->metasize = metasize;
xdp_frame->frame_sz = xdp->frame_sz;
+ return 0;
+}
+
+/* Convert xdp_buff to xdp_frame */
+static inline
+struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
+{
+ struct xdp_frame *xdp_frame;
+
+ if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
+ return xdp_convert_zc_to_xdp_frame(xdp);
+
+ /* Store info in top of packet */
+ xdp_frame = xdp->data_hard_start;
+ if (unlikely(xdp_update_frame_from_buff(xdp, xdp_frame) < 0))
+ return NULL;
+
/* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
xdp_frame->mem = xdp->rxq->mem;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index f9e1fda82ddf..5e81868b574a 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -15,6 +15,7 @@
#include <linux/audit.h>
#include <linux/slab.h>
#include <linux/refcount.h>
+#include <linux/sockptr.h>
#include <net/sock.h>
#include <net/dst.h>
@@ -1609,10 +1610,11 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
-int xfrm_user_policy(struct sock *sk, int optname,
- u8 __user *optval, int optlen);
+int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
+ int optlen);
#else
-static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
+static inline int xfrm_user_policy(struct sock *sk, int optname,
+ sockptr_t optval, int optlen)
{
return -ENOPROTOOPT;
}
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index b73d3e141323..cd24e8a59529 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -177,9 +177,9 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
TRACE_EVENT(xdp_cpumap_kthread,
TP_PROTO(int map_id, unsigned int processed, unsigned int drops,
- int sched),
+ int sched, struct xdp_cpumap_stats *xdp_stats),
- TP_ARGS(map_id, processed, drops, sched),
+ TP_ARGS(map_id, processed, drops, sched, xdp_stats),
TP_STRUCT__entry(
__field(int, map_id)
@@ -188,6 +188,9 @@ TRACE_EVENT(xdp_cpumap_kthread,
__field(unsigned int, drops)
__field(unsigned int, processed)
__field(int, sched)
+ __field(unsigned int, xdp_pass)
+ __field(unsigned int, xdp_drop)
+ __field(unsigned int, xdp_redirect)
),
TP_fast_assign(
@@ -197,16 +200,21 @@ TRACE_EVENT(xdp_cpumap_kthread,
__entry->drops = drops;
__entry->processed = processed;
__entry->sched = sched;
+ __entry->xdp_pass = xdp_stats->pass;
+ __entry->xdp_drop = xdp_stats->drop;
+ __entry->xdp_redirect = xdp_stats->redirect;
),
TP_printk("kthread"
" cpu=%d map_id=%d action=%s"
" processed=%u drops=%u"
- " sched=%d",
+ " sched=%d"
+ " xdp_pass=%u xdp_drop=%u xdp_redirect=%u",
__entry->cpu, __entry->map_id,
__print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
__entry->processed, __entry->drops,
- __entry->sched)
+ __entry->sched,
+ __entry->xdp_pass, __entry->xdp_drop, __entry->xdp_redirect)
);
TRACE_EVENT(xdp_cpumap_enqueue,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 5e386389913a..54d0c886e3ba 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -189,6 +189,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_STRUCT_OPS,
BPF_PROG_TYPE_EXT,
BPF_PROG_TYPE_LSM,
+ BPF_PROG_TYPE_SK_LOOKUP,
};
enum bpf_attach_type {
@@ -227,6 +228,8 @@ enum bpf_attach_type {
BPF_CGROUP_INET6_GETSOCKNAME,
BPF_XDP_DEVMAP,
BPF_CGROUP_INET_SOCK_RELEASE,
+ BPF_XDP_CPUMAP,
+ BPF_SK_LOOKUP,
__MAX_BPF_ATTACH_TYPE
};
@@ -2419,7 +2422,7 @@ union bpf_attr {
* Look for an IPv6 socket.
*
* If the *netns* is a negative signed 32-bit integer, then the
- * socket lookup table in the netns associated with the *ctx* will
+ * socket lookup table in the netns associated with the *ctx*
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
@@ -2456,7 +2459,7 @@ union bpf_attr {
* Look for an IPv6 socket.
*
* If the *netns* is a negative signed 32-bit integer, then the
- * socket lookup table in the netns associated with the *ctx* will
+ * socket lookup table in the netns associated with the *ctx*
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
@@ -3068,6 +3071,10 @@ union bpf_attr {
*
* long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
* Description
+ * Helper is overloaded depending on BPF program type. This
+ * description applies to **BPF_PROG_TYPE_SCHED_CLS** and
+ * **BPF_PROG_TYPE_SCHED_ACT** programs.
+ *
* Assign the *sk* to the *skb*. When combined with appropriate
* routing configuration to receive the packet towards the socket,
* will cause *skb* to be delivered to the specified socket.
@@ -3093,6 +3100,56 @@ union bpf_attr {
* **-ESOCKTNOSUPPORT** if the socket type is not supported
* (reuseport).
*
+ * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags)
+ * Description
+ * Helper is overloaded depending on BPF program type. This
+ * description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs.
+ *
+ * Select the *sk* as a result of a socket lookup.
+ *
+ * For the operation to succeed passed socket must be compatible
+ * with the packet description provided by the *ctx* object.
+ *
+ * L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must
+ * be an exact match. While IP family (**AF_INET** or
+ * **AF_INET6**) must be compatible, that is IPv6 sockets
+ * that are not v6-only can be selected for IPv4 packets.
+ *
+ * Only TCP listeners and UDP unconnected sockets can be
+ * selected. *sk* can also be NULL to reset any previous
+ * selection.
+ *
+ * *flags* argument can combination of following values:
+ *
+ * * **BPF_SK_LOOKUP_F_REPLACE** to override the previous
+ * socket selection, potentially done by a BPF program
+ * that ran before us.
+ *
+ * * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip
+ * load-balancing within reuseport group for the socket
+ * being selected.
+ *
+ * On success *ctx->sk* will point to the selected socket.
+ *
+ * Return
+ * 0 on success, or a negative errno in case of failure.
+ *
+ * * **-EAFNOSUPPORT** if socket family (*sk->family*) is
+ * not compatible with packet family (*ctx->family*).
+ *
+ * * **-EEXIST** if socket has been already selected,
+ * potentially by another program, and
+ * **BPF_SK_LOOKUP_F_REPLACE** flag was not specified.
+ *
+ * * **-EINVAL** if unsupported flags were specified.
+ *
+ * * **-EPROTOTYPE** if socket L4 protocol
+ * (*sk->protocol*) doesn't match packet protocol
+ * (*ctx->protocol*).
+ *
+ * * **-ESOCKTNOSUPPORT** if socket is not in allowed
+ * state (TCP listening or UDP unconnected).
+ *
* u64 bpf_ktime_get_boot_ns(void)
* Description
* Return the time elapsed since system boot, in nanoseconds.
@@ -3606,6 +3663,12 @@ enum {
BPF_RINGBUF_HDR_SZ = 8,
};
+/* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */
+enum {
+ BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0),
+ BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1),
+};
+
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
@@ -3849,6 +3912,19 @@ struct bpf_devmap_val {
} bpf_prog;
};
+/* CPUMAP map-value layout
+ *
+ * The struct data-layout of map-value is a configuration interface.
+ * New members can only be added to the end of this structure.
+ */
+struct bpf_cpumap_val {
+ __u32 qsize; /* queue size to remote target CPU */
+ union {
+ int fd; /* prog fd on map write */
+ __u32 id; /* prog id on map read */
+ } bpf_prog;
+};
+
enum sk_action {
SK_DROP = 0,
SK_PASS,
@@ -3986,7 +4062,7 @@ struct bpf_link_info {
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
* by user and intended to be used by socket (e.g. to bind to, depends on
- * attach attach type).
+ * attach type).
*/
struct bpf_sock_addr {
__u32 user_family; /* Allows 4-byte read, but no write. */
@@ -4335,4 +4411,19 @@ struct bpf_pidns_info {
__u32 pid;
__u32 tgid;
};
+
+/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
+struct bpf_sk_lookup {
+ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+
+ __u32 family; /* Protocol family (AF_INET, AF_INET6) */
+ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
+ __u32 remote_ip4; /* Network byte order */
+ __u32 remote_ip6[4]; /* Network byte order */
+ __u32 remote_port; /* Network byte order */
+ __u32 local_ip4; /* Network byte order */
+ __u32 local_ip6[4]; /* Network byte order */
+ __u32 local_port; /* Host byte order */
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h
index 2622b5a3e616..c1661febc2dc 100644
--- a/include/uapi/linux/icmpv6.h
+++ b/include/uapi/linux/icmpv6.h
@@ -68,6 +68,7 @@ struct icmp6hdr {
#define icmp6_mtu icmp6_dataun.un_data32[0]
#define icmp6_unused icmp6_dataun.un_data32[0]
#define icmp6_maxdelay icmp6_dataun.un_data16[0]
+#define icmp6_datagram_len icmp6_dataun.un_data8[0]
#define icmp6_router icmp6_dataun.u_nd_advt.router
#define icmp6_solicited icmp6_dataun.u_nd_advt.solicited
#define icmp6_override icmp6_dataun.u_nd_advt.override
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 26842ffd0501..af8f31987526 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -601,7 +601,6 @@ enum {
IFLA_BAREUDP_ETHERTYPE,
IFLA_BAREUDP_SRCPORT_MIN,
IFLA_BAREUDP_MULTIPROTO_MODE,
- IFLA_BAREUDP_RX_COLLECT_METADATA,
__IFLA_BAREUDP_MAX
};
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index 9f2273a08356..5ad396a57eb3 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -179,6 +179,7 @@ struct in6_flowlabel_req {
#define IPV6_LEAVE_ANYCAST 28
#define IPV6_MULTICAST_ALL 29
#define IPV6_ROUTER_ALERT_ISOLATE 30
+#define IPV6_RECVERR_RFC4884 31
/* IPV6_MTU_DISCOVER values */
#define IPV6_PMTUDISC_DONT 0
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 7576209d96f9..ee95f42fb0ec 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -578,6 +578,9 @@ enum {
TCA_FLOWER_KEY_MPLS_OPTS,
+ TCA_FLOWER_KEY_HASH, /* u32 */
+ TCA_FLOWER_KEY_HASH_MASK, /* u32 */
+
__TCA_FLOWER_MAX,
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 03d6d43bb1d6..ee36b7f60936 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3672,7 +3672,6 @@ struct btf *btf_parse_vmlinux(void)
goto errout;
bpf_struct_ops_init(btf, log);
- init_btf_sock_ids(btf);
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
@@ -3818,16 +3817,17 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
return true;
/* this is a pointer to another type */
- info->reg_type = PTR_TO_BTF_ID;
for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
if (ctx_arg_info->offset == off) {
info->reg_type = ctx_arg_info->reg_type;
- break;
+ info->btf_id = ctx_arg_info->btf_id;
+ return true;
}
}
+ info->reg_type = PTR_TO_BTF_ID;
if (tgt_prog) {
ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg);
if (ret > 0) {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9df4cc9a2907..7be02e555ab9 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1958,6 +1958,61 @@ void bpf_prog_array_delete_safe(struct bpf_prog_array *array,
}
}
+/**
+ * bpf_prog_array_delete_safe_at() - Replaces the program at the given
+ * index into the program array with
+ * a dummy no-op program.
+ * @array: a bpf_prog_array
+ * @index: the index of the program to replace
+ *
+ * Skips over dummy programs, by not counting them, when calculating
+ * the the position of the program to replace.
+ *
+ * Return:
+ * * 0 - Success
+ * * -EINVAL - Invalid index value. Must be a non-negative integer.
+ * * -ENOENT - Index out of range
+ */
+int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index)
+{
+ return bpf_prog_array_update_at(array, index, &dummy_bpf_prog.prog);
+}
+
+/**
+ * bpf_prog_array_update_at() - Updates the program at the given index
+ * into the program array.
+ * @array: a bpf_prog_array
+ * @index: the index of the program to update
+ * @prog: the program to insert into the array
+ *
+ * Skips over dummy programs, by not counting them, when calculating
+ * the position of the program to update.
+ *
+ * Return:
+ * * 0 - Success
+ * * -EINVAL - Invalid index value. Must be a non-negative integer.
+ * * -ENOENT - Index out of range
+ */
+int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
+ struct bpf_prog *prog)
+{
+ struct bpf_prog_array_item *item;
+
+ if (unlikely(index < 0))
+ return -EINVAL;
+
+ for (item = array->items; item->prog; item++) {
+ if (item->prog == &dummy_bpf_prog.prog)
+ continue;
+ if (!index) {
+ WRITE_ONCE(item->prog, prog);
+ return 0;
+ }
+ index--;
+ }
+ return -ENOENT;
+}
+
int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog,
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index bd8658055c16..f1c46529929b 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -52,7 +52,6 @@ struct xdp_bulk_queue {
struct bpf_cpu_map_entry {
u32 cpu; /* kthread CPU and map index */
int map_id; /* Back reference to map */
- u32 qsize; /* Queue size placeholder for map lookup */
/* XDP can run multiple RX-ring queues, need __percpu enqueue store */
struct xdp_bulk_queue __percpu *bulkq;
@@ -62,10 +61,14 @@ struct bpf_cpu_map_entry {
/* Queue with potential multi-producers, and single-consumer kthread */
struct ptr_ring *queue;
struct task_struct *kthread;
- struct work_struct kthread_stop_wq;
+
+ struct bpf_cpumap_val value;
+ struct bpf_prog *prog;
atomic_t refcnt; /* Control when this struct can be free'ed */
struct rcu_head rcu;
+
+ struct work_struct kthread_stop_wq;
};
struct bpf_cpu_map {
@@ -80,6 +83,7 @@ static int bq_flush_to_queue(struct xdp_bulk_queue *bq);
static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
{
+ u32 value_size = attr->value_size;
struct bpf_cpu_map *cmap;
int err = -ENOMEM;
u64 cost;
@@ -90,7 +94,9 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
- attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
+ (value_size != offsetofend(struct bpf_cpumap_val, qsize) &&
+ value_size != offsetofend(struct bpf_cpumap_val, bpf_prog.fd)) ||
+ attr->map_flags & ~BPF_F_NUMA_NODE)
return ERR_PTR(-EINVAL);
cmap = kzalloc(sizeof(*cmap), GFP_USER);
@@ -212,6 +218,8 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
{
if (atomic_dec_and_test(&rcpu->refcnt)) {
+ if (rcpu->prog)
+ bpf_prog_put(rcpu->prog);
/* The queue should be empty at this point */
__cpu_map_ring_cleanup(rcpu->queue);
ptr_ring_cleanup(rcpu->queue, NULL);
@@ -220,6 +228,75 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
}
}
+static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
+ void **frames, int n,
+ struct xdp_cpumap_stats *stats)
+{
+ struct xdp_rxq_info rxq;
+ struct xdp_buff xdp;
+ int i, nframes = 0;
+
+ if (!rcpu->prog)
+ return n;
+
+ rcu_read_lock_bh();
+
+ xdp_set_return_frame_no_direct();
+ xdp.rxq = &rxq;
+
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ u32 act;
+ int err;
+
+ rxq.dev = xdpf->dev_rx;
+ rxq.mem = xdpf->mem;
+ /* TODO: report queue_index to xdp_rxq_info */
+
+ xdp_convert_frame_to_buff(xdpf, &xdp);
+
+ act = bpf_prog_run_xdp(rcpu->prog, &xdp);
+ switch (act) {
+ case XDP_PASS:
+ err = xdp_update_frame_from_buff(&xdp, xdpf);
+ if (err < 0) {
+ xdp_return_frame(xdpf);
+ stats->drop++;
+ } else {
+ frames[nframes++] = xdpf;
+ stats->pass++;
+ }
+ break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(xdpf->dev_rx, &xdp,
+ rcpu->prog);
+ if (unlikely(err)) {
+ xdp_return_frame(xdpf);
+ stats->drop++;
+ } else {
+ stats->redirect++;
+ }
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fallthrough */
+ case XDP_DROP:
+ xdp_return_frame(xdpf);
+ stats->drop++;
+ break;
+ }
+ }
+
+ if (stats->redirect)
+ xdp_do_flush_map();
+
+ xdp_clear_return_frame_no_direct();
+
+ rcu_read_unlock_bh(); /* resched point, may call do_softirq() */
+
+ return nframes;
+}
+
#define CPUMAP_BATCH 8
static int cpu_map_kthread_run(void *data)
@@ -234,11 +311,12 @@ static int cpu_map_kthread_run(void *data)
* kthread_stop signal until queue is empty.
*/
while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
+ struct xdp_cpumap_stats stats = {}; /* zero stats */
+ gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
unsigned int drops = 0, sched = 0;
void *frames[CPUMAP_BATCH];
void *skbs[CPUMAP_BATCH];
- gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
- int i, n, m;
+ int i, n, m, nframes;
/* Release CPU reschedule checks */
if (__ptr_ring_empty(rcpu->queue)) {
@@ -259,8 +337,8 @@ static int cpu_map_kthread_run(void *data)
* kthread CPU pinned. Lockless access to ptr_ring
* consume side valid as no-resize allowed of queue.
*/
- n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);
-
+ n = __ptr_ring_consume_batched(rcpu->queue, frames,
+ CPUMAP_BATCH);
for (i = 0; i < n; i++) {
void *f = frames[i];
struct page *page = virt_to_page(f);
@@ -272,15 +350,19 @@ static int cpu_map_kthread_run(void *data)
prefetchw(page);
}
- m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs);
- if (unlikely(m == 0)) {
- for (i = 0; i < n; i++)
- skbs[i] = NULL; /* effect: xdp_return_frame */
- drops = n;
+ /* Support running another XDP prog on this CPU */
+ nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats);
+ if (nframes) {
+ m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
+ if (unlikely(m == 0)) {
+ for (i = 0; i < nframes; i++)
+ skbs[i] = NULL; /* effect: xdp_return_frame */
+ drops += nframes;
+ }
}
local_bh_disable();
- for (i = 0; i < n; i++) {
+ for (i = 0; i < nframes; i++) {
struct xdp_frame *xdpf = frames[i];
struct sk_buff *skb = skbs[i];
int ret;
@@ -297,7 +379,7 @@ static int cpu_map_kthread_run(void *data)
drops++;
}
/* Feedback loop via tracepoint */
- trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);
+ trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats);
local_bh_enable(); /* resched point, may call do_softirq() */
}
@@ -307,13 +389,38 @@ static int cpu_map_kthread_run(void *data)
return 0;
}
-static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
- int map_id)
+bool cpu_map_prog_allowed(struct bpf_map *map)
{
+ return map->map_type == BPF_MAP_TYPE_CPUMAP &&
+ map->value_size != offsetofend(struct bpf_cpumap_val, qsize);
+}
+
+static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
+{
+ struct bpf_prog *prog;
+
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ if (prog->expected_attach_type != BPF_XDP_CPUMAP) {
+ bpf_prog_put(prog);
+ return -EINVAL;
+ }
+
+ rcpu->value.bpf_prog.id = prog->aux->id;
+ rcpu->prog = prog;
+
+ return 0;
+}
+
+static struct bpf_cpu_map_entry *
+__cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
+{
+ int numa, err, i, fd = value->bpf_prog.fd;
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
struct bpf_cpu_map_entry *rcpu;
struct xdp_bulk_queue *bq;
- int numa, err, i;
/* Have map->numa_node, but choose node of redirect target CPU */
numa = cpu_to_node(cpu);
@@ -338,19 +445,22 @@ static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
if (!rcpu->queue)
goto free_bulkq;
- err = ptr_ring_init(rcpu->queue, qsize, gfp);
+ err = ptr_ring_init(rcpu->queue, value->qsize, gfp);
if (err)
goto free_queue;
rcpu->cpu = cpu;
rcpu->map_id = map_id;
- rcpu->qsize = qsize;
+ rcpu->value.qsize = value->qsize;
+
+ if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
+ goto free_ptr_ring;
/* Setup kthread */
rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
"cpumap/%d/map:%d", cpu, map_id);
if (IS_ERR(rcpu->kthread))
- goto free_ptr_ring;
+ goto free_prog;
get_cpu_map_entry(rcpu); /* 1-refcnt for being in cmap->cpu_map[] */
get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */
@@ -361,6 +471,9 @@ static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
return rcpu;
+free_prog:
+ if (rcpu->prog)
+ bpf_prog_put(rcpu->prog);
free_ptr_ring:
ptr_ring_cleanup(rcpu->queue, NULL);
free_queue:
@@ -437,12 +550,12 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
+ struct bpf_cpumap_val cpumap_value = {};
struct bpf_cpu_map_entry *rcpu;
-
/* Array index key correspond to CPU number */
u32 key_cpu = *(u32 *)key;
- /* Value is the queue size */
- u32 qsize = *(u32 *)value;
+
+ memcpy(&cpumap_value, value, map->value_size);
if (unlikely(map_flags > BPF_EXIST))
return -EINVAL;
@@ -450,18 +563,18 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
return -E2BIG;
if (unlikely(map_flags == BPF_NOEXIST))
return -EEXIST;
- if (unlikely(qsize > 16384)) /* sanity limit on qsize */
+ if (unlikely(cpumap_value.qsize > 16384)) /* sanity limit on qsize */
return -EOVERFLOW;
/* Make sure CPU is a valid possible cpu */
if (key_cpu >= nr_cpumask_bits || !cpu_possible(key_cpu))
return -ENODEV;
- if (qsize == 0) {
+ if (cpumap_value.qsize == 0) {
rcpu = NULL; /* Same as deleting */
} else {
/* Updating qsize cause re-allocation of bpf_cpu_map_entry */
- rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id);
+ rcpu = __cpu_map_entry_alloc(&cpumap_value, key_cpu, map->id);
if (!rcpu)
return -ENOMEM;
rcpu->cmap = cmap;
@@ -523,7 +636,7 @@ static void *cpu_map_lookup_elem(struct bpf_map *map, void *key)
struct bpf_cpu_map_entry *rcpu =
__cpu_map_lookup_elem(map, *(u32 *)key);
- return rcpu ? &rcpu->qsize : NULL;
+ return rcpu ? &rcpu->value : NULL;
}
static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index c69071e334bf..8a7af11b411f 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -4,6 +4,7 @@
#include <linux/fs.h>
#include <linux/filter.h>
#include <linux/kernel.h>
+#include <linux/btf_ids.h>
struct bpf_iter_seq_map_info {
u32 mid;
@@ -81,7 +82,10 @@ static const struct seq_operations bpf_map_seq_ops = {
.show = bpf_map_seq_show,
};
-static const struct bpf_iter_reg bpf_map_reg_info = {
+BTF_ID_LIST(btf_bpf_map_id)
+BTF_ID(struct, bpf_map)
+
+static struct bpf_iter_reg bpf_map_reg_info = {
.target = "bpf_map",
.seq_ops = &bpf_map_seq_ops,
.init_seq_private = NULL,
@@ -96,6 +100,7 @@ static const struct bpf_iter_reg bpf_map_reg_info = {
static int __init bpf_map_iter_init(void)
{
+ bpf_map_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_map_id;
return bpf_iter_reg_target(&bpf_map_reg_info);
}
diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c
index 310241ca7991..71405edd667c 100644
--- a/kernel/bpf/net_namespace.c
+++ b/kernel/bpf/net_namespace.c
@@ -25,6 +25,32 @@ struct bpf_netns_link {
/* Protects updates to netns_bpf */
DEFINE_MUTEX(netns_bpf_mutex);
+static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type)
+{
+ switch (type) {
+#ifdef CONFIG_INET
+ case NETNS_BPF_SK_LOOKUP:
+ static_branch_dec(&bpf_sk_lookup_enabled);
+ break;
+#endif
+ default:
+ break;
+ }
+}
+
+static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type)
+{
+ switch (type) {
+#ifdef CONFIG_INET
+ case NETNS_BPF_SK_LOOKUP:
+ static_branch_inc(&bpf_sk_lookup_enabled);
+ break;
+#endif
+ default:
+ break;
+ }
+}
+
/* Must be called with netns_bpf_mutex held. */
static void netns_bpf_run_array_detach(struct net *net,
enum netns_bpf_attach_type type)
@@ -36,12 +62,50 @@ static void netns_bpf_run_array_detach(struct net *net,
bpf_prog_array_free(run_array);
}
+static int link_index(struct net *net, enum netns_bpf_attach_type type,
+ struct bpf_netns_link *link)
+{
+ struct bpf_netns_link *pos;
+ int i = 0;
+
+ list_for_each_entry(pos, &net->bpf.links[type], node) {
+ if (pos == link)
+ return i;
+ i++;
+ }
+ return -ENOENT;
+}
+
+static int link_count(struct net *net, enum netns_bpf_attach_type type)
+{
+ struct list_head *pos;
+ int i = 0;
+
+ list_for_each(pos, &net->bpf.links[type])
+ i++;
+ return i;
+}
+
+static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type,
+ struct bpf_prog_array *prog_array)
+{
+ struct bpf_netns_link *pos;
+ unsigned int i = 0;
+
+ list_for_each_entry(pos, &net->bpf.links[type], node) {
+ prog_array->items[i].prog = pos->link.prog;
+ i++;
+ }
+}
+
static void bpf_netns_link_release(struct bpf_link *link)
{
struct bpf_netns_link *net_link =
container_of(link, struct bpf_netns_link, link);
enum netns_bpf_attach_type type = net_link->netns_type;
+ struct bpf_prog_array *old_array, *new_array;
struct net *net;
+ int cnt, idx;
mutex_lock(&netns_bpf_mutex);
@@ -53,9 +117,30 @@ static void bpf_netns_link_release(struct bpf_link *link)
if (!net)
goto out_unlock;
- netns_bpf_run_array_detach(net, type);
+ /* Mark attach point as unused */
+ netns_bpf_attach_type_unneed(type);
+
+ /* Remember link position in case of safe delete */
+ idx = link_index(net, type, net_link);
list_del(&net_link->node);
+ cnt = link_count(net, type);
+ if (!cnt) {
+ netns_bpf_run_array_detach(net, type);
+ goto out_unlock;
+ }
+
+ old_array = rcu_dereference_protected(net->bpf.run_array[type],
+ lockdep_is_held(&netns_bpf_mutex));
+ new_array = bpf_prog_array_alloc(cnt, GFP_KERNEL);
+ if (!new_array) {
+ WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx));
+ goto out_unlock;
+ }
+ fill_prog_array(net, type, new_array);
+ rcu_assign_pointer(net->bpf.run_array[type], new_array);
+ bpf_prog_array_free(old_array);
+
out_unlock:
mutex_unlock(&netns_bpf_mutex);
}
@@ -77,7 +162,7 @@ static int bpf_netns_link_update_prog(struct bpf_link *link,
enum netns_bpf_attach_type type = net_link->netns_type;
struct bpf_prog_array *run_array;
struct net *net;
- int ret = 0;
+ int idx, ret;
if (old_prog && old_prog != link->prog)
return -EPERM;
@@ -95,7 +180,10 @@ static int bpf_netns_link_update_prog(struct bpf_link *link,
run_array = rcu_dereference_protected(net->bpf.run_array[type],
lockdep_is_held(&netns_bpf_mutex));
- WRITE_ONCE(run_array->items[0].prog, new_prog);
+ idx = link_index(net, type, net_link);
+ ret = bpf_prog_array_update_at(run_array, idx, new_prog);
+ if (ret)
+ goto out_unlock;
old_prog = xchg(&link->prog, new_prog);
bpf_prog_put(old_prog);
@@ -309,18 +397,30 @@ int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
return ret;
}
+static int netns_bpf_max_progs(enum netns_bpf_attach_type type)
+{
+ switch (type) {
+ case NETNS_BPF_FLOW_DISSECTOR:
+ return 1;
+ case NETNS_BPF_SK_LOOKUP:
+ return 64;
+ default:
+ return 0;
+ }
+}
+
static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
enum netns_bpf_attach_type type)
{
struct bpf_netns_link *net_link =
container_of(link, struct bpf_netns_link, link);
struct bpf_prog_array *run_array;
- int err;
+ int cnt, err;
mutex_lock(&netns_bpf_mutex);
- /* Allow attaching only one prog or link for now */
- if (!list_empty(&net->bpf.links[type])) {
+ cnt = link_count(net, type);
+ if (cnt >= netns_bpf_max_progs(type)) {
err = -E2BIG;
goto out_unlock;
}
@@ -334,6 +434,9 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
case NETNS_BPF_FLOW_DISSECTOR:
err = flow_dissector_bpf_prog_attach_check(net, link->prog);
break;
+ case NETNS_BPF_SK_LOOKUP:
+ err = 0; /* nothing to check */
+ break;
default:
err = -EINVAL;
break;
@@ -341,16 +444,22 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
if (err)
goto out_unlock;
- run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
+ run_array = bpf_prog_array_alloc(cnt + 1, GFP_KERNEL);
if (!run_array) {
err = -ENOMEM;
goto out_unlock;
}
- run_array->items[0].prog = link->prog;
- rcu_assign_pointer(net->bpf.run_array[type], run_array);
list_add_tail(&net_link->node, &net->bpf.links[type]);
+ fill_prog_array(net, type, run_array);
+ run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array,
+ lockdep_is_held(&netns_bpf_mutex));
+ bpf_prog_array_free(run_array);
+
+ /* Mark attach point as used */
+ netns_bpf_attach_type_need(type);
+
out_unlock:
mutex_unlock(&netns_bpf_mutex);
return err;
@@ -426,8 +535,10 @@ static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
mutex_lock(&netns_bpf_mutex);
for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
netns_bpf_run_array_detach(net, type);
- list_for_each_entry(net_link, &net->bpf.links[type], node)
+ list_for_each_entry(net_link, &net->bpf.links[type], node) {
net_link->net = NULL; /* auto-detach link */
+ netns_bpf_attach_type_unneed(type);
+ }
if (net->bpf.progs[type])
bpf_prog_put(net->bpf.progs[type]);
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7ea9dfbebd8c..d07417d17712 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2022,6 +2022,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
default:
return -EINVAL;
}
+ case BPF_PROG_TYPE_SK_LOOKUP:
+ if (expected_attach_type == BPF_SK_LOOKUP)
+ return 0;
+ return -EINVAL;
case BPF_PROG_TYPE_EXT:
if (expected_attach_type)
return -EINVAL;
@@ -2756,6 +2760,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+ case BPF_PROG_TYPE_SK_LOOKUP:
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
case BPF_PROG_TYPE_CGROUP_SKB:
if (!capable(CAP_NET_ADMIN))
@@ -2817,6 +2822,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
return BPF_PROG_TYPE_CGROUP_SOCKOPT;
case BPF_TRACE_ITER:
return BPF_PROG_TYPE_TRACING;
+ case BPF_SK_LOOKUP:
+ return BPF_PROG_TYPE_SK_LOOKUP;
default:
return BPF_PROG_TYPE_UNSPEC;
}
@@ -2953,6 +2960,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_LIRC_MODE2:
return lirc_prog_query(attr, uattr);
case BPF_FLOW_DISSECTOR:
+ case BPF_SK_LOOKUP:
return netns_bpf_prog_query(attr, uattr);
default:
return -EINVAL;
@@ -3891,6 +3899,7 @@ static int link_create(union bpf_attr *attr)
ret = tracing_bpf_link_attach(attr, prog);
break;
case BPF_PROG_TYPE_FLOW_DISSECTOR:
+ case BPF_PROG_TYPE_SK_LOOKUP:
ret = netns_bpf_link_create(attr, prog);
break;
default:
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 4dbf2b6035f8..2feecf095609 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -7,6 +7,7 @@
#include <linux/fs.h>
#include <linux/fdtable.h>
#include <linux/filter.h>
+#include <linux/btf_ids.h>
struct bpf_iter_seq_task_common {
struct pid_namespace *ns;
@@ -312,7 +313,11 @@ static const struct seq_operations task_file_seq_ops = {
.show = task_file_seq_show,
};
-static const struct bpf_iter_reg task_reg_info = {
+BTF_ID_LIST(btf_task_file_ids)
+BTF_ID(struct, task_struct)
+BTF_ID(struct, file)
+
+static struct bpf_iter_reg task_reg_info = {
.target = "task",
.seq_ops = &task_seq_ops,
.init_seq_private = init_seq_pidns,
@@ -325,7 +330,7 @@ static const struct bpf_iter_reg task_reg_info = {
},
};
-static const struct bpf_iter_reg task_file_reg_info = {
+static struct bpf_iter_reg task_file_reg_info = {
.target = "task_file",
.seq_ops = &task_file_seq_ops,
.init_seq_private = init_seq_pidns,
@@ -344,10 +349,13 @@ static int __init task_iter_init(void)
{
int ret;
+ task_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
ret = bpf_iter_reg_target(&task_reg_info);
if (ret)
return ret;
+ task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
+ task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1];
return bpf_iter_reg_target(&task_file_reg_info);
}
late_initcall(task_iter_init);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3c1efc9d08fd..9a6703bc3f36 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3878,10 +3878,14 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
}
meta->ref_obj_id = reg->ref_obj_id;
}
- } else if (arg_type == ARG_PTR_TO_SOCKET) {
+ } else if (arg_type == ARG_PTR_TO_SOCKET ||
+ arg_type == ARG_PTR_TO_SOCKET_OR_NULL) {
expected_type = PTR_TO_SOCKET;
- if (type != expected_type)
- goto err_type;
+ if (!(register_is_null(reg) &&
+ arg_type == ARG_PTR_TO_SOCKET_OR_NULL)) {
+ if (type != expected_type)
+ goto err_type;
+ }
} else if (arg_type == ARG_PTR_TO_BTF_ID) {
expected_type = PTR_TO_BTF_ID;
if (type != expected_type)
@@ -7354,6 +7358,9 @@ static int check_return_code(struct bpf_verifier_env *env)
return -ENOTSUPP;
}
break;
+ case BPF_PROG_TYPE_SK_LOOKUP:
+ range = tnum_range(SK_DROP, SK_PASS);
+ break;
case BPF_PROG_TYPE_EXT:
/* freplace program can return anything as its return value
* depends on the to-be-replaced kernel func or bpf program.
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index a5fddf9ebcb7..ca7d635bccd9 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5275,31 +5275,21 @@ static struct bpf_test tests[] = {
{ /* Mainly checking JIT here. */
"BPF_MAXINSNS: Ctx heavy transformations",
{ },
-#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
- CLASSIC | FLAG_EXPECTED_FAIL,
-#else
CLASSIC,
-#endif
{ },
{
{ 1, SKB_VLAN_PRESENT },
{ 10, SKB_VLAN_PRESENT }
},
.fill_helper = bpf_fill_maxinsns6,
- .expected_errcode = -ENOTSUPP,
},
{ /* Mainly checking JIT here. */
"BPF_MAXINSNS: Call heavy transformations",
{ },
-#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
- CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
-#else
CLASSIC | FLAG_NO_DATA,
-#endif
{ },
{ { 1, 0 }, { 10, 0 } },
.fill_helper = bpf_fill_maxinsns7,
- .expected_errcode = -ENOTSUPP,
},
{ /* Mainly checking JIT here. */
"BPF_MAXINSNS: Jump heavy test",
@@ -5350,28 +5340,18 @@ static struct bpf_test tests[] = {
{
"BPF_MAXINSNS: exec all MSH",
{ },
-#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
- CLASSIC | FLAG_EXPECTED_FAIL,
-#else
CLASSIC,
-#endif
{ 0xfa, 0xfb, 0xfc, 0xfd, },
{ { 4, 0xababab83 } },
.fill_helper = bpf_fill_maxinsns13,
- .expected_errcode = -ENOTSUPP,
},
{
"BPF_MAXINSNS: ld_abs+get_processor_id",
{ },
-#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
- CLASSIC | FLAG_EXPECTED_FAIL,
-#else
CLASSIC,
-#endif
{ },
{ { 1, 0xbee } },
.fill_helper = bpf_fill_ld_abs_get_processor_id,
- .expected_errcode = -ENOTSUPP,
},
/*
* LD_IND / LD_ABS on fragmented SKBs
diff --git a/net/atm/common.c b/net/atm/common.c
index 9b28f1fb3c69..84367b844b14 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -745,7 +745,7 @@ static int check_qos(const struct atm_qos *qos)
}
int vcc_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct atm_vcc *vcc;
unsigned long value;
@@ -760,7 +760,7 @@ int vcc_setsockopt(struct socket *sock, int level, int optname,
{
struct atm_qos qos;
- if (copy_from_user(&qos, optval, sizeof(qos)))
+ if (copy_from_sockptr(&qos, optval, sizeof(qos)))
return -EFAULT;
error = check_qos(&qos);
if (error)
@@ -774,7 +774,7 @@ int vcc_setsockopt(struct socket *sock, int level, int optname,
return 0;
}
case SO_SETCLP:
- if (get_user(value, (unsigned long __user *)optval))
+ if (copy_from_sockptr(&value, optval, sizeof(value)))
return -EFAULT;
if (value)
vcc->atm_options |= ATM_ATMOPT_CLP;
diff --git a/net/atm/common.h b/net/atm/common.h
index 5850649068bb..a1e56e8de698 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -21,7 +21,7 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen);
+ sockptr_t optval, unsigned int optlen);
int vcc_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen);
void vcc_process_recv_queue(struct atm_vcc *vcc);
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 02bd2a436bdf..53e7d3f39e26 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -63,7 +63,7 @@ static int pvc_connect(struct socket *sock, struct sockaddr *sockaddr,
}
static int pvc_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int error;
diff --git a/net/atm/svc.c b/net/atm/svc.c
index ba144d035e3d..4a02bcaad279 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -451,7 +451,7 @@ int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
}
static int svc_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct atm_vcc *vcc = ATM_SD(sock);
@@ -464,7 +464,7 @@ static int svc_setsockopt(struct socket *sock, int level, int optname,
error = -EINVAL;
goto out;
}
- if (copy_from_user(&vcc->sap, optval, optlen)) {
+ if (copy_from_sockptr(&vcc->sap, optval, optlen)) {
error = -EFAULT;
goto out;
}
@@ -475,7 +475,7 @@ static int svc_setsockopt(struct socket *sock, int level, int optname,
error = -EINVAL;
goto out;
}
- if (get_user(value, (int __user *)optval)) {
+ if (copy_from_sockptr(&value, optval, sizeof(int))) {
error = -EFAULT;
goto out;
}
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index fd91cd34f25e..17bf31a89692 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -528,7 +528,7 @@ ax25_cb *ax25_create_cb(void)
*/
static int ax25_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
ax25_cb *ax25;
@@ -543,7 +543,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(unsigned int))
return -EINVAL;
- if (get_user(opt, (unsigned int __user *)optval))
+ if (copy_from_sockptr(&opt, optval, sizeof(unsigned int)))
return -EFAULT;
lock_sock(sk);
@@ -640,7 +640,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
memset(devname, 0, sizeof(devname));
- if (copy_from_user(devname, optval, optlen)) {
+ if (copy_from_sockptr(devname, optval, optlen)) {
res = -EFAULT;
break;
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index caf38a8ea6a8..d5eff27d5b1e 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1842,7 +1842,7 @@ drop:
}
static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int len)
+ sockptr_t optval, unsigned int len)
{
struct hci_ufilter uf = { .opcode = 0 };
struct sock *sk = sock->sk;
@@ -1862,7 +1862,7 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case HCI_DATA_DIR:
- if (get_user(opt, (int __user *)optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
err = -EFAULT;
break;
}
@@ -1874,7 +1874,7 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case HCI_TIME_STAMP:
- if (get_user(opt, (int __user *)optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
err = -EFAULT;
break;
}
@@ -1896,7 +1896,7 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
}
len = min_t(unsigned int, len, sizeof(uf));
- if (copy_from_user(&uf, optval, len)) {
+ if (copy_from_sockptr(&uf, optval, len)) {
err = -EFAULT;
break;
}
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index a995d2c51fa7..a3d104123f38 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -703,7 +703,7 @@ static bool l2cap_valid_mtu(struct l2cap_chan *chan, u16 mtu)
}
static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct l2cap_chan *chan = l2cap_pi(sk)->chan;
@@ -736,7 +736,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
opts.txwin_size = chan->tx_win;
len = min_t(unsigned int, sizeof(opts), optlen);
- if (copy_from_user((char *) &opts, optval, len)) {
+ if (copy_from_sockptr(&opts, optval, len)) {
err = -EFAULT;
break;
}
@@ -782,7 +782,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
break;
case L2CAP_LM:
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -859,7 +859,7 @@ static int l2cap_set_mode(struct l2cap_chan *chan, u8 mode)
}
static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct l2cap_chan *chan = l2cap_pi(sk)->chan;
@@ -891,7 +891,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
sec.level = BT_SECURITY_LOW;
len = min_t(unsigned int, sizeof(sec), optlen);
- if (copy_from_user((char *) &sec, optval, len)) {
+ if (copy_from_sockptr(&sec, optval, len)) {
err = -EFAULT;
break;
}
@@ -939,7 +939,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -954,7 +954,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_FLUSHABLE:
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -990,7 +990,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
pwr.force_active = BT_POWER_FORCE_ACTIVE_ON;
len = min_t(unsigned int, sizeof(pwr), optlen);
- if (copy_from_user((char *) &pwr, optval, len)) {
+ if (copy_from_sockptr(&pwr, optval, len)) {
err = -EFAULT;
break;
}
@@ -1002,7 +1002,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_CHANNEL_POLICY:
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -1050,7 +1050,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (get_user(opt, (u16 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u16))) {
err = -EFAULT;
break;
}
@@ -1081,7 +1081,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (get_user(opt, (u8 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u8))) {
err = -EFAULT;
break;
}
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index df14eebe80da..dba4ea0e1b0d 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -644,7 +644,8 @@ static int rfcomm_sock_recvmsg(struct socket *sock, struct msghdr *msg,
return len;
}
-static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen)
+static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int err = 0;
@@ -656,7 +657,7 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __u
switch (optname) {
case RFCOMM_LM:
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -685,7 +686,8 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __u
return err;
}
-static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
+static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct bt_security sec;
@@ -713,7 +715,7 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c
sec.level = BT_SECURITY_LOW;
len = min_t(unsigned int, sizeof(sec), optlen);
- if (copy_from_user((char *) &sec, optval, len)) {
+ if (copy_from_sockptr(&sec, optval, len)) {
err = -EFAULT;
break;
}
@@ -732,7 +734,7 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c
break;
}
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index c8c3d38cdc7b..37260baf7150 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -791,7 +791,7 @@ static int sco_sock_recvmsg(struct socket *sock, struct msghdr *msg,
}
static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int len, err = 0;
@@ -810,7 +810,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -831,7 +831,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
voice.setting = sco_pi(sk)->setting;
len = min_t(unsigned int, sizeof(voice), optlen);
- if (copy_from_user((char *)&voice, optval, len)) {
+ if (copy_from_sockptr(&voice, optval, len)) {
err = -EFAULT;
break;
}
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index 2c31e82cb953..f580c3344cb3 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -31,48 +31,55 @@ static void __stop_umh(void)
shutdown_umh();
}
-static int __bpfilter_process_sockopt(struct sock *sk, int optname,
- char __user *optval,
- unsigned int optlen, bool is_set)
+static int bpfilter_send_req(struct mbox_request *req)
{
- struct mbox_request req;
struct mbox_reply reply;
loff_t pos;
ssize_t n;
- int ret = -EFAULT;
- req.is_set = is_set;
- req.pid = current->pid;
- req.cmd = optname;
- req.addr = (long __force __user)optval;
- req.len = optlen;
if (!bpfilter_ops.info.tgid)
- goto out;
+ return -EFAULT;
pos = 0;
- n = kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req),
+ n = kernel_write(bpfilter_ops.info.pipe_to_umh, req, sizeof(*req),
&pos);
- if (n != sizeof(req)) {
+ if (n != sizeof(*req)) {
pr_err("write fail %zd\n", n);
- __stop_umh();
- ret = -EFAULT;
- goto out;
+ goto stop;
}
pos = 0;
n = kernel_read(bpfilter_ops.info.pipe_from_umh, &reply, sizeof(reply),
&pos);
if (n != sizeof(reply)) {
pr_err("read fail %zd\n", n);
- __stop_umh();
- ret = -EFAULT;
- goto out;
+ goto stop;
+ }
+ return reply.status;
+stop:
+ __stop_umh();
+ return -EFAULT;
+}
+
+static int bpfilter_process_sockopt(struct sock *sk, int optname,
+ sockptr_t optval, unsigned int optlen,
+ bool is_set)
+{
+ struct mbox_request req = {
+ .is_set = is_set,
+ .pid = current->pid,
+ .cmd = optname,
+ .addr = (uintptr_t)optval.user,
+ .len = optlen,
+ };
+ if (uaccess_kernel() || sockptr_is_kernel(optval)) {
+ pr_err("kernel access not supported\n");
+ return -EFAULT;
}
- ret = reply.status;
-out:
- return ret;
+ return bpfilter_send_req(&req);
}
static int start_umh(void)
{
+ struct mbox_request req = { .pid = current->pid };
int err;
/* fork usermode process */
@@ -82,7 +89,7 @@ static int start_umh(void)
pr_info("Loaded bpfilter_umh pid %d\n", pid_nr(bpfilter_ops.info.tgid));
/* health check that usermode process started correctly */
- if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) {
+ if (bpfilter_send_req(&req) != 0) {
shutdown_umh();
return -EFAULT;
}
@@ -103,7 +110,7 @@ static int __init load_umh(void)
mutex_lock(&bpfilter_ops.lock);
err = start_umh();
if (!err && IS_ENABLED(CONFIG_INET)) {
- bpfilter_ops.sockopt = &__bpfilter_process_sockopt;
+ bpfilter_ops.sockopt = &bpfilter_process_sockopt;
bpfilter_ops.start = &start_umh;
}
mutex_unlock(&bpfilter_ops.lock);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index fe13108af1f5..d35173e803d3 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1063,14 +1063,13 @@ free_counterstmp:
}
/* replace the table */
-static int do_replace(struct net *net, const void __user *user,
- unsigned int len)
+static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret, countersize;
struct ebt_table_info *newinfo;
struct ebt_replace tmp;
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
if (len != sizeof(tmp) + tmp.entries_size)
@@ -1242,9 +1241,8 @@ void ebt_unregister_table(struct net *net, struct ebt_table *table,
/* userspace just supplied us with counters */
static int do_update_counters(struct net *net, const char *name,
- struct ebt_counter __user *counters,
- unsigned int num_counters,
- const void __user *user, unsigned int len)
+ struct ebt_counter __user *counters,
+ unsigned int num_counters, unsigned int len)
{
int i, ret;
struct ebt_counter *tmp;
@@ -1287,19 +1285,18 @@ free_tmp:
return ret;
}
-static int update_counters(struct net *net, const void __user *user,
- unsigned int len)
+static int update_counters(struct net *net, sockptr_t arg, unsigned int len)
{
struct ebt_replace hlp;
- if (copy_from_user(&hlp, user, sizeof(hlp)))
+ if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
return -EFAULT;
if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter))
return -EINVAL;
return do_update_counters(net, hlp.name, hlp.counters,
- hlp.num_counters, user, len);
+ hlp.num_counters, len);
}
static inline int ebt_obj_to_user(char __user *um, const char *_name,
@@ -2080,7 +2077,7 @@ static int compat_copy_entries(unsigned char *data, unsigned int size_user,
static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl,
- void __user *user, unsigned int len)
+ sockptr_t arg, unsigned int len)
{
struct compat_ebt_replace tmp;
int i;
@@ -2088,7 +2085,7 @@ static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl,
if (len < sizeof(tmp))
return -EINVAL;
- if (copy_from_user(&tmp, user, sizeof(tmp)))
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)))
return -EFAULT;
if (len != sizeof(tmp) + tmp.entries_size)
@@ -2115,8 +2112,7 @@ static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl,
return 0;
}
-static int compat_do_replace(struct net *net, void __user *user,
- unsigned int len)
+static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret, i, countersize, size64;
struct ebt_table_info *newinfo;
@@ -2124,10 +2120,10 @@ static int compat_do_replace(struct net *net, void __user *user,
struct ebt_entries_buf_state state;
void *entries_tmp;
- ret = compat_copy_ebt_replace_from_user(&tmp, user, len);
+ ret = compat_copy_ebt_replace_from_user(&tmp, arg, len);
if (ret) {
/* try real handler in case userland supplied needed padding */
- if (ret == -EINVAL && do_replace(net, user, len) == 0)
+ if (ret == -EINVAL && do_replace(net, arg, len) == 0)
ret = 0;
return ret;
}
@@ -2218,20 +2214,20 @@ out_unlock:
goto free_entries;
}
-static int compat_update_counters(struct net *net, void __user *user,
+static int compat_update_counters(struct net *net, sockptr_t arg,
unsigned int len)
{
struct compat_ebt_replace hlp;
- if (copy_from_user(&hlp, user, sizeof(hlp)))
+ if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
return -EFAULT;
/* try real handler in case userland supplied needed padding */
if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter))
- return update_counters(net, user, len);
+ return update_counters(net, arg, len);
return do_update_counters(net, hlp.name, compat_ptr(hlp.counters),
- hlp.num_counters, user, len);
+ hlp.num_counters, len);
}
static int compat_do_ebt_get_ctl(struct sock *sk, int cmd,
@@ -2369,7 +2365,7 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
return ret;
}
-static int do_ebt_set_ctl(struct sock *sk, int cmd, void __user *user,
+static int do_ebt_set_ctl(struct sock *sk, int cmd, sockptr_t arg,
unsigned int len)
{
struct net *net = sock_net(sk);
@@ -2382,18 +2378,18 @@ static int do_ebt_set_ctl(struct sock *sk, int cmd, void __user *user,
case EBT_SO_SET_ENTRIES:
#ifdef CONFIG_COMPAT
if (in_compat_syscall())
- ret = compat_do_replace(net, user, len);
+ ret = compat_do_replace(net, arg, len);
else
#endif
- ret = do_replace(net, user, len);
+ ret = do_replace(net, arg, len);
break;
case EBT_SO_SET_COUNTERS:
#ifdef CONFIG_COMPAT
if (in_compat_syscall())
- ret = compat_update_counters(net, user, len);
+ ret = compat_update_counters(net, arg, len);
else
#endif
- ret = update_counters(net, user, len);
+ ret = update_counters(net, arg, len);
break;
default:
ret = -EINVAL;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index b94ecd931002..3ad0a1df6712 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -669,8 +669,8 @@ out_err:
return sent ? : err;
}
-static int setsockopt(struct socket *sock,
- int lvl, int opt, char __user *ov, unsigned int ol)
+static int setsockopt(struct socket *sock, int lvl, int opt, sockptr_t ov,
+ unsigned int ol)
{
struct sock *sk = sock->sk;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
@@ -685,7 +685,7 @@ static int setsockopt(struct socket *sock,
return -EINVAL;
if (lvl != SOL_CAIF)
goto bad_sol;
- if (copy_from_user(&linksel, ov, sizeof(int)))
+ if (copy_from_sockptr(&linksel, ov, sizeof(int)))
return -EINVAL;
lock_sock(&(cf_sk->sk));
cf_sk->conn_req.link_selector = linksel;
@@ -699,7 +699,7 @@ static int setsockopt(struct socket *sock,
return -ENOPROTOOPT;
lock_sock(&(cf_sk->sk));
if (ol > sizeof(cf_sk->conn_req.param.data) ||
- copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) {
+ copy_from_sockptr(&cf_sk->conn_req.param.data, ov, ol)) {
release_sock(&cf_sk->sk);
return -EINVAL;
}
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index f7587428febd..78ff9b3f1d40 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -627,14 +627,14 @@ static int j1939_sk_release(struct socket *sock)
return 0;
}
-static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, char __user *optval,
+static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, sockptr_t optval,
unsigned int optlen, int flag)
{
int tmp;
if (optlen != sizeof(tmp))
return -EINVAL;
- if (copy_from_user(&tmp, optval, optlen))
+ if (copy_from_sockptr(&tmp, optval, optlen))
return -EFAULT;
lock_sock(&jsk->sk);
if (tmp)
@@ -646,7 +646,7 @@ static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, char __user *optval,
}
static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct j1939_sock *jsk = j1939_sk(sk);
@@ -658,7 +658,7 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case SO_J1939_FILTER:
- if (optval) {
+ if (!sockptr_is_null(optval)) {
struct j1939_filter *f;
int c;
@@ -670,7 +670,7 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
count = optlen / sizeof(*filters);
- filters = memdup_user(optval, optlen);
+ filters = memdup_sockptr(optval, optlen);
if (IS_ERR(filters))
return PTR_ERR(filters);
@@ -703,7 +703,7 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
case SO_J1939_SEND_PRIO:
if (optlen != sizeof(tmp))
return -EINVAL;
- if (copy_from_user(&tmp, optval, optlen))
+ if (copy_from_sockptr(&tmp, optval, optlen))
return -EFAULT;
if (tmp < 0 || tmp > 7)
return -EDOM;
diff --git a/net/can/raw.c b/net/can/raw.c
index 59c039d73c6d..94a9405658dc 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -485,7 +485,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
}
static int raw_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct raw_sock *ro = raw_sk(sk);
@@ -511,11 +511,11 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (count > 1) {
/* filter does not fit into dfilter => alloc space */
- filter = memdup_user(optval, optlen);
+ filter = memdup_sockptr(optval, optlen);
if (IS_ERR(filter))
return PTR_ERR(filter);
} else if (count == 1) {
- if (copy_from_user(&sfilter, optval, sizeof(sfilter)))
+ if (copy_from_sockptr(&sfilter, optval, sizeof(sfilter)))
return -EFAULT;
}
@@ -568,7 +568,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (optlen != sizeof(err_mask))
return -EINVAL;
- if (copy_from_user(&err_mask, optval, optlen))
+ if (copy_from_sockptr(&err_mask, optval, optlen))
return -EFAULT;
err_mask &= CAN_ERR_MASK;
@@ -607,7 +607,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (optlen != sizeof(ro->loopback))
return -EINVAL;
- if (copy_from_user(&ro->loopback, optval, optlen))
+ if (copy_from_sockptr(&ro->loopback, optval, optlen))
return -EFAULT;
break;
@@ -616,7 +616,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (optlen != sizeof(ro->recv_own_msgs))
return -EINVAL;
- if (copy_from_user(&ro->recv_own_msgs, optval, optlen))
+ if (copy_from_sockptr(&ro->recv_own_msgs, optval, optlen))
return -EFAULT;
break;
@@ -625,7 +625,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (optlen != sizeof(ro->fd_frames))
return -EINVAL;
- if (copy_from_user(&ro->fd_frames, optval, optlen))
+ if (copy_from_sockptr(&ro->fd_frames, optval, optlen))
return -EFAULT;
break;
@@ -634,7 +634,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (optlen != sizeof(ro->join_filters))
return -EINVAL;
- if (copy_from_user(&ro->join_filters, optval, optlen))
+ if (copy_from_sockptr(&ro->join_filters, optval, optlen))
return -EFAULT;
break;
diff --git a/net/core/dev.c b/net/core/dev.c
index 19f1abc26fcd..a986b07ea845 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -98,7 +98,6 @@
#include <net/busy_poll.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
-#include <net/dsa.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/pkt_sched.h>
@@ -5449,6 +5448,8 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
for (i = 0; i < new->aux->used_map_cnt; i++) {
if (dev_map_can_have_prog(new->aux->used_maps[i]))
return -EINVAL;
+ if (cpu_map_prog_allowed(new->aux->used_maps[i]))
+ return -EINVAL;
}
}
@@ -8603,10 +8604,6 @@ int dev_get_phys_port_name(struct net_device *dev,
const struct net_device_ops *ops = dev->netdev_ops;
int err;
- err = dsa_ndo_get_phys_port_name(dev, name, len);
- if (err == 0 || err != -EOPNOTSUPP)
- return err;
-
if (ops->ndo_get_phys_port_name) {
err = ops->ndo_get_phys_port_name(dev, name, len);
if (err != -EOPNOTSUPP)
@@ -8880,6 +8877,13 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
return -EINVAL;
}
+ if (prog->expected_attach_type == BPF_XDP_CPUMAP) {
+ NL_SET_ERR_MSG(extack,
+ "BPF_XDP_CPUMAP programs can not be attached to a device");
+ bpf_prog_put(prog);
+ return -EINVAL;
+ }
+
/* prog->aux->id may be 0 for orphaned device-bound progs */
if (prog->aux->id && prog->aux->id == prog_id) {
bpf_prog_put(prog);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 6335e1851088..0ca89196a367 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -386,16 +386,14 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
return NULL;
}
-#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0)
-#define DEVLINK_NL_FLAG_NEED_PORT BIT(1)
-#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(2)
-#define DEVLINK_NL_FLAG_NEED_SB BIT(3)
+#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
+#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
/* The per devlink instance lock is taken by default in the pre-doit
* operation, yet several commands do not require this. The global
* devlink lock is taken and protects from disruption by user-calls.
*/
-#define DEVLINK_NL_FLAG_NO_LOCK BIT(4)
+#define DEVLINK_NL_FLAG_NO_LOCK BIT(2)
static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
@@ -412,31 +410,19 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
}
if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_lock(&devlink->lock);
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
- info->user_ptr[0] = devlink;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
+ info->user_ptr[0] = devlink;
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
devlink_port = devlink_port_get_from_info(devlink, info);
if (IS_ERR(devlink_port)) {
err = PTR_ERR(devlink_port);
goto unlock;
}
- info->user_ptr[0] = devlink_port;
+ info->user_ptr[1] = devlink_port;
} else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) {
- info->user_ptr[0] = devlink;
devlink_port = devlink_port_get_from_info(devlink, info);
if (!IS_ERR(devlink_port))
info->user_ptr[1] = devlink_port;
}
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) {
- struct devlink_sb *devlink_sb;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb)) {
- err = PTR_ERR(devlink_sb);
- goto unlock;
- }
- info->user_ptr[1] = devlink_sb;
- }
return 0;
unlock:
@@ -451,16 +437,8 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
{
struct devlink *devlink;
- /* When devlink changes netns, it would not be found
- * by devlink_get_from_info(). So try if it is stored first.
- */
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
- devlink = info->user_ptr[0];
- } else {
- devlink = devlink_get_from_info(info);
- WARN_ON(IS_ERR(devlink));
- }
- if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+ devlink = info->user_ptr[0];
+ if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_unlock(&devlink->lock);
mutex_unlock(&devlink_mutex);
}
@@ -759,7 +737,7 @@ out:
static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = devlink_port->devlink;
struct sk_buff *msg;
int err;
@@ -906,7 +884,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = devlink_port->devlink;
int err;
@@ -1041,10 +1019,14 @@ static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_sb *devlink_sb;
struct sk_buff *msg;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
@@ -1146,11 +1128,15 @@ static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_sb *devlink_sb;
struct sk_buff *msg;
u16 pool_index;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
&pool_index);
if (err)
@@ -1255,12 +1241,16 @@ static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
enum devlink_sb_threshold_type threshold_type;
+ struct devlink_sb *devlink_sb;
u16 pool_index;
u32 size;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
&pool_index);
if (err)
@@ -1339,13 +1329,17 @@ nla_put_failure:
static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = devlink_port->devlink;
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_sb *devlink_sb;
struct sk_buff *msg;
u16 pool_index;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
&pool_index);
if (err)
@@ -1455,12 +1449,17 @@ static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_sb *devlink_sb;
u16 pool_index;
u32 threshold;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
&pool_index);
if (err)
@@ -1542,14 +1541,18 @@ nla_put_failure:
static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = devlink_port->devlink;
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_sb *devlink_sb;
struct sk_buff *msg;
enum devlink_sb_pool_type pool_type;
u16 tc_index;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_type_get_from_info(info, &pool_type);
if (err)
return err;
@@ -1689,14 +1692,19 @@ static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
enum devlink_sb_pool_type pool_type;
+ struct devlink_sb *devlink_sb;
u16 tc_index;
u16 pool_index;
u32 threshold;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_type_get_from_info(info, &pool_type);
if (err)
return err;
@@ -1724,8 +1732,12 @@ static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
const struct devlink_ops *ops = devlink->ops;
+ struct devlink_sb *devlink_sb;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
if (ops->sb_occ_snapshot)
return ops->sb_occ_snapshot(devlink, devlink_sb->index);
@@ -1736,8 +1748,12 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
const struct devlink_ops *ops = devlink->ops;
+ struct devlink_sb *devlink_sb;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
if (ops->sb_occ_max_clear)
return ops->sb_occ_max_clear(devlink, devlink_sb->index);
@@ -2921,7 +2937,7 @@ static void devlink_reload_netns_change(struct devlink *devlink,
DEVLINK_CMD_PARAM_NEW);
}
-static bool devlink_reload_supported(struct devlink *devlink)
+static bool devlink_reload_supported(const struct devlink *devlink)
{
return devlink->ops->reload_down && devlink->ops->reload_up;
}
@@ -2967,7 +2983,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
struct net *dest_net = NULL;
int err;
- if (!devlink_reload_supported(devlink) || !devlink->reload_enabled)
+ if (!devlink_reload_supported(devlink))
return -EOPNOTSUPP;
err = devlink_resources_validate(devlink, NULL, info);
@@ -7018,7 +7034,6 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_get_doit,
.dumpit = devlink_nl_cmd_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -7041,24 +7056,20 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_split_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_PORT_UNSPLIT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_unsplit_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_SB_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_get_doit,
.dumpit = devlink_nl_cmd_sb_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
/* can be retrieved by unprivileged users */
},
{
@@ -7066,8 +7077,6 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_pool_get_doit,
.dumpit = devlink_nl_cmd_sb_pool_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
/* can be retrieved by unprivileged users */
},
{
@@ -7075,16 +7084,13 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_pool_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
},
{
.cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_port_pool_get_doit,
.dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
- DEVLINK_NL_FLAG_NEED_SB,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
/* can be retrieved by unprivileged users */
},
{
@@ -7092,16 +7098,14 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_port_pool_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
- DEVLINK_NL_FLAG_NEED_SB,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
.cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit,
.dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
- DEVLINK_NL_FLAG_NEED_SB,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
/* can be retrieved by unprivileged users */
},
{
@@ -7109,60 +7113,50 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
- DEVLINK_NL_FLAG_NEED_SB,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
.cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_occ_snapshot_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
},
{
.cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_occ_max_clear_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
},
{
.cmd = DEVLINK_CMD_ESWITCH_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_eswitch_get_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_ESWITCH_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_eswitch_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_dpipe_table_get,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_dpipe_entries_get,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_dpipe_headers_get,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -7170,20 +7164,17 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_dpipe_table_counters_set,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_RESOURCE_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_resource_set,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_RESOURCE_DUMP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_resource_dump,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -7191,15 +7182,13 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_reload,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_PARAM_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_param_get_doit,
.dumpit = devlink_nl_cmd_param_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -7207,7 +7196,6 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_param_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_PORT_PARAM_GET,
@@ -7230,21 +7218,18 @@ static const struct genl_ops devlink_nl_ops[] = {
.doit = devlink_nl_cmd_region_get_doit,
.dumpit = devlink_nl_cmd_region_get_dumpit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_REGION_NEW,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_region_new,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_REGION_DEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_region_del,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_REGION_READ,
@@ -7252,14 +7237,12 @@ static const struct genl_ops devlink_nl_ops[] = {
GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = devlink_nl_cmd_region_read_dumpit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_INFO_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_info_get_doit,
.dumpit = devlink_nl_cmd_info_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -7317,46 +7300,39 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_flash_update,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_TRAP_GET,
.doit = devlink_nl_cmd_trap_get_doit,
.dumpit = devlink_nl_cmd_trap_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_TRAP_SET,
.doit = devlink_nl_cmd_trap_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_TRAP_GROUP_GET,
.doit = devlink_nl_cmd_trap_group_get_doit,
.dumpit = devlink_nl_cmd_trap_group_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_TRAP_GROUP_SET,
.doit = devlink_nl_cmd_trap_group_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_TRAP_POLICER_GET,
.doit = devlink_nl_cmd_trap_policer_get_doit,
.dumpit = devlink_nl_cmd_trap_policer_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_TRAP_POLICER_SET,
.doit = devlink_nl_cmd_trap_policer_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
};
@@ -7421,9 +7397,9 @@ EXPORT_SYMBOL_GPL(devlink_alloc);
*/
int devlink_register(struct devlink *devlink, struct device *dev)
{
- mutex_lock(&devlink_mutex);
devlink->dev = dev;
devlink->registered = true;
+ mutex_lock(&devlink_mutex);
list_add_tail(&devlink->list, &devlink_list);
devlink_notify(devlink, DEVLINK_CMD_NEW);
mutex_unlock(&devlink_mutex);
diff --git a/net/core/filter.c b/net/core/filter.c
index 2bf6624796d8..29e3455122f7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -77,14 +77,14 @@
#include <net/transp_v6.h>
#include <linux/btf_ids.h>
-int copy_bpf_fprog_from_user(struct sock_fprog *dst, void __user *src, int len)
+int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
{
if (in_compat_syscall()) {
struct compat_sock_fprog f32;
if (len != sizeof(f32))
return -EINVAL;
- if (copy_from_user(&f32, src, sizeof(f32)))
+ if (copy_from_sockptr(&f32, src, sizeof(f32)))
return -EFAULT;
memset(dst, 0, sizeof(*dst));
dst->len = f32.len;
@@ -92,7 +92,7 @@ int copy_bpf_fprog_from_user(struct sock_fprog *dst, void __user *src, int len)
} else {
if (len != sizeof(*dst))
return -EINVAL;
- if (copy_from_user(dst, src, sizeof(*dst)))
+ if (copy_from_sockptr(dst, src, sizeof(*dst)))
return -EFAULT;
}
@@ -9252,61 +9252,205 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
-#endif /* CONFIG_INET */
-DEFINE_BPF_DISPATCHER(xdp)
+DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled);
+EXPORT_SYMBOL(bpf_sk_lookup_enabled);
-void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
+BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
+ struct sock *, sk, u64, flags)
{
- bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
+ if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE |
+ BPF_SK_LOOKUP_F_NO_REUSEPORT)))
+ return -EINVAL;
+ if (unlikely(sk && sk_is_refcounted(sk)))
+ return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
+ if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
+ return -ESOCKTNOSUPPORT; /* reject connected sockets */
+
+ /* Check if socket is suitable for packet L3/L4 protocol */
+ if (sk && sk->sk_protocol != ctx->protocol)
+ return -EPROTOTYPE;
+ if (sk && sk->sk_family != ctx->family &&
+ (sk->sk_family == AF_INET || ipv6_only_sock(sk)))
+ return -EAFNOSUPPORT;
+
+ if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE))
+ return -EEXIST;
+
+ /* Select socket as lookup result */
+ ctx->selected_sk = sk;
+ ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT;
+ return 0;
}
-/* Define a list of socket types which can be the argument for
- * skc_to_*_sock() helpers. All these sockets should have
- * sock_common as the first argument in its memory layout.
- */
-#define BTF_SOCK_TYPE_xxx \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, "inet_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, "inet_connection_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, "inet_request_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, "inet_timewait_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, "request_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, "sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, "sock_common") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, "tcp_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, "tcp_request_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, "tcp_timewait_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, "tcp6_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, "udp_sock") \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, "udp6_sock")
-
-enum {
-#define BTF_SOCK_TYPE(name, str) name,
-BTF_SOCK_TYPE_xxx
-#undef BTF_SOCK_TYPE
-MAX_BTF_SOCK_TYPE,
+static const struct bpf_func_proto bpf_sk_lookup_assign_proto = {
+ .func = bpf_sk_lookup_assign,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_SOCKET_OR_NULL,
+ .arg3_type = ARG_ANYTHING,
};
-static int btf_sock_ids[MAX_BTF_SOCK_TYPE];
+static const struct bpf_func_proto *
+sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_perf_event_output:
+ return &bpf_event_output_data_proto;
+ case BPF_FUNC_sk_assign:
+ return &bpf_sk_lookup_assign_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
-#ifdef CONFIG_BPF_SYSCALL
-static const char *bpf_sock_types[] = {
-#define BTF_SOCK_TYPE(name, str) str,
-BTF_SOCK_TYPE_xxx
-#undef BTF_SOCK_TYPE
-};
+static bool sk_lookup_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (off < 0 || off >= sizeof(struct bpf_sk_lookup))
+ return false;
+ if (off % size != 0)
+ return false;
+ if (type != BPF_READ)
+ return false;
+
+ switch (off) {
+ case offsetof(struct bpf_sk_lookup, sk):
+ info->reg_type = PTR_TO_SOCKET_OR_NULL;
+ return size == sizeof(__u64);
-void init_btf_sock_ids(struct btf *btf)
+ case bpf_ctx_range(struct bpf_sk_lookup, family):
+ case bpf_ctx_range(struct bpf_sk_lookup, protocol):
+ case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4):
+ case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
+ case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
+ case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
+ case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
+ case bpf_ctx_range(struct bpf_sk_lookup, local_port):
+ bpf_ctx_record_field_size(info, sizeof(__u32));
+ return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
+
+ default:
+ return false;
+ }
+}
+
+static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
{
- int i, btf_id;
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct bpf_sk_lookup, sk):
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, selected_sk));
+ break;
- for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) {
- btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i],
- BTF_KIND_STRUCT);
- if (btf_id > 0)
- btf_sock_ids[i] = btf_id;
+ case offsetof(struct bpf_sk_lookup, family):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ family, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, protocol):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ protocol, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, remote_ip4):
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ v4.saddr, 4, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, local_ip4):
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ v4.daddr, 4, target_size));
+ break;
+
+ case bpf_ctx_range_till(struct bpf_sk_lookup,
+ remote_ip6[0], remote_ip6[3]): {
+#if IS_ENABLED(CONFIG_IPV6)
+ int off = si->off;
+
+ off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]);
+ off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, v6.saddr));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+ }
+ case bpf_ctx_range_till(struct bpf_sk_lookup,
+ local_ip6[0], local_ip6[3]): {
+#if IS_ENABLED(CONFIG_IPV6)
+ int off = si->off;
+
+ off -= offsetof(struct bpf_sk_lookup, local_ip6[0]);
+ off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, v6.daddr));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
}
+ case offsetof(struct bpf_sk_lookup, remote_port):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ sport, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, local_port):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ dport, 2, target_size));
+ break;
+ }
+
+ return insn - insn_buf;
}
+
+const struct bpf_prog_ops sk_lookup_prog_ops = {
+};
+
+const struct bpf_verifier_ops sk_lookup_verifier_ops = {
+ .get_func_proto = sk_lookup_func_proto,
+ .is_valid_access = sk_lookup_is_valid_access,
+ .convert_ctx_access = sk_lookup_convert_ctx_access,
+};
+
+#endif /* CONFIG_INET */
+
+DEFINE_BPF_DISPATCHER(xdp)
+
+void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
+{
+ bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
+}
+
+#ifdef CONFIG_DEBUG_INFO_BTF
+BTF_ID_LIST_GLOBAL(btf_sock_ids)
+#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+#else
+u32 btf_sock_ids[MAX_BTF_SOCK_TYPE];
#endif
static bool check_arg_btf_id(u32 btf_id, u32 arg)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 142a8824f0a8..29806eb765cf 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -383,6 +383,23 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
}
EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
+void skb_flow_dissect_hash(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ struct flow_dissector_key_hash *key;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_HASH))
+ return;
+
+ key = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_HASH,
+ target_container);
+
+ key->hash = skb_get_hash_raw(skb);
+}
+EXPORT_SYMBOL(skb_flow_dissect_hash);
+
static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
diff --git a/net/core/sock.c b/net/core/sock.c
index d828bfe1c47d..2c5dd1397775 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -113,6 +113,7 @@
#include <linux/static_key.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>
+#include <linux/compat.h>
#include <linux/uaccess.h>
@@ -360,7 +361,8 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
return sizeof(tv);
}
-static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool old_timeval)
+static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
+ bool old_timeval)
{
struct __kernel_sock_timeval tv;
@@ -370,7 +372,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool
if (optlen < sizeof(tv32))
return -EINVAL;
- if (copy_from_user(&tv32, optval, sizeof(tv32)))
+ if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
return -EFAULT;
tv.tv_sec = tv32.tv_sec;
tv.tv_usec = tv32.tv_usec;
@@ -379,14 +381,14 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool
if (optlen < sizeof(old_tv))
return -EINVAL;
- if (copy_from_user(&old_tv, optval, sizeof(old_tv)))
+ if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
return -EFAULT;
tv.tv_sec = old_tv.tv_sec;
tv.tv_usec = old_tv.tv_usec;
} else {
if (optlen < sizeof(tv))
return -EINVAL;
- if (copy_from_user(&tv, optval, sizeof(tv)))
+ if (copy_from_sockptr(&tv, optval, sizeof(tv)))
return -EFAULT;
}
if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
@@ -608,8 +610,7 @@ int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
}
EXPORT_SYMBOL(sock_bindtoindex);
-static int sock_setbindtodevice(struct sock *sk, char __user *optval,
- int optlen)
+static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
@@ -631,7 +632,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
memset(devname, 0, sizeof(devname));
ret = -EFAULT;
- if (copy_from_user(devname, optval, optlen))
+ if (copy_from_sockptr(devname, optval, optlen))
goto out;
index = 0;
@@ -825,7 +826,7 @@ EXPORT_SYMBOL(sock_set_rcvbuf);
*/
int sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock_txtime sk_txtime;
struct sock *sk = sock->sk;
@@ -844,7 +845,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
valbool = val ? 1 : 0;
@@ -957,7 +958,7 @@ set_sndbuf:
ret = -EINVAL; /* 1003.1g */
break;
}
- if (copy_from_user(&ling, optval, sizeof(ling))) {
+ if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
ret = -EFAULT;
break;
}
@@ -1051,12 +1052,14 @@ set_sndbuf:
case SO_RCVTIMEO_OLD:
case SO_RCVTIMEO_NEW:
- ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen, optname == SO_RCVTIMEO_OLD);
+ ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
+ optlen, optname == SO_RCVTIMEO_OLD);
break;
case SO_SNDTIMEO_OLD:
case SO_SNDTIMEO_NEW:
- ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen, optname == SO_SNDTIMEO_OLD);
+ ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
+ optlen, optname == SO_SNDTIMEO_OLD);
break;
case SO_ATTACH_FILTER: {
@@ -1073,7 +1076,7 @@ set_sndbuf:
u32 ufd;
ret = -EFAULT;
- if (copy_from_user(&ufd, optval, sizeof(ufd)))
+ if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
break;
ret = sk_attach_bpf(ufd, sk);
@@ -1094,7 +1097,7 @@ set_sndbuf:
u32 ufd;
ret = -EFAULT;
- if (copy_from_user(&ufd, optval, sizeof(ufd)))
+ if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
break;
ret = sk_reuseport_attach_bpf(ufd, sk);
@@ -1174,7 +1177,7 @@ set_sndbuf:
if (sizeof(ulval) != sizeof(val) &&
optlen >= sizeof(ulval) &&
- get_user(ulval, (unsigned long __user *)optval)) {
+ copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
ret = -EFAULT;
break;
}
@@ -1217,7 +1220,7 @@ set_sndbuf:
if (optlen != sizeof(struct sock_txtime)) {
ret = -EINVAL;
break;
- } else if (copy_from_user(&sk_txtime, optval,
+ } else if (copy_from_sockptr(&sk_txtime, optval,
sizeof(struct sock_txtime))) {
ret = -EFAULT;
break;
@@ -3208,7 +3211,7 @@ EXPORT_SYMBOL(sock_common_recvmsg);
* Set socket options on an inet socket.
*/
int sock_common_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 434eea91b767..9cc9d1ee6cdb 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -295,7 +295,7 @@ int dccp_disconnect(struct sock *sk, int flags);
int dccp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
int dccp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+ sockptr_t optval, unsigned int optlen);
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index fd92d3fe321f..2e9e8449698f 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -375,6 +375,15 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
goto out;
switch (cmd) {
+ case SIOCOUTQ: {
+ int amount = sk_wmem_alloc_get(sk);
+ /* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and
+ * always 0, comparably to UDP.
+ */
+
+ rc = put_user(amount, (int __user *)arg);
+ }
+ break;
case SIOCINQ: {
struct sk_buff *skb;
unsigned long amount = 0;
@@ -402,7 +411,7 @@ out:
EXPORT_SYMBOL_GPL(dccp_ioctl);
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_service_list *sl = NULL;
@@ -417,9 +426,9 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
return -ENOMEM;
sl->dccpsl_nr = optlen / sizeof(u32) - 1;
- if (copy_from_user(sl->dccpsl_list,
- optval + sizeof(service),
- optlen - sizeof(service)) ||
+ sockptr_advance(optval, sizeof(service));
+ if (copy_from_sockptr(sl->dccpsl_list, optval,
+ optlen - sizeof(service)) ||
dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
kfree(sl);
return -EFAULT;
@@ -473,7 +482,7 @@ static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
}
static int dccp_setsockopt_ccid(struct sock *sk, int type,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
u8 *val;
int rc = 0;
@@ -481,7 +490,7 @@ static int dccp_setsockopt_ccid(struct sock *sk, int type,
if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
return -EINVAL;
- val = memdup_user(optval, optlen);
+ val = memdup_sockptr(optval, optlen);
if (IS_ERR(val))
return PTR_ERR(val);
@@ -498,7 +507,7 @@ static int dccp_setsockopt_ccid(struct sock *sk, int type,
}
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct dccp_sock *dp = dccp_sk(sk);
int val, err = 0;
@@ -520,7 +529,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
if (optlen < (int)sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
return -EFAULT;
if (optname == DCCP_SOCKOPT_SERVICE)
@@ -563,8 +572,8 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
return err;
}
-int dccp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen)
{
if (level != SOL_DCCP)
return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 7d7ae2dd69b8..3b53d766789d 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -150,7 +150,8 @@ static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
static struct hlist_head dn_wild_sk;
static atomic_long_t decnet_memory_allocated;
-static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags);
+static int __dn_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen, int flags);
static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
static struct hlist_head *dn_find_list(struct sock *sk)
@@ -1320,7 +1321,8 @@ out:
return err;
}
-static int dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
+static int dn_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int err;
@@ -1338,7 +1340,8 @@ static int dn_setsockopt(struct socket *sock, int level, int optname, char __use
return err;
}
-static int __dn_setsockopt(struct socket *sock, int level,int optname, char __user *optval, unsigned int optlen, int flags)
+static int __dn_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen, int flags)
{
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
@@ -1354,13 +1357,13 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us
} u;
int err;
- if (optlen && !optval)
+ if (optlen && sockptr_is_null(optval))
return -EINVAL;
if (optlen > sizeof(u))
return -EINVAL;
- if (copy_from_user(&u, optval, optlen))
+ if (copy_from_sockptr(&u, optval, optlen))
return -EFAULT;
switch (optname) {
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index e055efff390b..c0ffc7a2b65f 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -727,8 +727,12 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
ports = of_get_child_by_name(dn, "ports");
if (!ports) {
- dev_err(ds->dev, "no ports child node found\n");
- return -EINVAL;
+ /* The second possibility is "ethernet-ports" */
+ ports = of_get_child_by_name(dn, "ethernet-ports");
+ if (!ports) {
+ dev_err(ds->dev, "no ports child node found\n");
+ return -EINVAL;
+ }
}
for_each_available_child_of_node(ports, port) {
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 0a90911ae31b..61615ebc70e9 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -186,17 +186,6 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
}
}
-static int dsa_master_get_phys_port_name(struct net_device *dev,
- char *name, size_t len)
-{
- struct dsa_port *cpu_dp = dev->dsa_ptr;
-
- if (snprintf(name, len, "p%d", cpu_dp->index) >= len)
- return -EINVAL;
-
- return 0;
-}
-
static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
@@ -228,7 +217,6 @@ static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
static const struct dsa_netdevice_ops dsa_netdev_ops = {
.ndo_do_ioctl = dsa_master_ioctl,
- .ndo_get_phys_port_name = dsa_master_get_phys_port_name,
};
static int dsa_master_ethtool_setup(struct net_device *dev)
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 94ae9662133e..a45a0401adc5 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -382,7 +382,7 @@ static int raw_getsockopt(struct sock *sk, int level, int optname,
}
static int raw_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
return -EOPNOTSUPP;
}
@@ -872,7 +872,7 @@ static int dgram_getsockopt(struct sock *sk, int level, int optname,
}
static int dgram_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct dgram_sock *ro = dgram_sk(sk);
struct net *net = sock_net(sk);
@@ -882,7 +882,7 @@ static int dgram_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
return -EFAULT;
lock_sock(sk);
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
index 9063c6767d34..94f18d2352d0 100644
--- a/net/ipv4/bpfilter/sockopt.c
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -21,8 +21,7 @@ void bpfilter_umh_cleanup(struct umd_info *info)
}
EXPORT_SYMBOL_GPL(bpfilter_umh_cleanup);
-static int bpfilter_mbox_request(struct sock *sk, int optname,
- char __user *optval,
+static int bpfilter_mbox_request(struct sock *sk, int optname, sockptr_t optval,
unsigned int optlen, bool is_set)
{
int err;
@@ -52,20 +51,23 @@ out:
return err;
}
-int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval,
unsigned int optlen)
{
return bpfilter_mbox_request(sk, optname, optval, optlen, true);
}
-int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
- int __user *optlen)
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname,
+ char __user *user_optval, int __user *optlen)
{
- int len;
+ sockptr_t optval;
+ int err, len;
if (get_user(len, optlen))
return -EFAULT;
-
+ err = init_user_sockptr(&optval, user_optval);
+ if (err)
+ return err;
return bpfilter_mbox_request(sk, optname, optval, len, false);
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 793aebf07c2a..cf36f955bfe6 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1151,29 +1151,16 @@ static bool ip_icmp_error_rfc4884_validate(const struct sk_buff *skb, int off)
}
void ip_icmp_error_rfc4884(const struct sk_buff *skb,
- struct sock_ee_data_rfc4884 *out)
+ struct sock_ee_data_rfc4884 *out,
+ int thlen, int off)
{
- int hlen, off;
+ int hlen;
- switch (icmp_hdr(skb)->type) {
- case ICMP_DEST_UNREACH:
- case ICMP_TIME_EXCEEDED:
- case ICMP_PARAMETERPROB:
- break;
- default:
- return;
- }
-
- /* outer headers up to inner iph. skb->data is at inner payload */
- hlen = -skb_transport_offset(skb) - sizeof(struct icmphdr);
-
- /* per rfc 791: maximum packet length of 576 bytes */
- if (hlen + skb->len > 576)
- return;
+ /* original datagram headers: end of icmph to payload (skb->data) */
+ hlen = -skb_transport_offset(skb) - thlen;
/* per rfc 4884: minimal datagram length of 128 bytes */
- off = icmp_hdr(skb)->un.reserved[1] * sizeof(u32);
- if (off < 128)
+ if (off < 128 || off < hlen)
return;
/* kernel has stripped headers: return payload offset in bytes */
@@ -1186,6 +1173,7 @@ void ip_icmp_error_rfc4884(const struct sk_buff *skb,
if (!ip_icmp_error_rfc4884_validate(skb, off))
out->flags |= SO_EE_RFC4884_FLAG_INVALID;
}
+EXPORT_SYMBOL_GPL(ip_icmp_error_rfc4884);
int icmp_err(struct sk_buff *skb, u32 info)
{
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2bbaaf0c7176..4eb4cd8d20dd 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -246,6 +246,21 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned short hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 phash;
+
+ if (sk->sk_reuseport) {
+ phash = inet_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
+ }
+ return reuse_sk;
+}
+
/*
* Here are some nice properties to exploit here. The BSD API
* does not allow a listening sock to specify the remote port nor the
@@ -265,21 +280,17 @@ static struct sock *inet_lhash2_lookup(struct net *net,
struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
- u32 phash = 0;
inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
sk = (struct sock *)icsk;
score = compute_score(sk, net, hnum, daddr,
dif, sdif, exact_dif);
if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- return result;
- }
+ result = lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum);
+ if (result)
+ return result;
+
result = sk;
hiscore = score;
}
@@ -288,6 +299,29 @@ static struct sock *inet_lhash2_lookup(struct net *net,
return result;
}
+static inline struct sock *inet_lookup_run_bpf(struct net *net,
+ struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, u16 hnum)
+{
+ struct sock *sk, *reuse_sk;
+ bool no_reuseport;
+
+ if (hashinfo != &tcp_hashinfo)
+ return NULL; /* only TCP is supported */
+
+ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP,
+ saddr, sport, daddr, hnum, &sk);
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
+ return sk;
+
+ reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+ if (reuse_sk)
+ sk = reuse_sk;
+ return sk;
+}
+
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -299,6 +333,14 @@ struct sock *__inet_lookup_listener(struct net *net,
struct sock *result = NULL;
unsigned int hash2;
+ /* Lookup redirect from BPF */
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+ result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
+ saddr, sport, daddr, hnum);
+ if (result)
+ goto done;
+ }
+
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ddaa01ec2bce..948747aac4e2 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -519,15 +519,20 @@ void ip_options_undo(struct ip_options *opt)
}
}
-static struct ip_options_rcu *ip_options_get_alloc(const int optlen)
+int ip_options_get(struct net *net, struct ip_options_rcu **optp,
+ sockptr_t data, int optlen)
{
- return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3),
+ struct ip_options_rcu *opt;
+
+ opt = kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3),
GFP_KERNEL);
-}
+ if (!opt)
+ return -ENOMEM;
+ if (optlen && copy_from_sockptr(opt->opt.__data, data, optlen)) {
+ kfree(opt);
+ return -EFAULT;
+ }
-static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp,
- struct ip_options_rcu *opt, int optlen)
-{
while (optlen & 3)
opt->opt.__data[optlen++] = IPOPT_END;
opt->opt.optlen = optlen;
@@ -540,32 +545,6 @@ static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp,
return 0;
}
-int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
- unsigned char __user *data, int optlen)
-{
- struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
-
- if (!opt)
- return -ENOMEM;
- if (optlen && copy_from_user(opt->opt.__data, data, optlen)) {
- kfree(opt);
- return -EFAULT;
- }
- return ip_options_get_finish(net, optp, opt, optlen);
-}
-
-int ip_options_get(struct net *net, struct ip_options_rcu **optp,
- unsigned char *data, int optlen)
-{
- struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
-
- if (!opt)
- return -ENOMEM;
- if (optlen)
- memcpy(opt->opt.__data, data, optlen);
- return ip_options_get_finish(net, optp, opt, optlen);
-}
-
void ip_forward_options(struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index a5ea02d7a183..d2c223554ff7 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -280,7 +280,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
err = cmsg->cmsg_len - sizeof(struct cmsghdr);
/* Our caller is responsible for freeing ipc->opt */
- err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
+ err = ip_options_get(net, &ipc->opt,
+ KERNEL_SOCKPTR(CMSG_DATA(cmsg)),
err < 40 ? err : 40);
if (err)
return err;
@@ -389,6 +390,18 @@ int ip_ra_control(struct sock *sk, unsigned char on,
return 0;
}
+static void ipv4_icmp_error_rfc4884(const struct sk_buff *skb,
+ struct sock_ee_data_rfc4884 *out)
+{
+ switch (icmp_hdr(skb)->type) {
+ case ICMP_DEST_UNREACH:
+ case ICMP_TIME_EXCEEDED:
+ case ICMP_PARAMETERPROB:
+ ip_icmp_error_rfc4884(skb, out, sizeof(struct icmphdr),
+ icmp_hdr(skb)->un.reserved[1] * 4);
+ }
+}
+
void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
{
@@ -412,7 +425,7 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
if (skb_pull(skb, payload - skb->data)) {
if (inet_sk(sk)->recverr_rfc4884)
- ip_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
+ ipv4_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
skb_reset_transport_header(skb);
if (sock_queue_err_skb(sk, skb) == 0)
@@ -682,15 +695,15 @@ Eaddrnotavail:
return -EADDRNOTAVAIL;
}
-static int copy_group_source_from_user(struct group_source_req *greqs,
- void __user *optval, int optlen)
+static int copy_group_source_from_sockptr(struct group_source_req *greqs,
+ sockptr_t optval, int optlen)
{
if (in_compat_syscall()) {
struct compat_group_source_req gr32;
if (optlen != sizeof(gr32))
return -EINVAL;
- if (copy_from_user(&gr32, optval, sizeof(gr32)))
+ if (copy_from_sockptr(&gr32, optval, sizeof(gr32)))
return -EFAULT;
greqs->gsr_interface = gr32.gsr_interface;
greqs->gsr_group = gr32.gsr_group;
@@ -698,7 +711,7 @@ static int copy_group_source_from_user(struct group_source_req *greqs,
} else {
if (optlen != sizeof(*greqs))
return -EINVAL;
- if (copy_from_user(greqs, optval, sizeof(*greqs)))
+ if (copy_from_sockptr(greqs, optval, sizeof(*greqs)))
return -EFAULT;
}
@@ -706,14 +719,14 @@ static int copy_group_source_from_user(struct group_source_req *greqs,
}
static int do_mcast_group_source(struct sock *sk, int optname,
- void __user *optval, int optlen)
+ sockptr_t optval, int optlen)
{
struct group_source_req greqs;
struct ip_mreq_source mreqs;
struct sockaddr_in *psin;
int omode, add, err;
- err = copy_group_source_from_user(&greqs, optval, optlen);
+ err = copy_group_source_from_sockptr(&greqs, optval, optlen);
if (err)
return err;
@@ -753,8 +766,7 @@ static int do_mcast_group_source(struct sock *sk, int optname,
return ip_mc_source(add, omode, sk, &mreqs, greqs.gsr_interface);
}
-static int ip_set_mcast_msfilter(struct sock *sk, void __user *optval,
- int optlen)
+static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
{
struct group_filter *gsf = NULL;
int err;
@@ -764,7 +776,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, void __user *optval,
if (optlen > sysctl_optmem_max)
return -ENOBUFS;
- gsf = memdup_user(optval, optlen);
+ gsf = memdup_sockptr(optval, optlen);
if (IS_ERR(gsf))
return PTR_ERR(gsf);
@@ -785,7 +797,7 @@ out_free_gsf:
return err;
}
-static int compat_ip_set_mcast_msfilter(struct sock *sk, void __user *optval,
+static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
int optlen)
{
const int size0 = offsetof(struct compat_group_filter, gf_slist);
@@ -805,7 +817,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, void __user *optval,
gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
err = -EFAULT;
- if (copy_from_user(gf32, optval, optlen))
+ if (copy_from_sockptr(gf32, optval, optlen))
goto out_free_gsf;
/* numsrc >= (4G-140)/128 overflow in 32 bits */
@@ -830,7 +842,7 @@ out_free_gsf:
}
static int ip_mcast_join_leave(struct sock *sk, int optname,
- void __user *optval, int optlen)
+ sockptr_t optval, int optlen)
{
struct ip_mreqn mreq = { };
struct sockaddr_in *psin;
@@ -838,7 +850,7 @@ static int ip_mcast_join_leave(struct sock *sk, int optname,
if (optlen < sizeof(struct group_req))
return -EINVAL;
- if (copy_from_user(&greq, optval, sizeof(greq)))
+ if (copy_from_sockptr(&greq, optval, sizeof(greq)))
return -EFAULT;
psin = (struct sockaddr_in *)&greq.gr_group;
@@ -852,7 +864,7 @@ static int ip_mcast_join_leave(struct sock *sk, int optname,
}
static int compat_ip_mcast_join_leave(struct sock *sk, int optname,
- void __user *optval, int optlen)
+ sockptr_t optval, int optlen)
{
struct compat_group_req greq;
struct ip_mreqn mreq = { };
@@ -860,7 +872,7 @@ static int compat_ip_mcast_join_leave(struct sock *sk, int optname,
if (optlen < sizeof(struct compat_group_req))
return -EINVAL;
- if (copy_from_user(&greq, optval, sizeof(greq)))
+ if (copy_from_sockptr(&greq, optval, sizeof(greq)))
return -EFAULT;
psin = (struct sockaddr_in *)&greq.gr_group;
@@ -874,8 +886,8 @@ static int compat_ip_mcast_join_leave(struct sock *sk, int optname,
return ip_mc_leave_group(sk, &mreq);
}
-static int do_ip_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval, unsigned int optlen)
+static int do_ip_setsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
@@ -909,12 +921,12 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case IP_RECVFRAGSIZE:
case IP_RECVERR_RFC4884:
if (optlen >= sizeof(int)) {
- if (get_user(val, (int __user *) optval))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
} else if (optlen >= sizeof(char)) {
unsigned char ucval;
- if (get_user(ucval, (unsigned char __user *) optval))
+ if (copy_from_sockptr(&ucval, optval, sizeof(ucval)))
return -EFAULT;
val = (int) ucval;
}
@@ -939,8 +951,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
if (optlen > 40)
goto e_inval;
- err = ip_options_get_from_user(sock_net(sk), &opt,
- optval, optlen);
+ err = ip_options_get(sock_net(sk), &opt, optval, optlen);
if (err)
break;
old = rcu_dereference_protected(inet->inet_opt,
@@ -1138,17 +1149,17 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -EFAULT;
if (optlen >= sizeof(struct ip_mreqn)) {
- if (copy_from_user(&mreq, optval, sizeof(mreq)))
+ if (copy_from_sockptr(&mreq, optval, sizeof(mreq)))
break;
} else {
memset(&mreq, 0, sizeof(mreq));
if (optlen >= sizeof(struct ip_mreq)) {
- if (copy_from_user(&mreq, optval,
- sizeof(struct ip_mreq)))
+ if (copy_from_sockptr(&mreq, optval,
+ sizeof(struct ip_mreq)))
break;
} else if (optlen >= sizeof(struct in_addr)) {
- if (copy_from_user(&mreq.imr_address, optval,
- sizeof(struct in_addr)))
+ if (copy_from_sockptr(&mreq.imr_address, optval,
+ sizeof(struct in_addr)))
break;
}
}
@@ -1200,11 +1211,12 @@ static int do_ip_setsockopt(struct sock *sk, int level,
goto e_inval;
err = -EFAULT;
if (optlen >= sizeof(struct ip_mreqn)) {
- if (copy_from_user(&mreq, optval, sizeof(mreq)))
+ if (copy_from_sockptr(&mreq, optval, sizeof(mreq)))
break;
} else {
memset(&mreq, 0, sizeof(mreq));
- if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq)))
+ if (copy_from_sockptr(&mreq, optval,
+ sizeof(struct ip_mreq)))
break;
}
@@ -1224,7 +1236,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -ENOBUFS;
break;
}
- msf = memdup_user(optval, optlen);
+ msf = memdup_sockptr(optval, optlen);
if (IS_ERR(msf)) {
err = PTR_ERR(msf);
break;
@@ -1255,7 +1267,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
if (optlen != sizeof(struct ip_mreq_source))
goto e_inval;
- if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
+ if (copy_from_sockptr(&mreqs, optval, sizeof(mreqs))) {
err = -EFAULT;
break;
}
@@ -1401,8 +1413,8 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
skb_dst_drop(skb);
}
-int ip_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval, unsigned int optlen)
+int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen)
{
int err;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 678639c01e48..cdf3a40f9ff5 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1341,7 +1341,7 @@ static void mrtsock_destruct(struct sock *sk)
* MOSPF/PIM router set up we can clean this up.
*/
-int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
+int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
unsigned int optlen)
{
struct net *net = sock_net(sk);
@@ -1413,7 +1413,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
ret = -EINVAL;
break;
}
- if (copy_from_user(&vif, optval, sizeof(vif))) {
+ if (copy_from_sockptr(&vif, optval, sizeof(vif))) {
ret = -EFAULT;
break;
}
@@ -1441,7 +1441,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
ret = -EINVAL;
break;
}
- if (copy_from_user(&mfc, optval, sizeof(mfc))) {
+ if (copy_from_sockptr(&val, optval, sizeof(val))) {
ret = -EFAULT;
break;
}
@@ -1459,7 +1459,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
ret = -EINVAL;
break;
}
- if (get_user(val, (int __user *)optval)) {
+ if (copy_from_sockptr(&val, optval, sizeof(val))) {
ret = -EFAULT;
break;
}
@@ -1471,7 +1471,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
ret = -EINVAL;
break;
}
- if (get_user(val, (int __user *)optval)) {
+ if (copy_from_sockptr(&val, optval, sizeof(val))) {
ret = -EFAULT;
break;
}
@@ -1486,7 +1486,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
ret = -EINVAL;
break;
}
- if (get_user(val, (int __user *)optval)) {
+ if (copy_from_sockptr(&val, optval, sizeof(val))) {
ret = -EFAULT;
break;
}
@@ -1508,7 +1508,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
ret = -EINVAL;
break;
}
- if (get_user(uval, (u32 __user *)optval)) {
+ if (copy_from_sockptr(&uval, optval, sizeof(uval))) {
ret = -EFAULT;
break;
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 2c8a4dad39d7..f5b26ef17820 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-only
+
/*
* Packet matching code for ARP packets.
*
@@ -947,8 +947,7 @@ static int __do_replace(struct net *net, const char *name,
return ret;
}
-static int do_replace(struct net *net, const void __user *user,
- unsigned int len)
+static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret;
struct arpt_replace tmp;
@@ -956,7 +955,7 @@ static int do_replace(struct net *net, const void __user *user,
void *loc_cpu_entry;
struct arpt_entry *iter;
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
/* overflow check */
@@ -972,8 +971,8 @@ static int do_replace(struct net *net, const void __user *user,
return -ENOMEM;
loc_cpu_entry = newinfo->entries;
- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
- tmp.size) != 0) {
+ sockptr_advance(arg, sizeof(tmp));
+ if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
}
@@ -996,8 +995,7 @@ static int do_replace(struct net *net, const void __user *user,
return ret;
}
-static int do_add_counters(struct net *net, const void __user *user,
- unsigned int len)
+static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
{
unsigned int i;
struct xt_counters_info tmp;
@@ -1008,7 +1006,7 @@ static int do_add_counters(struct net *net, const void __user *user,
struct arpt_entry *iter;
unsigned int addend;
- paddc = xt_copy_counters_from_user(user, len, &tmp);
+ paddc = xt_copy_counters(arg, len, &tmp);
if (IS_ERR(paddc))
return PTR_ERR(paddc);
@@ -1245,8 +1243,7 @@ out_unlock:
return ret;
}
-static int compat_do_replace(struct net *net, void __user *user,
- unsigned int len)
+static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret;
struct compat_arpt_replace tmp;
@@ -1254,7 +1251,7 @@ static int compat_do_replace(struct net *net, void __user *user,
void *loc_cpu_entry;
struct arpt_entry *iter;
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
/* overflow check */
@@ -1270,7 +1267,8 @@ static int compat_do_replace(struct net *net, void __user *user,
return -ENOMEM;
loc_cpu_entry = newinfo->entries;
- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) {
+ sockptr_advance(arg, sizeof(tmp));
+ if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
}
@@ -1402,7 +1400,8 @@ static int compat_get_entries(struct net *net,
}
#endif
-static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+static int do_arpt_set_ctl(struct sock *sk, int cmd, sockptr_t arg,
+ unsigned int len)
{
int ret;
@@ -1413,14 +1412,14 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
case ARPT_SO_SET_REPLACE:
#ifdef CONFIG_COMPAT
if (in_compat_syscall())
- ret = compat_do_replace(sock_net(sk), user, len);
+ ret = compat_do_replace(sock_net(sk), arg, len);
else
#endif
- ret = do_replace(sock_net(sk), user, len);
+ ret = do_replace(sock_net(sk), arg, len);
break;
case ARPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sock_net(sk), user, len);
+ ret = do_add_counters(sock_net(sk), arg, len);
break;
default:
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 161901dd1cae..f2a9680303d8 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1102,7 +1102,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
}
static int
-do_replace(struct net *net, const void __user *user, unsigned int len)
+do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret;
struct ipt_replace tmp;
@@ -1110,7 +1110,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
/* overflow check */
@@ -1126,8 +1126,8 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
return -ENOMEM;
loc_cpu_entry = newinfo->entries;
- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
- tmp.size) != 0) {
+ sockptr_advance(arg, sizeof(tmp));
+ if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
}
@@ -1151,8 +1151,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
}
static int
-do_add_counters(struct net *net, const void __user *user,
- unsigned int len)
+do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
{
unsigned int i;
struct xt_counters_info tmp;
@@ -1163,7 +1162,7 @@ do_add_counters(struct net *net, const void __user *user,
struct ipt_entry *iter;
unsigned int addend;
- paddc = xt_copy_counters_from_user(user, len, &tmp);
+ paddc = xt_copy_counters(arg, len, &tmp);
if (IS_ERR(paddc))
return PTR_ERR(paddc);
@@ -1485,7 +1484,7 @@ out_unlock:
}
static int
-compat_do_replace(struct net *net, void __user *user, unsigned int len)
+compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret;
struct compat_ipt_replace tmp;
@@ -1493,7 +1492,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
/* overflow check */
@@ -1509,8 +1508,8 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
return -ENOMEM;
loc_cpu_entry = newinfo->entries;
- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
- tmp.size) != 0) {
+ sockptr_advance(arg, sizeof(tmp));
+ if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
}
@@ -1611,7 +1610,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
#endif
static int
-do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+do_ipt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len)
{
int ret;
@@ -1622,14 +1621,14 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
case IPT_SO_SET_REPLACE:
#ifdef CONFIG_COMPAT
if (in_compat_syscall())
- ret = compat_do_replace(sock_net(sk), user, len);
+ ret = compat_do_replace(sock_net(sk), arg, len);
else
#endif
- ret = do_replace(sock_net(sk), user, len);
+ ret = do_replace(sock_net(sk), arg, len);
break;
case IPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sock_net(sk), user, len);
+ ret = do_add_counters(sock_net(sk), arg, len);
break;
default:
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2a57d633b31e..6fd4330287c2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -809,11 +809,11 @@ static int raw_sk_init(struct sock *sk)
return 0;
}
-static int raw_seticmpfilter(struct sock *sk, char __user *optval, int optlen)
+static int raw_seticmpfilter(struct sock *sk, sockptr_t optval, int optlen)
{
if (optlen > sizeof(struct icmp_filter))
optlen = sizeof(struct icmp_filter);
- if (copy_from_user(&raw_sk(sk)->filter, optval, optlen))
+ if (copy_from_sockptr(&raw_sk(sk)->filter, optval, optlen))
return -EFAULT;
return 0;
}
@@ -838,7 +838,7 @@ out: return ret;
}
static int do_raw_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
if (optname == ICMP_FILTER) {
if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
@@ -850,7 +850,7 @@ static int do_raw_setsockopt(struct sock *sk, int level, int optname,
}
static int raw_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
if (level != SOL_RAW)
return ip_setsockopt(sk, level, optname, optval, optlen);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 58ede3d62b2e..27de9380ed14 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2764,7 +2764,7 @@ static inline bool tcp_can_repair_sock(const struct sock *sk)
(sk->sk_state != TCP_LISTEN);
}
-static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
+static int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len)
{
struct tcp_repair_window opt;
@@ -2774,7 +2774,7 @@ static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int l
if (len != sizeof(opt))
return -EINVAL;
- if (copy_from_user(&opt, optbuf, sizeof(opt)))
+ if (copy_from_sockptr(&opt, optbuf, sizeof(opt)))
return -EFAULT;
if (opt.max_window < opt.snd_wnd)
@@ -2796,17 +2796,17 @@ static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int l
return 0;
}
-static int tcp_repair_options_est(struct sock *sk,
- struct tcp_repair_opt __user *optbuf, unsigned int len)
+static int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf,
+ unsigned int len)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_repair_opt opt;
while (len >= sizeof(opt)) {
- if (copy_from_user(&opt, optbuf, sizeof(opt)))
+ if (copy_from_sockptr(&opt, optbuf, sizeof(opt)))
return -EFAULT;
- optbuf++;
+ sockptr_advance(optbuf, sizeof(opt));
len -= sizeof(opt);
switch (opt.opt_code) {
@@ -3020,8 +3020,8 @@ EXPORT_SYMBOL(tcp_sock_set_keepcnt);
/*
* Socket option code for TCP.
*/
-static int do_tcp_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval, unsigned int optlen)
+static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3037,7 +3037,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
if (optlen < 1)
return -EINVAL;
- val = strncpy_from_user(name, optval,
+ val = strncpy_from_sockptr(name, optval,
min_t(long, TCP_CA_NAME_MAX-1, optlen));
if (val < 0)
return -EFAULT;
@@ -3056,7 +3056,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
if (optlen < 1)
return -EINVAL;
- val = strncpy_from_user(name, optval,
+ val = strncpy_from_sockptr(name, optval,
min_t(long, TCP_ULP_NAME_MAX - 1,
optlen));
if (val < 0)
@@ -3079,7 +3079,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
optlen != TCP_FASTOPEN_KEY_BUF_LENGTH)
return -EINVAL;
- if (copy_from_user(key, optval, optlen))
+ if (copy_from_sockptr(key, optval, optlen))
return -EFAULT;
if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH)
@@ -3095,7 +3095,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3174,9 +3174,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
if (!tp->repair)
err = -EINVAL;
else if (sk->sk_state == TCP_ESTABLISHED)
- err = tcp_repair_options_est(sk,
- (struct tcp_repair_opt __user *)optval,
- optlen);
+ err = tcp_repair_options_est(sk, optval, optlen);
else
err = -EPERM;
break;
@@ -3325,7 +3323,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
return err;
}
-int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
+int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
unsigned int optlen)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cd81b6e04efb..f8913923a6c0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -76,6 +76,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>
+#include <linux/btf_ids.h>
#include <crypto/hash.h>
#include <linux/scatterlist.h>
@@ -1194,7 +1195,7 @@ static void tcp_clear_md5_list(struct sock *sk)
}
static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
- char __user *optval, int optlen)
+ sockptr_t optval, int optlen)
{
struct tcp_md5sig cmd;
struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
@@ -1205,7 +1206,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
if (optlen < sizeof(cmd))
return -EINVAL;
- if (copy_from_user(&cmd, optval, sizeof(cmd)))
+ if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
return -EFAULT;
if (sin->sin_family != AF_INET)
@@ -2946,7 +2947,7 @@ static void bpf_iter_fini_tcp(void *priv_data)
bpf_iter_fini_seq_net(priv_data);
}
-static const struct bpf_iter_reg tcp_reg_info = {
+static struct bpf_iter_reg tcp_reg_info = {
.target = "tcp",
.seq_ops = &bpf_iter_tcp_seq_ops,
.init_seq_private = bpf_iter_init_tcp,
@@ -2961,6 +2962,7 @@ static const struct bpf_iter_reg tcp_reg_info = {
static void __init bpf_iter_register(void)
{
+ tcp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON];
if (bpf_iter_reg_target(&tcp_reg_info))
pr_warn("Warning: could not register bpf iterator tcp\n");
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d4be4471c424..5a6a2f6d86b9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -106,6 +106,7 @@
#include <net/xfrm.h>
#include <trace/events/udp.h>
#include <linux/static_key.h>
+#include <linux/btf_ids.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>
#include "udp_impl.h"
@@ -408,6 +409,25 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
udp_ehash_secret + net_hash_mix(net));
}
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned short hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 hash;
+
+ if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
+ hash = udp_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ /* Fall back to scoring if group has connections */
+ if (reuseport_has_conns(sk, false))
+ return NULL;
+ }
+ return reuse_sk;
+}
+
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
@@ -418,7 +438,6 @@ static struct sock *udp4_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
int score, badness;
- u32 hash = 0;
result = NULL;
badness = 0;
@@ -426,15 +445,11 @@ static struct sock *udp4_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- if (sk->sk_reuseport &&
- sk->sk_state != TCP_ESTABLISHED) {
- hash = udp_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (result && !reuseport_has_conns(sk, false))
- return result;
- }
+ result = lookup_reuseport(net, sk, skb,
+ saddr, sport, daddr, hnum);
+ if (result)
+ return result;
+
badness = score;
result = sk;
}
@@ -442,6 +457,29 @@ static struct sock *udp4_lib_lookup2(struct net *net,
return result;
}
+static inline struct sock *udp4_lookup_run_bpf(struct net *net,
+ struct udp_table *udptable,
+ struct sk_buff *skb,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, u16 hnum)
+{
+ struct sock *sk, *reuse_sk;
+ bool no_reuseport;
+
+ if (udptable != &udp_table)
+ return NULL; /* only UDP is supported */
+
+ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP,
+ saddr, sport, daddr, hnum, &sk);
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
+ return sk;
+
+ reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
+ if (reuse_sk)
+ sk = reuse_sk;
+ return sk;
+}
+
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
@@ -449,27 +487,45 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport, int dif,
int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
- struct sock *result;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
+ struct sock *result, *sk;
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
+ /* Lookup connected or non-wildcard socket */
result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif,
hslot2, skb);
- if (!result) {
- hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
-
- result = udp4_lib_lookup2(net, saddr, sport,
- htonl(INADDR_ANY), hnum, dif, sdif,
- hslot2, skb);
+ if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
+ goto done;
+
+ /* Lookup redirect from BPF */
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+ sk = udp4_lookup_run_bpf(net, udptable, skb,
+ saddr, sport, daddr, hnum);
+ if (sk) {
+ result = sk;
+ goto done;
+ }
}
+
+ /* Got non-wildcard socket or error on first lookup */
+ if (result)
+ goto done;
+
+ /* Lookup wildcard sockets */
+ hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+
+ result = udp4_lib_lookup2(net, saddr, sport,
+ htonl(INADDR_ANY), hnum, dif, sdif,
+ hslot2, skb);
+done:
if (IS_ERR(result))
return NULL;
return result;
@@ -2532,7 +2588,7 @@ void udp_destroy_sock(struct sock *sk)
* Socket option code for UDP
*/
int udp_lib_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen,
+ sockptr_t optval, unsigned int optlen,
int (*push_pending_frames)(struct sock *))
{
struct udp_sock *up = udp_sk(sk);
@@ -2543,7 +2599,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
valbool = val ? 1 : 0;
@@ -2647,11 +2703,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
}
EXPORT_SYMBOL(udp_lib_setsockopt);
-int udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen)
{
if (level == SOL_UDP || level == SOL_UDPLITE)
- return udp_lib_setsockopt(sk, level, optname, optval, optlen,
+ return udp_lib_setsockopt(sk, level, optname,
+ optval, optlen,
udp_push_pending_frames);
return ip_setsockopt(sk, level, optname, optval, optlen);
}
@@ -3153,7 +3210,7 @@ static void bpf_iter_fini_udp(void *priv_data)
bpf_iter_fini_seq_net(priv_data);
}
-static const struct bpf_iter_reg udp_reg_info = {
+static struct bpf_iter_reg udp_reg_info = {
.target = "udp",
.seq_ops = &bpf_iter_udp_seq_ops,
.init_seq_private = bpf_iter_init_udp,
@@ -3168,6 +3225,7 @@ static const struct bpf_iter_reg udp_reg_info = {
static void __init bpf_iter_register(void)
{
+ udp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UDP];
if (bpf_iter_reg_target(&udp_reg_info))
pr_warn("Warning: could not register bpf iterator udp\n");
}
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index ab313702c87f..2878d8285caf 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -12,8 +12,8 @@ int __udp4_lib_err(struct sk_buff *, u32, struct udp_table *);
int udp_v4_get_port(struct sock *sk, unsigned short snum);
void udp_v4_rehash(struct sock *sk);
-int udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen);
int udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 390bedde21a5..cc8ad7ddecda 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -19,6 +19,7 @@
#include <linux/route.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/icmp.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
@@ -284,6 +285,17 @@ int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr,
}
EXPORT_SYMBOL_GPL(ip6_datagram_connect_v6_only);
+static void ipv6_icmp_error_rfc4884(const struct sk_buff *skb,
+ struct sock_ee_data_rfc4884 *out)
+{
+ switch (icmp6_hdr(skb)->icmp6_type) {
+ case ICMPV6_TIME_EXCEED:
+ case ICMPV6_DEST_UNREACH:
+ ip_icmp_error_rfc4884(skb, out, sizeof(struct icmp6hdr),
+ icmp6_hdr(skb)->icmp6_datagram_len * 8);
+ }
+}
+
void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
{
@@ -313,6 +325,10 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
serr->port = port;
__skb_pull(skb, payload - skb->data);
+
+ if (inet6_sk(sk)->recverr_rfc4884)
+ ipv6_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
+
skb_reset_transport_header(skb);
if (sock_queue_err_skb(sk, skb))
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index fbe9d4295eac..2d3add9e6116 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -21,6 +21,8 @@
#include <net/ip.h>
#include <net/sock_reuseport.h>
+extern struct inet_hashinfo tcp_hashinfo;
+
u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
const struct in6_addr *faddr, const __be16 fport)
@@ -111,6 +113,23 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned short hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 phash;
+
+ if (sk->sk_reuseport) {
+ phash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
+ }
+ return reuse_sk;
+}
+
/* called with rcu_read_lock() */
static struct sock *inet6_lhash2_lookup(struct net *net,
struct inet_listen_hashbucket *ilb2,
@@ -123,21 +142,17 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
- u32 phash = 0;
inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
sk = (struct sock *)icsk;
score = compute_score(sk, net, hnum, daddr, dif, sdif,
exact_dif);
if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet6_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- return result;
- }
+ result = lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum);
+ if (result)
+ return result;
+
result = sk;
hiscore = score;
}
@@ -146,6 +161,31 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
return result;
}
+static inline struct sock *inet6_lookup_run_bpf(struct net *net,
+ struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum)
+{
+ struct sock *sk, *reuse_sk;
+ bool no_reuseport;
+
+ if (hashinfo != &tcp_hashinfo)
+ return NULL; /* only TCP is supported */
+
+ no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP,
+ saddr, sport, daddr, hnum, &sk);
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
+ return sk;
+
+ reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+ if (reuse_sk)
+ sk = reuse_sk;
+ return sk;
+}
+
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -157,6 +197,14 @@ struct sock *inet6_lookup_listener(struct net *net,
struct sock *result = NULL;
unsigned int hash2;
+ /* Lookup redirect from BPF */
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+ result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
+ saddr, sport, daddr, hnum);
+ if (result)
+ goto done;
+ }
+
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index ce4fbba4acce..215b6f5e733e 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -371,7 +371,7 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo
static struct ip6_flowlabel *
fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
- char __user *optval, int optlen, int *err_p)
+ sockptr_t optval, int optlen, int *err_p)
{
struct ip6_flowlabel *fl = NULL;
int olen;
@@ -401,7 +401,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
memset(fl->opt, 0, sizeof(*fl->opt));
fl->opt->tot_len = sizeof(*fl->opt) + olen;
err = -EFAULT;
- if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen))
+ sockptr_advance(optval, CMSG_ALIGN(sizeof(*freq)));
+ if (copy_from_sockptr(fl->opt + 1, optval, olen))
goto done;
msg.msg_controllen = olen;
@@ -533,187 +534,211 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
return -ENOENT;
}
-int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
+#define socklist_dereference(__sflp) \
+ rcu_dereference_protected(__sflp, lockdep_is_held(&ip6_sk_fl_lock))
+
+static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
{
- int uninitialized_var(err);
- struct net *net = sock_net(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct in6_flowlabel_req freq;
- struct ipv6_fl_socklist *sfl1 = NULL;
- struct ipv6_fl_socklist *sfl;
struct ipv6_fl_socklist __rcu **sflp;
- struct ip6_flowlabel *fl, *fl1 = NULL;
+ struct ipv6_fl_socklist *sfl;
+ if (freq->flr_flags & IPV6_FL_F_REFLECT) {
+ if (sk->sk_protocol != IPPROTO_TCP)
+ return -ENOPROTOOPT;
+ if (!np->repflow)
+ return -ESRCH;
+ np->flow_label = 0;
+ np->repflow = 0;
+ return 0;
+ }
- if (optlen < sizeof(freq))
- return -EINVAL;
+ spin_lock_bh(&ip6_sk_fl_lock);
+ for (sflp = &np->ipv6_fl_list;
+ (sfl = socklist_dereference(*sflp)) != NULL;
+ sflp = &sfl->next) {
+ if (sfl->fl->label == freq->flr_label)
+ goto found;
+ }
+ spin_unlock_bh(&ip6_sk_fl_lock);
+ return -ESRCH;
+found:
+ if (freq->flr_label == (np->flow_label & IPV6_FLOWLABEL_MASK))
+ np->flow_label &= ~IPV6_FLOWLABEL_MASK;
+ *sflp = sfl->next;
+ spin_unlock_bh(&ip6_sk_fl_lock);
+ fl_release(sfl->fl);
+ kfree_rcu(sfl, rcu);
+ return 0;
+}
- if (copy_from_user(&freq, optval, sizeof(freq)))
- return -EFAULT;
+static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
+ struct ipv6_fl_socklist *sfl;
+ int err;
- switch (freq.flr_action) {
- case IPV6_FL_A_PUT:
- if (freq.flr_flags & IPV6_FL_F_REFLECT) {
- if (sk->sk_protocol != IPPROTO_TCP)
- return -ENOPROTOOPT;
- if (!np->repflow)
- return -ESRCH;
- np->flow_label = 0;
- np->repflow = 0;
- return 0;
- }
- spin_lock_bh(&ip6_sk_fl_lock);
- for (sflp = &np->ipv6_fl_list;
- (sfl = rcu_dereference_protected(*sflp,
- lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
- sflp = &sfl->next) {
- if (sfl->fl->label == freq.flr_label) {
- if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
- np->flow_label &= ~IPV6_FLOWLABEL_MASK;
- *sflp = sfl->next;
- spin_unlock_bh(&ip6_sk_fl_lock);
- fl_release(sfl->fl);
- kfree_rcu(sfl, rcu);
- return 0;
- }
+ rcu_read_lock_bh();
+ for_each_sk_fl_rcu(np, sfl) {
+ if (sfl->fl->label == freq->flr_label) {
+ err = fl6_renew(sfl->fl, freq->flr_linger,
+ freq->flr_expires);
+ rcu_read_unlock_bh();
+ return err;
}
- spin_unlock_bh(&ip6_sk_fl_lock);
- return -ESRCH;
+ }
+ rcu_read_unlock_bh();
- case IPV6_FL_A_RENEW:
- rcu_read_lock_bh();
- for_each_sk_fl_rcu(np, sfl) {
- if (sfl->fl->label == freq.flr_label) {
- err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
- rcu_read_unlock_bh();
- return err;
- }
- }
- rcu_read_unlock_bh();
+ if (freq->flr_share == IPV6_FL_S_NONE &&
+ ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ struct ip6_flowlabel *fl = fl_lookup(net, freq->flr_label);
- if (freq.flr_share == IPV6_FL_S_NONE &&
- ns_capable(net->user_ns, CAP_NET_ADMIN)) {
- fl = fl_lookup(net, freq.flr_label);
- if (fl) {
- err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
- fl_release(fl);
- return err;
- }
+ if (fl) {
+ err = fl6_renew(fl, freq->flr_linger,
+ freq->flr_expires);
+ fl_release(fl);
+ return err;
}
- return -ESRCH;
-
- case IPV6_FL_A_GET:
- if (freq.flr_flags & IPV6_FL_F_REFLECT) {
- struct net *net = sock_net(sk);
- if (net->ipv6.sysctl.flowlabel_consistency) {
- net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
- return -EPERM;
- }
+ }
+ return -ESRCH;
+}
- if (sk->sk_protocol != IPPROTO_TCP)
- return -ENOPROTOOPT;
+static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
+ sockptr_t optval, int optlen)
+{
+ struct ipv6_fl_socklist *sfl, *sfl1 = NULL;
+ struct ip6_flowlabel *fl, *fl1 = NULL;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
+ int uninitialized_var(err);
- np->repflow = 1;
- return 0;
+ if (freq->flr_flags & IPV6_FL_F_REFLECT) {
+ if (net->ipv6.sysctl.flowlabel_consistency) {
+ net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
+ return -EPERM;
}
- if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
- return -EINVAL;
+ if (sk->sk_protocol != IPPROTO_TCP)
+ return -ENOPROTOOPT;
+ np->repflow = 1;
+ return 0;
+ }
- if (net->ipv6.sysctl.flowlabel_state_ranges &&
- (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
- return -ERANGE;
+ if (freq->flr_label & ~IPV6_FLOWLABEL_MASK)
+ return -EINVAL;
+ if (net->ipv6.sysctl.flowlabel_state_ranges &&
+ (freq->flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
+ return -ERANGE;
- fl = fl_create(net, sk, &freq, optval, optlen, &err);
- if (!fl)
- return err;
- sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
+ fl = fl_create(net, sk, freq, optval, optlen, &err);
+ if (!fl)
+ return err;
- if (freq.flr_label) {
- err = -EEXIST;
- rcu_read_lock_bh();
- for_each_sk_fl_rcu(np, sfl) {
- if (sfl->fl->label == freq.flr_label) {
- if (freq.flr_flags&IPV6_FL_F_EXCL) {
- rcu_read_unlock_bh();
- goto done;
- }
- fl1 = sfl->fl;
- if (!atomic_inc_not_zero(&fl1->users))
- fl1 = NULL;
- break;
+ sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
+
+ if (freq->flr_label) {
+ err = -EEXIST;
+ rcu_read_lock_bh();
+ for_each_sk_fl_rcu(np, sfl) {
+ if (sfl->fl->label == freq->flr_label) {
+ if (freq->flr_flags & IPV6_FL_F_EXCL) {
+ rcu_read_unlock_bh();
+ goto done;
}
+ fl1 = sfl->fl;
+ if (!atomic_inc_not_zero(&fl1->users))
+ fl1 = NULL;
+ break;
}
- rcu_read_unlock_bh();
+ }
+ rcu_read_unlock_bh();
- if (!fl1)
- fl1 = fl_lookup(net, freq.flr_label);
- if (fl1) {
+ if (!fl1)
+ fl1 = fl_lookup(net, freq->flr_label);
+ if (fl1) {
recheck:
- err = -EEXIST;
- if (freq.flr_flags&IPV6_FL_F_EXCL)
- goto release;
- err = -EPERM;
- if (fl1->share == IPV6_FL_S_EXCL ||
- fl1->share != fl->share ||
- ((fl1->share == IPV6_FL_S_PROCESS) &&
- (fl1->owner.pid != fl->owner.pid)) ||
- ((fl1->share == IPV6_FL_S_USER) &&
- !uid_eq(fl1->owner.uid, fl->owner.uid)))
- goto release;
-
- err = -ENOMEM;
- if (!sfl1)
- goto release;
- if (fl->linger > fl1->linger)
- fl1->linger = fl->linger;
- if ((long)(fl->expires - fl1->expires) > 0)
- fl1->expires = fl->expires;
- fl_link(np, sfl1, fl1);
- fl_free(fl);
- return 0;
+ err = -EEXIST;
+ if (freq->flr_flags&IPV6_FL_F_EXCL)
+ goto release;
+ err = -EPERM;
+ if (fl1->share == IPV6_FL_S_EXCL ||
+ fl1->share != fl->share ||
+ ((fl1->share == IPV6_FL_S_PROCESS) &&
+ (fl1->owner.pid != fl->owner.pid)) ||
+ ((fl1->share == IPV6_FL_S_USER) &&
+ !uid_eq(fl1->owner.uid, fl->owner.uid)))
+ goto release;
+
+ err = -ENOMEM;
+ if (!sfl1)
+ goto release;
+ if (fl->linger > fl1->linger)
+ fl1->linger = fl->linger;
+ if ((long)(fl->expires - fl1->expires) > 0)
+ fl1->expires = fl->expires;
+ fl_link(np, sfl1, fl1);
+ fl_free(fl);
+ return 0;
release:
- fl_release(fl1);
- goto done;
- }
- }
- err = -ENOENT;
- if (!(freq.flr_flags&IPV6_FL_F_CREATE))
+ fl_release(fl1);
goto done;
+ }
+ }
+ err = -ENOENT;
+ if (!(freq->flr_flags & IPV6_FL_F_CREATE))
+ goto done;
- err = -ENOMEM;
- if (!sfl1)
- goto done;
+ err = -ENOMEM;
+ if (!sfl1)
+ goto done;
- err = mem_check(sk);
- if (err != 0)
- goto done;
+ err = mem_check(sk);
+ if (err != 0)
+ goto done;
- fl1 = fl_intern(net, fl, freq.flr_label);
- if (fl1)
- goto recheck;
+ fl1 = fl_intern(net, fl, freq->flr_label);
+ if (fl1)
+ goto recheck;
- if (!freq.flr_label) {
- if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
- &fl->label, sizeof(fl->label))) {
- /* Intentionally ignore fault. */
- }
+ if (!freq->flr_label) {
+ sockptr_advance(optval,
+ offsetof(struct in6_flowlabel_req, flr_label));
+ if (copy_to_sockptr(optval, &fl->label, sizeof(fl->label))) {
+ /* Intentionally ignore fault. */
}
-
- fl_link(np, sfl1, fl);
- return 0;
-
- default:
- return -EINVAL;
}
+ fl_link(np, sfl1, fl);
+ return 0;
done:
fl_free(fl);
kfree(sfl1);
return err;
}
+int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen)
+{
+ struct in6_flowlabel_req freq;
+
+ if (optlen < sizeof(freq))
+ return -EINVAL;
+ if (copy_from_sockptr(&freq, optval, sizeof(freq)))
+ return -EFAULT;
+
+ switch (freq.flr_action) {
+ case IPV6_FL_A_PUT:
+ return ipv6_flowlabel_put(sk, &freq);
+ case IPV6_FL_A_RENEW:
+ return ipv6_flowlabel_renew(sk, &freq);
+ case IPV6_FL_A_GET:
+ return ipv6_flowlabel_get(sk, &freq, optval, optlen);
+ default:
+ return -EINVAL;
+ }
+}
+
#ifdef CONFIG_PROC_FS
struct ip6fl_iter_state {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 1f4d20e97c07..06b0d2c329b9 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1629,7 +1629,8 @@ EXPORT_SYMBOL(mroute6_is_socket);
* MOSPF/PIM router set up we can clean this up.
*/
-int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
+int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
+ unsigned int optlen)
{
int ret, parent = 0;
struct mif6ctl vif;
@@ -1665,7 +1666,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_ADD_MIF:
if (optlen < sizeof(vif))
return -EINVAL;
- if (copy_from_user(&vif, optval, sizeof(vif)))
+ if (copy_from_sockptr(&vif, optval, sizeof(vif)))
return -EFAULT;
if (vif.mif6c_mifi >= MAXMIFS)
return -ENFILE;
@@ -1678,7 +1679,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_DEL_MIF:
if (optlen < sizeof(mifi_t))
return -EINVAL;
- if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
+ if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
return -EFAULT;
rtnl_lock();
ret = mif6_delete(mrt, mifi, 0, NULL);
@@ -1697,7 +1698,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_DEL_MFC_PROXY:
if (optlen < sizeof(mfc))
return -EINVAL;
- if (copy_from_user(&mfc, optval, sizeof(mfc)))
+ if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
return -EFAULT;
if (parent == 0)
parent = mfc.mf6cc_parent;
@@ -1718,7 +1719,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (optlen != sizeof(flags))
return -EINVAL;
- if (get_user(flags, (int __user *)optval))
+ if (copy_from_sockptr(&flags, optval, sizeof(flags)))
return -EFAULT;
rtnl_lock();
mroute_clean_tables(mrt, flags);
@@ -1735,7 +1736,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (optlen != sizeof(v))
return -EINVAL;
- if (get_user(v, (int __user *)optval))
+ if (copy_from_sockptr(&v, optval, sizeof(v)))
return -EFAULT;
mrt->mroute_do_assert = v;
return 0;
@@ -1748,7 +1749,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (optlen != sizeof(v))
return -EINVAL;
- if (get_user(v, (int __user *)optval))
+ if (copy_from_sockptr(&v, optval, sizeof(v)))
return -EFAULT;
v = !!v;
rtnl_lock();
@@ -1769,7 +1770,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (optlen != sizeof(u32))
return -EINVAL;
- if (get_user(v, (u32 __user *)optval))
+ if (copy_from_sockptr(&v, optval, sizeof(v)))
return -EFAULT;
/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
if (v != RT_TABLE_DEFAULT && v >= 100000000)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index add8f7912299..20c740976334 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -136,15 +136,15 @@ static bool setsockopt_needs_rtnl(int optname)
return false;
}
-static int copy_group_source_from_user(struct group_source_req *greqs,
- void __user *optval, int optlen)
+static int copy_group_source_from_sockptr(struct group_source_req *greqs,
+ sockptr_t optval, int optlen)
{
if (in_compat_syscall()) {
struct compat_group_source_req gr32;
if (optlen < sizeof(gr32))
return -EINVAL;
- if (copy_from_user(&gr32, optval, sizeof(gr32)))
+ if (copy_from_sockptr(&gr32, optval, sizeof(gr32)))
return -EFAULT;
greqs->gsr_interface = gr32.gsr_interface;
greqs->gsr_group = gr32.gsr_group;
@@ -152,7 +152,7 @@ static int copy_group_source_from_user(struct group_source_req *greqs,
} else {
if (optlen < sizeof(*greqs))
return -EINVAL;
- if (copy_from_user(greqs, optval, sizeof(*greqs)))
+ if (copy_from_sockptr(greqs, optval, sizeof(*greqs)))
return -EFAULT;
}
@@ -160,13 +160,13 @@ static int copy_group_source_from_user(struct group_source_req *greqs,
}
static int do_ipv6_mcast_group_source(struct sock *sk, int optname,
- void __user *optval, int optlen)
+ sockptr_t optval, int optlen)
{
struct group_source_req greqs;
int omode, add;
int ret;
- ret = copy_group_source_from_user(&greqs, optval, optlen);
+ ret = copy_group_source_from_sockptr(&greqs, optval, optlen);
if (ret)
return ret;
@@ -200,7 +200,7 @@ static int do_ipv6_mcast_group_source(struct sock *sk, int optname,
return ip6_mc_source(add, omode, sk, &greqs);
}
-static int ipv6_set_mcast_msfilter(struct sock *sk, void __user *optval,
+static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
int optlen)
{
struct group_filter *gsf;
@@ -211,7 +211,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, void __user *optval,
if (optlen > sysctl_optmem_max)
return -ENOBUFS;
- gsf = memdup_user(optval, optlen);
+ gsf = memdup_sockptr(optval, optlen);
if (IS_ERR(gsf))
return PTR_ERR(gsf);
@@ -231,7 +231,7 @@ out_free_gsf:
return ret;
}
-static int compat_ipv6_set_mcast_msfilter(struct sock *sk, void __user *optval,
+static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
int optlen)
{
const int size0 = offsetof(struct compat_group_filter, gf_slist);
@@ -251,7 +251,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, void __user *optval,
gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
ret = -EFAULT;
- if (copy_from_user(gf32, optval, optlen))
+ if (copy_from_sockptr(gf32, optval, optlen))
goto out_free_p;
/* numsrc >= (4G-140)/128 overflow in 32 bits */
@@ -276,14 +276,14 @@ out_free_p:
}
static int ipv6_mcast_join_leave(struct sock *sk, int optname,
- void __user *optval, int optlen)
+ sockptr_t optval, int optlen)
{
struct sockaddr_in6 *psin6;
struct group_req greq;
if (optlen < sizeof(greq))
return -EINVAL;
- if (copy_from_user(&greq, optval, sizeof(greq)))
+ if (copy_from_sockptr(&greq, optval, sizeof(greq)))
return -EFAULT;
if (greq.gr_group.ss_family != AF_INET6)
@@ -296,14 +296,14 @@ static int ipv6_mcast_join_leave(struct sock *sk, int optname,
}
static int compat_ipv6_mcast_join_leave(struct sock *sk, int optname,
- void __user *optval, int optlen)
+ sockptr_t optval, int optlen)
{
struct compat_group_req gr32;
struct sockaddr_in6 *psin6;
if (optlen < sizeof(gr32))
return -EINVAL;
- if (copy_from_user(&gr32, optval, sizeof(gr32)))
+ if (copy_from_sockptr(&gr32, optval, sizeof(gr32)))
return -EFAULT;
if (gr32.gr_group.ss_family != AF_INET6)
@@ -315,8 +315,82 @@ static int compat_ipv6_mcast_join_leave(struct sock *sk, int optname,
return ipv6_sock_mc_drop(sk, gr32.gr_interface, &psin6->sin6_addr);
}
+static int ipv6_set_opt_hdr(struct sock *sk, int optname, sockptr_t optval,
+ int optlen)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_opt_hdr *new = NULL;
+ struct net *net = sock_net(sk);
+ struct ipv6_txoptions *opt;
+ int err;
+
+ /* hop-by-hop / destination options are privileged option */
+ if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
+ return -EPERM;
+
+ /* remove any sticky options header with a zero option
+ * length, per RFC3542.
+ */
+ if (optlen > 0) {
+ if (sockptr_is_null(optval))
+ return -EINVAL;
+ if (optlen < sizeof(struct ipv6_opt_hdr) ||
+ optlen & 0x7 ||
+ optlen > 8 * 255)
+ return -EINVAL;
+
+ new = memdup_sockptr(optval, optlen);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+ if (unlikely(ipv6_optlen(new) > optlen)) {
+ kfree(new);
+ return -EINVAL;
+ }
+ }
+
+ opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
+ opt = ipv6_renew_options(sk, opt, optname, new);
+ kfree(new);
+ if (IS_ERR(opt))
+ return PTR_ERR(opt);
+
+ /* routing header option needs extra check */
+ err = -EINVAL;
+ if (optname == IPV6_RTHDR && opt && opt->srcrt) {
+ struct ipv6_rt_hdr *rthdr = opt->srcrt;
+ switch (rthdr->type) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+ case IPV6_SRCRT_TYPE_2:
+ if (rthdr->hdrlen != 2 || rthdr->segments_left != 1)
+ goto sticky_done;
+ break;
+#endif
+ case IPV6_SRCRT_TYPE_4:
+ {
+ struct ipv6_sr_hdr *srh =
+ (struct ipv6_sr_hdr *)opt->srcrt;
+
+ if (!seg6_validate_srh(srh, optlen, false))
+ goto sticky_done;
+ break;
+ }
+ default:
+ goto sticky_done;
+ }
+ }
+
+ err = 0;
+ opt = ipv6_update_options(sk, opt);
+sticky_done:
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
+ return err;
+}
+
static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
@@ -324,11 +398,11 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
int retv = -ENOPROTOOPT;
bool needs_rtnl = setsockopt_needs_rtnl(optname);
- if (!optval)
+ if (sockptr_is_null(optval))
val = 0;
else {
if (optlen >= sizeof(int)) {
- if (get_user(val, (int __user *) optval))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
} else
val = 0;
@@ -579,82 +653,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
case IPV6_RTHDRDSTOPTS:
case IPV6_RTHDR:
case IPV6_DSTOPTS:
- {
- struct ipv6_txoptions *opt;
- struct ipv6_opt_hdr *new = NULL;
-
- /* hop-by-hop / destination options are privileged option */
- retv = -EPERM;
- if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
- break;
-
- /* remove any sticky options header with a zero option
- * length, per RFC3542.
- */
- if (optlen == 0)
- optval = NULL;
- else if (!optval)
- goto e_inval;
- else if (optlen < sizeof(struct ipv6_opt_hdr) ||
- optlen & 0x7 || optlen > 8 * 255)
- goto e_inval;
- else {
- new = memdup_user(optval, optlen);
- if (IS_ERR(new)) {
- retv = PTR_ERR(new);
- break;
- }
- if (unlikely(ipv6_optlen(new) > optlen)) {
- kfree(new);
- goto e_inval;
- }
- }
-
- opt = rcu_dereference_protected(np->opt,
- lockdep_sock_is_held(sk));
- opt = ipv6_renew_options(sk, opt, optname, new);
- kfree(new);
- if (IS_ERR(opt)) {
- retv = PTR_ERR(opt);
- break;
- }
-
- /* routing header option needs extra check */
- retv = -EINVAL;
- if (optname == IPV6_RTHDR && opt && opt->srcrt) {
- struct ipv6_rt_hdr *rthdr = opt->srcrt;
- switch (rthdr->type) {
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
- case IPV6_SRCRT_TYPE_2:
- if (rthdr->hdrlen != 2 ||
- rthdr->segments_left != 1)
- goto sticky_done;
-
- break;
-#endif
- case IPV6_SRCRT_TYPE_4:
- {
- struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)
- opt->srcrt;
-
- if (!seg6_validate_srh(srh, optlen, false))
- goto sticky_done;
- break;
- }
- default:
- goto sticky_done;
- }
- }
-
- retv = 0;
- opt = ipv6_update_options(sk, opt);
-sticky_done:
- if (opt) {
- atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
- txopt_put(opt);
- }
+ retv = ipv6_set_opt_hdr(sk, optname, optval, optlen);
break;
- }
case IPV6_PKTINFO:
{
@@ -662,12 +662,13 @@ sticky_done:
if (optlen == 0)
goto e_inval;
- else if (optlen < sizeof(struct in6_pktinfo) || !optval)
+ else if (optlen < sizeof(struct in6_pktinfo) ||
+ sockptr_is_null(optval))
goto e_inval;
- if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) {
- retv = -EFAULT;
- break;
+ if (copy_from_sockptr(&pkt, optval, sizeof(pkt))) {
+ retv = -EFAULT;
+ break;
}
if (!sk_dev_equal_l3scope(sk, pkt.ipi6_ifindex))
goto e_inval;
@@ -708,7 +709,7 @@ sticky_done:
refcount_set(&opt->refcnt, 1);
opt->tot_len = sizeof(*opt) + optlen;
retv = -EFAULT;
- if (copy_from_user(opt+1, optval, optlen))
+ if (copy_from_sockptr(opt + 1, optval, optlen))
goto done;
msg.msg_controllen = optlen;
@@ -830,7 +831,7 @@ done:
break;
retv = -EFAULT;
- if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
+ if (copy_from_sockptr(&mreq, optval, sizeof(struct ipv6_mreq)))
break;
if (optname == IPV6_ADD_MEMBERSHIP)
@@ -848,7 +849,7 @@ done:
goto e_inval;
retv = -EFAULT;
- if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
+ if (copy_from_sockptr(&mreq, optval, sizeof(struct ipv6_mreq)))
break;
if (optname == IPV6_JOIN_ANYCAST)
@@ -964,6 +965,14 @@ done:
np->rxopt.bits.recvfragsize = valbool;
retv = 0;
break;
+ case IPV6_RECVERR_RFC4884:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ if (val < 0 || val > 1)
+ goto e_inval;
+ np->recverr_rfc4884 = valbool;
+ retv = 0;
+ break;
}
release_sock(sk);
@@ -979,8 +988,8 @@ e_inval:
return -EINVAL;
}
-int ipv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen)
{
int err;
@@ -1438,6 +1447,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->rtalert_isolate;
break;
+ case IPV6_RECVERR_RFC4884:
+ val = np->recverr_rfc4884;
+ break;
+
default:
return -ENOPROTOOPT;
}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index fd1f8f931231..1d52957a413f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1119,7 +1119,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
}
static int
-do_replace(struct net *net, const void __user *user, unsigned int len)
+do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret;
struct ip6t_replace tmp;
@@ -1127,7 +1127,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
void *loc_cpu_entry;
struct ip6t_entry *iter;
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
/* overflow check */
@@ -1143,8 +1143,8 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
return -ENOMEM;
loc_cpu_entry = newinfo->entries;
- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
- tmp.size) != 0) {
+ sockptr_advance(arg, sizeof(tmp));
+ if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
}
@@ -1168,7 +1168,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
}
static int
-do_add_counters(struct net *net, const void __user *user, unsigned int len)
+do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
{
unsigned int i;
struct xt_counters_info tmp;
@@ -1179,7 +1179,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len)
struct ip6t_entry *iter;
unsigned int addend;
- paddc = xt_copy_counters_from_user(user, len, &tmp);
+ paddc = xt_copy_counters(arg, len, &tmp);
if (IS_ERR(paddc))
return PTR_ERR(paddc);
t = xt_find_table_lock(net, AF_INET6, tmp.name);
@@ -1493,7 +1493,7 @@ out_unlock:
}
static int
-compat_do_replace(struct net *net, void __user *user, unsigned int len)
+compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret;
struct compat_ip6t_replace tmp;
@@ -1501,7 +1501,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
void *loc_cpu_entry;
struct ip6t_entry *iter;
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
/* overflow check */
@@ -1517,8 +1517,8 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
return -ENOMEM;
loc_cpu_entry = newinfo->entries;
- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
- tmp.size) != 0) {
+ sockptr_advance(arg, sizeof(tmp));
+ if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
}
@@ -1619,7 +1619,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
#endif
static int
-do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+do_ip6t_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len)
{
int ret;
@@ -1630,14 +1630,14 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
case IP6T_SO_SET_REPLACE:
#ifdef CONFIG_COMPAT
if (in_compat_syscall())
- ret = compat_do_replace(sock_net(sk), user, len);
+ ret = compat_do_replace(sock_net(sk), arg, len);
else
#endif
- ret = do_replace(sock_net(sk), user, len);
+ ret = do_replace(sock_net(sk), arg, len);
break;
case IP6T_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sock_net(sk), user, len);
+ ret = do_add_counters(sock_net(sk), arg, len);
break;
default:
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 594e01ad670a..874f01cd7aec 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -972,13 +972,13 @@ do_confirm:
}
static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+ sockptr_t optval, int optlen)
{
switch (optname) {
case ICMPV6_FILTER:
if (optlen > sizeof(struct icmp6_filter))
optlen = sizeof(struct icmp6_filter);
- if (copy_from_user(&raw6_sk(sk)->filter, optval, optlen))
+ if (copy_from_sockptr(&raw6_sk(sk)->filter, optval, optlen))
return -EFAULT;
return 0;
default:
@@ -1015,12 +1015,12 @@ static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct raw6_sock *rp = raw6_sk(sk);
int val;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
switch (optname) {
@@ -1062,7 +1062,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
}
static int rawv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
switch (level) {
case SOL_RAW:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 427b81cbc164..33f5efbad0a9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -61,6 +61,7 @@
#include <net/l3mdev.h>
#include <net/ip.h>
#include <linux/uaccess.h>
+#include <linux/btf_ids.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
@@ -6423,7 +6424,10 @@ void __init ip6_route_init_special_entries(void)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
-static const struct bpf_iter_reg ipv6_route_reg_info = {
+BTF_ID_LIST(btf_fib6_info_id)
+BTF_ID(struct, fib6_info)
+
+static struct bpf_iter_reg ipv6_route_reg_info = {
.target = "ipv6_route",
.seq_ops = &ipv6_route_seq_ops,
.init_seq_private = bpf_iter_init_seq_net,
@@ -6438,6 +6442,7 @@ static const struct bpf_iter_reg ipv6_route_reg_info = {
static int __init bpf_iter_register(void)
{
+ ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id;
return bpf_iter_reg_target(&ipv6_route_reg_info);
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c34b7834fd84..305870a72352 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -567,7 +567,7 @@ static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
}
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
- char __user *optval, int optlen)
+ sockptr_t optval, int optlen)
{
struct tcp_md5sig cmd;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
@@ -577,7 +577,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (optlen < sizeof(cmd))
return -EINVAL;
- if (copy_from_user(&cmd, optval, sizeof(cmd)))
+ if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
return -EFAULT;
if (sin6->sin6_family != AF_INET6)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5aff0856a05b..15818e18655d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -141,6 +141,27 @@ static int compute_score(struct sock *sk, struct net *net,
return score;
}
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 hash;
+
+ if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
+ hash = udp6_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ /* Fall back to scoring if group has connections */
+ if (reuseport_has_conns(sk, false))
+ return NULL;
+ }
+ return reuse_sk;
+}
+
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -150,7 +171,6 @@ static struct sock *udp6_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
int score, badness;
- u32 hash = 0;
result = NULL;
badness = -1;
@@ -158,16 +178,11 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- if (sk->sk_reuseport &&
- sk->sk_state != TCP_ESTABLISHED) {
- hash = udp6_ehashfn(net, daddr, hnum,
- saddr, sport);
-
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (result && !reuseport_has_conns(sk, false))
- return result;
- }
+ result = lookup_reuseport(net, sk, skb,
+ saddr, sport, daddr, hnum);
+ if (result)
+ return result;
+
result = sk;
badness = score;
}
@@ -175,6 +190,31 @@ static struct sock *udp6_lib_lookup2(struct net *net,
return result;
}
+static inline struct sock *udp6_lookup_run_bpf(struct net *net,
+ struct udp_table *udptable,
+ struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ u16 hnum)
+{
+ struct sock *sk, *reuse_sk;
+ bool no_reuseport;
+
+ if (udptable != &udp_table)
+ return NULL; /* only UDP is supported */
+
+ no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP,
+ saddr, sport, daddr, hnum, &sk);
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
+ return sk;
+
+ reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
+ if (reuse_sk)
+ sk = reuse_sk;
+ return sk;
+}
+
/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -185,25 +225,42 @@ struct sock *__udp6_lib_lookup(struct net *net,
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
- struct sock *result;
+ struct sock *result, *sk;
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
+ /* Lookup connected or non-wildcard sockets */
result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif,
hslot2, skb);
- if (!result) {
- hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
- slot2 = hash2 & udptable->mask;
+ if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
+ goto done;
+
+ /* Lookup redirect from BPF */
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+ sk = udp6_lookup_run_bpf(net, udptable, skb,
+ saddr, sport, daddr, hnum);
+ if (sk) {
+ result = sk;
+ goto done;
+ }
+ }
- hslot2 = &udptable->hash2[slot2];
+ /* Got non-wildcard socket or error on first lookup */
+ if (result)
+ goto done;
- result = udp6_lib_lookup2(net, saddr, sport,
- &in6addr_any, hnum, dif, sdif,
- hslot2, skb);
- }
+ /* Lookup wildcard sockets */
+ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+
+ result = udp6_lib_lookup2(net, saddr, sport,
+ &in6addr_any, hnum, dif, sdif,
+ hslot2, skb);
+done:
if (IS_ERR(result))
return NULL;
return result;
@@ -1561,11 +1618,12 @@ void udpv6_destroy_sock(struct sock *sk)
/*
* Socket option code for UDP
*/
-int udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen)
{
if (level == SOL_UDP || level == SOL_UDPLITE)
- return udp_lib_setsockopt(sk, level, optname, optval, optlen,
+ return udp_lib_setsockopt(sk, level, optname,
+ optval, optlen,
udp_v6_push_pending_frames);
return ipv6_setsockopt(sk, level, optname, optval, optlen);
}
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 30dfb6f1b762..b2fcc46c1630 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -17,8 +17,8 @@ void udp_v6_rehash(struct sock *sk);
int udpv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-int udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen);
int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int flags, int *addr_len);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index ee0add15497d..6ee9851ac7c6 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1494,7 +1494,7 @@ static int iucv_sock_release(struct socket *sock)
/* getsockopt and setsockopt */
static int iucv_sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
@@ -1507,7 +1507,7 @@ static int iucv_sock_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *) optval))
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
return -EFAULT;
rc = 0;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 56fac24a627a..56dad9565bc9 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1265,7 +1265,7 @@ static void kcm_recv_enable(struct kcm_sock *kcm)
}
static int kcm_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct kcm_sock *kcm = kcm_sk(sock->sk);
int val, valbool;
@@ -1277,8 +1277,8 @@ static int kcm_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
- return -EINVAL;
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
+ return -EFAULT;
valbool = val ? 1 : 0;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 6434d17e6e8e..e723828e458b 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/*
- * L2TP core.
+/* L2TP core.
*
* Copyright (c) 2008,2009,2010 Katalix Systems Ltd
*
@@ -94,7 +93,7 @@ struct l2tp_skb_cb {
unsigned long expires;
};
-#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
+#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *)&(skb)->cb[sizeof(struct inet_skb_parm)])
static struct workqueue_struct *l2tp_wq;
@@ -102,8 +101,10 @@ static struct workqueue_struct *l2tp_wq;
static unsigned int l2tp_net_id;
struct l2tp_net {
struct list_head l2tp_tunnel_list;
+ /* Lock for write access to l2tp_tunnel_list */
spinlock_t l2tp_tunnel_list_lock;
struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
+ /* Lock for write access to l2tp_session_hlist */
spinlock_t l2tp_session_hlist_lock;
};
@@ -134,7 +135,6 @@ static inline struct hlist_head *
l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
{
return &pn->l2tp_session_hlist[hash_32(session_id, L2TP_HASH_BITS_2)];
-
}
/* Session hash list.
@@ -412,7 +412,7 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *
}
/* call private receive handler */
- if (session->recv_skb != NULL)
+ if (session->recv_skb)
(*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length);
else
kfree_skb(skb);
@@ -621,8 +621,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
int length)
{
struct l2tp_tunnel *tunnel = session->tunnel;
+ u32 ns = 0, nr = 0;
int offset;
- u32 ns, nr;
/* Parse and check optional cookie */
if (session->peer_cookie_len > 0) {
@@ -644,13 +644,12 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
* the control of the LNS. If no sequence numbers present but
* we were expecting them, discard frame.
*/
- ns = nr = 0;
L2TP_SKB_CB(skb)->has_seq = 0;
if (tunnel->version == L2TP_HDR_VER_2) {
if (hdrflags & L2TP_HDRFLAG_S) {
- ns = ntohs(*(__be16 *) ptr);
+ ns = ntohs(*(__be16 *)ptr);
ptr += 2;
- nr = ntohs(*(__be16 *) ptr);
+ nr = ntohs(*(__be16 *)ptr);
ptr += 2;
/* Store L2TP info in the skb */
@@ -662,7 +661,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
session->name, ns, nr, session->nr);
}
} else if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
- u32 l2h = ntohl(*(__be32 *) ptr);
+ u32 l2h = ntohl(*(__be32 *)ptr);
if (l2h & 0x40000000) {
ns = l2h & 0x00ffffff;
@@ -679,11 +678,11 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
}
if (L2TP_SKB_CB(skb)->has_seq) {
- /* Received a packet with sequence numbers. If we're the LNS,
+ /* Received a packet with sequence numbers. If we're the LAC,
* check if we sre sending sequence numbers and if not,
* configure it so.
*/
- if ((!session->lns_mode) && (!session->send_seq)) {
+ if (!session->lns_mode && !session->send_seq) {
l2tp_info(session, L2TP_MSG_SEQ,
"%s: requested to enable seq numbers by LNS\n",
session->name);
@@ -707,7 +706,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
* If we're the LNS and we're sending sequence numbers, the
* LAC is broken. Discard the frame.
*/
- if ((!session->lns_mode) && (session->send_seq)) {
+ if (!session->lns_mode && session->send_seq) {
l2tp_info(session, L2TP_MSG_SEQ,
"%s: requested to disable seq numbers by LNS\n",
session->name);
@@ -774,16 +773,17 @@ EXPORT_SYMBOL(l2tp_recv_common);
/* Drop skbs from the session's reorder_q
*/
-static int l2tp_session_queue_purge(struct l2tp_session *session)
+static void l2tp_session_queue_purge(struct l2tp_session *session)
{
struct sk_buff *skb = NULL;
- BUG_ON(!session);
- BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+
+ if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
+ return;
+
while ((skb = skb_dequeue(&session->reorder_q))) {
atomic_long_inc(&session->stats.rx_errors);
kfree_skb(skb);
}
- return 0;
}
/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame
@@ -825,10 +825,11 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
}
/* Point to L2TP header */
- optr = ptr = skb->data;
+ optr = skb->data;
+ ptr = skb->data;
/* Get L2TP header flags */
- hdrflags = ntohs(*(__be16 *) ptr);
+ hdrflags = ntohs(*(__be16 *)ptr);
/* Check protocol version */
version = hdrflags & L2TP_HDR_VER_MASK;
@@ -859,14 +860,14 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
ptr += 2;
/* Extract tunnel and session ID */
- tunnel_id = ntohs(*(__be16 *) ptr);
+ tunnel_id = ntohs(*(__be16 *)ptr);
ptr += 2;
- session_id = ntohs(*(__be16 *) ptr);
+ session_id = ntohs(*(__be16 *)ptr);
ptr += 2;
} else {
ptr += 2; /* skip reserved bits */
tunnel_id = tunnel->tunnel_id;
- session_id = ntohl(*(__be32 *) ptr);
+ session_id = ntohl(*(__be32 *)ptr);
ptr += 4;
}
@@ -910,7 +911,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
struct l2tp_tunnel *tunnel;
tunnel = rcu_dereference_sk_user_data(sk);
- if (tunnel == NULL)
+ if (!tunnel)
goto pass_up;
l2tp_dbg(tunnel, L2TP_MSG_DATA, "%s: received %d bytes\n",
@@ -971,13 +972,13 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
*/
if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
u16 flags = L2TP_HDR_VER_3;
- *((__be16 *) bufp) = htons(flags);
+ *((__be16 *)bufp) = htons(flags);
bufp += 2;
- *((__be16 *) bufp) = 0;
+ *((__be16 *)bufp) = 0;
bufp += 2;
}
- *((__be32 *) bufp) = htonl(session->peer_session_id);
+ *((__be32 *)bufp) = htonl(session->peer_session_id);
bufp += 4;
if (session->cookie_len) {
memcpy(bufp, &session->cookie[0], session->cookie_len);
@@ -1121,8 +1122,8 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
&sk->sk_v6_daddr, udp_len);
else
#endif
- udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr,
- inet->inet_daddr, udp_len);
+ udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr,
+ inet->inet_daddr, udp_len);
break;
case L2TP_ENCAPTYPE_IP:
@@ -1149,7 +1150,7 @@ static void l2tp_tunnel_destruct(struct sock *sk)
{
struct l2tp_tunnel *tunnel = l2tp_tunnel(sk);
- if (tunnel == NULL)
+ if (!tunnel)
goto end;
l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name);
@@ -1188,8 +1189,6 @@ static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
struct hlist_node *tmp;
struct l2tp_session *session;
- BUG_ON(tunnel == NULL);
-
l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing all sessions...\n",
tunnel->name);
@@ -1213,7 +1212,7 @@ again:
__l2tp_session_unhash(session);
l2tp_session_queue_purge(session);
- if (session->session_close != NULL)
+ if (session->session_close)
(*session->session_close)(session);
l2tp_session_dec_refcount(session);
@@ -1284,10 +1283,10 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
* exit hook.
*/
static int l2tp_tunnel_sock_create(struct net *net,
- u32 tunnel_id,
- u32 peer_tunnel_id,
- struct l2tp_tunnel_cfg *cfg,
- struct socket **sockp)
+ u32 tunnel_id,
+ u32 peer_tunnel_id,
+ struct l2tp_tunnel_cfg *cfg,
+ struct socket **sockp)
{
int err = -EINVAL;
struct socket *sock = NULL;
@@ -1305,9 +1304,9 @@ static int l2tp_tunnel_sock_create(struct net *net,
memcpy(&udp_conf.peer_ip6, cfg->peer_ip6,
sizeof(udp_conf.peer_ip6));
udp_conf.use_udp6_tx_checksums =
- ! cfg->udp6_zero_tx_checksums;
+ !cfg->udp6_zero_tx_checksums;
udp_conf.use_udp6_rx_checksums =
- ! cfg->udp6_zero_rx_checksums;
+ !cfg->udp6_zero_rx_checksums;
} else
#endif
{
@@ -1332,7 +1331,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
struct sockaddr_l2tpip6 ip6_addr = {0};
err = sock_create_kern(net, AF_INET6, SOCK_DGRAM,
- IPPROTO_L2TP, &sock);
+ IPPROTO_L2TP, &sock);
if (err < 0)
goto out;
@@ -1340,7 +1339,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6,
sizeof(ip6_addr.l2tp_addr));
ip6_addr.l2tp_conn_id = tunnel_id;
- err = kernel_bind(sock, (struct sockaddr *) &ip6_addr,
+ err = kernel_bind(sock, (struct sockaddr *)&ip6_addr,
sizeof(ip6_addr));
if (err < 0)
goto out;
@@ -1350,7 +1349,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
sizeof(ip6_addr.l2tp_addr));
ip6_addr.l2tp_conn_id = peer_tunnel_id;
err = kernel_connect(sock,
- (struct sockaddr *) &ip6_addr,
+ (struct sockaddr *)&ip6_addr,
sizeof(ip6_addr), 0);
if (err < 0)
goto out;
@@ -1360,14 +1359,14 @@ static int l2tp_tunnel_sock_create(struct net *net,
struct sockaddr_l2tpip ip_addr = {0};
err = sock_create_kern(net, AF_INET, SOCK_DGRAM,
- IPPROTO_L2TP, &sock);
+ IPPROTO_L2TP, &sock);
if (err < 0)
goto out;
ip_addr.l2tp_family = AF_INET;
ip_addr.l2tp_addr = cfg->local_ip;
ip_addr.l2tp_conn_id = tunnel_id;
- err = kernel_bind(sock, (struct sockaddr *) &ip_addr,
+ err = kernel_bind(sock, (struct sockaddr *)&ip_addr,
sizeof(ip_addr));
if (err < 0)
goto out;
@@ -1375,7 +1374,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
ip_addr.l2tp_family = AF_INET;
ip_addr.l2tp_addr = cfg->peer_ip;
ip_addr.l2tp_conn_id = peer_tunnel_id;
- err = kernel_connect(sock, (struct sockaddr *) &ip_addr,
+ err = kernel_connect(sock, (struct sockaddr *)&ip_addr,
sizeof(ip_addr), 0);
if (err < 0)
goto out;
@@ -1388,7 +1387,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
out:
*sockp = sock;
- if ((err < 0) && sock) {
+ if (err < 0 && sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
sock_release(sock);
*sockp = NULL;
@@ -1399,17 +1398,18 @@ out:
static struct lock_class_key l2tp_socket_class;
-int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
+int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
+ struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
{
struct l2tp_tunnel *tunnel = NULL;
int err;
enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
- if (cfg != NULL)
+ if (cfg)
encap = cfg->encap;
- tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
- if (tunnel == NULL) {
+ tunnel = kzalloc(sizeof(*tunnel), GFP_KERNEL);
+ if (!tunnel) {
err = -ENOMEM;
goto err;
}
@@ -1424,7 +1424,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
rwlock_init(&tunnel->hlist_lock);
tunnel->acpt_newsess = true;
- if (cfg != NULL)
+ if (cfg)
tunnel->debug = cfg->debug;
tunnel->encap = encap;
@@ -1563,13 +1563,13 @@ void l2tp_session_free(struct l2tp_session *session)
{
struct l2tp_tunnel *tunnel = session->tunnel;
- BUG_ON(refcount_read(&session->ref_count) != 0);
-
if (tunnel) {
- BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
+ if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC))
+ goto out;
l2tp_tunnel_dec_refcount(tunnel);
}
+out:
kfree(session);
}
EXPORT_SYMBOL_GPL(l2tp_session_free);
@@ -1593,6 +1593,7 @@ void __l2tp_session_unhash(struct l2tp_session *session)
/* For L2TPv3 we have a per-net hash: remove from there, too */
if (tunnel->version != L2TP_HDR_VER_2) {
struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+
spin_lock_bh(&pn->l2tp_session_hlist_lock);
hlist_del_init_rcu(&session->global_hlist);
spin_unlock_bh(&pn->l2tp_session_hlist_lock);
@@ -1603,7 +1604,7 @@ void __l2tp_session_unhash(struct l2tp_session *session)
EXPORT_SYMBOL_GPL(__l2tp_session_unhash);
/* This function is used by the netlink SESSION_DELETE command and by
- pseudowire modules.
+ * pseudowire modules.
*/
int l2tp_session_delete(struct l2tp_session *session)
{
@@ -1612,7 +1613,7 @@ int l2tp_session_delete(struct l2tp_session *session)
__l2tp_session_unhash(session);
l2tp_session_queue_purge(session);
- if (session->session_close != NULL)
+ if (session->session_close)
(*session->session_close)(session);
l2tp_session_dec_refcount(session);
@@ -1636,16 +1637,16 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP)
session->hdr_len += 4;
}
-
}
EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
-struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id,
+ u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
struct l2tp_session *session;
- session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
- if (session != NULL) {
+ session = kzalloc(sizeof(*session) + priv_size, GFP_KERNEL);
+ if (session) {
session->magic = L2TP_SESSION_MAGIC;
session->tunnel = tunnel;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 10cf7c3dcbb3..2d2dd219a176 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -1,6 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * L2TP internal definitions.
+/* L2TP internal definitions.
*
* Copyright (c) 2008,2009 Katalix Systems Ltd
*/
@@ -22,11 +21,11 @@
/* Per tunnel, session hash table size */
#define L2TP_HASH_BITS 4
-#define L2TP_HASH_SIZE (1 << L2TP_HASH_BITS)
+#define L2TP_HASH_SIZE BIT(L2TP_HASH_BITS)
/* System-wide, session hash table size */
#define L2TP_HASH_BITS_2 8
-#define L2TP_HASH_SIZE_2 (1 << L2TP_HASH_BITS_2)
+#define L2TP_HASH_SIZE_2 BIT(L2TP_HASH_BITS_2)
struct sk_buff;
@@ -49,32 +48,26 @@ struct l2tp_tunnel;
*/
struct l2tp_session_cfg {
enum l2tp_pwtype pw_type;
- unsigned int recv_seq:1; /* expect receive packets with
- * sequence numbers? */
- unsigned int send_seq:1; /* send packets with sequence
- * numbers? */
- unsigned int lns_mode:1; /* behave as LNS? LAC enables
- * sequence numbers under
- * control of LNS. */
- int debug; /* bitmask of debug message
- * categories */
+ unsigned int recv_seq:1; /* expect receive packets with sequence numbers? */
+ unsigned int send_seq:1; /* send packets with sequence numbers? */
+ unsigned int lns_mode:1; /* behave as LNS?
+ * LAC enables sequence numbers under LNS control.
+ */
+ int debug; /* bitmask of debug message categories */
u16 l2specific_type; /* Layer 2 specific type */
u8 cookie[8]; /* optional cookie */
int cookie_len; /* 0, 4 or 8 bytes */
u8 peer_cookie[8]; /* peer's cookie */
int peer_cookie_len; /* 0, 4 or 8 bytes */
- int reorder_timeout; /* configured reorder timeout
- * (in jiffies) */
+ int reorder_timeout; /* configured reorder timeout (in jiffies) */
char *ifname;
};
struct l2tp_session {
- int magic; /* should be
- * L2TP_SESSION_MAGIC */
+ int magic; /* should be L2TP_SESSION_MAGIC */
long dead;
- struct l2tp_tunnel *tunnel; /* back pointer to tunnel
- * context */
+ struct l2tp_tunnel *tunnel; /* back pointer to tunnel context */
u32 session_id;
u32 peer_session_id;
u8 cookie[8];
@@ -89,42 +82,37 @@ struct l2tp_session {
u32 nr_max; /* max NR. Depends on tunnel */
u32 nr_window_size; /* NR window size */
u32 nr_oos; /* NR of last OOS packet */
- int nr_oos_count; /* For OOS recovery */
+ int nr_oos_count; /* for OOS recovery */
int nr_oos_count_max;
- struct hlist_node hlist; /* Hash list node */
+ struct hlist_node hlist; /* hash list node */
refcount_t ref_count;
char name[32]; /* for logging */
char ifname[IFNAMSIZ];
- unsigned int recv_seq:1; /* expect receive packets with
- * sequence numbers? */
- unsigned int send_seq:1; /* send packets with sequence
- * numbers? */
- unsigned int lns_mode:1; /* behave as LNS? LAC enables
- * sequence numbers under
- * control of LNS. */
- int debug; /* bitmask of debug message
- * categories */
- int reorder_timeout; /* configured reorder timeout
- * (in jiffies) */
+ unsigned int recv_seq:1; /* expect receive packets with sequence numbers? */
+ unsigned int send_seq:1; /* send packets with sequence numbers? */
+ unsigned int lns_mode:1; /* behave as LNS?
+ * LAC enables sequence numbers under LNS control.
+ */
+ int debug; /* bitmask of debug message categories */
+ int reorder_timeout; /* configured reorder timeout (in jiffies) */
int reorder_skip; /* set if skip to next nr */
enum l2tp_pwtype pwtype;
struct l2tp_stats stats;
- struct hlist_node global_hlist; /* Global hash list node */
+ struct hlist_node global_hlist; /* global hash list node */
int (*build_header)(struct l2tp_session *session, void *buf);
void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len);
void (*session_close)(struct l2tp_session *session);
void (*show)(struct seq_file *m, void *priv);
- u8 priv[]; /* private data */
+ u8 priv[]; /* private data */
};
/* Describes the tunnel. It contains info to track all the associated
* sessions so incoming packets can be sorted out
*/
struct l2tp_tunnel_cfg {
- int debug; /* bitmask of debug message
- * categories */
+ int debug; /* bitmask of debug message categories */
enum l2tp_encap_type encap;
/* Used only for kernel-created sockets */
@@ -148,31 +136,29 @@ struct l2tp_tunnel {
struct rcu_head rcu;
rwlock_t hlist_lock; /* protect session_hlist */
- bool acpt_newsess; /* Indicates whether this
- * tunnel accepts new sessions.
- * Protected by hlist_lock.
+ bool acpt_newsess; /* indicates whether this tunnel accepts
+ * new sessions. Protected by hlist_lock.
*/
struct hlist_head session_hlist[L2TP_HASH_SIZE];
- /* hashed list of sessions,
- * hashed by id */
+ /* hashed list of sessions, hashed by id */
u32 tunnel_id;
u32 peer_tunnel_id;
int version; /* 2=>L2TPv2, 3=>L2TPv3 */
char name[20]; /* for logging */
- int debug; /* bitmask of debug message
- * categories */
+ int debug; /* bitmask of debug message categories */
enum l2tp_encap_type encap;
struct l2tp_stats stats;
- struct list_head list; /* Keep a list of all tunnels */
+ struct list_head list; /* list node on per-namespace list of tunnels */
struct net *l2tp_net; /* the net we belong to */
refcount_t ref_count;
- void (*old_sk_destruct)(struct sock *);
- struct sock *sock; /* Parent socket */
- int fd; /* Parent fd, if tunnel socket
- * was created by userspace */
+ void (*old_sk_destruct)(struct sock *sk);
+ struct sock *sock; /* parent socket */
+ int fd; /* parent fd, if tunnel socket was created
+ * by userspace
+ */
struct work_struct del_work;
};
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 35bb4f3bdbe0..96cb9601c21b 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * L2TP subsystem debugfs
+/* L2TP subsystem debugfs
*
* Copyright (c) 2010 Katalix Systems Ltd
*/
@@ -59,11 +58,10 @@ static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
pd->session_idx++;
- if (pd->session == NULL) {
+ if (!pd->session) {
pd->session_idx = 0;
l2tp_dfs_next_tunnel(pd);
}
-
}
static void *l2tp_dfs_seq_start(struct seq_file *m, loff_t *offs)
@@ -74,23 +72,25 @@ static void *l2tp_dfs_seq_start(struct seq_file *m, loff_t *offs)
if (!pos)
goto out;
- BUG_ON(m->private == NULL);
+ if (WARN_ON(!m->private)) {
+ pd = NULL;
+ goto out;
+ }
pd = m->private;
- if (pd->tunnel == NULL)
+ if (!pd->tunnel)
l2tp_dfs_next_tunnel(pd);
else
l2tp_dfs_next_session(pd);
/* NULL tunnel and session indicates end of list */
- if ((pd->tunnel == NULL) && (pd->session == NULL))
+ if (!pd->tunnel && !pd->session)
pd = NULL;
out:
return pd;
}
-
static void *l2tp_dfs_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
(*pos)++;
@@ -148,11 +148,13 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
const struct ipv6_pinfo *np = inet6_sk(tunnel->sock);
seq_printf(m, " from %pI6c to %pI6c\n",
- &np->saddr, &tunnel->sock->sk_v6_daddr);
- } else
+ &np->saddr, &tunnel->sock->sk_v6_daddr);
+ }
#endif
- seq_printf(m, " from %pI4 to %pI4\n",
- &inet->inet_saddr, &inet->inet_daddr);
+ if (tunnel->sock->sk_family == AF_INET)
+ seq_printf(m, " from %pI4 to %pI4\n",
+ &inet->inet_saddr, &inet->inet_daddr);
+
if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
seq_printf(m, " source port %hu, dest port %hu\n",
ntohs(inet->inet_sport), ntohs(inet->inet_dport));
@@ -202,7 +204,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
seq_printf(m, "%02x%02x%02x%02x",
session->cookie[4], session->cookie[5],
session->cookie[6], session->cookie[7]);
- seq_printf(m, "\n");
+ seq_puts(m, "\n");
}
if (session->peer_cookie_len) {
seq_printf(m, " peer cookie %02x%02x%02x%02x",
@@ -212,7 +214,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
seq_printf(m, "%02x%02x%02x%02x",
session->peer_cookie[4], session->peer_cookie[5],
session->peer_cookie[6], session->peer_cookie[7]);
- seq_printf(m, "\n");
+ seq_puts(m, "\n");
}
seq_printf(m, " %hu/%hu tx %ld/%ld/%ld rx %ld/%ld/%ld\n",
@@ -224,7 +226,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
atomic_long_read(&session->stats.rx_bytes),
atomic_long_read(&session->stats.rx_errors));
- if (session->show != NULL)
+ if (session->show)
session->show(m, session);
}
@@ -271,7 +273,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
int rc = -ENOMEM;
pd = kzalloc(sizeof(*pd), GFP_KERNEL);
- if (pd == NULL)
+ if (!pd)
goto out;
/* Derive the network namespace from the pid opening the
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 3099efa19249..7ed2b4eced94 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * L2TPv3 ethernet pseudowire driver
+/* L2TPv3 ethernet pseudowire driver
*
* Copyright (c) 2008,2009,2010 Katalix Systems Ltd
*/
@@ -51,7 +50,6 @@ struct l2tp_eth_sess {
struct net_device __rcu *dev;
};
-
static int l2tp_eth_dev_init(struct net_device *dev)
{
eth_hw_addr_random(dev);
@@ -94,13 +92,12 @@ static void l2tp_eth_get_stats64(struct net_device *dev,
{
struct l2tp_eth *priv = netdev_priv(dev);
- stats->tx_bytes = (unsigned long) atomic_long_read(&priv->tx_bytes);
- stats->tx_packets = (unsigned long) atomic_long_read(&priv->tx_packets);
- stats->tx_dropped = (unsigned long) atomic_long_read(&priv->tx_dropped);
- stats->rx_bytes = (unsigned long) atomic_long_read(&priv->rx_bytes);
- stats->rx_packets = (unsigned long) atomic_long_read(&priv->rx_packets);
- stats->rx_errors = (unsigned long) atomic_long_read(&priv->rx_errors);
-
+ stats->tx_bytes = (unsigned long)atomic_long_read(&priv->tx_bytes);
+ stats->tx_packets = (unsigned long)atomic_long_read(&priv->tx_packets);
+ stats->tx_dropped = (unsigned long)atomic_long_read(&priv->tx_dropped);
+ stats->rx_bytes = (unsigned long)atomic_long_read(&priv->rx_bytes);
+ stats->rx_packets = (unsigned long)atomic_long_read(&priv->rx_packets);
+ stats->rx_errors = (unsigned long)atomic_long_read(&priv->rx_errors);
}
static const struct net_device_ops l2tp_eth_netdev_ops = {
@@ -348,13 +345,11 @@ err:
return rc;
}
-
static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = {
.session_create = l2tp_eth_create,
.session_delete = l2tp_session_delete,
};
-
static int __init l2tp_eth_init(void)
{
int err = 0;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 2a3fd31fb589..a159cb2bf0f4 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * L2TPv3 IP encapsulation support
+/* L2TPv3 IP encapsulation support
*
* Copyright (c) 2008,2009,2010 Katalix Systems Ltd
*/
@@ -125,8 +124,9 @@ static int l2tp_ip_recv(struct sk_buff *skb)
goto discard;
/* Point to L2TP header */
- optr = ptr = skb->data;
- session_id = ntohl(*((__be32 *) ptr));
+ optr = skb->data;
+ ptr = skb->data;
+ session_id = ntohl(*((__be32 *)ptr));
ptr += 4;
/* RFC3931: L2TP/IP packets have the first 4 bytes containing
@@ -154,7 +154,8 @@ static int l2tp_ip_recv(struct sk_buff *skb)
goto discard_sess;
/* Point to L2TP header */
- optr = ptr = skb->data;
+ optr = skb->data;
+ ptr = skb->data;
ptr += 4;
pr_debug("%s: ip recv\n", tunnel->name);
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
@@ -176,7 +177,7 @@ pass_up:
if ((skb->data[0] & 0xc0) != 0xc0)
goto discard;
- tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
+ tunnel_id = ntohl(*(__be32 *)&skb->data[4]);
iph = (struct iphdr *)skb_network_header(skb);
read_lock_bh(&l2tp_ip_lock);
@@ -260,7 +261,7 @@ static void l2tp_ip_destroy_sock(struct sock *sk)
static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
- struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr;
+ struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *)uaddr;
struct net *net = sock_net(sk);
int ret;
int chk_addr_ret;
@@ -285,8 +286,10 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
goto out;
- if (addr->l2tp_addr.s_addr)
- inet->inet_rcv_saddr = inet->inet_saddr = addr->l2tp_addr.s_addr;
+ if (addr->l2tp_addr.s_addr) {
+ inet->inet_rcv_saddr = addr->l2tp_addr.s_addr;
+ inet->inet_saddr = addr->l2tp_addr.s_addr;
+ }
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
inet->inet_saddr = 0; /* Use device */
@@ -316,7 +319,7 @@ out:
static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr;
+ struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr;
int rc;
if (addr_len < sizeof(*lsa))
@@ -375,6 +378,7 @@ static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
lsa->l2tp_addr.s_addr = inet->inet_daddr;
} else {
__be32 addr = inet->inet_rcv_saddr;
+
if (!addr)
addr = inet->inet_saddr;
lsa->l2tp_conn_id = lsk->conn_id;
@@ -422,6 +426,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* Get and verify the address. */
if (msg->msg_name) {
DECLARE_SOCKADDR(struct sockaddr_l2tpip *, lip, msg->msg_name);
+
rc = -EINVAL;
if (msg->msg_namelen < sizeof(*lip))
goto out;
@@ -456,7 +461,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
skb_reset_transport_header(skb);
/* Insert 0 session_id */
- *((__be32 *) skb_put(skb, 4)) = 0;
+ *((__be32 *)skb_put(skb, 4)) = 0;
/* Copy user data into skb */
rc = memcpy_from_msg(skb_put(skb, len), msg, len);
@@ -467,10 +472,10 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl4 = &inet->cork.fl.u.ip4;
if (connected)
- rt = (struct rtable *) __sk_dst_check(sk, 0);
+ rt = (struct rtable *)__sk_dst_check(sk, 0);
rcu_read_lock();
- if (rt == NULL) {
+ if (!rt) {
const struct ip_options_rcu *inet_opt;
inet_opt = rcu_dereference(inet->inet_opt);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 4799bec87b33..bc757bc7e264 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * L2TPv3 IP encapsulation support for IPv6
+/* L2TPv3 IP encapsulation support for IPv6
*
* Copyright (c) 2012 Katalix Systems Ltd
*/
@@ -38,7 +37,8 @@ struct l2tp_ip6_sock {
u32 peer_conn_id;
/* ipv6_pinfo has to be the last member of l2tp_ip6_sock, see
- inet6_sk_generic */
+ * inet6_sk_generic
+ */
struct ipv6_pinfo inet6;
};
@@ -137,8 +137,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
goto discard;
/* Point to L2TP header */
- optr = ptr = skb->data;
- session_id = ntohl(*((__be32 *) ptr));
+ optr = skb->data;
+ ptr = skb->data;
+ session_id = ntohl(*((__be32 *)ptr));
ptr += 4;
/* RFC3931: L2TP/IP packets have the first 4 bytes containing
@@ -166,7 +167,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
goto discard_sess;
/* Point to L2TP header */
- optr = ptr = skb->data;
+ optr = skb->data;
+ ptr = skb->data;
ptr += 4;
pr_debug("%s: ip recv\n", tunnel->name);
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
@@ -188,7 +190,7 @@ pass_up:
if ((skb->data[0] & 0xc0) != 0xc0)
goto discard;
- tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
+ tunnel_id = ntohl(*(__be32 *)&skb->data[4]);
iph = ipv6_hdr(skb);
read_lock_bh(&l2tp_ip6_lock);
@@ -276,7 +278,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct sockaddr_l2tpip6 *addr = (struct sockaddr_l2tpip6 *) uaddr;
+ struct sockaddr_l2tpip6 *addr = (struct sockaddr_l2tpip6 *)uaddr;
struct net *net = sock_net(sk);
__be32 v4addr = 0;
int bound_dev_if;
@@ -375,8 +377,8 @@ out_unlock:
static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
- struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *) uaddr;
- struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
+ struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)uaddr;
+ struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
struct in6_addr *daddr;
int addr_type;
int rc;
@@ -486,7 +488,7 @@ static int l2tp_ip6_push_pending_frames(struct sock *sk)
int err = 0;
skb = skb_peek(&sk->sk_write_queue);
- if (skb == NULL)
+ if (!skb)
goto out;
transhdr = (__be32 *)skb_transport_header(skb);
@@ -519,7 +521,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int err;
/* Rough check on arithmetic overflow,
- better check is made in ip6_append_data().
+ * better check is made in ip6_append_data().
*/
if (len > INT_MAX)
return -EMSGSIZE;
@@ -528,9 +530,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
- /*
- * Get and verify the address.
- */
+ /* Get and verify the address */
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_mark = sk->sk_mark;
@@ -548,15 +548,14 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
daddr = &lsa->l2tp_addr;
if (np->sndflow) {
fl6.flowlabel = lsa->l2tp_flowinfo & IPV6_FLOWINFO_MASK;
- if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+ if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (IS_ERR(flowlabel))
return -EINVAL;
}
}
- /*
- * Otherwise it will be difficult to maintain
+ /* Otherwise it will be difficult to maintain
* sk->sk_dst_cache.
*/
if (sk->sk_state == TCP_ESTABLISHED &&
@@ -594,7 +593,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (IS_ERR(flowlabel))
return -EINVAL;
}
- if (!(opt->opt_nflen|opt->opt_flen))
+ if (!(opt->opt_nflen | opt->opt_flen))
opt = NULL;
}
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index ebb381c3f1b9..35716a6e1e2c 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/*
- * L2TP netlink layer, for management
+/* L2TP netlink layer, for management
*
* Copyright (c) 2008,2009,2010 Katalix Systems Ltd
*
@@ -27,7 +26,6 @@
#include "l2tp_core.h"
-
static struct genl_family l2tp_nl_family;
static const struct genl_multicast_group l2tp_multicast_group[] = {
@@ -157,81 +155,82 @@ static int l2tp_session_notify(struct genl_family *family,
return ret;
}
+static int l2tp_nl_cmd_tunnel_create_get_addr(struct nlattr **attrs, struct l2tp_tunnel_cfg *cfg)
+{
+ if (attrs[L2TP_ATTR_UDP_SPORT])
+ cfg->local_udp_port = nla_get_u16(attrs[L2TP_ATTR_UDP_SPORT]);
+ if (attrs[L2TP_ATTR_UDP_DPORT])
+ cfg->peer_udp_port = nla_get_u16(attrs[L2TP_ATTR_UDP_DPORT]);
+ cfg->use_udp_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_CSUM]);
+
+ /* Must have either AF_INET or AF_INET6 address for source and destination */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (attrs[L2TP_ATTR_IP6_SADDR] && attrs[L2TP_ATTR_IP6_DADDR]) {
+ cfg->local_ip6 = nla_data(attrs[L2TP_ATTR_IP6_SADDR]);
+ cfg->peer_ip6 = nla_data(attrs[L2TP_ATTR_IP6_DADDR]);
+ cfg->udp6_zero_tx_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]);
+ cfg->udp6_zero_rx_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]);
+ return 0;
+ }
+#endif
+ if (attrs[L2TP_ATTR_IP_SADDR] && attrs[L2TP_ATTR_IP_DADDR]) {
+ cfg->local_ip.s_addr = nla_get_in_addr(attrs[L2TP_ATTR_IP_SADDR]);
+ cfg->peer_ip.s_addr = nla_get_in_addr(attrs[L2TP_ATTR_IP_DADDR]);
+ return 0;
+ }
+ return -EINVAL;
+}
+
static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info)
{
u32 tunnel_id;
u32 peer_tunnel_id;
int proto_version;
- int fd;
+ int fd = -1;
int ret = 0;
struct l2tp_tunnel_cfg cfg = { 0, };
struct l2tp_tunnel *tunnel;
struct net *net = genl_info_net(info);
+ struct nlattr **attrs = info->attrs;
- if (!info->attrs[L2TP_ATTR_CONN_ID]) {
+ if (!attrs[L2TP_ATTR_CONN_ID]) {
ret = -EINVAL;
goto out;
}
- tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+ tunnel_id = nla_get_u32(attrs[L2TP_ATTR_CONN_ID]);
- if (!info->attrs[L2TP_ATTR_PEER_CONN_ID]) {
+ if (!attrs[L2TP_ATTR_PEER_CONN_ID]) {
ret = -EINVAL;
goto out;
}
- peer_tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_CONN_ID]);
+ peer_tunnel_id = nla_get_u32(attrs[L2TP_ATTR_PEER_CONN_ID]);
- if (!info->attrs[L2TP_ATTR_PROTO_VERSION]) {
+ if (!attrs[L2TP_ATTR_PROTO_VERSION]) {
ret = -EINVAL;
goto out;
}
- proto_version = nla_get_u8(info->attrs[L2TP_ATTR_PROTO_VERSION]);
+ proto_version = nla_get_u8(attrs[L2TP_ATTR_PROTO_VERSION]);
- if (!info->attrs[L2TP_ATTR_ENCAP_TYPE]) {
+ if (!attrs[L2TP_ATTR_ENCAP_TYPE]) {
ret = -EINVAL;
goto out;
}
- cfg.encap = nla_get_u16(info->attrs[L2TP_ATTR_ENCAP_TYPE]);
-
- fd = -1;
- if (info->attrs[L2TP_ATTR_FD]) {
- fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]);
+ cfg.encap = nla_get_u16(attrs[L2TP_ATTR_ENCAP_TYPE]);
+
+ /* Managed tunnels take the tunnel socket from userspace.
+ * Unmanaged tunnels must call out the source and destination addresses
+ * for the kernel to create the tunnel socket itself.
+ */
+ if (attrs[L2TP_ATTR_FD]) {
+ fd = nla_get_u32(attrs[L2TP_ATTR_FD]);
} else {
-#if IS_ENABLED(CONFIG_IPV6)
- if (info->attrs[L2TP_ATTR_IP6_SADDR] &&
- info->attrs[L2TP_ATTR_IP6_DADDR]) {
- cfg.local_ip6 = nla_data(
- info->attrs[L2TP_ATTR_IP6_SADDR]);
- cfg.peer_ip6 = nla_data(
- info->attrs[L2TP_ATTR_IP6_DADDR]);
- } else
-#endif
- if (info->attrs[L2TP_ATTR_IP_SADDR] &&
- info->attrs[L2TP_ATTR_IP_DADDR]) {
- cfg.local_ip.s_addr = nla_get_in_addr(
- info->attrs[L2TP_ATTR_IP_SADDR]);
- cfg.peer_ip.s_addr = nla_get_in_addr(
- info->attrs[L2TP_ATTR_IP_DADDR]);
- } else {
- ret = -EINVAL;
+ ret = l2tp_nl_cmd_tunnel_create_get_addr(attrs, &cfg);
+ if (ret < 0)
goto out;
- }
- if (info->attrs[L2TP_ATTR_UDP_SPORT])
- cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]);
- if (info->attrs[L2TP_ATTR_UDP_DPORT])
- cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]);
- cfg.use_udp_checksums = nla_get_flag(
- info->attrs[L2TP_ATTR_UDP_CSUM]);
-
-#if IS_ENABLED(CONFIG_IPV6)
- cfg.udp6_zero_tx_checksums = nla_get_flag(
- info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]);
- cfg.udp6_zero_rx_checksums = nla_get_flag(
- info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]);
-#endif
}
- if (info->attrs[L2TP_ATTR_DEBUG])
- cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
+ if (attrs[L2TP_ATTR_DEBUG])
+ cfg.debug = nla_get_u32(attrs[L2TP_ATTR_DEBUG]);
ret = -EINVAL;
switch (cfg.encap) {
@@ -320,16 +319,79 @@ out:
return ret;
}
+#if IS_ENABLED(CONFIG_IPV6)
+static int l2tp_nl_tunnel_send_addr6(struct sk_buff *skb, struct sock *sk,
+ enum l2tp_encap_type encap)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ switch (encap) {
+ case L2TP_ENCAPTYPE_UDP:
+ if (udp_get_no_check6_tx(sk) &&
+ nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_TX))
+ return -1;
+ if (udp_get_no_check6_rx(sk) &&
+ nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_RX))
+ return -1;
+ if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
+ nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
+ return -1;
+ fallthrough;
+ case L2TP_ENCAPTYPE_IP:
+ if (nla_put_in6_addr(skb, L2TP_ATTR_IP6_SADDR, &np->saddr) ||
+ nla_put_in6_addr(skb, L2TP_ATTR_IP6_DADDR, &sk->sk_v6_daddr))
+ return -1;
+ break;
+ }
+ return 0;
+}
+#endif
+
+static int l2tp_nl_tunnel_send_addr4(struct sk_buff *skb, struct sock *sk,
+ enum l2tp_encap_type encap)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ switch (encap) {
+ case L2TP_ENCAPTYPE_UDP:
+ if (nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx) ||
+ nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
+ nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
+ return -1;
+ fallthrough;
+ case L2TP_ENCAPTYPE_IP:
+ if (nla_put_in_addr(skb, L2TP_ATTR_IP_SADDR, inet->inet_saddr) ||
+ nla_put_in_addr(skb, L2TP_ATTR_IP_DADDR, inet->inet_daddr))
+ return -1;
+ break;
+ }
+
+ return 0;
+}
+
+/* Append attributes for the tunnel address, handling the different attribute types
+ * used for different tunnel encapsulation and AF_INET v.s. AF_INET6.
+ */
+static int l2tp_nl_tunnel_send_addr(struct sk_buff *skb, struct l2tp_tunnel *tunnel)
+{
+ struct sock *sk = tunnel->sock;
+
+ if (!sk)
+ return 0;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ return l2tp_nl_tunnel_send_addr6(skb, sk, tunnel->encap);
+#endif
+ return l2tp_nl_tunnel_send_addr4(skb, sk, tunnel->encap);
+}
+
static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags,
struct l2tp_tunnel *tunnel, u8 cmd)
{
void *hdr;
struct nlattr *nest;
- struct sock *sk = NULL;
- struct inet_sock *inet;
-#if IS_ENABLED(CONFIG_IPV6)
- struct ipv6_pinfo *np = NULL;
-#endif
hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd);
if (!hdr)
@@ -343,7 +405,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
goto nla_put_failure;
nest = nla_nest_start_noflag(skb, L2TP_ATTR_STATS);
- if (nest == NULL)
+ if (!nest)
goto nla_put_failure;
if (nla_put_u64_64bit(skb, L2TP_ATTR_TX_PACKETS,
@@ -373,58 +435,9 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
goto nla_put_failure;
nla_nest_end(skb, nest);
- sk = tunnel->sock;
- if (!sk)
- goto out;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- np = inet6_sk(sk);
-#endif
-
- inet = inet_sk(sk);
-
- switch (tunnel->encap) {
- case L2TP_ENCAPTYPE_UDP:
- switch (sk->sk_family) {
- case AF_INET:
- if (nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx))
- goto nla_put_failure;
- break;
-#if IS_ENABLED(CONFIG_IPV6)
- case AF_INET6:
- if (udp_get_no_check6_tx(sk) &&
- nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_TX))
- goto nla_put_failure;
- if (udp_get_no_check6_rx(sk) &&
- nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_RX))
- goto nla_put_failure;
- break;
-#endif
- }
- if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
- nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
- goto nla_put_failure;
- /* fall through */
- case L2TP_ENCAPTYPE_IP:
-#if IS_ENABLED(CONFIG_IPV6)
- if (np) {
- if (nla_put_in6_addr(skb, L2TP_ATTR_IP6_SADDR,
- &np->saddr) ||
- nla_put_in6_addr(skb, L2TP_ATTR_IP6_DADDR,
- &sk->sk_v6_daddr))
- goto nla_put_failure;
- } else
-#endif
- if (nla_put_in_addr(skb, L2TP_ATTR_IP_SADDR,
- inet->inet_saddr) ||
- nla_put_in_addr(skb, L2TP_ATTR_IP_DADDR,
- inet->inet_daddr))
- goto nla_put_failure;
- break;
- }
+ if (l2tp_nl_tunnel_send_addr(skb, tunnel))
+ goto nla_put_failure;
-out:
genlmsg_end(skb, hdr);
return 0;
@@ -485,7 +498,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback
for (;;) {
tunnel = l2tp_tunnel_get_nth(net, ti);
- if (tunnel == NULL)
+ if (!tunnel)
goto out;
if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid,
@@ -570,6 +583,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
if (info->attrs[L2TP_ATTR_COOKIE]) {
u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]);
+
if (len > 8) {
ret = -EINVAL;
goto out_tunnel;
@@ -579,6 +593,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
}
if (info->attrs[L2TP_ATTR_PEER_COOKIE]) {
u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]);
+
if (len > 8) {
ret = -EINVAL;
goto out_tunnel;
@@ -606,14 +621,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
cfg.reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
#ifdef CONFIG_MODULES
- if (l2tp_nl_cmd_ops[cfg.pw_type] == NULL) {
+ if (!l2tp_nl_cmd_ops[cfg.pw_type]) {
genl_unlock();
request_module("net-l2tp-type-%u", cfg.pw_type);
genl_lock();
}
#endif
- if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) ||
- (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) {
+ if (!l2tp_nl_cmd_ops[cfg.pw_type] || !l2tp_nl_cmd_ops[cfg.pw_type]->session_create) {
ret = -EPROTONOSUPPORT;
goto out_tunnel;
}
@@ -645,7 +659,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
u16 pw_type;
session = l2tp_nl_session_get(info);
- if (session == NULL) {
+ if (!session) {
ret = -ENODEV;
goto out;
}
@@ -670,7 +684,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
struct l2tp_session *session;
session = l2tp_nl_session_get(info);
- if (session == NULL) {
+ if (!session) {
ret = -ENODEV;
goto out;
}
@@ -715,8 +729,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
if (nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) ||
nla_put_u32(skb, L2TP_ATTR_SESSION_ID, session->session_id) ||
nla_put_u32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id) ||
- nla_put_u32(skb, L2TP_ATTR_PEER_SESSION_ID,
- session->peer_session_id) ||
+ nla_put_u32(skb, L2TP_ATTR_PEER_SESSION_ID, session->peer_session_id) ||
nla_put_u32(skb, L2TP_ATTR_DEBUG, session->debug) ||
nla_put_u16(skb, L2TP_ATTR_PW_TYPE, session->pwtype))
goto nla_put_failure;
@@ -724,11 +737,9 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
if ((session->ifname[0] &&
nla_put_string(skb, L2TP_ATTR_IFNAME, session->ifname)) ||
(session->cookie_len &&
- nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len,
- &session->cookie[0])) ||
+ nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len, session->cookie)) ||
(session->peer_cookie_len &&
- nla_put(skb, L2TP_ATTR_PEER_COOKIE, session->peer_cookie_len,
- &session->peer_cookie[0])) ||
+ nla_put(skb, L2TP_ATTR_PEER_COOKIE, session->peer_cookie_len, session->peer_cookie)) ||
nla_put_u8(skb, L2TP_ATTR_RECV_SEQ, session->recv_seq) ||
nla_put_u8(skb, L2TP_ATTR_SEND_SEQ, session->send_seq) ||
nla_put_u8(skb, L2TP_ATTR_LNS_MODE, session->lns_mode) ||
@@ -740,7 +751,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
goto nla_put_failure;
nest = nla_nest_start_noflag(skb, L2TP_ATTR_STATS);
- if (nest == NULL)
+ if (!nest)
goto nla_put_failure;
if (nla_put_u64_64bit(skb, L2TP_ATTR_TX_PACKETS,
@@ -785,7 +796,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
int ret;
session = l2tp_nl_session_get(info);
- if (session == NULL) {
+ if (!session) {
ret = -ENODEV;
goto err;
}
@@ -824,14 +835,14 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
int si = cb->args[1];
for (;;) {
- if (tunnel == NULL) {
+ if (!tunnel) {
tunnel = l2tp_tunnel_get_nth(net, ti);
- if (tunnel == NULL)
+ if (!tunnel)
goto out;
}
session = l2tp_session_get_nth(tunnel, si);
- if (session == NULL) {
+ if (!session) {
ti++;
l2tp_tunnel_dec_refcount(tunnel);
tunnel = NULL;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index c54cb59593ef..13c3153b40d6 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -117,8 +117,7 @@ struct pppol2tp_session {
int owner; /* pid that opened the socket */
struct mutex sk_lock; /* Protects .sk */
- struct sock __rcu *sk; /* Pointer to the session
- * PPPoX socket */
+ struct sock __rcu *sk; /* Pointer to the session PPPoX socket */
struct sock *__sk; /* Copy of .sk, for cleanup */
struct rcu_head rcu; /* For asynchronous release */
};
@@ -155,17 +154,20 @@ static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
{
struct l2tp_session *session;
- if (sk == NULL)
+ if (!sk)
return NULL;
sock_hold(sk);
session = (struct l2tp_session *)(sk->sk_user_data);
- if (session == NULL) {
+ if (!session) {
+ sock_put(sk);
+ goto out;
+ }
+ if (WARN_ON(session->magic != L2TP_SESSION_MAGIC)) {
+ session = NULL;
sock_put(sk);
goto out;
}
-
- BUG_ON(session->magic != L2TP_SESSION_MAGIC);
out:
return session;
@@ -218,7 +220,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
*/
rcu_read_lock();
sk = rcu_dereference(ps->sk);
- if (sk == NULL)
+ if (!sk)
goto no_sock;
/* If the first two bytes are 0xFF03, consider that it is the PPP's
@@ -286,7 +288,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
/* Get session and tunnel contexts */
error = -EBADF;
session = pppol2tp_sock_to_session(sk);
- if (session == NULL)
+ if (!session)
goto error;
tunnel = session->tunnel;
@@ -351,7 +353,7 @@ error:
*/
static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
{
- struct sock *sk = (struct sock *) chan->private;
+ struct sock *sk = (struct sock *)chan->private;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
int uhlen, headroom;
@@ -361,7 +363,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
/* Get session and tunnel contexts from the socket */
session = pppol2tp_sock_to_session(sk);
- if (session == NULL)
+ if (!session)
goto abort;
tunnel = session->tunnel;
@@ -420,7 +422,8 @@ static void pppol2tp_session_destruct(struct sock *sk)
if (session) {
sk->sk_user_data = NULL;
- BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+ if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
+ return;
l2tp_session_dec_refcount(session);
}
}
@@ -704,7 +707,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
* tunnel id.
*/
if (!info.session_id && !info.peer_session_id) {
- if (tunnel == NULL) {
+ if (!tunnel) {
struct l2tp_tunnel_cfg tcfg = {
.encap = L2TP_ENCAPTYPE_UDP,
.debug = 0,
@@ -739,11 +742,11 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
} else {
/* Error if we can't find the tunnel */
error = -ENOENT;
- if (tunnel == NULL)
+ if (!tunnel)
goto end;
/* Error if socket is not prepped */
- if (tunnel->sock == NULL)
+ if (!tunnel->sock)
goto end;
}
@@ -803,8 +806,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
* the internal context for use by ioctl() and sockopt()
* handlers.
*/
- if ((session->session_id == 0) &&
- (session->peer_session_id == 0)) {
+ if (session->session_id == 0 && session->peer_session_id == 0) {
error = 0;
goto out_no_ppp;
}
@@ -912,22 +914,23 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
struct pppol2tp_session *pls;
error = -ENOTCONN;
- if (sk == NULL)
+ if (!sk)
goto end;
if (!(sk->sk_state & PPPOX_CONNECTED))
goto end;
error = -EBADF;
session = pppol2tp_sock_to_session(sk);
- if (session == NULL)
+ if (!session)
goto end;
pls = l2tp_session_priv(session);
tunnel = session->tunnel;
inet = inet_sk(tunnel->sock);
- if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET)) {
+ if (tunnel->version == 2 && tunnel->sock->sk_family == AF_INET) {
struct sockaddr_pppol2tp sp;
+
len = sizeof(sp);
memset(&sp, 0, len);
sp.sa_family = AF_PPPOX;
@@ -943,8 +946,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
memcpy(uaddr, &sp, len);
#if IS_ENABLED(CONFIG_IPV6)
- } else if ((tunnel->version == 2) &&
- (tunnel->sock->sk_family == AF_INET6)) {
+ } else if (tunnel->version == 2 && tunnel->sock->sk_family == AF_INET6) {
struct sockaddr_pppol2tpin6 sp;
len = sizeof(sp);
@@ -962,8 +964,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr,
sizeof(tunnel->sock->sk_v6_daddr));
memcpy(uaddr, &sp, len);
- } else if ((tunnel->version == 3) &&
- (tunnel->sock->sk_family == AF_INET6)) {
+ } else if (tunnel->version == 3 && tunnel->sock->sk_family == AF_INET6) {
struct sockaddr_pppol2tpv3in6 sp;
len = sizeof(sp);
@@ -984,6 +985,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
#endif
} else if (tunnel->version == 3) {
struct sockaddr_pppol2tpv3 sp;
+
len = sizeof(sp);
memset(&sp, 0, len);
sp.sa_family = AF_PPPOX;
@@ -1178,7 +1180,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
switch (optname) {
case PPPOL2TP_SO_RECVSEQ:
- if ((val != 0) && (val != 1)) {
+ if (val != 0 && val != 1) {
err = -EINVAL;
break;
}
@@ -1189,7 +1191,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
break;
case PPPOL2TP_SO_SENDSEQ:
- if ((val != 0) && (val != 1)) {
+ if (val != 0 && val != 1) {
err = -EINVAL;
break;
}
@@ -1207,7 +1209,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
break;
case PPPOL2TP_SO_LNSMODE:
- if ((val != 0) && (val != 1)) {
+ if (val != 0 && val != 1) {
err = -EINVAL;
break;
}
@@ -1244,7 +1246,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
* session or the special tunnel type.
*/
static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct l2tp_session *session;
@@ -1258,23 +1260,22 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
return -EFAULT;
err = -ENOTCONN;
- if (sk->sk_user_data == NULL)
+ if (!sk->sk_user_data)
goto end;
/* Get session context from the socket */
err = -EBADF;
session = pppol2tp_sock_to_session(sk);
- if (session == NULL)
+ if (!session)
goto end;
/* Special case: if session_id == 0x0000, treat as operation on tunnel
*/
- if ((session->session_id == 0) &&
- (session->peer_session_id == 0)) {
+ if (session->session_id == 0 && session->peer_session_id == 0) {
tunnel = session->tunnel;
err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
} else {
@@ -1343,7 +1344,7 @@ static int pppol2tp_session_getsockopt(struct sock *sk,
break;
case PPPOL2TP_SO_REORDERTO:
- *val = (int) jiffies_to_msecs(session->reorder_timeout);
+ *val = (int)jiffies_to_msecs(session->reorder_timeout);
l2tp_info(session, L2TP_MSG_CONTROL,
"%s: get reorder_timeout=%d\n", session->name, *val);
break;
@@ -1381,18 +1382,17 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
err = -ENOTCONN;
- if (sk->sk_user_data == NULL)
+ if (!sk->sk_user_data)
goto end;
/* Get the session context */
err = -EBADF;
session = pppol2tp_sock_to_session(sk);
- if (session == NULL)
+ if (!session)
goto end;
/* Special case: if session_id == 0x0000, treat as operation on tunnel */
- if ((session->session_id == 0) &&
- (session->peer_session_id == 0)) {
+ if (session->session_id == 0 && session->peer_session_id == 0) {
tunnel = session->tunnel;
err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
if (err)
@@ -1407,7 +1407,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
if (put_user(len, optlen))
goto end_put_sess;
- if (copy_to_user((void __user *) optval, &val, len))
+ if (copy_to_user((void __user *)optval, &val, len))
goto end_put_sess;
err = 0;
@@ -1463,7 +1463,7 @@ static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
pd->session_idx++;
- if (pd->session == NULL) {
+ if (!pd->session) {
pd->session_idx = 0;
pppol2tp_next_tunnel(net, pd);
}
@@ -1478,17 +1478,21 @@ static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs)
if (!pos)
goto out;
- BUG_ON(m->private == NULL);
+ if (WARN_ON(!m->private)) {
+ pd = NULL;
+ goto out;
+ }
+
pd = m->private;
net = seq_file_net(m);
- if (pd->tunnel == NULL)
+ if (!pd->tunnel)
pppol2tp_next_tunnel(net, pd);
else
pppol2tp_next_session(net, pd);
/* NULL tunnel and session indicates end of list */
- if ((pd->tunnel == NULL) && (pd->session == NULL))
+ if (!pd->tunnel && !pd->session)
pd = NULL;
out:
@@ -1551,6 +1555,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
if (tunnel->sock) {
struct inet_sock *inet = inet_sk(tunnel->sock);
+
ip = ntohl(inet->inet_saddr);
port = ntohs(inet->inet_sport);
}
@@ -1564,8 +1569,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
user_data_ok = 'N';
}
- seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> "
- "%04X/%04X %d %c\n",
+ seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> %04X/%04X %d %c\n",
session->name, ip, port,
tunnel->tunnel_id,
session->session_id,
@@ -1604,8 +1608,7 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v)
seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n");
seq_puts(m, "TUNNEL name, user-data-ok session-count\n");
seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
- seq_puts(m, " SESSION name, addr/port src-tid/sid "
- "dest-tid/sid state user-data-ok\n");
+ seq_puts(m, " SESSION name, addr/port src-tid/sid dest-tid/sid state user-data-ok\n");
seq_puts(m, " mtu/mru/rcvseq/sendseq/lns debug reorderto\n");
seq_puts(m, " nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
goto out;
@@ -1638,7 +1641,7 @@ static __net_init int pppol2tp_init_net(struct net *net)
int err = 0;
pde = proc_create_net("pppol2tp", 0444, net->proc_net,
- &pppol2tp_seq_ops, sizeof(struct pppol2tp_seq_data));
+ &pppol2tp_seq_ops, sizeof(struct pppol2tp_seq_data));
if (!pde) {
err = -ENOMEM;
goto out;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 6140a3e46c26..7180979114e4 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -1053,7 +1053,7 @@ static int llc_ui_ioctl(struct socket *sock, unsigned int cmd,
* Set various connection specific parameters.
*/
static int llc_ui_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
@@ -1063,7 +1063,7 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname,
lock_sock(sk);
if (unlikely(level != SOL_LLC || optlen != sizeof(int)))
goto out;
- rc = get_user(opt, (int __user *)optval);
+ rc = copy_from_sockptr(&opt, optval, sizeof(opt));
if (rc)
goto out;
rc = -EINVAL;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 19707c07efc1..3bc56eb608d8 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -709,6 +709,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
* additional ack.
*/
subflow->fully_established = 1;
+ WRITE_ONCE(msk->fully_established, true);
goto fully_established;
}
@@ -724,9 +725,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
- subflow->fully_established = 1;
- subflow->remote_key = mp_opt->sndr_key;
- subflow->can_ack = 1;
+ mptcp_subflow_fully_established(subflow, mp_opt);
fully_established:
if (likely(subflow->pm_notified))
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f0b0b503c262..2891ae8a1028 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -460,15 +460,20 @@ static void mptcp_clean_una(struct sock *sk)
dfrag = mptcp_rtx_head(sk);
if (dfrag && after64(snd_una, dfrag->data_seq)) {
- u64 delta = dfrag->data_seq + dfrag->data_len - snd_una;
+ u64 delta = snd_una - dfrag->data_seq;
+
+ if (WARN_ON_ONCE(delta > dfrag->data_len))
+ goto out;
dfrag->data_seq += delta;
+ dfrag->offset += delta;
dfrag->data_len -= delta;
dfrag_uncharge(sk, delta);
cleaned = true;
}
+out:
if (cleaned) {
sk_mem_reclaim_partial(sk);
@@ -1517,6 +1522,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
msk->subflow = NULL;
+ WRITE_ONCE(msk->fully_established, false);
msk->write_seq = subflow_req->idsn + 1;
atomic64_set(&msk->snd_una, msk->write_seq);
@@ -1600,7 +1606,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
newsk = new_mptcp_sock;
mptcp_copy_inaddrs(newsk, ssk);
list_add(&subflow->node, &msk->conn_list);
- inet_sk_state_store(newsk, TCP_ESTABLISHED);
mptcp_rcv_space_init(msk, ssk);
bh_unlock_sock(new_mptcp_sock);
@@ -1627,7 +1632,7 @@ static void mptcp_destroy(struct sock *sk)
}
static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
struct socket *ssock;
@@ -1658,7 +1663,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
}
static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
int ret = -EOPNOTSUPP;
@@ -1685,7 +1690,7 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
}
static int mptcp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct sock *ssk;
@@ -1814,7 +1819,6 @@ void mptcp_finish_connect(struct sock *ssk)
ack_seq++;
subflow->map_seq = ack_seq;
subflow->map_subflow_seq = 1;
- subflow->rel_write_seq = 1;
/* the socket is not connected yet, no msk/subflow ops can access/race
* accessing the field below
@@ -1851,7 +1855,7 @@ bool mptcp_finish_join(struct sock *sk)
pr_debug("msk=%p, subflow=%p", msk, subflow);
/* mptcp socket already closing? */
- if (inet_sk_state_load(parent) != TCP_ESTABLISHED)
+ if (!mptcp_is_fully_established(parent))
return false;
if (!msk->pm.server_side)
@@ -1940,6 +1944,13 @@ unlock:
return err;
}
+static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow)
+{
+ subflow->request_mptcp = 0;
+ __mptcp_do_fallback(msk);
+}
+
static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
@@ -1971,10 +1982,10 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
* TCP option space.
*/
if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
- subflow->request_mptcp = 0;
+ mptcp_subflow_early_fallback(msk, subflow);
#endif
if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk))
- subflow->request_mptcp = 0;
+ mptcp_subflow_early_fallback(msk, subflow);
do_connect:
err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index e5baaef5ec89..67634b595466 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -198,6 +198,7 @@ struct mptcp_sock {
u32 token;
unsigned long flags;
bool can_ack;
+ bool fully_established;
spinlock_t join_list_lock;
struct work_struct work;
struct list_head conn_list;
@@ -342,6 +343,8 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
}
int mptcp_is_enabled(struct net *net);
+void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
+ struct mptcp_options_received *mp_opt);
bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
@@ -373,6 +376,11 @@ void mptcp_get_options(const struct sk_buff *skb,
struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk);
+static inline bool mptcp_is_fully_established(struct sock *sk)
+{
+ return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
+ READ_ONCE(mptcp_sk(sk)->fully_established);
+}
void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
@@ -396,17 +404,6 @@ struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
void mptcp_token_destroy(struct mptcp_sock *msk);
void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
-static inline void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn)
-{
- /* we might consider a faster version that computes the key as a
- * hash of some information available in the MPTCP socket. Use
- * random data at the moment, as it's probably the safest option
- * in case multiple sockets are opened in different namespaces at
- * the same time.
- */
- get_random_bytes(key, sizeof(u64));
- mptcp_crypto_key_sha(*key, token, idsn);
-}
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 519122e66f17..e645483d1200 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -53,6 +53,12 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}
+static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
+{
+ return mptcp_is_fully_established((void *)msk) &&
+ READ_ONCE(msk->pm.accept_subflow);
+}
+
/* validate received token and create truncated hmac and nonce for SYN-ACK */
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
const struct sk_buff *skb)
@@ -200,49 +206,40 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (subflow->conn_finished)
return;
+ subflow->rel_write_seq = 1;
subflow->conn_finished = 1;
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
mptcp_get_options(skb, &mp_opt);
- if (subflow->request_mptcp && mp_opt.mp_capable) {
+ if (subflow->request_mptcp) {
+ if (!mp_opt.mp_capable) {
+ MPTCP_INC_STATS(sock_net(sk),
+ MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
+ mptcp_do_fallback(sk);
+ pr_fallback(mptcp_sk(subflow->conn));
+ goto fallback;
+ }
+
subflow->mp_capable = 1;
subflow->can_ack = 1;
subflow->remote_key = mp_opt.sndr_key;
pr_debug("subflow=%p, remote_key=%llu", subflow,
subflow->remote_key);
- } else if (subflow->request_join && mp_opt.mp_join) {
- subflow->mp_join = 1;
+ mptcp_finish_connect(sk);
+ } else if (subflow->request_join) {
+ u8 hmac[SHA256_DIGEST_SIZE];
+
+ if (!mp_opt.mp_join)
+ goto do_reset;
+
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
subflow->thmac, subflow->remote_nonce);
- } else {
- if (subflow->request_mptcp)
- MPTCP_INC_STATS(sock_net(sk),
- MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
- mptcp_do_fallback(sk);
- pr_fallback(mptcp_sk(subflow->conn));
- }
-
- if (mptcp_check_fallback(sk)) {
- mptcp_rcv_space_init(mptcp_sk(parent), sk);
- return;
- }
-
- if (subflow->mp_capable) {
- pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
- subflow->remote_key);
- mptcp_finish_connect(sk);
- } else if (subflow->mp_join) {
- u8 hmac[SHA256_DIGEST_SIZE];
- pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u",
- subflow, subflow->thmac,
- subflow->remote_nonce);
if (!subflow_thmac_valid(subflow)) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
- subflow->mp_join = 0;
goto do_reset;
}
@@ -250,18 +247,22 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->local_nonce,
subflow->remote_nonce,
hmac);
-
memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
if (!mptcp_finish_join(sk))
goto do_reset;
+ subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
- } else {
-do_reset:
- tcp_send_active_reset(sk, GFP_ATOMIC);
- tcp_done(sk);
+ } else if (mptcp_check_fallback(sk)) {
+fallback:
+ mptcp_rcv_space_init(mptcp_sk(parent), sk);
}
+ return;
+
+do_reset:
+ tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_done(sk);
}
static struct request_sock_ops subflow_request_sock_ops;
@@ -386,6 +387,17 @@ static void subflow_drop_ctx(struct sock *ssk)
kfree_rcu(ctx, rcu);
}
+void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
+ struct mptcp_options_received *mp_opt)
+{
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ subflow->remote_key = mp_opt->sndr_key;
+ subflow->fully_established = 1;
+ subflow->can_ack = 1;
+ WRITE_ONCE(msk->fully_established, true);
+}
+
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
@@ -409,7 +421,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
/* hopefully temporary handling for MP_JOIN+syncookie */
subflow_req = mptcp_subflow_rsk(req);
- fallback_is_fatal = subflow_req->mp_join;
+ fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
fallback = !tcp_rsk(req)->is_mptcp;
if (fallback)
goto create_child;
@@ -437,6 +449,7 @@ create_msk:
} else if (subflow_req->mp_join) {
mptcp_get_options(skb, &mp_opt);
if (!mp_opt.mp_join ||
+ !mptcp_can_accept_new_subflow(subflow_req->msk) ||
!subflow_hmac_valid(req, &mp_opt)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
fallback = true;
@@ -465,6 +478,11 @@ create_child:
}
if (ctx->mp_capable) {
+ /* this can't race with mptcp_close(), as the msk is
+ * not yet exposted to user-space
+ */
+ inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);
+
/* new mpc subflow takes ownership of the newly
* created mptcp socket
*/
@@ -477,9 +495,8 @@ create_child:
/* with OoO packets we can reach here without ingress
* mpc option
*/
- ctx->remote_key = mp_opt.sndr_key;
- ctx->fully_established = mp_opt.mp_capable;
- ctx->can_ack = mp_opt.mp_capable;
+ if (mp_opt.mp_capable)
+ mptcp_subflow_fully_established(ctx, &mp_opt);
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
@@ -514,9 +531,9 @@ out:
dispose_child:
subflow_drop_ctx(child);
tcp_rsk(req)->drop_req = true;
- tcp_send_active_reset(child, GFP_ATOMIC);
inet_csk_prepare_for_destroy_sock(child);
tcp_done(child);
+ req->rsk_ops->send_reset(sk, skb);
/* The last child reference will be released by the caller */
return child;
@@ -966,7 +983,7 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex,
int addrlen;
int err;
- if (sk->sk_state != TCP_ESTABLISHED)
+ if (!mptcp_is_fully_established(sk))
return -ENOTCONN;
err = mptcp_subflow_create_socket(sk, &sf);
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index 7d8106026081..97cfc45bcc4f 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -83,6 +83,18 @@ static bool __token_bucket_busy(struct token_bucket *t, u32 token)
__token_lookup_req(t, token) || __token_lookup_msk(t, token);
}
+static void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn)
+{
+ /* we might consider a faster version that computes the key as a
+ * hash of some information available in the MPTCP socket. Use
+ * random data at the moment, as it's probably the safest option
+ * in case multiple sockets are opened in different namespaces at
+ * the same time.
+ */
+ get_random_bytes(key, sizeof(u64));
+ mptcp_crypto_key_sha(*key, token, idsn);
+}
+
/**
* mptcp_token_new_request - create new key/idsn/token for subflow_request
* @req: the request socket
@@ -356,7 +368,7 @@ void __init mptcp_token_init(void)
sizeof(struct token_bucket),
0,
20,/* one slot per 1MB of memory */
- 0,
+ HASH_ZERO,
NULL,
&token_mask,
0,
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index a94bb59793f0..5b1f4ec66dd9 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -471,7 +471,7 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr)
memcpy(&ncf->addrs[index], cmd->mac, ETH_ALEN);
} else {
clear_bit(cmd->index - 1, bitmap);
- memset(&ncf->addrs[index], 0, ETH_ALEN);
+ eth_zero_addr(&ncf->addrs[index]);
}
spin_unlock_irqrestore(&nc->lock, flags);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 4af83f466dfc..bcac316addab 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2434,7 +2434,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
}
static int
-do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len)
{
struct net *net = sock_net(sk);
int ret;
@@ -2458,7 +2458,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
return -EINVAL;
}
- if (copy_from_user(arg, user, len) != 0)
+ if (copy_from_sockptr(arg, ptr, len) != 0)
return -EFAULT;
/* Handle daemons since they have another lock */
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index 90469b1f628a..34afcd03b6f6 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -89,7 +89,7 @@ out:
return ops;
}
-int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt,
+int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, sockptr_t opt,
unsigned int len)
{
struct nf_sockopt_ops *ops;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 32bab45af7e4..b97eb4b538fd 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1028,9 +1028,9 @@ int xt_check_target(struct xt_tgchk_param *par,
EXPORT_SYMBOL_GPL(xt_check_target);
/**
- * xt_copy_counters_from_user - copy counters and metadata from userspace
+ * xt_copy_counters - copy counters and metadata from a sockptr_t
*
- * @user: src pointer to userspace memory
+ * @arg: src sockptr
* @len: alleged size of userspace memory
* @info: where to store the xt_counters_info metadata
*
@@ -1047,8 +1047,8 @@ EXPORT_SYMBOL_GPL(xt_check_target);
* Return: returns pointer that caller has to test via IS_ERR().
* If IS_ERR is false, caller has to vfree the pointer.
*/
-void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
- struct xt_counters_info *info)
+void *xt_copy_counters(sockptr_t arg, unsigned int len,
+ struct xt_counters_info *info)
{
void *mem;
u64 size;
@@ -1062,12 +1062,12 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
return ERR_PTR(-EINVAL);
len -= sizeof(compat_tmp);
- if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
+ if (copy_from_sockptr(&compat_tmp, arg, sizeof(compat_tmp)) != 0)
return ERR_PTR(-EFAULT);
memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1);
info->num_counters = compat_tmp.num_counters;
- user += sizeof(compat_tmp);
+ sockptr_advance(arg, sizeof(compat_tmp));
} else
#endif
{
@@ -1075,10 +1075,10 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
return ERR_PTR(-EINVAL);
len -= sizeof(*info);
- if (copy_from_user(info, user, sizeof(*info)) != 0)
+ if (copy_from_sockptr(info, arg, sizeof(*info)) != 0)
return ERR_PTR(-EFAULT);
- user += sizeof(*info);
+ sockptr_advance(arg, sizeof(*info));
}
info->name[sizeof(info->name) - 1] = '\0';
@@ -1092,13 +1092,13 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
if (!mem)
return ERR_PTR(-ENOMEM);
- if (copy_from_user(mem, user, len) == 0)
+ if (copy_from_sockptr(mem, arg, len) == 0)
return mem;
vfree(mem);
return ERR_PTR(-EFAULT);
}
-EXPORT_SYMBOL_GPL(xt_copy_counters_from_user);
+EXPORT_SYMBOL_GPL(xt_copy_counters);
#ifdef CONFIG_COMPAT
int xt_compat_target_offset(const struct xt_target *target)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4f2c3b14ddbf..d8921b833744 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -60,6 +60,7 @@
#include <linux/genetlink.h>
#include <linux/net_namespace.h>
#include <linux/nospec.h>
+#include <linux/btf_ids.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -1620,7 +1621,7 @@ static void netlink_update_socket_mc(struct netlink_sock *nlk,
}
static int netlink_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
@@ -1631,7 +1632,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
return -ENOPROTOOPT;
if (optlen >= sizeof(int) &&
- get_user(val, (unsigned int __user *)optval))
+ copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
switch (optname) {
@@ -2803,7 +2804,10 @@ static const struct rhashtable_params netlink_rhashtable_params = {
};
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
-static const struct bpf_iter_reg netlink_reg_info = {
+BTF_ID_LIST(btf_netlink_sock_id)
+BTF_ID(struct, netlink_sock)
+
+static struct bpf_iter_reg netlink_reg_info = {
.target = "netlink",
.seq_ops = &netlink_seq_ops,
.init_seq_private = bpf_iter_init_seq_net,
@@ -2818,6 +2822,7 @@ static const struct bpf_iter_reg netlink_reg_info = {
static int __init bpf_iter_register(void)
{
+ netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id;
return bpf_iter_reg_target(&netlink_reg_info);
}
#endif
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index f90ef6934b8f..6d16e1ab1a8a 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -294,7 +294,7 @@ void nr_destroy_socket(struct sock *sk)
*/
static int nr_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct nr_sock *nr = nr_sk(sk);
@@ -306,7 +306,7 @@ static int nr_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(unsigned int))
return -EINVAL;
- if (get_user(opt, (unsigned int __user *)optval))
+ if (copy_from_sockptr(&opt, optval, sizeof(unsigned int)))
return -EFAULT;
switch (optname) {
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 6da1e2334bb6..d257ed3b732a 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -218,7 +218,7 @@ error:
}
static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -241,7 +241,7 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -263,7 +263,7 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 95805f0e27bd..6b6822f82f70 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1655,7 +1655,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_destroy_reply;
ovs_dp_set_net(dp, sock_net(skb->sk));
- INIT_DELAYED_WORK(&dp->masks_rebalance, ovs_dp_masks_rebalance);
/* Allocate table. */
err = ovs_flow_tbl_init(&dp->table);
@@ -1715,9 +1714,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
- schedule_delayed_work(&dp->masks_rebalance,
- msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
-
ovs_unlock();
ovs_notify(&dp_datapath_genl_family, reply, info);
@@ -1762,9 +1758,6 @@ static void __dp_destroy(struct datapath *dp)
/* RCU destroy the flow table */
call_rcu(&dp->rcu, destroy_dp_rcu);
-
- /* Cancel remaining work. */
- cancel_delayed_work_sync(&dp->masks_rebalance);
}
static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
@@ -2349,14 +2342,18 @@ out:
static void ovs_dp_masks_rebalance(struct work_struct *work)
{
- struct datapath *dp = container_of(work, struct datapath,
- masks_rebalance.work);
+ struct ovs_net *ovs_net = container_of(work, struct ovs_net,
+ masks_rebalance.work);
+ struct datapath *dp;
ovs_lock();
- ovs_flow_masks_rebalance(&dp->table);
+
+ list_for_each_entry(dp, &ovs_net->dps, list_node)
+ ovs_flow_masks_rebalance(&dp->table);
+
ovs_unlock();
- schedule_delayed_work(&dp->masks_rebalance,
+ schedule_delayed_work(&ovs_net->masks_rebalance,
msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
}
@@ -2454,6 +2451,9 @@ static int __net_init ovs_init_net(struct net *net)
INIT_LIST_HEAD(&ovs_net->dps);
INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
+ INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);
+ schedule_delayed_work(&ovs_net->masks_rebalance,
+ msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
return ovs_ct_init(net);
}
@@ -2508,6 +2508,7 @@ static void __net_exit ovs_exit_net(struct net *dnet)
ovs_unlock();
+ cancel_delayed_work_sync(&ovs_net->masks_rebalance);
cancel_work_sync(&ovs_net->dp_notify_work);
}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 697a2354194b..24fcec22fde2 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -84,9 +84,6 @@ struct datapath {
/* Switch meters. */
struct dp_meter_table meter_tbl;
-
- /* re-balance flow masks timer */
- struct delayed_work masks_rebalance;
};
/**
@@ -135,6 +132,7 @@ struct dp_upcall_info {
struct ovs_net {
struct list_head dps;
struct work_struct dp_notify_work;
+ struct delayed_work masks_rebalance;
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_ct_limit_info *ct_limit_info;
#endif
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c240fb5de3f0..0b8160d1a6e0 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1536,7 +1536,7 @@ static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
}
}
-static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
+static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data,
unsigned int len)
{
struct bpf_prog *new;
@@ -1558,7 +1558,7 @@ static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
return 0;
}
-static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
+static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data,
unsigned int len)
{
struct bpf_prog *new;
@@ -1568,7 +1568,7 @@ static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
return -EPERM;
if (len != sizeof(fd))
return -EINVAL;
- if (copy_from_user(&fd, data, len))
+ if (copy_from_sockptr(&fd, data, len))
return -EFAULT;
new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
@@ -1579,7 +1579,7 @@ static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
return 0;
}
-static int fanout_set_data(struct packet_sock *po, char __user *data,
+static int fanout_set_data(struct packet_sock *po, sockptr_t data,
unsigned int len)
{
switch (po->fanout->type) {
@@ -3652,7 +3652,8 @@ static void packet_flush_mclist(struct sock *sk)
}
static int
-packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
+packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
+ unsigned int optlen)
{
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
@@ -3672,7 +3673,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
return -EINVAL;
if (len > sizeof(mreq))
len = sizeof(mreq);
- if (copy_from_user(&mreq, optval, len))
+ if (copy_from_sockptr(&mreq, optval, len))
return -EFAULT;
if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
return -EINVAL;
@@ -3703,7 +3704,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen < len) {
ret = -EINVAL;
} else {
- if (copy_from_user(&req_u.req, optval, len))
+ if (copy_from_sockptr(&req_u.req, optval, len))
ret = -EFAULT;
else
ret = packet_set_ring(sk, &req_u, 0,
@@ -3718,7 +3719,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
pkt_sk(sk)->copy_thresh = val;
@@ -3730,7 +3731,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
switch (val) {
case TPACKET_V1:
@@ -3756,7 +3757,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
if (val > INT_MAX)
return -EINVAL;
@@ -3776,7 +3777,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3795,7 +3796,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen < sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3809,7 +3810,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen < sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3825,7 +3826,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
return -EINVAL;
if (optlen < sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3844,7 +3845,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
po->tp_tstamp = val;
@@ -3856,7 +3857,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
return fanout_add(sk, val & 0xffff, val >> 16);
@@ -3874,7 +3875,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
if (val < 0 || val > 1)
return -EINVAL;
@@ -3888,7 +3889,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3907,7 +3908,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 4577e43cb777..e47d09aca4af 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -975,7 +975,7 @@ static int pep_init(struct sock *sk)
}
static int pep_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct pep_sock *pn = pep_sk(sk);
int val = 0, err = 0;
@@ -983,7 +983,7 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
if (level != SOL_PNPIPE)
return -ENOPROTOOPT;
if (optlen >= sizeof(int)) {
- if (get_user(val, (int __user *) optval))
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
return -EFAULT;
}
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 1a5bf3fa4578..b239120dd9ca 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -290,8 +290,7 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return 0;
}
-static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
- int len)
+static int rds_cancel_sent_to(struct rds_sock *rs, sockptr_t optval, int len)
{
struct sockaddr_in6 sin6;
struct sockaddr_in sin;
@@ -308,14 +307,15 @@ static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
goto out;
} else if (len < sizeof(struct sockaddr_in6)) {
/* Assume IPv4 */
- if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) {
+ if (copy_from_sockptr(&sin, optval,
+ sizeof(struct sockaddr_in))) {
ret = -EFAULT;
goto out;
}
ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr);
sin6.sin6_port = sin.sin_port;
} else {
- if (copy_from_user(&sin6, optval,
+ if (copy_from_sockptr(&sin6, optval,
sizeof(struct sockaddr_in6))) {
ret = -EFAULT;
goto out;
@@ -327,21 +327,20 @@ out:
return ret;
}
-static int rds_set_bool_option(unsigned char *optvar, char __user *optval,
+static int rds_set_bool_option(unsigned char *optvar, sockptr_t optval,
int optlen)
{
int value;
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(value, (int __user *) optval))
+ if (copy_from_sockptr(&value, optval, sizeof(int)))
return -EFAULT;
*optvar = !!value;
return 0;
}
-static int rds_cong_monitor(struct rds_sock *rs, char __user *optval,
- int optlen)
+static int rds_cong_monitor(struct rds_sock *rs, sockptr_t optval, int optlen)
{
int ret;
@@ -358,8 +357,7 @@ static int rds_cong_monitor(struct rds_sock *rs, char __user *optval,
return ret;
}
-static int rds_set_transport(struct rds_sock *rs, char __user *optval,
- int optlen)
+static int rds_set_transport(struct rds_sock *rs, sockptr_t optval, int optlen)
{
int t_type;
@@ -369,7 +367,7 @@ static int rds_set_transport(struct rds_sock *rs, char __user *optval,
if (optlen != sizeof(int))
return -EINVAL;
- if (copy_from_user(&t_type, (int __user *)optval, sizeof(t_type)))
+ if (copy_from_sockptr(&t_type, optval, sizeof(t_type)))
return -EFAULT;
if (t_type < 0 || t_type >= RDS_TRANS_COUNT)
@@ -380,7 +378,7 @@ static int rds_set_transport(struct rds_sock *rs, char __user *optval,
return rs->rs_transport ? 0 : -ENOPROTOOPT;
}
-static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
+static int rds_enable_recvtstamp(struct sock *sk, sockptr_t optval,
int optlen, int optname)
{
int val, valbool;
@@ -388,7 +386,7 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
if (optlen != sizeof(int))
return -EFAULT;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
return -EFAULT;
valbool = val ? 1 : 0;
@@ -404,7 +402,7 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
return 0;
}
-static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
+static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval,
int optlen)
{
struct rds_rx_trace_so trace;
@@ -413,7 +411,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
if (optlen != sizeof(struct rds_rx_trace_so))
return -EFAULT;
- if (copy_from_user(&trace, optval, sizeof(trace)))
+ if (copy_from_sockptr(&trace, optval, sizeof(trace)))
return -EFAULT;
if (trace.rx_traces > RDS_MSG_RX_DGRAM_TRACE_MAX)
@@ -432,7 +430,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
}
static int rds_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
int ret;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index a7ae11846cd7..ccdd304eae0a 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -353,21 +353,20 @@ out:
return ret;
}
-int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen)
+int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen)
{
struct rds_get_mr_args args;
if (optlen != sizeof(struct rds_get_mr_args))
return -EINVAL;
- if (copy_from_user(&args, (struct rds_get_mr_args __user *)optval,
- sizeof(struct rds_get_mr_args)))
+ if (copy_from_sockptr(&args, optval, sizeof(struct rds_get_mr_args)))
return -EFAULT;
return __rds_rdma_map(rs, &args, NULL, NULL, NULL);
}
-int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen)
+int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen)
{
struct rds_get_mr_for_dest_args args;
struct rds_get_mr_args new_args;
@@ -375,7 +374,7 @@ int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen)
if (optlen != sizeof(struct rds_get_mr_for_dest_args))
return -EINVAL;
- if (copy_from_user(&args, (struct rds_get_mr_for_dest_args __user *)optval,
+ if (copy_from_sockptr(&args, optval,
sizeof(struct rds_get_mr_for_dest_args)))
return -EFAULT;
@@ -394,7 +393,7 @@ int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen)
/*
* Free the MR indicated by the given R_Key
*/
-int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen)
+int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen)
{
struct rds_free_mr_args args;
struct rds_mr *mr;
@@ -403,8 +402,7 @@ int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen)
if (optlen != sizeof(struct rds_free_mr_args))
return -EINVAL;
- if (copy_from_user(&args, (struct rds_free_mr_args __user *)optval,
- sizeof(struct rds_free_mr_args)))
+ if (copy_from_sockptr(&args, optval, sizeof(struct rds_free_mr_args)))
return -EFAULT;
/* Special case - a null cookie means flush all unused MRs */
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 106e862996b9..d35d1fc39807 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -924,9 +924,9 @@ int rds_send_pong(struct rds_conn_path *cp, __be16 dport);
/* rdma.c */
void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
-int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
-int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
-int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
+int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen);
+int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen);
+int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen);
void rds_rdma_drop_keys(struct rds_sock *rs);
int rds_rdma_extra_size(struct rds_rdma_args *args,
struct rds_iov_vector *iov);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index ce85656ac9c1..cf7d974e0f61 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -365,7 +365,7 @@ void rose_destroy_socket(struct sock *sk)
*/
static int rose_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct rose_sock *rose = rose_sk(sk);
@@ -377,7 +377,7 @@ static int rose_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(opt, (int __user *)optval))
+ if (copy_from_sockptr(&opt, optval, sizeof(int)))
return -EFAULT;
switch (optname) {
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index cd7d0d204c74..e6725a6de015 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -588,7 +588,7 @@ EXPORT_SYMBOL(rxrpc_sock_set_min_security_level);
* set RxRPC socket options
*/
static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
unsigned int min_sec_level;
@@ -639,8 +639,8 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
ret = -EISCONN;
if (rx->sk.sk_state != RXRPC_UNBOUND)
goto error;
- ret = get_user(min_sec_level,
- (unsigned int __user *) optval);
+ ret = copy_from_sockptr(&min_sec_level, optval,
+ sizeof(unsigned int));
if (ret < 0)
goto error;
ret = -EINVAL;
@@ -658,7 +658,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
if (rx->sk.sk_state != RXRPC_SERVER_BOUND2)
goto error;
ret = -EFAULT;
- if (copy_from_user(service_upgrade, optval,
+ if (copy_from_sockptr(service_upgrade, optval,
sizeof(service_upgrade)) != 0)
goto error;
ret = -EINVAL;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 9a2139ebd67d..6d29a3603a3e 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -909,8 +909,8 @@ extern const struct rxrpc_security rxrpc_no_security;
extern struct key_type key_type_rxrpc;
extern struct key_type key_type_rxrpc_s;
-int rxrpc_request_key(struct rxrpc_sock *, char __user *, int);
-int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int);
+int rxrpc_request_key(struct rxrpc_sock *, sockptr_t , int);
+int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int);
int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t,
u32);
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 0c98313dd7a8..94c3df392651 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -896,7 +896,7 @@ static void rxrpc_describe(const struct key *key, struct seq_file *m)
/*
* grab the security key for a socket
*/
-int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
+int rxrpc_request_key(struct rxrpc_sock *rx, sockptr_t optval, int optlen)
{
struct key *key;
char *description;
@@ -906,7 +906,7 @@ int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
if (optlen <= 0 || optlen > PAGE_SIZE - 1)
return -EINVAL;
- description = memdup_user_nul(optval, optlen);
+ description = memdup_sockptr_nul(optval, optlen);
if (IS_ERR(description))
return PTR_ERR(description);
@@ -926,8 +926,7 @@ int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
/*
* grab the security keyring for a server socket
*/
-int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
- int optlen)
+int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen)
{
struct key *key;
char *description;
@@ -937,7 +936,7 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
if (optlen <= 0 || optlen > PAGE_SIZE - 1)
return -EINVAL;
- description = memdup_user_nul(optval, optlen);
+ description = memdup_sockptr_nul(optval, optlen);
if (IS_ERR(description))
return PTR_ERR(description);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index acd8e05c2ba5..a4f7ef1de7e7 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -64,6 +64,7 @@ struct fl_flow_key {
};
} tp_range;
struct flow_dissector_key_ct ct;
+ struct flow_dissector_key_hash hash;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -318,6 +319,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
fl_ct_info_to_flower_map,
ARRAY_SIZE(fl_ct_info_to_flower_map));
+ skb_flow_dissect_hash(skb, &mask->dissector, &skb_key);
skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
f = fl_mask_lookup(mask, &skb_key);
@@ -695,6 +697,9 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_CT_LABELS_MASK] = { .type = NLA_BINARY,
.len = 128 / BITS_PER_BYTE },
[TCA_FLOWER_FLAGS] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_HASH] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_HASH_MASK] = { .type = NLA_U32 },
+
};
static const struct nla_policy
@@ -1626,6 +1631,10 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip);
+ fl_set_key_val(tb, &key->hash.hash, TCA_FLOWER_KEY_HASH,
+ &mask->hash.hash, TCA_FLOWER_KEY_HASH_MASK,
+ sizeof(key->hash.hash));
+
if (tb[TCA_FLOWER_KEY_ENC_OPTS]) {
ret = fl_set_enc_opt(tb, key, mask, extack);
if (ret)
@@ -1740,6 +1749,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,
FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_CT, ct);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_HASH, hash);
skb_flow_dissector_init(dissector, keys, cnt);
}
@@ -2960,6 +2971,11 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
goto nla_put_failure;
+ if (fl_dump_key_val(skb, &key->hash.hash, TCA_FLOWER_KEY_HASH,
+ &mask->hash.hash, TCA_FLOWER_KEY_HASH_MASK,
+ sizeof(key->hash.hash)))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 7ecaf7d575c0..d19db22262fd 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1367,15 +1367,15 @@ static struct pernet_operations sctp_ctrlsock_ops = {
/* Initialize the universe into something sensible. */
static __init int sctp_init(void)
{
- int i;
- int status = -EINVAL;
- unsigned long goal;
- unsigned long limit;
unsigned long nr_pages = totalram_pages();
+ unsigned long limit;
+ unsigned long goal;
+ int max_entry_order;
+ int num_entries;
int max_share;
+ int status;
int order;
- int num_entries;
- int max_entry_order;
+ int i;
sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9a767f359718..ec1fba1fbe71 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2749,31 +2749,12 @@ static void sctp_apply_asoc_delayed_ack(struct sctp_sack_info *params,
* timer to expire. The default value for this is 2, setting this
* value to 1 will disable the delayed sack algorithm.
*/
-
-static int sctp_setsockopt_delayed_ack(struct sock *sk,
- struct sctp_sack_info *params,
- unsigned int optlen)
+static int __sctp_setsockopt_delayed_ack(struct sock *sk,
+ struct sctp_sack_info *params)
{
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_association *asoc;
- if (optlen == sizeof(struct sctp_sack_info)) {
- if (params->sack_delay == 0 && params->sack_freq == 0)
- return 0;
- } else if (optlen == sizeof(struct sctp_assoc_value)) {
- pr_warn_ratelimited(DEPRECATED
- "%s (pid %d) "
- "Use of struct sctp_assoc_value in delayed_ack socket option.\n"
- "Use struct sctp_sack_info instead\n",
- current->comm, task_pid_nr(current));
-
- if (params->sack_delay == 0)
- params->sack_freq = 1;
- else
- params->sack_freq = 0;
- } else
- return -EINVAL;
-
/* Validate value parameter. */
if (params->sack_delay > 500)
return -EINVAL;
@@ -2821,6 +2802,33 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk,
return 0;
}
+static int sctp_setsockopt_delayed_ack(struct sock *sk,
+ struct sctp_sack_info *params,
+ unsigned int optlen)
+{
+ if (optlen == sizeof(struct sctp_assoc_value)) {
+ struct sctp_assoc_value *v = (struct sctp_assoc_value *)params;
+ struct sctp_sack_info p;
+
+ pr_warn_ratelimited(DEPRECATED
+ "%s (pid %d) "
+ "Use of struct sctp_assoc_value in delayed_ack socket option.\n"
+ "Use struct sctp_sack_info instead\n",
+ current->comm, task_pid_nr(current));
+
+ p.sack_assoc_id = v->assoc_id;
+ p.sack_delay = v->assoc_value;
+ p.sack_freq = v->assoc_value ? 0 : 1;
+ return __sctp_setsockopt_delayed_ack(sk, &p);
+ }
+
+ if (optlen != sizeof(struct sctp_sack_info))
+ return -EINVAL;
+ if (params->sack_delay == 0 && params->sack_freq == 0)
+ return 0;
+ return __sctp_setsockopt_delayed_ack(sk, params);
+}
+
/* 7.1.3 Initialization Parameters (SCTP_INITMSG)
*
* Applications can specify protocol parameters for the default association
@@ -4429,7 +4437,7 @@ out:
* optlen - the size of the buffer.
*/
static int sctp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
void *kopt = NULL;
int retval = 0;
@@ -4449,7 +4457,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
}
if (optlen > 0) {
- kopt = memdup_user(optval, optlen);
+ kopt = memdup_sockptr(optval, optlen);
if (IS_ERR(kopt))
return PTR_ERR(kopt);
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 9711c9e0e515..4ac1d4de6676 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1731,7 +1731,7 @@ out:
}
static int smc_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
@@ -1754,7 +1754,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
return -EFAULT;
lock_sock(sk);
diff --git a/net/socket.c b/net/socket.c
index 93846568c2fb..94ca4547cd7c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2094,10 +2094,10 @@ static bool sock_use_custom_sol_socket(const struct socket *sock)
* Set a socket option. Because we don't know the option lengths we have
* to pass the user mode parameter for the protocols to sort out.
*/
-int __sys_setsockopt(int fd, int level, int optname, char __user *optval,
+int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
int optlen)
{
- mm_segment_t oldfs = get_fs();
+ sockptr_t optval;
char *kernel_optval = NULL;
int err, fput_needed;
struct socket *sock;
@@ -2105,6 +2105,10 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *optval,
if (optlen < 0)
return -EINVAL;
+ err = init_user_sockptr(&optval, user_optval);
+ if (err)
+ return err;
+
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
return err;
@@ -2115,7 +2119,7 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *optval,
if (!in_compat_syscall())
err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
- optval, &optlen,
+ user_optval, &optlen,
&kernel_optval);
if (err < 0)
goto out_put;
@@ -2124,11 +2128,8 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *optval,
goto out_put;
}
- if (kernel_optval) {
- set_fs(KERNEL_DS);
- optval = (char __user __force *)kernel_optval;
- }
-
+ if (kernel_optval)
+ optval = KERNEL_SOCKPTR(kernel_optval);
if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
err = sock_setsockopt(sock, level, optname, optval, optlen);
else if (unlikely(!sock->ops->setsockopt))
@@ -2136,12 +2137,7 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *optval,
else
err = sock->ops->setsockopt(sock, level, optname, optval,
optlen);
-
- if (kernel_optval) {
- set_fs(oldfs);
- kfree(kernel_optval);
- }
-
+ kfree(kernel_optval);
out_put:
fput_light(sock->file, fput_needed);
return err;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index fc388cef6471..07419f36116a 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3103,7 +3103,7 @@ static int tipc_sk_leave(struct tipc_sock *tsk)
* Returns 0 on success, errno otherwise
*/
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
- char __user *ov, unsigned int ol)
+ sockptr_t ov, unsigned int ol)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
@@ -3124,17 +3124,17 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
case TIPC_NODELAY:
if (ol < sizeof(value))
return -EINVAL;
- if (get_user(value, (u32 __user *)ov))
+ if (copy_from_sockptr(&value, ov, sizeof(u32)))
return -EFAULT;
break;
case TIPC_GROUP_JOIN:
if (ol < sizeof(mreq))
return -EINVAL;
- if (copy_from_user(&mreq, ov, sizeof(mreq)))
+ if (copy_from_sockptr(&mreq, ov, sizeof(mreq)))
return -EFAULT;
break;
default:
- if (ov || ol)
+ if (!sockptr_is_null(ov) || ol)
return -EINVAL;
}
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index ec10041c6b7d..d77f7d821130 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -450,7 +450,7 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
return do_tls_getsockopt(sk, optname, optval, optlen);
}
-static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
+static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
unsigned int optlen, int tx)
{
struct tls_crypto_info *crypto_info;
@@ -460,7 +460,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
int rc = 0;
int conf;
- if (!optval || (optlen < sizeof(*crypto_info))) {
+ if (sockptr_is_null(optval) || (optlen < sizeof(*crypto_info))) {
rc = -EINVAL;
goto out;
}
@@ -479,7 +479,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
goto out;
}
- rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
+ rc = copy_from_sockptr(crypto_info, optval, sizeof(*crypto_info));
if (rc) {
rc = -EFAULT;
goto err_crypto_info;
@@ -522,8 +522,9 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
goto err_crypto_info;
}
- rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
- optlen - sizeof(*crypto_info));
+ sockptr_advance(optval, sizeof(*crypto_info));
+ rc = copy_from_sockptr(crypto_info + 1, optval,
+ optlen - sizeof(*crypto_info));
if (rc) {
rc = -EFAULT;
goto err_crypto_info;
@@ -579,8 +580,8 @@ out:
return rc;
}
-static int do_tls_setsockopt(struct sock *sk, int optname,
- char __user *optval, unsigned int optlen)
+static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval,
+ unsigned int optlen)
{
int rc = 0;
@@ -600,7 +601,7 @@ static int do_tls_setsockopt(struct sock *sk, int optname,
}
static int tls_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct tls_context *ctx = tls_get_ctx(sk);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index df204c6761c4..27bbcfad9c17 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1517,7 +1517,7 @@ static void vsock_update_buffer_size(struct vsock_sock *vsk,
static int vsock_stream_setsockopt(struct socket *sock,
int level,
int optname,
- char __user *optval,
+ sockptr_t optval,
unsigned int optlen)
{
int err;
@@ -1535,7 +1535,7 @@ static int vsock_stream_setsockopt(struct socket *sock,
err = -EINVAL; \
goto exit; \
} \
- if (copy_from_user(&_v, optval, sizeof(_v)) != 0) { \
+ if (copy_from_sockptr(&_v, optval, sizeof(_v)) != 0) { \
err = -EFAULT; \
goto exit; \
} \
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d5b09bbff375..0bbb283f23c9 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -431,7 +431,7 @@ void x25_destroy_socket_from_timer(struct sock *sk)
*/
static int x25_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
int opt;
struct sock *sk = sock->sk;
@@ -445,7 +445,7 @@ static int x25_setsockopt(struct socket *sock, int level, int optname,
goto out;
rc = -EFAULT;
- if (get_user(opt, (int __user *)optval))
+ if (copy_from_sockptr(&opt, optval, sizeof(int)))
goto out;
if (opt)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 26e3bba8c204..2e94a7e94671 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -702,7 +702,7 @@ struct xdp_umem_reg_v1 {
};
static int xsk_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
@@ -720,7 +720,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(entries))
return -EINVAL;
- if (copy_from_user(&entries, optval, sizeof(entries)))
+ if (copy_from_sockptr(&entries, optval, sizeof(entries)))
return -EFAULT;
mutex_lock(&xs->mutex);
@@ -747,7 +747,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
else if (optlen < sizeof(mr))
mr_size = sizeof(struct xdp_umem_reg_v1);
- if (copy_from_user(&mr, optval, mr_size))
+ if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
mutex_lock(&xs->mutex);
@@ -774,7 +774,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_queue **q;
int entries;
- if (copy_from_user(&entries, optval, sizeof(entries)))
+ if (copy_from_sockptr(&entries, optval, sizeof(entries)))
return -EFAULT;
mutex_lock(&xs->mutex);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 8be2d926acc2..69520ad3d83b 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2264,7 +2264,7 @@ static bool km_is_alive(const struct km_event *c)
return is_alive;
}
-int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
+int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen)
{
int err;
u8 *data;
@@ -2274,7 +2274,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
if (in_compat_syscall())
return -EOPNOTSUPP;
- if (!optval && !optlen) {
+ if (sockptr_is_null(optval) && !optlen) {
xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
__sk_dst_reset(sk);
@@ -2284,7 +2284,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
if (optlen <= 0 || optlen > PAGE_SIZE)
return -EMSGSIZE;
- data = memdup_user(optval, optlen);
+ data = memdup_sockptr(optval, optlen);
if (IS_ERR(data))
return PTR_ERR(data);
diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c
index d459f73412a4..e74ee1cd4b9c 100644
--- a/samples/bpf/offwaketime_kern.c
+++ b/samples/bpf/offwaketime_kern.c
@@ -12,7 +12,12 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) \
+ ({ \
+ typeof(P) val; \
+ bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
+ val; \
+ })
#define MINBLOCK_US 1
diff --git a/samples/bpf/test_overhead_kprobe_kern.c b/samples/bpf/test_overhead_kprobe_kern.c
index 8b811c29dc79..f6d593e47037 100644
--- a/samples/bpf/test_overhead_kprobe_kern.c
+++ b/samples/bpf/test_overhead_kprobe_kern.c
@@ -10,7 +10,12 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) \
+ ({ \
+ typeof(P) val = 0; \
+ bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
+ val; \
+ })
SEC("kprobe/__set_task_comm")
int prog(struct pt_regs *ctx)
@@ -25,8 +30,9 @@ int prog(struct pt_regs *ctx)
tsk = (void *)PT_REGS_PARM1(ctx);
pid = _(tsk->pid);
- bpf_probe_read(oldcomm, sizeof(oldcomm), &tsk->comm);
- bpf_probe_read(newcomm, sizeof(newcomm), (void *)PT_REGS_PARM2(ctx));
+ bpf_probe_read_kernel(oldcomm, sizeof(oldcomm), &tsk->comm);
+ bpf_probe_read_kernel(newcomm, sizeof(newcomm),
+ (void *)PT_REGS_PARM2(ctx));
signal = _(tsk->signal);
oom_score_adj = _(signal->oom_score_adj);
return 0;
diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c
index 8e2610e14475..3f4599c9a202 100644
--- a/samples/bpf/tracex1_kern.c
+++ b/samples/bpf/tracex1_kern.c
@@ -11,7 +11,12 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define _(P) \
+ ({ \
+ typeof(P) val = 0; \
+ bpf_probe_read_kernel(&val, sizeof(val), &(P)); \
+ val; \
+ })
/* kprobe is NOT a stable ABI
* kernel functions can be removed, renamed or completely change semantics.
@@ -34,7 +39,7 @@ int bpf_prog1(struct pt_regs *ctx)
dev = _(skb->dev);
len = _(skb->len);
- bpf_probe_read(devname, sizeof(devname), dev->name);
+ bpf_probe_read_kernel(devname, sizeof(devname), dev->name);
if (devname[0] == 'l' && devname[1] == 'o') {
char fmt[] = "skb %p len %d\n";
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
index 32b49e8ab6bd..64a1f7550d7e 100644
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5_kern.c
@@ -47,7 +47,7 @@ PROG(SYS__NR_write)(struct pt_regs *ctx)
{
struct seccomp_data sd;
- bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
+ bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
if (sd.args[2] == 512) {
char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
@@ -60,7 +60,7 @@ PROG(SYS__NR_read)(struct pt_regs *ctx)
{
struct seccomp_data sd;
- bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
+ bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
if (sd.args[2] > 128 && sd.args[2] <= 1024) {
char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
bpf_trace_printk(fmt, sizeof(fmt),
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c
index 2baf8db1f7e7..8255025dea97 100644
--- a/samples/bpf/xdp_redirect_cpu_kern.c
+++ b/samples/bpf/xdp_redirect_cpu_kern.c
@@ -21,7 +21,7 @@
struct {
__uint(type, BPF_MAP_TYPE_CPUMAP);
__uint(key_size, sizeof(u32));
- __uint(value_size, sizeof(u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
__uint(max_entries, MAX_CPUS);
} cpu_map SEC(".maps");
@@ -30,6 +30,9 @@ struct datarec {
__u64 processed;
__u64 dropped;
__u64 issue;
+ __u64 xdp_pass;
+ __u64 xdp_drop;
+ __u64 xdp_redirect;
};
/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
@@ -692,13 +695,16 @@ int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
* Code in: kernel/include/trace/events/xdp.h
*/
struct cpumap_kthread_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int sched; // offset:28; size:4; signed:1;
+ u64 __pad; // First 8 bytes are not accessible
+ int map_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ int cpu; // offset:16; size:4; signed:1;
+ unsigned int drops; // offset:20; size:4; signed:0;
+ unsigned int processed; // offset:24; size:4; signed:0;
+ int sched; // offset:28; size:4; signed:1;
+ unsigned int xdp_pass; // offset:32; size:4; signed:0;
+ unsigned int xdp_drop; // offset:36; size:4; signed:0;
+ unsigned int xdp_redirect; // offset:40; size:4; signed:0;
};
SEC("tracepoint/xdp/xdp_cpumap_kthread")
@@ -712,6 +718,9 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
return 0;
rec->processed += ctx->processed;
rec->dropped += ctx->drops;
+ rec->xdp_pass += ctx->xdp_pass;
+ rec->xdp_drop += ctx->xdp_drop;
+ rec->xdp_redirect += ctx->xdp_redirect;
/* Count times kthread yielded CPU via schedule call */
if (ctx->sched)
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index f4e755e0dd73..004c0622c913 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -70,6 +70,11 @@ static const struct option long_options[] = {
{"stress-mode", no_argument, NULL, 'x' },
{"no-separators", no_argument, NULL, 'z' },
{"force", no_argument, NULL, 'F' },
+ {"mprog-disable", no_argument, NULL, 'n' },
+ {"mprog-name", required_argument, NULL, 'e' },
+ {"mprog-filename", required_argument, NULL, 'f' },
+ {"redirect-device", required_argument, NULL, 'r' },
+ {"redirect-map", required_argument, NULL, 'm' },
{0, 0, NULL, 0 }
};
@@ -156,6 +161,9 @@ struct datarec {
__u64 processed;
__u64 dropped;
__u64 issue;
+ __u64 xdp_pass;
+ __u64 xdp_drop;
+ __u64 xdp_redirect;
};
struct record {
__u64 timestamp;
@@ -175,6 +183,9 @@ static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
/* For percpu maps, userspace gets a value per possible CPU */
unsigned int nr_cpus = bpf_num_possible_cpus();
struct datarec values[nr_cpus];
+ __u64 sum_xdp_redirect = 0;
+ __u64 sum_xdp_pass = 0;
+ __u64 sum_xdp_drop = 0;
__u64 sum_processed = 0;
__u64 sum_dropped = 0;
__u64 sum_issue = 0;
@@ -196,10 +207,19 @@ static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
sum_dropped += values[i].dropped;
rec->cpu[i].issue = values[i].issue;
sum_issue += values[i].issue;
+ rec->cpu[i].xdp_pass = values[i].xdp_pass;
+ sum_xdp_pass += values[i].xdp_pass;
+ rec->cpu[i].xdp_drop = values[i].xdp_drop;
+ sum_xdp_drop += values[i].xdp_drop;
+ rec->cpu[i].xdp_redirect = values[i].xdp_redirect;
+ sum_xdp_redirect += values[i].xdp_redirect;
}
rec->total.processed = sum_processed;
rec->total.dropped = sum_dropped;
rec->total.issue = sum_issue;
+ rec->total.xdp_pass = sum_xdp_pass;
+ rec->total.xdp_drop = sum_xdp_drop;
+ rec->total.xdp_redirect = sum_xdp_redirect;
return true;
}
@@ -300,17 +320,33 @@ static __u64 calc_errs_pps(struct datarec *r,
return pps;
}
+static void calc_xdp_pps(struct datarec *r, struct datarec *p,
+ double *xdp_pass, double *xdp_drop,
+ double *xdp_redirect, double period_)
+{
+ *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0;
+ if (period_ > 0) {
+ *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
+ *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
+ *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
+ }
+}
+
static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
- char *prog_name)
+ char *prog_name, char *mprog_name, int mprog_fd)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
double pps = 0, drop = 0, err = 0;
+ bool mprog_enabled = false;
struct record *rec, *prev;
int to_cpu;
double t;
int i;
+ if (mprog_fd > 0)
+ mprog_enabled = true;
+
/* Header */
printf("Running XDP/eBPF prog_name:%s\n", prog_name);
printf("%-15s %-7s %-14s %-11s %-9s\n",
@@ -455,6 +491,34 @@ static void stats_print(struct stats_record *stats_rec,
printf(fm2_err, "xdp_exception", "total", pps, drop);
}
+ /* CPUMAP attached XDP program that runs on remote/destination CPU */
+ if (mprog_enabled) {
+ char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n";
+ char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n";
+ double xdp_pass, xdp_drop, xdp_redirect;
+
+ printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name);
+ printf("%-15s %-7s %-14s %-11s %-9s\n",
+ "XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir");
+
+ rec = &stats_rec->kthread;
+ prev = &stats_prev->kthread;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ calc_xdp_pps(r, p, &xdp_pass, &xdp_drop,
+ &xdp_redirect, t);
+ if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0)
+ printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop,
+ xdp_redirect);
+ }
+ calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
+ &xdp_redirect, t);
+ printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect);
+ }
+
printf("\n");
fflush(stdout);
}
@@ -491,7 +555,7 @@ static inline void swap(struct stats_record **a, struct stats_record **b)
*b = tmp;
}
-static int create_cpu_entry(__u32 cpu, __u32 queue_size,
+static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
__u32 avail_idx, bool new)
{
__u32 curr_cpus_count = 0;
@@ -501,7 +565,7 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size,
/* Add a CPU entry to cpumap, as this allocate a cpu entry in
* the kernel for the cpu.
*/
- ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0);
+ ret = bpf_map_update_elem(cpu_map_fd, &cpu, value, 0);
if (ret) {
fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
exit(EXIT_FAIL_BPF);
@@ -532,9 +596,9 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size,
}
}
/* map_fd[7] = cpus_iterator */
- printf("%s CPU:%u as idx:%u queue_size:%d (total cpus_count:%u)\n",
+ printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n",
new ? "Add-new":"Replace", cpu, avail_idx,
- queue_size, curr_cpus_count);
+ value->qsize, value->bpf_prog.fd, curr_cpus_count);
return 0;
}
@@ -558,21 +622,26 @@ static void mark_cpus_unavailable(void)
}
/* Stress cpumap management code by concurrently changing underlying cpumap */
-static void stress_cpumap(void)
+static void stress_cpumap(struct bpf_cpumap_val *value)
{
/* Changing qsize will cause kernel to free and alloc a new
* bpf_cpu_map_entry, with an associated/complicated tear-down
* procedure.
*/
- create_cpu_entry(1, 1024, 0, false);
- create_cpu_entry(1, 8, 0, false);
- create_cpu_entry(1, 16000, 0, false);
+ value->qsize = 1024;
+ create_cpu_entry(1, value, 0, false);
+ value->qsize = 8;
+ create_cpu_entry(1, value, 0, false);
+ value->qsize = 16000;
+ create_cpu_entry(1, value, 0, false);
}
static void stats_poll(int interval, bool use_separators, char *prog_name,
+ char *mprog_name, struct bpf_cpumap_val *value,
bool stress_mode)
{
struct stats_record *record, *prev;
+ int mprog_fd;
record = alloc_stats_record();
prev = alloc_stats_record();
@@ -584,11 +653,12 @@ static void stats_poll(int interval, bool use_separators, char *prog_name,
while (1) {
swap(&prev, &record);
+ mprog_fd = value->bpf_prog.fd;
stats_collect(record);
- stats_print(record, prev, prog_name);
+ stats_print(record, prev, prog_name, mprog_name, mprog_fd);
sleep(interval);
if (stress_mode)
- stress_cpumap();
+ stress_cpumap(value);
}
free_stats_record(record);
@@ -661,15 +731,66 @@ static int init_map_fds(struct bpf_object *obj)
return 0;
}
+static int load_cpumap_prog(char *file_name, char *prog_name,
+ char *redir_interface, char *redir_map)
+{
+ struct bpf_prog_load_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .expected_attach_type = BPF_XDP_CPUMAP,
+ .file = file_name,
+ };
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int fd;
+
+ if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd))
+ return -1;
+
+ if (fd < 0) {
+ fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
+ strerror(errno));
+ return fd;
+ }
+
+ if (redir_interface && redir_map) {
+ int err, map_fd, ifindex_out, key = 0;
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
+ if (map_fd < 0)
+ return map_fd;
+
+ ifindex_out = if_nametoindex(redir_interface);
+ if (!ifindex_out)
+ return -1;
+
+ err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0);
+ if (err < 0)
+ return err;
+ }
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (!prog) {
+ fprintf(stderr, "bpf_object__find_program_by_title failed\n");
+ return EXIT_FAIL;
+ }
+
+ return bpf_program__fd(prog);
+}
+
int main(int argc, char **argv)
{
struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
+ char *mprog_filename = "xdp_redirect_kern.o";
+ char *redir_interface = NULL, *redir_map = NULL;
+ char *mprog_name = "xdp_redirect_dummy";
+ bool mprog_disable = false;
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_UNSPEC,
};
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
+ struct bpf_cpumap_val value;
bool use_separators = true;
bool stress_mode = false;
struct bpf_program *prog;
@@ -681,6 +802,7 @@ int main(int argc, char **argv)
int add_cpu = -1;
int opt, err;
int prog_fd;
+ int *cpu, i;
__u32 qsize;
n_cpus = get_nprocs_conf();
@@ -716,8 +838,15 @@ int main(int argc, char **argv)
}
mark_cpus_unavailable();
+ cpu = malloc(n_cpus * sizeof(int));
+ if (!cpu) {
+ fprintf(stderr, "failed to allocate cpu array\n");
+ return EXIT_FAIL;
+ }
+ memset(cpu, 0, n_cpus * sizeof(int));
+
/* Parse commands line args */
- while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF",
+ while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:",
long_options, &longindex)) != -1) {
switch (opt) {
case 'd':
@@ -751,6 +880,21 @@ int main(int argc, char **argv)
/* Selecting eBPF prog to load */
prog_name = optarg;
break;
+ case 'n':
+ mprog_disable = true;
+ break;
+ case 'f':
+ mprog_filename = optarg;
+ break;
+ case 'e':
+ mprog_name = optarg;
+ break;
+ case 'r':
+ redir_interface = optarg;
+ break;
+ case 'm':
+ redir_map = optarg;
+ break;
case 'c':
/* Add multiple CPUs */
add_cpu = strtoul(optarg, NULL, 0);
@@ -760,8 +904,7 @@ int main(int argc, char **argv)
errno, strerror(errno));
goto error;
}
- create_cpu_entry(add_cpu, qsize, added_cpus, true);
- added_cpus++;
+ cpu[added_cpus++] = add_cpu;
break;
case 'q':
qsize = atoi(optarg);
@@ -772,6 +915,7 @@ int main(int argc, char **argv)
case 'h':
error:
default:
+ free(cpu);
usage(argv, obj);
return EXIT_FAIL_OPTION;
}
@@ -784,15 +928,30 @@ int main(int argc, char **argv)
if (ifindex == -1) {
fprintf(stderr, "ERR: required option --dev missing\n");
usage(argv, obj);
- return EXIT_FAIL_OPTION;
+ err = EXIT_FAIL_OPTION;
+ goto out;
}
/* Required option */
if (add_cpu == -1) {
fprintf(stderr, "ERR: required option --cpu missing\n");
fprintf(stderr, " Specify multiple --cpu option to add more\n");
usage(argv, obj);
- return EXIT_FAIL_OPTION;
+ err = EXIT_FAIL_OPTION;
+ goto out;
+ }
+
+ value.bpf_prog.fd = 0;
+ if (!mprog_disable)
+ value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name,
+ redir_interface, redir_map);
+ if (value.bpf_prog.fd < 0) {
+ err = value.bpf_prog.fd;
+ goto out;
}
+ value.qsize = qsize;
+
+ for (i = 0; i < added_cpus; i++)
+ create_cpu_entry(cpu[i], &value, i, true);
/* Remove XDP program when program is interrupted or killed */
signal(SIGINT, int_exit);
@@ -801,27 +960,33 @@ int main(int argc, char **argv)
prog = bpf_object__find_program_by_title(obj, prog_name);
if (!prog) {
fprintf(stderr, "bpf_object__find_program_by_title failed\n");
- return EXIT_FAIL;
+ err = EXIT_FAIL;
+ goto out;
}
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
fprintf(stderr, "bpf_program__fd failed\n");
- return EXIT_FAIL;
+ err = EXIT_FAIL;
+ goto out;
}
if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
fprintf(stderr, "link set xdp fd failed\n");
- return EXIT_FAIL_XDP;
+ err = EXIT_FAIL_XDP;
+ goto out;
}
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
if (err) {
printf("can't get prog info - %s\n", strerror(errno));
- return err;
+ goto out;
}
prog_id = info.id;
- stats_poll(interval, use_separators, prog_name, stress_mode);
- return EXIT_OK;
+ stats_poll(interval, use_separators, prog_name, mprog_name,
+ &value, stress_mode);
+out:
+ free(cpu);
+ return err;
}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 6843376733df..5bfa448b4704 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -404,6 +404,7 @@ class PrinterHelpers(Printer):
type_fwds = [
'struct bpf_fib_lookup',
+ 'struct bpf_sk_lookup',
'struct bpf_perf_event_data',
'struct bpf_perf_event_value',
'struct bpf_pidns_info',
@@ -450,6 +451,7 @@ class PrinterHelpers(Printer):
'struct bpf_perf_event_data',
'struct bpf_perf_event_value',
'struct bpf_pidns_info',
+ 'struct bpf_sk_lookup',
'struct bpf_sock',
'struct bpf_sock_addr',
'struct bpf_sock_ops',
@@ -487,6 +489,11 @@ class PrinterHelpers(Printer):
'struct sk_msg_buff': 'struct sk_msg_md',
'struct xdp_buff': 'struct xdp_md',
}
+ # Helpers overloaded for different context types.
+ overloaded_helpers = [
+ 'bpf_get_socket_cookie',
+ 'bpf_sk_assign',
+ ]
def print_header(self):
header = '''\
@@ -543,7 +550,7 @@ class PrinterHelpers(Printer):
for i, a in enumerate(proto['args']):
t = a['type']
n = a['name']
- if proto['name'] == 'bpf_get_socket_cookie' and i == 0:
+ if proto['name'] in self.overloaded_helpers and i == 0:
t = 'void'
n = 'ctx'
one_arg = '{}{}'.format(comma, self.map_type(t))
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 412ea3d9bf7f..82e356b664e8 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -45,7 +45,7 @@ PROG COMMANDS
| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
| **cgroup/getsockopt** | **cgroup/setsockopt** |
-| **struct_ops** | **fentry** | **fexit** | **freplace**
+| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
| }
| *ATTACH_TYPE* := {
| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 25b25aca1112..7b137264ea3a 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -479,7 +479,7 @@ _bpftool()
cgroup/post_bind4 cgroup/post_bind6 \
cgroup/sysctl cgroup/getsockopt \
cgroup/setsockopt struct_ops \
- fentry fexit freplace" -- \
+ fentry fexit freplace sk_lookup" -- \
"$cur" ) )
return 0
;;
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 29f4e7611ae8..65303664417e 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -1,10 +1,11 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
+#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
-#include <fts.h>
+#include <ftw.h>
#include <libgen.h>
#include <mntent.h>
#include <stdbool.h>
@@ -64,6 +65,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
[BPF_TRACE_FEXIT] = "fexit",
[BPF_MODIFY_RETURN] = "mod_ret",
[BPF_LSM_MAC] = "lsm_mac",
+ [BPF_SK_LOOKUP] = "sk_lookup",
};
void p_err(const char *fmt, ...)
@@ -160,24 +162,35 @@ int mount_tracefs(const char *target)
return err;
}
-int open_obj_pinned(char *path, bool quiet)
+int open_obj_pinned(const char *path, bool quiet)
{
- int fd;
+ char *pname;
+ int fd = -1;
+
+ pname = strdup(path);
+ if (!pname) {
+ if (!quiet)
+ p_err("mem alloc failed");
+ goto out_ret;
+ }
- fd = bpf_obj_get(path);
+ fd = bpf_obj_get(pname);
if (fd < 0) {
if (!quiet)
- p_err("bpf obj get (%s): %s", path,
- errno == EACCES && !is_bpffs(dirname(path)) ?
+ p_err("bpf obj get (%s): %s", pname,
+ errno == EACCES && !is_bpffs(dirname(pname)) ?
"directory not in bpf file system (bpffs)" :
strerror(errno));
- return -1;
+ goto out_free;
}
+out_free:
+ free(pname);
+out_ret:
return fd;
}
-int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
+int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type)
{
enum bpf_obj_type type;
int fd;
@@ -367,71 +380,82 @@ void print_hex_data_json(uint8_t *data, size_t len)
jsonw_end_array(json_wtr);
}
+/* extra params for nftw cb */
+static struct pinned_obj_table *build_fn_table;
+static enum bpf_obj_type build_fn_type;
+
+static int do_build_table_cb(const char *fpath, const struct stat *sb,
+ int typeflag, struct FTW *ftwbuf)
+{
+ struct bpf_prog_info pinned_info;
+ __u32 len = sizeof(pinned_info);
+ struct pinned_obj *obj_node;
+ enum bpf_obj_type objtype;
+ int fd, err = 0;
+
+ if (typeflag != FTW_F)
+ goto out_ret;
+
+ fd = open_obj_pinned(fpath, true);
+ if (fd < 0)
+ goto out_ret;
+
+ objtype = get_fd_type(fd);
+ if (objtype != build_fn_type)
+ goto out_close;
+
+ memset(&pinned_info, 0, sizeof(pinned_info));
+ if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len))
+ goto out_close;
+
+ obj_node = calloc(1, sizeof(*obj_node));
+ if (!obj_node) {
+ err = -1;
+ goto out_close;
+ }
+
+ obj_node->id = pinned_info.id;
+ obj_node->path = strdup(fpath);
+ if (!obj_node->path) {
+ err = -1;
+ free(obj_node);
+ goto out_close;
+ }
+
+ hash_add(build_fn_table->table, &obj_node->hash, obj_node->id);
+out_close:
+ close(fd);
+out_ret:
+ return err;
+}
+
int build_pinned_obj_table(struct pinned_obj_table *tab,
enum bpf_obj_type type)
{
- struct bpf_prog_info pinned_info = {};
- struct pinned_obj *obj_node = NULL;
- __u32 len = sizeof(pinned_info);
struct mntent *mntent = NULL;
- enum bpf_obj_type objtype;
FILE *mntfile = NULL;
- FTSENT *ftse = NULL;
- FTS *fts = NULL;
- int fd, err;
+ int flags = FTW_PHYS;
+ int nopenfd = 16;
+ int err = 0;
mntfile = setmntent("/proc/mounts", "r");
if (!mntfile)
return -1;
+ build_fn_table = tab;
+ build_fn_type = type;
+
while ((mntent = getmntent(mntfile))) {
- char *path[] = { mntent->mnt_dir, NULL };
+ char *path = mntent->mnt_dir;
if (strncmp(mntent->mnt_type, "bpf", 3) != 0)
continue;
-
- fts = fts_open(path, 0, NULL);
- if (!fts)
- continue;
-
- while ((ftse = fts_read(fts))) {
- if (!(ftse->fts_info & FTS_F))
- continue;
- fd = open_obj_pinned(ftse->fts_path, true);
- if (fd < 0)
- continue;
-
- objtype = get_fd_type(fd);
- if (objtype != type) {
- close(fd);
- continue;
- }
- memset(&pinned_info, 0, sizeof(pinned_info));
- err = bpf_obj_get_info_by_fd(fd, &pinned_info, &len);
- if (err) {
- close(fd);
- continue;
- }
-
- obj_node = malloc(sizeof(*obj_node));
- if (!obj_node) {
- close(fd);
- fts_close(fts);
- fclose(mntfile);
- return -1;
- }
-
- memset(obj_node, 0, sizeof(*obj_node));
- obj_node->id = pinned_info.id;
- obj_node->path = strdup(ftse->fts_path);
- hash_add(tab->table, &obj_node->hash, obj_node->id);
-
- close(fd);
- }
- fts_close(fts);
+ err = nftw(path, do_build_table_cb, nopenfd, flags);
+ if (err)
+ break;
}
fclose(mntfile);
- return 0;
+ return err;
}
void delete_pinned_obj_table(struct pinned_obj_table *tab)
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index b59d26e89367..8a4c2b3b0cd6 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -302,8 +302,11 @@ static int do_skeleton(int argc, char **argv)
opts.object_name = obj_name;
obj = bpf_object__open_mem(obj_data, file_sz, &opts);
if (IS_ERR(obj)) {
+ char err_buf[256];
+
+ libbpf_strerror(PTR_ERR(obj), err_buf, sizeof(err_buf));
+ p_err("failed to open BPF object file: %s", err_buf);
obj = NULL;
- p_err("failed to open BPF object file: %ld", PTR_ERR(obj));
goto out;
}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 78d34e860713..e3a79b5a9960 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -152,8 +152,8 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
int get_fd_type(int fd);
const char *get_fd_type_name(enum bpf_obj_type type);
char *get_fdinfo(int fd, const char *key);
-int open_obj_pinned(char *path, bool quiet);
-int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
+int open_obj_pinned(const char *path, bool quiet);
+int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type);
int mount_bpffs_for_pin(const char *name);
int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
int do_pin_fd(int fd, const char *name);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 6863c57effd0..3e6ecc6332e2 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -59,6 +59,7 @@ const char * const prog_type_name[] = {
[BPF_PROG_TYPE_TRACING] = "tracing",
[BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
[BPF_PROG_TYPE_EXT] = "ext",
+ [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
};
const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name);
@@ -1905,7 +1906,7 @@ static int do_help(int argc, char **argv)
" cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
" cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
" cgroup/getsockopt | cgroup/setsockopt |\n"
- " struct_ops | fentry | fexit | freplace }\n"
+ " struct_ops | fentry | fexit | freplace | sk_lookup }\n"
" ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
" flow_dissector }\n"
" METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
index 8468a608911e..d9b420972934 100644
--- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
+++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
@@ -71,7 +71,8 @@ int iter(struct bpf_iter__task_file *ctx)
e.pid = task->tgid;
e.id = get_obj_id(file->private_data, obj_type);
- bpf_probe_read(&e.comm, sizeof(e.comm), task->group_leader->comm);
+ bpf_probe_read_kernel(&e.comm, sizeof(e.comm),
+ task->group_leader->comm);
bpf_seq_write(ctx->meta->seq, &e, sizeof(e));
return 0;
diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index fe019774f8a7..4867d549e3c1 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -3,6 +3,8 @@
#ifndef _LINUX_BTF_IDS_H
#define _LINUX_BTF_IDS_H
+#ifdef CONFIG_DEBUG_INFO_BTF
+
#include <linux/compiler.h> /* for __PASTE */
/*
@@ -21,7 +23,7 @@
asm( \
".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
".local " #symbol " ; \n" \
-".type " #symbol ", @object; \n" \
+".type " #symbol ", STT_OBJECT; \n" \
".size " #symbol ", 4; \n" \
#symbol ": \n" \
".zero 4 \n" \
@@ -55,17 +57,20 @@ asm( \
* .zero 4
*
*/
-#define __BTF_ID_LIST(name) \
+#define __BTF_ID_LIST(name, scope) \
asm( \
".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \
-".local " #name "; \n" \
+"." #scope " " #name "; \n" \
#name ":; \n" \
".popsection; \n"); \
#define BTF_ID_LIST(name) \
-__BTF_ID_LIST(name) \
+__BTF_ID_LIST(name, local) \
extern u32 name[];
+#define BTF_ID_LIST_GLOBAL(name) \
+__BTF_ID_LIST(name, globl)
+
/*
* The BTF_ID_UNUSED macro defines 4 zero bytes.
* It's used when we want to define 'unused' entry
@@ -83,5 +88,43 @@ asm( \
".zero 4 \n" \
".popsection; \n");
+#else
+
+#define BTF_ID_LIST(name) static u32 name[5];
+#define BTF_ID(prefix, name)
+#define BTF_ID_UNUSED
+#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
+
+#endif /* CONFIG_DEBUG_INFO_BTF */
+
+#ifdef CONFIG_NET
+/* Define a list of socket types which can be the argument for
+ * skc_to_*_sock() helpers. All these sockets should have
+ * sock_common as the first argument in its memory layout.
+ */
+#define BTF_SOCK_TYPE_xxx \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+
+enum {
+#define BTF_SOCK_TYPE(name, str) name,
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+MAX_BTF_SOCK_TYPE,
+};
+
+extern u32 btf_sock_ids[];
+#endif
#endif
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 5e386389913a..54d0c886e3ba 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -189,6 +189,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_STRUCT_OPS,
BPF_PROG_TYPE_EXT,
BPF_PROG_TYPE_LSM,
+ BPF_PROG_TYPE_SK_LOOKUP,
};
enum bpf_attach_type {
@@ -227,6 +228,8 @@ enum bpf_attach_type {
BPF_CGROUP_INET6_GETSOCKNAME,
BPF_XDP_DEVMAP,
BPF_CGROUP_INET_SOCK_RELEASE,
+ BPF_XDP_CPUMAP,
+ BPF_SK_LOOKUP,
__MAX_BPF_ATTACH_TYPE
};
@@ -2419,7 +2422,7 @@ union bpf_attr {
* Look for an IPv6 socket.
*
* If the *netns* is a negative signed 32-bit integer, then the
- * socket lookup table in the netns associated with the *ctx* will
+ * socket lookup table in the netns associated with the *ctx*
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
@@ -2456,7 +2459,7 @@ union bpf_attr {
* Look for an IPv6 socket.
*
* If the *netns* is a negative signed 32-bit integer, then the
- * socket lookup table in the netns associated with the *ctx* will
+ * socket lookup table in the netns associated with the *ctx*
* will be used. For the TC hooks, this is the netns of the device
* in the skb. For socket hooks, this is the netns of the socket.
* If *netns* is any other signed 32-bit value greater than or
@@ -3068,6 +3071,10 @@ union bpf_attr {
*
* long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
* Description
+ * Helper is overloaded depending on BPF program type. This
+ * description applies to **BPF_PROG_TYPE_SCHED_CLS** and
+ * **BPF_PROG_TYPE_SCHED_ACT** programs.
+ *
* Assign the *sk* to the *skb*. When combined with appropriate
* routing configuration to receive the packet towards the socket,
* will cause *skb* to be delivered to the specified socket.
@@ -3093,6 +3100,56 @@ union bpf_attr {
* **-ESOCKTNOSUPPORT** if the socket type is not supported
* (reuseport).
*
+ * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags)
+ * Description
+ * Helper is overloaded depending on BPF program type. This
+ * description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs.
+ *
+ * Select the *sk* as a result of a socket lookup.
+ *
+ * For the operation to succeed passed socket must be compatible
+ * with the packet description provided by the *ctx* object.
+ *
+ * L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must
+ * be an exact match. While IP family (**AF_INET** or
+ * **AF_INET6**) must be compatible, that is IPv6 sockets
+ * that are not v6-only can be selected for IPv4 packets.
+ *
+ * Only TCP listeners and UDP unconnected sockets can be
+ * selected. *sk* can also be NULL to reset any previous
+ * selection.
+ *
+ * *flags* argument can combination of following values:
+ *
+ * * **BPF_SK_LOOKUP_F_REPLACE** to override the previous
+ * socket selection, potentially done by a BPF program
+ * that ran before us.
+ *
+ * * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip
+ * load-balancing within reuseport group for the socket
+ * being selected.
+ *
+ * On success *ctx->sk* will point to the selected socket.
+ *
+ * Return
+ * 0 on success, or a negative errno in case of failure.
+ *
+ * * **-EAFNOSUPPORT** if socket family (*sk->family*) is
+ * not compatible with packet family (*ctx->family*).
+ *
+ * * **-EEXIST** if socket has been already selected,
+ * potentially by another program, and
+ * **BPF_SK_LOOKUP_F_REPLACE** flag was not specified.
+ *
+ * * **-EINVAL** if unsupported flags were specified.
+ *
+ * * **-EPROTOTYPE** if socket L4 protocol
+ * (*sk->protocol*) doesn't match packet protocol
+ * (*ctx->protocol*).
+ *
+ * * **-ESOCKTNOSUPPORT** if socket is not in allowed
+ * state (TCP listening or UDP unconnected).
+ *
* u64 bpf_ktime_get_boot_ns(void)
* Description
* Return the time elapsed since system boot, in nanoseconds.
@@ -3606,6 +3663,12 @@ enum {
BPF_RINGBUF_HDR_SZ = 8,
};
+/* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */
+enum {
+ BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0),
+ BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1),
+};
+
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
@@ -3849,6 +3912,19 @@ struct bpf_devmap_val {
} bpf_prog;
};
+/* CPUMAP map-value layout
+ *
+ * The struct data-layout of map-value is a configuration interface.
+ * New members can only be added to the end of this structure.
+ */
+struct bpf_cpumap_val {
+ __u32 qsize; /* queue size to remote target CPU */
+ union {
+ int fd; /* prog fd on map write */
+ __u32 id; /* prog id on map read */
+ } bpf_prog;
+};
+
enum sk_action {
SK_DROP = 0,
SK_PASS,
@@ -3986,7 +4062,7 @@ struct bpf_link_info {
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
* by user and intended to be used by socket (e.g. to bind to, depends on
- * attach attach type).
+ * attach type).
*/
struct bpf_sock_addr {
__u32 user_family; /* Allows 4-byte read, but no write. */
@@ -4335,4 +4411,19 @@ struct bpf_pidns_info {
__u32 pid;
__u32 tgid;
};
+
+/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
+struct bpf_sk_lookup {
+ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+
+ __u32 family; /* Protocol family (AF_INET, AF_INET6) */
+ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
+ __u32 remote_ip4; /* Network byte order */
+ __u32 remote_ip6[4]; /* Network byte order */
+ __u32 remote_port; /* Network byte order */
+ __u32 local_ip4; /* Network byte order */
+ __u32 local_ip6[4]; /* Network byte order */
+ __u32 local_port; /* Host byte order */
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index a510d8ed716f..bc14db706b88 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -40,7 +40,7 @@
* Helper macro to manipulate data structures
*/
#ifndef offsetof
-#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
+#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
#endif
#ifndef container_of
#define container_of(ptr, type, member) \
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4489f95f1d1a..846164c79df1 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6799,6 +6799,7 @@ BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
+BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
enum bpf_attach_type
bpf_program__get_expected_attach_type(struct bpf_program *prog)
@@ -6912,6 +6913,8 @@ static const struct bpf_sec_def section_defs[] = {
.attach_fn = attach_iter),
BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP,
BPF_XDP_DEVMAP),
+ BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP,
+ BPF_XDP_CPUMAP),
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
@@ -6979,6 +6982,8 @@ static const struct bpf_sec_def section_defs[] = {
BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
BPF_CGROUP_SETSOCKOPT),
BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS),
+ BPF_EAPROG_SEC("sk_lookup/", BPF_PROG_TYPE_SK_LOOKUP,
+ BPF_SK_LOOKUP),
};
#undef BPF_PROG_SEC_IMPL
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 2335971ed0bd..c2272132e929 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -350,6 +350,7 @@ LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
@@ -377,6 +378,7 @@ LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog);
/*
* No need for __attribute__((packed)), all members of 'bpf_map_def'
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index c5d5c7664c3b..6f0856abe299 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -287,6 +287,8 @@ LIBBPF_0.1.0 {
bpf_map__type;
bpf_map__value_size;
bpf_program__autoload;
+ bpf_program__is_sk_lookup;
bpf_program__set_autoload;
+ bpf_program__set_sk_lookup;
btf__set_fd;
} LIBBPF_0.0.9;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 10cd8d1891f5..5a3d3f078408 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -78,6 +78,9 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
break;
+ case BPF_PROG_TYPE_SK_LOOKUP:
+ xattr.expected_attach_type = BPF_SK_LOOKUP;
+ break;
case BPF_PROG_TYPE_KPROBE:
xattr.kern_version = get_kernel_version();
break;
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index acd08715be2e..f56655690f9b 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -73,29 +73,8 @@ int start_server(int family, int type, const char *addr_str, __u16 port,
socklen_t len;
int fd;
- if (family == AF_INET) {
- struct sockaddr_in *sin = (void *)&addr;
-
- sin->sin_family = AF_INET;
- sin->sin_port = htons(port);
- if (addr_str &&
- inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
- log_err("inet_pton(AF_INET, %s)", addr_str);
- return -1;
- }
- len = sizeof(*sin);
- } else {
- struct sockaddr_in6 *sin6 = (void *)&addr;
-
- sin6->sin6_family = AF_INET6;
- sin6->sin6_port = htons(port);
- if (addr_str &&
- inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
- log_err("inet_pton(AF_INET6, %s)", addr_str);
- return -1;
- }
- len = sizeof(*sin6);
- }
+ if (make_sockaddr(family, addr_str, port, &addr, &len))
+ return -1;
fd = socket(family, type, 0);
if (fd < 0) {
@@ -194,3 +173,36 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
return 0;
}
+
+int make_sockaddr(int family, const char *addr_str, __u16 port,
+ struct sockaddr_storage *addr, socklen_t *len)
+{
+ if (family == AF_INET) {
+ struct sockaddr_in *sin = (void *)addr;
+
+ sin->sin_family = AF_INET;
+ sin->sin_port = htons(port);
+ if (addr_str &&
+ inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
+ log_err("inet_pton(AF_INET, %s)", addr_str);
+ return -1;
+ }
+ if (len)
+ *len = sizeof(*sin);
+ return 0;
+ } else if (family == AF_INET6) {
+ struct sockaddr_in6 *sin6 = (void *)addr;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+ if (addr_str &&
+ inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
+ log_err("inet_pton(AF_INET6, %s)", addr_str);
+ return -1;
+ }
+ if (len)
+ *len = sizeof(*sin6);
+ return 0;
+ }
+ return -1;
+}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index f580e82fda58..c3728f6667e4 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -37,5 +37,7 @@ int start_server(int family, int type, const char *addr, __u16 port,
int timeout_ms);
int connect_to_fd(int server_fd, int timeout_ms);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
+int make_sockaddr(int family, const char *addr_str, __u16 port,
+ struct sockaddr_storage *addr, socklen_t *len);
#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index 403be6f36cba..3b127cab4864 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -6,6 +6,7 @@
#include <bpf/libbpf.h>
#include <linux/btf.h>
#include <linux/kernel.h>
+#define CONFIG_DEBUG_INFO_BTF
#include <linux/btf_ids.h>
#include "test_progs.h"
@@ -27,7 +28,17 @@ struct symbol test_symbols[] = {
{ "func", BTF_KIND_FUNC, -1 },
};
-BTF_ID_LIST(test_list)
+BTF_ID_LIST(test_list_local)
+BTF_ID_UNUSED
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct, S)
+BTF_ID(union, U)
+BTF_ID(func, func)
+
+extern __u32 test_list_global[];
+BTF_ID_LIST_GLOBAL(test_list_global)
BTF_ID_UNUSED
BTF_ID(typedef, S)
BTF_ID(typedef, T)
@@ -93,18 +104,25 @@ static int resolve_symbols(void)
int test_resolve_btfids(void)
{
- unsigned int i;
+ __u32 *test_list, *test_lists[] = { test_list_local, test_list_global };
+ unsigned int i, j;
int ret = 0;
if (resolve_symbols())
return -1;
- /* Check BTF_ID_LIST(test_list) IDs */
- for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) {
- ret = CHECK(test_list[i] != test_symbols[i].id,
- "id_check",
- "wrong ID for %s (%d != %d)\n", test_symbols[i].name,
- test_list[i], test_symbols[i].id);
+ /* Check BTF_ID_LIST(test_list_local) and
+ * BTF_ID_LIST_GLOBAL(test_list_global) IDs
+ */
+ for (j = 0; j < ARRAY_SIZE(test_lists); j++) {
+ test_list = test_lists[j];
+ for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) {
+ ret = CHECK(test_list[i] != test_symbols[i].id,
+ "id_check",
+ "wrong ID for %s (%d != %d)\n",
+ test_symbols[i].name,
+ test_list[i], test_symbols[i].id);
+ }
}
return ret;
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
new file mode 100644
index 000000000000..f1784ae4565a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -0,0 +1,1282 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+/*
+ * Test BPF attach point for INET socket lookup (BPF_SK_LOOKUP).
+ *
+ * Tests exercise:
+ * - attaching/detaching/querying programs to BPF_SK_LOOKUP hook,
+ * - redirecting socket lookup to a socket selected by BPF program,
+ * - failing a socket lookup on BPF program's request,
+ * - error scenarios for selecting a socket from BPF program,
+ * - accessing BPF program context,
+ * - attaching and running multiple BPF programs.
+ *
+ * Tests run in a dedicated network namespace.
+ */
+
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+#include "test_progs.h"
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "test_sk_lookup.skel.h"
+
+/* External (address, port) pairs the client sends packets to. */
+#define EXT_IP4 "127.0.0.1"
+#define EXT_IP6 "fd00::1"
+#define EXT_PORT 7007
+
+/* Internal (address, port) pairs the server listens/receives at. */
+#define INT_IP4 "127.0.0.2"
+#define INT_IP4_V6 "::ffff:127.0.0.2"
+#define INT_IP6 "fd00::2"
+#define INT_PORT 8008
+
+#define IO_TIMEOUT_SEC 3
+
+enum server {
+ SERVER_A = 0,
+ SERVER_B = 1,
+ MAX_SERVERS,
+};
+
+enum {
+ PROG1 = 0,
+ PROG2,
+};
+
+struct inet_addr {
+ const char *ip;
+ unsigned short port;
+};
+
+struct test {
+ const char *desc;
+ struct bpf_program *lookup_prog;
+ struct bpf_program *reuseport_prog;
+ struct bpf_map *sock_map;
+ int sotype;
+ struct inet_addr connect_to;
+ struct inet_addr listen_at;
+ enum server accept_on;
+};
+
+static __u32 duration; /* for CHECK macro */
+
+static bool is_ipv6(const char *ip)
+{
+ return !!strchr(ip, ':');
+}
+
+static int attach_reuseport(int sock_fd, struct bpf_program *reuseport_prog)
+{
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(reuseport_prog);
+ if (prog_fd < 0) {
+ errno = -prog_fd;
+ return -1;
+ }
+
+ err = setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
+ &prog_fd, sizeof(prog_fd));
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static socklen_t inetaddr_len(const struct sockaddr_storage *addr)
+{
+ return (addr->ss_family == AF_INET ? sizeof(struct sockaddr_in) :
+ addr->ss_family == AF_INET6 ? sizeof(struct sockaddr_in6) : 0);
+}
+
+static int make_socket(int sotype, const char *ip, int port,
+ struct sockaddr_storage *addr)
+{
+ struct timeval timeo = { .tv_sec = IO_TIMEOUT_SEC };
+ int err, family, fd;
+
+ family = is_ipv6(ip) ? AF_INET6 : AF_INET;
+ err = make_sockaddr(family, ip, port, addr, NULL);
+ if (CHECK(err, "make_address", "failed\n"))
+ return -1;
+
+ fd = socket(addr->ss_family, sotype, 0);
+ if (CHECK(fd < 0, "socket", "failed\n")) {
+ log_err("failed to make socket");
+ return -1;
+ }
+
+ err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
+ if (CHECK(err, "setsockopt(SO_SNDTIMEO)", "failed\n")) {
+ log_err("failed to set SNDTIMEO");
+ close(fd);
+ return -1;
+ }
+
+ err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
+ if (CHECK(err, "setsockopt(SO_RCVTIMEO)", "failed\n")) {
+ log_err("failed to set RCVTIMEO");
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+static int make_server(int sotype, const char *ip, int port,
+ struct bpf_program *reuseport_prog)
+{
+ struct sockaddr_storage addr = {0};
+ const int one = 1;
+ int err, fd = -1;
+
+ fd = make_socket(sotype, ip, port, &addr);
+ if (fd < 0)
+ return -1;
+
+ /* Enabled for UDPv6 sockets for IPv4-mapped IPv6 to work. */
+ if (sotype == SOCK_DGRAM) {
+ err = setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(IP_RECVORIGDSTADDR)", "failed\n")) {
+ log_err("failed to enable IP_RECVORIGDSTADDR");
+ goto fail;
+ }
+ }
+
+ if (sotype == SOCK_DGRAM && addr.ss_family == AF_INET6) {
+ err = setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(IPV6_RECVORIGDSTADDR)", "failed\n")) {
+ log_err("failed to enable IPV6_RECVORIGDSTADDR");
+ goto fail;
+ }
+ }
+
+ if (sotype == SOCK_STREAM) {
+ err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(SO_REUSEADDR)", "failed\n")) {
+ log_err("failed to enable SO_REUSEADDR");
+ goto fail;
+ }
+ }
+
+ if (reuseport_prog) {
+ err = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(SO_REUSEPORT)", "failed\n")) {
+ log_err("failed to enable SO_REUSEPORT");
+ goto fail;
+ }
+ }
+
+ err = bind(fd, (void *)&addr, inetaddr_len(&addr));
+ if (CHECK(err, "bind", "failed\n")) {
+ log_err("failed to bind listen socket");
+ goto fail;
+ }
+
+ if (sotype == SOCK_STREAM) {
+ err = listen(fd, SOMAXCONN);
+ if (CHECK(err, "make_server", "listen")) {
+ log_err("failed to listen on port %d", port);
+ goto fail;
+ }
+ }
+
+ /* Late attach reuseport prog so we can have one init path */
+ if (reuseport_prog) {
+ err = attach_reuseport(fd, reuseport_prog);
+ if (CHECK(err, "attach_reuseport", "failed\n")) {
+ log_err("failed to attach reuseport prog");
+ goto fail;
+ }
+ }
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int make_client(int sotype, const char *ip, int port)
+{
+ struct sockaddr_storage addr = {0};
+ int err, fd;
+
+ fd = make_socket(sotype, ip, port, &addr);
+ if (fd < 0)
+ return -1;
+
+ err = connect(fd, (void *)&addr, inetaddr_len(&addr));
+ if (CHECK(err, "make_client", "connect")) {
+ log_err("failed to connect client socket");
+ goto fail;
+ }
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int send_byte(int fd)
+{
+ ssize_t n;
+
+ errno = 0;
+ n = send(fd, "a", 1, 0);
+ if (CHECK(n <= 0, "send_byte", "send")) {
+ log_err("failed/partial send");
+ return -1;
+ }
+ return 0;
+}
+
+static int recv_byte(int fd)
+{
+ char buf[1];
+ ssize_t n;
+
+ n = recv(fd, buf, sizeof(buf), 0);
+ if (CHECK(n <= 0, "recv_byte", "recv")) {
+ log_err("failed/partial recv");
+ return -1;
+ }
+ return 0;
+}
+
+static int tcp_recv_send(int server_fd)
+{
+ char buf[1];
+ int ret, fd;
+ ssize_t n;
+
+ fd = accept(server_fd, NULL, NULL);
+ if (CHECK(fd < 0, "accept", "failed\n")) {
+ log_err("failed to accept");
+ return -1;
+ }
+
+ n = recv(fd, buf, sizeof(buf), 0);
+ if (CHECK(n <= 0, "recv", "failed\n")) {
+ log_err("failed/partial recv");
+ ret = -1;
+ goto close;
+ }
+
+ n = send(fd, buf, n, 0);
+ if (CHECK(n <= 0, "send", "failed\n")) {
+ log_err("failed/partial send");
+ ret = -1;
+ goto close;
+ }
+
+ ret = 0;
+close:
+ close(fd);
+ return ret;
+}
+
+static void v4_to_v6(struct sockaddr_storage *ss)
+{
+ struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)ss;
+ struct sockaddr_in v4 = *(struct sockaddr_in *)ss;
+
+ v6->sin6_family = AF_INET6;
+ v6->sin6_port = v4.sin_port;
+ v6->sin6_addr.s6_addr[10] = 0xff;
+ v6->sin6_addr.s6_addr[11] = 0xff;
+ memcpy(&v6->sin6_addr.s6_addr[12], &v4.sin_addr.s_addr, 4);
+}
+
+static int udp_recv_send(int server_fd)
+{
+ char cmsg_buf[CMSG_SPACE(sizeof(struct sockaddr_storage))];
+ struct sockaddr_storage _src_addr = { 0 };
+ struct sockaddr_storage *src_addr = &_src_addr;
+ struct sockaddr_storage *dst_addr = NULL;
+ struct msghdr msg = { 0 };
+ struct iovec iov = { 0 };
+ struct cmsghdr *cm;
+ char buf[1];
+ int ret, fd;
+ ssize_t n;
+
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+
+ msg.msg_name = src_addr;
+ msg.msg_namelen = sizeof(*src_addr);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ errno = 0;
+ n = recvmsg(server_fd, &msg, 0);
+ if (CHECK(n <= 0, "recvmsg", "failed\n")) {
+ log_err("failed to receive");
+ return -1;
+ }
+ if (CHECK(msg.msg_flags & MSG_CTRUNC, "recvmsg", "truncated cmsg\n"))
+ return -1;
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+ if ((cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_ORIGDSTADDR) ||
+ (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_ORIGDSTADDR)) {
+ dst_addr = (struct sockaddr_storage *)CMSG_DATA(cm);
+ break;
+ }
+ log_err("warning: ignored cmsg at level %d type %d",
+ cm->cmsg_level, cm->cmsg_type);
+ }
+ if (CHECK(!dst_addr, "recvmsg", "missing ORIGDSTADDR\n"))
+ return -1;
+
+ /* Server socket bound to IPv4-mapped IPv6 address */
+ if (src_addr->ss_family == AF_INET6 &&
+ dst_addr->ss_family == AF_INET) {
+ v4_to_v6(dst_addr);
+ }
+
+ /* Reply from original destination address. */
+ fd = socket(dst_addr->ss_family, SOCK_DGRAM, 0);
+ if (CHECK(fd < 0, "socket", "failed\n")) {
+ log_err("failed to create tx socket");
+ return -1;
+ }
+
+ ret = bind(fd, (struct sockaddr *)dst_addr, sizeof(*dst_addr));
+ if (CHECK(ret, "bind", "failed\n")) {
+ log_err("failed to bind tx socket");
+ goto out;
+ }
+
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ n = sendmsg(fd, &msg, 0);
+ if (CHECK(n <= 0, "sendmsg", "failed\n")) {
+ log_err("failed to send echo reply");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ close(fd);
+ return ret;
+}
+
+static int tcp_echo_test(int client_fd, int server_fd)
+{
+ int err;
+
+ err = send_byte(client_fd);
+ if (err)
+ return -1;
+ err = tcp_recv_send(server_fd);
+ if (err)
+ return -1;
+ err = recv_byte(client_fd);
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static int udp_echo_test(int client_fd, int server_fd)
+{
+ int err;
+
+ err = send_byte(client_fd);
+ if (err)
+ return -1;
+ err = udp_recv_send(server_fd);
+ if (err)
+ return -1;
+ err = recv_byte(client_fd);
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+ int net_fd;
+
+ net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK(net_fd < 0, "open", "failed\n")) {
+ log_err("failed to open /proc/self/ns/net");
+ return NULL;
+ }
+
+ link = bpf_program__attach_netns(prog, net_fd);
+ if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) {
+ errno = -PTR_ERR(link);
+ log_err("failed to attach program '%s' to netns",
+ bpf_program__name(prog));
+ link = NULL;
+ }
+
+ close(net_fd);
+ return link;
+}
+
+static int update_lookup_map(struct bpf_map *map, int index, int sock_fd)
+{
+ int err, map_fd;
+ uint64_t value;
+
+ map_fd = bpf_map__fd(map);
+ if (CHECK(map_fd < 0, "bpf_map__fd", "failed\n")) {
+ errno = -map_fd;
+ log_err("failed to get map FD");
+ return -1;
+ }
+
+ value = (uint64_t)sock_fd;
+ err = bpf_map_update_elem(map_fd, &index, &value, BPF_NOEXIST);
+ if (CHECK(err, "bpf_map_update_elem", "failed\n")) {
+ log_err("failed to update redir_map @ %d", index);
+ return -1;
+ }
+
+ return 0;
+}
+
+static __u32 link_info_prog_id(struct bpf_link *link)
+{
+ struct bpf_link_info info = {};
+ __u32 info_len = sizeof(info);
+ int link_fd, err;
+
+ link_fd = bpf_link__fd(link);
+ if (CHECK(link_fd < 0, "bpf_link__fd", "failed\n")) {
+ errno = -link_fd;
+ log_err("bpf_link__fd failed");
+ return 0;
+ }
+
+ err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len);
+ if (CHECK(err, "bpf_obj_get_info_by_fd", "failed\n")) {
+ log_err("bpf_obj_get_info_by_fd");
+ return 0;
+ }
+ if (CHECK(info_len != sizeof(info), "bpf_obj_get_info_by_fd",
+ "unexpected info len %u\n", info_len))
+ return 0;
+
+ return info.prog_id;
+}
+
+static void query_lookup_prog(struct test_sk_lookup *skel)
+{
+ struct bpf_link *link[3] = {};
+ __u32 attach_flags = 0;
+ __u32 prog_ids[3] = {};
+ __u32 prog_cnt = 3;
+ __u32 prog_id;
+ int net_fd;
+ int err;
+
+ net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK(net_fd < 0, "open", "failed\n")) {
+ log_err("failed to open /proc/self/ns/net");
+ return;
+ }
+
+ link[0] = attach_lookup_prog(skel->progs.lookup_pass);
+ if (!link[0])
+ goto close;
+ link[1] = attach_lookup_prog(skel->progs.lookup_pass);
+ if (!link[1])
+ goto detach;
+ link[2] = attach_lookup_prog(skel->progs.lookup_drop);
+ if (!link[2])
+ goto detach;
+
+ err = bpf_prog_query(net_fd, BPF_SK_LOOKUP, 0 /* query flags */,
+ &attach_flags, prog_ids, &prog_cnt);
+ if (CHECK(err, "bpf_prog_query", "failed\n")) {
+ log_err("failed to query lookup prog");
+ goto detach;
+ }
+
+ errno = 0;
+ if (CHECK(attach_flags != 0, "bpf_prog_query",
+ "wrong attach_flags on query: %u", attach_flags))
+ goto detach;
+ if (CHECK(prog_cnt != 3, "bpf_prog_query",
+ "wrong program count on query: %u", prog_cnt))
+ goto detach;
+ prog_id = link_info_prog_id(link[0]);
+ CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
+ "invalid program #0 id on query: %u != %u\n",
+ prog_ids[0], prog_id);
+ prog_id = link_info_prog_id(link[1]);
+ CHECK(prog_ids[1] != prog_id, "bpf_prog_query",
+ "invalid program #1 id on query: %u != %u\n",
+ prog_ids[1], prog_id);
+ prog_id = link_info_prog_id(link[2]);
+ CHECK(prog_ids[2] != prog_id, "bpf_prog_query",
+ "invalid program #2 id on query: %u != %u\n",
+ prog_ids[2], prog_id);
+
+detach:
+ if (link[2])
+ bpf_link__destroy(link[2]);
+ if (link[1])
+ bpf_link__destroy(link[1]);
+ if (link[0])
+ bpf_link__destroy(link[0]);
+close:
+ close(net_fd);
+}
+
+static void run_lookup_prog(const struct test *t)
+{
+ int client_fd, server_fds[MAX_SERVERS] = { -1 };
+ struct bpf_link *lookup_link;
+ int i, err;
+
+ lookup_link = attach_lookup_prog(t->lookup_prog);
+ if (!lookup_link)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+ server_fds[i] = make_server(t->sotype, t->listen_at.ip,
+ t->listen_at.port,
+ t->reuseport_prog);
+ if (server_fds[i] < 0)
+ goto close;
+
+ err = update_lookup_map(t->sock_map, i, server_fds[i]);
+ if (err)
+ goto close;
+
+ /* want just one server for non-reuseport test */
+ if (!t->reuseport_prog)
+ break;
+ }
+
+ client_fd = make_client(t->sotype, t->connect_to.ip, t->connect_to.port);
+ if (client_fd < 0)
+ goto close;
+
+ if (t->sotype == SOCK_STREAM)
+ tcp_echo_test(client_fd, server_fds[t->accept_on]);
+ else
+ udp_echo_test(client_fd, server_fds[t->accept_on]);
+
+ close(client_fd);
+close:
+ for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+ if (server_fds[i] != -1)
+ close(server_fds[i]);
+ }
+ bpf_link__destroy(lookup_link);
+}
+
+static void test_redirect_lookup(struct test_sk_lookup *skel)
+{
+ const struct test tests[] = {
+ {
+ .desc = "TCP IPv4 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv4 redir addr",
+ .lookup_prog = skel->progs.redir_ip4,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv4 redir with reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "TCP IPv4 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ {
+ .desc = "TCP IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 redir addr",
+ .lookup_prog = skel->progs.redir_ip6,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv4->IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4_V6, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 redir with reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "TCP IPv6 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ {
+ .desc = "UDP IPv4 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, INT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 redir addr",
+ .lookup_prog = skel->progs.redir_ip4,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 redir with reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "UDP IPv4 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ {
+ .desc = "UDP IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, INT_PORT },
+ },
+ {
+ .desc = "UDP IPv6 redir addr",
+ .lookup_prog = skel->progs.redir_ip6,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4->IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .listen_at = { INT_IP4_V6, INT_PORT },
+ .connect_to = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv6 redir and reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "UDP IPv6 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ if (test__start_subtest(t->desc))
+ run_lookup_prog(t);
+ }
+}
+
+static void drop_on_lookup(const struct test *t)
+{
+ struct sockaddr_storage dst = {};
+ int client_fd, server_fd, err;
+ struct bpf_link *lookup_link;
+ ssize_t n;
+
+ lookup_link = attach_lookup_prog(t->lookup_prog);
+ if (!lookup_link)
+ return;
+
+ server_fd = make_server(t->sotype, t->listen_at.ip, t->listen_at.port,
+ t->reuseport_prog);
+ if (server_fd < 0)
+ goto detach;
+
+ client_fd = make_socket(t->sotype, t->connect_to.ip,
+ t->connect_to.port, &dst);
+ if (client_fd < 0)
+ goto close_srv;
+
+ err = connect(client_fd, (void *)&dst, inetaddr_len(&dst));
+ if (t->sotype == SOCK_DGRAM) {
+ err = send_byte(client_fd);
+ if (err)
+ goto close_all;
+
+ /* Read out asynchronous error */
+ n = recv(client_fd, NULL, 0, 0);
+ err = n == -1;
+ }
+ if (CHECK(!err || errno != ECONNREFUSED, "connect",
+ "unexpected success or error\n"))
+ log_err("expected ECONNREFUSED on connect");
+
+close_all:
+ close(client_fd);
+close_srv:
+ close(server_fd);
+detach:
+ bpf_link__destroy(lookup_link);
+}
+
+static void test_drop_on_lookup(struct test_sk_lookup *skel)
+{
+ const struct test tests[] = {
+ {
+ .desc = "TCP IPv4 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv6 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, INT_PORT },
+ },
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ if (test__start_subtest(t->desc))
+ drop_on_lookup(t);
+ }
+}
+
+static void drop_on_reuseport(const struct test *t)
+{
+ struct sockaddr_storage dst = { 0 };
+ int client, server1, server2, err;
+ struct bpf_link *lookup_link;
+ ssize_t n;
+
+ lookup_link = attach_lookup_prog(t->lookup_prog);
+ if (!lookup_link)
+ return;
+
+ server1 = make_server(t->sotype, t->listen_at.ip, t->listen_at.port,
+ t->reuseport_prog);
+ if (server1 < 0)
+ goto detach;
+
+ err = update_lookup_map(t->sock_map, SERVER_A, server1);
+ if (err)
+ goto detach;
+
+ /* second server on destination address we should never reach */
+ server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port,
+ NULL /* reuseport prog */);
+ if (server2 < 0)
+ goto close_srv1;
+
+ client = make_socket(t->sotype, t->connect_to.ip,
+ t->connect_to.port, &dst);
+ if (client < 0)
+ goto close_srv2;
+
+ err = connect(client, (void *)&dst, inetaddr_len(&dst));
+ if (t->sotype == SOCK_DGRAM) {
+ err = send_byte(client);
+ if (err)
+ goto close_all;
+
+ /* Read out asynchronous error */
+ n = recv(client, NULL, 0, 0);
+ err = n == -1;
+ }
+ if (CHECK(!err || errno != ECONNREFUSED, "connect",
+ "unexpected success or error\n"))
+ log_err("expected ECONNREFUSED on connect");
+
+close_all:
+ close(client);
+close_srv2:
+ close(server2);
+close_srv1:
+ close(server1);
+detach:
+ bpf_link__destroy(lookup_link);
+}
+
+static void test_drop_on_reuseport(struct test_sk_lookup *skel)
+{
+ const struct test tests[] = {
+ {
+ .desc = "TCP IPv4 drop on reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.reuseport_drop,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 drop on reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.reuseport_drop,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 drop on reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.reuseport_drop,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 drop on reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.reuseport_drop,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ },
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ if (test__start_subtest(t->desc))
+ drop_on_reuseport(t);
+ }
+}
+
+static void run_sk_assign(struct test_sk_lookup *skel,
+ struct bpf_program *lookup_prog,
+ const char *listen_ip, const char *connect_ip)
+{
+ int client_fd, peer_fd, server_fds[MAX_SERVERS] = { -1 };
+ struct bpf_link *lookup_link;
+ int i, err;
+
+ lookup_link = attach_lookup_prog(lookup_prog);
+ if (!lookup_link)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+ server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL);
+ if (server_fds[i] < 0)
+ goto close_servers;
+
+ err = update_lookup_map(skel->maps.redir_map, i,
+ server_fds[i]);
+ if (err)
+ goto close_servers;
+ }
+
+ client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT);
+ if (client_fd < 0)
+ goto close_servers;
+
+ peer_fd = accept(server_fds[SERVER_B], NULL, NULL);
+ if (CHECK(peer_fd < 0, "accept", "failed\n"))
+ goto close_client;
+
+ close(peer_fd);
+close_client:
+ close(client_fd);
+close_servers:
+ for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+ if (server_fds[i] != -1)
+ close(server_fds[i]);
+ }
+ bpf_link__destroy(lookup_link);
+}
+
+static void run_sk_assign_v4(struct test_sk_lookup *skel,
+ struct bpf_program *lookup_prog)
+{
+ run_sk_assign(skel, lookup_prog, INT_IP4, EXT_IP4);
+}
+
+static void run_sk_assign_v6(struct test_sk_lookup *skel,
+ struct bpf_program *lookup_prog)
+{
+ run_sk_assign(skel, lookup_prog, INT_IP6, EXT_IP6);
+}
+
+static void run_sk_assign_connected(struct test_sk_lookup *skel,
+ int sotype)
+{
+ int err, client_fd, connected_fd, server_fd;
+ struct bpf_link *lookup_link;
+
+ server_fd = make_server(sotype, EXT_IP4, EXT_PORT, NULL);
+ if (server_fd < 0)
+ return;
+
+ connected_fd = make_client(sotype, EXT_IP4, EXT_PORT);
+ if (connected_fd < 0)
+ goto out_close_server;
+
+ /* Put a connected socket in redirect map */
+ err = update_lookup_map(skel->maps.redir_map, SERVER_A, connected_fd);
+ if (err)
+ goto out_close_connected;
+
+ lookup_link = attach_lookup_prog(skel->progs.sk_assign_esocknosupport);
+ if (!lookup_link)
+ goto out_close_connected;
+
+ /* Try to redirect TCP SYN / UDP packet to a connected socket */
+ client_fd = make_client(sotype, EXT_IP4, EXT_PORT);
+ if (client_fd < 0)
+ goto out_unlink_prog;
+ if (sotype == SOCK_DGRAM) {
+ send_byte(client_fd);
+ recv_byte(server_fd);
+ }
+
+ close(client_fd);
+out_unlink_prog:
+ bpf_link__destroy(lookup_link);
+out_close_connected:
+ close(connected_fd);
+out_close_server:
+ close(server_fd);
+}
+
+static void test_sk_assign_helper(struct test_sk_lookup *skel)
+{
+ if (test__start_subtest("sk_assign returns EEXIST"))
+ run_sk_assign_v4(skel, skel->progs.sk_assign_eexist);
+ if (test__start_subtest("sk_assign honors F_REPLACE"))
+ run_sk_assign_v4(skel, skel->progs.sk_assign_replace_flag);
+ if (test__start_subtest("sk_assign accepts NULL socket"))
+ run_sk_assign_v4(skel, skel->progs.sk_assign_null);
+ if (test__start_subtest("access ctx->sk"))
+ run_sk_assign_v4(skel, skel->progs.access_ctx_sk);
+ if (test__start_subtest("narrow access to ctx v4"))
+ run_sk_assign_v4(skel, skel->progs.ctx_narrow_access);
+ if (test__start_subtest("narrow access to ctx v6"))
+ run_sk_assign_v6(skel, skel->progs.ctx_narrow_access);
+ if (test__start_subtest("sk_assign rejects TCP established"))
+ run_sk_assign_connected(skel, SOCK_STREAM);
+ if (test__start_subtest("sk_assign rejects UDP connected"))
+ run_sk_assign_connected(skel, SOCK_DGRAM);
+}
+
+struct test_multi_prog {
+ const char *desc;
+ struct bpf_program *prog1;
+ struct bpf_program *prog2;
+ struct bpf_map *redir_map;
+ struct bpf_map *run_map;
+ int expect_errno;
+ struct inet_addr listen_at;
+};
+
+static void run_multi_prog_lookup(const struct test_multi_prog *t)
+{
+ struct sockaddr_storage dst = {};
+ int map_fd, server_fd, client_fd;
+ struct bpf_link *link1, *link2;
+ int prog_idx, done, err;
+
+ map_fd = bpf_map__fd(t->run_map);
+
+ done = 0;
+ prog_idx = PROG1;
+ err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY);
+ if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+ return;
+ prog_idx = PROG2;
+ err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY);
+ if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+ return;
+
+ link1 = attach_lookup_prog(t->prog1);
+ if (!link1)
+ return;
+ link2 = attach_lookup_prog(t->prog2);
+ if (!link2)
+ goto out_unlink1;
+
+ server_fd = make_server(SOCK_STREAM, t->listen_at.ip,
+ t->listen_at.port, NULL);
+ if (server_fd < 0)
+ goto out_unlink2;
+
+ err = update_lookup_map(t->redir_map, SERVER_A, server_fd);
+ if (err)
+ goto out_close_server;
+
+ client_fd = make_socket(SOCK_STREAM, EXT_IP4, EXT_PORT, &dst);
+ if (client_fd < 0)
+ goto out_close_server;
+
+ err = connect(client_fd, (void *)&dst, inetaddr_len(&dst));
+ if (CHECK(err && !t->expect_errno, "connect",
+ "unexpected error %d\n", errno))
+ goto out_close_client;
+ if (CHECK(err && t->expect_errno && errno != t->expect_errno,
+ "connect", "unexpected error %d\n", errno))
+ goto out_close_client;
+
+ done = 0;
+ prog_idx = PROG1;
+ err = bpf_map_lookup_elem(map_fd, &prog_idx, &done);
+ CHECK(err, "bpf_map_lookup_elem", "failed\n");
+ CHECK(!done, "bpf_map_lookup_elem", "PROG1 !done\n");
+
+ done = 0;
+ prog_idx = PROG2;
+ err = bpf_map_lookup_elem(map_fd, &prog_idx, &done);
+ CHECK(err, "bpf_map_lookup_elem", "failed\n");
+ CHECK(!done, "bpf_map_lookup_elem", "PROG2 !done\n");
+
+out_close_client:
+ close(client_fd);
+out_close_server:
+ close(server_fd);
+out_unlink2:
+ bpf_link__destroy(link2);
+out_unlink1:
+ bpf_link__destroy(link1);
+}
+
+static void test_multi_prog_lookup(struct test_sk_lookup *skel)
+{
+ struct test_multi_prog tests[] = {
+ {
+ .desc = "multi prog - pass, pass",
+ .prog1 = skel->progs.multi_prog_pass1,
+ .prog2 = skel->progs.multi_prog_pass2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "multi prog - drop, drop",
+ .prog1 = skel->progs.multi_prog_drop1,
+ .prog2 = skel->progs.multi_prog_drop2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ .expect_errno = ECONNREFUSED,
+ },
+ {
+ .desc = "multi prog - pass, drop",
+ .prog1 = skel->progs.multi_prog_pass1,
+ .prog2 = skel->progs.multi_prog_drop2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ .expect_errno = ECONNREFUSED,
+ },
+ {
+ .desc = "multi prog - drop, pass",
+ .prog1 = skel->progs.multi_prog_drop1,
+ .prog2 = skel->progs.multi_prog_pass2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ .expect_errno = ECONNREFUSED,
+ },
+ {
+ .desc = "multi prog - pass, redir",
+ .prog1 = skel->progs.multi_prog_pass1,
+ .prog2 = skel->progs.multi_prog_redir2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - redir, pass",
+ .prog1 = skel->progs.multi_prog_redir1,
+ .prog2 = skel->progs.multi_prog_pass2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - drop, redir",
+ .prog1 = skel->progs.multi_prog_drop1,
+ .prog2 = skel->progs.multi_prog_redir2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - redir, drop",
+ .prog1 = skel->progs.multi_prog_redir1,
+ .prog2 = skel->progs.multi_prog_drop2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - redir, redir",
+ .prog1 = skel->progs.multi_prog_redir1,
+ .prog2 = skel->progs.multi_prog_redir2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ };
+ struct test_multi_prog *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ t->redir_map = skel->maps.redir_map;
+ t->run_map = skel->maps.run_map;
+ if (test__start_subtest(t->desc))
+ run_multi_prog_lookup(t);
+ }
+}
+
+static void run_tests(struct test_sk_lookup *skel)
+{
+ if (test__start_subtest("query lookup prog"))
+ query_lookup_prog(skel);
+ test_redirect_lookup(skel);
+ test_drop_on_lookup(skel);
+ test_drop_on_reuseport(skel);
+ test_sk_assign_helper(skel);
+ test_multi_prog_lookup(skel);
+}
+
+static int switch_netns(void)
+{
+ static const char * const setup_script[] = {
+ "ip -6 addr add dev lo " EXT_IP6 "/128 nodad",
+ "ip -6 addr add dev lo " INT_IP6 "/128 nodad",
+ "ip link set dev lo up",
+ NULL,
+ };
+ const char * const *cmd;
+ int err;
+
+ err = unshare(CLONE_NEWNET);
+ if (CHECK(err, "unshare", "failed\n")) {
+ log_err("unshare(CLONE_NEWNET)");
+ return -1;
+ }
+
+ for (cmd = setup_script; *cmd; cmd++) {
+ err = system(*cmd);
+ if (CHECK(err, "system", "failed\n")) {
+ log_err("system(%s)", *cmd);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+void test_sk_lookup(void)
+{
+ struct test_sk_lookup *skel;
+ int err;
+
+ err = switch_netns();
+ if (err)
+ return;
+
+ skel = test_sk_lookup__open_and_load();
+ if (CHECK(!skel, "skel open_and_load", "failed\n"))
+ return;
+
+ run_tests(skel);
+
+ test_sk_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index 8547ecbdc61f..ec281b0363b8 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -193,11 +193,10 @@ static void run_test(int cgroup_fd)
if (CHECK_FAIL(server_fd < 0))
goto close_bpf_object;
+ pthread_mutex_lock(&server_started_mtx);
if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
(void *)&server_fd)))
goto close_server_fd;
-
- pthread_mutex_lock(&server_started_mtx);
pthread_cond_wait(&server_started, &server_started_mtx);
pthread_mutex_unlock(&server_started_mtx);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
new file mode 100644
index 000000000000..0176573fe4e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/bpf.h>
+#include <linux/if_link.h>
+#include <test_progs.h>
+
+#include "test_xdp_with_cpumap_helpers.skel.h"
+
+#define IFINDEX_LO 1
+
+void test_xdp_with_cpumap_helpers(void)
+{
+ struct test_xdp_with_cpumap_helpers *skel;
+ struct bpf_prog_info info = {};
+ struct bpf_cpumap_val val = {
+ .qsize = 192,
+ };
+ __u32 duration = 0, idx = 0;
+ __u32 len = sizeof(info);
+ int err, prog_fd, map_fd;
+
+ skel = test_xdp_with_cpumap_helpers__open_and_load();
+ if (CHECK_FAIL(!skel)) {
+ perror("test_xdp_with_cpumap_helpers__open_and_load");
+ return;
+ }
+
+ /* can not attach program with cpumaps that allow programs
+ * as xdp generic
+ */
+ prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+ CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP",
+ "should have failed\n");
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+ map_fd = bpf_map__fd(skel->maps.cpu_map);
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
+ if (CHECK_FAIL(err))
+ goto out_close;
+
+ val.bpf_prog.fd = prog_fd;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ CHECK(err, "Add program to cpumap entry", "err %d errno %d\n",
+ err, errno);
+
+ err = bpf_map_lookup_elem(map_fd, &idx, &val);
+ CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno);
+ CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry",
+ "expected %u read %u\n", info.id, val.bpf_prog.id);
+
+ /* can not attach BPF_XDP_CPUMAP program to a device */
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+ CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program",
+ "should have failed\n");
+
+ val.qsize = 192;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry",
+ "should have failed\n");
+
+out_close:
+ test_xdp_with_cpumap_helpers__destroy(skel);
+}
+
+void test_xdp_cpumap_attach(void)
+{
+ if (test__start_subtest("cpumap_with_progs"))
+ test_xdp_with_cpumap_helpers();
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index 7de98a68599a..95989f4c99b5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -36,10 +36,10 @@ int dump_netlink(struct bpf_iter__netlink *ctx)
if (!nlk->groups) {
group = 0;
} else {
- /* FIXME: temporary use bpf_probe_read here, needs
+ /* FIXME: temporary use bpf_probe_read_kernel here, needs
* verifier support to do direct access.
*/
- bpf_probe_read(&group, sizeof(group), &nlk->groups[0]);
+ bpf_probe_read_kernel(&group, sizeof(group), &nlk->groups[0]);
}
BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ",
nlk->portid, (u32)group,
@@ -56,7 +56,7 @@ int dump_netlink(struct bpf_iter__netlink *ctx)
* with current verifier.
*/
inode = SOCK_INODE(sk);
- bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
}
BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index 30fd587cb325..54380c5e1069 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -57,7 +57,7 @@ static long sock_i_ino(const struct sock *sk)
return 0;
inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
- bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
return ino;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
index 10dec4392031..b4fbddfa4e10 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -57,7 +57,7 @@ static long sock_i_ino(const struct sock *sk)
return 0;
inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
- bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
return ino;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
index 7053784575e4..f258583afbbd 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -18,7 +18,7 @@ static long sock_i_ino(const struct sock *sk)
return 0;
inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
- bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
return ino;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
index c1175a6ecf43..65f93bb03f0f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -25,7 +25,7 @@ static long sock_i_ino(const struct sock *sk)
return 0;
inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
- bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
return ino;
}
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
new file mode 100644
index 000000000000..bbf8296f4d66
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -0,0 +1,641 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#define IP4(a, b, c, d) \
+ bpf_htonl((((__u32)(a) & 0xffU) << 24) | \
+ (((__u32)(b) & 0xffU) << 16) | \
+ (((__u32)(c) & 0xffU) << 8) | \
+ (((__u32)(d) & 0xffU) << 0))
+#define IP6(aaaa, bbbb, cccc, dddd) \
+ { bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) }
+
+#define MAX_SOCKS 32
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, MAX_SOCKS);
+ __type(key, __u32);
+ __type(value, __u64);
+} redir_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, int);
+} run_map SEC(".maps");
+
+enum {
+ PROG1 = 0,
+ PROG2,
+};
+
+enum {
+ SERVER_A = 0,
+ SERVER_B,
+};
+
+/* Addressable key/value constants for convenience */
+static const int KEY_PROG1 = PROG1;
+static const int KEY_PROG2 = PROG2;
+static const int PROG_DONE = 1;
+
+static const __u32 KEY_SERVER_A = SERVER_A;
+static const __u32 KEY_SERVER_B = SERVER_B;
+
+static const __u16 DST_PORT = 7007; /* Host byte order */
+static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
+static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
+
+SEC("sk_lookup/lookup_pass")
+int lookup_pass(struct bpf_sk_lookup *ctx)
+{
+ return SK_PASS;
+}
+
+SEC("sk_lookup/lookup_drop")
+int lookup_drop(struct bpf_sk_lookup *ctx)
+{
+ return SK_DROP;
+}
+
+SEC("sk_reuseport/reuse_pass")
+int reuseport_pass(struct sk_reuseport_md *ctx)
+{
+ return SK_PASS;
+}
+
+SEC("sk_reuseport/reuse_drop")
+int reuseport_drop(struct sk_reuseport_md *ctx)
+{
+ return SK_DROP;
+}
+
+/* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */
+SEC("sk_lookup/redir_port")
+int redir_port(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ if (ctx->local_port != DST_PORT)
+ return SK_PASS;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+/* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */
+SEC("sk_lookup/redir_ip4")
+int redir_ip4(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ if (ctx->family != AF_INET)
+ return SK_PASS;
+ if (ctx->local_port != DST_PORT)
+ return SK_PASS;
+ if (ctx->local_ip4 != DST_IP4)
+ return SK_PASS;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+/* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */
+SEC("sk_lookup/redir_ip6")
+int redir_ip6(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ if (ctx->family != AF_INET6)
+ return SK_PASS;
+ if (ctx->local_port != DST_PORT)
+ return SK_PASS;
+ if (ctx->local_ip6[0] != DST_IP6[0] ||
+ ctx->local_ip6[1] != DST_IP6[1] ||
+ ctx->local_ip6[2] != DST_IP6[2] ||
+ ctx->local_ip6[3] != DST_IP6[3])
+ return SK_PASS;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_lookup/select_sock_a")
+int select_sock_a(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_lookup/select_sock_a_no_reuseport")
+int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_DROP;
+
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_NO_REUSEPORT);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_reuseport/select_sock_b")
+int select_sock_b(struct sk_reuseport_md *ctx)
+{
+ __u32 key = KEY_SERVER_B;
+ int err;
+
+ err = bpf_sk_select_reuseport(ctx, &redir_map, &key, 0);
+ return err ? SK_DROP : SK_PASS;
+}
+
+/* Check that bpf_sk_assign() returns -EEXIST if socket already selected. */
+SEC("sk_lookup/sk_assign_eexist")
+int sk_assign_eexist(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, ret;
+
+ ret = SK_DROP;
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err)
+ goto out;
+ bpf_sk_release(sk);
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err != -EEXIST) {
+ bpf_printk("sk_assign returned %d, expected %d\n",
+ err, -EEXIST);
+ goto out;
+ }
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+/* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */
+SEC("sk_lookup/sk_assign_replace_flag")
+int sk_assign_replace_flag(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, ret;
+
+ ret = SK_DROP;
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err)
+ goto out;
+ bpf_sk_release(sk);
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+ if (err) {
+ bpf_printk("sk_assign returned %d, expected 0\n", err);
+ goto out;
+ }
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+/* Check that bpf_sk_assign(sk=NULL) is accepted. */
+SEC("sk_lookup/sk_assign_null")
+int sk_assign_null(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk = NULL;
+ int err, ret;
+
+ ret = SK_DROP;
+
+ err = bpf_sk_assign(ctx, NULL, 0);
+ if (err) {
+ bpf_printk("sk_assign returned %d, expected 0\n", err);
+ goto out;
+ }
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+ if (err) {
+ bpf_printk("sk_assign returned %d, expected 0\n", err);
+ goto out;
+ }
+
+ if (ctx->sk != sk)
+ goto out;
+ err = bpf_sk_assign(ctx, NULL, 0);
+ if (err != -EEXIST)
+ goto out;
+ err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+/* Check that selected sk is accessible through context. */
+SEC("sk_lookup/access_ctx_sk")
+int access_ctx_sk(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk1 = NULL, *sk2 = NULL;
+ int err, ret;
+
+ ret = SK_DROP;
+
+ /* Try accessing unassigned (NULL) ctx->sk field */
+ if (ctx->sk && ctx->sk->family != AF_INET)
+ goto out;
+
+ /* Assign a value to ctx->sk */
+ sk1 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk1)
+ goto out;
+ err = bpf_sk_assign(ctx, sk1, 0);
+ if (err)
+ goto out;
+ if (ctx->sk != sk1)
+ goto out;
+
+ /* Access ctx->sk fields */
+ if (ctx->sk->family != AF_INET ||
+ ctx->sk->type != SOCK_STREAM ||
+ ctx->sk->state != BPF_TCP_LISTEN)
+ goto out;
+
+ /* Reset selection */
+ err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+ if (ctx->sk)
+ goto out;
+
+ /* Assign another socket */
+ sk2 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk2)
+ goto out;
+ err = bpf_sk_assign(ctx, sk2, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+ if (ctx->sk != sk2)
+ goto out;
+
+ /* Access reassigned ctx->sk fields */
+ if (ctx->sk->family != AF_INET ||
+ ctx->sk->type != SOCK_STREAM ||
+ ctx->sk->state != BPF_TCP_LISTEN)
+ goto out;
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk1)
+ bpf_sk_release(sk1);
+ if (sk2)
+ bpf_sk_release(sk2);
+ return ret;
+}
+
+/* Check narrow loads from ctx fields that support them.
+ *
+ * Narrow loads of size >= target field size from a non-zero offset
+ * are not covered because they give bogus results, that is the
+ * verifier ignores the offset.
+ */
+SEC("sk_lookup/ctx_narrow_access")
+int ctx_narrow_access(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, family;
+ __u16 *half;
+ __u8 *byte;
+ bool v4;
+
+ v4 = (ctx->family == AF_INET);
+
+ /* Narrow loads from family field */
+ byte = (__u8 *)&ctx->family;
+ half = (__u16 *)&ctx->family;
+ if (byte[0] != (v4 ? AF_INET : AF_INET6) ||
+ byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
+ return SK_DROP;
+ if (half[0] != (v4 ? AF_INET : AF_INET6))
+ return SK_DROP;
+
+ byte = (__u8 *)&ctx->protocol;
+ if (byte[0] != IPPROTO_TCP ||
+ byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->protocol;
+ if (half[0] != IPPROTO_TCP)
+ return SK_DROP;
+
+ /* Narrow loads from remote_port field. Expect non-0 value. */
+ byte = (__u8 *)&ctx->remote_port;
+ if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->remote_port;
+ if (half[0] == 0)
+ return SK_DROP;
+
+ /* Narrow loads from local_port field. Expect DST_PORT. */
+ byte = (__u8 *)&ctx->local_port;
+ if (byte[0] != ((DST_PORT >> 0) & 0xff) ||
+ byte[1] != ((DST_PORT >> 8) & 0xff) ||
+ byte[2] != 0 || byte[3] != 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->local_port;
+ if (half[0] != DST_PORT)
+ return SK_DROP;
+
+ /* Narrow loads from IPv4 fields */
+ if (v4) {
+ /* Expect non-0.0.0.0 in remote_ip4 */
+ byte = (__u8 *)&ctx->remote_ip4;
+ if (byte[0] == 0 && byte[1] == 0 &&
+ byte[2] == 0 && byte[3] == 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->remote_ip4;
+ if (half[0] == 0 && half[1] == 0)
+ return SK_DROP;
+
+ /* Expect DST_IP4 in local_ip4 */
+ byte = (__u8 *)&ctx->local_ip4;
+ if (byte[0] != ((DST_IP4 >> 0) & 0xff) ||
+ byte[1] != ((DST_IP4 >> 8) & 0xff) ||
+ byte[2] != ((DST_IP4 >> 16) & 0xff) ||
+ byte[3] != ((DST_IP4 >> 24) & 0xff))
+ return SK_DROP;
+ half = (__u16 *)&ctx->local_ip4;
+ if (half[0] != ((DST_IP4 >> 0) & 0xffff) ||
+ half[1] != ((DST_IP4 >> 16) & 0xffff))
+ return SK_DROP;
+ } else {
+ /* Expect 0.0.0.0 IPs when family != AF_INET */
+ byte = (__u8 *)&ctx->remote_ip4;
+ if (byte[0] != 0 || byte[1] != 0 &&
+ byte[2] != 0 || byte[3] != 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->remote_ip4;
+ if (half[0] != 0 || half[1] != 0)
+ return SK_DROP;
+
+ byte = (__u8 *)&ctx->local_ip4;
+ if (byte[0] != 0 || byte[1] != 0 &&
+ byte[2] != 0 || byte[3] != 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->local_ip4;
+ if (half[0] != 0 || half[1] != 0)
+ return SK_DROP;
+ }
+
+ /* Narrow loads from IPv6 fields */
+ if (!v4) {
+ /* Expenct non-:: IP in remote_ip6 */
+ byte = (__u8 *)&ctx->remote_ip6;
+ if (byte[0] == 0 && byte[1] == 0 &&
+ byte[2] == 0 && byte[3] == 0 &&
+ byte[4] == 0 && byte[5] == 0 &&
+ byte[6] == 0 && byte[7] == 0 &&
+ byte[8] == 0 && byte[9] == 0 &&
+ byte[10] == 0 && byte[11] == 0 &&
+ byte[12] == 0 && byte[13] == 0 &&
+ byte[14] == 0 && byte[15] == 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->remote_ip6;
+ if (half[0] == 0 && half[1] == 0 &&
+ half[2] == 0 && half[3] == 0 &&
+ half[4] == 0 && half[5] == 0 &&
+ half[6] == 0 && half[7] == 0)
+ return SK_DROP;
+
+ /* Expect DST_IP6 in local_ip6 */
+ byte = (__u8 *)&ctx->local_ip6;
+ if (byte[0] != ((DST_IP6[0] >> 0) & 0xff) ||
+ byte[1] != ((DST_IP6[0] >> 8) & 0xff) ||
+ byte[2] != ((DST_IP6[0] >> 16) & 0xff) ||
+ byte[3] != ((DST_IP6[0] >> 24) & 0xff) ||
+ byte[4] != ((DST_IP6[1] >> 0) & 0xff) ||
+ byte[5] != ((DST_IP6[1] >> 8) & 0xff) ||
+ byte[6] != ((DST_IP6[1] >> 16) & 0xff) ||
+ byte[7] != ((DST_IP6[1] >> 24) & 0xff) ||
+ byte[8] != ((DST_IP6[2] >> 0) & 0xff) ||
+ byte[9] != ((DST_IP6[2] >> 8) & 0xff) ||
+ byte[10] != ((DST_IP6[2] >> 16) & 0xff) ||
+ byte[11] != ((DST_IP6[2] >> 24) & 0xff) ||
+ byte[12] != ((DST_IP6[3] >> 0) & 0xff) ||
+ byte[13] != ((DST_IP6[3] >> 8) & 0xff) ||
+ byte[14] != ((DST_IP6[3] >> 16) & 0xff) ||
+ byte[15] != ((DST_IP6[3] >> 24) & 0xff))
+ return SK_DROP;
+ half = (__u16 *)&ctx->local_ip6;
+ if (half[0] != ((DST_IP6[0] >> 0) & 0xffff) ||
+ half[1] != ((DST_IP6[0] >> 16) & 0xffff) ||
+ half[2] != ((DST_IP6[1] >> 0) & 0xffff) ||
+ half[3] != ((DST_IP6[1] >> 16) & 0xffff) ||
+ half[4] != ((DST_IP6[2] >> 0) & 0xffff) ||
+ half[5] != ((DST_IP6[2] >> 16) & 0xffff) ||
+ half[6] != ((DST_IP6[3] >> 0) & 0xffff) ||
+ half[7] != ((DST_IP6[3] >> 16) & 0xffff))
+ return SK_DROP;
+ } else {
+ /* Expect :: IPs when family != AF_INET6 */
+ byte = (__u8 *)&ctx->remote_ip6;
+ if (byte[0] != 0 || byte[1] != 0 ||
+ byte[2] != 0 || byte[3] != 0 ||
+ byte[4] != 0 || byte[5] != 0 ||
+ byte[6] != 0 || byte[7] != 0 ||
+ byte[8] != 0 || byte[9] != 0 ||
+ byte[10] != 0 || byte[11] != 0 ||
+ byte[12] != 0 || byte[13] != 0 ||
+ byte[14] != 0 || byte[15] != 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->remote_ip6;
+ if (half[0] != 0 || half[1] != 0 ||
+ half[2] != 0 || half[3] != 0 ||
+ half[4] != 0 || half[5] != 0 ||
+ half[6] != 0 || half[7] != 0)
+ return SK_DROP;
+
+ byte = (__u8 *)&ctx->local_ip6;
+ if (byte[0] != 0 || byte[1] != 0 ||
+ byte[2] != 0 || byte[3] != 0 ||
+ byte[4] != 0 || byte[5] != 0 ||
+ byte[6] != 0 || byte[7] != 0 ||
+ byte[8] != 0 || byte[9] != 0 ||
+ byte[10] != 0 || byte[11] != 0 ||
+ byte[12] != 0 || byte[13] != 0 ||
+ byte[14] != 0 || byte[15] != 0)
+ return SK_DROP;
+ half = (__u16 *)&ctx->local_ip6;
+ if (half[0] != 0 || half[1] != 0 ||
+ half[2] != 0 || half[3] != 0 ||
+ half[4] != 0 || half[5] != 0 ||
+ half[6] != 0 || half[7] != 0)
+ return SK_DROP;
+ }
+
+ /* Success, redirect to KEY_SERVER_B */
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (sk) {
+ bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ }
+ return SK_PASS;
+}
+
+/* Check that sk_assign rejects SERVER_A socket with -ESOCKNOSUPPORT */
+SEC("sk_lookup/sk_assign_esocknosupport")
+int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, ret;
+
+ ret = SK_DROP;
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ goto out;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err != -ESOCKTNOSUPPORT) {
+ bpf_printk("sk_assign returned %d, expected %d\n",
+ err, -ESOCKTNOSUPPORT);
+ goto out;
+ }
+
+ ret = SK_PASS; /* Success, pass to regular lookup */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+SEC("sk_lookup/multi_prog_pass1")
+int multi_prog_pass1(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_pass2")
+int multi_prog_pass2(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_drop1")
+int multi_prog_drop1(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+ return SK_DROP;
+}
+
+SEC("sk_lookup/multi_prog_drop2")
+int multi_prog_drop2(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+ return SK_DROP;
+}
+
+static __always_inline int select_server_a(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_DROP;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ if (err)
+ return SK_DROP;
+
+ return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_redir1")
+int multi_prog_redir1(struct bpf_sk_lookup *ctx)
+{
+ int ret;
+
+ ret = select_server_a(ctx);
+ bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_redir2")
+int multi_prog_redir2(struct bpf_sk_lookup *ctx)
+{
+ int ret;
+
+ ret = select_server_a(ctx);
+ bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "Dual BSD/GPL";
+__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
new file mode 100644
index 000000000000..59ee4f182ff8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO 1
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
+ __uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp_redir")
+int xdp_redir_prog(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&cpu_map, 1, 0);
+}
+
+SEC("xdp_dummy")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+SEC("xdp_cpumap/dummy_cm")
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+ if (ctx->ingress_ifindex == IFINDEX_LO)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
index 9df0d2ac45f8..4f6444bcd53f 100755
--- a/tools/testing/selftests/bpf/test_kmod.sh
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -10,7 +10,13 @@ if [ "$(id -u)" != "0" ]; then
exit $ksft_skip
fi
-SRC_TREE=../../../../
+if [ "$building_out_of_srctree" ]; then
+ # We are in linux-build/kselftest/bpf
+ OUTPUT=../../
+else
+ # We are in linux/tools/testing/selftests/bpf
+ OUTPUT=../../../../
+fi
test_run()
{
@@ -19,8 +25,8 @@ test_run()
echo "[ JIT enabled:$1 hardened:$2 ]"
dmesg -C
- if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then
- insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null
+ if [ -f ${OUTPUT}/lib/test_bpf.ko ]; then
+ insmod ${OUTPUT}/lib/test_bpf.ko 2> /dev/null
if [ $? -ne 0 ]; then
rc=1
fi
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
index 785eabf2a593..5620919fde9e 100755
--- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -140,7 +140,7 @@ ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
-kill -INT $!
+kill -TERM $!
if [[ $(< $TMP_FILE) != "foobar" ]]; then
exit 1
diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
new file mode 100644
index 000000000000..2ad5f974451c
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
@@ -0,0 +1,492 @@
+{
+ "valid 1,2,4,8-byte reads from bpf_sk_lookup",
+ .insns = {
+ /* 1-byte read from family field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 3),
+ /* 2-byte read from family field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 2),
+ /* 4-byte read from family field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+
+ /* 1-byte read from protocol field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 3),
+ /* 2-byte read from protocol field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 2),
+ /* 4-byte read from protocol field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+
+ /* 1-byte read from remote_ip4 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 3),
+ /* 2-byte read from remote_ip4 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 2),
+ /* 4-byte read from remote_ip4 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+
+ /* 1-byte read from remote_ip6 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 5),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 6),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 9),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 10),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 11),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 13),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 14),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 15),
+ /* 2-byte read from remote_ip6 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 6),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 10),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 14),
+ /* 4-byte read from remote_ip6 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+
+ /* 1-byte read from remote_port field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 3),
+ /* 2-byte read from remote_port field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 2),
+ /* 4-byte read from remote_port field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+
+ /* 1-byte read from local_ip4 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 3),
+ /* 2-byte read from local_ip4 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 2),
+ /* 4-byte read from local_ip4 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+
+ /* 1-byte read from local_ip6 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 5),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 6),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 9),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 10),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 11),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 13),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 14),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 15),
+ /* 2-byte read from local_ip6 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 6),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 10),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 14),
+ /* 4-byte read from local_ip6 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+
+ /* 1-byte read from local_port field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 3),
+ /* 2-byte read from local_port field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 2),
+ /* 4-byte read from local_port field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+
+ /* 8-byte read from sk field */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+/* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */
+{
+ "invalid 8-byte read from bpf_sk_lookup family field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup protocol field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup remote_ip4 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup remote_ip6 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup remote_port field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup local_ip4 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup local_ip6 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup local_port field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+/* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */
+{
+ "invalid 4-byte read from bpf_sk_lookup sk field",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 2-byte read from bpf_sk_lookup sk field",
+ .insns = {
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 1-byte read from bpf_sk_lookup sk field",
+ .insns = {
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+/* out of bounds and unaligned reads from bpf_sk_lookup */
+{
+ "invalid 4-byte read past end of bpf_sk_lookup",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ sizeof(struct bpf_sk_lookup)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte unaligned read from bpf_sk_lookup at odd offset",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte unaligned read from bpf_sk_lookup at even offset",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+/* in-bound and out-of-bound writes to bpf_sk_lookup */
+{
+ "invalid 8-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 2-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 1-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte write past end of bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ sizeof(struct bpf_sk_lookup)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
index 5274f4a1fba1..18b982d611de 100755
--- a/tools/testing/selftests/net/vrf_strict_mode_test.sh
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -379,6 +379,12 @@ if [ ! -x "$(command -v ip)" ]; then
exit 0
fi
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit 0
+fi
+
cleanup &> /dev/null
setup